xref: /aosp_15_r20/external/tensorflow/tensorflow/lite/delegates/gpu/common/task/weights_conversion.h (revision b6fb3261f9314811a0f4371741dbb8839866f948)
1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_WEIGHTS_CONVERSION_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_WEIGHTS_CONVERSION_H_
18 
19 #include <cstdint>
20 #include <string>
21 #include <vector>
22 
23 #include "absl/types/span.h"
24 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
25 #include "tensorflow/lite/delegates/gpu/common/shape.h"
26 #include "tensorflow/lite/delegates/gpu/common/status.h"
27 #include "tensorflow/lite/delegates/gpu/common/task/weights_layout.h"
28 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
29 #include "tensorflow/lite/delegates/gpu/common/types.h"
30 #include "tensorflow/lite/delegates/gpu/common/util.h"
31 
32 namespace tflite {
33 namespace gpu {
34 
// Shorthand for `unsigned int`; used in this header as the return type of
// the GetTotalElementsCountForLayout declarations.
35 using uint = unsigned int;
36 
37 template <DataType S, typename T>
RearrangeWeightsToOHWIOGroupI4O4(const tflite::gpu::Tensor<OHWI,S> & weights,int out_group_size,absl::Span<T> dst)38 void RearrangeWeightsToOHWIOGroupI4O4(
39     const tflite::gpu::Tensor<OHWI, S>& weights, int out_group_size,
40     absl::Span<T> dst) {
41   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
42   const int src_slices = DivideRoundUp(weights.shape.i, 4);
43   const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
44 
45   int counter = 0;
46   for (int d = 0; d < dst_groups; ++d) {
47     for (int y = 0; y < weights.shape.h; ++y) {
48       for (int x = 0; x < weights.shape.w; ++x) {
49         for (int s = 0; s < src_slices; ++s) {
50           for (int d_group = 0; d_group < out_group_size; ++d_group) {
51             for (int j = 0; j < 4; ++j) {
52               T filter;
53               for (int i = 0; i < 4; ++i) {
54                 const int s_ch = s * 4 + j;
55                 const int d_ch = (d * out_group_size + d_group) * 4 + i;
56                 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
57                   const int f_index =
58                       weights.shape.LinearIndex({d_ch, y, x, s_ch});
59                   filter[i] = weights.data[f_index];
60                 } else {
61                   filter[i] = 0.0f;
62                 }
63               }
64               dst[counter++] = filter;
65             }
66           }
67         }
68       }
69     }
70   }
71 }
72 
73 template <DataType S, typename T>
RearrangeWeightsToODHWIOGroupI4O4(const tflite::gpu::Tensor<OHWDI,S> & weights,int out_group_size,absl::Span<T> dst)74 void RearrangeWeightsToODHWIOGroupI4O4(
75     const tflite::gpu::Tensor<OHWDI, S>& weights, int out_group_size,
76     absl::Span<T> dst) {
77   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
78   const int src_slices = DivideRoundUp(weights.shape.i, 4);
79   const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
80 
81   int counter = 0;
82   for (int d = 0; d < dst_groups; ++d) {
83     for (int z = 0; z < weights.shape.d; ++z) {
84       for (int y = 0; y < weights.shape.h; ++y) {
85         for (int x = 0; x < weights.shape.w; ++x) {
86           for (int s = 0; s < src_slices; ++s) {
87             for (int d_group = 0; d_group < out_group_size; ++d_group) {
88               for (int j = 0; j < 4; ++j) {
89                 T filter;
90                 for (int i = 0; i < 4; ++i) {
91                   const int s_ch = s * 4 + j;
92                   const int d_ch = (d * out_group_size + d_group) * 4 + i;
93                   if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
94                     const int f_index =
95                         weights.shape.LinearIndex({d_ch, y, x, z, s_ch});
96                     filter[i] = weights.data[f_index];
97                   } else {
98                     filter[i] = 0.0f;
99                   }
100                 }
101                 dst[counter++] = filter;
102               }
103             }
104           }
105         }
106       }
107     }
108   }
109 }
110 
111 template <DataType S, typename T>
RearrangeWeightsToOHWIOGroupO4I4(const tflite::gpu::Tensor<OHWI,S> & weights,int out_group_size,absl::Span<T> dst)112 void RearrangeWeightsToOHWIOGroupO4I4(
113     const tflite::gpu::Tensor<OHWI, S>& weights, int out_group_size,
114     absl::Span<T> dst) {
115   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
116   const int src_slices = DivideRoundUp(weights.shape.i, 4);
117   const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
118 
119   int counter = 0;
120   for (int d = 0; d < dst_groups; ++d) {
121     for (int y = 0; y < weights.shape.h; ++y) {
122       for (int x = 0; x < weights.shape.w; ++x) {
123         for (int s = 0; s < src_slices; ++s) {
124           for (int d_group = 0; d_group < out_group_size; ++d_group) {
125             for (int j = 0; j < 4; ++j) {
126               T filter;
127               for (int i = 0; i < 4; ++i) {
128                 const int s_ch = s * 4 + i;
129                 const int d_ch = (d * out_group_size + d_group) * 4 + j;
130                 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
131                   const int f_index =
132                       weights.shape.LinearIndex({d_ch, y, x, s_ch});
133                   filter[i] = weights.data[f_index];
134                 } else {
135                   filter[i] = 0.0f;
136                 }
137               }
138               dst[counter++] = filter;
139             }
140           }
141         }
142       }
143     }
144   }
145 }
146 
147 template <DataType S, typename T>
RearrangeWeightsToODHWIOGroupO4I4(const tflite::gpu::Tensor<OHWDI,S> & weights,int out_group_size,absl::Span<T> dst)148 void RearrangeWeightsToODHWIOGroupO4I4(
149     const tflite::gpu::Tensor<OHWDI, S>& weights, int out_group_size,
150     absl::Span<T> dst) {
151   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
152   const int src_slices = DivideRoundUp(weights.shape.i, 4);
153   const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
154 
155   int counter = 0;
156   for (int d = 0; d < dst_groups; ++d) {
157     for (int z = 0; z < weights.shape.d; ++z) {
158       for (int y = 0; y < weights.shape.h; ++y) {
159         for (int x = 0; x < weights.shape.w; ++x) {
160           for (int s = 0; s < src_slices; ++s) {
161             for (int d_group = 0; d_group < out_group_size; ++d_group) {
162               for (int j = 0; j < 4; ++j) {
163                 T filter;
164                 for (int i = 0; i < 4; ++i) {
165                   const int s_ch = s * 4 + i;
166                   const int d_ch = (d * out_group_size + d_group) * 4 + j;
167                   if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
168                     const int f_index =
169                         weights.shape.LinearIndex({d_ch, y, x, z, s_ch});
170                     filter[i] = weights.data[f_index];
171                   } else {
172                     filter[i] = 0.0f;
173                   }
174                 }
175                 dst[counter++] = filter;
176               }
177             }
178           }
179         }
180       }
181     }
182   }
183 }
184 
185 template <DataType S, typename T>
RearrangeWeightsToI4HWIOOGroupO4(const tflite::gpu::Tensor<OHWI,S> & weights,int out_group_size,absl::Span<T> dst)186 void RearrangeWeightsToI4HWIOOGroupO4(
187     const tflite::gpu::Tensor<OHWI, S>& weights, int out_group_size,
188     absl::Span<T> dst) {
189   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
190   const int src_slices = DivideRoundUp(weights.shape.i, 4);
191   const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
192 
193   int counter = 0;
194   for (int j = 0; j < 4; ++j) {
195     for (int y = 0; y < weights.shape.h; ++y) {
196       for (int x = 0; x < weights.shape.w; ++x) {
197         for (int s = 0; s < src_slices; ++s) {
198           for (int d = 0; d < dst_groups; ++d) {
199             for (int d_group = 0; d_group < out_group_size; ++d_group) {
200               T filter;
201               for (int i = 0; i < 4; ++i) {
202                 const int s_ch = s * 4 + j;
203                 const int d_ch = (d * out_group_size + d_group) * 4 + i;
204                 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
205                   const int f_index =
206                       weights.shape.LinearIndex({d_ch, y, x, s_ch});
207                   filter[i] = weights.data[f_index];
208                 } else {
209                   filter[i] = 0.0f;
210                 }
211               }
212               dst[counter++] = filter;
213             }
214           }
215         }
216       }
217     }
218   }
219 }
220 
221 template <DataType S, typename T>
RearrangeWeightsToI4DHWIOOGroupO4(const tflite::gpu::Tensor<OHWDI,S> & weights,int out_group_size,absl::Span<T> dst)222 void RearrangeWeightsToI4DHWIOOGroupO4(
223     const tflite::gpu::Tensor<OHWDI, S>& weights, int out_group_size,
224     absl::Span<T> dst) {
225   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
226   const int src_slices = DivideRoundUp(weights.shape.i, 4);
227   const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
228 
229   int counter = 0;
230   for (int j = 0; j < 4; ++j) {
231     for (int z = 0; z < weights.shape.d; ++z) {
232       for (int y = 0; y < weights.shape.h; ++y) {
233         for (int x = 0; x < weights.shape.w; ++x) {
234           for (int s = 0; s < src_slices; ++s) {
235             for (int d = 0; d < dst_groups; ++d) {
236               for (int d_group = 0; d_group < out_group_size; ++d_group) {
237                 T filter;
238                 for (int i = 0; i < 4; ++i) {
239                   const int s_ch = s * 4 + j;
240                   const int d_ch = (d * out_group_size + d_group) * 4 + i;
241                   if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
242                     const int f_index =
243                         weights.shape.LinearIndex({d_ch, y, x, z, s_ch});
244                     filter[i] = weights.data[f_index];
245                   } else {
246                     filter[i] = 0.0f;
247                   }
248                 }
249                 dst[counter++] = filter;
250               }
251             }
252           }
253         }
254       }
255     }
256   }
257 }
258 
259 template <DataType S, typename T>
RearrangeWeightsToO4HWIOOGroupI4(const tflite::gpu::Tensor<OHWI,S> & weights,int out_group_size,absl::Span<T> dst)260 void RearrangeWeightsToO4HWIOOGroupI4(
261     const tflite::gpu::Tensor<OHWI, S>& weights, int out_group_size,
262     absl::Span<T> dst) {
263   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
264   const int src_slices = DivideRoundUp(weights.shape.i, 4);
265   const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
266 
267   int counter = 0;
268   for (int j = 0; j < 4; ++j) {
269     for (int y = 0; y < weights.shape.h; ++y) {
270       for (int x = 0; x < weights.shape.w; ++x) {
271         for (int s = 0; s < src_slices; ++s) {
272           for (int d = 0; d < dst_groups; ++d) {
273             for (int d_group = 0; d_group < out_group_size; ++d_group) {
274               T filter;
275               for (int i = 0; i < 4; ++i) {
276                 const int s_ch = s * 4 + i;
277                 const int d_ch = (d * out_group_size + d_group) * 4 + j;
278                 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
279                   const int f_index =
280                       weights.shape.LinearIndex({d_ch, y, x, s_ch});
281                   filter[i] = weights.data[f_index];
282                 } else {
283                   filter[i] = 0.0f;
284                 }
285               }
286               dst[counter++] = filter;
287             }
288           }
289         }
290       }
291     }
292   }
293 }
294 
295 template <DataType S, typename T>
RearrangeWeightsToO4DHWIOOGroupI4(const tflite::gpu::Tensor<OHWDI,S> & weights,int out_group_size,absl::Span<T> dst)296 void RearrangeWeightsToO4DHWIOOGroupI4(
297     const tflite::gpu::Tensor<OHWDI, S>& weights, int out_group_size,
298     absl::Span<T> dst) {
299   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
300   const int src_slices = DivideRoundUp(weights.shape.i, 4);
301   const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
302 
303   int counter = 0;
304   for (int j = 0; j < 4; ++j) {
305     for (int z = 0; z < weights.shape.d; ++z) {
306       for (int y = 0; y < weights.shape.h; ++y) {
307         for (int x = 0; x < weights.shape.w; ++x) {
308           for (int s = 0; s < src_slices; ++s) {
309             for (int d = 0; d < dst_groups; ++d) {
310               for (int d_group = 0; d_group < out_group_size; ++d_group) {
311                 T filter;
312                 for (int i = 0; i < 4; ++i) {
313                   const int s_ch = s * 4 + i;
314                   const int d_ch = (d * out_group_size + d_group) * 4 + j;
315                   if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
316                     const int f_index =
317                         weights.shape.LinearIndex({d_ch, y, x, z, s_ch});
318                     filter[i] = weights.data[f_index];
319                   } else {
320                     filter[i] = 0.0f;
321                   }
322                 }
323                 dst[counter++] = filter;
324               }
325             }
326           }
327         }
328       }
329     }
330   }
331 }
332 
333 template <DataType S, typename T>
RearrangeWeightsToOICustomSpatialI4O4(const tflite::gpu::Tensor<OHWI,S> & weights,const std::vector<int> & spatial_remap,absl::Span<T> dst)334 void RearrangeWeightsToOICustomSpatialI4O4(
335     const tflite::gpu::Tensor<OHWI, S>& weights,
336     const std::vector<int>& spatial_remap, absl::Span<T> dst) {
337   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
338   const int src_slices = DivideRoundUp(weights.shape.i, 4);
339 
340   int counter = 0;
341   for (int d = 0; d < dst_slices; ++d) {
342     for (int s = 0; s < src_slices; ++s) {
343       for (int y = 0; y < weights.shape.h; ++y) {
344         for (int x = 0; x < weights.shape.w; ++x) {
345           const int kernel_index = spatial_remap[y * weights.shape.w + x];
346           const int kernel_index_x = kernel_index % weights.shape.w;
347           const int kernel_index_y = kernel_index / weights.shape.w;
348           for (int i = 0; i < 4; ++i) {
349             T filter;
350             for (int j = 0; j < 4; ++j) {
351               const int s_ch = s * 4 + i;
352               const int d_ch = d * 4 + j;
353               if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
354                 const int f_index = weights.shape.LinearIndex(
355                     {d_ch, kernel_index_y, kernel_index_x, s_ch});
356                 filter[j] = weights.data[f_index];
357               } else {
358                 filter[j] = 0.0f;
359               }
360             }
361             dst[counter++] = filter;
362           }
363         }
364       }
365     }
366   }
367 }
368 
369 template <DataType S, typename T>
RearrangeWeightsToOICustomSpatialI4O4(const tflite::gpu::Tensor<OHWDI,S> & weights,const std::vector<int> & spatial_remap,absl::Span<T> dst)370 void RearrangeWeightsToOICustomSpatialI4O4(
371     const tflite::gpu::Tensor<OHWDI, S>& weights,
372     const std::vector<int>& spatial_remap, absl::Span<T> dst) {
373   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
374   const int src_slices = DivideRoundUp(weights.shape.i, 4);
375 
376   int counter = 0;
377   for (int d = 0; d < dst_slices; ++d) {
378     for (int s = 0; s < src_slices; ++s) {
379       for (int z = 0; z < weights.shape.d; ++z) {
380         for (int y = 0; y < weights.shape.h; ++y) {
381           for (int x = 0; x < weights.shape.w; ++x) {
382             int kernel_index =
383                 spatial_remap[(z * weights.shape.h + y) * weights.shape.w + x];
384             const int kernel_index_x = kernel_index % weights.shape.w;
385             kernel_index /= weights.shape.w;
386             const int kernel_index_y = kernel_index % weights.shape.h;
387             const int kernel_index_z = kernel_index / weights.shape.h;
388             for (int i = 0; i < 4; ++i) {
389               T filter;
390               for (int j = 0; j < 4; ++j) {
391                 const int s_ch = s * 4 + i;
392                 const int d_ch = d * 4 + j;
393                 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
394                   const int f_index = weights.shape.LinearIndex(
395                       {d_ch, kernel_index_y, kernel_index_x, kernel_index_z,
396                        s_ch});
397                   filter[j] = weights.data[f_index];
398                 } else {
399                   filter[j] = 0.0f;
400                 }
401               }
402               dst[counter++] = filter;
403             }
404           }
405         }
406       }
407     }
408   }
409 }
410 
411 template <DataType S, typename T>
RearrangeWeightsToOICustomSpatialO4I4(const tflite::gpu::Tensor<OHWI,S> & weights,const std::vector<int> & spatial_remap,absl::Span<T> dst)412 void RearrangeWeightsToOICustomSpatialO4I4(
413     const tflite::gpu::Tensor<OHWI, S>& weights,
414     const std::vector<int>& spatial_remap, absl::Span<T> dst) {
415   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
416   const int src_slices = DivideRoundUp(weights.shape.i, 4);
417 
418   int counter = 0;
419   for (int d = 0; d < dst_slices; ++d) {
420     for (int s = 0; s < src_slices; ++s) {
421       for (int y = 0; y < weights.shape.h; ++y) {
422         for (int x = 0; x < weights.shape.w; ++x) {
423           const int kernel_index = spatial_remap[y * weights.shape.w + x];
424           const int kernel_index_x = kernel_index % weights.shape.w;
425           const int kernel_index_y = kernel_index / weights.shape.w;
426           for (int i = 0; i < 4; ++i) {
427             T filter;
428             for (int j = 0; j < 4; ++j) {
429               const int s_ch = s * 4 + j;
430               const int d_ch = d * 4 + i;
431               if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
432                 const int f_index = weights.shape.LinearIndex(
433                     {d_ch, kernel_index_y, kernel_index_x, s_ch});
434                 filter[j] = weights.data[f_index];
435               } else {
436                 filter[j] = 0.0f;
437               }
438             }
439             dst[counter++] = filter;
440           }
441         }
442       }
443     }
444   }
445 }
446 
447 template <DataType S, typename T>
RearrangeWeightsToOICustomSpatialO4I4(const tflite::gpu::Tensor<OHWDI,S> & weights,const std::vector<int> & spatial_remap,absl::Span<T> dst)448 void RearrangeWeightsToOICustomSpatialO4I4(
449     const tflite::gpu::Tensor<OHWDI, S>& weights,
450     const std::vector<int>& spatial_remap, absl::Span<T> dst) {
451   const int dst_slices = DivideRoundUp(weights.shape.o, 4);
452   const int src_slices = DivideRoundUp(weights.shape.i, 4);
453 
454   int counter = 0;
455   for (int d = 0; d < dst_slices; ++d) {
456     for (int s = 0; s < src_slices; ++s) {
457       for (int z = 0; z < weights.shape.d; ++z) {
458         for (int y = 0; y < weights.shape.h; ++y) {
459           for (int x = 0; x < weights.shape.w; ++x) {
460             int kernel_index =
461                 spatial_remap[(z * weights.shape.h + y) * weights.shape.w + x];
462             const int kernel_index_x = kernel_index % weights.shape.w;
463             kernel_index /= weights.shape.w;
464             const int kernel_index_y = kernel_index % weights.shape.h;
465             const int kernel_index_z = kernel_index / weights.shape.h;
466             for (int i = 0; i < 4; ++i) {
467               T filter;
468               for (int j = 0; j < 4; ++j) {
469                 const int s_ch = s * 4 + j;
470                 const int d_ch = d * 4 + i;
471                 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
472                   const int f_index = weights.shape.LinearIndex(
473                       {d_ch, kernel_index_y, kernel_index_x, kernel_index_z,
474                        s_ch});
475                   filter[j] = weights.data[f_index];
476                 } else {
477                   filter[j] = 0.0f;
478                 }
479               }
480               dst[counter++] = filter;
481             }
482           }
483         }
484       }
485     }
486   }
487 }
488 
// Declaration only; the definition is not part of this header. Overload for
// an OHWI weights shape.
// NOTE(review): presumably returns how many elements the weights occupy once
// stored in the layout described by `weight_desc` - confirm against the
// definition.
489 uint GetTotalElementsCountForLayout(const WeightsDescription& weight_desc,
490                                     const OHWI& shape);
// Declaration only; the definition is not part of this header. Overload for
// an OHWDI weights shape.
// NOTE(review): presumably returns how many elements the weights occupy once
// stored in the layout described by `weight_desc` - confirm against the
// definition.
491 uint GetTotalElementsCountForLayout(const WeightsDescription& weight_desc,
492                                     const OHWDI& shape);
493 
494 // Applicable to:
495 //   k2DX4I4YIsSpatialIAndXIsOOGroupO4
496 //   k2DX4O4YIsSpatialIAndXIsOOGroupI4
// Declaration only; the definition is not part of this header. Overload for
// an OHWI weights shape.
// NOTE(review): presumably the returned uint2 is the (x, y) extent of the 2D
// resource that stores the weights in one of the layouts listed above -
// confirm against the definition.
497 uint2 Get2dResourceSize(const WeightsDescription& weight_desc,
498                         const OHWI& shape);
499 // Applicable to:
500 //   k2DX4I4YIsSpatialIAndXIsOOGroupO4
501 //   k2DX4O4YIsSpatialIAndXIsOOGroupI4
// Declaration only; the definition is not part of this header. Overload for
// an OHWDI weights shape.
// NOTE(review): presumably the returned uint2 is the (x, y) extent of the 2D
// resource that stores the weights in one of the layouts listed above -
// confirm against the definition.
502 uint2 Get2dResourceSize(const WeightsDescription& weight_desc,
503                         const OHWDI& shape);
504 
// Declaration only; the definition is not part of this header. Takes FLOAT32
// OHWI weights, a description of the destination layout, and a raw byte span
// to write into.
// NOTE(review): presumably dispatches on `dst_weight_desc` to one of the
// RearrangeWeightsTo* templates declared in this header and expects `dst` to
// be pre-sized by the caller - confirm against the definition.
505 void RearrangeWeights(
506     const tflite::gpu::Tensor<OHWI, DataType::FLOAT32>& weights,
507     const WeightsDescription& dst_weight_desc, absl::Span<uint8_t> dst);
508 
// Declaration only; the definition is not part of this header. Takes FLOAT32
// OHWDI weights, a description of the destination layout, and a raw byte span
// to write into.
// NOTE(review): presumably dispatches on `dst_weight_desc` to one of the
// RearrangeWeightsTo* templates declared in this header and expects `dst` to
// be pre-sized by the caller - confirm against the definition.
509 void RearrangeWeights(
510     const tflite::gpu::Tensor<OHWDI, DataType::FLOAT32>& weights,
511     const WeightsDescription& dst_weight_desc, absl::Span<uint8_t> dst);
512 
513 }  // namespace gpu
514 }  // namespace tflite
515 
516 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_WEIGHTS_CONVERSION_H_
517