1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_WEIGHTS_CONVERSION_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_WEIGHTS_CONVERSION_H_
18
19 #include <cstdint>
20 #include <string>
21 #include <vector>
22
23 #include "absl/types/span.h"
24 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
25 #include "tensorflow/lite/delegates/gpu/common/shape.h"
26 #include "tensorflow/lite/delegates/gpu/common/status.h"
27 #include "tensorflow/lite/delegates/gpu/common/task/weights_layout.h"
28 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
29 #include "tensorflow/lite/delegates/gpu/common/types.h"
30 #include "tensorflow/lite/delegates/gpu/common/util.h"
31
32 namespace tflite {
33 namespace gpu {
34
35 using uint = unsigned int;
36
37 template <DataType S, typename T>
RearrangeWeightsToOHWIOGroupI4O4(const tflite::gpu::Tensor<OHWI,S> & weights,int out_group_size,absl::Span<T> dst)38 void RearrangeWeightsToOHWIOGroupI4O4(
39 const tflite::gpu::Tensor<OHWI, S>& weights, int out_group_size,
40 absl::Span<T> dst) {
41 const int dst_slices = DivideRoundUp(weights.shape.o, 4);
42 const int src_slices = DivideRoundUp(weights.shape.i, 4);
43 const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
44
45 int counter = 0;
46 for (int d = 0; d < dst_groups; ++d) {
47 for (int y = 0; y < weights.shape.h; ++y) {
48 for (int x = 0; x < weights.shape.w; ++x) {
49 for (int s = 0; s < src_slices; ++s) {
50 for (int d_group = 0; d_group < out_group_size; ++d_group) {
51 for (int j = 0; j < 4; ++j) {
52 T filter;
53 for (int i = 0; i < 4; ++i) {
54 const int s_ch = s * 4 + j;
55 const int d_ch = (d * out_group_size + d_group) * 4 + i;
56 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
57 const int f_index =
58 weights.shape.LinearIndex({d_ch, y, x, s_ch});
59 filter[i] = weights.data[f_index];
60 } else {
61 filter[i] = 0.0f;
62 }
63 }
64 dst[counter++] = filter;
65 }
66 }
67 }
68 }
69 }
70 }
71 }
72
73 template <DataType S, typename T>
RearrangeWeightsToODHWIOGroupI4O4(const tflite::gpu::Tensor<OHWDI,S> & weights,int out_group_size,absl::Span<T> dst)74 void RearrangeWeightsToODHWIOGroupI4O4(
75 const tflite::gpu::Tensor<OHWDI, S>& weights, int out_group_size,
76 absl::Span<T> dst) {
77 const int dst_slices = DivideRoundUp(weights.shape.o, 4);
78 const int src_slices = DivideRoundUp(weights.shape.i, 4);
79 const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
80
81 int counter = 0;
82 for (int d = 0; d < dst_groups; ++d) {
83 for (int z = 0; z < weights.shape.d; ++z) {
84 for (int y = 0; y < weights.shape.h; ++y) {
85 for (int x = 0; x < weights.shape.w; ++x) {
86 for (int s = 0; s < src_slices; ++s) {
87 for (int d_group = 0; d_group < out_group_size; ++d_group) {
88 for (int j = 0; j < 4; ++j) {
89 T filter;
90 for (int i = 0; i < 4; ++i) {
91 const int s_ch = s * 4 + j;
92 const int d_ch = (d * out_group_size + d_group) * 4 + i;
93 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
94 const int f_index =
95 weights.shape.LinearIndex({d_ch, y, x, z, s_ch});
96 filter[i] = weights.data[f_index];
97 } else {
98 filter[i] = 0.0f;
99 }
100 }
101 dst[counter++] = filter;
102 }
103 }
104 }
105 }
106 }
107 }
108 }
109 }
110
111 template <DataType S, typename T>
RearrangeWeightsToOHWIOGroupO4I4(const tflite::gpu::Tensor<OHWI,S> & weights,int out_group_size,absl::Span<T> dst)112 void RearrangeWeightsToOHWIOGroupO4I4(
113 const tflite::gpu::Tensor<OHWI, S>& weights, int out_group_size,
114 absl::Span<T> dst) {
115 const int dst_slices = DivideRoundUp(weights.shape.o, 4);
116 const int src_slices = DivideRoundUp(weights.shape.i, 4);
117 const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
118
119 int counter = 0;
120 for (int d = 0; d < dst_groups; ++d) {
121 for (int y = 0; y < weights.shape.h; ++y) {
122 for (int x = 0; x < weights.shape.w; ++x) {
123 for (int s = 0; s < src_slices; ++s) {
124 for (int d_group = 0; d_group < out_group_size; ++d_group) {
125 for (int j = 0; j < 4; ++j) {
126 T filter;
127 for (int i = 0; i < 4; ++i) {
128 const int s_ch = s * 4 + i;
129 const int d_ch = (d * out_group_size + d_group) * 4 + j;
130 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
131 const int f_index =
132 weights.shape.LinearIndex({d_ch, y, x, s_ch});
133 filter[i] = weights.data[f_index];
134 } else {
135 filter[i] = 0.0f;
136 }
137 }
138 dst[counter++] = filter;
139 }
140 }
141 }
142 }
143 }
144 }
145 }
146
147 template <DataType S, typename T>
RearrangeWeightsToODHWIOGroupO4I4(const tflite::gpu::Tensor<OHWDI,S> & weights,int out_group_size,absl::Span<T> dst)148 void RearrangeWeightsToODHWIOGroupO4I4(
149 const tflite::gpu::Tensor<OHWDI, S>& weights, int out_group_size,
150 absl::Span<T> dst) {
151 const int dst_slices = DivideRoundUp(weights.shape.o, 4);
152 const int src_slices = DivideRoundUp(weights.shape.i, 4);
153 const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
154
155 int counter = 0;
156 for (int d = 0; d < dst_groups; ++d) {
157 for (int z = 0; z < weights.shape.d; ++z) {
158 for (int y = 0; y < weights.shape.h; ++y) {
159 for (int x = 0; x < weights.shape.w; ++x) {
160 for (int s = 0; s < src_slices; ++s) {
161 for (int d_group = 0; d_group < out_group_size; ++d_group) {
162 for (int j = 0; j < 4; ++j) {
163 T filter;
164 for (int i = 0; i < 4; ++i) {
165 const int s_ch = s * 4 + i;
166 const int d_ch = (d * out_group_size + d_group) * 4 + j;
167 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
168 const int f_index =
169 weights.shape.LinearIndex({d_ch, y, x, z, s_ch});
170 filter[i] = weights.data[f_index];
171 } else {
172 filter[i] = 0.0f;
173 }
174 }
175 dst[counter++] = filter;
176 }
177 }
178 }
179 }
180 }
181 }
182 }
183 }
184
185 template <DataType S, typename T>
RearrangeWeightsToI4HWIOOGroupO4(const tflite::gpu::Tensor<OHWI,S> & weights,int out_group_size,absl::Span<T> dst)186 void RearrangeWeightsToI4HWIOOGroupO4(
187 const tflite::gpu::Tensor<OHWI, S>& weights, int out_group_size,
188 absl::Span<T> dst) {
189 const int dst_slices = DivideRoundUp(weights.shape.o, 4);
190 const int src_slices = DivideRoundUp(weights.shape.i, 4);
191 const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
192
193 int counter = 0;
194 for (int j = 0; j < 4; ++j) {
195 for (int y = 0; y < weights.shape.h; ++y) {
196 for (int x = 0; x < weights.shape.w; ++x) {
197 for (int s = 0; s < src_slices; ++s) {
198 for (int d = 0; d < dst_groups; ++d) {
199 for (int d_group = 0; d_group < out_group_size; ++d_group) {
200 T filter;
201 for (int i = 0; i < 4; ++i) {
202 const int s_ch = s * 4 + j;
203 const int d_ch = (d * out_group_size + d_group) * 4 + i;
204 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
205 const int f_index =
206 weights.shape.LinearIndex({d_ch, y, x, s_ch});
207 filter[i] = weights.data[f_index];
208 } else {
209 filter[i] = 0.0f;
210 }
211 }
212 dst[counter++] = filter;
213 }
214 }
215 }
216 }
217 }
218 }
219 }
220
221 template <DataType S, typename T>
RearrangeWeightsToI4DHWIOOGroupO4(const tflite::gpu::Tensor<OHWDI,S> & weights,int out_group_size,absl::Span<T> dst)222 void RearrangeWeightsToI4DHWIOOGroupO4(
223 const tflite::gpu::Tensor<OHWDI, S>& weights, int out_group_size,
224 absl::Span<T> dst) {
225 const int dst_slices = DivideRoundUp(weights.shape.o, 4);
226 const int src_slices = DivideRoundUp(weights.shape.i, 4);
227 const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
228
229 int counter = 0;
230 for (int j = 0; j < 4; ++j) {
231 for (int z = 0; z < weights.shape.d; ++z) {
232 for (int y = 0; y < weights.shape.h; ++y) {
233 for (int x = 0; x < weights.shape.w; ++x) {
234 for (int s = 0; s < src_slices; ++s) {
235 for (int d = 0; d < dst_groups; ++d) {
236 for (int d_group = 0; d_group < out_group_size; ++d_group) {
237 T filter;
238 for (int i = 0; i < 4; ++i) {
239 const int s_ch = s * 4 + j;
240 const int d_ch = (d * out_group_size + d_group) * 4 + i;
241 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
242 const int f_index =
243 weights.shape.LinearIndex({d_ch, y, x, z, s_ch});
244 filter[i] = weights.data[f_index];
245 } else {
246 filter[i] = 0.0f;
247 }
248 }
249 dst[counter++] = filter;
250 }
251 }
252 }
253 }
254 }
255 }
256 }
257 }
258
259 template <DataType S, typename T>
RearrangeWeightsToO4HWIOOGroupI4(const tflite::gpu::Tensor<OHWI,S> & weights,int out_group_size,absl::Span<T> dst)260 void RearrangeWeightsToO4HWIOOGroupI4(
261 const tflite::gpu::Tensor<OHWI, S>& weights, int out_group_size,
262 absl::Span<T> dst) {
263 const int dst_slices = DivideRoundUp(weights.shape.o, 4);
264 const int src_slices = DivideRoundUp(weights.shape.i, 4);
265 const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
266
267 int counter = 0;
268 for (int j = 0; j < 4; ++j) {
269 for (int y = 0; y < weights.shape.h; ++y) {
270 for (int x = 0; x < weights.shape.w; ++x) {
271 for (int s = 0; s < src_slices; ++s) {
272 for (int d = 0; d < dst_groups; ++d) {
273 for (int d_group = 0; d_group < out_group_size; ++d_group) {
274 T filter;
275 for (int i = 0; i < 4; ++i) {
276 const int s_ch = s * 4 + i;
277 const int d_ch = (d * out_group_size + d_group) * 4 + j;
278 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
279 const int f_index =
280 weights.shape.LinearIndex({d_ch, y, x, s_ch});
281 filter[i] = weights.data[f_index];
282 } else {
283 filter[i] = 0.0f;
284 }
285 }
286 dst[counter++] = filter;
287 }
288 }
289 }
290 }
291 }
292 }
293 }
294
295 template <DataType S, typename T>
RearrangeWeightsToO4DHWIOOGroupI4(const tflite::gpu::Tensor<OHWDI,S> & weights,int out_group_size,absl::Span<T> dst)296 void RearrangeWeightsToO4DHWIOOGroupI4(
297 const tflite::gpu::Tensor<OHWDI, S>& weights, int out_group_size,
298 absl::Span<T> dst) {
299 const int dst_slices = DivideRoundUp(weights.shape.o, 4);
300 const int src_slices = DivideRoundUp(weights.shape.i, 4);
301 const int dst_groups = DivideRoundUp(dst_slices, out_group_size);
302
303 int counter = 0;
304 for (int j = 0; j < 4; ++j) {
305 for (int z = 0; z < weights.shape.d; ++z) {
306 for (int y = 0; y < weights.shape.h; ++y) {
307 for (int x = 0; x < weights.shape.w; ++x) {
308 for (int s = 0; s < src_slices; ++s) {
309 for (int d = 0; d < dst_groups; ++d) {
310 for (int d_group = 0; d_group < out_group_size; ++d_group) {
311 T filter;
312 for (int i = 0; i < 4; ++i) {
313 const int s_ch = s * 4 + i;
314 const int d_ch = (d * out_group_size + d_group) * 4 + j;
315 if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
316 const int f_index =
317 weights.shape.LinearIndex({d_ch, y, x, z, s_ch});
318 filter[i] = weights.data[f_index];
319 } else {
320 filter[i] = 0.0f;
321 }
322 }
323 dst[counter++] = filter;
324 }
325 }
326 }
327 }
328 }
329 }
330 }
331 }
332
// Repacks OHWI weights into I4O4 texels, visiting the 2D kernel positions
// in the order prescribed by 'spatial_remap' instead of row-major (y, x).
// Each element written to 'dst' bundles 4 consecutive output channels for
// one input channel; out-of-range channels are zero-padded.
//
// 'spatial_remap' is indexed with y * w + x and each entry is itself a
// linear y * w + x index selecting the source kernel position read for that
// logical position. 'dst' receives dst_slices * src_slices * h * w * 4
// elements of T.
template <DataType S, typename T>
void RearrangeWeightsToOICustomSpatialI4O4(
    const tflite::gpu::Tensor<OHWI, S>& weights,
    const std::vector<int>& spatial_remap, absl::Span<T> dst) {
  const int dst_slices = DivideRoundUp(weights.shape.o, 4);
  const int src_slices = DivideRoundUp(weights.shape.i, 4);

  int counter = 0;
  for (int d = 0; d < dst_slices; ++d) {
    for (int s = 0; s < src_slices; ++s) {
      for (int y = 0; y < weights.shape.h; ++y) {
        for (int x = 0; x < weights.shape.w; ++x) {
          // Source kernel position actually read for logical (y, x).
          const int kernel_index = spatial_remap[y * weights.shape.w + x];
          const int kernel_index_x = kernel_index % weights.shape.w;
          const int kernel_index_y = kernel_index / weights.shape.w;
          for (int i = 0; i < 4; ++i) {
            T filter;
            for (int j = 0; j < 4; ++j) {
              const int s_ch = s * 4 + i;  // input channel, fixed per texel
              const int d_ch = d * 4 + j;  // output channel in texel lane j
              if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
                const int f_index = weights.shape.LinearIndex(
                    {d_ch, kernel_index_y, kernel_index_x, s_ch});
                filter[j] = weights.data[f_index];
              } else {
                filter[j] = 0.0f;  // pad out-of-range channels
              }
            }
            dst[counter++] = filter;
          }
        }
      }
    }
  }
}
368
// 3D-kernel (OHWDI) overload: repacks weights into I4O4 texels, visiting
// the kernel positions in the order prescribed by 'spatial_remap' instead
// of row-major (z, y, x). Each element written to 'dst' bundles 4
// consecutive output channels for one input channel; out-of-range channels
// are zero-padded.
//
// 'spatial_remap' is indexed with (z * h + y) * w + x and each entry is
// itself a linear (z * h + y) * w + x index selecting the source kernel
// position read for that logical position.
template <DataType S, typename T>
void RearrangeWeightsToOICustomSpatialI4O4(
    const tflite::gpu::Tensor<OHWDI, S>& weights,
    const std::vector<int>& spatial_remap, absl::Span<T> dst) {
  const int dst_slices = DivideRoundUp(weights.shape.o, 4);
  const int src_slices = DivideRoundUp(weights.shape.i, 4);

  int counter = 0;
  for (int d = 0; d < dst_slices; ++d) {
    for (int s = 0; s < src_slices; ++s) {
      for (int z = 0; z < weights.shape.d; ++z) {
        for (int y = 0; y < weights.shape.h; ++y) {
          for (int x = 0; x < weights.shape.w; ++x) {
            // Decompose the remapped linear index back into (x, y, z);
            // kernel_index is mutated in place by the div/mod sequence.
            int kernel_index =
                spatial_remap[(z * weights.shape.h + y) * weights.shape.w + x];
            const int kernel_index_x = kernel_index % weights.shape.w;
            kernel_index /= weights.shape.w;
            const int kernel_index_y = kernel_index % weights.shape.h;
            const int kernel_index_z = kernel_index / weights.shape.h;
            for (int i = 0; i < 4; ++i) {
              T filter;
              for (int j = 0; j < 4; ++j) {
                const int s_ch = s * 4 + i;  // input channel, fixed per texel
                const int d_ch = d * 4 + j;  // output channel in texel lane j
                if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
                  const int f_index = weights.shape.LinearIndex(
                      {d_ch, kernel_index_y, kernel_index_x, kernel_index_z,
                       s_ch});
                  filter[j] = weights.data[f_index];
                } else {
                  filter[j] = 0.0f;  // pad out-of-range channels
                }
              }
              dst[counter++] = filter;
            }
          }
        }
      }
    }
  }
}
410
// Repacks OHWI weights into O4I4 texels, visiting the 2D kernel positions
// in the order prescribed by 'spatial_remap' instead of row-major (y, x).
// Each element written to 'dst' bundles 4 consecutive input channels for
// one output channel; out-of-range channels are zero-padded.
//
// 'spatial_remap' is indexed with y * w + x and each entry is itself a
// linear y * w + x index selecting the source kernel position read for that
// logical position. 'dst' receives dst_slices * src_slices * h * w * 4
// elements of T.
template <DataType S, typename T>
void RearrangeWeightsToOICustomSpatialO4I4(
    const tflite::gpu::Tensor<OHWI, S>& weights,
    const std::vector<int>& spatial_remap, absl::Span<T> dst) {
  const int dst_slices = DivideRoundUp(weights.shape.o, 4);
  const int src_slices = DivideRoundUp(weights.shape.i, 4);

  int counter = 0;
  for (int d = 0; d < dst_slices; ++d) {
    for (int s = 0; s < src_slices; ++s) {
      for (int y = 0; y < weights.shape.h; ++y) {
        for (int x = 0; x < weights.shape.w; ++x) {
          // Source kernel position actually read for logical (y, x).
          const int kernel_index = spatial_remap[y * weights.shape.w + x];
          const int kernel_index_x = kernel_index % weights.shape.w;
          const int kernel_index_y = kernel_index / weights.shape.w;
          for (int i = 0; i < 4; ++i) {
            T filter;
            for (int j = 0; j < 4; ++j) {
              const int s_ch = s * 4 + j;  // input channel in texel lane j
              const int d_ch = d * 4 + i;  // output channel, fixed per texel
              if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
                const int f_index = weights.shape.LinearIndex(
                    {d_ch, kernel_index_y, kernel_index_x, s_ch});
                filter[j] = weights.data[f_index];
              } else {
                filter[j] = 0.0f;  // pad out-of-range channels
              }
            }
            dst[counter++] = filter;
          }
        }
      }
    }
  }
}
446
// 3D-kernel (OHWDI) overload: repacks weights into O4I4 texels, visiting
// the kernel positions in the order prescribed by 'spatial_remap' instead
// of row-major (z, y, x). Each element written to 'dst' bundles 4
// consecutive input channels for one output channel; out-of-range channels
// are zero-padded.
//
// 'spatial_remap' is indexed with (z * h + y) * w + x and each entry is
// itself a linear (z * h + y) * w + x index selecting the source kernel
// position read for that logical position.
template <DataType S, typename T>
void RearrangeWeightsToOICustomSpatialO4I4(
    const tflite::gpu::Tensor<OHWDI, S>& weights,
    const std::vector<int>& spatial_remap, absl::Span<T> dst) {
  const int dst_slices = DivideRoundUp(weights.shape.o, 4);
  const int src_slices = DivideRoundUp(weights.shape.i, 4);

  int counter = 0;
  for (int d = 0; d < dst_slices; ++d) {
    for (int s = 0; s < src_slices; ++s) {
      for (int z = 0; z < weights.shape.d; ++z) {
        for (int y = 0; y < weights.shape.h; ++y) {
          for (int x = 0; x < weights.shape.w; ++x) {
            // Decompose the remapped linear index back into (x, y, z);
            // kernel_index is mutated in place by the div/mod sequence.
            int kernel_index =
                spatial_remap[(z * weights.shape.h + y) * weights.shape.w + x];
            const int kernel_index_x = kernel_index % weights.shape.w;
            kernel_index /= weights.shape.w;
            const int kernel_index_y = kernel_index % weights.shape.h;
            const int kernel_index_z = kernel_index / weights.shape.h;
            for (int i = 0; i < 4; ++i) {
              T filter;
              for (int j = 0; j < 4; ++j) {
                const int s_ch = s * 4 + j;  // input channel in texel lane j
                const int d_ch = d * 4 + i;  // output channel, fixed per texel
                if (s_ch < weights.shape.i && d_ch < weights.shape.o) {
                  const int f_index = weights.shape.LinearIndex(
                      {d_ch, kernel_index_y, kernel_index_x, kernel_index_z,
                       s_ch});
                  filter[j] = weights.data[f_index];
                } else {
                  filter[j] = 0.0f;  // pad out-of-range channels
                }
              }
              dst[counter++] = filter;
            }
          }
        }
      }
    }
  }
}
488
489 uint GetTotalElementsCountForLayout(const WeightsDescription& weight_desc,
490 const OHWI& shape);
491 uint GetTotalElementsCountForLayout(const WeightsDescription& weight_desc,
492 const OHWDI& shape);
493
494 // Applicable to:
495 // k2DX4I4YIsSpatialIAndXIsOOGroupO4
496 // k2DX4O4YIsSpatialIAndXIsOOGroupI4
497 uint2 Get2dResourceSize(const WeightsDescription& weight_desc,
498 const OHWI& shape);
499 // Applicable to:
500 // k2DX4I4YIsSpatialIAndXIsOOGroupO4
501 // k2DX4O4YIsSpatialIAndXIsOOGroupI4
502 uint2 Get2dResourceSize(const WeightsDescription& weight_desc,
503 const OHWDI& shape);
504
505 void RearrangeWeights(
506 const tflite::gpu::Tensor<OHWI, DataType::FLOAT32>& weights,
507 const WeightsDescription& dst_weight_desc, absl::Span<uint8_t> dst);
508
509 void RearrangeWeights(
510 const tflite::gpu::Tensor<OHWDI, DataType::FLOAT32>& weights,
511 const WeightsDescription& dst_weight_desc, absl::Span<uint8_t> dst);
512
513 } // namespace gpu
514 } // namespace tflite
515
516 #endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_WEIGHTS_CONVERSION_H_
517