1 // Copyright 2022 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <algorithm>
7 #include <numeric>
8
9 #include <xnnpack/pack.h>
10 #include <xnnpack/aligned-allocator.h>
11
12 #include <gtest/gtest.h>
13 #include <fp16.h>
14
// QU8 depthwise packing, GHW input (each channel's taps are contiguous in k),
// 3x1 kernel, c == cr == 2, primary_tile equal to the kernel size (h * w).
// Expected layout: two int32 biases, least-significant byte first, followed by
// the taps interleaved across the two channels.
TEST(PACK_QU8_DWCONV_GHW_W, primary_tile_eq_kernel_size) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 2;
  const size_t cr = 2;

  std::vector<int32_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1]
  std::vector<uint8_t> k(c * h * w);
  std::iota(k.begin(), k.end(), b.size());  // k = [2, 3, 4, 5, 6, 7]

  // Per channel: 4 bias bytes plus primary_tile one-byte taps.
  std::vector<uint8_t> packed_weights(((primary_tile + sizeof(int32_t)/sizeof(uint8_t)) * round_up_po2(c, cr)));

  xnn_qu8_packing_params params = {
    .input_zero_point = 127,
    .kernel_zero_point = 127,
  };
  xnn_pack_qu8_dwconv_ghw_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, &params);

  // Sanity-check the constant used in the hand-computed biases below.
  const int32_t bias_offset = h * w * params.input_zero_point * params.kernel_zero_point;
  ASSERT_EQ(bias_offset, 48387);
  const std::vector<uint8_t> expected = {
    // bias first
    // 48387 + 0 - (2 + 3 + 4) * 127 = 47,244 = 0xB88C
    0x8C, 0xB8, 0, 0,
    // 48387 + 1 - (5 + 6 + 7) * 127 = 46,102 = 0xB416
    0x16, 0xB4, 0, 0,
    // then weights, channels first
    2, 5,
    3, 6,
    4, 7,
  };
  ASSERT_EQ(expected, packed_weights);
}
60
// QU8 depthwise packing, GHW input, 3x1 kernel, c == 5 with cr == 2.
// The channels are split into three cr-sized blocks; each block holds its
// biases and then its taps. The last block has a single live channel, so its
// second lane (bias and taps) is expected to be zero.
TEST(PACK_QU8_DWCONV_GHW_W, primary_tile_eq_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<int32_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1, 2, 3, 4]
  std::vector<uint8_t> k(c * h * w);
  // k = [
  //   5, 6, 7,
  //   8, 9, 10,
  //   11, 12, 13,
  //   14, 15, 16,
  //   17, 18, 19]
  std::iota(k.begin(), k.end(), b.size());
  std::vector<uint8_t> packed_weights(((primary_tile + sizeof(int32_t)/sizeof(uint8_t)) * round_up_po2(c, cr)));

  xnn_qu8_packing_params params = {
    .input_zero_point = 127,
    .kernel_zero_point = 127,
  };
  xnn_pack_qu8_dwconv_ghw_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, &params);

  // Sanity-check the constant used in the hand-computed biases below.
  const int32_t bias_offset = h * w * params.input_zero_point * params.kernel_zero_point;
  ASSERT_EQ(bias_offset, 48387);
  const std::vector<uint8_t> expected = {
    // cr blocks
    // bias first (cr == 2 of them)
    // 48387 + 0 - (5 + 6 + 7) * 127 = 46,101 = 0xB415
    0x15, 0xB4, 0, 0,
    // 48387 + 1 - (8 + 9 + 10) * 127 = 44,959 = 0xAF9F
    0x9F, 0xAF, 0, 0,
    // then weights, channels first
    5, 8, 6, 9, 7, 10,
    // bias again
    // 48387 + 2 - (11 + 12 + 13) * 127 = 43,817 = 0xAB29
    0x29, 0xAB, 0, 0,
    // 48387 + 3 - (14 + 15 + 16) * 127 = 42,675 = 0xA6B3
    0xB3, 0xA6, 0, 0,
    // then weights, channels first
    11, 14, 12, 15, 13, 16,
    // bias again
    // 48387 + 4 - (17 + 18 + 19) * 127 = 41,533 = 0xA23D
    0x3D, 0xA2, 0, 0,
    0, 0, 0, 0,
    // then weights, channels first
    17, 0, 18, 0, 19, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
122
// QU8 depthwise packing, GHW input, 2x2 kernel, c == cr == 2, with
// primary_tile (9) larger than the kernel size (h * w == 4).
// Expected layout: biases, the four taps per channel visited column by column,
// then (9 - 4) * cr == 10 zero bytes filling the rest of the tile.
TEST(PACK_QU8_DWCONV_GHW_W, primary_tile_gt_kernel_size) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 2;
  const size_t cr = 2;

  std::vector<int32_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1]
  std::vector<uint8_t> k(c * h * w);
  // k = [
  //   2, 3,
  //   4, 5,
  //   6, 7,
  //   8, 9]
  std::iota(k.begin(), k.end(), b.size());
  std::vector<uint8_t> packed_weights(((primary_tile + sizeof(int32_t)/sizeof(uint8_t)) * round_up_po2(c, cr)));

  xnn_qu8_packing_params params = {
    .input_zero_point = 127,
    .kernel_zero_point = 127,
  };
  xnn_pack_qu8_dwconv_ghw_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, &params);

  // Sanity-check the constant used in the hand-computed biases below.
  const int32_t bias_offset = h * w * params.input_zero_point * params.kernel_zero_point;
  ASSERT_EQ(bias_offset, 64516);
  const std::vector<uint8_t> expected = {
    // bias first (cr == 2 of them)
    // 64516 + 0 - (2 + 3 + 4 + 5) * 127 = 62,738 = 0xF512
    0x12, 0xF5, 0, 0,
    // 64516 + 1 - (6 + 7 + 8 + 9) * 127 = 60,707 = 0xED23
    0x23, 0xED, 0, 0,
    // then weights, channels first
    2, 6,
    // go down the columns first
    4, 8, 3, 7, 5, 9,
    // followed by 10 zeros to make up the difference with primary_tile
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
173
// QU8 depthwise packing, GHW input, 2x2 kernel, c == 5 with cr == 2, and
// primary_tile (9) larger than the kernel size (4).
// Each of the three cr blocks is expected to hold: biases, taps visited column
// by column, then 10 zero bytes of tile padding. The last block has one live
// channel; its second lane is expected to be zero.
TEST(PACK_QU8_DWCONV_GHW_W, primary_tile_gt_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<int32_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1, 2, 3, 4]
  std::vector<uint8_t> k(c * h * w);
  // k = [
  //   5, 6,
  //   7, 8,
  //   9, 10,
  //   11, 12,
  //   13, 14,
  //   15, 16,
  //   17, 18,
  //   19, 20,
  //   21, 22,
  //   23, 24]
  std::iota(k.begin(), k.end(), b.size());
  std::vector<uint8_t> packed_weights(((primary_tile + sizeof(int32_t)/sizeof(uint8_t)) * round_up_po2(c, cr)));

  xnn_qu8_packing_params params = {
    .input_zero_point = 127,
    .kernel_zero_point = 127,
  };
  xnn_pack_qu8_dwconv_ghw_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, &params);

  // Sanity-check the constant used in the hand-computed biases below.
  const int32_t bias_offset = h * w * params.input_zero_point * params.kernel_zero_point;
  ASSERT_EQ(bias_offset, 64516);
  const std::vector<uint8_t> expected = {
    // bias first (cr == 2 of them)
    // 64516 + 0 - (5 + 6 + 7 + 8) * 127 = 61,214 = 0xEF1E
    0x1E, 0xEF, 0, 0,
    // 64516 + 1 - (9 + 10 + 11 + 12) * 127 = 59,183 = 0xE72F
    0x2F, 0xE7, 0, 0,
    // then weights, channels first
    5, 9,
    // go down the columns first
    7, 11,
    6, 10,
    8, 12,
    // followed by 10 zeros to make up the difference with primary_tile
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // bias first (cr == 2 of them)
    // 64516 + 2 - (13 + 14 + 15 + 16) * 127 = 57,152 = 0xDF40
    0x40, 0xDF, 0, 0,
    // 64516 + 3 - (17 + 18 + 19 + 20) * 127 = 55,121 = 0xD751
    0x51, 0xD7, 0, 0,
    // then weights, channels first
    13, 17, 15, 19, 14, 18, 16, 20,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // bias
    // 64516 + 4 - (21 + 22 + 23 + 24) * 127 = 53,090 = 0xCF62
    0x62, 0xCF, 0, 0,
    0, 0, 0, 0,
    // weights
    21, 0, 23, 0, 22, 0, 24, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
247
// QU8 depthwise packing, HWG input (channel index varies fastest in k),
// 3x1 kernel, c == cr == 2, primary_tile equal to the kernel size.
// Expected layout: two int32 biases, least-significant byte first, followed by
// the taps with both channels of each tap adjacent.
TEST(PACK_QU8_DWCONV_HWG_W, primary_tile_eq_kernel_size) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 2;
  const size_t cr = 2;

  std::vector<int32_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1]
  std::vector<uint8_t> k(c * h * w);
  std::iota(k.begin(), k.end(), b.size());  // k = [2, 3, 4, 5, 6, 7]

  // Per channel: 4 bias bytes plus primary_tile one-byte taps.
  std::vector<uint8_t> packed_weights(((primary_tile + sizeof(int32_t)/sizeof(uint8_t)) * round_up_po2(c, cr)));

  xnn_qu8_packing_params params = {
    .input_zero_point = 127,
    .kernel_zero_point = 127,
  };
  xnn_pack_qu8_dwconv_hwg_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, &params);

  // Sanity-check the constant used in the hand-computed biases below.
  const int32_t bias_offset = h * w * params.input_zero_point * params.kernel_zero_point;
  ASSERT_EQ(bias_offset, 48387);
  const std::vector<uint8_t> expected = {
    // bias first
    // 48387 + 0 - (2 + 4 + 6) * 127 = 46,863 = 0xB70F
    0x0F, 0xB7, 0, 0,
    // 48387 + 1 - (3 + 5 + 7) * 127 = 46,483 = 0xB593
    0x93, 0xB5, 0, 0,
    // then weights, channels first
    2, 3,
    4, 5,
    6, 7,
  };
  ASSERT_EQ(expected, packed_weights);
}
293
// QU8 depthwise packing, HWG input, 3x1 kernel, c == 5 with cr == 2.
// The channels are split into three cr-sized blocks; each block holds its
// biases and then its taps. The last block has a single live channel, so its
// second lane (bias and taps) is expected to be zero.
TEST(PACK_QU8_DWCONV_HWG_W, primary_tile_eq_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<int32_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1, 2, 3, 4]
  std::vector<uint8_t> k(c * h * w);
  // k = [
  //   5, 6, 7, 8, 9,
  //   10, 11, 12, 13, 14,
  //   15, 16, 17, 18, 19]
  std::iota(k.begin(), k.end(), b.size());
  std::vector<uint8_t> packed_weights(((primary_tile + sizeof(int32_t)/sizeof(uint8_t)) * round_up_po2(c, cr)));

  xnn_qu8_packing_params params = {
    .input_zero_point = 127,
    .kernel_zero_point = 127,
  };
  xnn_pack_qu8_dwconv_hwg_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, &params);

  // Sanity-check the constant used in the hand-computed biases below.
  const int32_t bias_offset = h * w * params.input_zero_point * params.kernel_zero_point;
  ASSERT_EQ(bias_offset, 48387);
  const std::vector<uint8_t> expected = {
    // cr blocks
    // bias first (cr == 2 of them)
    // 48387 + 0 - (5 + 10 + 15) * 127 = 44577 = 0xAE21
    0x21, 0xAE, 0, 0,
    // 48387 + 1 - (6 + 11 + 16) * 127 = 44197 = 0xACA5
    0xA5, 0xAC, 0, 0,
    // then weights, channels first
    5, 6, 10, 11, 15, 16,
    // bias again
    // 48387 + 2 - (7 + 12 + 17) * 127 = 43817 = 0xAB29
    0x29, 0xAB, 0, 0,
    // 48387 + 3 - (8 + 13 + 18) * 127 = 43437 = 0xA9AD
    0xAD, 0xA9, 0, 0,
    // then weights, channels first
    7, 8, 12, 13, 17, 18,
    // bias again
    // 48387 + 4 - (9 + 14 + 19) * 127 = 43057 = 0xA831
    0x31, 0xA8, 0, 0,
    0, 0, 0, 0,
    // then weights, channels first
    9, 0, 14, 0, 19, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
353
// QU8 depthwise packing, HWG input, 2x2 kernel, c == cr == 2, with
// primary_tile (9) larger than the kernel size (h * w == 4).
// Expected layout: biases, the four taps per channel visited column by column,
// then (9 - 4) * cr == 10 zero bytes filling the rest of the tile.
TEST(PACK_QU8_DWCONV_HWG_W, primary_tile_gt_kernel_size) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 2;
  const size_t cr = 2;

  std::vector<int32_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1]
  std::vector<uint8_t> k(c * h * w);
  // k = [
  //   2, 3,
  //   4, 5,
  //   6, 7,
  //   8, 9]
  std::iota(k.begin(), k.end(), b.size());
  std::vector<uint8_t> packed_weights(((primary_tile + sizeof(int32_t)/sizeof(uint8_t)) * round_up_po2(c, cr)));

  xnn_qu8_packing_params params = {
    .input_zero_point = 127,
    .kernel_zero_point = 127,
  };
  xnn_pack_qu8_dwconv_hwg_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, &params);

  // Sanity-check the constant used in the hand-computed biases below.
  const int32_t bias_offset = h * w * params.input_zero_point * params.kernel_zero_point;
  ASSERT_EQ(bias_offset, 64516);
  const std::vector<uint8_t> expected = {
    // bias first (cr == 2 of them)
    // 64516 + 0 - (2 + 4 + 6 + 8) * 127 = 61976 = 0xF218
    0x18, 0xF2, 0, 0,
    // 64516 + 1 - (3 + 5 + 7 + 9) * 127 = 61469 = 0xF01D
    0x1D, 0xF0, 0, 0,
    // then weights, channels first
    2, 3,
    // go down the columns first
    6, 7, 4, 5, 8, 9,
    // followed by 10 zeros to make up the difference with primary_tile
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
404
// QU8 depthwise packing, HWG input, 2x2 kernel, c == 5 with cr == 2, and
// primary_tile (9) larger than the kernel size (4).
// Each of the three cr blocks is expected to hold: biases, taps visited column
// by column, then 10 zero bytes of tile padding. The last block has one live
// channel; its second lane is expected to be zero.
TEST(PACK_QU8_DWCONV_HWG_W, primary_tile_gt_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<int32_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1, 2, 3, 4]
  std::vector<uint8_t> k(c * h * w);
  // k = [
  //   5, 6, 7, 8, 9,
  //   10, 11, 12, 13, 14,
  //   15, 16, 17, 18, 19,
  //   20, 21, 22, 23, 24]
  std::iota(k.begin(), k.end(), b.size());
  std::vector<uint8_t> packed_weights(((primary_tile + sizeof(int32_t)/sizeof(uint8_t)) * round_up_po2(c, cr)));

  xnn_qu8_packing_params params = {
    .input_zero_point = 127,
    .kernel_zero_point = 127,
  };
  xnn_pack_qu8_dwconv_hwg_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, &params);

  // Sanity-check the constant used in the hand-computed biases below.
  const int32_t bias_offset = h * w * params.input_zero_point * params.kernel_zero_point;
  ASSERT_EQ(bias_offset, 64516);
  const std::vector<uint8_t> expected = {
    // bias first (cr == 2 of them)
    // 64516 + 0 - (5 + 10 + 15 + 20) * 127 = 58166 = 0xE336
    0x36, 0xE3, 0, 0,
    // 64516 + 1 - (6 + 11 + 16 + 21) * 127 = 57659 = 0xE13B
    0x3B, 0xE1, 0, 0,
    // then weights, channels first
    5, 6,
    // go down the columns first
    15, 16,
    10, 11,
    20, 21,
    // followed by 10 zeros to make up the difference with primary_tile
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // bias first (cr == 2 of them)
    // 64516 + 2 - (7 + 12 + 17 + 22) * 127 = 57152 = 0xDF40
    0x40, 0xDF, 0, 0,
    // 64516 + 3 - (8 + 13 + 18 + 23) * 127 = 56645 = 0xDD45
    0x45, 0xDD, 0, 0,
    // then weights, channels first
    7, 8, 17, 18, 12, 13, 22, 23,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // bias
    // 64516 + 4 - (9 + 14 + 19 + 24) * 127 = 56138 = 0xDB4A
    0x4A, 0xDB, 0, 0,
    0, 0, 0, 0,
    // weights
    9, 0, 19, 0, 14, 0, 24, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
472
// QS8 depthwise packing, GHW input (each channel's taps are contiguous in k),
// 3x1 kernel, c == cr == 2, primary_tile equal to the kernel size.
// Expected layout: two int32 biases (negative values, so the upper bytes are
// 0xFF), least-significant byte first, followed by the interleaved taps.
TEST(PACK_QS8_DWCONV_GHW_W, primary_tile_eq_kernel_size) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 2;
  const size_t cr = 2;

  std::vector<int32_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1]
  std::vector<int8_t> k(c * h * w);
  std::iota(k.begin(), k.end(), b.size());  // k = [2, 3, 4, 5, 6, 7]

  // Per channel: 4 bias bytes plus primary_tile one-byte taps.
  std::vector<uint8_t> packed_weights(((primary_tile + sizeof(int32_t)/sizeof(uint8_t)) * round_up_po2(c, cr)));

  xnn_qs8_packing_params params = {
    .input_zero_point = 127,
  };
  xnn_pack_qs8_dwconv_ghw_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, &params);

  const std::vector<uint8_t> expected = {
    // bias first
    // 0 - (2 + 3 + 4) * 127 = -1143 = 0xFFFFFB89
    0x89, 0xFB, 0xFF, 0xFF,
    // 1 - (5 + 6 + 7) * 127 = -2285 = 0xFFFFF713
    0x13, 0xF7, 0xFF, 0xFF,
    // then weights, channels first
    2, 5,
    3, 6,
    4, 7,
  };
  ASSERT_EQ(expected, packed_weights);
}
515
// QS8 depthwise packing, GHW input, 3x1 kernel, c == 5 with cr == 2.
// The channels are split into three cr-sized blocks; each block holds its
// biases and then its taps. The last block has a single live channel, so its
// second lane (bias and taps) is expected to be zero.
TEST(PACK_QS8_DWCONV_GHW_W, primary_tile_eq_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<int32_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1, 2, 3, 4]
  std::vector<int8_t> k(c * h * w);
  // k = [
  //   5, 6, 7,
  //   8, 9, 10,
  //   11, 12, 13,
  //   14, 15, 16,
  //   17, 18, 19]
  std::iota(k.begin(), k.end(), b.size());
  std::vector<uint8_t> packed_weights(((primary_tile + sizeof(int32_t)/sizeof(uint8_t)) * round_up_po2(c, cr)));

  xnn_qs8_packing_params params = {
    .input_zero_point = 127,
  };
  xnn_pack_qs8_dwconv_ghw_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, &params);

  const std::vector<uint8_t> expected = {
    // cr blocks
    // bias first (cr == 2 of them)
    // 0 - (5 + 6 + 7) * 127 = -2286 = 0xFFFFF712
    0x12, 0xF7, 0xFF, 0xFF,
    // 1 - (8 + 9 + 10) * 127 = -3428 = 0xFFFFF29C
    0x9C, 0xF2, 0xFF, 0xFF,
    // then weights, channels first
    5, 8, 6, 9, 7, 10,
    // bias again
    // 2 - (11 + 12 + 13) * 127 = -4570 = 0xFFFFEE26
    0x26, 0xEE, 0xFF, 0xFF,
    // 3 - (14 + 15 + 16) * 127 = -5712 = 0xFFFFE9B0
    0xB0, 0xE9, 0xFF, 0xFF,
    // then weights, channels first
    11, 14, 12, 15, 13, 16,
    // bias again
    // 4 - (17 + 18 + 19) * 127 = -6854 = 0xFFFFE53A
    0x3A, 0xE5, 0xFF, 0xFF,
    0, 0, 0, 0,
    // then weights, channels first
    17, 0, 18, 0, 19, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
574
// QS8 depthwise packing, GHW input, 2x2 kernel, c == cr == 2, with
// primary_tile (9) larger than the kernel size (h * w == 4).
// Expected layout: biases, the four taps per channel visited column by column,
// then (9 - 4) * cr == 10 zero bytes filling the rest of the tile.
TEST(PACK_QS8_DWCONV_GHW_W, primary_tile_gt_kernel_size) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 2;
  const size_t cr = 2;

  std::vector<int32_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1]
  std::vector<int8_t> k(c * h * w);
  // k = [
  //   2, 3,
  //   4, 5,
  //   6, 7,
  //   8, 9]
  std::iota(k.begin(), k.end(), b.size());
  std::vector<uint8_t> packed_weights(((primary_tile + sizeof(int32_t)/sizeof(uint8_t)) * round_up_po2(c, cr)));

  xnn_qs8_packing_params params = {
    .input_zero_point = 127,
  };
  xnn_pack_qs8_dwconv_ghw_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, &params);

  const std::vector<uint8_t> expected = {
    // bias first (cr == 2 of them)
    // 0 - (2 + 3 + 4 + 5) * 127 = -1778 = 0xFFFFF90E
    0x0E, 0xF9, 0xFF, 0xFF,
    // 1 - (6 + 7 + 8 + 9) * 127 = -3809 = 0xFFFFF11F
    0x1F, 0xF1, 0xFF, 0xFF,
    // then weights, channels first
    2, 6,
    // go down the columns first
    4, 8, 3, 7, 5, 9,
    // followed by 10 zeros to make up the difference with primary_tile
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
622
// QS8 depthwise packing, GHW input, 2x2 kernel, c == 5 with cr == 2, and
// primary_tile (9) larger than the kernel size (4).
// Each of the three cr blocks is expected to hold: biases, taps visited column
// by column, then 10 zero bytes of tile padding. The last block has one live
// channel; its second lane is expected to be zero.
TEST(PACK_QS8_DWCONV_GHW_W, primary_tile_gt_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<int32_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1, 2, 3, 4]
  std::vector<int8_t> k(c * h * w);
  // k = [
  //   5, 6,
  //   7, 8,
  //   9, 10,
  //   11, 12,
  //   13, 14,
  //   15, 16,
  //   17, 18,
  //   19, 20,
  //   21, 22,
  //   23, 24]
  std::iota(k.begin(), k.end(), b.size());
  std::vector<uint8_t> packed_weights(((primary_tile + sizeof(int32_t)/sizeof(uint8_t)) * round_up_po2(c, cr)));

  xnn_qs8_packing_params params = {
    .input_zero_point = 127,
  };
  xnn_pack_qs8_dwconv_ghw_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, &params);

  const std::vector<uint8_t> expected = {
    // bias first (cr == 2 of them)
    // 0 - (5 + 6 + 7 + 8) * 127 = -3302 = 0xFFFFF31A
    0x1A, 0xF3, 0xFF, 0xFF,
    // 1 - (9 + 10 + 11 + 12) * 127 = -5333 = 0xFFFFEB2B
    0x2B, 0xEB, 0xFF, 0xFF,
    // then weights, channels first
    5, 9,
    // go down the columns first
    7, 11,
    6, 10,
    8, 12,
    // followed by 10 zeros to make up the difference with primary_tile
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // bias first (cr == 2 of them)
    // 2 - (13 + 14 + 15 + 16) * 127 = -7364 = 0xFFFFE33C
    0x3C, 0xE3, 0xFF, 0xFF,
    // 3 - (17 + 18 + 19 + 20) * 127 = -9395 = 0xFFFFDB4D
    0x4D, 0xDB, 0xFF, 0xFF,
    // then weights, channels first
    13, 17, 15, 19, 14, 18, 16, 20,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // bias
    // 4 - (21 + 22 + 23 + 24) * 127 = -11426 = 0xFFFFD35E
    0x5E, 0xD3, 0xFF, 0xFF,
    0, 0, 0, 0,
    // weights
    21, 0, 23, 0, 22, 0, 24, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
693
// QS8 depthwise packing, HWG input (channel index varies fastest in k),
// 3x1 kernel, c == cr == 2, primary_tile equal to the kernel size.
// Expected layout: two int32 biases (negative values, so the upper bytes are
// 0xFF), least-significant byte first, followed by the taps with both
// channels of each tap adjacent.
TEST(PACK_QS8_DWCONV_HWG_W, primary_tile_eq_kernel_size) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 2;
  const size_t cr = 2;

  std::vector<int32_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1]
  std::vector<int8_t> k(c * h * w);
  std::iota(k.begin(), k.end(), b.size());  // k = [2, 3, 4, 5, 6, 7]

  // Per channel: 4 bias bytes plus primary_tile one-byte taps.
  std::vector<uint8_t> packed_weights(((primary_tile + sizeof(int32_t)/sizeof(uint8_t)) * round_up_po2(c, cr)));

  xnn_qs8_packing_params params = {
    .input_zero_point = 127,
  };
  xnn_pack_qs8_dwconv_hwg_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, &params);

  const std::vector<uint8_t> expected = {
    // bias first
    // 0 - (2 + 4 + 6) * 127 = -1524 = 0xFFFFFA0C
    0x0C, 0xFA, 0xFF, 0xFF,
    // 1 - (3 + 5 + 7) * 127 = -1904 = 0xFFFFF890
    0x90, 0xF8, 0xFF, 0xFF,
    // then weights, channels first
    2, 3,
    4, 5,
    6, 7,
  };
  ASSERT_EQ(expected, packed_weights);
}
736
// QS8 depthwise packing, HWG input, 3x1 kernel, c == 5 with cr == 2.
// The channels are split into three cr-sized blocks; each block holds its
// biases and then its taps. The last block has a single live channel, so its
// second lane (bias and taps) is expected to be zero.
TEST(PACK_QS8_DWCONV_HWG_W, primary_tile_eq_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<int32_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1, 2, 3, 4]
  std::vector<int8_t> k(c * h * w);
  // k = [
  //   5, 6, 7, 8, 9,
  //   10, 11, 12, 13, 14,
  //   15, 16, 17, 18, 19]
  std::iota(k.begin(), k.end(), b.size());
  std::vector<uint8_t> packed_weights(((primary_tile + sizeof(int32_t)/sizeof(uint8_t)) * round_up_po2(c, cr)));

  xnn_qs8_packing_params params = {
    .input_zero_point = 127,
  };
  xnn_pack_qs8_dwconv_hwg_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, &params);

  const std::vector<uint8_t> expected = {
    // cr blocks
    // bias first (cr == 2 of them)
    // 0 - (5 + 10 + 15) * 127 = -3810 = 0xFFFFF11E
    0x1E, 0xF1, 0xFF, 0xFF,
    // 1 - (6 + 11 + 16) * 127 = -4190 = 0xFFFFEFA2
    0xA2, 0xEF, 0xFF, 0xFF,
    // then weights, channels first
    5, 6, 10, 11, 15, 16,
    // bias again
    // 2 - (7 + 12 + 17) * 127 = -4570 = 0xFFFFEE26
    0x26, 0xEE, 0xFF, 0xFF,
    // 3 - (8 + 13 + 18) * 127 = -4950 = 0xFFFFECAA
    0xAA, 0xEC, 0xFF, 0xFF,
    // then weights, channels first
    7, 8, 12, 13, 17, 18,
    // bias again
    // 4 - (9 + 14 + 19) * 127 = -5330 = 0xFFFFEB2E
    0x2E, 0xEB, 0xFF, 0xFF,
    0, 0, 0, 0,
    // then weights, channels first
    9, 0, 14, 0, 19, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
793
// QS8 depthwise packing, HWG input, 2x2 kernel, c == cr == 2, with
// primary_tile (9) larger than the kernel size (h * w == 4).
// Expected layout: biases, the four taps per channel visited column by column,
// then (9 - 4) * cr == 10 zero bytes filling the rest of the tile.
TEST(PACK_QS8_DWCONV_HWG_W, primary_tile_gt_kernel_size) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 2;
  const size_t cr = 2;

  std::vector<int32_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1]
  std::vector<int8_t> k(c * h * w);
  // k = [
  //   2, 3,
  //   4, 5,
  //   6, 7,
  //   8, 9]
  std::iota(k.begin(), k.end(), b.size());
  std::vector<uint8_t> packed_weights(((primary_tile + sizeof(int32_t)/sizeof(uint8_t)) * round_up_po2(c, cr)));

  xnn_qs8_packing_params params = {
    .input_zero_point = 127,
  };
  xnn_pack_qs8_dwconv_hwg_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, &params);

  const std::vector<uint8_t> expected = {
    // bias first (cr == 2 of them)
    // 0 - (2 + 4 + 6 + 8) * 127 = -2540 = 0xFFFFF614
    0x14, 0xF6, 0xFF, 0xFF,
    // 1 - (3 + 5 + 7 + 9) * 127 = -3047 = 0xFFFFF419
    0x19, 0xF4, 0xFF, 0xFF,
    // then weights, channels first
    2, 3,
    // go down the columns first
    6, 7, 4, 5, 8, 9,
    // followed by 10 zeros to make up the difference with primary_tile
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
841
// QS8 depthwise packing, HWG input, 2x2 kernel, c == 5 with cr == 2, and
// primary_tile (9) larger than the kernel size (4).
// Each of the three cr blocks is expected to hold: biases, taps visited column
// by column, then 10 zero bytes of tile padding. The last block has one live
// channel; its second lane is expected to be zero.
TEST(PACK_QS8_DWCONV_HWG_W, primary_tile_gt_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<int32_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1, 2, 3, 4]
  std::vector<int8_t> k(c * h * w);
  // k = [
  //   5, 6, 7, 8, 9,
  //   10, 11, 12, 13, 14,
  //   15, 16, 17, 18, 19,
  //   20, 21, 22, 23, 24]
  std::iota(k.begin(), k.end(), b.size());
  std::vector<uint8_t> packed_weights(((primary_tile + sizeof(int32_t)/sizeof(uint8_t)) * round_up_po2(c, cr)));

  xnn_qs8_packing_params params = {
    .input_zero_point = 127,
  };
  xnn_pack_qs8_dwconv_hwg_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, &params);

  const std::vector<uint8_t> expected = {
    // bias first (cr == 2 of them)
    // 0 - (5 + 10 + 15 + 20) * 127 = -6350 = 0xFFFFE732
    0x32, 0xE7, 0xFF, 0xFF,
    // 1 - (6 + 11 + 16 + 21) * 127 = -6857 = 0xFFFFE537
    0x37, 0xE5, 0xFF, 0xFF,
    // then weights, channels first
    5, 6,
    // go down the columns first
    15, 16,
    10, 11,
    20, 21,
    // followed by 10 zeros to make up the difference with primary_tile
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // bias first (cr == 2 of them)
    // 2 - (7 + 12 + 17 + 22) * 127 = -7364 = 0xFFFFE33C
    0x3C, 0xE3, 0xFF, 0xFF,
    // 3 - (8 + 13 + 18 + 23) * 127 = -7871 = 0xFFFFE141
    0x41, 0xE1, 0xFF, 0xFF,
    // then weights, channels first
    7, 8, 17, 18, 12, 13, 22, 23,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // bias
    // 4 - (9 + 14 + 19 + 24) * 127 = -8378 = 0xFFFFDF46
    0x46, 0xDF, 0xFF, 0xFF,
    0, 0, 0, 0,
    // weights
    9, 0, 19, 0, 14, 0, 24, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
906
// F16 depthwise packing, GHW input (each channel's taps are contiguous in k),
// 3x1 kernel, c == cr == 2, primary_tile equal to the kernel size.
// Bias and weights are plain 16-bit integers here; the test only checks that
// they land unchanged in the expected positions (biases, then interleaved taps).
TEST(PACK_F16_DWCONV_GHW_W, primary_tile_eq_kernel_size) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 2;
  const size_t cr = 2;

  std::vector<uint16_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1]
  std::vector<uint16_t> k(c * h * w);
  std::iota(k.begin(), k.end(), b.size());  // k = [2, 3, 4, 5, 6, 7]
  // Per channel: one bias element plus primary_tile taps.
  std::vector<uint16_t> packed_weights(((primary_tile + 1) * round_up_po2(c, cr)));

  xnn_pack_f16_dwconv_ghw_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, nullptr);

  const std::vector<uint16_t> expected = {
    // bias first
    0, 1,
    // then weights, channels first
    2, 5,
    3, 6,
    4, 7,
  };
  ASSERT_EQ(expected, packed_weights);
}
942
// F16 depthwise packing, GHW input, 3x1 kernel, c == 5 with cr == 2.
// The channels are split into three cr-sized blocks, each holding its biases
// and then its taps. The last block has a single live channel, so its second
// lane is expected to be zero.
TEST(PACK_F16_DWCONV_GHW_W, primary_tile_eq_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<uint16_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1, 2, 3, 4]
  std::vector<uint16_t> k(c * h * w);
  // k = [
  //   5, 6, 7,
  //   8, 9, 10,
  //   11, 12, 13,
  //   14, 15, 16,
  //   17, 18, 19]
  std::iota(k.begin(), k.end(), b.size());
  std::vector<uint16_t> packed_weights(((primary_tile + 1) * round_up_po2(c, cr)));

  xnn_pack_f16_dwconv_ghw_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, nullptr);

  const std::vector<uint16_t> expected = {
    // cr blocks
    // bias first (cr == 2 of them)
    0, 1,
    // then weights, channels first
    5, 8, 6, 9, 7, 10,
    // bias again
    2, 3,
    // then weights, channels first
    11, 14, 12, 15, 13, 16,
    // bias again
    4, 0,
    // then weights, channels first
    17, 0, 18, 0, 19, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
990
// F16 depthwise packing, GHW input, 2x2 kernel, c == cr == 2, with
// primary_tile (9) larger than the kernel size (h * w == 4).
// Expected layout: biases, the four taps per channel visited column by column,
// then (9 - 4) * cr == 10 zero elements filling the rest of the tile.
TEST(PACK_F16_DWCONV_GHW_W, primary_tile_gt_kernel_size) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 2;
  const size_t cr = 2;

  std::vector<uint16_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1]
  std::vector<uint16_t> k(c * h * w);
  // k = [
  //   2, 3,
  //   4, 5,
  //   6, 7,
  //   8, 9]
  std::iota(k.begin(), k.end(), b.size());
  std::vector<uint16_t> packed_weights(((primary_tile + 1) * round_up_po2(c, cr)));

  xnn_pack_f16_dwconv_ghw_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, nullptr);

  const std::vector<uint16_t> expected = {
    // bias first (cr == 2 of them)
    0, 1,
    // then weights, channels first
    2, 6,
    // go down the columns first
    4, 8, 3, 7, 5, 9,
    // followed by 10 zeros to make up the difference with primary_tile
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
1032
// F16 depthwise packing, GHW input, 2x2 kernel, c == 5 with cr == 2, and
// primary_tile (9) larger than the kernel size (4).
// Each of the three cr blocks is expected to hold: biases, taps visited column
// by column, then 10 zero elements of tile padding. The last block has one
// live channel; its second lane is expected to be zero.
TEST(PACK_F16_DWCONV_GHW_W, primary_tile_gt_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<uint16_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1, 2, 3, 4]
  std::vector<uint16_t> k(c * h * w);
  // k = [
  //   5, 6,
  //   7, 8,
  //   9, 10,
  //   11, 12,
  //   13, 14,
  //   15, 16,
  //   17, 18,
  //   19, 20,
  //   21, 22,
  //   23, 24]
  std::iota(k.begin(), k.end(), b.size());
  std::vector<uint16_t> packed_weights(((primary_tile + 1) * round_up_po2(c, cr)));

  xnn_pack_f16_dwconv_ghw_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, nullptr);

  const std::vector<uint16_t> expected = {
    // bias first (cr == 2 of them)
    0, 1,
    // then weights, channels first
    5, 9,
    // go down the columns first
    7, 11,
    6, 10,
    8, 12,
    // followed by 10 zeros to make up the difference with primary_tile
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // bias first (cr == 2 of them)
    2, 3,
    // then weights, channels first
    13, 17, 15, 19, 14, 18, 16, 20,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // bias
    4, 0,
    // weights
    21, 0, 23, 0, 22, 0, 24, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
1092
// F16 depthwise packing, HWG input (channel index varies fastest in k),
// 3x1 kernel, c == cr == 2, primary_tile equal to the kernel size.
// Bias and weights are plain 16-bit integers here; the test only checks that
// they land unchanged in the expected positions (biases, then taps).
TEST(PACK_F16_DWCONV_HWG_W, primary_tile_eq_kernel_size) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 2;
  const size_t cr = 2;

  std::vector<uint16_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1]
  std::vector<uint16_t> k(c * h * w);
  std::iota(k.begin(), k.end(), b.size());  // k = [2, 3, 4, 5, 6, 7]
  // Per channel: one bias element plus primary_tile taps.
  std::vector<uint16_t> packed_weights(((primary_tile + 1) * round_up_po2(c, cr)));

  xnn_pack_f16_dwconv_hwg_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, nullptr);

  const std::vector<uint16_t> expected = {
    // bias first
    0, 1,
    // then weights, channels first
    2, 3,
    4, 5,
    6, 7,
  };
  ASSERT_EQ(expected, packed_weights);
}
1128
// F16 depthwise packing, HWG input, 3x1 kernel, c == 5 with cr == 2.
// The channels are split into three cr-sized blocks, each holding its biases
// and then its taps. The last block has a single live channel, so its second
// lane is expected to be zero.
TEST(PACK_F16_DWCONV_HWG_W, primary_tile_eq_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<uint16_t> b(c);
  std::iota(b.begin(), b.end(), 0);  // b = [0, 1, 2, 3, 4]
  std::vector<uint16_t> k(c * h * w);
  // k = [
  //   5, 6, 7, 8, 9,
  //   10, 11, 12, 13, 14,
  //   15, 16, 17, 18, 19]
  std::iota(k.begin(), k.end(), b.size());
  std::vector<uint16_t> packed_weights(((primary_tile + 1) * round_up_po2(c, cr)));

  xnn_pack_f16_dwconv_hwg_w(
    primary_tile, h, w, c, cr,
    k.data(), b.data(), packed_weights.data(),
    0, nullptr);

  const std::vector<uint16_t> expected = {
    // cr blocks
    // bias first (cr == 2 of them)
    0, 1,
    // then weights, channels first
    5, 6, 10, 11, 15, 16,
    // bias again
    2, 3,
    // then weights, channels first
    7, 8, 12, 13, 17, 18,
    // bias again
    4, 0,
    // then weights, channels first
    9, 0, 14, 0, 19, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
1174
// F16 HWG packing, 2x2 kernel over 2 channels with primary_tile (9) larger than
// h * w (4): the unused tile slots are expected to be zero.
TEST(PACK_F16_DWCONV_HWG_W, primary_tile_gt_kernel_size) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 2;
  const size_t cr = 2;

  std::vector<uint16_t> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1]
  // kernel = [
  //   2, 3,
  //   4, 5,
  //   6, 7,
  //   8, 9]
  std::vector<uint16_t> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<uint16_t> packed_weights(packed_size);

  xnn_pack_f16_dwconv_hwg_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<uint16_t> expected = {
      // biases (cr == 2 of them)
      0, 1,
      // weights, channels first
      2, 3,
      // remaining taps, walking down the columns first
      6, 7,
      4, 5,
      8, 9,
      // (primary_tile - h * w) * cr == 10 zeros of padding
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
1216
// F16 HWG packing, 2x2 kernel over 5 channels, cr == 2, primary_tile (9) larger
// than h * w (4): three channel groups are expected, each padded with zeros up
// to primary_tile, and the last group also padded for the missing channel.
TEST(PACK_F16_DWCONV_HWG_W, primary_tile_gt_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<uint16_t> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1, 2, 3, 4]
  // kernel = [
  //   5, 6, 7, 8, 9,
  //   10, 11, 12, 13, 14,
  //   15, 16, 17, 18, 19,
  //   20, 21, 22, 23, 24]
  std::vector<uint16_t> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<uint16_t> packed_weights(packed_size);

  xnn_pack_f16_dwconv_hwg_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<uint16_t> expected = {
      // group 0: biases (cr == 2 of them)
      0, 1,
      // weights, channels first, walking down the columns first
      5, 6,
      15, 16,
      10, 11,
      20, 21,
      // 10 zeros to fill the tile up to primary_tile
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      // group 1: biases, weights, tile padding
      2, 3,
      7, 8, 17, 18, 12, 13, 22, 23,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
      // group 2: last real channel next to a zero-filled padding channel
      4, 0,
      9, 0, 19, 0, 14, 0, 24, 0,
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  };
  ASSERT_EQ(expected, packed_weights);
}
1270
// F32 GHW packing, 3x1 kernel over 2 channels with primary_tile == h * w:
// biases come first, then the taps interleaved across the two channels.
TEST(PACK_F32_DWCONV_GHW_W, primary_tile_eq_kernel_size) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 2;
  const size_t cr = 2;

  // Bias counts up from 0; kernel values continue counting where bias stops.
  std::vector<float> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1]
  std::vector<float> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());  // kernel = [2, 3, 4, 5, 6, 7]

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<float> packed_weights(packed_size);

  xnn_pack_f32_dwconv_ghw_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<float> expected = {
      // biases of both channels
      0.0f, 1.0f,
      // weights: one (channel 0, channel 1) pair per kernel tap
      2.0f, 5.0f,
      3.0f, 6.0f,
      4.0f, 7.0f,
  };
  ASSERT_EQ(expected, packed_weights);
}
1306
// F32 GHW packing, 3x1 kernel over 5 channels packed in groups of cr == 2:
// three [bias | weights] groups are expected, the last one zero-padded for the
// missing sixth channel.
TEST(PACK_F32_DWCONV_GHW_W, primary_tile_eq_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<float> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1, 2, 3, 4]
  // kernel = [
  //   5, 6, 7,
  //   8, 9, 10,
  //   11, 12, 13,
  //   14, 15, 16,
  //   17, 18, 19]
  std::vector<float> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<float> packed_weights(packed_size);

  xnn_pack_f32_dwconv_ghw_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<float> expected = {
      // group 0: biases (cr == 2 of them), then weights, channels first
      0.0f, 1.0f,
      5.0f, 8.0f, 6.0f, 9.0f, 7.0f, 10.0f,
      // group 1: biases, then weights, channels first
      2.0f, 3.0f,
      11.0f, 14.0f, 12.0f, 15.0f, 13.0f, 16.0f,
      // group 2: last real channel plus a zero-filled padding channel
      4.0f, 0.0f,
      17.0f, 0.0f, 18.0f, 0.0f, 19.0f, 0.0f,
  };
  ASSERT_EQ(expected, packed_weights);
}
1354
// F32 GHW packing, 2x2 kernel over 2 channels with primary_tile (9) larger than
// h * w (4): the unused tile slots are expected to be zero.
TEST(PACK_F32_DWCONV_GHW_W, primary_tile_gt_kernel_size) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 2;
  const size_t cr = 2;

  std::vector<float> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1]
  // kernel = [
  //   2, 3,
  //   4, 5,
  //   6, 7,
  //   8, 9]
  std::vector<float> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<float> packed_weights(packed_size);

  xnn_pack_f32_dwconv_ghw_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<float> expected = {
      // biases (cr == 2 of them)
      0.0f, 1.0f,
      // weights, channels first
      2.0f, 6.0f,
      // remaining taps, walking down the columns first
      4.0f, 8.0f,
      3.0f, 7.0f,
      5.0f, 9.0f,
      // (primary_tile - h * w) * cr == 10 zeros of padding
      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
  };
  ASSERT_EQ(expected, packed_weights);
}
1396
// F32 GHW packing, 2x2 kernel over 5 channels, cr == 2, primary_tile (9) larger
// than h * w (4): three channel groups are expected, each padded with zeros up
// to primary_tile, and the last group also padded for the missing channel.
TEST(PACK_F32_DWCONV_GHW_W, primary_tile_gt_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<float> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1, 2, 3, 4]
  // kernel = [
  //   5, 6,
  //   7, 8,
  //   9, 10,
  //   11, 12,
  //   13, 14,
  //   15, 16,
  //   17, 18,
  //   19, 20,
  //   21, 22,
  //   23, 24]
  std::vector<float> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<float> packed_weights(packed_size);

  xnn_pack_f32_dwconv_ghw_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<float> expected = {
      // group 0: biases (cr == 2 of them)
      0.0f, 1.0f,
      // weights, channels first, walking down the columns first
      5.0f, 9.0f,
      7.0f, 11.0f,
      6.0f, 10.0f,
      8.0f, 12.0f,
      // 10 zeros to fill the tile up to primary_tile
      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
      // group 1: biases, weights, tile padding
      2.0f, 3.0f,
      13.0f, 17.0f, 15.0f, 19.0f, 14.0f, 18.0f, 16.0f, 20.0f,
      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
      // group 2: last real channel next to a zero-filled padding channel
      4.0f, 0.0f,
      21.0f, 0.0f, 23.0f, 0.0f, 22.0f, 0.0f, 24.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
  };
  ASSERT_EQ(expected, packed_weights);
}
1456
// F32 HWG packing, 3x1 kernel over 2 channels with primary_tile == h * w:
// the packed buffer is expected to hold the biases followed by the weights,
// with no padding.
TEST(PACK_F32_DWCONV_HWG_W, primary_tile_eq_kernel_size) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 2;
  const size_t cr = 2;

  // Bias counts up from 0; kernel values continue counting where bias stops.
  std::vector<float> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1]
  std::vector<float> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());  // kernel = [2, 3, 4, 5, 6, 7]

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<float> packed_weights(packed_size);

  xnn_pack_f32_dwconv_hwg_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<float> expected = {
      // biases of both channels
      0.0f, 1.0f,
      // weights: one (channel 0, channel 1) pair per kernel tap
      2.0f, 3.0f,
      4.0f, 5.0f,
      6.0f, 7.0f,
  };
  ASSERT_EQ(expected, packed_weights);
}
1492
// F32 HWG packing, 3x1 kernel over 5 channels packed in groups of cr == 2:
// three [bias | weights] groups are expected, the last one zero-padded for the
// missing sixth channel.
TEST(PACK_F32_DWCONV_HWG_W, primary_tile_eq_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<float> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1, 2, 3, 4]
  // kernel = [
  //   5, 6, 7, 8, 9,
  //   10, 11, 12, 13, 14,
  //   15, 16, 17, 18, 19]
  std::vector<float> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<float> packed_weights(packed_size);

  xnn_pack_f32_dwconv_hwg_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<float> expected = {
      // group 0: biases (cr == 2 of them), then weights, channels first
      0.0f, 1.0f,
      5.0f, 6.0f, 10.0f, 11.0f, 15.0f, 16.0f,
      // group 1: biases, then weights, channels first
      2.0f, 3.0f,
      7.0f, 8.0f, 12.0f, 13.0f, 17.0f, 18.0f,
      // group 2: last real channel plus a zero-filled padding channel
      4.0f, 0.0f,
      9.0f, 0.0f, 14.0f, 0.0f, 19.0f, 0.0f,
  };
  ASSERT_EQ(expected, packed_weights);
}
1538
// F32 HWG packing, 2x2 kernel over 2 channels with primary_tile (9) larger than
// h * w (4): the unused tile slots are expected to be zero.
TEST(PACK_F32_DWCONV_HWG_W, primary_tile_gt_kernel_size) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 2;
  const size_t cr = 2;

  std::vector<float> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1]
  // kernel = [
  //   2, 3,
  //   4, 5,
  //   6, 7,
  //   8, 9]
  std::vector<float> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<float> packed_weights(packed_size);

  xnn_pack_f32_dwconv_hwg_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<float> expected = {
      // biases (cr == 2 of them)
      0.0f, 1.0f,
      // weights, channels first
      2.0f, 3.0f,
      // remaining taps, walking down the columns first
      6.0f, 7.0f,
      4.0f, 5.0f,
      8.0f, 9.0f,
      // (primary_tile - h * w) * cr == 10 zeros of padding
      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
  };
  ASSERT_EQ(expected, packed_weights);
}
1580
// F32 HWG packing, 2x2 kernel over 5 channels, cr == 2, primary_tile (9) larger
// than h * w (4): three channel groups are expected, each padded with zeros up
// to primary_tile, and the last group also padded for the missing channel.
TEST(PACK_F32_DWCONV_HWG_W, primary_tile_gt_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<float> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1, 2, 3, 4]
  // kernel = [
  //   5, 6, 7, 8, 9,
  //   10, 11, 12, 13, 14,
  //   15, 16, 17, 18, 19,
  //   20, 21, 22, 23, 24]
  std::vector<float> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<float> packed_weights(packed_size);

  xnn_pack_f32_dwconv_hwg_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<float> expected = {
      // group 0: biases (cr == 2 of them)
      0.0f, 1.0f,
      // weights, channels first, walking down the columns first
      5.0f, 6.0f,
      15.0f, 16.0f,
      10.0f, 11.0f,
      20.0f, 21.0f,
      // 10 zeros to fill the tile up to primary_tile
      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
      // group 1: biases, weights, tile padding
      2.0f, 3.0f,
      7.0f, 8.0f, 17.0f, 18.0f, 12.0f, 13.0f, 22.0f, 23.0f,
      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
      // group 2: last real channel next to a zero-filled padding channel
      4.0f, 0.0f,
      9.0f, 0.0f, 19.0f, 0.0f, 14.0f, 0.0f, 24.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
  };
  ASSERT_EQ(expected, packed_weights);
}
1634
// F32-to-F16 GHW packing, 3x1 kernel over 2 channels with primary_tile == h * w:
// float inputs are packed into 16-bit words; the expectation is written in
// floats and converted with fp16_ieee_from_fp32_value before comparing.
TEST(PACK_F32_TO_F16_DWCONV_GHW_W, primary_tile_eq_kernel_size) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 2;
  const size_t cr = 2;

  // Bias counts up from 0; kernel values continue counting where bias stops.
  std::vector<float> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1]
  std::vector<float> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());  // kernel = [2, 3, 4, 5, 6, 7]

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<uint16_t> packed_weights(packed_size);

  xnn_pack_f32_to_f16_dwconv_ghw_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<float> expected_float = {
      // biases of both channels
      0.0f, 1.0f,
      // weights: one (channel 0, channel 1) pair per kernel tap
      2.0f, 5.0f,
      3.0f, 6.0f,
      4.0f, 7.0f,
  };
  // Convert the float expectation element by element to half-precision bits.
  std::vector<uint16_t> expected(expected_float.size());
  for (size_t i = 0; i < expected_float.size(); ++i) {
    expected[i] = fp16_ieee_from_fp32_value(expected_float[i]);
  }
  ASSERT_EQ(expected, packed_weights);
}
1673
// F32-to-F16 GHW packing, 3x1 kernel over 5 channels packed in groups of
// cr == 2: three [bias | weights] groups are expected, the last one zero-padded
// for the missing sixth channel.
TEST(PACK_F32_TO_F16_DWCONV_GHW_W, primary_tile_eq_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<float> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1, 2, 3, 4]
  // kernel = [
  //   5, 6, 7,
  //   8, 9, 10,
  //   11, 12, 13,
  //   14, 15, 16,
  //   17, 18, 19]
  std::vector<float> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<uint16_t> packed_weights(packed_size);

  xnn_pack_f32_to_f16_dwconv_ghw_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<float> expected_float = {
      // group 0: biases (cr == 2 of them), then weights, channels first
      0.0f, 1.0f,
      5.0f, 8.0f, 6.0f, 9.0f, 7.0f, 10.0f,
      // group 1: biases, then weights, channels first
      2.0f, 3.0f,
      11.0f, 14.0f, 12.0f, 15.0f, 13.0f, 16.0f,
      // group 2: last real channel plus a zero-filled padding channel
      4.0f, 0.0f,
      17.0f, 0.0f, 18.0f, 0.0f, 19.0f, 0.0f,
  };
  // Convert the float expectation element by element to half-precision bits.
  std::vector<uint16_t> expected(expected_float.size());
  for (size_t i = 0; i < expected_float.size(); ++i) {
    expected[i] = fp16_ieee_from_fp32_value(expected_float[i]);
  }
  ASSERT_EQ(expected, packed_weights);
}
1724
// F32-to-F16 GHW packing, 2x2 kernel over 2 channels with primary_tile (9)
// larger than h * w (4): the unused tile slots are expected to be zero.
TEST(PACK_F32_TO_F16_DWCONV_GHW_W, primary_tile_gt_kernel_size) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 2;
  const size_t cr = 2;

  std::vector<float> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1]
  // kernel = [
  //   2, 3,
  //   4, 5,
  //   6, 7,
  //   8, 9]
  std::vector<float> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<uint16_t> packed_weights(packed_size);

  xnn_pack_f32_to_f16_dwconv_ghw_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<float> expected_float = {
      // biases (cr == 2 of them)
      0.0f, 1.0f,
      // weights, channels first
      2.0f, 6.0f,
      // remaining taps, walking down the columns first
      4.0f, 8.0f,
      3.0f, 7.0f,
      5.0f, 9.0f,
      // (primary_tile - h * w) * cr == 10 zeros of padding
      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
  };
  // Convert the float expectation element by element to half-precision bits.
  std::vector<uint16_t> expected(expected_float.size());
  for (size_t i = 0; i < expected_float.size(); ++i) {
    expected[i] = fp16_ieee_from_fp32_value(expected_float[i]);
  }
  ASSERT_EQ(expected, packed_weights);
}
1769
// F32-to-F16 GHW packing, 2x2 kernel over 5 channels, cr == 2, primary_tile (9)
// larger than h * w (4): three channel groups are expected, each padded with
// zeros up to primary_tile, and the last group also padded for the missing
// channel.
TEST(PACK_F32_TO_F16_DWCONV_GHW_W, primary_tile_gt_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<float> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1, 2, 3, 4]
  // kernel = [
  //   5, 6,
  //   7, 8,
  //   9, 10,
  //   11, 12,
  //   13, 14,
  //   15, 16,
  //   17, 18,
  //   19, 20,
  //   21, 22,
  //   23, 24]
  std::vector<float> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<uint16_t> packed_weights(packed_size);

  xnn_pack_f32_to_f16_dwconv_ghw_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<float> expected_float = {
      // group 0: biases (cr == 2 of them)
      0.0f, 1.0f,
      // weights, channels first, walking down the columns first
      5.0f, 9.0f,
      7.0f, 11.0f,
      6.0f, 10.0f,
      8.0f, 12.0f,
      // 10 zeros to fill the tile up to primary_tile
      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
      // group 1: biases, weights, tile padding
      2.0f, 3.0f,
      13.0f, 17.0f, 15.0f, 19.0f, 14.0f, 18.0f, 16.0f, 20.0f,
      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
      // group 2: last real channel next to a zero-filled padding channel
      4.0f, 0.0f,
      21.0f, 0.0f, 23.0f, 0.0f, 22.0f, 0.0f, 24.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
  };
  // Convert the float expectation element by element to half-precision bits.
  std::vector<uint16_t> expected(expected_float.size());
  for (size_t i = 0; i < expected_float.size(); ++i) {
    expected[i] = fp16_ieee_from_fp32_value(expected_float[i]);
  }
  ASSERT_EQ(expected, packed_weights);
}
1832
// F32-to-F16 HWG packing, 3x1 kernel over 2 channels with primary_tile == h * w:
// float inputs are packed into 16-bit words; the expectation is written in
// floats and converted with fp16_ieee_from_fp32_value before comparing.
TEST(PACK_F32_TO_F16_DWCONV_HWG_W, primary_tile_eq_kernel_size) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 2;
  const size_t cr = 2;

  // Bias counts up from 0; kernel values continue counting where bias stops.
  std::vector<float> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1]
  std::vector<float> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());  // kernel = [2, 3, 4, 5, 6, 7]

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<uint16_t> packed_weights(packed_size);

  xnn_pack_f32_to_f16_dwconv_hwg_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<float> expected_float = {
      // biases of both channels
      0.0f, 1.0f,
      // weights: one (channel 0, channel 1) pair per kernel tap
      2.0f, 3.0f,
      4.0f, 5.0f,
      6.0f, 7.0f,
  };
  // Convert the float expectation element by element to half-precision bits.
  std::vector<uint16_t> expected(expected_float.size());
  for (size_t i = 0; i < expected_float.size(); ++i) {
    expected[i] = fp16_ieee_from_fp32_value(expected_float[i]);
  }
  ASSERT_EQ(expected, packed_weights);
}
1871
// F32-to-F16 HWG packing, 3x1 kernel over 5 channels packed in groups of
// cr == 2: three [bias | weights] groups are expected, the last one zero-padded
// for the missing sixth channel.
TEST(PACK_F32_TO_F16_DWCONV_HWG_W, primary_tile_eq_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 3;
  const size_t h = 3;
  const size_t w = 1;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<float> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1, 2, 3, 4]
  // kernel = [
  //   5, 6, 7, 8, 9,
  //   10, 11, 12, 13, 14,
  //   15, 16, 17, 18, 19]
  std::vector<float> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<uint16_t> packed_weights(packed_size);

  xnn_pack_f32_to_f16_dwconv_hwg_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<float> expected_float = {
      // group 0: biases (cr == 2 of them), then weights, channels first
      0.0f, 1.0f,
      5.0f, 6.0f, 10.0f, 11.0f, 15.0f, 16.0f,
      // group 1: biases, then weights, channels first
      2.0f, 3.0f,
      7.0f, 8.0f, 12.0f, 13.0f, 17.0f, 18.0f,
      // group 2: last real channel plus a zero-filled padding channel
      4.0f, 0.0f,
      9.0f, 0.0f, 14.0f, 0.0f, 19.0f, 0.0f,
  };
  // Convert the float expectation element by element to half-precision bits.
  std::vector<uint16_t> expected(expected_float.size());
  for (size_t i = 0; i < expected_float.size(); ++i) {
    expected[i] = fp16_ieee_from_fp32_value(expected_float[i]);
  }
  ASSERT_EQ(expected, packed_weights);
}
1920
// F32-to-F16 HWG packing, 2x2 kernel over 2 channels with primary_tile (9)
// larger than h * w (4): the unused tile slots are expected to be zero.
TEST(PACK_F32_TO_F16_DWCONV_HWG_W, primary_tile_gt_kernel_size) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 2;
  const size_t cr = 2;

  std::vector<float> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1]
  // kernel = [
  //   2, 3,
  //   4, 5,
  //   6, 7,
  //   8, 9]
  std::vector<float> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<uint16_t> packed_weights(packed_size);

  xnn_pack_f32_to_f16_dwconv_hwg_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<float> expected_float = {
      // biases (cr == 2 of them)
      0.0f, 1.0f,
      // weights, channels first
      2.0f, 3.0f,
      // remaining taps, walking down the columns first
      6.0f, 7.0f,
      4.0f, 5.0f,
      8.0f, 9.0f,
      // (primary_tile - h * w) * cr == 10 zeros of padding
      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
  };
  // Convert the float expectation element by element to half-precision bits.
  std::vector<uint16_t> expected(expected_float.size());
  for (size_t i = 0; i < expected_float.size(); ++i) {
    expected[i] = fp16_ieee_from_fp32_value(expected_float[i]);
  }
  ASSERT_EQ(expected, packed_weights);
}
1965
// F32-to-F16 HWG packing, 2x2 kernel over 5 channels, cr == 2, primary_tile (9)
// larger than h * w (4): three channel groups are expected, each padded with
// zeros up to primary_tile, and the last group also padded for the missing
// channel.
TEST(PACK_F32_TO_F16_DWCONV_HWG_W, primary_tile_gt_kernel_size_channels_gt_cr) {
  const size_t primary_tile = 9;
  const size_t h = 2;
  const size_t w = 2;
  const size_t c = 5;
  const size_t cr = 2;

  std::vector<float> bias(c);
  std::iota(bias.begin(), bias.end(), 0);  // bias = [0, 1, 2, 3, 4]
  // kernel = [
  //   5, 6, 7, 8, 9,
  //   10, 11, 12, 13, 14,
  //   15, 16, 17, 18, 19,
  //   20, 21, 22, 23, 24]
  std::vector<float> kernel(c * h * w);
  std::iota(kernel.begin(), kernel.end(), bias.size());

  // (primary_tile + 1) entries per channel, channels rounded up to cr.
  const size_t packed_size = (primary_tile + 1) * round_up_po2(c, cr);
  std::vector<uint16_t> packed_weights(packed_size);

  xnn_pack_f32_to_f16_dwconv_hwg_w(
      primary_tile, h, w, c, cr,
      kernel.data(), bias.data(), packed_weights.data(),
      0, nullptr);

  const std::vector<float> expected_float = {
      // group 0: biases (cr == 2 of them)
      0.0f, 1.0f,
      // weights, channels first, walking down the columns first
      5.0f, 6.0f,
      15.0f, 16.0f,
      10.0f, 11.0f,
      20.0f, 21.0f,
      // 10 zeros to fill the tile up to primary_tile
      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
      // group 1: biases, weights, tile padding
      2.0f, 3.0f,
      7.0f, 8.0f, 17.0f, 18.0f, 12.0f, 13.0f, 22.0f, 23.0f,
      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
      // group 2: last real channel next to a zero-filled padding channel
      4.0f, 0.0f,
      9.0f, 0.0f, 19.0f, 0.0f, 14.0f, 0.0f, 24.0f, 0.0f,
      0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
  };
  // Convert the float expectation element by element to half-precision bits.
  std::vector<uint16_t> expected(expected_float.size());
  for (size_t i = 0; i < expected_float.size(); ++i) {
    expected[i] = fp16_ieee_from_fp32_value(expected_float[i]);
  }
  ASSERT_EQ(expected, packed_weights);
}
2022