1 // Copyright 2022 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <xnnpack/common.h>
7 #include <xnnpack/operator.h>
8 #include <xnnpack/params.h>
9 #include <gtest/gtest.h>
10
// Checks xnn_compute_convolution_output_dimension against three fixed cases.
// NOTE(review): the arguments look like (padded input size, kernel size,
// dilation, stride) judging by the expected values -- confirm against the
// declaration.
// NOTE(review): the suite name is spelled "DMENSION"; it is kept as-is because
// renaming it would change the registered test name.
TEST(COMPUTE_CONVOLUTION_OUTPUT_DMENSION, compute) {
  // Expected value goes first, matching the other tests in this file.
  ASSERT_EQ(3, xnn_compute_convolution_output_dimension(5, 3, 1, 1));
  ASSERT_EQ(6, xnn_compute_convolution_output_dimension(10, 3, 2, 1));
  ASSERT_EQ(2, xnn_compute_convolution_output_dimension(5, 3, 1, 2));
}
16
17 namespace {
18 // A dummy, nop microkernel for testing.
dummy_gemm(size_t mr,size_t nr,size_t k,const void * a,size_t a_stride,const void * w,void * c,size_t cm_stride,size_t cn_stride,const void * params)19 void dummy_gemm(size_t mr, size_t nr, size_t k, const void *a, size_t a_stride,
20 const void *w, void *c, size_t cm_stride, size_t cn_stride,
21 const void *params) {}
22 xnn_hmp_gemm_ukernel empty_gemm_ukernel = {};
23 xnn_hmp_gemm_ukernel dummy_gemm_ukernel = xnn_init_hmp_gemm_ukernel(dummy_gemm);
24
dummy_igemm(size_t mr,size_t nr,size_t kc,size_t ks,const void ** a,const void * w,void * c,size_t cm_stride,size_t cn_stride,size_t a_offset,const void * zero,const void * params)25 void dummy_igemm(size_t mr, size_t nr, size_t kc, size_t ks, const void **a,
26 const void *w, void *c, size_t cm_stride, size_t cn_stride,
27 size_t a_offset, const void *zero, const void *params) {}
28 xnn_hmp_igemm_ukernel empty_igemm_ukernel = {};
29 xnn_hmp_igemm_ukernel dummy_igemm_ukernel = xnn_init_hmp_igemm_ukernel(dummy_igemm);
30 } // namespace
31
// Checks that when the batch size equals the configured mr and a kernel is
// present for that mr, the GEMM and IGEMM heuristics both return that mr.
// Table entry i corresponds to the kernel for mr == i + 1; empty_*_ukernel
// marks an mr for which no kernel is provided.
TEST(HEURISTIC_MR, batch_size_same_as_mr) {
  // Kernels for mr == 1 and mr == 2.
  gemm_parameters mr2_params = {
    .minmax = {
      .gemm = {dummy_gemm_ukernel, dummy_gemm_ukernel},
      .igemm = {dummy_igemm_ukernel, dummy_igemm_ukernel},
    },
    .mr = 2,
    .nr = 8,
  };

  ASSERT_EQ(2, xnn_get_heuristic_mr_gemm(2, mr2_params.mr, mr2_params.nr, mr2_params.minmax.gemm));
  ASSERT_EQ(2, xnn_get_heuristic_mr_igemm(2, mr2_params.mr, mr2_params.nr, mr2_params.minmax.igemm));

  // Kernels for mr == 1, 2 and 4; the mr == 3 entry is empty.
  // Declared as its own variable instead of reassigning through a C99 compound
  // literal, which is not part of standard C++.
  gemm_parameters mr4_params = {
    .minmax = {
      .gemm = {
        dummy_gemm_ukernel, dummy_gemm_ukernel,
        empty_gemm_ukernel, dummy_gemm_ukernel,
      },
      .igemm = {
        dummy_igemm_ukernel, dummy_igemm_ukernel,
        empty_igemm_ukernel, dummy_igemm_ukernel,
      },
    },
    .mr = 4,
    .nr = 8,
  };

  ASSERT_EQ(4, xnn_get_heuristic_mr_gemm(4, mr4_params.mr, mr4_params.nr, mr4_params.minmax.gemm));
  ASSERT_EQ(4, xnn_get_heuristic_mr_igemm(4, mr4_params.mr, mr4_params.nr, mr4_params.minmax.igemm));
}
73
// Checks the GEMM and IGEMM mr heuristics when the batch size is smaller than
// the configured mr, for fully and partially populated kernel tables.
// Table entry i corresponds to the kernel for mr == i + 1; empty_*_ukernel
// marks an mr for which no kernel is provided.
// Each scenario gets its own variable instead of reassigning through a C99
// compound literal, which is not part of standard C++.
TEST(HEURISTIC_MR, batch_size_smaller_than_mr) {
  // Kernels for every mr in 1..4.
  gemm_parameters full_mr4 = {
    .minmax = {
      .gemm = {
        dummy_gemm_ukernel, dummy_gemm_ukernel,
        dummy_gemm_ukernel, dummy_gemm_ukernel,
      },
      .igemm = {
        dummy_igemm_ukernel, dummy_igemm_ukernel,
        dummy_igemm_ukernel, dummy_igemm_ukernel,
      },
    },
    .mr = 4,
    .nr = 8,
  };

  // batch size == 3 < mr == 4, pick smallest available kernel to minimize clamps.
  ASSERT_EQ(3, xnn_get_heuristic_mr_gemm(3, full_mr4.mr, full_mr4.nr, full_mr4.minmax.gemm));
  ASSERT_EQ(3, xnn_get_heuristic_mr_igemm(3, full_mr4.mr, full_mr4.nr, full_mr4.minmax.igemm));

  // Kernels for mr == 1 and mr == 4 only.
  gemm_parameters sparse_mr4 = {
    .minmax = {
      .gemm = {
        dummy_gemm_ukernel, empty_gemm_ukernel,
        empty_gemm_ukernel, dummy_gemm_ukernel,
      },
      .igemm = {
        dummy_igemm_ukernel, empty_igemm_ukernel,
        empty_igemm_ukernel, dummy_igemm_ukernel,
      },
    },
    .mr = 4,
    .nr = 8,
  };

  // The only kernel with mr < 2 is mr == 1, which is too inefficient for this batch size 2.
  ASSERT_EQ(4, xnn_get_heuristic_mr_gemm(2, sparse_mr4.mr, sparse_mr4.nr, sparse_mr4.minmax.gemm));
  ASSERT_EQ(4, xnn_get_heuristic_mr_igemm(2, sparse_mr4.mr, sparse_mr4.nr, sparse_mr4.minmax.igemm));
  // Batch size 1 has an exact match in the mr == 1 kernel.
  ASSERT_EQ(1, xnn_get_heuristic_mr_gemm(1, sparse_mr4.mr, sparse_mr4.nr, sparse_mr4.minmax.gemm));
  ASSERT_EQ(1, xnn_get_heuristic_mr_igemm(1, sparse_mr4.mr, sparse_mr4.nr, sparse_mr4.minmax.igemm));

  // Kernels for mr == 1, 4, 5 and 6.
  gemm_parameters sparse_mr6 = {
    .minmax = {
      .gemm = {
        dummy_gemm_ukernel, empty_gemm_ukernel, empty_gemm_ukernel,
        dummy_gemm_ukernel, dummy_gemm_ukernel, dummy_gemm_ukernel,
      },
      .igemm = {
        dummy_igemm_ukernel, empty_igemm_ukernel, empty_igemm_ukernel,
        dummy_igemm_ukernel, dummy_igemm_ukernel, dummy_igemm_ukernel,
      },
    },
    .mr = 6,
    .nr = 8,
  };

  // Batch sizes 5, 4 and 1 have exact matches; batch size 2 does not and is
  // expected to use mr == 4.
  ASSERT_EQ(5, xnn_get_heuristic_mr_gemm(5, sparse_mr6.mr, sparse_mr6.nr, sparse_mr6.minmax.gemm));
  ASSERT_EQ(5, xnn_get_heuristic_mr_igemm(5, sparse_mr6.mr, sparse_mr6.nr, sparse_mr6.minmax.igemm));
  ASSERT_EQ(4, xnn_get_heuristic_mr_gemm(4, sparse_mr6.mr, sparse_mr6.nr, sparse_mr6.minmax.gemm));
  ASSERT_EQ(4, xnn_get_heuristic_mr_igemm(4, sparse_mr6.mr, sparse_mr6.nr, sparse_mr6.minmax.igemm));
  ASSERT_EQ(4, xnn_get_heuristic_mr_gemm(2, sparse_mr6.mr, sparse_mr6.nr, sparse_mr6.minmax.gemm));
  ASSERT_EQ(4, xnn_get_heuristic_mr_igemm(2, sparse_mr6.mr, sparse_mr6.nr, sparse_mr6.minmax.igemm));
  ASSERT_EQ(1, xnn_get_heuristic_mr_gemm(1, sparse_mr6.mr, sparse_mr6.nr, sparse_mr6.minmax.gemm));
  ASSERT_EQ(1, xnn_get_heuristic_mr_igemm(1, sparse_mr6.mr, sparse_mr6.nr, sparse_mr6.minmax.igemm));
}
155
// Checks the GEMM and IGEMM mr heuristics when the batch size exceeds the
// configured mr, so no exact-match kernel can exist.
// Table entry i corresponds to the kernel for mr == i + 1; empty_*_ukernel
// marks an mr for which no kernel is provided.
// Each scenario gets its own variable instead of reassigning through a C99
// compound literal, which is not part of standard C++.
TEST(HEURISTIC_MR, batch_size_larger_than_mr) {
  // Kernels for mr == 1, 3 and 4.
  gemm_parameters sparse_mr4 = {
    .minmax = {
      .gemm = {
        dummy_gemm_ukernel, empty_gemm_ukernel,
        dummy_gemm_ukernel, dummy_gemm_ukernel,
      },
      .igemm = {
        dummy_igemm_ukernel, empty_igemm_ukernel,
        dummy_igemm_ukernel, dummy_igemm_ukernel,
      },
    },
    .mr = 4,
    .nr = 8,
  };

  ASSERT_EQ(3, xnn_get_heuristic_mr_gemm(5, sparse_mr4.mr, sparse_mr4.nr, sparse_mr4.minmax.gemm));
  ASSERT_EQ(3, xnn_get_heuristic_mr_igemm(5, sparse_mr4.mr, sparse_mr4.nr, sparse_mr4.minmax.igemm));

  // Kernels for every mr in 1..6.
  gemm_parameters full_mr6 = {
    .minmax = {
      .gemm = {
        dummy_gemm_ukernel, dummy_gemm_ukernel, dummy_gemm_ukernel,
        dummy_gemm_ukernel, dummy_gemm_ukernel, dummy_gemm_ukernel,
      },
      .igemm = {
        dummy_igemm_ukernel, dummy_igemm_ukernel, dummy_igemm_ukernel,
        dummy_igemm_ukernel, dummy_igemm_ukernel, dummy_igemm_ukernel,
      },
    },
    .mr = 6,
    .nr = 8,
  };

  ASSERT_EQ(4, xnn_get_heuristic_mr_gemm(7, full_mr6.mr, full_mr6.nr, full_mr6.minmax.gemm));
  ASSERT_EQ(4, xnn_get_heuristic_mr_igemm(7, full_mr6.mr, full_mr6.nr, full_mr6.minmax.igemm));
  ASSERT_EQ(6, xnn_get_heuristic_mr_gemm(11, full_mr6.mr, full_mr6.nr, full_mr6.minmax.gemm));
  ASSERT_EQ(6, xnn_get_heuristic_mr_igemm(11, full_mr6.mr, full_mr6.nr, full_mr6.minmax.igemm));
  ASSERT_EQ(6, xnn_get_heuristic_mr_gemm(22, full_mr6.mr, full_mr6.nr, full_mr6.minmax.gemm));
  ASSERT_EQ(6, xnn_get_heuristic_mr_igemm(22, full_mr6.mr, full_mr6.nr, full_mr6.minmax.igemm));
  ASSERT_EQ(5, xnn_get_heuristic_mr_gemm(50, full_mr6.mr, full_mr6.nr, full_mr6.minmax.gemm));
  ASSERT_EQ(5, xnn_get_heuristic_mr_igemm(50, full_mr6.mr, full_mr6.nr, full_mr6.minmax.igemm));

  // Tests some MobileNet params.
  ASSERT_EQ(6, xnn_get_heuristic_mr_gemm(112 * 112, full_mr6.mr, full_mr6.nr, full_mr6.minmax.gemm));
  ASSERT_EQ(6, xnn_get_heuristic_mr_igemm(112 * 112, full_mr6.mr, full_mr6.nr, full_mr6.minmax.igemm));
  ASSERT_EQ(6, xnn_get_heuristic_mr_gemm(56 * 56, full_mr6.mr, full_mr6.nr, full_mr6.minmax.gemm));
  ASSERT_EQ(6, xnn_get_heuristic_mr_igemm(56 * 56, full_mr6.mr, full_mr6.nr, full_mr6.minmax.igemm));
  ASSERT_EQ(6, xnn_get_heuristic_mr_gemm(14 * 14, full_mr6.mr, full_mr6.nr, full_mr6.minmax.gemm));
  ASSERT_EQ(6, xnn_get_heuristic_mr_igemm(14 * 14, full_mr6.mr, full_mr6.nr, full_mr6.minmax.igemm));
  ASSERT_EQ(5, xnn_get_heuristic_mr_gemm(7 * 7, full_mr6.mr, full_mr6.nr, full_mr6.minmax.gemm));
  ASSERT_EQ(5, xnn_get_heuristic_mr_igemm(7 * 7, full_mr6.mr, full_mr6.nr, full_mr6.minmax.igemm));
}
222
// Checks the GEMM mr heuristic when the only kernel present is the one for the
// maximum mr (no mr == 1 kernel at all).
// Table entry i corresponds to the kernel for mr == i + 1; empty_*_ukernel
// marks an mr for which no kernel is provided.
TEST(HEURISTIC_MR, max_mr_without_mr1_kernel) {
  // Only the mr == 4 entry is populated in both tables.
  gemm_parameters only_max_mr = {
    .minmax = {
      .gemm = {
        empty_gemm_ukernel, empty_gemm_ukernel,
        empty_gemm_ukernel, dummy_gemm_ukernel,
      },
      .igemm = {
        empty_igemm_ukernel, empty_igemm_ukernel,
        empty_igemm_ukernel, dummy_igemm_ukernel,
      },
    },
    .mr = 4,
    .nr = 8,
  };

  // batch size == 3 < mr == 4, but mr == 4 is the only kernel available, so it
  // is the expected choice.
  // NOTE(review): the IGEMM table is set up above but only the GEMM heuristic
  // is asserted here -- confirm whether an IGEMM check was intended.
  const auto chosen_mr =
      xnn_get_heuristic_mr_gemm(3, only_max_mr.mr, only_max_mr.nr, only_max_mr.minmax.gemm);
  ASSERT_EQ(4, chosen_mr);
}
246