xref: /aosp_15_r20/external/XNNPACK/src/f32-conv-hwc/3x3s2p1c3x4-scalar-1x1.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5*4bdc9457SAndroid Build Coastguard Worker 
6*4bdc9457SAndroid Build Coastguard Worker #include <assert.h>
7*4bdc9457SAndroid Build Coastguard Worker 
8*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/conv.h>
9*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/math.h>
10*4bdc9457SAndroid Build Coastguard Worker 
11*4bdc9457SAndroid Build Coastguard Worker 
xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1(size_t input_height,size_t input_width,size_t output_y_start,size_t output_y_end,const float * input,const float * zero,const float * weights,float * output,size_t input_padding_top,size_t output_channels,size_t output_height_stride,size_t output_width_stride,const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS (1)])12*4bdc9457SAndroid Build Coastguard Worker void xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1(
13*4bdc9457SAndroid Build Coastguard Worker     size_t input_height,
14*4bdc9457SAndroid Build Coastguard Worker     size_t input_width,
15*4bdc9457SAndroid Build Coastguard Worker     size_t output_y_start,
16*4bdc9457SAndroid Build Coastguard Worker     size_t output_y_end,
17*4bdc9457SAndroid Build Coastguard Worker     const float* input,
18*4bdc9457SAndroid Build Coastguard Worker     const float* zero,
19*4bdc9457SAndroid Build Coastguard Worker     const float* weights,
20*4bdc9457SAndroid Build Coastguard Worker     float* output,
21*4bdc9457SAndroid Build Coastguard Worker     size_t input_padding_top,
22*4bdc9457SAndroid Build Coastguard Worker     size_t output_channels,
23*4bdc9457SAndroid Build Coastguard Worker     size_t output_height_stride,
24*4bdc9457SAndroid Build Coastguard Worker     size_t output_width_stride,
25*4bdc9457SAndroid Build Coastguard Worker     const union xnn_f32_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
26*4bdc9457SAndroid Build Coastguard Worker {
27*4bdc9457SAndroid Build Coastguard Worker   assert(input_width != 0);
28*4bdc9457SAndroid Build Coastguard Worker   assert(output_y_end > output_y_start);
29*4bdc9457SAndroid Build Coastguard Worker   assert(input_padding_top <= 1);
30*4bdc9457SAndroid Build Coastguard Worker   assert(output_channels != 0);
31*4bdc9457SAndroid Build Coastguard Worker 
32*4bdc9457SAndroid Build Coastguard Worker   const size_t input_height_stride = input_width * 3 /* channels */ * sizeof(float);
33*4bdc9457SAndroid Build Coastguard Worker   const size_t input_width_decrement = round_down_po2(input_width, 2) * 3 /* channels */ * sizeof(float);
34*4bdc9457SAndroid Build Coastguard Worker   const size_t output_width = (input_width + 1) / 2;
35*4bdc9457SAndroid Build Coastguard Worker   const size_t output_channel_decrement = output_width * output_width_stride - 4 * sizeof(float);
36*4bdc9457SAndroid Build Coastguard Worker   const size_t output_height_increment = output_height_stride - round_up_po2(output_channels, 4) * sizeof(float);
37*4bdc9457SAndroid Build Coastguard Worker 
38*4bdc9457SAndroid Build Coastguard Worker   // Adjustment for padding processed below
39*4bdc9457SAndroid Build Coastguard Worker   const float* i0 = (const float*) ((uintptr_t) input + input_height_stride * (output_y_start * 2 - input_padding_top));
40*4bdc9457SAndroid Build Coastguard Worker   const float* i1 = (const float*) ((uintptr_t) i0 + input_height_stride);
41*4bdc9457SAndroid Build Coastguard Worker   const float* i2 = (const float*) ((uintptr_t) i1 + input_height_stride);
42*4bdc9457SAndroid Build Coastguard Worker   float* o0 = (float*) ((uintptr_t) output + output_height_stride * output_y_start);
43*4bdc9457SAndroid Build Coastguard Worker 
44*4bdc9457SAndroid Build Coastguard Worker   if XNN_UNPREDICTABLE(output_y_start < input_padding_top) {
45*4bdc9457SAndroid Build Coastguard Worker     i0 = zero;
46*4bdc9457SAndroid Build Coastguard Worker   }
47*4bdc9457SAndroid Build Coastguard Worker 
48*4bdc9457SAndroid Build Coastguard Worker   const float voutput_max = params->scalar.max;
49*4bdc9457SAndroid Build Coastguard Worker   const float voutput_min = params->scalar.min;
50*4bdc9457SAndroid Build Coastguard Worker 
51*4bdc9457SAndroid Build Coastguard Worker   for (size_t output_y = output_y_start; output_y < output_y_end; output_y += 1) {
52*4bdc9457SAndroid Build Coastguard Worker     const size_t input_y2 = output_y * 2 + 2 - input_padding_top;
53*4bdc9457SAndroid Build Coastguard Worker     if XNN_UNPREDICTABLE(input_y2 >= input_height) {
54*4bdc9457SAndroid Build Coastguard Worker       i2 = zero;
55*4bdc9457SAndroid Build Coastguard Worker     }
56*4bdc9457SAndroid Build Coastguard Worker 
57*4bdc9457SAndroid Build Coastguard Worker     const float* w = weights;
58*4bdc9457SAndroid Build Coastguard Worker     size_t c = output_channels;
59*4bdc9457SAndroid Build Coastguard Worker     do {
60*4bdc9457SAndroid Build Coastguard Worker       float vi00c0 = 0.0f;
61*4bdc9457SAndroid Build Coastguard Worker       float vi00c1 = 0.0f;
62*4bdc9457SAndroid Build Coastguard Worker       float vi00c2 = 0.0f;
63*4bdc9457SAndroid Build Coastguard Worker       float vi10c0 = 0.0f;
64*4bdc9457SAndroid Build Coastguard Worker       float vi10c1 = 0.0f;
65*4bdc9457SAndroid Build Coastguard Worker       float vi10c2 = 0.0f;
66*4bdc9457SAndroid Build Coastguard Worker       float vi20c0 = 0.0f;
67*4bdc9457SAndroid Build Coastguard Worker       float vi20c1 = 0.0f;
68*4bdc9457SAndroid Build Coastguard Worker       float vi20c2 = 0.0f;
69*4bdc9457SAndroid Build Coastguard Worker 
70*4bdc9457SAndroid Build Coastguard Worker       size_t iw = input_width;
71*4bdc9457SAndroid Build Coastguard Worker       for (; iw >= 2; iw -= 2) {
72*4bdc9457SAndroid Build Coastguard Worker         // start with biases
73*4bdc9457SAndroid Build Coastguard Worker         float voc0 = w[0];
74*4bdc9457SAndroid Build Coastguard Worker         float voc1 = w[1];
75*4bdc9457SAndroid Build Coastguard Worker         float voc2 = w[2];
76*4bdc9457SAndroid Build Coastguard Worker         float voc3 = w[3];
77*4bdc9457SAndroid Build Coastguard Worker 
78*4bdc9457SAndroid Build Coastguard Worker         const float vk00c0x0 = w[4];
79*4bdc9457SAndroid Build Coastguard Worker         const float vk00c0x1 = w[5];
80*4bdc9457SAndroid Build Coastguard Worker         const float vk00c0x2 = w[6];
81*4bdc9457SAndroid Build Coastguard Worker         const float vk00c0x3 = w[7];
82*4bdc9457SAndroid Build Coastguard Worker 
83*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk00c0x0 * vi00c0;
84*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk00c0x1 * vi00c0;
85*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk00c0x2 * vi00c0;
86*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk00c0x3 * vi00c0;
87*4bdc9457SAndroid Build Coastguard Worker 
88*4bdc9457SAndroid Build Coastguard Worker         const float vk10c0x0 = w[8];
89*4bdc9457SAndroid Build Coastguard Worker         const float vk10c0x1 = w[9];
90*4bdc9457SAndroid Build Coastguard Worker         const float vk10c0x2 = w[10];
91*4bdc9457SAndroid Build Coastguard Worker         const float vk10c0x3 = w[11];
92*4bdc9457SAndroid Build Coastguard Worker 
93*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk10c0x0 * vi10c0;
94*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk10c0x1 * vi10c0;
95*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk10c0x2 * vi10c0;
96*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk10c0x3 * vi10c0;
97*4bdc9457SAndroid Build Coastguard Worker 
98*4bdc9457SAndroid Build Coastguard Worker         const float vk20c0x0 = w[12];
99*4bdc9457SAndroid Build Coastguard Worker         const float vk20c0x1 = w[13];
100*4bdc9457SAndroid Build Coastguard Worker         const float vk20c0x2 = w[14];
101*4bdc9457SAndroid Build Coastguard Worker         const float vk20c0x3 = w[15];
102*4bdc9457SAndroid Build Coastguard Worker 
103*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk20c0x0 * vi20c0;
104*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk20c0x1 * vi20c0;
105*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk20c0x2 * vi20c0;
106*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk20c0x3 * vi20c0;
107*4bdc9457SAndroid Build Coastguard Worker 
108*4bdc9457SAndroid Build Coastguard Worker         const float vk00c1x0 = w[16];
109*4bdc9457SAndroid Build Coastguard Worker         const float vk00c1x1 = w[17];
110*4bdc9457SAndroid Build Coastguard Worker         const float vk00c1x2 = w[18];
111*4bdc9457SAndroid Build Coastguard Worker         const float vk00c1x3 = w[19];
112*4bdc9457SAndroid Build Coastguard Worker 
113*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk00c1x0 * vi00c1;
114*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk00c1x1 * vi00c1;
115*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk00c1x2 * vi00c1;
116*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk00c1x3 * vi00c1;
117*4bdc9457SAndroid Build Coastguard Worker 
118*4bdc9457SAndroid Build Coastguard Worker         const float vk10c1x0 = w[20];
119*4bdc9457SAndroid Build Coastguard Worker         const float vk10c1x1 = w[21];
120*4bdc9457SAndroid Build Coastguard Worker         const float vk10c1x2 = w[22];
121*4bdc9457SAndroid Build Coastguard Worker         const float vk10c1x3 = w[23];
122*4bdc9457SAndroid Build Coastguard Worker 
123*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk10c1x0 * vi10c1;
124*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk10c1x1 * vi10c1;
125*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk10c1x2 * vi10c1;
126*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk10c1x3 * vi10c1;
127*4bdc9457SAndroid Build Coastguard Worker 
128*4bdc9457SAndroid Build Coastguard Worker         const float vk20c1x0 = w[24];
129*4bdc9457SAndroid Build Coastguard Worker         const float vk20c1x1 = w[25];
130*4bdc9457SAndroid Build Coastguard Worker         const float vk20c1x2 = w[26];
131*4bdc9457SAndroid Build Coastguard Worker         const float vk20c1x3 = w[27];
132*4bdc9457SAndroid Build Coastguard Worker 
133*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk20c1x0 * vi20c1;
134*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk20c1x1 * vi20c1;
135*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk20c1x2 * vi20c1;
136*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk20c1x3 * vi20c1;
137*4bdc9457SAndroid Build Coastguard Worker 
138*4bdc9457SAndroid Build Coastguard Worker         const float vk00c2x0 = w[28];
139*4bdc9457SAndroid Build Coastguard Worker         const float vk00c2x1 = w[29];
140*4bdc9457SAndroid Build Coastguard Worker         const float vk00c2x2 = w[30];
141*4bdc9457SAndroid Build Coastguard Worker         const float vk00c2x3 = w[31];
142*4bdc9457SAndroid Build Coastguard Worker 
143*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk00c2x0 * vi00c2;
144*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk00c2x1 * vi00c2;
145*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk00c2x2 * vi00c2;
146*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk00c2x3 * vi00c2;
147*4bdc9457SAndroid Build Coastguard Worker 
148*4bdc9457SAndroid Build Coastguard Worker         const float vk10c2x0 = w[32];
149*4bdc9457SAndroid Build Coastguard Worker         const float vk10c2x1 = w[33];
150*4bdc9457SAndroid Build Coastguard Worker         const float vk10c2x2 = w[34];
151*4bdc9457SAndroid Build Coastguard Worker         const float vk10c2x3 = w[35];
152*4bdc9457SAndroid Build Coastguard Worker 
153*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk10c2x0 * vi10c2;
154*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk10c2x1 * vi10c2;
155*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk10c2x2 * vi10c2;
156*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk10c2x3 * vi10c2;
157*4bdc9457SAndroid Build Coastguard Worker 
158*4bdc9457SAndroid Build Coastguard Worker         const float vk20c2x0 = w[36];
159*4bdc9457SAndroid Build Coastguard Worker         const float vk20c2x1 = w[37];
160*4bdc9457SAndroid Build Coastguard Worker         const float vk20c2x2 = w[38];
161*4bdc9457SAndroid Build Coastguard Worker         const float vk20c2x3 = w[39];
162*4bdc9457SAndroid Build Coastguard Worker 
163*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk20c2x0 * vi20c2;
164*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk20c2x1 * vi20c2;
165*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk20c2x2 * vi20c2;
166*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk20c2x3 * vi20c2;
167*4bdc9457SAndroid Build Coastguard Worker 
168*4bdc9457SAndroid Build Coastguard Worker         const float vk01c0x0 = w[40];
169*4bdc9457SAndroid Build Coastguard Worker         const float vk01c0x1 = w[41];
170*4bdc9457SAndroid Build Coastguard Worker         const float vk01c0x2 = w[42];
171*4bdc9457SAndroid Build Coastguard Worker         const float vk01c0x3 = w[43];
172*4bdc9457SAndroid Build Coastguard Worker 
173*4bdc9457SAndroid Build Coastguard Worker         const float vi01c0 = i0[0];
174*4bdc9457SAndroid Build Coastguard Worker 
175*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk01c0x0 * vi01c0;
176*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk01c0x1 * vi01c0;
177*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk01c0x2 * vi01c0;
178*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk01c0x3 * vi01c0;
179*4bdc9457SAndroid Build Coastguard Worker 
180*4bdc9457SAndroid Build Coastguard Worker         const float vk11c0x0 = w[44];
181*4bdc9457SAndroid Build Coastguard Worker         const float vk11c0x1 = w[45];
182*4bdc9457SAndroid Build Coastguard Worker         const float vk11c0x2 = w[46];
183*4bdc9457SAndroid Build Coastguard Worker         const float vk11c0x3 = w[47];
184*4bdc9457SAndroid Build Coastguard Worker 
185*4bdc9457SAndroid Build Coastguard Worker         const float vi11c0 = i1[0];
186*4bdc9457SAndroid Build Coastguard Worker 
187*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk11c0x0 * vi11c0;
188*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk11c0x1 * vi11c0;
189*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk11c0x2 * vi11c0;
190*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk11c0x3 * vi11c0;
191*4bdc9457SAndroid Build Coastguard Worker 
192*4bdc9457SAndroid Build Coastguard Worker         const float vk21c0x0 = w[48];
193*4bdc9457SAndroid Build Coastguard Worker         const float vk21c0x1 = w[49];
194*4bdc9457SAndroid Build Coastguard Worker         const float vk21c0x2 = w[50];
195*4bdc9457SAndroid Build Coastguard Worker         const float vk21c0x3 = w[51];
196*4bdc9457SAndroid Build Coastguard Worker 
197*4bdc9457SAndroid Build Coastguard Worker         const float vi21c0 = i2[0];
198*4bdc9457SAndroid Build Coastguard Worker 
199*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk21c0x0 * vi21c0;
200*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk21c0x1 * vi21c0;
201*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk21c0x2 * vi21c0;
202*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk21c0x3 * vi21c0;
203*4bdc9457SAndroid Build Coastguard Worker 
204*4bdc9457SAndroid Build Coastguard Worker         const float vk01c1x0 = w[52];
205*4bdc9457SAndroid Build Coastguard Worker         const float vk01c1x1 = w[53];
206*4bdc9457SAndroid Build Coastguard Worker         const float vk01c1x2 = w[54];
207*4bdc9457SAndroid Build Coastguard Worker         const float vk01c1x3 = w[55];
208*4bdc9457SAndroid Build Coastguard Worker 
209*4bdc9457SAndroid Build Coastguard Worker         const float vi01c1 = i0[1];
210*4bdc9457SAndroid Build Coastguard Worker 
211*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk01c1x0 * vi01c1;
212*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk01c1x1 * vi01c1;
213*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk01c1x2 * vi01c1;
214*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk01c1x3 * vi01c1;
215*4bdc9457SAndroid Build Coastguard Worker 
216*4bdc9457SAndroid Build Coastguard Worker         const float vk11c1x0 = w[56];
217*4bdc9457SAndroid Build Coastguard Worker         const float vk11c1x1 = w[57];
218*4bdc9457SAndroid Build Coastguard Worker         const float vk11c1x2 = w[58];
219*4bdc9457SAndroid Build Coastguard Worker         const float vk11c1x3 = w[59];
220*4bdc9457SAndroid Build Coastguard Worker 
221*4bdc9457SAndroid Build Coastguard Worker         const float vi11c1 = i1[1];
222*4bdc9457SAndroid Build Coastguard Worker 
223*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk11c1x0 * vi11c1;
224*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk11c1x1 * vi11c1;
225*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk11c1x2 * vi11c1;
226*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk11c1x3 * vi11c1;
227*4bdc9457SAndroid Build Coastguard Worker 
228*4bdc9457SAndroid Build Coastguard Worker         const float vk21c1x0 = w[60];
229*4bdc9457SAndroid Build Coastguard Worker         const float vk21c1x1 = w[61];
230*4bdc9457SAndroid Build Coastguard Worker         const float vk21c1x2 = w[62];
231*4bdc9457SAndroid Build Coastguard Worker         const float vk21c1x3 = w[63];
232*4bdc9457SAndroid Build Coastguard Worker 
233*4bdc9457SAndroid Build Coastguard Worker         const float vi21c1 = i2[1];
234*4bdc9457SAndroid Build Coastguard Worker 
235*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk21c1x0 * vi21c1;
236*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk21c1x1 * vi21c1;
237*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk21c1x2 * vi21c1;
238*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk21c1x3 * vi21c1;
239*4bdc9457SAndroid Build Coastguard Worker 
240*4bdc9457SAndroid Build Coastguard Worker         const float vk01c2x0 = w[64];
241*4bdc9457SAndroid Build Coastguard Worker         const float vk01c2x1 = w[65];
242*4bdc9457SAndroid Build Coastguard Worker         const float vk01c2x2 = w[66];
243*4bdc9457SAndroid Build Coastguard Worker         const float vk01c2x3 = w[67];
244*4bdc9457SAndroid Build Coastguard Worker 
245*4bdc9457SAndroid Build Coastguard Worker         const float vi01c2 = i0[2];
246*4bdc9457SAndroid Build Coastguard Worker 
247*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk01c2x0 * vi01c2;
248*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk01c2x1 * vi01c2;
249*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk01c2x2 * vi01c2;
250*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk01c2x3 * vi01c2;
251*4bdc9457SAndroid Build Coastguard Worker 
252*4bdc9457SAndroid Build Coastguard Worker         const float vk11c2x0 = w[68];
253*4bdc9457SAndroid Build Coastguard Worker         const float vk11c2x1 = w[69];
254*4bdc9457SAndroid Build Coastguard Worker         const float vk11c2x2 = w[70];
255*4bdc9457SAndroid Build Coastguard Worker         const float vk11c2x3 = w[71];
256*4bdc9457SAndroid Build Coastguard Worker 
257*4bdc9457SAndroid Build Coastguard Worker         const float vi11c2 = i1[2];
258*4bdc9457SAndroid Build Coastguard Worker 
259*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk11c2x0 * vi11c2;
260*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk11c2x1 * vi11c2;
261*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk11c2x2 * vi11c2;
262*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk11c2x3 * vi11c2;
263*4bdc9457SAndroid Build Coastguard Worker 
264*4bdc9457SAndroid Build Coastguard Worker         const float vk21c2x0 = w[72];
265*4bdc9457SAndroid Build Coastguard Worker         const float vk21c2x1 = w[73];
266*4bdc9457SAndroid Build Coastguard Worker         const float vk21c2x2 = w[74];
267*4bdc9457SAndroid Build Coastguard Worker         const float vk21c2x3 = w[75];
268*4bdc9457SAndroid Build Coastguard Worker 
269*4bdc9457SAndroid Build Coastguard Worker         const float vi21c2 = i2[2];
270*4bdc9457SAndroid Build Coastguard Worker 
271*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk21c2x0 * vi21c2;
272*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk21c2x1 * vi21c2;
273*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk21c2x2 * vi21c2;
274*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk21c2x3 * vi21c2;
275*4bdc9457SAndroid Build Coastguard Worker 
276*4bdc9457SAndroid Build Coastguard Worker         const float vk02c0x0 = w[76];
277*4bdc9457SAndroid Build Coastguard Worker         const float vk02c0x1 = w[77];
278*4bdc9457SAndroid Build Coastguard Worker         const float vk02c0x2 = w[78];
279*4bdc9457SAndroid Build Coastguard Worker         const float vk02c0x3 = w[79];
280*4bdc9457SAndroid Build Coastguard Worker 
281*4bdc9457SAndroid Build Coastguard Worker         const float vi02c0 = i0[3];
282*4bdc9457SAndroid Build Coastguard Worker 
283*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk02c0x0 * vi02c0;
284*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk02c0x1 * vi02c0;
285*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk02c0x2 * vi02c0;
286*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk02c0x3 * vi02c0;
287*4bdc9457SAndroid Build Coastguard Worker 
288*4bdc9457SAndroid Build Coastguard Worker         const float vk12c0x0 = w[80];
289*4bdc9457SAndroid Build Coastguard Worker         const float vk12c0x1 = w[81];
290*4bdc9457SAndroid Build Coastguard Worker         const float vk12c0x2 = w[82];
291*4bdc9457SAndroid Build Coastguard Worker         const float vk12c0x3 = w[83];
292*4bdc9457SAndroid Build Coastguard Worker 
293*4bdc9457SAndroid Build Coastguard Worker         const float vi12c0 = i1[3];
294*4bdc9457SAndroid Build Coastguard Worker 
295*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk12c0x0 * vi12c0;
296*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk12c0x1 * vi12c0;
297*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk12c0x2 * vi12c0;
298*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk12c0x3 * vi12c0;
299*4bdc9457SAndroid Build Coastguard Worker 
300*4bdc9457SAndroid Build Coastguard Worker         const float vk22c0x0 = w[84];
301*4bdc9457SAndroid Build Coastguard Worker         const float vk22c0x1 = w[85];
302*4bdc9457SAndroid Build Coastguard Worker         const float vk22c0x2 = w[86];
303*4bdc9457SAndroid Build Coastguard Worker         const float vk22c0x3 = w[87];
304*4bdc9457SAndroid Build Coastguard Worker 
305*4bdc9457SAndroid Build Coastguard Worker         const float vi22c0 = i2[3];
306*4bdc9457SAndroid Build Coastguard Worker 
307*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk22c0x0 * vi22c0;
308*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk22c0x1 * vi22c0;
309*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk22c0x2 * vi22c0;
310*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk22c0x3 * vi22c0;
311*4bdc9457SAndroid Build Coastguard Worker 
312*4bdc9457SAndroid Build Coastguard Worker         vi00c0 = vi02c0;
313*4bdc9457SAndroid Build Coastguard Worker         vi10c0 = vi12c0;
314*4bdc9457SAndroid Build Coastguard Worker         vi20c0 = vi22c0;
315*4bdc9457SAndroid Build Coastguard Worker 
316*4bdc9457SAndroid Build Coastguard Worker         const float vk02c1x0 = w[88];
317*4bdc9457SAndroid Build Coastguard Worker         const float vk02c1x1 = w[89];
318*4bdc9457SAndroid Build Coastguard Worker         const float vk02c1x2 = w[90];
319*4bdc9457SAndroid Build Coastguard Worker         const float vk02c1x3 = w[91];
320*4bdc9457SAndroid Build Coastguard Worker 
321*4bdc9457SAndroid Build Coastguard Worker         const float vi02c1 = i0[4];
322*4bdc9457SAndroid Build Coastguard Worker 
323*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk02c1x0 * vi02c1;
324*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk02c1x1 * vi02c1;
325*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk02c1x2 * vi02c1;
326*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk02c1x3 * vi02c1;
327*4bdc9457SAndroid Build Coastguard Worker 
328*4bdc9457SAndroid Build Coastguard Worker         const float vk12c1x0 = w[92];
329*4bdc9457SAndroid Build Coastguard Worker         const float vk12c1x1 = w[93];
330*4bdc9457SAndroid Build Coastguard Worker         const float vk12c1x2 = w[94];
331*4bdc9457SAndroid Build Coastguard Worker         const float vk12c1x3 = w[95];
332*4bdc9457SAndroid Build Coastguard Worker 
333*4bdc9457SAndroid Build Coastguard Worker         const float vi12c1 = i1[4];
334*4bdc9457SAndroid Build Coastguard Worker 
335*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk12c1x0 * vi12c1;
336*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk12c1x1 * vi12c1;
337*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk12c1x2 * vi12c1;
338*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk12c1x3 * vi12c1;
339*4bdc9457SAndroid Build Coastguard Worker 
340*4bdc9457SAndroid Build Coastguard Worker         const float vk22c1x0 = w[96];
341*4bdc9457SAndroid Build Coastguard Worker         const float vk22c1x1 = w[97];
342*4bdc9457SAndroid Build Coastguard Worker         const float vk22c1x2 = w[98];
343*4bdc9457SAndroid Build Coastguard Worker         const float vk22c1x3 = w[99];
344*4bdc9457SAndroid Build Coastguard Worker 
345*4bdc9457SAndroid Build Coastguard Worker         const float vi22c1 = i2[4];
346*4bdc9457SAndroid Build Coastguard Worker 
347*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk22c1x0 * vi22c1;
348*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk22c1x1 * vi22c1;
349*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk22c1x2 * vi22c1;
350*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk22c1x3 * vi22c1;
351*4bdc9457SAndroid Build Coastguard Worker 
352*4bdc9457SAndroid Build Coastguard Worker         vi00c1 = vi02c1;
353*4bdc9457SAndroid Build Coastguard Worker         vi10c1 = vi12c1;
354*4bdc9457SAndroid Build Coastguard Worker         vi20c1 = vi22c1;
355*4bdc9457SAndroid Build Coastguard Worker 
356*4bdc9457SAndroid Build Coastguard Worker         const float vk02c2x0 = w[100];
357*4bdc9457SAndroid Build Coastguard Worker         const float vk02c2x1 = w[101];
358*4bdc9457SAndroid Build Coastguard Worker         const float vk02c2x2 = w[102];
359*4bdc9457SAndroid Build Coastguard Worker         const float vk02c2x3 = w[103];
360*4bdc9457SAndroid Build Coastguard Worker 
361*4bdc9457SAndroid Build Coastguard Worker         const float vi02c2 = i0[5];
362*4bdc9457SAndroid Build Coastguard Worker 
363*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk02c2x0 * vi02c2;
364*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk02c2x1 * vi02c2;
365*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk02c2x2 * vi02c2;
366*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk02c2x3 * vi02c2;
367*4bdc9457SAndroid Build Coastguard Worker 
368*4bdc9457SAndroid Build Coastguard Worker         const float vk12c2x0 = w[104];
369*4bdc9457SAndroid Build Coastguard Worker         const float vk12c2x1 = w[105];
370*4bdc9457SAndroid Build Coastguard Worker         const float vk12c2x2 = w[106];
371*4bdc9457SAndroid Build Coastguard Worker         const float vk12c2x3 = w[107];
372*4bdc9457SAndroid Build Coastguard Worker 
373*4bdc9457SAndroid Build Coastguard Worker         const float vi12c2 = i1[5];
374*4bdc9457SAndroid Build Coastguard Worker 
375*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk12c2x0 * vi12c2;
376*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk12c2x1 * vi12c2;
377*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk12c2x2 * vi12c2;
378*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk12c2x3 * vi12c2;
379*4bdc9457SAndroid Build Coastguard Worker 
380*4bdc9457SAndroid Build Coastguard Worker         const float vk22c2x0 = w[108];
381*4bdc9457SAndroid Build Coastguard Worker         const float vk22c2x1 = w[109];
382*4bdc9457SAndroid Build Coastguard Worker         const float vk22c2x2 = w[110];
383*4bdc9457SAndroid Build Coastguard Worker         const float vk22c2x3 = w[111];
384*4bdc9457SAndroid Build Coastguard Worker 
385*4bdc9457SAndroid Build Coastguard Worker         const float vi22c2 = i2[5];
386*4bdc9457SAndroid Build Coastguard Worker 
387*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk22c2x0 * vi22c2;
388*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk22c2x1 * vi22c2;
389*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk22c2x2 * vi22c2;
390*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk22c2x3 * vi22c2;
391*4bdc9457SAndroid Build Coastguard Worker 
392*4bdc9457SAndroid Build Coastguard Worker         vi00c2 = vi02c2;
393*4bdc9457SAndroid Build Coastguard Worker         vi10c2 = vi12c2;
394*4bdc9457SAndroid Build Coastguard Worker         vi20c2 = vi22c2;
395*4bdc9457SAndroid Build Coastguard Worker 
396*4bdc9457SAndroid Build Coastguard Worker         voc0 = math_min_f32(voc0, voutput_max);
397*4bdc9457SAndroid Build Coastguard Worker         voc1 = math_min_f32(voc1, voutput_max);
398*4bdc9457SAndroid Build Coastguard Worker         voc2 = math_min_f32(voc2, voutput_max);
399*4bdc9457SAndroid Build Coastguard Worker         voc3 = math_min_f32(voc3, voutput_max);
400*4bdc9457SAndroid Build Coastguard Worker 
401*4bdc9457SAndroid Build Coastguard Worker         voc0 = math_max_f32(voc0, voutput_min);
402*4bdc9457SAndroid Build Coastguard Worker         voc1 = math_max_f32(voc1, voutput_min);
403*4bdc9457SAndroid Build Coastguard Worker         voc2 = math_max_f32(voc2, voutput_min);
404*4bdc9457SAndroid Build Coastguard Worker         voc3 = math_max_f32(voc3, voutput_min);
405*4bdc9457SAndroid Build Coastguard Worker 
406*4bdc9457SAndroid Build Coastguard Worker         if XNN_LIKELY(c >= 4) {
407*4bdc9457SAndroid Build Coastguard Worker           o0[0] = voc0;
408*4bdc9457SAndroid Build Coastguard Worker           o0[1] = voc1;
409*4bdc9457SAndroid Build Coastguard Worker           o0[2] = voc2;
410*4bdc9457SAndroid Build Coastguard Worker           o0[3] = voc3;
411*4bdc9457SAndroid Build Coastguard Worker           o0 = (float*) ((uintptr_t) o0 + output_width_stride);
412*4bdc9457SAndroid Build Coastguard Worker         } else {
413*4bdc9457SAndroid Build Coastguard Worker           float* o0_tmp = o0;
414*4bdc9457SAndroid Build Coastguard Worker           if (c & 2) {
415*4bdc9457SAndroid Build Coastguard Worker             o0_tmp[0] = voc0;
416*4bdc9457SAndroid Build Coastguard Worker             o0_tmp[1] = voc1;
417*4bdc9457SAndroid Build Coastguard Worker             o0_tmp += 2;
418*4bdc9457SAndroid Build Coastguard Worker             voc0 = voc2;
419*4bdc9457SAndroid Build Coastguard Worker           }
420*4bdc9457SAndroid Build Coastguard Worker           if (c & 1) {
421*4bdc9457SAndroid Build Coastguard Worker             *o0_tmp++ = voc0;
422*4bdc9457SAndroid Build Coastguard Worker           }
423*4bdc9457SAndroid Build Coastguard Worker           o0 = (float*) ((uintptr_t) o0 + output_width_stride);
424*4bdc9457SAndroid Build Coastguard Worker         }
425*4bdc9457SAndroid Build Coastguard Worker 
426*4bdc9457SAndroid Build Coastguard Worker         i0 += 6;
427*4bdc9457SAndroid Build Coastguard Worker         i1 += 6;
428*4bdc9457SAndroid Build Coastguard Worker         i2 += 6;
429*4bdc9457SAndroid Build Coastguard Worker       }
430*4bdc9457SAndroid Build Coastguard Worker       assert(iw < 2);
431*4bdc9457SAndroid Build Coastguard Worker       if XNN_UNLIKELY(iw != 0) {
432*4bdc9457SAndroid Build Coastguard Worker         float voc0 = w[0];
433*4bdc9457SAndroid Build Coastguard Worker         float voc1 = w[1];
434*4bdc9457SAndroid Build Coastguard Worker         float voc2 = w[2];
435*4bdc9457SAndroid Build Coastguard Worker         float voc3 = w[3];
436*4bdc9457SAndroid Build Coastguard Worker 
437*4bdc9457SAndroid Build Coastguard Worker         const float vk00c0x0 = w[4];
438*4bdc9457SAndroid Build Coastguard Worker         const float vk00c0x1 = w[5];
439*4bdc9457SAndroid Build Coastguard Worker         const float vk00c0x2 = w[6];
440*4bdc9457SAndroid Build Coastguard Worker         const float vk00c0x3 = w[7];
441*4bdc9457SAndroid Build Coastguard Worker 
442*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk00c0x0 * vi00c0;
443*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk00c0x1 * vi00c0;
444*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk00c0x2 * vi00c0;
445*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk00c0x3 * vi00c0;
446*4bdc9457SAndroid Build Coastguard Worker 
447*4bdc9457SAndroid Build Coastguard Worker         const float vk10c0x0 = w[8];
448*4bdc9457SAndroid Build Coastguard Worker         const float vk10c0x1 = w[9];
449*4bdc9457SAndroid Build Coastguard Worker         const float vk10c0x2 = w[10];
450*4bdc9457SAndroid Build Coastguard Worker         const float vk10c0x3 = w[11];
451*4bdc9457SAndroid Build Coastguard Worker 
452*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk10c0x0 * vi10c0;
453*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk10c0x1 * vi10c0;
454*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk10c0x2 * vi10c0;
455*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk10c0x3 * vi10c0;
456*4bdc9457SAndroid Build Coastguard Worker 
457*4bdc9457SAndroid Build Coastguard Worker         const float vk20c0x0 = w[12];
458*4bdc9457SAndroid Build Coastguard Worker         const float vk20c0x1 = w[13];
459*4bdc9457SAndroid Build Coastguard Worker         const float vk20c0x2 = w[14];
460*4bdc9457SAndroid Build Coastguard Worker         const float vk20c0x3 = w[15];
461*4bdc9457SAndroid Build Coastguard Worker 
462*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk20c0x0 * vi20c0;
463*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk20c0x1 * vi20c0;
464*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk20c0x2 * vi20c0;
465*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk20c0x3 * vi20c0;
466*4bdc9457SAndroid Build Coastguard Worker 
467*4bdc9457SAndroid Build Coastguard Worker         const float vk00c1x0 = w[16];
468*4bdc9457SAndroid Build Coastguard Worker         const float vk00c1x1 = w[17];
469*4bdc9457SAndroid Build Coastguard Worker         const float vk00c1x2 = w[18];
470*4bdc9457SAndroid Build Coastguard Worker         const float vk00c1x3 = w[19];
471*4bdc9457SAndroid Build Coastguard Worker 
472*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk00c1x0 * vi00c1;
473*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk00c1x1 * vi00c1;
474*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk00c1x2 * vi00c1;
475*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk00c1x3 * vi00c1;
476*4bdc9457SAndroid Build Coastguard Worker 
477*4bdc9457SAndroid Build Coastguard Worker         const float vk10c1x0 = w[20];
478*4bdc9457SAndroid Build Coastguard Worker         const float vk10c1x1 = w[21];
479*4bdc9457SAndroid Build Coastguard Worker         const float vk10c1x2 = w[22];
480*4bdc9457SAndroid Build Coastguard Worker         const float vk10c1x3 = w[23];
481*4bdc9457SAndroid Build Coastguard Worker 
482*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk10c1x0 * vi10c1;
483*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk10c1x1 * vi10c1;
484*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk10c1x2 * vi10c1;
485*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk10c1x3 * vi10c1;
486*4bdc9457SAndroid Build Coastguard Worker 
487*4bdc9457SAndroid Build Coastguard Worker         const float vk20c1x0 = w[24];
488*4bdc9457SAndroid Build Coastguard Worker         const float vk20c1x1 = w[25];
489*4bdc9457SAndroid Build Coastguard Worker         const float vk20c1x2 = w[26];
490*4bdc9457SAndroid Build Coastguard Worker         const float vk20c1x3 = w[27];
491*4bdc9457SAndroid Build Coastguard Worker 
492*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk20c1x0 * vi20c1;
493*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk20c1x1 * vi20c1;
494*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk20c1x2 * vi20c1;
495*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk20c1x3 * vi20c1;
496*4bdc9457SAndroid Build Coastguard Worker 
497*4bdc9457SAndroid Build Coastguard Worker         const float vk00c2x0 = w[28];
498*4bdc9457SAndroid Build Coastguard Worker         const float vk00c2x1 = w[29];
499*4bdc9457SAndroid Build Coastguard Worker         const float vk00c2x2 = w[30];
500*4bdc9457SAndroid Build Coastguard Worker         const float vk00c2x3 = w[31];
501*4bdc9457SAndroid Build Coastguard Worker 
502*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk00c2x0 * vi00c2;
503*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk00c2x1 * vi00c2;
504*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk00c2x2 * vi00c2;
505*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk00c2x3 * vi00c2;
506*4bdc9457SAndroid Build Coastguard Worker 
507*4bdc9457SAndroid Build Coastguard Worker         const float vk10c2x0 = w[32];
508*4bdc9457SAndroid Build Coastguard Worker         const float vk10c2x1 = w[33];
509*4bdc9457SAndroid Build Coastguard Worker         const float vk10c2x2 = w[34];
510*4bdc9457SAndroid Build Coastguard Worker         const float vk10c2x3 = w[35];
511*4bdc9457SAndroid Build Coastguard Worker 
512*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk10c2x0 * vi10c2;
513*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk10c2x1 * vi10c2;
514*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk10c2x2 * vi10c2;
515*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk10c2x3 * vi10c2;
516*4bdc9457SAndroid Build Coastguard Worker 
517*4bdc9457SAndroid Build Coastguard Worker         const float vk20c2x0 = w[36];
518*4bdc9457SAndroid Build Coastguard Worker         const float vk20c2x1 = w[37];
519*4bdc9457SAndroid Build Coastguard Worker         const float vk20c2x2 = w[38];
520*4bdc9457SAndroid Build Coastguard Worker         const float vk20c2x3 = w[39];
521*4bdc9457SAndroid Build Coastguard Worker 
522*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk20c2x0 * vi20c2;
523*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk20c2x1 * vi20c2;
524*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk20c2x2 * vi20c2;
525*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk20c2x3 * vi20c2;
526*4bdc9457SAndroid Build Coastguard Worker 
527*4bdc9457SAndroid Build Coastguard Worker         const float vk01c0x0 = w[40];
528*4bdc9457SAndroid Build Coastguard Worker         const float vk01c0x1 = w[41];
529*4bdc9457SAndroid Build Coastguard Worker         const float vk01c0x2 = w[42];
530*4bdc9457SAndroid Build Coastguard Worker         const float vk01c0x3 = w[43];
531*4bdc9457SAndroid Build Coastguard Worker 
532*4bdc9457SAndroid Build Coastguard Worker         const float vi01c0 = i0[0];
533*4bdc9457SAndroid Build Coastguard Worker 
534*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk01c0x0 * vi01c0;
535*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk01c0x1 * vi01c0;
536*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk01c0x2 * vi01c0;
537*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk01c0x3 * vi01c0;
538*4bdc9457SAndroid Build Coastguard Worker 
539*4bdc9457SAndroid Build Coastguard Worker         const float vk11c0x0 = w[44];
540*4bdc9457SAndroid Build Coastguard Worker         const float vk11c0x1 = w[45];
541*4bdc9457SAndroid Build Coastguard Worker         const float vk11c0x2 = w[46];
542*4bdc9457SAndroid Build Coastguard Worker         const float vk11c0x3 = w[47];
543*4bdc9457SAndroid Build Coastguard Worker 
544*4bdc9457SAndroid Build Coastguard Worker         const float vi11c0 = i1[0];
545*4bdc9457SAndroid Build Coastguard Worker 
546*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk11c0x0 * vi11c0;
547*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk11c0x1 * vi11c0;
548*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk11c0x2 * vi11c0;
549*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk11c0x3 * vi11c0;
550*4bdc9457SAndroid Build Coastguard Worker 
551*4bdc9457SAndroid Build Coastguard Worker         const float vk21c0x0 = w[48];
552*4bdc9457SAndroid Build Coastguard Worker         const float vk21c0x1 = w[49];
553*4bdc9457SAndroid Build Coastguard Worker         const float vk21c0x2 = w[50];
554*4bdc9457SAndroid Build Coastguard Worker         const float vk21c0x3 = w[51];
555*4bdc9457SAndroid Build Coastguard Worker 
556*4bdc9457SAndroid Build Coastguard Worker         const float vi21c0 = i2[0];
557*4bdc9457SAndroid Build Coastguard Worker 
558*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk21c0x0 * vi21c0;
559*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk21c0x1 * vi21c0;
560*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk21c0x2 * vi21c0;
561*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk21c0x3 * vi21c0;
562*4bdc9457SAndroid Build Coastguard Worker 
563*4bdc9457SAndroid Build Coastguard Worker         const float vk01c1x0 = w[52];
564*4bdc9457SAndroid Build Coastguard Worker         const float vk01c1x1 = w[53];
565*4bdc9457SAndroid Build Coastguard Worker         const float vk01c1x2 = w[54];
566*4bdc9457SAndroid Build Coastguard Worker         const float vk01c1x3 = w[55];
567*4bdc9457SAndroid Build Coastguard Worker 
568*4bdc9457SAndroid Build Coastguard Worker         const float vi01c1 = i0[1];
569*4bdc9457SAndroid Build Coastguard Worker 
570*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk01c1x0 * vi01c1;
571*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk01c1x1 * vi01c1;
572*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk01c1x2 * vi01c1;
573*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk01c1x3 * vi01c1;
574*4bdc9457SAndroid Build Coastguard Worker 
575*4bdc9457SAndroid Build Coastguard Worker         const float vk11c1x0 = w[56];
576*4bdc9457SAndroid Build Coastguard Worker         const float vk11c1x1 = w[57];
577*4bdc9457SAndroid Build Coastguard Worker         const float vk11c1x2 = w[58];
578*4bdc9457SAndroid Build Coastguard Worker         const float vk11c1x3 = w[59];
579*4bdc9457SAndroid Build Coastguard Worker 
580*4bdc9457SAndroid Build Coastguard Worker         const float vi11c1 = i1[1];
581*4bdc9457SAndroid Build Coastguard Worker 
582*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk11c1x0 * vi11c1;
583*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk11c1x1 * vi11c1;
584*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk11c1x2 * vi11c1;
585*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk11c1x3 * vi11c1;
586*4bdc9457SAndroid Build Coastguard Worker 
587*4bdc9457SAndroid Build Coastguard Worker         const float vk21c1x0 = w[60];
588*4bdc9457SAndroid Build Coastguard Worker         const float vk21c1x1 = w[61];
589*4bdc9457SAndroid Build Coastguard Worker         const float vk21c1x2 = w[62];
590*4bdc9457SAndroid Build Coastguard Worker         const float vk21c1x3 = w[63];
591*4bdc9457SAndroid Build Coastguard Worker 
592*4bdc9457SAndroid Build Coastguard Worker         const float vi21c1 = i2[1];
593*4bdc9457SAndroid Build Coastguard Worker 
594*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk21c1x0 * vi21c1;
595*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk21c1x1 * vi21c1;
596*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk21c1x2 * vi21c1;
597*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk21c1x3 * vi21c1;
598*4bdc9457SAndroid Build Coastguard Worker 
599*4bdc9457SAndroid Build Coastguard Worker         const float vk01c2x0 = w[64];
600*4bdc9457SAndroid Build Coastguard Worker         const float vk01c2x1 = w[65];
601*4bdc9457SAndroid Build Coastguard Worker         const float vk01c2x2 = w[66];
602*4bdc9457SAndroid Build Coastguard Worker         const float vk01c2x3 = w[67];
603*4bdc9457SAndroid Build Coastguard Worker 
604*4bdc9457SAndroid Build Coastguard Worker         const float vi01c2 = i0[2];
605*4bdc9457SAndroid Build Coastguard Worker 
606*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk01c2x0 * vi01c2;
607*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk01c2x1 * vi01c2;
608*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk01c2x2 * vi01c2;
609*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk01c2x3 * vi01c2;
610*4bdc9457SAndroid Build Coastguard Worker 
611*4bdc9457SAndroid Build Coastguard Worker         const float vk11c2x0 = w[68];
612*4bdc9457SAndroid Build Coastguard Worker         const float vk11c2x1 = w[69];
613*4bdc9457SAndroid Build Coastguard Worker         const float vk11c2x2 = w[70];
614*4bdc9457SAndroid Build Coastguard Worker         const float vk11c2x3 = w[71];
615*4bdc9457SAndroid Build Coastguard Worker 
616*4bdc9457SAndroid Build Coastguard Worker         const float vi11c2 = i1[2];
617*4bdc9457SAndroid Build Coastguard Worker 
618*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk11c2x0 * vi11c2;
619*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk11c2x1 * vi11c2;
620*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk11c2x2 * vi11c2;
621*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk11c2x3 * vi11c2;
622*4bdc9457SAndroid Build Coastguard Worker 
623*4bdc9457SAndroid Build Coastguard Worker         const float vk21c2x0 = w[72];
624*4bdc9457SAndroid Build Coastguard Worker         const float vk21c2x1 = w[73];
625*4bdc9457SAndroid Build Coastguard Worker         const float vk21c2x2 = w[74];
626*4bdc9457SAndroid Build Coastguard Worker         const float vk21c2x3 = w[75];
627*4bdc9457SAndroid Build Coastguard Worker 
628*4bdc9457SAndroid Build Coastguard Worker         const float vi21c2 = i2[2];
629*4bdc9457SAndroid Build Coastguard Worker 
630*4bdc9457SAndroid Build Coastguard Worker         voc0 += vk21c2x0 * vi21c2;
631*4bdc9457SAndroid Build Coastguard Worker         voc1 += vk21c2x1 * vi21c2;
632*4bdc9457SAndroid Build Coastguard Worker         voc2 += vk21c2x2 * vi21c2;
633*4bdc9457SAndroid Build Coastguard Worker         voc3 += vk21c2x3 * vi21c2;
634*4bdc9457SAndroid Build Coastguard Worker 
635*4bdc9457SAndroid Build Coastguard Worker         voc0 = math_min_f32(voc0, voutput_max);
636*4bdc9457SAndroid Build Coastguard Worker         voc1 = math_min_f32(voc1, voutput_max);
637*4bdc9457SAndroid Build Coastguard Worker         voc2 = math_min_f32(voc2, voutput_max);
638*4bdc9457SAndroid Build Coastguard Worker         voc3 = math_min_f32(voc3, voutput_max);
639*4bdc9457SAndroid Build Coastguard Worker 
640*4bdc9457SAndroid Build Coastguard Worker         voc0 = math_max_f32(voc0, voutput_min);
641*4bdc9457SAndroid Build Coastguard Worker         voc1 = math_max_f32(voc1, voutput_min);
642*4bdc9457SAndroid Build Coastguard Worker         voc2 = math_max_f32(voc2, voutput_min);
643*4bdc9457SAndroid Build Coastguard Worker         voc3 = math_max_f32(voc3, voutput_min);
644*4bdc9457SAndroid Build Coastguard Worker 
645*4bdc9457SAndroid Build Coastguard Worker         if XNN_LIKELY(c >= 4) {
646*4bdc9457SAndroid Build Coastguard Worker           o0[0] = voc0;
647*4bdc9457SAndroid Build Coastguard Worker           o0[1] = voc1;
648*4bdc9457SAndroid Build Coastguard Worker           o0[2] = voc2;
649*4bdc9457SAndroid Build Coastguard Worker           o0[3] = voc3;
650*4bdc9457SAndroid Build Coastguard Worker           o0 = (float*) ((uintptr_t) o0 + output_width_stride);
651*4bdc9457SAndroid Build Coastguard Worker         } else {
652*4bdc9457SAndroid Build Coastguard Worker           float* o0_tmp = o0;
653*4bdc9457SAndroid Build Coastguard Worker           if (c & 2) {
654*4bdc9457SAndroid Build Coastguard Worker             o0_tmp[0] = voc0;
655*4bdc9457SAndroid Build Coastguard Worker             o0_tmp[1] = voc1;
656*4bdc9457SAndroid Build Coastguard Worker             o0_tmp += 2;
657*4bdc9457SAndroid Build Coastguard Worker             voc0 = voc2;
658*4bdc9457SAndroid Build Coastguard Worker           }
659*4bdc9457SAndroid Build Coastguard Worker           if (c & 1) {
660*4bdc9457SAndroid Build Coastguard Worker             *o0_tmp++ = voc0;
661*4bdc9457SAndroid Build Coastguard Worker           }
662*4bdc9457SAndroid Build Coastguard Worker           o0 = (float*) ((uintptr_t) o0 + output_width_stride);
663*4bdc9457SAndroid Build Coastguard Worker         }
664*4bdc9457SAndroid Build Coastguard Worker       }
665*4bdc9457SAndroid Build Coastguard Worker       // Move output pointers back to the position of the first pixel in a row,
666*4bdc9457SAndroid Build Coastguard Worker       // and forward to the next block of output channels
667*4bdc9457SAndroid Build Coastguard Worker       o0 = (float*) ((uintptr_t) o0 - output_channel_decrement);
668*4bdc9457SAndroid Build Coastguard Worker       // Revert input pointers to the position of the first pixel in a row
669*4bdc9457SAndroid Build Coastguard Worker       i0 = (const float*) ((uintptr_t) i0 - input_width_decrement);
670*4bdc9457SAndroid Build Coastguard Worker       i1 = (const float*) ((uintptr_t) i1 - input_width_decrement);
671*4bdc9457SAndroid Build Coastguard Worker       i2 = (const float*) ((uintptr_t) i2 - input_width_decrement);
672*4bdc9457SAndroid Build Coastguard Worker       // Move to the block of weights for the next 4 output channels
673*4bdc9457SAndroid Build Coastguard Worker       w += 112;
674*4bdc9457SAndroid Build Coastguard Worker       c = doz(c, 4);
675*4bdc9457SAndroid Build Coastguard Worker     } while (c != 0);
676*4bdc9457SAndroid Build Coastguard Worker     // Move output pointers back to the position of the first channel, and forward to the next block of rows
677*4bdc9457SAndroid Build Coastguard Worker     o0 = (float*) ((uintptr_t) o0 + output_height_increment);
678*4bdc9457SAndroid Build Coastguard Worker     // Move input pointers forward to the next row
679*4bdc9457SAndroid Build Coastguard Worker     i0 = i2;
680*4bdc9457SAndroid Build Coastguard Worker     i1 = (const float*) ((uintptr_t) i0 + input_height_stride);
681*4bdc9457SAndroid Build Coastguard Worker     i2 = (const float*) ((uintptr_t) i1 + input_height_stride);
682*4bdc9457SAndroid Build Coastguard Worker   }
683*4bdc9457SAndroid Build Coastguard Worker }
684