1 // Auto-generated file. Do not edit!
2 // Template: src/qs8-gavgpool/multipass-scalar.c.in
3 // Generator: tools/xngen
4 //
5 // Copyright 2021 Google LLC
6 //
7 // This source code is licensed under the BSD-style license found in the
8 // LICENSE file in the root directory of this source tree.
9
10 #include <assert.h>
11
12 #include <xnnpack/gavgpool.h>
13 #include <xnnpack/math.h>
14
15
xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__scalar_imagic_c1(size_t rows,size_t channels,const uint8_t * input,size_t input_stride,const uint8_t * zero,int32_t * buffer,uint8_t * output,const union xnn_qu8_avgpool_minmax_params params[restrict XNN_MIN_ELEMENTS (1)])16 void xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__scalar_imagic_c1(
17 size_t rows,
18 size_t channels,
19 const uint8_t* input,
20 size_t input_stride,
21 const uint8_t* zero,
22 int32_t* buffer,
23 uint8_t* output,
24 const union xnn_qu8_avgpool_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
25 {
26 assert(rows > 7);
27 assert(channels != 0);
28
29 const uint8_t* i0 = input;
30 const uint8_t* i1 = (const uint8_t*) ((uintptr_t) i0 + input_stride);
31 const uint8_t* i2 = (const uint8_t*) ((uintptr_t) i1 + input_stride);
32 const uint8_t* i3 = (const uint8_t*) ((uintptr_t) i2 + input_stride);
33 const uint8_t* i4 = (const uint8_t*) ((uintptr_t) i3 + input_stride);
34 const uint8_t* i5 = (const uint8_t*) ((uintptr_t) i4 + input_stride);
35 const uint8_t* i6 = (const uint8_t*) ((uintptr_t) i5 + input_stride);
36 const size_t input_increment = 7 * input_stride - round_up_po2(channels, 1) * sizeof(uint8_t);
37
38 const int32_t vinit_bias = params->fp32_scalar_imagic.init_bias;
39 int32_t* b = buffer;
40 size_t c = channels;
41 do {
42 int32_t vacc = vinit_bias;
43 const int32_t vi0 = (int32_t) *i0++;
44 const int32_t vi1 = (int32_t) *i1++;
45
46 vacc += vi0;
47 const int32_t vi2 = (int32_t) *i2++;
48 vacc += vi1;
49 const int32_t vi3 = (int32_t) *i3++;
50 vacc += vi2;
51 const int32_t vi4 = (int32_t) *i4++;
52 vacc += vi3;
53 const int32_t vi5 = (int32_t) *i5++;
54 vacc += vi4;
55 const int32_t vi6 = (int32_t) *i6++;
56
57 vacc += vi5;
58 vacc += vi6;
59
60 *b++ = vacc;
61 } while (--c != 0);
62
63 for (rows -= 7; rows > 7; rows -= 7) {
64 i0 = (const uint8_t*) ((uintptr_t) i0 + input_increment);
65 i1 = (const uint8_t*) ((uintptr_t) i1 + input_increment);
66 i2 = (const uint8_t*) ((uintptr_t) i2 + input_increment);
67 i3 = (const uint8_t*) ((uintptr_t) i3 + input_increment);
68 i4 = (const uint8_t*) ((uintptr_t) i4 + input_increment);
69 i5 = (const uint8_t*) ((uintptr_t) i5 + input_increment);
70 i6 = (const uint8_t*) ((uintptr_t) i6 + input_increment);
71
72 int32_t* b = buffer;
73 size_t c = channels;
74 do {
75 int32_t vacc = *b;
76 const int32_t vi0 = (int32_t) *i0++;
77 const int32_t vi1 = (int32_t) *i1++;
78
79 vacc += vi0;
80 const int32_t vi2 = (int32_t) *i2++;
81 vacc += vi1;
82 const int32_t vi3 = (int32_t) *i3++;
83 vacc += vi2;
84 const int32_t vi4 = (int32_t) *i4++;
85 vacc += vi3;
86 const int32_t vi5 = (int32_t) *i5++;
87 vacc += vi4;
88 const int32_t vi6 = (int32_t) *i6++;
89
90 vacc += vi5;
91 vacc += vi6;
92
93 *b++ = vacc;
94 } while (--c != 0);
95 }
96
97 i0 = (const uint8_t*) ((uintptr_t) i0 + input_increment);
98 i1 = (const uint8_t*) ((uintptr_t) i1 + input_increment);
99 if XNN_UNPREDICTABLE(rows < 2) {
100 i1 = zero;
101 }
102 i2 = (const uint8_t*) ((uintptr_t) i2 + input_increment);
103 if XNN_UNPREDICTABLE(rows <= 2) {
104 i2 = zero;
105 }
106 i3 = (const uint8_t*) ((uintptr_t) i3 + input_increment);
107 if XNN_UNPREDICTABLE(rows < 4) {
108 i3 = zero;
109 }
110 i4 = (const uint8_t*) ((uintptr_t) i4 + input_increment);
111 if XNN_UNPREDICTABLE(rows <= 4) {
112 i4 = zero;
113 }
114 i5 = (const uint8_t*) ((uintptr_t) i5 + input_increment);
115 if XNN_UNPREDICTABLE(rows < 6) {
116 i5 = zero;
117 }
118 i6 = (const uint8_t*) ((uintptr_t) i6 + input_increment);
119 if XNN_UNPREDICTABLE(rows <= 6) {
120 i6 = zero;
121 }
122
123 const float vscale = params->fp32_scalar_imagic.scale;
124 const float vmagic_bias = params->fp32_scalar_imagic.magic_bias;
125 const int32_t vmagic_min = params->fp32_scalar_imagic.magic_min;
126 const int32_t vmagic_max = params->fp32_scalar_imagic.magic_max;
127 const int32_t vmagic_bias_less_zero_point = params->fp32_scalar_imagic.magic_bias_less_zero_point;
128 do {
129 int32_t vacc = *buffer++;
130 const int32_t vi0 = (int32_t) *i0++;
131 const int32_t vi1 = (int32_t) *i1++;
132
133 vacc += vi0;
134 const int32_t vi2 = (int32_t) *i2++;
135 vacc += vi1;
136 const int32_t vi3 = (int32_t) *i3++;
137 vacc += vi2;
138 const int32_t vi4 = (int32_t) *i4++;
139 vacc += vi3;
140 const int32_t vi5 = (int32_t) *i5++;
141 vacc += vi4;
142 const int32_t vi6 = (int32_t) *i6++;
143
144 vacc += vi5;
145 vacc += vi6;
146
147 float vfpacc = (float) vacc * vscale;
148 vfpacc += vmagic_bias;
149 int32_t vout = (int32_t) float_as_uint32(vfpacc);
150 vout = math_max_s32(vout, vmagic_min);
151 vout = math_min_s32(vout, vmagic_max);
152 vout -= vmagic_bias_less_zero_point;
153
154 *output++ = (uint8_t) vout;
155 } while (--channels != 0);
156 }
157