1 // Copyright 2021 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5
6 #include <assert.h>
7
8 #include <xnnpack/math.h>
9 #include <xnnpack/maxpool.h>
10
11
// Scalar max-pooling microkernel for int8 data that handles one channel per
// inner-loop iteration.
//
// For every output pixel the kernel loads pooling-window row pointers from
// `input`, offsets each of them by `input_offset` bytes, and writes `channels`
// bytes to `output`. Each byte is the per-channel maximum over the window,
// clamped with params->scalar.max as upper and params->scalar.min as lower
// bound.
//
// The window is processed in passes: the first pass covers up to 9 rows, and
// every following pass folds up to 8 more rows into the values already stored
// in `output`.
//
//   output_pixels    - number of output pixels to produce; must be non-zero.
//   kernel_elements  - number of rows in the pooling window; must be non-zero.
//   channels         - bytes read from each row and written per pixel; must be
//                      non-zero.
//   input            - array of row pointers. 9 entries are always loaded for
//                      the first pass and 8 for each later pass, even when
//                      fewer rows remain; the surplus entries are loaded but
//                      never dereferenced.
//   input_offset     - byte offset added to every row pointer.
//   output           - destination for the pooled values.
//   input_increment  - bytes added to `input` after each pixel, on top of the
//                      entries consumed by the passes.
//   output_increment - bytes added to the output pointer after each pixel, on
//                      top of the `channels` bytes written.
//   params           - supplies the clamping bounds scalar.min / scalar.max.
void xnn_s8_maxpool_minmax_ukernel_9p8x__scalar_c1(
    size_t output_pixels,
    size_t kernel_elements,
    size_t channels,
    const int8_t** input,
    size_t input_offset,
    int8_t* output,
    size_t input_increment,
    size_t output_increment,
    const union xnn_s8_minmax_params params[restrict XNN_MIN_ELEMENTS(1)])
{
  assert(output_pixels != 0);
  assert(kernel_elements != 0);
  assert(channels != 0);

  const int32_t vclamp_hi = params->scalar.max;
  const int32_t vclamp_lo = params->scalar.min;

  do {
    int8_t* out = output;

    // First pass: reduce up to 9 rows read directly from the inputs.
    {
      const int8_t* r0 = (const int8_t*) ((uintptr_t) input[0] + input_offset);
      const int8_t* r1 = (const int8_t*) ((uintptr_t) input[1] + input_offset);
      const int8_t* r2 = (const int8_t*) ((uintptr_t) input[2] + input_offset);
      const int8_t* r3 = (const int8_t*) ((uintptr_t) input[3] + input_offset);
      const int8_t* r4 = (const int8_t*) ((uintptr_t) input[4] + input_offset);
      const int8_t* r5 = (const int8_t*) ((uintptr_t) input[5] + input_offset);
      const int8_t* r6 = (const int8_t*) ((uintptr_t) input[6] + input_offset);
      const int8_t* r7 = (const int8_t*) ((uintptr_t) input[7] + input_offset);
      const int8_t* r8 = (const int8_t*) ((uintptr_t) input[8] + input_offset);
      input += 9;

      // Rows beyond the end of the window alias row 0; duplicating a row does
      // not change the maximum.
      r1 = kernel_elements > 1 ? r1 : r0;
      r2 = kernel_elements > 2 ? r2 : r0;
      r3 = kernel_elements > 3 ? r3 : r0;
      r4 = kernel_elements > 4 ? r4 : r0;
      r5 = kernel_elements > 5 ? r5 : r0;
      r6 = kernel_elements > 6 ? r6 : r0;
      r7 = kernel_elements > 7 ? r7 : r0;
      r8 = kernel_elements > 8 ? r8 : r0;

      size_t channels_left = channels;
      do {
        const int32_t v0 = (int32_t) *r0++;
        const int32_t v1 = (int32_t) *r1++;
        const int32_t v2 = (int32_t) *r2++;
        const int32_t v3 = (int32_t) *r3++;
        const int32_t v4 = (int32_t) *r4++;
        const int32_t v5 = (int32_t) *r5++;
        const int32_t v6 = (int32_t) *r6++;
        const int32_t v7 = (int32_t) *r7++;
        const int32_t v8 = (int32_t) *r8++;

        const int32_t m0123 = math_max_s32(math_max_s32(v0, v1), math_max_s32(v2, v3));
        const int32_t m4567 = math_max_s32(math_max_s32(v4, v5), math_max_s32(v6, v7));
        const int32_t mwindow = math_max_s32(math_max_s32(m0123, m4567), v8);

        // Apply the upper bound first, then the lower bound.
        const int32_t clamped = math_max_s32(math_min_s32(mwindow, vclamp_hi), vclamp_lo);
        *out++ = (int8_t) clamped;
      } while (--channels_left != 0);
    }

    // Follow-up passes: fold up to 8 further rows into the values already
    // written for this pixel.
    ptrdiff_t rows_left = (ptrdiff_t) kernel_elements - 9;
    while (rows_left > 0) {
      const int8_t* r0 = (const int8_t*) ((uintptr_t) input[0] + input_offset);
      const int8_t* r1 = (const int8_t*) ((uintptr_t) input[1] + input_offset);
      const int8_t* r2 = (const int8_t*) ((uintptr_t) input[2] + input_offset);
      const int8_t* r3 = (const int8_t*) ((uintptr_t) input[3] + input_offset);
      const int8_t* r4 = (const int8_t*) ((uintptr_t) input[4] + input_offset);
      const int8_t* r5 = (const int8_t*) ((uintptr_t) input[5] + input_offset);
      const int8_t* r6 = (const int8_t*) ((uintptr_t) input[6] + input_offset);
      const int8_t* r7 = (const int8_t*) ((uintptr_t) input[7] + input_offset);
      input += 8;

      // Same aliasing trick as in the first pass, keyed on the rows remaining.
      r1 = rows_left > 1 ? r1 : r0;
      r2 = rows_left > 2 ? r2 : r0;
      r3 = rows_left > 3 ? r3 : r0;
      r4 = rows_left > 4 ? r4 : r0;
      r5 = rows_left > 5 ? r5 : r0;
      r6 = rows_left > 6 ? r6 : r0;
      r7 = rows_left > 7 ? r7 : r0;

      // Rewind to the start of this pixel's output to update it in place.
      out = output;
      size_t channels_left = channels;
      do {
        const int32_t v0 = (int32_t) *r0++;
        const int32_t v1 = (int32_t) *r1++;
        const int32_t v2 = (int32_t) *r2++;
        const int32_t v3 = (int32_t) *r3++;
        const int32_t v4 = (int32_t) *r4++;
        const int32_t v5 = (int32_t) *r5++;
        const int32_t v6 = (int32_t) *r6++;
        const int32_t v7 = (int32_t) *r7++;
        // Running maximum stored by the previous pass.
        const int32_t vprev = (int32_t) *out;

        const int32_t m0123 = math_max_s32(math_max_s32(v0, v1), math_max_s32(v2, v3));
        const int32_t m4567 = math_max_s32(math_max_s32(v4, v5), math_max_s32(v6, v7));
        const int32_t mwindow = math_max_s32(math_max_s32(m0123, m4567), vprev);

        // Apply the upper bound first, then the lower bound.
        const int32_t clamped = math_max_s32(math_min_s32(mwindow, vclamp_hi), vclamp_lo);
        *out++ = (int8_t) clamped;
      } while (--channels_left != 0);

      rows_left -= 8;
    }

    // `out` now points just past the channels written for this pixel.
    input = (const int8_t**) ((uintptr_t) input + input_increment);
    output = (int8_t*) ((uintptr_t) out + output_increment);
  } while (--output_pixels != 0);
}
176