xref: /aosp_15_r20/external/XNNPACK/src/indirection.c (revision 4bdc94577ba0e567308109d787f7fec7b531ce36)
1*4bdc9457SAndroid Build Coastguard Worker // Copyright (c) Facebook, Inc. and its affiliates.
2*4bdc9457SAndroid Build Coastguard Worker // All rights reserved.
3*4bdc9457SAndroid Build Coastguard Worker //
4*4bdc9457SAndroid Build Coastguard Worker // Copyright 2019 Google LLC
5*4bdc9457SAndroid Build Coastguard Worker //
6*4bdc9457SAndroid Build Coastguard Worker // This source code is licensed under the BSD-style license found in the
7*4bdc9457SAndroid Build Coastguard Worker // LICENSE file in the root directory of this source tree.
8*4bdc9457SAndroid Build Coastguard Worker 
9*4bdc9457SAndroid Build Coastguard Worker #include <stddef.h>
10*4bdc9457SAndroid Build Coastguard Worker #include <math.h>
11*4bdc9457SAndroid Build Coastguard Worker 
12*4bdc9457SAndroid Build Coastguard Worker #include <fp16.h>
13*4bdc9457SAndroid Build Coastguard Worker 
14*4bdc9457SAndroid Build Coastguard Worker #include <fxdiv.h>
15*4bdc9457SAndroid Build Coastguard Worker 
16*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/indirection.h>
17*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/operator.h>
18*4bdc9457SAndroid Build Coastguard Worker #include <xnnpack/math.h>
19*4bdc9457SAndroid Build Coastguard Worker 
20*4bdc9457SAndroid Build Coastguard Worker 
// Populates op->indirection_buffer for a 2D convolution.
//
// Output pixels are visited in row-major order and grouped into tiles of
// output_tile_size pixels. Inside a tile, the pointers belonging to one kernel
// tap are stored contiguously (one per pixel of the tile), and taps follow
// each other in kernel row-major order:
//   index = tile_start * kernel_size + tap * output_tile_size + slot
// Taps that land outside the input image point to op->zero_buffer. When the
// number of output pixels is not a multiple of output_tile_size, the surplus
// slots of the final tile repeat the last output pixel.
//
// log2_element_size is the left shift applied to op->input_pixel_stride to
// obtain the per-pixel step used in the pointer arithmetic below.
void xnn_indirection_init_conv2d(
  xnn_operator_t op,
  size_t output_tile_size,
  uint32_t log2_element_size)
{
  const void** const table   = op->indirection_buffer;
  const uintptr_t input_base = (uintptr_t) op->input;
  const void* const zero     = op->zero_buffer;
  const size_t pixel_stride  = op->input_pixel_stride << log2_element_size;
  const size_t in_h          = op->input_height;
  const size_t in_w          = op->input_width;
  const size_t out_h         = op->output_height;
  const size_t out_w         = op->output_width;
  const size_t k_h           = op->kernel_height;
  const size_t k_w           = op->kernel_width;
  const size_t stride_h      = op->stride_height;
  const size_t stride_w      = op->stride_width;
  const size_t dilation_h    = op->dilation_height;
  const size_t dilation_w    = op->dilation_width;
  const size_t pad_top       = op->padding_top;
  const size_t pad_left      = op->padding_left;

  const size_t num_pixels = out_h * out_w;
  const size_t padded_num_pixels = round_up(num_pixels, output_tile_size);
  const size_t taps_per_pixel = k_h * k_w;
  const size_t last_pixel = num_pixels - 1;

  const struct fxdiv_divisor_size_t out_w_divisor = fxdiv_init_size_t(out_w);

  for (size_t tile_start = 0; tile_start < padded_num_pixels; tile_start += output_tile_size) {
    const size_t tile_base = tile_start * taps_per_pixel;
    for (size_t slot = 0; slot < output_tile_size; slot++) {
      // Slots beyond the real output are clamped onto the last output pixel.
      const size_t pixel = min(tile_start + slot, last_pixel);
      const struct fxdiv_result_size_t yx = fxdiv_divide_size_t(pixel, out_w_divisor);
      const size_t out_y = yx.quotient;
      const size_t out_x = yx.remainder;
      for (size_t ky = 0; ky < k_h; ky++) {
        // Unsigned arithmetic: a coordinate above/left of the image wraps to a
        // huge value, so a single "<" comparison rejects both sides.
        const size_t in_y = out_y * stride_h + ky * dilation_h - pad_top;
        const bool row_inside = in_y < in_h;
        for (size_t kx = 0; kx < k_w; kx++) {
          const size_t in_x = out_x * stride_w + kx * dilation_w - pad_left;
          const size_t tap = ky * k_w + kx;
          const void* entry = zero;
          if (row_inside && in_x < in_w) {
            entry = (const void*) (input_base + (in_y * in_w + in_x) * pixel_stride);
          }
          table[tile_base + tap * output_tile_size + slot] = entry;
        }
      }
    }
  }
}
80*4bdc9457SAndroid Build Coastguard Worker 
// Populates op->indirection_buffer for a 2D deconvolution.
//
// The buffer layout matches the tiled layout used for convolution:
//   index = tile_start * kernel_size + tap * output_tile_size + slot
// with output pixels taken in row-major order and surplus slots of the last
// tile repeating the last output pixel.
//
// For each (output pixel, kernel tap) pair the candidate input coordinate is
// (output + padding - kernel * dilation) / stride. The entry points into the
// input only when that division is exact in both dimensions and the quotient
// lies inside the input image; every other entry points to op->zero_buffer.
//
// log2_element_size is the left shift applied to op->input_pixel_stride to
// obtain the per-pixel step used in the pointer arithmetic below.
void xnn_indirection_init_deconv2d(
  xnn_operator_t op,
  size_t output_tile_size,
  uint32_t log2_element_size)
{
  const void** const table   = op->indirection_buffer;
  const uintptr_t input_base = (uintptr_t) op->input;
  const size_t pixel_stride  = op->input_pixel_stride << log2_element_size;
  const void* const zero     = op->zero_buffer;
  const size_t in_h          = op->input_height;
  const size_t in_w          = op->input_width;
  const size_t out_h         = op->output_height;
  const size_t out_w         = op->output_width;
  const size_t k_h           = op->kernel_height;
  const size_t k_w           = op->kernel_width;
  const size_t stride_h      = op->stride_height;
  const size_t stride_w      = op->stride_width;
  const size_t dilation_h    = op->dilation_height;
  const size_t dilation_w    = op->dilation_width;
  const size_t pad_top       = op->padding_top;
  const size_t pad_left      = op->padding_left;

  const size_t num_pixels = out_h * out_w;
  const size_t padded_num_pixels = round_up(num_pixels, output_tile_size);
  const size_t taps_per_pixel = k_h * k_w;
  const size_t last_pixel = num_pixels - 1;

  const struct fxdiv_divisor_size_t out_w_divisor = fxdiv_init_size_t(out_w);
  const struct fxdiv_divisor_size_t stride_h_divisor = fxdiv_init_size_t(stride_h);
  const struct fxdiv_divisor_size_t stride_w_divisor = fxdiv_init_size_t(stride_w);

  for (size_t tile_start = 0; tile_start < padded_num_pixels; tile_start += output_tile_size) {
    const size_t tile_base = tile_start * taps_per_pixel;
    for (size_t slot = 0; slot < output_tile_size; slot++) {
      // Slots beyond the real output are clamped onto the last output pixel.
      const size_t pixel = min(tile_start + slot, last_pixel);
      const struct fxdiv_result_size_t yx = fxdiv_divide_size_t(pixel, out_w_divisor);
      const size_t out_y = yx.quotient;
      const size_t out_x = yx.remainder;
      for (size_t ky = 0; ky < k_h; ky++) {
        // May wrap around (unsigned); the checks below then fail and the
        // entry falls back to the zero buffer.
        const size_t y = out_y + pad_top - ky * dilation_h;
        const size_t in_y = fxdiv_quotient_size_t(y, stride_h_divisor);
        // Row is usable only if y is an exact multiple of the stride and the
        // quotient is a valid input row.
        const bool row_hit = in_y * stride_h == y && in_y < in_h;
        for (size_t kx = 0; kx < k_w; kx++) {
          const size_t x = out_x + pad_left - kx * dilation_w;
          const size_t in_x = fxdiv_quotient_size_t(x, stride_w_divisor);
          const size_t tap = ky * k_w + kx;
          const bool column_hit = in_x * stride_w == x && in_x < in_w;
          const void* entry = zero;
          if (row_hit && column_hit) {
            entry = (const void*) (input_base + (in_y * in_w + in_x) * pixel_stride);
          }
          table[tile_base + tap * output_tile_size + slot] = entry;
        }
      }
    }
  }
}
135*4bdc9457SAndroid Build Coastguard Worker 
// Populates op->indirection_buffer for a deconvolution that is split into
// stride_height * stride_width sub-convolutions, and records per-sub-convolution
// bookkeeping in op->subconvolution_buffer.
//
// One struct subconvolution_params entry is consumed per (offset_y, offset_x)
// pair, iterating offset_y in the outer loop. For each entry this function
//   - stores the current write position as its indirection_buffer, and
//   - sets indirection_y_stride to the entry's existing indirection_x_stride
//     multiplied by the tile-rounded width of the output slice.
//
// Pointers are appended sequentially. For every output row of the slice and
// every tile of output_tile_size columns, the kernel taps belonging to the
// sub-convolution (kernel_y = offset_y, offset_y + stride_height, ...; same
// for x) are emitted, each followed by output_tile_size pointers. Columns past
// the end of the slice repeat the last column of the slice. Taps that fall
// outside the input image point to op->zero_buffer.
//
// log2_element_size is the left shift applied to op->input_pixel_stride to
// obtain the per-pixel step used in the pointer arithmetic below.
void xnn_indirection_init_subconv2d(
  xnn_operator_t op,
  size_t output_tile_size,
  uint32_t log2_element_size)
{
  const void** cursor                  = op->indirection_buffer;
  struct subconvolution_params* params = op->subconvolution_buffer;
  const uintptr_t input_base           = (uintptr_t) op->input;
  const size_t pixel_stride            = op->input_pixel_stride << log2_element_size;
  const void* const zero               = op->zero_buffer;
  const size_t in_h                    = op->input_height;
  const size_t in_w                    = op->input_width;
  const size_t out_h                   = op->output_height;
  const size_t out_w                   = op->output_width;
  const size_t k_h                     = op->kernel_height;
  const size_t k_w                     = op->kernel_width;
  const size_t stride_h                = op->stride_height;
  const size_t stride_w                = op->stride_width;
  const size_t pad_top                 = op->padding_top;
  const size_t pad_left                = op->padding_left;

  const size_t pad_top_mod = pad_top % stride_h;
  const size_t pad_left_mod = pad_left % stride_w;

  for (size_t phase_y = 0; phase_y < stride_h; phase_y++) {
    const size_t first_out_y = subtract_modulo(phase_y, pad_top_mod, stride_h);
    for (size_t phase_x = 0; phase_x < stride_w; phase_x++) {
      const size_t first_out_x = subtract_modulo(phase_x, pad_left_mod, stride_w);
      const size_t slice_width = divide_round_up(out_w - first_out_x, stride_w);
      const size_t last_slice_x = slice_width - 1;

      // Record where this sub-convolution's pointers begin and how far apart
      // consecutive output rows are.
      params->indirection_buffer = cursor;
      params->indirection_y_stride =
        params->indirection_x_stride * round_up(slice_width, output_tile_size);
      params += 1;

      for (size_t out_y = first_out_y; out_y < out_h; out_y += stride_h) {
        for (size_t tile_start = 0; tile_start < slice_width; tile_start += output_tile_size) {
          for (size_t ky = phase_y; ky < k_h; ky += stride_h) {
            assert(doz(out_y + pad_top, ky) % stride_h == 0);
            // May wrap around (unsigned); the bound check then fails.
            const size_t in_y = (out_y + pad_top - ky) / stride_h;
            const bool row_inside = in_y < in_h;

            for (size_t kx = phase_x; kx < k_w; kx += stride_w) {
              for (size_t slot = 0; slot < output_tile_size; slot++) {
                // Slots beyond the slice are clamped onto its last column.
                const size_t slice_x = min(tile_start + slot, last_slice_x);
                const size_t out_x = first_out_x + slice_x * stride_w;

                assert(doz(out_x + pad_left, kx) % stride_w == 0);
                const size_t in_x = (out_x + pad_left - kx) / stride_w;

                const void* entry = zero;
                if (row_inside && in_x < in_w) {
                  entry = (const void*) (input_base + (in_y * in_w + in_x) * pixel_stride);
                }
                *cursor = entry;
                cursor += 1;
              }
            }
          }
        }
      }
    }
  }
}
200*4bdc9457SAndroid Build Coastguard Worker 
// Populates op->indirection_buffer for a 2D depthwise convolution.
//
// The pointer for output pixel (output_y, output_x) and kernel tap
// (kernel_y, kernel_x) is stored at
//   output_y * step_height + output_x * step_width * kernel_height
//     + kernel_x * kernel_height + kernel_y
// i.e. taps are laid out column-major within a pixel. Taps that fall outside
// the input image point to op->zero_buffer.
//
// After the main fill, when primary_tile exceeds kernel_height * kernel_width,
// the entry at index output_height * step_height - 1 is replicated into the
// (primary_tile - kernel_height * kernel_width) slots starting at index
// output_height * step_height.
//
// log2_element_size is the left shift applied to op->input_pixel_stride to
// obtain the per-pixel step used in the pointer arithmetic below.
void xnn_indirection_init_dwconv2d(
  xnn_operator_t op,
  size_t step_height,
  size_t step_width,
  size_t primary_tile,
  uint32_t log2_element_size)
{
  const void** const table   = op->indirection_buffer;
  const uintptr_t input_base = (uintptr_t) op->input;
  const size_t pixel_stride  = op->input_pixel_stride << log2_element_size;
  const void* const zero     = op->zero_buffer;
  const size_t in_h          = op->input_height;
  const size_t in_w          = op->input_width;
  const size_t out_h         = op->output_height;
  const size_t out_w         = op->output_width;
  const size_t k_h           = op->kernel_height;
  const size_t k_w           = op->kernel_width;
  const size_t stride_h      = op->stride_height;
  const size_t stride_w      = op->stride_width;
  const size_t dilation_h    = op->dilation_height;
  const size_t dilation_w    = op->dilation_width;
  const size_t pad_top       = op->padding_top;
  const size_t pad_left      = op->padding_left;

  for (size_t out_y = 0; out_y < out_h; out_y++) {
    for (size_t ky = 0; ky < k_h; ky++) {
      // Unsigned arithmetic: a row above the image wraps to a huge value, so a
      // single "<" comparison rejects both sides.
      const size_t in_y = out_y * stride_h + ky * dilation_h - pad_top;
      const bool row_inside = in_y < in_h;
      const size_t row_base = out_y * step_height + ky;
      for (size_t out_x = 0; out_x < out_w; out_x++) {
        const size_t pixel_base = row_base + out_x * step_width * k_h;
        for (size_t kx = 0; kx < k_w; kx++) {
          const size_t in_x = out_x * stride_w + kx * dilation_w - pad_left;
          const void* entry = zero;
          if (row_inside && in_x < in_w) {
            entry = (const void*) (input_base + (in_y * in_w + in_x) * pixel_stride);
          }
          table[pixel_base + kx * k_h] = entry;
        }
      }
    }
  }

  // Fill the slots past the last written entry with a copy of that entry.
  const size_t taps_per_pixel = k_h * k_w;
  const size_t filled_entries = out_h * step_height;
  const void* const last_entry = table[filled_entries - 1];
  const size_t tail_base = filled_entries - taps_per_pixel;
  for (size_t tile_slot = taps_per_pixel; tile_slot < primary_tile; tile_slot++) {
    table[tail_base + tile_slot] = last_entry;
  }
}
258*4bdc9457SAndroid Build Coastguard Worker 
// Populates op->indirection_buffer for 2D max pooling.
//
// The pointer for output pixel (output_y, output_x) and pooling element
// (pooling_y, pooling_x) is stored at
//   output_y * step_height + output_x * step_width * pooling_height
//     + pooling_x * pooling_height + pooling_y
// Every entry points into the input; no zero buffer is used. Coordinates that
// fall outside the input are redirected:
//   - without dilation (both dilations <= 1), each coordinate is clamped to
//     [0, input_size - 1];
//   - with dilation, an out-of-range coordinate is replaced by
//     output * stride - (padding % dilation), increased by one dilation step
//     when that subtraction would go below zero.
//
// log2_element_size is the left shift applied to op->input_pixel_stride to
// obtain the per-pixel step used in the pointer arithmetic below.
void xnn_indirection_init_maxpool2d(
  xnn_operator_t op,
  size_t step_height,
  size_t step_width,
  uint32_t log2_element_size)
{
  const void** const table   = op->indirection_buffer;
  const uintptr_t input_base = (uintptr_t) op->input;
  const size_t pixel_stride  = op->input_pixel_stride << log2_element_size;
  const size_t in_h          = op->input_height;
  const size_t in_w          = op->input_width;
  const size_t out_h         = op->output_height;
  const size_t out_w         = op->output_width;
  const size_t pool_h        = op->kernel_height;
  const size_t pool_w        = op->kernel_width;
  const size_t stride_h      = op->stride_height;
  const size_t stride_w      = op->stride_width;
  const size_t dilation_h    = op->dilation_height;
  const size_t dilation_w    = op->dilation_width;
  const size_t pad_top       = op->padding_top;
  const size_t pad_left      = op->padding_left;

  const bool any_dilation = (dilation_h | dilation_w) > 1;

  if (any_dilation) {
    // Clamping to the border is not valid for dilated pooling, so
    // out-of-range coordinates are replaced by a fallback coordinate instead.
    const size_t pad_top_mod = pad_top % dilation_h;
    const size_t pad_left_mod = pad_left % dilation_w;
    for (size_t out_y = 0; out_y < out_h; out_y++) {
      // Fallback row depends only on the output row.
      size_t fallback_y = out_y * stride_h;
      if XNN_UNPREDICTABLE(fallback_y < pad_top_mod) {
        fallback_y += dilation_h;
      }
      fallback_y -= pad_top_mod;

      for (size_t py = 0; py < pool_h; py++) {
        // Wraps to a huge value when above the image, so ">=" catches both sides.
        size_t in_y = out_y * stride_h + py * dilation_h - pad_top;
        if XNN_UNPREDICTABLE(in_y >= in_h) {
          in_y = fallback_y;
        }
        const size_t row_base = out_y * step_height + py;

        for (size_t out_x = 0; out_x < out_w; out_x++) {
          // Fallback column depends only on the output column.
          size_t fallback_x = out_x * stride_w;
          if XNN_UNPREDICTABLE(fallback_x < pad_left_mod) {
            fallback_x += dilation_w;
          }
          fallback_x -= pad_left_mod;

          const size_t pixel_base = row_base + out_x * step_width * pool_h;
          for (size_t px = 0; px < pool_w; px++) {
            size_t in_x = out_x * stride_w + px * dilation_w - pad_left;
            if XNN_UNPREDICTABLE(in_x >= in_w) {
              in_x = fallback_x;
            }

            table[pixel_base + px * pool_h] =
              (const void*) (input_base + (in_y * in_w + in_x) * pixel_stride);
          }
        }
      }
    }
  } else {
    const size_t last_in_x = in_w - 1;
    const size_t last_in_y = in_h - 1;
    for (size_t out_y = 0; out_y < out_h; out_y++) {
      for (size_t py = 0; py < pool_h; py++) {
        // doz() is applied before min(), clamping the row on both ends.
        const size_t in_y = min(doz(out_y * stride_h + py * dilation_h, pad_top), last_in_y);
        const size_t row_base = out_y * step_height + py;
        for (size_t out_x = 0; out_x < out_w; out_x++) {
          const size_t pixel_base = row_base + out_x * step_width * pool_h;
          for (size_t px = 0; px < pool_w; px++) {
            const size_t in_x = min(doz(out_x * stride_w + px * dilation_w, pad_left), last_in_x);
            table[pixel_base + px * pool_h] =
              (const void*) (input_base + (in_y * in_w + in_x) * pixel_stride);
          }
        }
      }
    }
  }
}
336*4bdc9457SAndroid Build Coastguard Worker 
// Fills the indirection buffer and the 16-bit weight table for a bilinear
// resize whose source pixels are addressed as
//   input + (y * input_width + x) * input_pixel_stride.
//
// Output pixels are visited row by row. For each one this appends
//   - four pointers to indirection_buffer: top-left, top-right, bottom-left
//     and bottom-right source pixels, in that order;
//   - two uint16_t values to packed_weights: the horizontal weight followed
//     by the vertical weight, each converted from float with
//     fp16_ieee_from_fp32_value.
//
// Coordinate mapping:
//   - tensorflow_legacy or align_corners set: source = output * scale;
//   - otherwise: source = output * scale + (0.5 * scale - 0.5), clamped to
//     [0, input_size - 1].
// scale is input_size / output_size, or (input_size - 1) / (output_size - 1)
// when align_corners is set and the output has more than one pixel along
// that axis.
void xnn_indirection_init_resize_bilinear2d_hwc_f16(
  size_t input_pixel_stride,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const void* input,
  const void** indirection_buffer,
  void* packed_weights,
  bool align_corners,
  bool tensorflow_legacy)
{
  assert(input_height != 0);
  assert(input_height < 16777216 /* 2**24 */);
  assert(input_width != 0);
  assert(input_width < 16777216 /* 2**24 */);
  assert(output_height != 0);
  assert(output_height < 16777216 /* 2**24 */);
  assert(output_width != 0);
  assert(output_width < 16777216 /* 2**24 */);

  // 1 when the corner-aligned scale applies along the axis, 0 otherwise.
  const int32_t adjust_x = (int32_t) (align_corners && output_width != 1);
  const int32_t adjust_y = (int32_t) (align_corners && output_height != 1);
  const float scale_x =
    (float) ((int32_t) input_width - adjust_x) / (float) ((int32_t) output_width - adjust_x);
  const float scale_y =
    (float) ((int32_t) input_height - adjust_y) / (float) ((int32_t) output_height - adjust_y);

  const uintptr_t base = (uintptr_t) input;
  const uint32_t last_row = (uint32_t) input_height - 1;
  const uint32_t last_col = (uint32_t) input_width - 1;
  uint16_t* weights = (uint16_t*) packed_weights;

  const bool half_pixel_centers = !(tensorflow_legacy || align_corners);
  if (half_pixel_centers) {
    const float offset_y = 0.5f * scale_y - 0.5f;
    const float offset_x = 0.5f * scale_x - 0.5f;
    for (size_t oy = 0; oy < output_height; oy++) {
      float input_y = (float) (int32_t) oy * scale_y + offset_y;
      // Clamp the source row coordinate into the valid range.
      input_y = math_min_f32(math_max_f32(input_y, 0.0f), (float) last_row);
      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      assert((int32_t) input_y_top >= 0);
      const uint32_t row_below = math_min_u32(input_y_top + 1, last_row);
      const float weight_y = input_y - (float) input_y_top;
      // Pixel offsets of the first element of the two source rows.
      const size_t top_offset = input_y_top * input_width;
      const size_t bottom_offset = row_below * input_width;
      for (size_t ox = 0; ox < output_width; ox++) {
        float input_x = (float) (int32_t) ox * scale_x + offset_x;
        input_x = math_min_f32(math_max_f32(input_x, 0.0f), (float) last_col);
        const uint32_t input_x_left = (uint32_t) (int32_t) input_x;
        assert((int32_t) input_x_left >= 0);
        const uint32_t col_right = math_min_u32(input_x_left + 1, last_col);
        const float weight_x = input_x - (float) input_x_left;

        *indirection_buffer++ = (const void*) (base + (top_offset + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (top_offset + col_right) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (bottom_offset + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (bottom_offset + col_right) * input_pixel_stride);
        *weights++ = fp16_ieee_from_fp32_value(weight_x);
        *weights++ = fp16_ieee_from_fp32_value(weight_y);
      }
    }
  } else {
    for (size_t oy = 0; oy < output_height; oy++) {
      const float input_y = (float) (int32_t) oy * scale_y;
      assert(input_y >= 0.0f);
      assert(input_y < (float) input_height);

      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      // The neighbouring row/column is capped at the last valid index.
      const uint32_t row_below = math_min_u32(input_y_top + 1, last_row);
      const float weight_y = input_y - (float) input_y_top;
      const size_t top_offset = input_y_top * input_width;
      const size_t bottom_offset = row_below * input_width;
      for (size_t ox = 0; ox < output_width; ox++) {
        const float input_x = (float) (int32_t) ox * scale_x;
        assert(input_x >= 0.0f);
        assert(input_x < (float) input_width);

        const uint32_t input_x_left = (uint32_t) (int32_t) input_x;
        const uint32_t col_right = math_min_u32(input_x_left + 1, last_col);
        const float weight_x = input_x - (float) input_x_left;

        *indirection_buffer++ = (const void*) (base + (top_offset + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (top_offset + col_right) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (bottom_offset + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (bottom_offset + col_right) * input_pixel_stride);
        *weights++ = fp16_ieee_from_fp32_value(weight_x);
        *weights++ = fp16_ieee_from_fp32_value(weight_y);
      }
    }
  }
}
432*4bdc9457SAndroid Build Coastguard Worker 
// Fills the indirection buffer and the float weight table for a bilinear
// resize whose source pixels are addressed as
//   input + (y * input_width + x) * input_pixel_stride.
//
// Output pixels are visited row by row. For each one this appends
//   - four pointers to indirection_buffer: top-left, top-right, bottom-left
//     and bottom-right source pixels, in that order;
//   - two floats to packed_weights: the horizontal weight followed by the
//     vertical weight (the fractional part of the source coordinate).
//
// Coordinate mapping:
//   - tensorflow_legacy or align_corners set: source = output * scale;
//   - otherwise: source = output * scale + (0.5 * scale - 0.5), clamped to
//     [0, input_size - 1].
// scale is input_size / output_size, or (input_size - 1) / (output_size - 1)
// when align_corners is set and the output has more than one pixel along
// that axis.
void xnn_indirection_init_resize_bilinear2d_hwc_f32(
  size_t input_pixel_stride,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const void* input,
  const void** indirection_buffer,
  float* packed_weights,
  bool align_corners,
  bool tensorflow_legacy)
{
  assert(input_height != 0);
  assert(input_height < 16777216 /* 2**24 */);
  assert(input_width != 0);
  assert(input_width < 16777216 /* 2**24 */);
  assert(output_height != 0);
  assert(output_height < 16777216 /* 2**24 */);
  assert(output_width != 0);
  assert(output_width < 16777216 /* 2**24 */);

  // 1 when the corner-aligned scale applies along the axis, 0 otherwise.
  const int32_t adjust_x = (int32_t) (align_corners && output_width != 1);
  const int32_t adjust_y = (int32_t) (align_corners && output_height != 1);
  const float scale_x =
    (float) ((int32_t) input_width - adjust_x) / (float) ((int32_t) output_width - adjust_x);
  const float scale_y =
    (float) ((int32_t) input_height - adjust_y) / (float) ((int32_t) output_height - adjust_y);

  const uintptr_t base = (uintptr_t) input;
  const uint32_t last_row = (uint32_t) input_height - 1;
  const uint32_t last_col = (uint32_t) input_width - 1;

  const bool half_pixel_centers = !(tensorflow_legacy || align_corners);
  if (half_pixel_centers) {
    const float offset_y = 0.5f * scale_y - 0.5f;
    const float offset_x = 0.5f * scale_x - 0.5f;
    for (size_t oy = 0; oy < output_height; oy++) {
      float input_y = (float) (int32_t) oy * scale_y + offset_y;
      // Clamp the source row coordinate into the valid range.
      input_y = math_min_f32(math_max_f32(input_y, 0.0f), (float) last_row);
      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      assert((int32_t) input_y_top >= 0);
      const uint32_t row_below = math_min_u32(input_y_top + 1, last_row);
      const float weight_y = input_y - (float) input_y_top;
      // Pixel offsets of the first element of the two source rows.
      const size_t top_offset = input_y_top * input_width;
      const size_t bottom_offset = row_below * input_width;
      for (size_t ox = 0; ox < output_width; ox++) {
        float input_x = (float) (int32_t) ox * scale_x + offset_x;
        input_x = math_min_f32(math_max_f32(input_x, 0.0f), (float) last_col);
        const uint32_t input_x_left = (uint32_t) (int32_t) input_x;
        assert((int32_t) input_x_left >= 0);
        const uint32_t col_right = math_min_u32(input_x_left + 1, last_col);
        const float weight_x = input_x - (float) input_x_left;

        *indirection_buffer++ = (const void*) (base + (top_offset + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (top_offset + col_right) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (bottom_offset + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (bottom_offset + col_right) * input_pixel_stride);
        *packed_weights++ = weight_x;
        *packed_weights++ = weight_y;
      }
    }
  } else {
    for (size_t oy = 0; oy < output_height; oy++) {
      const float input_y = (float) (int32_t) oy * scale_y;
      assert(input_y >= 0.0f);
      assert(input_y < (float) input_height);

      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      // The neighbouring row/column is capped at the last valid index.
      const uint32_t row_below = math_min_u32(input_y_top + 1, last_row);
      const float weight_y = input_y - (float) input_y_top;
      const size_t top_offset = input_y_top * input_width;
      const size_t bottom_offset = row_below * input_width;
      for (size_t ox = 0; ox < output_width; ox++) {
        const float input_x = (float) (int32_t) ox * scale_x;
        assert(input_x >= 0.0f);
        assert(input_x < (float) input_width);

        const uint32_t input_x_left = (uint32_t) (int32_t) input_x;
        const uint32_t col_right = math_min_u32(input_x_left + 1, last_col);
        const float weight_x = input_x - (float) input_x_left;

        *indirection_buffer++ = (const void*) (base + (top_offset + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (top_offset + col_right) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (bottom_offset + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (bottom_offset + col_right) * input_pixel_stride);
        *packed_weights++ = weight_x;
        *packed_weights++ = weight_y;
      }
    }
  }
}
527*4bdc9457SAndroid Build Coastguard Worker 
// Fills the indirection buffer and the fixed-point weight table for a
// bilinear resize whose source pixels are addressed as
//   input + (y * input_width + x) * input_pixel_stride.
//
// Output pixels are visited row by row. For each one this appends
//   - four pointers to indirection_buffer: top-left, top-right, bottom-left
//     and bottom-right source pixels, in that order;
//   - two int16_t values to packed_weights: the horizontal weight followed
//     by the vertical weight, each multiplied by 2**11 and rounded to an
//     integer with lrintf.
//
// Coordinate mapping:
//   - tensorflow_legacy or align_corners set: source = output * scale;
//   - otherwise: source = output * scale + (0.5 * scale - 0.5), clamped to
//     [0, input_size - 1].
// scale is input_size / output_size, or (input_size - 1) / (output_size - 1)
// when align_corners is set and the output has more than one pixel along
// that axis.
void xnn_indirection_init_resize_bilinear2d_hwc_q11(
  size_t input_pixel_stride,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const void* input,
  const void** indirection_buffer,
  int16_t* packed_weights,
  bool align_corners,
  bool tensorflow_legacy)
{
  assert(input_height != 0);
  assert(input_height < 16777216 /* 2**24 */);
  assert(input_width != 0);
  assert(input_width < 16777216 /* 2**24 */);
  assert(output_height != 0);
  assert(output_height < 16777216 /* 2**24 */);
  assert(output_width != 0);
  assert(output_width < 16777216 /* 2**24 */);

  // 1 when the corner-aligned scale applies along the axis, 0 otherwise.
  const int32_t adjust_x = (int32_t) (align_corners && output_width != 1);
  const int32_t adjust_y = (int32_t) (align_corners && output_height != 1);
  const float scale_x =
    (float) ((int32_t) input_width - adjust_x) / (float) ((int32_t) output_width - adjust_x);
  const float scale_y =
    (float) ((int32_t) input_height - adjust_y) / (float) ((int32_t) output_height - adjust_y);

  // Multiplier that moves a fractional weight into 11 fractional bits.
  const float q11_scale = 0x1.0p+11f;

  const uintptr_t base = (uintptr_t) input;
  const uint32_t last_row = (uint32_t) input_height - 1;
  const uint32_t last_col = (uint32_t) input_width - 1;

  const bool half_pixel_centers = !(tensorflow_legacy || align_corners);
  if (half_pixel_centers) {
    const float offset_y = 0.5f * scale_y - 0.5f;
    const float offset_x = 0.5f * scale_x - 0.5f;
    for (size_t oy = 0; oy < output_height; oy++) {
      float input_y = (float) (int32_t) oy * scale_y + offset_y;
      // Clamp the source row coordinate into the valid range.
      input_y = math_min_f32(math_max_f32(input_y, 0.0f), (float) last_row);
      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      assert((int32_t) input_y_top >= 0);
      const uint32_t row_below = math_min_u32(input_y_top + 1, last_row);
      const float weight_y = input_y - (float) input_y_top;
      // Pixel offsets of the first element of the two source rows.
      const size_t top_offset = input_y_top * input_width;
      const size_t bottom_offset = row_below * input_width;
      for (size_t ox = 0; ox < output_width; ox++) {
        float input_x = (float) (int32_t) ox * scale_x + offset_x;
        input_x = math_min_f32(math_max_f32(input_x, 0.0f), (float) last_col);
        const uint32_t input_x_left = (uint32_t) (int32_t) input_x;
        assert((int32_t) input_x_left >= 0);
        const uint32_t col_right = math_min_u32(input_x_left + 1, last_col);
        const float weight_x = input_x - (float) input_x_left;

        *indirection_buffer++ = (const void*) (base + (top_offset + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (top_offset + col_right) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (bottom_offset + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (bottom_offset + col_right) * input_pixel_stride);
        *packed_weights++ = (int16_t) lrintf(weight_x * q11_scale);
        *packed_weights++ = (int16_t) lrintf(weight_y * q11_scale);
      }
    }
  } else {
    for (size_t oy = 0; oy < output_height; oy++) {
      const float input_y = (float) (int32_t) oy * scale_y;
      assert(input_y >= 0.0f);
      assert(input_y < (float) input_height);

      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      // The neighbouring row/column is capped at the last valid index.
      const uint32_t row_below = math_min_u32(input_y_top + 1, last_row);
      const float weight_y = input_y - (float) input_y_top;
      const size_t top_offset = input_y_top * input_width;
      const size_t bottom_offset = row_below * input_width;
      for (size_t ox = 0; ox < output_width; ox++) {
        const float input_x = (float) (int32_t) ox * scale_x;
        assert(input_x >= 0.0f);
        assert(input_x < (float) input_width);

        const uint32_t input_x_left = (uint32_t) (int32_t) input_x;
        const uint32_t col_right = math_min_u32(input_x_left + 1, last_col);
        const float weight_x = input_x - (float) input_x_left;

        *indirection_buffer++ = (const void*) (base + (top_offset + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (top_offset + col_right) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (bottom_offset + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (const void*) (base + (bottom_offset + col_right) * input_pixel_stride);
        *packed_weights++ = (int16_t) lrintf(weight_x * q11_scale);
        *packed_weights++ = (int16_t) lrintf(weight_y * q11_scale);
      }
    }
  }
}
622*4bdc9457SAndroid Build Coastguard Worker 
xnn_indirection_init_resize_bilinear2d_chw_f16(size_t input_pixel_stride,size_t input_height,size_t input_width,size_t output_height,size_t output_width,const void * input,const void ** indirection_buffer,void * packed_weights,bool align_corners,bool tensorflow_legacy)623*4bdc9457SAndroid Build Coastguard Worker void xnn_indirection_init_resize_bilinear2d_chw_f16(
624*4bdc9457SAndroid Build Coastguard Worker   size_t input_pixel_stride,
625*4bdc9457SAndroid Build Coastguard Worker   size_t input_height,
626*4bdc9457SAndroid Build Coastguard Worker   size_t input_width,
627*4bdc9457SAndroid Build Coastguard Worker   size_t output_height,
628*4bdc9457SAndroid Build Coastguard Worker   size_t output_width,
629*4bdc9457SAndroid Build Coastguard Worker   const void* input,
630*4bdc9457SAndroid Build Coastguard Worker   const void** indirection_buffer,
631*4bdc9457SAndroid Build Coastguard Worker   void* packed_weights,
632*4bdc9457SAndroid Build Coastguard Worker   bool align_corners,
633*4bdc9457SAndroid Build Coastguard Worker   bool tensorflow_legacy)
634*4bdc9457SAndroid Build Coastguard Worker {
635*4bdc9457SAndroid Build Coastguard Worker   assert(input_height > 1);
636*4bdc9457SAndroid Build Coastguard Worker   assert(input_height < 16777216 /* 2**24 */);
637*4bdc9457SAndroid Build Coastguard Worker   assert(input_width > 1);
638*4bdc9457SAndroid Build Coastguard Worker   assert(input_width < 16777216 /* 2**24 */);
639*4bdc9457SAndroid Build Coastguard Worker   assert(output_height != 0);
640*4bdc9457SAndroid Build Coastguard Worker   assert(output_height < 16777216 /* 2**24 */);
641*4bdc9457SAndroid Build Coastguard Worker   assert(output_width != 0);
642*4bdc9457SAndroid Build Coastguard Worker   assert(output_width < 16777216 /* 2**24 */);
643*4bdc9457SAndroid Build Coastguard Worker 
644*4bdc9457SAndroid Build Coastguard Worker   const int32_t width_adjustment = (int32_t) (align_corners && output_width != 1);
645*4bdc9457SAndroid Build Coastguard Worker   const int32_t height_adjustment = (int32_t) (align_corners && output_height != 1);
646*4bdc9457SAndroid Build Coastguard Worker   const float width_scale =
647*4bdc9457SAndroid Build Coastguard Worker     (float) ((int32_t) input_width - width_adjustment) / (float) ((int32_t) output_width - width_adjustment);
648*4bdc9457SAndroid Build Coastguard Worker   const float height_scale =
649*4bdc9457SAndroid Build Coastguard Worker     (float) ((int32_t) input_height - height_adjustment) / (float) ((int32_t) output_height - height_adjustment);
650*4bdc9457SAndroid Build Coastguard Worker 
651*4bdc9457SAndroid Build Coastguard Worker   uint16_t* w = (uint16_t*) packed_weights;
652*4bdc9457SAndroid Build Coastguard Worker   const uint32_t input_y_max = (uint32_t) input_height - 1;
653*4bdc9457SAndroid Build Coastguard Worker   const uint32_t input_x_max = (uint32_t) input_width - 1;
654*4bdc9457SAndroid Build Coastguard Worker   if (tensorflow_legacy || align_corners) {
655*4bdc9457SAndroid Build Coastguard Worker     for (size_t output_y = 0; output_y < output_height; output_y++) {
656*4bdc9457SAndroid Build Coastguard Worker       const float input_y = (float) (int32_t) output_y * height_scale;
657*4bdc9457SAndroid Build Coastguard Worker       assert(input_y >= 0.0f);
658*4bdc9457SAndroid Build Coastguard Worker       assert(input_y < (float) input_height);
659*4bdc9457SAndroid Build Coastguard Worker 
660*4bdc9457SAndroid Build Coastguard Worker       const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
661*4bdc9457SAndroid Build Coastguard Worker       const uint32_t input_y_bottom = math_min_u32(input_y_top + 1, input_y_max);
662*4bdc9457SAndroid Build Coastguard Worker       const float alpha_y = input_y - (float) input_y_top;
663*4bdc9457SAndroid Build Coastguard Worker       for (size_t output_x = 0; output_x < output_width; output_x++) {
664*4bdc9457SAndroid Build Coastguard Worker         const float input_x = (float) (int32_t) output_x * width_scale;
665*4bdc9457SAndroid Build Coastguard Worker         assert(input_x >= 0.0f);
666*4bdc9457SAndroid Build Coastguard Worker         assert(input_x < (float) input_width);
667*4bdc9457SAndroid Build Coastguard Worker 
668*4bdc9457SAndroid Build Coastguard Worker         uint32_t input_x_left = (uint32_t) (int32_t) input_x;
669*4bdc9457SAndroid Build Coastguard Worker 
670*4bdc9457SAndroid Build Coastguard Worker         float alpha_x = input_x - (float) input_x_left;
671*4bdc9457SAndroid Build Coastguard Worker         if (input_x_left == input_x_max) {
672*4bdc9457SAndroid Build Coastguard Worker           // Ensure that there is a pixel to the right of the one pointed at,
673*4bdc9457SAndroid Build Coastguard Worker           // as required by some CHW kernels.
674*4bdc9457SAndroid Build Coastguard Worker           --input_x_left;
675*4bdc9457SAndroid Build Coastguard Worker           alpha_x = 1.0f;
676*4bdc9457SAndroid Build Coastguard Worker         }
677*4bdc9457SAndroid Build Coastguard Worker        indirection_buffer[0] =
678*4bdc9457SAndroid Build Coastguard Worker           (void*) ((uintptr_t) input + (input_y_top * input_width + input_x_left) * input_pixel_stride);
679*4bdc9457SAndroid Build Coastguard Worker        indirection_buffer[1] =
680*4bdc9457SAndroid Build Coastguard Worker           (void*) ((uintptr_t) input + (input_y_bottom * input_width + input_x_left) * input_pixel_stride);
681*4bdc9457SAndroid Build Coastguard Worker         w[0] = fp16_ieee_from_fp32_value(alpha_x);
682*4bdc9457SAndroid Build Coastguard Worker         w[1] = fp16_ieee_from_fp32_value(alpha_y);
683*4bdc9457SAndroid Build Coastguard Worker         indirection_buffer += 2;
684*4bdc9457SAndroid Build Coastguard Worker         w += 2;
685*4bdc9457SAndroid Build Coastguard Worker       }
686*4bdc9457SAndroid Build Coastguard Worker     }
687*4bdc9457SAndroid Build Coastguard Worker   } else {
688*4bdc9457SAndroid Build Coastguard Worker     const float height_offset = 0.5f * height_scale - 0.5f;
689*4bdc9457SAndroid Build Coastguard Worker     const float width_offset = 0.5f * width_scale - 0.5f;
690*4bdc9457SAndroid Build Coastguard Worker     for (size_t output_y = 0; output_y < output_height; output_y++) {
691*4bdc9457SAndroid Build Coastguard Worker       float input_y = (float) (int32_t) output_y * height_scale + height_offset;
692*4bdc9457SAndroid Build Coastguard Worker       input_y = math_min_f32(math_max_f32(input_y, 0.0f), (float) input_y_max);
693*4bdc9457SAndroid Build Coastguard Worker       const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
694*4bdc9457SAndroid Build Coastguard Worker       assert((int32_t) input_y_top >= 0);
695*4bdc9457SAndroid Build Coastguard Worker       const uint32_t input_y_bottom = math_min_u32(input_y_top + 1, input_y_max);
696*4bdc9457SAndroid Build Coastguard Worker       const float alpha_y = input_y - (float) input_y_top;
697*4bdc9457SAndroid Build Coastguard Worker       for (size_t output_x = 0; output_x < output_width; output_x++) {
698*4bdc9457SAndroid Build Coastguard Worker         float input_x = (float) (int32_t) output_x * width_scale + width_offset;
699*4bdc9457SAndroid Build Coastguard Worker         input_x = math_min_f32(math_max_f32(input_x, 0.0f), (float) input_x_max);
700*4bdc9457SAndroid Build Coastguard Worker         uint32_t input_x_left = (uint32_t) (int32_t) input_x;
701*4bdc9457SAndroid Build Coastguard Worker         assert((int32_t) input_x_left >= 0);
702*4bdc9457SAndroid Build Coastguard Worker 
703*4bdc9457SAndroid Build Coastguard Worker         float alpha_x = input_x - (float) input_x_left;
704*4bdc9457SAndroid Build Coastguard Worker         if (input_x_left == input_x_max) {
705*4bdc9457SAndroid Build Coastguard Worker           // Ensure that there is a pixel to the right of the one pointed at,
706*4bdc9457SAndroid Build Coastguard Worker           // as required by some CHW kernels.
707*4bdc9457SAndroid Build Coastguard Worker           --input_x_left;
708*4bdc9457SAndroid Build Coastguard Worker           alpha_x = 1.0f;
709*4bdc9457SAndroid Build Coastguard Worker         }
710*4bdc9457SAndroid Build Coastguard Worker 
711*4bdc9457SAndroid Build Coastguard Worker         indirection_buffer[0] =
712*4bdc9457SAndroid Build Coastguard Worker           (void*) ((uintptr_t) input + (input_y_top * input_width + input_x_left) * input_pixel_stride);
713*4bdc9457SAndroid Build Coastguard Worker         indirection_buffer[1] =
714*4bdc9457SAndroid Build Coastguard Worker           (void*) ((uintptr_t) input + (input_y_bottom * input_width + input_x_left) * input_pixel_stride);
715*4bdc9457SAndroid Build Coastguard Worker         w[0] = fp16_ieee_from_fp32_value(alpha_x);
716*4bdc9457SAndroid Build Coastguard Worker         w[1] = fp16_ieee_from_fp32_value(alpha_y);
717*4bdc9457SAndroid Build Coastguard Worker         indirection_buffer += 2;
718*4bdc9457SAndroid Build Coastguard Worker         w += 2;
719*4bdc9457SAndroid Build Coastguard Worker       }
720*4bdc9457SAndroid Build Coastguard Worker     }
721*4bdc9457SAndroid Build Coastguard Worker   }
722*4bdc9457SAndroid Build Coastguard Worker }
723*4bdc9457SAndroid Build Coastguard Worker 
// Fills the indirection buffer and interpolation weights used for bilinear
// resizing with f32 weights.
//
// Output pixels are visited row by row. For each of them this writes:
//   - two pointers into indirection_buffer: the source pixel at
//     (top row, left column) followed by the one at (bottom row, left column);
//   - two floats into packed_weights: the horizontal weight followed by the
//     vertical weight.
// Both buffers therefore receive 2 * output_height * output_width entries.
//
// Source addresses are computed as
//   input + (row * input_width + column) * input_pixel_stride.
//
// When align_corners or tensorflow_legacy is set, source coordinates are
// output_index * scale. Otherwise a (0.5 * scale - 0.5) offset is added and
// the result is clamped to the valid source range.
void xnn_indirection_init_resize_bilinear2d_chw_f32(
  size_t input_pixel_stride,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const void* input,
  const void** indirection_buffer,
  float* packed_weights,
  bool align_corners,
  bool tensorflow_legacy)
{
  // The input needs at least two rows and two columns: the left column is
  // moved one step back whenever it lands on the last column (see below).
  // The 2**24 bounds presumably keep the integer -> float conversions exact.
  assert(input_height > 1);
  assert(input_height < 16777216 /* 2**24 */);
  assert(input_width > 1);
  assert(input_width < 16777216 /* 2**24 */);
  assert(output_height != 0);
  assert(output_height < 16777216 /* 2**24 */);
  assert(output_width != 0);
  assert(output_width < 16777216 /* 2**24 */);

  // With align_corners both extents are reduced by one before dividing,
  // unless the output has a single sample along that axis.
  const int32_t x_shrink = (align_corners && output_width != 1) ? 1 : 0;
  const int32_t y_shrink = (align_corners && output_height != 1) ? 1 : 0;
  const float x_scale =
    (float) ((int32_t) input_width - x_shrink) / (float) ((int32_t) output_width - x_shrink);
  const float y_scale =
    (float) ((int32_t) input_height - y_shrink) / (float) ((int32_t) output_height - y_shrink);

  const uint32_t last_row = (uint32_t) input_height - 1;
  const uint32_t last_column = (uint32_t) input_width - 1;
  const uintptr_t base = (uintptr_t) input;

  const bool use_center_offset = !(tensorflow_legacy || align_corners);
  if (use_center_offset) {
    const float y_offset = 0.5f * y_scale - 0.5f;
    const float x_offset = 0.5f * x_scale - 0.5f;
    for (size_t oy = 0; oy < output_height; oy++) {
      // Clamp the source row coordinate into [0, last_row].
      float src_y = (float) (int32_t) oy * y_scale + y_offset;
      src_y = math_min_f32(math_max_f32(src_y, 0.0f), (float) last_row);
      const uint32_t top = (uint32_t) (int32_t) src_y;
      assert((int32_t) top >= 0);
      const uint32_t bottom = math_min_u32(top + 1, last_row);
      const float alpha_y = src_y - (float) top;

      for (size_t ox = 0; ox < output_width; ox++) {
        // Clamp the source column coordinate into [0, last_column].
        float src_x = (float) (int32_t) ox * x_scale + x_offset;
        src_x = math_min_f32(math_max_f32(src_x, 0.0f), (float) last_column);
        uint32_t left = (uint32_t) (int32_t) src_x;
        assert((int32_t) left >= 0);

        float alpha_x = src_x - (float) left;
        if (left == last_column) {
          // Some CHW kernels need a valid pixel to the right of the one
          // referenced, so step back one column and put all the weight on
          // the right-hand neighbour.
          left -= 1;
          alpha_x = 1.0f;
        }

        *indirection_buffer++ =
          (void*) (base + (top * input_width + left) * input_pixel_stride);
        *indirection_buffer++ =
          (void*) (base + (bottom * input_width + left) * input_pixel_stride);
        *packed_weights++ = alpha_x;
        *packed_weights++ = alpha_y;
      }
    }
  } else {
    for (size_t oy = 0; oy < output_height; oy++) {
      // No clamping on this path: the coordinate is expected to be in range.
      const float src_y = (float) (int32_t) oy * y_scale;
      assert(src_y >= 0.0f);
      assert(src_y < (float) input_height);

      const uint32_t top = (uint32_t) (int32_t) src_y;
      const uint32_t bottom = math_min_u32(top + 1, last_row);
      const float alpha_y = src_y - (float) top;

      for (size_t ox = 0; ox < output_width; ox++) {
        const float src_x = (float) (int32_t) ox * x_scale;
        assert(src_x >= 0.0f);
        assert(src_x < (float) input_width);

        uint32_t left = (uint32_t) (int32_t) src_x;
        float alpha_x = src_x - (float) left;
        if (left == last_column) {
          // Some CHW kernels need a valid pixel to the right of the one
          // referenced, so step back one column and put all the weight on
          // the right-hand neighbour.
          left -= 1;
          alpha_x = 1.0f;
        }

        *indirection_buffer++ =
          (void*) (base + (top * input_width + left) * input_pixel_stride);
        *indirection_buffer++ =
          (void*) (base + (bottom * input_width + left) * input_pixel_stride);
        *packed_weights++ = alpha_x;
        *packed_weights++ = alpha_y;
      }
    }
  }
}
823*4bdc9457SAndroid Build Coastguard Worker 
// Fills op->indirection_buffer with output-pixel addresses for unpooling.
//
// For every image in [batch_start, op->batch_size), every input pixel and
// every position inside the pooling window, one pointer into op->output is
// stored. Within an input pixel the entries are laid out with pooling_x as
// the slower index and pooling_y as the faster one.
//
// The output coordinate is (input coordinate * pooling size + window
// position), reduced by the padding via doz() and capped at the last output
// row/column via min(). Both helpers are declared outside this function;
// doz() is presumably a saturating subtraction -- confirm against its
// definition.
void xnn_indirection_init_unpool2d(
  xnn_operator_t op,
  size_t batch_start,
  uint32_t log2_element_size)
{
  const void** indirection_buffer = op->indirection_buffer;
  const uintptr_t output_base = (uintptr_t) op->output;
  // The pixel stride is shifted by log2_element_size before being used as a
  // byte offset multiplier.
  const size_t pixel_bytes = op->output_pixel_stride << log2_element_size;
  const size_t batch_size = op->batch_size;
  const size_t input_height = op->input_height;
  const size_t input_width = op->input_width;
  const size_t output_height = op->output_height;
  const size_t output_width = op->output_width;
  const size_t pooling_height = op->kernel_height;
  const size_t pooling_width = op->kernel_width;
  const size_t pad_top = op->padding_top;
  const size_t pad_left = op->padding_left;

  const size_t last_output_row = output_height - 1;
  const size_t last_output_column = output_width - 1;

  for (size_t image = batch_start; image < batch_size; image++) {
    for (size_t input_y = 0; input_y < input_height; input_y++) {
      const size_t input_row = image * input_height + input_y;
      for (size_t pooling_y = 0; pooling_y < pooling_height; pooling_y++) {
        const size_t output_y =
          min(doz(input_y * pooling_height + pooling_y, pad_top), last_output_row);
        const size_t output_row = image * output_height + output_y;
        for (size_t input_x = 0; input_x < input_width; input_x++) {
          const size_t input_pixel = input_row * input_width + input_x;
          for (size_t pooling_x = 0; pooling_x < pooling_width; pooling_x++) {
            const size_t output_x =
              min(doz(input_x * pooling_width + pooling_x, pad_left), last_output_column);
            const size_t slot =
              (input_pixel * pooling_width + pooling_x) * pooling_height + pooling_y;
            indirection_buffer[slot] =
              (const void*) (output_base + (output_row * output_width + output_x) * pixel_bytes);
          }
        }
      }
    }
  }
}
857*4bdc9457SAndroid Build Coastguard Worker 
// Writes one scaling factor per output pixel, row by row, into
// pixelwise_buffer (output_height * output_width entries in total).
//
// Each factor is 1 / (rows * columns), where rows and columns are the extent
// of the pooling window along each axis after the padding is subtracted via
// doz() and the end is capped at the input size via min(). The value is
// stored as the uint16_t returned by fp16_ieee_from_fp32_value().
//
// doz() and min() are declared outside this function; doz() is presumably a
// saturating subtraction -- confirm against its definition.
void xnn_indirection_init_pavgpool2d_f16(
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  size_t pooling_height,
  size_t pooling_width,
  size_t stride_height,
  size_t stride_width,
  size_t padding_top,
  size_t padding_left,
  uint16_t* pixelwise_buffer)
{
  for (size_t row = 0; row < output_height; row++) {
    // Vertical extent of the window; it is the same for the whole output row.
    const size_t window_top = row * stride_height;
    const size_t first_y = doz(window_top, padding_top);
    const size_t limit_y = min(doz(window_top + pooling_height, padding_top), input_height);
    const uint32_t rows_covered = (uint32_t) (limit_y - first_y);

    for (size_t column = 0; column < output_width; column++) {
      const size_t window_left = column * stride_width;
      const size_t first_x = doz(window_left, padding_left);
      const size_t limit_x = min(doz(window_left + pooling_width, padding_left), input_width);
      const uint32_t columns_covered = (uint32_t) (limit_x - first_x);

      const int32_t pixel_count = (int32_t) (rows_covered * columns_covered);
      *pixelwise_buffer = fp16_ieee_from_fp32_value(1.0f / ((float) pixel_count));
      pixelwise_buffer += 1;
    }
  }
}
883*4bdc9457SAndroid Build Coastguard Worker 
// Writes one float scaling factor per output pixel, row by row, into
// pixelwise_buffer (output_height * output_width entries in total).
//
// Each factor is 1 / (rows * columns), where rows and columns are the extent
// of the pooling window along each axis after the padding is subtracted via
// doz() and the end is capped at the input size via min().
//
// doz() and min() are declared outside this function; doz() is presumably a
// saturating subtraction -- confirm against its definition.
void xnn_indirection_init_pavgpool2d_f32(
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  size_t pooling_height,
  size_t pooling_width,
  size_t stride_height,
  size_t stride_width,
  size_t padding_top,
  size_t padding_left,
  float* pixelwise_buffer)
{
  for (size_t row = 0; row < output_height; row++) {
    // Vertical extent of the window; it is the same for the whole output row.
    const size_t window_top = row * stride_height;
    const size_t first_y = doz(window_top, padding_top);
    const size_t limit_y = min(doz(window_top + pooling_height, padding_top), input_height);
    const uint32_t rows_covered = (uint32_t) (limit_y - first_y);

    for (size_t column = 0; column < output_width; column++) {
      const size_t window_left = column * stride_width;
      const size_t first_x = doz(window_left, padding_left);
      const size_t limit_x = min(doz(window_left + pooling_width, padding_left), input_width);
      const uint32_t columns_covered = (uint32_t) (limit_x - first_x);

      const int32_t pixel_count = (int32_t) (rows_covered * columns_covered);
      *pixelwise_buffer = 1.0f / ((float) pixel_count);
      pixelwise_buffer += 1;
    }
  }
}
909