1/* 2 * Copyright (c) 2019-2021 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24#include "helpers_asymm.h" 25 26// This specifies the value to shift the result of roi_dims / pooled_dims before ceiling. 27// It is close to the epsilon machine (for a floating point system, x and x+EPS are the same number). 28#define EPS_GRID 0.00001f 29 30#if defined(DATA_TYPE) && defined(POOLED_DIM_X) && defined(POOLED_DIM_Y) && defined(MAX_DIM_X) && defined(MAX_DIM_Y) && defined(MAX_DIM_Z) && defined(SPATIAL_SCALE) && defined(OFFSET_IN) && defined(OFFSET_OUT) && defined(SCALE_IN) && defined(SCALE_OUT) && defined(OFFSET_ROIS) && defined(SCALE_ROIS) // Check for compile time constants 31 32/** Performs a roi align on a single output pixel. 33 * 34 * @param[in] input Pointer to input Tensor3D struct. 35 * @param[in] region_start_x Start x index projected onto the input tensor. 36 * @param[in] region_end_x End x index projected onto the input tensor. 37 * @param[in] region_start_y Start y index projected onto the input tensor. 38 * @param[in] region_end_y End y index projected onto the input tensor. 39 * @param[in] pz z index of the input tensor. 40 * 41 * @return An average pooled value from the region specified in the input tensor. 42 */ 43inline DATA_TYPE roi_align_1x1(const Tensor3D *input, float region_start_x, 44 float bin_size_x, 45 float grid_size_x, 46 float region_end_x, 47 float region_start_y, 48 float bin_size_y, 49 float grid_size_y, 50 float region_end_y, 51 int pz) 52{ 53 // Iterate through the pooling region 54 float sum = 0; 55 for(int iy = 0; iy < grid_size_y; ++iy) 56 { 57 for(int ix = 0; ix < grid_size_x; ++ix) 58 { 59 // Align the window in the middle of every bin 60 const float y = region_start_y + (iy + 0.5f) * bin_size_y / (float)grid_size_y; 61 const float x = region_start_x + (ix + 0.5f) * bin_size_x / (float)grid_size_x; 62 63 // Interpolation in the unit square 64 const int y_low = (int)y; 65 const int x_low = (int)x; 66 const int y_high = y_low + 1; 67 const int x_high = x_low + 1; 68 69 const float ly = y - y_low; 70 const float lx = x - x_low; 71 const float hy = 1.f - ly; 72 const float hx = 1.f - lx; 73 74 const float w1 = hy * hx; 75 const float w2 = hy * lx; 76 const float w3 = ly * hx; 77 const float w4 = ly * lx; 78#if defined(NHWC) 79 const DATA_TYPE data1 = *(__global DATA_TYPE *)tensor3D_offset(input, pz, x_low, y_low); 80 const DATA_TYPE data2 = *(__global DATA_TYPE *)tensor3D_offset(input, pz, x_high, y_low); 81 const DATA_TYPE data3 = *(__global DATA_TYPE *)tensor3D_offset(input, pz, x_low, y_high); 82 const DATA_TYPE data4 = *(__global DATA_TYPE *)tensor3D_offset(input, pz, x_high, y_high); 83#else // !defined(NHWC) 84 const DATA_TYPE data1 = *(__global DATA_TYPE *)tensor3D_offset(input, x_low, y_low, pz); 85 const DATA_TYPE data2 = *(__global DATA_TYPE *)tensor3D_offset(input, x_high, y_low, pz); 86 const DATA_TYPE data3 = *(__global DATA_TYPE *)tensor3D_offset(input, x_low, y_high, pz); 87 const DATA_TYPE data4 = *(__global DATA_TYPE *)tensor3D_offset(input, x_high, y_high, pz); 88#endif // defined(NHWC) 89 90 const float data1_f32 = DEQUANTIZE(data1, OFFSET_IN, SCALE_IN, DATA_TYPE, 1); 91 const float data2_f32 = DEQUANTIZE(data2, OFFSET_IN, SCALE_IN, DATA_TYPE, 1); 92 const float data3_f32 = DEQUANTIZE(data3, OFFSET_IN, SCALE_IN, DATA_TYPE, 1); 93 const float data4_f32 = DEQUANTIZE(data4, OFFSET_IN, SCALE_IN, DATA_TYPE, 1); 94 sum += w1 * data1_f32 + w2 * data2_f32 + w3 * data3_f32 + w4 * data4_f32; 95 } 96 } 97 98 const float res_f32 = sum / (grid_size_x * grid_size_y); 99 return QUANTIZE(res_f32, OFFSET_OUT, SCALE_OUT, DATA_TYPE, 1); 100} 101 102/** Performs a roi align function. 103 * 104 * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=uchar 105 * @note Datasize must be passed using -DDATA_SIZE e.g. -DDATA_SIZE=32; 106 * @note Input dimensions must be passed using -DMAX_DIM_X, -DMAX_DIM_Y and -DMAX_DIM_Z; 107 * @note Pooled region dimensions must be passed using -DPOOLED_DIM_X and -DPOOLED_DIM_Y; 108 * @note Spatial scale must be passed using -DSPATIAL_SCALE; 109 * @note Sampling ratio (i.e., the number of samples in each bin) may be passed using -DSAMPLING_RATIO. If not defined each roi 110 * will have a default sampling ratio of roi_dims/pooling_dims 111 * 112 * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8 113 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) 114 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) 115 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) 116 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) 117 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) 118 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) 119 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the pooled region of the source tensor as specifed by ROI 120 * @param[in] rois_ptr Pointer to the ROIs tensor. Layout: { batch_index, x1, y1, x2, y2 }. 121 * Supported data types: QASYMM16 with 0.125f scale and 0 offset 122 * @param[in] rois_stride_x Stride of the ROIs tensor in X dimension (in bytes) 123 * @param[in] rois_step_x Step of the ROIs tensor in X dimension (in bytes) 124 * @param[in] rois_stride_y Stride of the ROIs tensor in Y dimension (in bytes) 125 * @param[in] rois_step_y Step of the ROIs tensor in Y dimension (in bytes) 126 * @param[in] rois_offset_first_element_in_bytes The offset of the first element in the ROIs tensor 127 * @param[out] output_ptr Pointer to the destination tensor. Supported data types: Supported data types: same as @p input_ptr 128 * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) 129 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) 130 * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) 131 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) 132 * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes) 133 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) 134 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor 135 * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes) 136 * @param[in] output_stride_w Stride of the destination tensor in W dimension (in bytes) 137 */ 138__kernel void roi_align_layer_quantized( 139 TENSOR3D_DECLARATION(input), 140 IMAGE_DECLARATION(rois), 141 TENSOR3D_DECLARATION(output), 142 unsigned int input_stride_w, unsigned int output_stride_w) 143{ 144 // Get pixels pointer 145 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(input); 146 Image rois = CONVERT_TO_IMAGE_STRUCT_NO_STEP(rois); 147 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(output); 148 149#if defined(NHWC) 150 const int px = get_global_id(1); 151 const int py = get_global_id(2); 152 const int pw = get_global_id(0); 153#else // !defined(NHWC) 154 const int px = get_global_id(0); 155 const int py = get_global_id(1); 156 const int pw = get_global_id(2); 157#endif // defined(NHWC) 158 159 // Load roi parameters 160 // roi is laid out as follows { batch_index, x1, y1, x2, y2 } 161 const ushort roi_batch = *((__global ushort *)offset(&rois, 0, pw)); 162 float4 roi = DEQUANTIZE(vload4(0, (__global ushort *)offset(&rois, 1, pw)), OFFSET_ROIS, SCALE_ROIS, ushort, 4); 163 float2 roi_anchor = roi.s01 * convert_float(SPATIAL_SCALE); 164 float2 roi_dims = fmax((roi.s23 - roi.s01) * convert_float(SPATIAL_SCALE), 1.f); 165 166 // Calculate pooled region start and end 167 float2 spatial_indx = (float2)(px, py); 168 float2 pooled_dims = (float2)(POOLED_DIM_X, POOLED_DIM_Y); 169 float2 max_spatial_dims = (float2)(MAX_DIM_X, MAX_DIM_Y); 170 171 float2 bin_size = (float2)((roi_dims.s0 / (float)POOLED_DIM_X), (roi_dims.s1 / (float)POOLED_DIM_Y)); 172 float2 region_start = spatial_indx * bin_size + roi_anchor; 173 float2 region_end = (spatial_indx + 1) * bin_size + roi_anchor; 174 175 region_start = clamp(region_start, 0, max_spatial_dims); 176 region_end = clamp(region_end, 0, max_spatial_dims); 177 178#if defined(SAMPLING_RATIO) 179 float2 roi_bin_grid = SAMPLING_RATIO; 180#else // !defined(SAMPLING_RATIO) 181 // Note that we subtract EPS_GRID before ceiling. This is to avoid situations where 1.000001 gets ceiled to 2. 182 float2 roi_bin_grid = ceil(bin_size - EPS_GRID); 183#endif // defined(SAMPLING_RATIO) 184 185 // Move input and output pointer across the fourth dimension 186 input.ptr += roi_batch * input_stride_w; 187 output.ptr += pw * output_stride_w; 188 for(int pz = 0; pz < MAX_DIM_Z; ++pz) 189 { 190#if defined(NHWC) 191 __global DATA_TYPE *_output_ptr = (__global DATA_TYPE *)tensor3D_offset(&output, pz, px, py); 192#else // !defined(NHWC) 193 __global DATA_TYPE *_output_ptr = (__global DATA_TYPE *)tensor3D_offset(&output, px, py, pz); 194#endif // defined(NHWC) 195 *_output_ptr = (__global DATA_TYPE)roi_align_1x1(&input, 196 region_start.x, 197 bin_size.x, 198 roi_bin_grid.x, 199 region_end.x, 200 region_start.y, 201 bin_size.y, 202 roi_bin_grid.y, 203 region_end.y, pz); 204 } 205} 206#endif // Check for compile time constants 207