1/* 2 * Copyright (c) 2017-2021 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24#include "helpers.h" 25#include "tile_helpers.h" 26 27#define MUL_OP(x, y) ((x) * (y)) 28#define ADD_OP(x, y) ((x) + (y)) 29#define DIV_OP(x, y) ((x) / (y)) 30#define POW_OP(x, y) pow((x), (y)) 31#define SQCVT_SAT(a) (a) 32 33#if defined(NUM_SLICES) 34/** Apply cross-map normalization. 35 * 36 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short 37 * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size, e.g. -DVEC_SIZE=16 38 * @note The radius should be given as a preprocessor argument using -DRADIUS=size. e.g. -DRADIUS=5 39 * @note The number of slices should be given as a preprocessor argument using -DNUM_SLICES=size. e.g. -DNUM_SLICES=192 40 * @note Scaling coefficient (= alpha/norm_size), beta and kappa need to be passed at compile time using -DCOEFF, -DALPHA and -DKAPPA 41 * 42 * @param[in] input_ptr Pointer to the first source tensor. Supported data types: F16/F32 43 * @param[in] input_stride_x Stride of the first source tensor in X dimension (in bytes) 44 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) 45 * @param[in] input_stride_y Stride of the first source tensor in Y dimension (in bytes) 46 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) 47 * @param[in] input_stride_z Stride of the first source tensor in Z dimension (in bytes) 48 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) 49 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source tensor 50 * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr 51 * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) 52 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) 53 * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) 54 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) 55 * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes) 56 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) 57 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor 58 */ 59__kernel void normalization_layer_cross_map_nchw(TENSOR3D_DECLARATION(input), 60 TENSOR3D_DECLARATION(output)) 61{ 62 Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(input); 63 Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output); 64 65 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 66 acc = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))0; 67 const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 68 coeff_v = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))SQCVT_SAT(COEFF); 69 const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 70 beta_v = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))SQCVT_SAT(BETA); 71 const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 72 kappa_v = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))SQCVT_SAT(KAPPA); 73 74 const int current_slice = get_global_id(2); 75 const int left_slice = max(-(int)RADIUS, -current_slice); 76 const int right_slice = min((int)RADIUS, (int)NUM_SLICES - 1 - current_slice); 77 78 for(int i = left_slice; i <= right_slice; i++) 79 { 80 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 81 values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)tensor3D_offset(&in, 0, 0, i)); 82 acc = ADD_OP(acc, MUL_OP(values, values)); 83 } 84 85 acc = ADD_OP(MUL_OP(acc, coeff_v), kappa_v); 86 const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 87 normalized = POW_OP(acc, beta_v); 88 const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 89 normalized_pixel = DIV_OP(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in.ptr), normalized); 90 91 VSTORE(VEC_SIZE) 92 (normalized_pixel, 0, (__global DATA_TYPE *)out.ptr); 93} 94#endif /* defined(NUM_SLICES) */ 95 96#if defined(WIDTH_SIZE) 97/** Apply in-map normalization when tensors are in the NCHW data layout format. 98 * 99 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=short 100 * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size, e.g. -DVEC_SIZE=16 101 * @note The radius should be given as a preprocessor argument using -DRADIUS=size. e.g. -DRADIUS=5 102 * @note Scaling coefficient (= alpha/norm_size), beta and kappa need to be passed at compile time using -DCOEFF, -DALPHA and -DKAPPA 103 * @note The leftover size in the X dimension shoud be given as preprocessor argument using -DVEC_SIZE_LEFTOVER is; x_dimension % VEC_SIZE. e.g. -DVEC_SIZE_LEFTOVER=1 104 * 105 * @param[in] input_ptr Pointer to the first source tensor. Supported data types: F16/F32 106 * @param[in] input_stride_x Stride of the first source tensor in X dimension (in bytes) 107 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) 108 * @param[in] input_stride_y Stride of the first source tensor in Y dimension (in bytes) 109 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) 110 * @param[in] input_stride_z Stride of the first source tensor in Z dimension (in bytes) 111 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) 112 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source tensor 113 * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr 114 * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) 115 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) 116 * @param[in] output_stride_y Stride of the first destination tensor in Y dimension (in bytes) 117 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) 118 * @param[in] output_stride_z Stride of the first source tensor in Z dimension (in bytes) 119 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) 120 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor 121 */ 122__kernel void normalization_layer_in_map_nchw(TENSOR3D_DECLARATION(input), 123 TENSOR3D_DECLARATION(output)) 124{ 125 Tensor3D in = CONVERT_TO_TENSOR3D_STRUCT(input); 126 Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output); 127 128 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 129 acc = 0; 130 const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 131 coeff_v = SQCVT_SAT(COEFF); 132 const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 133 beta_v = SQCVT_SAT(BETA); 134 const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 135 kappa_v = SQCVT_SAT(KAPPA); 136 137 const int left_pos = -(int)RADIUS; 138 const int right_pos = (int)RADIUS; 139 140#if defined(IN_MAP_2D) 141 const int current_row = get_global_id(1); 142 const int first_row = max(-(int)RADIUS, -current_row); 143 const int last_row = min((int)RADIUS, (int)get_global_size(1) - 1 - current_row); 144#endif /* defined(IN_MAP_2D) */ 145 146#if defined(IN_MAP_2D) 147 for(int j = first_row; j <= last_row; ++j) 148 { 149#endif /* defined(IN_MAP_2D) */ 150 for(int i = left_pos; i <= right_pos; ++i) 151 { 152#if defined(IN_MAP_2D) 153 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 154 values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)tensor3D_offset(&in, i, j, 0)); 155#else /* defined(IN_MAP_2D) */ 156 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 157 values = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)tensor3D_offset(&in, i, 0, 0)); 158#endif /* defined(IN_MAP_2D) */ 159 acc = ADD_OP(acc, MUL_OP(values, values)); 160 } 161#if defined(IN_MAP_2D) 162 } 163#endif /* defined(IN_MAP_2D) */ 164 165 acc = ADD_OP(MUL_OP(acc, coeff_v), kappa_v); 166 const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 167 normalized = POW_OP(acc, beta_v); 168 const VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) 169 normalized_pixel = DIV_OP(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)in.ptr), normalized); 170 171 VSTORE(VEC_SIZE) 172 (normalized_pixel, 0, (__global DATA_TYPE *)out.ptr); 173} 174#endif // defined(WIDTH_SIZE)