1 /* 2 * Copyright (c) 2019-2021 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #ifndef ARM_COMPUTE_CLLSTMLAYERQUANTIZED_H 25 #define ARM_COMPUTE_CLLSTMLAYERQUANTIZED_H 26 27 #include "arm_compute/core/Types.h" 28 #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" 29 #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" 30 #include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h" 31 #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" 32 #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" 33 #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h" 34 #include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h" 35 #include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h" 36 #include "arm_compute/runtime/CL/functions/CLSlice.h" 37 #include "arm_compute/runtime/CL/functions/CLTranspose.h" 38 39 #include "arm_compute/runtime/common/LSTMParams.h" 40 41 namespace arm_compute 42 { 43 // Forward declarations 44 class ICLTensor; 45 46 /** Basic function to run @ref CLLSTMLayerQuantized 47 * 48 * This function calls the following CL functions/kernels: 49 * 50 * -# @ref CLGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers 51 * -# @ref CLGEMMLowpOutputStage Convert 32-bit integers into QSYMM16 52 * -# @ref CLTranspose Matrix transpose 53 * -# @ref CLConcatenateLayer Tensor concatenation 54 * -# @ref CLActivationLayer Activation functions (tanh and logistic) 55 * -# @ref CLArithmeticAddition Elementwise addition 56 * -# @ref CLPixelWiseMultiplication Elementwise multiplication 57 * -# @ref CLSlice Tensor slicing 58 * -# @ref CLDequantizationLayer Dequantize into float 59 * -# @ref CLQuantizationLayer Quantize from float 60 * */ 61 class CLLSTMLayerQuantized : public IFunction 62 { 63 public: 64 /** Default constructor */ 65 CLLSTMLayerQuantized(std::shared_ptr<IMemoryManager> memory_manager = nullptr); 66 /** Prevent instances of this class from being copied (As this class contains pointers) */ 67 CLLSTMLayerQuantized(const CLLSTMLayerQuantized &) = delete; 68 /** Default move constructor */ 69 CLLSTMLayerQuantized(CLLSTMLayerQuantized &&) = default; 70 /** Prevent instances of this class from being copied (As this class contains pointers) */ 71 CLLSTMLayerQuantized &operator=(const CLLSTMLayerQuantized &) = delete; 72 /** Default move assignment operator */ 73 CLLSTMLayerQuantized &operator=(CLLSTMLayerQuantized &&) = default; 74 /** Initialize function's tensors. 75 * 76 * Valid data layouts: 77 * - All 78 * 79 * Valid data type configurations: 80 * |src0 - src8 |src9 - src12 |src13 |src14 |dst0 |dst1 | 81 * |:-----------|:------------|:-------|:------|:------|:------| 82 * |QASYMM8 |S32 |QSYMM16 |QASYMM8|QSYMM16|QASYMM8| 83 * 84 * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8. 85 * @param[in] input_to_input_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 86 * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 87 * @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 88 * @param[in] input_to_output_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 89 * @param[in] recurrent_to_input_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 90 * @param[in] recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 91 * @param[in] recurrent_to_cell_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 92 * @param[in] recurrent_to_output_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 93 * @param[in] input_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 94 * @param[in] forget_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 95 * @param[in] cell_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 96 * @param[in] output_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 97 * @param[in] cell_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16. 98 * @param[in] output_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input. 99 * @param[out] cell_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16. 100 * @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input. 101 */ 102 void configure(const ICLTensor *input, 103 const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, 104 const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, 105 const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, 106 ICLTensor *cell_state_in, const ICLTensor *output_state_in, 107 ICLTensor *cell_state_out, ICLTensor *output_state_out); 108 /** Initialize function's tensors. 109 * 110 * @param[in] compile_context The compile context to be used. 111 * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8. 112 * @param[in] input_to_input_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 113 * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 114 * @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 115 * @param[in] input_to_output_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 116 * @param[in] recurrent_to_input_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 117 * @param[in] recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 118 * @param[in] recurrent_to_cell_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 119 * @param[in] recurrent_to_output_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 120 * @param[in] input_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 121 * @param[in] forget_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 122 * @param[in] cell_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 123 * @param[in] output_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 124 * @param[in] cell_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16. 125 * @param[in] output_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input. 126 * @param[out] cell_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16. 127 * @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input. 128 */ 129 void configure(const CLCompileContext &compile_context, const ICLTensor *input, 130 const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, 131 const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, 132 const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, 133 ICLTensor *cell_state_in, const ICLTensor *output_state_in, 134 ICLTensor *cell_state_out, ICLTensor *output_state_out); 135 136 /** Static function to check if given info will lead to a valid configuration of @ref CLLSTMLayerQuantized 137 * 138 * @param[in] input Source tensor info. Input is a 2D tensor info with dimensions [input_size, batch_size]. Data types supported: QASYMM8. 139 * @param[in] input_to_input_weights 2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input. 140 * @param[in] input_to_forget_weights 2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input. 141 * @param[in] input_to_cell_weights 2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input. 142 * @param[in] input_to_output_weights 2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input. 143 * @param[in] recurrent_to_input_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input. 144 * @param[in] recurrent_to_forget_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input. 145 * @param[in] recurrent_to_cell_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input. 146 * @param[in] recurrent_to_output_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input. 147 * @param[in] input_gate_bias 1D weights tensor info with dimensions [output_size]. Data type supported: S32. 148 * @param[in] forget_gate_bias 1D weights tensor info with dimensions [output_size]. Data type supported: S32. 149 * @param[in] cell_bias 1D weights tensor info with dimensions [output_size]. Data type supported: S32. 150 * @param[in] output_gate_bias 1D weights tensor info with dimensions [output_size]. Data type supported: S32. 151 * @param[in] cell_state_in 2D tensor info with dimensions [output_size, batch_size]. Data type supported: QSYMM16. 152 * @param[in] output_state_in 2D tensor info with dimensions [output_size, batch_size]. Data type supported: Same as @p input. 153 * @param[out] cell_state_out Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data type supported: QSYMM16. 154 * @param[out] output_state_out Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size].Data types supported: Same as @p input. 155 * 156 * @return a status 157 */ 158 static Status validate(const ITensorInfo *input, 159 const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, 160 const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, 161 const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, 162 const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, 163 const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out); 164 165 // Inherited methods overridden: 166 void run() override; 167 void prepare() override; 168 169 private: 170 MemoryGroup _memory_group; 171 172 // Functions used 173 CLGEMMLowpMatrixMultiplyCore _gemmlowp; 174 CLGEMMLowpOutputStage _output_stage; 175 CLTranspose _transpose_weights; 176 CLConcatenateLayer _concat_input_weights; 177 CLConcatenateLayer _concat_recurrent_weights; 178 CLConcatenateLayer _concat_weights; 179 CLConcatenateLayer _concat_inputs; 180 CLConcatenateLayer _concat_bias; 181 CLActivationLayer _sigmoid_forget_gate; 182 CLActivationLayer _sigmoid_input_gate; 183 CLActivationLayer _sigmoid_output_gate; 184 CLActivationLayer _tanh_modulation_gate; 185 CLActivationLayer _tanh_output_state; 186 CLArithmeticAddition _add_cell_state_tmps; 187 CLArithmeticAddition _add2; 188 CLPixelWiseMultiplication _mul_forget_gate_cell_state; 189 CLPixelWiseMultiplication _mul_input_gate_input_mod_gate; 190 CLPixelWiseMultiplication _mul_output_state_tmp_output_gate; 191 CLSlice _slice_input_tensor; 192 CLSlice _slice_forget_tensor; 193 CLSlice _slice_cell_tensor; 194 CLSlice _slice_output_tensor; 195 CLDequantizationLayer _dequantize; 196 CLQuantizationLayer _quantize; 197 198 // Tensor pointers 199 const ICLTensor *_input_to_input_weights; 200 const ICLTensor *_input_to_forget_weights; 201 const ICLTensor *_input_to_cell_weights; 202 const ICLTensor *_input_to_output_weights; 203 const ICLTensor *_recurrent_to_input_weights; 204 const ICLTensor *_recurrent_to_forget_weights; 205 const ICLTensor *_recurrent_to_cell_weights; 206 const ICLTensor *_recurrent_to_output_weights; 207 const ICLTensor *_input_gate_bias; 208 const ICLTensor *_forget_gate_bias; 209 const ICLTensor *_cell_bias; 210 const ICLTensor *_output_gate_bias; 211 212 // Temporary tensors 213 CLTensor _recurrent_weights; 214 CLTensor _input_weights; 215 CLTensor _weights; 216 CLTensor _input; 217 CLTensor _weights_transposed; 218 CLTensor _output_highp; 219 CLTensor _output_lowp; 220 CLTensor _bias; 221 CLTensor _forget_gate_input; 222 CLTensor _input_gate_input; 223 CLTensor _output_gate_input; 224 CLTensor _input_modulation_gate_input; 225 CLTensor _forget_gate_output; 226 CLTensor _input_gate_output; 227 CLTensor _output_gate_output; 228 CLTensor _input_modulation_gate_output; 229 CLTensor _cell_state_tmp1; 230 CLTensor _cell_state_tmp2; 231 CLTensor _output_state_tmp; 232 CLTensor _output_state_out_symm; 233 CLTensor _output_state_out_f32; 234 235 bool _is_prepared; 236 }; 237 } // namespace arm_compute 238 #endif /* ARM_COMPUTE_CLLSTMLAYERQUANTIZED_H */ 239