/*
 * Copyright (c) 2019-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_NELSTMLAYERQUANTIZED_H
#define ARM_COMPUTE_NELSTMLAYERQUANTIZED_H

// NOTE(review): IFunction, IMemoryManager, MemoryGroup, Tensor and std::shared_ptr are used
// below but none of them is included directly here; presumably they are made visible
// transitively by the function headers that follow - confirm before reordering or pruning.
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NESlice.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"

// NOTE(review): nothing declared in this header refers to LSTMParams; the include is kept
// as-is because files that include this header may rely on it.
#include "arm_compute/runtime/common/LSTMParams.h"

namespace arm_compute
{
// Forward declarations
class ITensor;

/** Function to run a quantized LSTM layer.
 *
 * This function calls the following functions/kernels:
 *
 * -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers
 * -# @ref NEGEMMLowpOutputStage        Convert 32-bit integers into QSYMM16
 * -# @ref NETranspose                  Matrix transpose
 * -# @ref NEConcatenateLayer           Tensor concatenation
 * -# @ref NEActivationLayer            Activation functions (tanh and logistic)
 * -# @ref NEArithmeticAddition         Elementwise addition
 * -# @ref NEPixelWiseMultiplication    Elementwise multiplication
 * -# @ref NESlice                      Tensor slicing
 * -# @ref NEDequantizationLayer        Dequantize into float
 * -# @ref NEQuantizationLayer          Quantize from float
 *
 * Instances can be neither copied nor moved: all four copy/move operations are deleted.
 */
class NELSTMLayerQuantized : public IFunction
{
public:
    /** Default constructor
     *
     * @param[in] memory_manager (Optional) Memory manager. Defaults to nullptr.
     *                           NOTE(review): presumably used to back the internal memory group - confirm in the implementation.
     */
    NELSTMLayerQuantized(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NELSTMLayerQuantized(const NELSTMLayerQuantized &) = delete;
    /** Prevent instances of this class from being moved (As this class contains pointers) */
    NELSTMLayerQuantized(NELSTMLayerQuantized &&) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NELSTMLayerQuantized &operator=(const NELSTMLayerQuantized &) = delete;
    /** Prevent instances of this class from being moved (As this class contains pointers) */
    NELSTMLayerQuantized &operator=(NELSTMLayerQuantized &&) = delete;
    /** Destructor (declared here, defined out of line) */
    ~NELSTMLayerQuantized();
    /** Initialize function's tensors.
     *
     * Valid data layouts:
     * - All
     *
     * Valid data type configurations:
     * |src0 - src8 |src9 - src12 |src13   |src14  |dst0   |dst1   |
     * |:-----------|:------------|:-------|:------|:------|:------|
     * |QASYMM8     |S32          |QSYMM16 |QASYMM8|QSYMM16|QASYMM8|
     *
     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8.
     * @param[in]  input_to_input_weights      2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_input_weights  2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in]  input_gate_bias             1D weights tensor with dimensions [output_size]. Data type supported: S32.
     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [output_size]. Data type supported: S32.
     * @param[in]  cell_bias                   1D weights tensor with dimensions [output_size]. Data type supported: S32.
     * @param[in]  output_gate_bias            1D weights tensor with dimensions [output_size]. Data type supported: S32.
     * @param[in]  cell_state_in               2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
     *                                         NOTE(review): passed as a non-const pointer although documented as an input - confirm whether the implementation writes to it.
     * @param[in]  output_state_in             2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
     * @param[out] output_state_out            Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     */
    void configure(const ITensor *input,
                   const ITensor *input_to_input_weights, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
                   const ITensor *recurrent_to_input_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
                   const ITensor *input_gate_bias, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
                   ITensor *cell_state_in, const ITensor *output_state_in,
                   ITensor *cell_state_out, ITensor *output_state_out);

    /** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayerQuantized
     *
     * All arguments are read-only tensor descriptors, including those describing the destination tensors.
     *
     * @param[in] input                       Source tensor info. Input is a 2D tensor info with dimensions [input_size, batch_size]. Data types supported: QASYMM8.
     * @param[in] input_to_input_weights      2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in] input_to_forget_weights     2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in] input_to_cell_weights       2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in] input_to_output_weights     2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input.
     * @param[in] recurrent_to_input_weights  2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in] recurrent_to_forget_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in] recurrent_to_cell_weights   2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in] recurrent_to_output_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input.
     * @param[in] input_gate_bias             1D weights tensor info with dimensions [output_size]. Data type supported: S32.
     * @param[in] forget_gate_bias            1D weights tensor info with dimensions [output_size]. Data type supported: S32.
     * @param[in] cell_bias                   1D weights tensor info with dimensions [output_size]. Data type supported: S32.
     * @param[in] output_gate_bias            1D weights tensor info with dimensions [output_size]. Data type supported: S32.
     * @param[in] cell_state_in               2D tensor info with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
     * @param[in] output_state_in             2D tensor info with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in] cell_state_out              Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data type supported: QSYMM16.
     * @param[in] output_state_out            Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input,
                           const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
                           const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
                           const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
                           const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in,
                           const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out);

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    // NOTE(review): the declaration order of the data members below fixes their construction
    // order; the out-of-line constructor presumably lists them in this order - confirm before
    // reordering anything.
    MemoryGroup _memory_group;

    // Functions used
    NEGEMMLowpMatrixMultiplyCore _gemmlowp;
    NEGEMMLowpOutputStage        _output_stage;
    NETranspose                  _transpose_weights;
    NEConcatenateLayer           _concat_input_weights;
    NEConcatenateLayer           _concat_recurrent_weights;
    NEConcatenateLayer           _concat_weights;
    NEConcatenateLayer           _concat_inputs;
    NEConcatenateLayer           _concat_bias;
    NEActivationLayer            _sigmoid_forget_gate;
    NEActivationLayer            _sigmoid_input_gate;
    NEActivationLayer            _sigmoid_output_gate;
    NEActivationLayer            _tanh_modulation_gate;
    NEActivationLayer            _tanh_output_state;
    NEArithmeticAddition         _add1;
    NEArithmeticAddition         _add2;
    NEPixelWiseMultiplication    _mul1;
    NEPixelWiseMultiplication    _mul2;
    NEPixelWiseMultiplication    _mul3;
    NESlice                      _slice_input_tensor;
    NESlice                      _slice_forget_tensor;
    NESlice                      _slice_cell_tensor;
    NESlice                      _slice_output_tensor;
    NEDequantizationLayer        _dequantize;
    NEQuantizationLayer          _quantize;

    // Tensor pointers (raw pointers to read-only tensors; one per weight/bias argument of configure())
    const ITensor *_input_to_input_weights;
    const ITensor *_input_to_forget_weights;
    const ITensor *_input_to_cell_weights;
    const ITensor *_input_to_output_weights;
    const ITensor *_recurrent_to_input_weights;
    const ITensor *_recurrent_to_forget_weights;
    const ITensor *_recurrent_to_cell_weights;
    const ITensor *_recurrent_to_output_weights;
    const ITensor *_input_gate_bias;
    const ITensor *_forget_gate_bias;
    const ITensor *_cell_bias;
    const ITensor *_output_gate_bias;

    // Temporary tensors
    Tensor _recurrent_weights;
    Tensor _input_weights;
    Tensor _weights;
    Tensor _input;
    Tensor _weights_transposed;
    Tensor _output_highp;
    Tensor _output_lowp;
    Tensor _bias;
    Tensor _forget_gate_input;
    Tensor _input_gate_input;
    Tensor _output_gate_input;
    Tensor _input_modulation_gate_input;
    Tensor _forget_gate_output;
    Tensor _input_gate_output;
    Tensor _output_gate_output;
    Tensor _input_modulation_gate_output;
    Tensor _cell_state1;
    Tensor _cell_state2;
    Tensor _output_state_tmp;
    Tensor _output_state_out_symm;
    Tensor _output_state_out_f32;

    // NOTE(review): presumably records whether prepare() has already run - confirm in the implementation.
    bool _is_prepared;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NELSTMLAYERQUANTIZED_H */