// xref: /aosp_15_r20/external/ComputeLibrary/src/cpu/kernels/CpuAddMulAddKernel.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
/*
 * Copyright (c) 2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24 #include "src/cpu/kernels/CpuAddMulAddKernel.h"
25 
26 #include "arm_compute/core/ITensor.h"
27 #include "arm_compute/core/TensorInfo.h"
28 #include "arm_compute/core/Validate.h"
29 
30 #include "src/core/CPP/Validate.h"
31 #include "src/core/common/Registrars.h"
32 #include "src/core/helpers/AutoConfiguration.h"
33 #include "src/core/helpers/WindowHelpers.h"
34 #include "src/cpu/kernels/addmuladd/list.h"
35 
36 namespace arm_compute
37 {
38 namespace cpu
39 {
40 namespace kernels
41 {
42 namespace
43 {
44 static const std::vector<CpuAddMulAddKernel::AddMulAddKernel> available_kernels =
45 {
46 #ifdef __aarch64__
47     {
48         "neon_fp32_add_mul_add",
__anonb68491330202() 49         [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F32); },
50         REGISTER_FP32_NEON(arm_compute::cpu::add_mul_add_fp32_neon)
51     },
52     {
53         "neon_fp16_add_mul_add",
__anonb68491330302() 54         [](const DataTypeISASelectorData & data) { return (data.dt == DataType::F16); },
55         REGISTER_FP16_NEON(arm_compute::cpu::add_mul_add_fp16_neon)
56     },
57     {
58         "neon_qasymm8_add_mul_add",
__anonb68491330402() 59         [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8); },
60         REGISTER_QASYMM8_NEON(arm_compute::cpu::add_mul_add_u8_neon)
61     },
62     {
63         "neon_qasymm8_signed_add_mul_add",
__anonb68491330502() 64         [](const DataTypeISASelectorData & data) { return (data.dt == DataType::QASYMM8_SIGNED); },
65         REGISTER_QASYMM8_SIGNED_NEON(arm_compute::cpu::add_mul_add_s8_neon)
66     }
67 #endif // __aarch64__
68 };
69 
validate_arguments(const ITensorInfo * input1,const ITensorInfo * input2,const ITensorInfo * bn_mul,const ITensorInfo * bn_add,const ITensorInfo * add_output,const ITensorInfo * final_output,ConvertPolicy policy,const ActivationLayerInfo & act_info)70 Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
71                           const ITensorInfo *bn_mul, const ITensorInfo *bn_add,
72                           const ITensorInfo *add_output, const ITensorInfo *final_output,
73                           ConvertPolicy policy, const ActivationLayerInfo &act_info)
74 {
75     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, bn_mul, bn_add, final_output);
76 
77     ARM_COMPUTE_RETURN_ERROR_ON_MSG(policy != ConvertPolicy::SATURATE, "Only Saturate Policy is supported");
78 
79     using ActFunction          = ActivationLayerInfo::ActivationFunction;
80     const ActFunction act_func = act_info.activation();
81     ARM_COMPUTE_RETURN_ERROR_ON_MSG(
82         (act_func != ActFunction::BOUNDED_RELU && act_func != ActFunction::RELU && act_func != ActFunction::LU_BOUNDED_RELU && act_func != ActFunction::IDENTITY),
83         "Only RELU Family activations, or no activation, is supported");
84 
85     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input1);
86     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
87                                                          DataType::F16, DataType::F32);
88     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2);
89 
90     if(is_data_type_quantized(input1->data_type()))
91     {
92         ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bn_mul, 1, DataType::F32);
93         ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bn_add, 1, DataType::F32);
94     }
95     else
96     {
97         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, bn_mul);
98         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, bn_add);
99     }
100 
101     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, input2); // No broadcasting
102     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(bn_mul, bn_add);
103     ARM_COMPUTE_RETURN_ERROR_ON_MSG(bn_mul->num_dimensions() != 1, "BatchNorm coefficients should be 1D array");
104     ARM_COMPUTE_RETURN_ERROR_ON_MSG(bn_mul->tensor_shape()[0] != input1->tensor_shape()[0], "First dimensions of inputs and batchNorm coefs should match");
105 
106     // Validate in case we have add layer's output (intermediate) initialized
107     if(add_output != nullptr && add_output->total_size() > 0)
108     {
109         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, add_output);
110         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, add_output);
111     }
112 
113     // Validate in case final output has been initialized
114     if(final_output->total_size() > 0)
115     {
116         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, final_output);
117         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, final_output);
118     }
119 
120     const auto uk = CpuAddMulAddKernel::get_implementation<DataTypeISASelectorData>(DataTypeISASelectorData{ input1->data_type(), CPUInfo::get().get_isa() });
121     ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
122 
123     return Status{};
124 }
125 } // namespace
126 
configure(const ITensorInfo * input1,const ITensorInfo * input2,const ITensorInfo * bn_mul,const ITensorInfo * bn_add,ITensorInfo * add_output,ITensorInfo * final_output,ConvertPolicy policy,const ActivationLayerInfo & act_info)127 void CpuAddMulAddKernel::configure(const ITensorInfo *input1, const ITensorInfo *input2,
128                                    const ITensorInfo *bn_mul, const ITensorInfo *bn_add,
129                                    ITensorInfo *add_output, ITensorInfo *final_output,
130                                    ConvertPolicy policy, const ActivationLayerInfo &act_info)
131 {
132     ARM_COMPUTE_UNUSED(bn_mul, bn_add, input2);
133     ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, bn_add, bn_mul, final_output);
134     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1, input2, bn_mul, bn_add, add_output, final_output, policy, act_info));
135 
136     const auto uk = CpuAddMulAddKernel::get_implementation<DataTypeISASelectorData>(DataTypeISASelectorData{ input1->data_type(), CPUInfo::get().get_isa() });
137     ARM_COMPUTE_ERROR_ON_NULLPTR(uk);
138     ARM_COMPUTE_ERROR_ON(uk->ukernel == nullptr);
139 
140     _policy     = policy;
141     _act_info   = act_info;
142     _run_method = uk->ukernel;
143     _name       = std::string("CpuAddMulAddKernel/").append(uk->name);
144 
145     // Auto initialize outputs if not initialized
146     set_shape_if_empty(*final_output, input1->tensor_shape());
147     set_data_type_if_unknown(*final_output, input1->data_type());
148 
149     if(add_output != nullptr)
150     {
151         set_shape_if_empty(*add_output, input1->tensor_shape());
152         set_data_type_if_unknown(*add_output, input1->data_type());
153     }
154 
155     // Configure kernel window
156     Window win;
157     win = calculate_max_window(*final_output, Steps());
158     ICpuKernel::configure(win);
159 }
160 
validate(const ITensorInfo * input1,const ITensorInfo * input2,const ITensorInfo * bn_mul,const ITensorInfo * bn_add,const ITensorInfo * add_output,const ITensorInfo * final_output,ConvertPolicy policy,const ActivationLayerInfo & act_info)161 Status CpuAddMulAddKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2,
162                                     const ITensorInfo *bn_mul, const ITensorInfo *bn_add,
163                                     const ITensorInfo *add_output, const ITensorInfo *final_output,
164                                     ConvertPolicy policy, const ActivationLayerInfo &act_info)
165 {
166     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, bn_mul, bn_add, final_output);
167 
168     ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, bn_mul, bn_add, add_output, final_output, policy, act_info));
169 
170     return Status{};
171 }
172 
run_op(ITensorPack & tensors,const Window & window,const ThreadInfo & info)173 void CpuAddMulAddKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
174 {
175     ARM_COMPUTE_UNUSED(info);
176 
177     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
178     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
179 
180     ARM_COMPUTE_ERROR_ON(tensors.empty());
181     ARM_COMPUTE_ERROR_ON(_run_method == nullptr);
182 
183     const ITensor *input1       = tensors.get_const_tensor(TensorType::ACL_SRC_0);
184     const ITensor *input2       = tensors.get_const_tensor(TensorType::ACL_SRC_1);
185     const ITensor *bn_mul       = tensors.get_const_tensor(TensorType::ACL_SRC_2);
186     const ITensor *bn_add       = tensors.get_const_tensor(TensorType::ACL_SRC_3);
187     ITensor       *add_output   = tensors.get_tensor(TensorType::ACL_DST_0);
188     ITensor       *final_output = tensors.get_tensor(TensorType::ACL_DST_1);
189 
190     _run_method(input1, input2, bn_mul, bn_add, add_output, final_output, _policy, _act_info, window);
191 }
192 
name() const193 const char *CpuAddMulAddKernel::name() const
194 {
195     return _name.c_str();
196 }
197 
get_available_kernels()198 const std::vector<CpuAddMulAddKernel::AddMulAddKernel> &CpuAddMulAddKernel::get_available_kernels()
199 {
200     return available_kernels;
201 }
202 } // namespace kernels
203 } // namespace cpu
204 } // namespace arm_compute
205