/*
 * Copyright (c) 2019-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"

#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "src/core/CL/CLValidate.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"

namespace arm_compute
{
namespace
{
Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info)
{
    ARM_COMPUTE_RETURN_ERROR_ON_MSG(info.epsilon == 0.f, "Epsilon must be different from 0");
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F16, DataType::F32);

    if(output != nullptr && output->total_size() != 0)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_channels() != output->num_channels(), "Input and output have different number of channels");
    }

    return Status{};
}

Status validate_arguments_meanvar(const ITensorInfo *input, const ITensorInfo *output)
{
    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F16, DataType::F32);

    if(output != nullptr && output->total_size() != 0)
    {
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
        ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_channels() != output->num_channels(), "Input and output have different number of channels");
    }

    return Status{};
}
} // namespace

CLComputeMeanVariance::CLComputeMeanVariance()
    : _input(nullptr), _output(nullptr)
{
    _type = CLKernelType::ELEMENTWISE;
}

void CLComputeMeanVariance::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, bool use_mixed_precision)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input);
    auto padding_info = get_padding_info({ input, output });

    _input  = input;
    _output = output == nullptr ? input : output;

    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_meanvar(_input->info(), _output->info()));
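    // Vector width: 16 bytes per iteration, i.e. 4 F32 or 8 F16 elements (passed to the kernel as VEC_SIZE)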
    const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();

    CLBuildOptions build_opts;
    build_opts.add_option("-DINTERNAL_DATA_TYPE=" + (use_mixed_precision ? "float" : get_cl_type_from_data_type(input->info()->data_type())));
    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
    build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
    build_opts.add_option("-DDIM_X=" + support::cpp11::to_string(input->info()->dimension(0)));
    build_opts.add_option("-DDIM_Y=" + support::cpp11::to_string(input->info()->dimension(1)));
    build_opts.add_option("-DDIM_Z=" + support::cpp11::to_string(input->info()->dimension(2)));
    build_opts.add_option_if(_input->info()->data_layout() == DataLayout::NHWC, "-DNHWC");
    // Create kernel
    _kernel = create_kernel(compile_context, "compute_mean_var", build_opts.options());

    // We handle the planes manually
    Window             win           = calculate_max_window(*(input->info()), Steps(1));
    const auto         data_layout   = input->info()->data_layout();
    const unsigned int channel_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
    const unsigned int batches_idx   = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
    const unsigned int input_channel = input->info()->dimension(channel_idx);
    const unsigned int input_batches = input->info()->dimension(batches_idx);
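    // The kernel writes two values (mean and variance) per channel and per batch, hence the {C, 2, N} output shape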
    const TensorShape  out_shape(input_channel, 2u, input_batches);

    // Output auto initialization if not yet initialized
    if(use_mixed_precision)
    {
        auto_init_if_empty(*_output->info(), out_shape, 1, DataType::F32);
    }
    else
    {
        auto_init_if_empty(*_output->info(), out_shape, 1, input->info()->data_type());
    }
    ICLKernel::configure_internal(win);
    ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
}

Status CLComputeMeanVariance::validate(const ITensorInfo *input, const ITensorInfo *output)
{
    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_meanvar(input, output));
    return Status{};
}

void CLComputeMeanVariance::run(const Window &window, cl::CommandQueue &queue)
{
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);

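    // Collapse every dimension from Z upwards into one; the layout-specific branches below
    // pin the dimensions that the OpenCL kernel walks internally.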
    Window collapsed_window = window.collapse(window, Window::DimZ);

    // We will process the planes together
    if(_input->info()->data_layout() == DataLayout::NCHW)
    {
        collapsed_window.set(Window::DimX, Window::Dimension(0, 1, 1));
        collapsed_window.set(Window::DimY, Window::Dimension(0, 1, 1));
    }
    else
    {
        collapsed_window.set(Window::DimZ, Window::Dimension(0, 1, 1));
        collapsed_window.set(Window::DimY, Window::Dimension(0, _input->info()->dimension(3), 1));
    }
    unsigned int idx = 0;
    add_4D_tensor_argument(idx, _input, collapsed_window);
    add_3D_tensor_argument(idx, _output, collapsed_window);

    enqueue(queue, *this, collapsed_window, lws_hint());
}

CLInstanceNormalizationLayerKernel::CLInstanceNormalizationLayerKernel()
    : _input(nullptr), _output(nullptr), _mean(nullptr), _run_in_place(false)
{
    _type = CLKernelType::ELEMENTWISE;
}

void CLInstanceNormalizationLayerKernel::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *mean_var, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input);
    auto padding_info = get_padding_info({ input, output });

    _input  = input;
    _output = output == nullptr ? input : output;
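    // mean_var is expected to hold the per-channel, per-batch mean and variance
    // (e.g. the output produced by CLComputeMeanVariance above).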
    _mean   = mean_var;

    _run_in_place = (output == nullptr) || (output == input);
    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(_input->info(), _output->info(), info));
    const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();

    CLBuildOptions build_opts;
    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
    build_opts.add_option("-DINTERNAL_DATA_TYPE=" + (info.use_mixed_precision ? "float" : get_cl_type_from_data_type(input->info()->data_type())));
    build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
    build_opts.add_option("-DDIM_X=" + support::cpp11::to_string(input->info()->dimension(0)));
    build_opts.add_option("-DDIM_Y=" + support::cpp11::to_string(input->info()->dimension(1)));
    build_opts.add_option("-DDIM_Z=" + support::cpp11::to_string(input->info()->dimension(2)));
179 build_opts.add_option("-DGAMMA=" + float_to_string_with_full_precision(info.gamma));
180 build_opts.add_option("-DBETA=" + float_to_string_with_full_precision(info.beta));
181 build_opts.add_option("-DEPSILON=" + float_to_string_with_full_precision(info.epsilon));
182 build_opts.add_option_if(_run_in_place, "-DIN_PLACE");
183 build_opts.add_option_if(_input->info()->data_layout() == DataLayout::NHWC, "-DNHWC");
184
185 // Create kernel
186 _kernel = create_kernel(compile_context, "instance_normalization", build_opts.options());
187
188 // Configure kernel window
189 Window win = calculate_max_window(*input->info(), Steps(1));
190 if(output != nullptr)
191 {
192 auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type());
193 }
194
195 ICLKernel::configure_internal(win);
196 ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
197 }
198
validate(const ITensorInfo * input,const ITensorInfo * output,const InstanceNormalizationLayerKernelInfo & info)199 Status CLInstanceNormalizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info)
200 {
201 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, info));
202 return Status{};
203 }

void CLInstanceNormalizationLayerKernel::run(const Window &window, cl::CommandQueue &queue)
{
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);

    Window collapsed_window = window.collapse(window, Window::DimZ);

    // We will process the planes together
    if(_input->info()->data_layout() == DataLayout::NCHW)
    {
        collapsed_window.set(Window::DimX, Window::Dimension(0, 1, 1));
        collapsed_window.set(Window::DimY, Window::Dimension(0, 1, 1));
    }
    else
    {
        collapsed_window.set(Window::DimY, Window::Dimension(0, 1, 1));
        collapsed_window.set(Window::DimZ, Window::Dimension(0, _input->info()->dimension(3), 1));
    }

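    // Argument order matches the OpenCL kernel signature: input (4D), precomputed
    // mean/variance (3D) and, unless running in place, the output (4D).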
    unsigned int idx = 0;
    add_4D_tensor_argument(idx, _input, collapsed_window);
    add_3D_tensor_argument(idx, _mean, collapsed_window);

    if(!_run_in_place)
    {
        add_4D_tensor_argument(idx, _output, collapsed_window);
    }

    enqueue(queue, *this, collapsed_window, lws_hint());
}
} // namespace arm_compute