xref: /aosp_15_r20/external/ComputeLibrary/tests/validation/reference/DFT.cpp (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2019-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "DFT.h"
25 
26 #include "PadLayer.h"
27 #include "Permute.h"
28 #include "Reverse.h"
29 #include "SliceOperations.h"
30 #include "support/ToolchainSupport.h"
31 
32 #include <cmath>
33 
34 namespace arm_compute
35 {
36 namespace test
37 {
38 namespace validation
39 {
40 namespace reference
41 {
42 namespace
43 {
44 /** Performs an one dimensional DFT on a given real sequence.
45  *
46  * @param[in]  src_ptr Pointer to the real input sequence.
47  * @param[in]  N       Size of input sequence.
48  * @param[out] dst_ptr Pointer to the complex output sequence.
49  * @param[out] K       Size of the output sequence
50  */
51 template <typename T>
rdft_1d_step(const T * src_ptr,size_t N,T * dst_ptr,size_t K)52 void rdft_1d_step(const T *src_ptr, size_t N, T *dst_ptr, size_t K)
53 {
54 #if defined(_OPENMP)
55     #pragma omp parallel for
56 #endif /* _OPENMP */
57     for(unsigned int k = 0; k < K; ++k)
58     {
59         float Xr = 0;
60         float Xi = 0;
61         for(unsigned int n = 0; n < N; ++n)
62         {
63             const float alpha = (2 * M_PI * k * n) / N;
64             const float val_r = src_ptr[n];
65             // Assuming DFT from the R domain thus skipping imaginary calculations
66             Xr += val_r * cos(alpha);
67             Xi -= val_r * sin(alpha);
68         }
69 
70         dst_ptr[k * 2]     = Xr;
71         dst_ptr[k * 2 + 1] = Xi;
72     }
73 }
74 
75 /** Performs an one dimensional DFT on a given complex sequence.
76  *
77  * @param[in]  src_ptr Pointer to the complex input sequence.
78  * @param[out] dst_ptr Pointer to the complex output sequence.
79  * @param[in]  N       Size of the sequences
80  */
81 template <typename T>
dft_1d_step(const T * src_ptr,T * dst_ptr,size_t N)82 void dft_1d_step(const T *src_ptr, T *dst_ptr, size_t N)
83 {
84 #if defined(_OPENMP)
85     #pragma omp parallel for
86 #endif /* _OPENMP */
87     for(unsigned int k = 0; k < N; ++k)
88     {
89         float Xr = 0;
90         float Xi = 0;
91         for(unsigned int n = 0; n < N; ++n)
92         {
93             const float alpha     = (2 * M_PI * k * n) / N;
94             const float val_r     = src_ptr[2 * n];
95             const float val_i     = src_ptr[2 * n + 1];
96             const float cos_alpha = cos(alpha);
97             const float sin_alpha = sin(alpha);
98 
99             Xr += val_r * cos_alpha + val_i * sin_alpha;
100             Xi += val_i * cos_alpha - val_r * sin_alpha;
101         }
102 
103         dst_ptr[k * 2]     = Xr;
104         dst_ptr[k * 2 + 1] = Xi;
105     }
106 }
107 
108 /** Performs an one dimensional inverse DFT on a given real sequence.
109  *
110  * @param[in]  src_ptr Pointer to the real input sequence.
111  * @param[in]  K       Size of input sequence.
112  * @param[out] dst_ptr Pointer to the complex output sequence.
113  * @param[out] N       Size of the output sequence
114  */
115 template <typename T>
irdft_1d_step(const T * src_ptr,size_t K,T * dst_ptr,size_t N)116 void irdft_1d_step(const T *src_ptr, size_t K, T *dst_ptr, size_t N)
117 {
118     const bool         is_odd     = N % 2;
119     const unsigned int Nleft      = N - K;
120     const int          tail_start = is_odd ? K - 1 : K - 2;
121 #if defined(_OPENMP)
122     #pragma omp parallel for
123 #endif /* _OPENMP */
124     for(unsigned int n = 0; n < N; ++n)
125     {
126         float xr = 0;
127         for(unsigned int k = 0; k < K; ++k)
128         {
129             const float alpha = (2 * M_PI * k * n) / N;
130             xr += src_ptr[2 * k] * cos(alpha) - src_ptr[2 * k + 1] * sin(alpha);
131         }
132 
133         unsigned int j = tail_start;
134         for(unsigned int k = 0; k < Nleft; ++k)
135         {
136             const float alpha = (2 * M_PI * (k + K) * n) / N;
137             xr += src_ptr[2 * j] * cos(alpha) + src_ptr[2 * j + 1] * sin(alpha);
138             --j;
139         }
140 
141         dst_ptr[n] = xr;
142     }
143 }
144 
145 /** Performs an one dimensional inverse DFT on a given complex sequence.
146  *
147  * @param[in]  src_ptr Pointer to the complex input sequence.
148  * @param[out] dst_ptr Pointer to the complex output sequence.
149  * @param[in]  N       Size of the sequences
150  */
151 template <typename T>
idft_1d_step(const T * src_ptr,T * dst_ptr,size_t N)152 void idft_1d_step(const T *src_ptr, T *dst_ptr, size_t N)
153 {
154 #if defined(_OPENMP)
155     #pragma omp parallel for
156 #endif /* _OPENMP */
157     for(unsigned int n = 0; n < N; ++n)
158     {
159         float xr = 0;
160         float xi = 0;
161         for(unsigned int k = 0; k < N; ++k)
162         {
163             const float alpha     = (2 * M_PI * k * n) / N;
164             const float cos_alpha = cos(alpha);
165             const float sin_alpha = sin(alpha);
166             const float val_r     = src_ptr[2 * k];
167             const float val_i     = src_ptr[2 * k + 1];
168 
169             xr += val_r * cos_alpha - val_i * sin_alpha;
170             xi += val_i * cos_alpha + val_r * sin_alpha;
171         }
172 
173         dst_ptr[2 * n]     = xr;
174         dst_ptr[2 * n + 1] = xi;
175     }
176 }
177 
178 template <typename T>
rdft_1d_core(const SimpleTensor<T> & src,FFTDirection direction,bool is_odd)179 SimpleTensor<T> rdft_1d_core(const SimpleTensor<T> &src, FFTDirection direction, bool is_odd)
180 {
181     // Performs only rdft
182     ARM_COMPUTE_ERROR_ON(direction == FFTDirection::Forward && src.num_channels() != 1);
183     ARM_COMPUTE_ERROR_ON(direction == FFTDirection::Inverse && src.num_channels() != 2);
184 
185     const unsigned int inverse_tail = is_odd ? 1 : 0;
186     const unsigned int N            = src.shape()[0];
187     const unsigned int K            = direction == FFTDirection::Forward ? N / 2 + 1 : (N - 1) * 2 + inverse_tail;
188     const unsigned int num_channels = direction == FFTDirection::Forward ? 2 : 1;
189 
190     TensorShape dst_shape = src.shape();
191     dst_shape.set(0, K);
192 
193     SimpleTensor<T> dst(dst_shape, src.data_type(), num_channels);
194 
195     const unsigned int upper_dims = src.shape().total_size_upper(1);
196 #if defined(_OPENMP)
197     #pragma omp parallel for
198 #endif /* _OPENMP */
199     for(unsigned int du = 0; du < upper_dims; ++du)
200     {
201         const T *src_row_ptr = src.data() + du * N * src.num_channels();
202         T       *dst_row_ptr = dst.data() + du * K * dst.num_channels();
203         direction == FFTDirection::Forward ? rdft_1d_step(src_row_ptr, N, dst_row_ptr, K) : irdft_1d_step(src_row_ptr, N, dst_row_ptr, K);
204     }
205 
206     return dst;
207 }
208 
209 template <typename T>
dft_1d_core(const SimpleTensor<T> & src,FFTDirection direction)210 SimpleTensor<T> dft_1d_core(const SimpleTensor<T> &src, FFTDirection direction)
211 {
212     ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
213 
214     const unsigned int N = src.shape()[0];
215 
216     SimpleTensor<T> dst(src.shape(), src.data_type(), src.num_channels());
217 
218     const unsigned int upper_dims = src.shape().total_size_upper(1);
219 #if defined(_OPENMP)
220     #pragma omp parallel for
221 #endif /* _OPENMP */
222     for(unsigned int du = 0; du < upper_dims; ++du)
223     {
224         const T *src_row_ptr = src.data() + du * N * src.num_channels();
225         T       *dst_row_ptr = dst.data() + du * N * dst.num_channels();
226         direction == FFTDirection::Forward ? dft_1d_step(src_row_ptr, dst_row_ptr, N) : idft_1d_step(src_row_ptr, dst_row_ptr, N);
227     }
228 
229     return dst;
230 }
231 
232 /** Scale a tensor by a given scaling factor.
233  *
234  * @param[in,out] tensor         Tensor to scale.
235  * @param[in]     scaling_factor Scaling to scale the tensor data with.
236  */
237 template <typename T>
scale(SimpleTensor<T> & tensor,T scaling_factor)238 void scale(SimpleTensor<T> &tensor, T scaling_factor)
239 {
240     const int total_elements = tensor.num_elements() * tensor.num_channels();
241     T        *data_ptr       = tensor.data();
242 #if defined(_OPENMP)
243     #pragma omp parallel for
244 #endif /* _OPENMP */
245     for(int i = 0; i < total_elements; ++i)
246     {
247         data_ptr[i] /= scaling_factor;
248     }
249 }
250 
251 /** Performs a complex element-wise multiplication with reduction across the channels axis.
252  *
253  * @param[in] input   Input tensor.
254  * @param[in] weights Weights tensor.
255  *
256  * @return Output tensor.
257  */
258 template <typename T>
complex_mul_and_reduce(const SimpleTensor<T> & input,const SimpleTensor<T> & weights)259 SimpleTensor<T> complex_mul_and_reduce(const SimpleTensor<T> &input, const SimpleTensor<T> &weights)
260 {
261     const uint32_t W  = input.shape().x();
262     const uint32_t H  = input.shape().y();
263     const uint32_t Ci = input.shape().z();
264     const uint32_t Co = weights.shape()[3];
265     const uint32_t N  = input.shape().total_size() / (W * H * Ci);
266 
267     TensorShape output_shape = input.shape();
268     output_shape.set(2, Co);
269     SimpleTensor<T> dst(output_shape, input.data_type(), input.num_channels());
270 
271     // dst memory to zero
272     const auto total_element_count = dst.num_channels() * dst.num_elements();
273     std::fill_n(dst.data(), total_element_count, 0);
274 
275     for(uint32_t b = 0; b < N; ++b)
276     {
277         for(uint32_t co = 0; co < Co; ++co)
278         {
279             for(uint32_t ci = 0; ci < Ci; ++ci)
280             {
281                 for(uint32_t h = 0; h < H; ++h)
282                 {
283                     for(uint32_t w = 0; w < W; ++w)
284                     {
285                         const uint32_t    i_index  = w + h * W + ci * H * W + b * H * W * Ci;
286                         const uint32_t    w_index  = w + h * W + ci * H * W + co * H * W * Ci;
287                         const uint32_t    o_index  = w + h * W + co * H * W + b * H * W * Co;
288                         const Coordinates i_coords = index2coords(input.shape(), i_index);
289                         const Coordinates w_coords = index2coords(weights.shape(), w_index);
290                         const Coordinates o_coords = index2coords(dst.shape(), o_index);
291 
292                         auto i_ptr = static_cast<const T *>(input(i_coords));
293                         auto w_ptr = static_cast<const T *>(weights(w_coords));
294                         auto o_ptr = static_cast<T *>(dst(o_coords));
295 
296                         const T Rin = i_ptr[0];
297                         const T Iin = i_ptr[1];
298                         const T Rw  = w_ptr[0];
299                         const T Iw  = w_ptr[1];
300 
301                         o_ptr[0] += Rin * Rw - Iin * Iw;
302                         o_ptr[1] += Rin * Iw + Rw * Iin;
303                     }
304                 }
305             }
306         }
307     }
308     return dst;
309 }
310 } // namespace
311 
312 template <typename T>
rdft_1d(const SimpleTensor<T> & src)313 SimpleTensor<T> rdft_1d(const SimpleTensor<T> &src)
314 {
315     return rdft_1d_core(src, FFTDirection::Forward, false);
316 }
317 
318 template <typename T>
ridft_1d(const SimpleTensor<T> & src,bool is_odd)319 SimpleTensor<T> ridft_1d(const SimpleTensor<T> &src, bool is_odd)
320 {
321     auto dst = rdft_1d_core(src, FFTDirection::Inverse, is_odd);
322 
323     const T scaling_factor = T(dst.shape()[0]);
324     scale(dst, scaling_factor);
325 
326     return dst;
327 }
328 
329 template <typename T>
dft_1d(const SimpleTensor<T> & src,FFTDirection direction)330 SimpleTensor<T> dft_1d(const SimpleTensor<T> &src, FFTDirection direction)
331 {
332     auto dst = dft_1d_core(src, direction);
333     if(direction == FFTDirection::Inverse)
334     {
335         const T scaling_factor = T(dst.shape()[0]);
336         scale(dst, scaling_factor);
337     }
338     return dst;
339 }
340 
341 template <typename T>
rdft_2d(const SimpleTensor<T> & src)342 SimpleTensor<T> rdft_2d(const SimpleTensor<T> &src)
343 {
344     ARM_COMPUTE_ERROR_ON(src.num_channels() != 1);
345     constexpr FFTDirection direction = FFTDirection::Forward;
346 
347     auto first_pass  = rdft_1d_core(src, direction, false);
348     auto transposed  = permute(first_pass, PermutationVector(1U, 0U));
349     auto second_pass = dft_1d_core(transposed, direction);
350     return permute(second_pass, PermutationVector(1U, 0U));
351 }
352 
353 template <typename T>
ridft_2d(const SimpleTensor<T> & src,bool is_odd)354 SimpleTensor<T> ridft_2d(const SimpleTensor<T> &src, bool is_odd)
355 {
356     ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
357     constexpr FFTDirection direction = FFTDirection::Inverse;
358 
359     auto transposed   = permute(src, PermutationVector(1U, 0U));
360     auto first_pass   = dft_1d_core(transposed, direction);
361     auto transposed_2 = permute(first_pass, PermutationVector(1U, 0U));
362     auto dst          = rdft_1d_core(transposed_2, direction, is_odd);
363 
364     const T scaling_factor = T(dst.shape()[0] * dst.shape()[1]);
365     scale(dst, scaling_factor);
366     return dst;
367 }
368 
369 template <typename T>
dft_2d(const SimpleTensor<T> & src,FFTDirection direction)370 SimpleTensor<T> dft_2d(const SimpleTensor<T> &src, FFTDirection direction)
371 {
372     ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
373 
374     if(direction == FFTDirection::Forward)
375     {
376         auto first_pass  = dft_1d_core(src, direction);
377         auto transposed  = permute(first_pass, PermutationVector(1U, 0U));
378         auto second_pass = dft_1d_core(transposed, direction);
379         return permute(second_pass, PermutationVector(1U, 0U));
380     }
381     else
382     {
383         auto transposed   = permute(src, PermutationVector(1U, 0U));
384         auto first_pass   = dft_1d_core(transposed, direction);
385         auto transposed_2 = permute(first_pass, PermutationVector(1U, 0U));
386         auto dst          = dft_1d_core(transposed_2, direction);
387 
388         const T scaling_factor = T(dst.shape()[0] * dst.shape()[1]);
389         scale(dst, scaling_factor);
390 
391         return dst;
392     }
393 }
394 
395 template <typename T>
conv2d_dft(const SimpleTensor<T> & src,const SimpleTensor<T> & w,const PadStrideInfo & conv_info)396 SimpleTensor<T> conv2d_dft(const SimpleTensor<T> &src, const SimpleTensor<T> &w, const PadStrideInfo &conv_info)
397 {
398     // Pad input to full padding
399     const PaddingList padding_in = { { 0, w.shape()[0] - 1 }, { 0, w.shape()[1] - 1 } };
400     auto              padded_src = pad_layer(src, padding_in);
401 
402     // Flip weights
403     std::vector<uint32_t>  axis_v = { 0, 1 };
404     SimpleTensor<uint32_t> axis{ TensorShape(2U), DataType::U32 };
405     std::copy(axis_v.begin(), axis_v.begin() + axis.shape().x(), axis.data());
406     auto flipped_w = reverse(w, axis);
407 
408     // Pad weights to have the same size as input
409     const PaddingList paddings_w = { { 0, src.shape()[0] - 1 }, { 0, src.shape()[1] - 1 } };
410     auto              padded_w   = pad_layer(flipped_w, paddings_w);
411 
412     // Transform input and weights to frequency domain
413     auto Fsrc = rdft_2d(padded_src);
414     auto Fw   = rdft_2d(padded_w);
415 
416     // Perform dot product
417     auto Fdst = complex_mul_and_reduce(Fsrc, Fw);
418 
419     // Transform output back to frequency domain
420     auto conv_res = ridft_2d(Fdst);
421 
422     // Slice output
423     const int start_left = w.shape().x() - conv_info.pad_left() - 1;
424     const int start_top  = w.shape().y() - conv_info.pad_top() - 1;
425     const int end_right  = conv_res.shape().x() - (w.shape().x() - conv_info.pad_right() - 1);
426     const int end_botton = conv_res.shape().y() - (w.shape().y() - conv_info.pad_bottom() - 1);
427     return slice(conv_res, Coordinates(start_left, start_top), Coordinates(end_right, end_botton));
428 }
429 
430 // FP32
431 template SimpleTensor<float> rdft_1d(const SimpleTensor<float> &src);
432 template SimpleTensor<float> ridft_1d(const SimpleTensor<float> &src, bool is_odd);
433 template SimpleTensor<float> dft_1d(const SimpleTensor<float> &src, FFTDirection direction);
434 
435 template SimpleTensor<float> rdft_2d(const SimpleTensor<float> &src);
436 template SimpleTensor<float> ridft_2d(const SimpleTensor<float> &src, bool is_odd);
437 template SimpleTensor<float> dft_2d(const SimpleTensor<float> &src, FFTDirection direction);
438 
439 template SimpleTensor<float> conv2d_dft(const SimpleTensor<float> &src, const SimpleTensor<float> &w, const PadStrideInfo &conv_info);
440 
441 // FP16
442 template SimpleTensor<half> rdft_1d(const SimpleTensor<half> &src);
443 template SimpleTensor<half> ridft_1d(const SimpleTensor<half> &src, bool is_odd);
444 template SimpleTensor<half> dft_1d(const SimpleTensor<half> &src, FFTDirection direction);
445 
446 template SimpleTensor<half> rdft_2d(const SimpleTensor<half> &src);
447 template SimpleTensor<half> ridft_2d(const SimpleTensor<half> &src, bool is_odd);
448 template SimpleTensor<half> dft_2d(const SimpleTensor<half> &src, FFTDirection direction);
449 
450 template SimpleTensor<half> conv2d_dft(const SimpleTensor<half> &src, const SimpleTensor<half> &w, const PadStrideInfo &conv_info);
451 } // namespace reference
452 } // namespace validation
453 } // namespace test
454 } // namespace arm_compute
455