xref: /aosp_15_r20/external/gemmlowp/meta/single_thread_transform.h (revision 5f39d1b313f0528e11bae88b3029b54b9e1033e7)
// Copyright 2016 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14*5f39d1b3SJooyung Han 
15*5f39d1b3SJooyung Han #ifndef GEMMLOWP_META_SINGLE_THREAD_TRANSFORM_H_
16*5f39d1b3SJooyung Han #define GEMMLOWP_META_SINGLE_THREAD_TRANSFORM_H_
17*5f39d1b3SJooyung Han 
18*5f39d1b3SJooyung Han #include <iostream>
19*5f39d1b3SJooyung Han #include "base.h"
20*5f39d1b3SJooyung Han 
21*5f39d1b3SJooyung Han namespace gemmlowp {
22*5f39d1b3SJooyung Han namespace meta {
23*5f39d1b3SJooyung Han 
// Forward declaration of the public 1D transform entry point. The definition
// appears further down in this header, after the internal dispatch helpers.
//
// Params:      parameter bundle type; passed by const reference.
// kernel_size: compile-time kernel width.
template <typename Params, int kernel_size>
void Transform1D(const Params& params);
26*5f39d1b3SJooyung Han 
27*5f39d1b3SJooyung Han namespace internal {
28*5f39d1b3SJooyung Han 
29*5f39d1b3SJooyung Han class Transform1DExecutor {
30*5f39d1b3SJooyung Han  public:
31*5f39d1b3SJooyung Han   template <typename P, int kernel_size, int leftovers>
ExecuteDispatch1D(const P & params)32*5f39d1b3SJooyung Han   static void ExecuteDispatch1D(const P& params) {
33*5f39d1b3SJooyung Han     Transform1DKernel<typename P::InType, typename P::OutType,
34*5f39d1b3SJooyung Han                       typename P::Kernel, kernel_size,
35*5f39d1b3SJooyung Han                       leftovers>::Transform(params.input, params.kernel,
36*5f39d1b3SJooyung Han                                             params.output);
37*5f39d1b3SJooyung Han   }
38*5f39d1b3SJooyung Han };
39*5f39d1b3SJooyung Han 
// Turns the runtime `leftovers` value into a compile-time template argument.
// Each instantiation tests one candidate (variable_leftovers); on a mismatch
// it hands over to the instantiation for variable_leftovers - 1. The
// specialization for 0 ends the chain.
//
// E: executor type exposing a static member template
//    ExecuteDispatch1D<P, kernel_size, leftovers>(const P&).
// P: parameter bundle type forwarded untouched to the executor.
template <typename E, typename P, int kernel_size, int variable_leftovers>
struct Dispatch1D {
  static void Execute(const P& params, int leftovers) {
#if defined(DEBUG) && defined(DEBUG_METAGEMM_VERBOSE)
    // std::endl both terminates the line and flushes the stream.
    std::cout << "Dispatch(1): " << kernel_size << ":" << variable_leftovers
              << std::endl;
#endif
    if (leftovers != variable_leftovers) {
      // Not this candidate: try the next smaller one.
      typedef Dispatch1D<E, P, kernel_size, variable_leftovers - 1>
          NextCandidate;
      NextCandidate::Execute(params, leftovers);
      return;
    }
    E::template ExecuteDispatch1D<P, kernel_size, variable_leftovers>(params);
  }
};
58*5f39d1b3SJooyung Han 
59*5f39d1b3SJooyung Han template <typename E, typename P, int kernel_size>
60*5f39d1b3SJooyung Han struct Dispatch1D<E, P, kernel_size, 0> {
61*5f39d1b3SJooyung Han   static void Execute(const P& params, int leftovers) {
62*5f39d1b3SJooyung Han #ifdef DEBUG
63*5f39d1b3SJooyung Han #ifdef DEBUG_METAGEMM_VERBOSE
64*5f39d1b3SJooyung Han     std::cout << "Dispatch(1): " << kernel_size << ": 0" << std::endl
65*5f39d1b3SJooyung Han               << std::flush;
66*5f39d1b3SJooyung Han #endif
67*5f39d1b3SJooyung Han #endif
68*5f39d1b3SJooyung Han     if (leftovers == 0) {
69*5f39d1b3SJooyung Han       E::template ExecuteDispatch1D<P, kernel_size, 0>(params);
70*5f39d1b3SJooyung Han     } else {
71*5f39d1b3SJooyung Han       std::cerr << "FATAL: dispatch1D failed: ran out of cases." << std::endl
72*5f39d1b3SJooyung Han                 << std::flush;
73*5f39d1b3SJooyung Han       std::exit(1);
74*5f39d1b3SJooyung Han     }
75*5f39d1b3SJooyung Han   }
76*5f39d1b3SJooyung Han };
77*5f39d1b3SJooyung Han 
78*5f39d1b3SJooyung Han }  // namespace internal
79*5f39d1b3SJooyung Han 
80*5f39d1b3SJooyung Han template <typename Params, int kernel_size>
81*5f39d1b3SJooyung Han inline void Transform1D(const Params& params) {
82*5f39d1b3SJooyung Han   internal::Dispatch1D<internal::Transform1DExecutor, Params, kernel_size,
83*5f39d1b3SJooyung Han                        kernel_size - 1>::Execute(params, params.kernel.count %
84*5f39d1b3SJooyung Han                                                              kernel_size);
85*5f39d1b3SJooyung Han }
86*5f39d1b3SJooyung Han 
87*5f39d1b3SJooyung Han }  // namespace meta
88*5f39d1b3SJooyung Han }  // namespace gemmlowp
89*5f39d1b3SJooyung Han 
90*5f39d1b3SJooyung Han #endif  // GEMMLOWP_META_SINGLE_THREAD_TRANSFORM_H_
91