xref: /aosp_15_r20/external/gemmlowp/meta/multi_thread_transform.h (revision 5f39d1b313f0528e11bae88b3029b54b9e1033e7)
1*5f39d1b3SJooyung Han // Copyright 2016 The Gemmlowp Authors. All Rights Reserved.
2*5f39d1b3SJooyung Han //
3*5f39d1b3SJooyung Han // Licensed under the Apache License, Version 2.0 (the "License");
4*5f39d1b3SJooyung Han // you may not use this file except in compliance with the License.
5*5f39d1b3SJooyung Han // You may obtain a copy of the License at
6*5f39d1b3SJooyung Han //
7*5f39d1b3SJooyung Han //     http://www.apache.org/licenses/LICENSE-2.0
8*5f39d1b3SJooyung Han //
9*5f39d1b3SJooyung Han // Unless required by applicable law or agreed to in writing, software
10*5f39d1b3SJooyung Han // distributed under the License is distributed on an "AS IS" BASIS,
11*5f39d1b3SJooyung Han // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*5f39d1b3SJooyung Han // See the License for the specific language governing permissions and
13*5f39d1b3SJooyung Han // limitations under the License.
14*5f39d1b3SJooyung Han 
15*5f39d1b3SJooyung Han #ifndef GEMMLOWP_META_MULTI_THREAD_TRANSFORM_H_
16*5f39d1b3SJooyung Han #define GEMMLOWP_META_MULTI_THREAD_TRANSFORM_H_
17*5f39d1b3SJooyung Han 
18*5f39d1b3SJooyung Han #include "multi_thread_common.h"
19*5f39d1b3SJooyung Han #include "single_thread_transform.h"
20*5f39d1b3SJooyung Han 
21*5f39d1b3SJooyung Han namespace gemmlowp {
22*5f39d1b3SJooyung Han namespace meta {
23*5f39d1b3SJooyung Han namespace internal {
24*5f39d1b3SJooyung Han 
// Tuning constants used by PrepareTransform1DTasks() when deciding how many
// tasks a 1D transform is split into. Both are compared against the value
// returned by Transform1DUtil::EstimateComputeCost().

// Subtracted from the estimated compute cost before the task count is derived.
constexpr int kTransformTaskOverhead = 128000;

// The remaining estimated cost is divided by this value to obtain the maximum
// number of tasks allowed by the size of the work.
constexpr int kMinTransformTaskSize = 32000;
27*5f39d1b3SJooyung Han 
28*5f39d1b3SJooyung Han template <typename MultiThreadingContext, typename Params>
PrepareTransform1DTasks(MultiThreadingContext * context,const Params & params,int kernel_size,std::vector<Params> * task_params)29*5f39d1b3SJooyung Han inline bool PrepareTransform1DTasks(MultiThreadingContext* context,
30*5f39d1b3SJooyung Han                                     const Params& params, int kernel_size,
31*5f39d1b3SJooyung Han                                     std::vector<Params>* task_params) {
32*5f39d1b3SJooyung Han   typedef Transform1DUtil<typename Params::InType, typename Params::OutType,
33*5f39d1b3SJooyung Han                           typename Params::Kernel>
34*5f39d1b3SJooyung Han       Util;
35*5f39d1b3SJooyung Han 
36*5f39d1b3SJooyung Han   const int max_threads = ResolveMaxThreads(context->max_num_threads());
37*5f39d1b3SJooyung Han   const int task_size = Util::EstimateComputeCost(params.kernel);
38*5f39d1b3SJooyung Han   const int max_tasks_by_size =
39*5f39d1b3SJooyung Han       (task_size - kTransformTaskOverhead) / kMinTransformTaskSize;
40*5f39d1b3SJooyung Han 
41*5f39d1b3SJooyung Han   const int real_tasks = std::max(1, std::min(max_threads, max_tasks_by_size));
42*5f39d1b3SJooyung Han 
43*5f39d1b3SJooyung Han   if (real_tasks == 1) {
44*5f39d1b3SJooyung Han     return false;
45*5f39d1b3SJooyung Han   }
46*5f39d1b3SJooyung Han 
47*5f39d1b3SJooyung Han   const int chunk = params.kernel.count / real_tasks;
48*5f39d1b3SJooyung Han   for (int i = 0; i < real_tasks - 1; ++i) {
49*5f39d1b3SJooyung Han     task_params->push_back(params);
50*5f39d1b3SJooyung Han     Params& task = task_params->back();
51*5f39d1b3SJooyung Han     task.kernel.count = chunk;
52*5f39d1b3SJooyung Han     task.input = Util::OffsetInput(params.kernel, params.input, i * chunk);
53*5f39d1b3SJooyung Han     task.output = Util::OffsetOutput(params.kernel, params.output, i * chunk);
54*5f39d1b3SJooyung Han   }
55*5f39d1b3SJooyung Han   task_params->push_back(params);
56*5f39d1b3SJooyung Han   Params& task = task_params->back();
57*5f39d1b3SJooyung Han   const int sum_chunk = (real_tasks - 1) * chunk;
58*5f39d1b3SJooyung Han   task.kernel.count = params.kernel.count - sum_chunk;
59*5f39d1b3SJooyung Han   task.input = Util::OffsetInput(params.kernel, params.input, sum_chunk);
60*5f39d1b3SJooyung Han   task.output = Util::OffsetOutput(params.kernel, params.output, sum_chunk);
61*5f39d1b3SJooyung Han   return true;
62*5f39d1b3SJooyung Han }
63*5f39d1b3SJooyung Han 
64*5f39d1b3SJooyung Han template <typename Params, int kernel_size>
65*5f39d1b3SJooyung Han struct Transform1DTaskRunner : gemmlowp::Task {
Transform1DTaskRunnerTransform1DTaskRunner66*5f39d1b3SJooyung Han   Transform1DTaskRunner(const Params& params) : params(params) {}
67*5f39d1b3SJooyung Han 
RunTransform1DTaskRunner68*5f39d1b3SJooyung Han   void Run() override { Transform1D<Params, kernel_size>(params); }
69*5f39d1b3SJooyung Han 
70*5f39d1b3SJooyung Han   Params params;
71*5f39d1b3SJooyung Han };
72*5f39d1b3SJooyung Han 
73*5f39d1b3SJooyung Han }  // namespace internal
74*5f39d1b3SJooyung Han 
75*5f39d1b3SJooyung Han template <typename MultiThreadingContext, typename Params, int kernel_size>
MultiThreadTransform1D(MultiThreadingContext * context,const Params & params)76*5f39d1b3SJooyung Han inline void MultiThreadTransform1D(MultiThreadingContext* context,
77*5f39d1b3SJooyung Han                                    const Params& params) {
78*5f39d1b3SJooyung Han   typedef internal::Transform1DTaskRunner<Params, kernel_size> TaskRunnerType;
79*5f39d1b3SJooyung Han 
80*5f39d1b3SJooyung Han   std::vector<Params> task_params;
81*5f39d1b3SJooyung Han   if (!internal::PrepareTransform1DTasks<MultiThreadingContext, Params>(
82*5f39d1b3SJooyung Han           context, params, kernel_size, &task_params)) {
83*5f39d1b3SJooyung Han     Transform1D<Params, kernel_size>(params);
84*5f39d1b3SJooyung Han     return;
85*5f39d1b3SJooyung Han   }
86*5f39d1b3SJooyung Han 
87*5f39d1b3SJooyung Han   auto workers_pool = context->workers_pool();
88*5f39d1b3SJooyung Han   std::vector<Task*> tasks;
89*5f39d1b3SJooyung Han   for (auto& task_param : task_params) {
90*5f39d1b3SJooyung Han     tasks.push_back(new TaskRunnerType(task_param));
91*5f39d1b3SJooyung Han   }
92*5f39d1b3SJooyung Han   workers_pool->Execute(tasks);
93*5f39d1b3SJooyung Han }
94*5f39d1b3SJooyung Han 
95*5f39d1b3SJooyung Han }  // namespace meta
96*5f39d1b3SJooyung Han }  // namespace gemmlowp
97*5f39d1b3SJooyung Han 
98*5f39d1b3SJooyung Han #endif  // GEMMLOWP_META_MULTI_THREAD_TRANSFORM_H_
99