xref: /aosp_15_r20/external/ComputeLibrary/arm_compute/runtime/IScheduler.h (revision c217d954acce2dbc11938adb493fc0abd69584f3)
1 /*
2  * Copyright (c) 2017-2021 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_ISCHEDULER_H
25 #define ARM_COMPUTE_ISCHEDULER_H
26 
27 #include "arm_compute/core/CPP/CPPTypes.h"
28 #include "arm_compute/core/Types.h"
29 #include "arm_compute/core/experimental/Types.h"
30 
31 #include <functional>
32 #include <limits>
33 
34 namespace arm_compute
35 {
36 class ICPPKernel;
37 class ITensor;
38 class Window;
39 
40 /** Scheduler interface to run kernels */
41 class IScheduler
42 {
43 public:
44     /** Strategies available to split a workload */
45     enum class StrategyHint
46     {
47         STATIC,  /**< Split the workload evenly among the threads */
48         DYNAMIC, /**< Split the workload dynamically using a bucket system */
49     };
50 
51     /** Function to be used and map a given thread id to a logical core id
52      *
53      * Mapping function expects the thread index and total number of cores as input,
54      * and returns the logical core index to bind against
55      */
56     using BindFunc = std::function<int(int, int)>;
57 
58     /** When arm_compute::ISchedular::Hints::_split_dimension is initialized with this value
59      * then the schedular is free to break down the problem space over as many dimensions
60      * as it wishes
61      */
62     static constexpr unsigned int split_dimensions_all = std::numeric_limits<unsigned>::max();
63 
64     /** Scheduler hints
65      *
66      * Collection of preferences set by the function regarding how to split a given workload
67      */
68     class Hints
69     {
70     public:
71         /** Constructor
72          *
73          * @param[in] split_dimension Dimension along which to split the kernel's execution window.
74          * @param[in] strategy        (Optional) Split strategy.
75          * @param[in] threshold       (Optional) Dynamic scheduling capping threshold.
76          */
77         Hints(unsigned int split_dimension, StrategyHint strategy = StrategyHint::STATIC, int threshold = 0)
_split_dimension(split_dimension)78             : _split_dimension(split_dimension), _strategy(strategy), _threshold(threshold)
79         {
80         }
81         /** Set the split_dimension hint
82          *
83          * @param[in] split_dimension Dimension along which to split the kernel's execution window.
84          *
85          * @return the Hints object
86          */
set_split_dimension(unsigned int split_dimension)87         Hints &set_split_dimension(unsigned int split_dimension)
88         {
89             _split_dimension = split_dimension;
90             return *this;
91         }
92         /** Return the prefered split dimension
93          *
94          * @return The split dimension
95          */
split_dimension()96         unsigned int split_dimension() const
97         {
98             return _split_dimension;
99         }
100 
101         /** Set the strategy hint
102          *
103          * @param[in] strategy Prefered strategy to use to split the workload
104          *
105          * @return the Hints object
106          */
set_strategy(StrategyHint strategy)107         Hints &set_strategy(StrategyHint strategy)
108         {
109             _strategy = strategy;
110             return *this;
111         }
112         /** Return the prefered strategy to use to split workload.
113          *
114          * @return The strategy
115          */
strategy()116         StrategyHint strategy() const
117         {
118             return _strategy;
119         }
120         /** Return the granule capping threshold to be used by dynamic scheduling.
121          *
122          * @return The capping threshold
123          */
threshold()124         int threshold() const
125         {
126             return _threshold;
127         }
128 
129     private:
130         unsigned int _split_dimension{};
131         StrategyHint _strategy{};
132         int          _threshold{};
133     };
134     /** Signature for the workloads to execute */
135     using Workload = std::function<void(const ThreadInfo &)>;
136     /** Default constructor. */
137     IScheduler();
138 
139     /** Destructor. */
140     virtual ~IScheduler() = default;
141 
142     /** Sets the number of threads the scheduler will use to run the kernels.
143      *
144      * @param[in] num_threads If set to 0, then one thread per CPU core available on the system will be used, otherwise the number of threads specified.
145      */
146     virtual void set_num_threads(unsigned int num_threads) = 0;
147 
148     /** Sets the number of threads the scheduler will use to run the kernels but also using a binding function to pin the threads to given logical cores
149      *
150      * @param[in] num_threads If set to 0, then one thread per CPU core available on the system will be used, otherwise the number of threads specified.
151      * @param[in] func        Binding function to use.
152      */
153     virtual void set_num_threads_with_affinity(unsigned int num_threads, BindFunc func);
154 
155     /** Returns the number of threads that the SingleThreadScheduler has in its pool.
156      *
157      * @return Number of threads available in SingleThreadScheduler.
158      */
159     virtual unsigned int num_threads() const = 0;
160 
161     /** Runs the kernel in the same thread as the caller synchronously.
162      *
163      * @param[in] kernel Kernel to execute.
164      * @param[in] hints  Hints for the scheduler.
165      */
166     virtual void schedule(ICPPKernel *kernel, const Hints &hints) = 0;
167 
168     /** Runs the kernel in the same thread as the caller synchronously.
169      *
170      * @param[in] kernel  Kernel to execute.
171      * @param[in] hints   Hints for the scheduler.
172      * @param[in] window  Window to use for kernel execution.
173      * @param[in] tensors Vector containing the tensors to operate on.
174      */
175     virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors) = 0;
176 
177     /** Execute all the passed workloads
178      *
179      * @note there is no guarantee regarding the order in which the workloads will be executed or whether or not they will be executed in parallel.
180      *
181      * @param[in] workloads Array of workloads to run
182      * @param[in] tag       String that can be used by profiling tools to identify the workloads run by the scheduler (Can be null).
183      */
184     virtual void run_tagged_workloads(std::vector<Workload> &workloads, const char *tag);
185 
186     /** Get CPU info.
187      *
188      * @return CPU info.
189      */
190     CPUInfo &cpu_info();
191     /** Get a hint for the best possible number of execution threads
192      *
193      * @warning In case we can't work out the best number of threads,
194      *          std::thread::hardware_concurrency() is returned else 1 in case of bare metal builds
195      *
196      * @return Best possible number of execution threads to use
197      */
198     unsigned int num_threads_hint() const;
199 
200 protected:
201     /** Execute all the passed workloads
202      *
203      * @note there is no guarantee regarding the order in which the workloads will be executed or whether or not they will be executed in parallel.
204      *
205      * @param[in] workloads Array of workloads to run
206      */
207     virtual void run_workloads(std::vector<Workload> &workloads) = 0;
208 
209     /** Common scheduler logic to execute the given kernel
210      *
211      * @param[in] kernel  Kernel to execute.
212      * @param[in] hints   Hints for the scheduler.
213      * @param[in] window  Window to use for kernel execution.
214      * @param[in] tensors Vector containing the tensors to operate on.
215      */
216     void schedule_common(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors);
217 
218     /** Adjust the number of windows to the optimize performance
219      * (used for small workloads where smaller number of threads might improve the performance)
220      *
221      * @param[in] window           Window to use for kernel execution
222      * @param[in] split_dimension  Axis of dimension to split
223      * @param[in] init_num_windows Initial number of sub-windows to split
224      * @param[in] kernel           Kernel to execute
225      * @param[in] cpu_info         The CPU platform used to create the context.
226      *
227      * @return Adjusted number of windows
228      */
229     std::size_t adjust_num_of_windows(const Window &window, std::size_t split_dimension, std::size_t init_num_windows, const ICPPKernel &kernel, const CPUInfo &cpu_info);
230 
231 private:
232     unsigned int _num_threads_hint = {};
233 };
234 } // namespace arm_compute
235 #endif /* ARM_COMPUTE_ISCHEDULER_H */
236