1 /* 2 * Copyright (c) 2017-2021 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #ifndef ARM_COMPUTE_ISCHEDULER_H 25 #define ARM_COMPUTE_ISCHEDULER_H 26 27 #include "arm_compute/core/CPP/CPPTypes.h" 28 #include "arm_compute/core/Types.h" 29 #include "arm_compute/core/experimental/Types.h" 30 31 #include <functional> 32 #include <limits> 33 34 namespace arm_compute 35 { 36 class ICPPKernel; 37 class ITensor; 38 class Window; 39 40 /** Scheduler interface to run kernels */ 41 class IScheduler 42 { 43 public: 44 /** Strategies available to split a workload */ 45 enum class StrategyHint 46 { 47 STATIC, /**< Split the workload evenly among the threads */ 48 DYNAMIC, /**< Split the workload dynamically using a bucket system */ 49 }; 50 51 /** Function to be used and map a given thread id to a logical core id 52 * 53 * Mapping function expects the thread index and total number of cores as input, 54 * and returns the logical core index to bind against 55 */ 56 using BindFunc = std::function<int(int, int)>; 57 58 /** When arm_compute::ISchedular::Hints::_split_dimension is initialized with this value 59 * then the schedular is free to break down the problem space over as many dimensions 60 * as it wishes 61 */ 62 static constexpr unsigned int split_dimensions_all = std::numeric_limits<unsigned>::max(); 63 64 /** Scheduler hints 65 * 66 * Collection of preferences set by the function regarding how to split a given workload 67 */ 68 class Hints 69 { 70 public: 71 /** Constructor 72 * 73 * @param[in] split_dimension Dimension along which to split the kernel's execution window. 74 * @param[in] strategy (Optional) Split strategy. 75 * @param[in] threshold (Optional) Dynamic scheduling capping threshold. 76 */ 77 Hints(unsigned int split_dimension, StrategyHint strategy = StrategyHint::STATIC, int threshold = 0) _split_dimension(split_dimension)78 : _split_dimension(split_dimension), _strategy(strategy), _threshold(threshold) 79 { 80 } 81 /** Set the split_dimension hint 82 * 83 * @param[in] split_dimension Dimension along which to split the kernel's execution window. 84 * 85 * @return the Hints object 86 */ set_split_dimension(unsigned int split_dimension)87 Hints &set_split_dimension(unsigned int split_dimension) 88 { 89 _split_dimension = split_dimension; 90 return *this; 91 } 92 /** Return the prefered split dimension 93 * 94 * @return The split dimension 95 */ split_dimension()96 unsigned int split_dimension() const 97 { 98 return _split_dimension; 99 } 100 101 /** Set the strategy hint 102 * 103 * @param[in] strategy Prefered strategy to use to split the workload 104 * 105 * @return the Hints object 106 */ set_strategy(StrategyHint strategy)107 Hints &set_strategy(StrategyHint strategy) 108 { 109 _strategy = strategy; 110 return *this; 111 } 112 /** Return the prefered strategy to use to split workload. 113 * 114 * @return The strategy 115 */ strategy()116 StrategyHint strategy() const 117 { 118 return _strategy; 119 } 120 /** Return the granule capping threshold to be used by dynamic scheduling. 121 * 122 * @return The capping threshold 123 */ threshold()124 int threshold() const 125 { 126 return _threshold; 127 } 128 129 private: 130 unsigned int _split_dimension{}; 131 StrategyHint _strategy{}; 132 int _threshold{}; 133 }; 134 /** Signature for the workloads to execute */ 135 using Workload = std::function<void(const ThreadInfo &)>; 136 /** Default constructor. */ 137 IScheduler(); 138 139 /** Destructor. */ 140 virtual ~IScheduler() = default; 141 142 /** Sets the number of threads the scheduler will use to run the kernels. 143 * 144 * @param[in] num_threads If set to 0, then one thread per CPU core available on the system will be used, otherwise the number of threads specified. 145 */ 146 virtual void set_num_threads(unsigned int num_threads) = 0; 147 148 /** Sets the number of threads the scheduler will use to run the kernels but also using a binding function to pin the threads to given logical cores 149 * 150 * @param[in] num_threads If set to 0, then one thread per CPU core available on the system will be used, otherwise the number of threads specified. 151 * @param[in] func Binding function to use. 152 */ 153 virtual void set_num_threads_with_affinity(unsigned int num_threads, BindFunc func); 154 155 /** Returns the number of threads that the SingleThreadScheduler has in its pool. 156 * 157 * @return Number of threads available in SingleThreadScheduler. 158 */ 159 virtual unsigned int num_threads() const = 0; 160 161 /** Runs the kernel in the same thread as the caller synchronously. 162 * 163 * @param[in] kernel Kernel to execute. 164 * @param[in] hints Hints for the scheduler. 165 */ 166 virtual void schedule(ICPPKernel *kernel, const Hints &hints) = 0; 167 168 /** Runs the kernel in the same thread as the caller synchronously. 169 * 170 * @param[in] kernel Kernel to execute. 171 * @param[in] hints Hints for the scheduler. 172 * @param[in] window Window to use for kernel execution. 173 * @param[in] tensors Vector containing the tensors to operate on. 174 */ 175 virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors) = 0; 176 177 /** Execute all the passed workloads 178 * 179 * @note there is no guarantee regarding the order in which the workloads will be executed or whether or not they will be executed in parallel. 180 * 181 * @param[in] workloads Array of workloads to run 182 * @param[in] tag String that can be used by profiling tools to identify the workloads run by the scheduler (Can be null). 183 */ 184 virtual void run_tagged_workloads(std::vector<Workload> &workloads, const char *tag); 185 186 /** Get CPU info. 187 * 188 * @return CPU info. 189 */ 190 CPUInfo &cpu_info(); 191 /** Get a hint for the best possible number of execution threads 192 * 193 * @warning In case we can't work out the best number of threads, 194 * std::thread::hardware_concurrency() is returned else 1 in case of bare metal builds 195 * 196 * @return Best possible number of execution threads to use 197 */ 198 unsigned int num_threads_hint() const; 199 200 protected: 201 /** Execute all the passed workloads 202 * 203 * @note there is no guarantee regarding the order in which the workloads will be executed or whether or not they will be executed in parallel. 204 * 205 * @param[in] workloads Array of workloads to run 206 */ 207 virtual void run_workloads(std::vector<Workload> &workloads) = 0; 208 209 /** Common scheduler logic to execute the given kernel 210 * 211 * @param[in] kernel Kernel to execute. 212 * @param[in] hints Hints for the scheduler. 213 * @param[in] window Window to use for kernel execution. 214 * @param[in] tensors Vector containing the tensors to operate on. 215 */ 216 void schedule_common(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors); 217 218 /** Adjust the number of windows to the optimize performance 219 * (used for small workloads where smaller number of threads might improve the performance) 220 * 221 * @param[in] window Window to use for kernel execution 222 * @param[in] split_dimension Axis of dimension to split 223 * @param[in] init_num_windows Initial number of sub-windows to split 224 * @param[in] kernel Kernel to execute 225 * @param[in] cpu_info The CPU platform used to create the context. 226 * 227 * @return Adjusted number of windows 228 */ 229 std::size_t adjust_num_of_windows(const Window &window, std::size_t split_dimension, std::size_t init_num_windows, const ICPPKernel &kernel, const CPUInfo &cpu_info); 230 231 private: 232 unsigned int _num_threads_hint = {}; 233 }; 234 } // namespace arm_compute 235 #endif /* ARM_COMPUTE_ISCHEDULER_H */ 236