1*c217d954SCole Faust /* 2*c217d954SCole Faust * Copyright (c) 2017-2021 Arm Limited. 3*c217d954SCole Faust * 4*c217d954SCole Faust * SPDX-License-Identifier: MIT 5*c217d954SCole Faust * 6*c217d954SCole Faust * Permission is hereby granted, free of charge, to any person obtaining a copy 7*c217d954SCole Faust * of this software and associated documentation files (the "Software"), to 8*c217d954SCole Faust * deal in the Software without restriction, including without limitation the 9*c217d954SCole Faust * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10*c217d954SCole Faust * sell copies of the Software, and to permit persons to whom the Software is 11*c217d954SCole Faust * furnished to do so, subject to the following conditions: 12*c217d954SCole Faust * 13*c217d954SCole Faust * The above copyright notice and this permission notice shall be included in all 14*c217d954SCole Faust * copies or substantial portions of the Software. 15*c217d954SCole Faust * 16*c217d954SCole Faust * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17*c217d954SCole Faust * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18*c217d954SCole Faust * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19*c217d954SCole Faust * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20*c217d954SCole Faust * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21*c217d954SCole Faust * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22*c217d954SCole Faust * SOFTWARE. 23*c217d954SCole Faust */ 24*c217d954SCole Faust #ifndef ARM_COMPUTE_ISCHEDULER_H 25*c217d954SCole Faust #define ARM_COMPUTE_ISCHEDULER_H 26*c217d954SCole Faust 27*c217d954SCole Faust #include "arm_compute/core/CPP/CPPTypes.h" 28*c217d954SCole Faust #include "arm_compute/core/Types.h" 29*c217d954SCole Faust #include "arm_compute/core/experimental/Types.h" 30*c217d954SCole Faust 31*c217d954SCole Faust #include <functional> 32*c217d954SCole Faust #include <limits> 33*c217d954SCole Faust 34*c217d954SCole Faust namespace arm_compute 35*c217d954SCole Faust { 36*c217d954SCole Faust class ICPPKernel; 37*c217d954SCole Faust class ITensor; 38*c217d954SCole Faust class Window; 39*c217d954SCole Faust 40*c217d954SCole Faust /** Scheduler interface to run kernels */ 41*c217d954SCole Faust class IScheduler 42*c217d954SCole Faust { 43*c217d954SCole Faust public: 44*c217d954SCole Faust /** Strategies available to split a workload */ 45*c217d954SCole Faust enum class StrategyHint 46*c217d954SCole Faust { 47*c217d954SCole Faust STATIC, /**< Split the workload evenly among the threads */ 48*c217d954SCole Faust DYNAMIC, /**< Split the workload dynamically using a bucket system */ 49*c217d954SCole Faust }; 50*c217d954SCole Faust 51*c217d954SCole Faust /** Function to be used and map a given thread id to a logical core id 52*c217d954SCole Faust * 53*c217d954SCole Faust * Mapping function expects the thread index and total number of cores as input, 54*c217d954SCole Faust * and returns the logical core index to bind against 55*c217d954SCole Faust */ 56*c217d954SCole Faust using BindFunc = std::function<int(int, int)>; 57*c217d954SCole Faust 58*c217d954SCole Faust /** When arm_compute::ISchedular::Hints::_split_dimension is initialized with this value 59*c217d954SCole Faust * then the schedular is free to break down the problem space over as many dimensions 60*c217d954SCole Faust * as it wishes 61*c217d954SCole Faust */ 62*c217d954SCole Faust static constexpr unsigned int split_dimensions_all = std::numeric_limits<unsigned>::max(); 63*c217d954SCole Faust 64*c217d954SCole Faust /** Scheduler hints 65*c217d954SCole Faust * 66*c217d954SCole Faust * Collection of preferences set by the function regarding how to split a given workload 67*c217d954SCole Faust */ 68*c217d954SCole Faust class Hints 69*c217d954SCole Faust { 70*c217d954SCole Faust public: 71*c217d954SCole Faust /** Constructor 72*c217d954SCole Faust * 73*c217d954SCole Faust * @param[in] split_dimension Dimension along which to split the kernel's execution window. 74*c217d954SCole Faust * @param[in] strategy (Optional) Split strategy. 75*c217d954SCole Faust * @param[in] threshold (Optional) Dynamic scheduling capping threshold. 76*c217d954SCole Faust */ 77*c217d954SCole Faust Hints(unsigned int split_dimension, StrategyHint strategy = StrategyHint::STATIC, int threshold = 0) _split_dimension(split_dimension)78*c217d954SCole Faust : _split_dimension(split_dimension), _strategy(strategy), _threshold(threshold) 79*c217d954SCole Faust { 80*c217d954SCole Faust } 81*c217d954SCole Faust /** Set the split_dimension hint 82*c217d954SCole Faust * 83*c217d954SCole Faust * @param[in] split_dimension Dimension along which to split the kernel's execution window. 84*c217d954SCole Faust * 85*c217d954SCole Faust * @return the Hints object 86*c217d954SCole Faust */ set_split_dimension(unsigned int split_dimension)87*c217d954SCole Faust Hints &set_split_dimension(unsigned int split_dimension) 88*c217d954SCole Faust { 89*c217d954SCole Faust _split_dimension = split_dimension; 90*c217d954SCole Faust return *this; 91*c217d954SCole Faust } 92*c217d954SCole Faust /** Return the prefered split dimension 93*c217d954SCole Faust * 94*c217d954SCole Faust * @return The split dimension 95*c217d954SCole Faust */ split_dimension()96*c217d954SCole Faust unsigned int split_dimension() const 97*c217d954SCole Faust { 98*c217d954SCole Faust return _split_dimension; 99*c217d954SCole Faust } 100*c217d954SCole Faust 101*c217d954SCole Faust /** Set the strategy hint 102*c217d954SCole Faust * 103*c217d954SCole Faust * @param[in] strategy Prefered strategy to use to split the workload 104*c217d954SCole Faust * 105*c217d954SCole Faust * @return the Hints object 106*c217d954SCole Faust */ set_strategy(StrategyHint strategy)107*c217d954SCole Faust Hints &set_strategy(StrategyHint strategy) 108*c217d954SCole Faust { 109*c217d954SCole Faust _strategy = strategy; 110*c217d954SCole Faust return *this; 111*c217d954SCole Faust } 112*c217d954SCole Faust /** Return the prefered strategy to use to split workload. 113*c217d954SCole Faust * 114*c217d954SCole Faust * @return The strategy 115*c217d954SCole Faust */ strategy()116*c217d954SCole Faust StrategyHint strategy() const 117*c217d954SCole Faust { 118*c217d954SCole Faust return _strategy; 119*c217d954SCole Faust } 120*c217d954SCole Faust /** Return the granule capping threshold to be used by dynamic scheduling. 121*c217d954SCole Faust * 122*c217d954SCole Faust * @return The capping threshold 123*c217d954SCole Faust */ threshold()124*c217d954SCole Faust int threshold() const 125*c217d954SCole Faust { 126*c217d954SCole Faust return _threshold; 127*c217d954SCole Faust } 128*c217d954SCole Faust 129*c217d954SCole Faust private: 130*c217d954SCole Faust unsigned int _split_dimension{}; 131*c217d954SCole Faust StrategyHint _strategy{}; 132*c217d954SCole Faust int _threshold{}; 133*c217d954SCole Faust }; 134*c217d954SCole Faust /** Signature for the workloads to execute */ 135*c217d954SCole Faust using Workload = std::function<void(const ThreadInfo &)>; 136*c217d954SCole Faust /** Default constructor. */ 137*c217d954SCole Faust IScheduler(); 138*c217d954SCole Faust 139*c217d954SCole Faust /** Destructor. */ 140*c217d954SCole Faust virtual ~IScheduler() = default; 141*c217d954SCole Faust 142*c217d954SCole Faust /** Sets the number of threads the scheduler will use to run the kernels. 143*c217d954SCole Faust * 144*c217d954SCole Faust * @param[in] num_threads If set to 0, then one thread per CPU core available on the system will be used, otherwise the number of threads specified. 145*c217d954SCole Faust */ 146*c217d954SCole Faust virtual void set_num_threads(unsigned int num_threads) = 0; 147*c217d954SCole Faust 148*c217d954SCole Faust /** Sets the number of threads the scheduler will use to run the kernels but also using a binding function to pin the threads to given logical cores 149*c217d954SCole Faust * 150*c217d954SCole Faust * @param[in] num_threads If set to 0, then one thread per CPU core available on the system will be used, otherwise the number of threads specified. 151*c217d954SCole Faust * @param[in] func Binding function to use. 152*c217d954SCole Faust */ 153*c217d954SCole Faust virtual void set_num_threads_with_affinity(unsigned int num_threads, BindFunc func); 154*c217d954SCole Faust 155*c217d954SCole Faust /** Returns the number of threads that the SingleThreadScheduler has in its pool. 156*c217d954SCole Faust * 157*c217d954SCole Faust * @return Number of threads available in SingleThreadScheduler. 158*c217d954SCole Faust */ 159*c217d954SCole Faust virtual unsigned int num_threads() const = 0; 160*c217d954SCole Faust 161*c217d954SCole Faust /** Runs the kernel in the same thread as the caller synchronously. 162*c217d954SCole Faust * 163*c217d954SCole Faust * @param[in] kernel Kernel to execute. 164*c217d954SCole Faust * @param[in] hints Hints for the scheduler. 165*c217d954SCole Faust */ 166*c217d954SCole Faust virtual void schedule(ICPPKernel *kernel, const Hints &hints) = 0; 167*c217d954SCole Faust 168*c217d954SCole Faust /** Runs the kernel in the same thread as the caller synchronously. 169*c217d954SCole Faust * 170*c217d954SCole Faust * @param[in] kernel Kernel to execute. 171*c217d954SCole Faust * @param[in] hints Hints for the scheduler. 172*c217d954SCole Faust * @param[in] window Window to use for kernel execution. 173*c217d954SCole Faust * @param[in] tensors Vector containing the tensors to operate on. 174*c217d954SCole Faust */ 175*c217d954SCole Faust virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors) = 0; 176*c217d954SCole Faust 177*c217d954SCole Faust /** Execute all the passed workloads 178*c217d954SCole Faust * 179*c217d954SCole Faust * @note there is no guarantee regarding the order in which the workloads will be executed or whether or not they will be executed in parallel. 180*c217d954SCole Faust * 181*c217d954SCole Faust * @param[in] workloads Array of workloads to run 182*c217d954SCole Faust * @param[in] tag String that can be used by profiling tools to identify the workloads run by the scheduler (Can be null). 183*c217d954SCole Faust */ 184*c217d954SCole Faust virtual void run_tagged_workloads(std::vector<Workload> &workloads, const char *tag); 185*c217d954SCole Faust 186*c217d954SCole Faust /** Get CPU info. 187*c217d954SCole Faust * 188*c217d954SCole Faust * @return CPU info. 189*c217d954SCole Faust */ 190*c217d954SCole Faust CPUInfo &cpu_info(); 191*c217d954SCole Faust /** Get a hint for the best possible number of execution threads 192*c217d954SCole Faust * 193*c217d954SCole Faust * @warning In case we can't work out the best number of threads, 194*c217d954SCole Faust * std::thread::hardware_concurrency() is returned else 1 in case of bare metal builds 195*c217d954SCole Faust * 196*c217d954SCole Faust * @return Best possible number of execution threads to use 197*c217d954SCole Faust */ 198*c217d954SCole Faust unsigned int num_threads_hint() const; 199*c217d954SCole Faust 200*c217d954SCole Faust protected: 201*c217d954SCole Faust /** Execute all the passed workloads 202*c217d954SCole Faust * 203*c217d954SCole Faust * @note there is no guarantee regarding the order in which the workloads will be executed or whether or not they will be executed in parallel. 204*c217d954SCole Faust * 205*c217d954SCole Faust * @param[in] workloads Array of workloads to run 206*c217d954SCole Faust */ 207*c217d954SCole Faust virtual void run_workloads(std::vector<Workload> &workloads) = 0; 208*c217d954SCole Faust 209*c217d954SCole Faust /** Common scheduler logic to execute the given kernel 210*c217d954SCole Faust * 211*c217d954SCole Faust * @param[in] kernel Kernel to execute. 212*c217d954SCole Faust * @param[in] hints Hints for the scheduler. 213*c217d954SCole Faust * @param[in] window Window to use for kernel execution. 214*c217d954SCole Faust * @param[in] tensors Vector containing the tensors to operate on. 215*c217d954SCole Faust */ 216*c217d954SCole Faust void schedule_common(ICPPKernel *kernel, const Hints &hints, const Window &window, ITensorPack &tensors); 217*c217d954SCole Faust 218*c217d954SCole Faust /** Adjust the number of windows to the optimize performance 219*c217d954SCole Faust * (used for small workloads where smaller number of threads might improve the performance) 220*c217d954SCole Faust * 221*c217d954SCole Faust * @param[in] window Window to use for kernel execution 222*c217d954SCole Faust * @param[in] split_dimension Axis of dimension to split 223*c217d954SCole Faust * @param[in] init_num_windows Initial number of sub-windows to split 224*c217d954SCole Faust * @param[in] kernel Kernel to execute 225*c217d954SCole Faust * @param[in] cpu_info The CPU platform used to create the context. 226*c217d954SCole Faust * 227*c217d954SCole Faust * @return Adjusted number of windows 228*c217d954SCole Faust */ 229*c217d954SCole Faust std::size_t adjust_num_of_windows(const Window &window, std::size_t split_dimension, std::size_t init_num_windows, const ICPPKernel &kernel, const CPUInfo &cpu_info); 230*c217d954SCole Faust 231*c217d954SCole Faust private: 232*c217d954SCole Faust unsigned int _num_threads_hint = {}; 233*c217d954SCole Faust }; 234*c217d954SCole Faust } // namespace arm_compute 235*c217d954SCole Faust #endif /* ARM_COMPUTE_ISCHEDULER_H */ 236