1 // 2 // Copyright © 2020 Arm Ltd. All rights reserved. 3 // SPDX-License-Identifier: MIT 4 // 5 6 #pragma once 7 8 #include "ArmnnDriver.hpp" 9 #include "ArmnnDriverImpl.hpp" 10 #include "RequestThread_1_3.hpp" 11 #include "ModelToINetworkConverter.hpp" 12 13 #include <NeuralNetworks.h> 14 #include <armnn/ArmNN.hpp> 15 #include <armnn/Threadpool.hpp> 16 17 18 #include <string> 19 #include <vector> 20 21 namespace armnn_driver 22 { 23 using CallbackAsync_1_3 = std::function< 24 void(V1_3::ErrorStatus errorStatus, 25 std::vector<::android::hardware::neuralnetworks::V1_2::OutputShape> outputShapes, 26 const ::android::hardware::neuralnetworks::V1_2::Timing& timing, 27 std::string callingFunction)>; 28 29 struct ExecutionContext_1_3 30 { 31 ::android::hardware::neuralnetworks::V1_2::MeasureTiming measureTimings = 32 ::android::hardware::neuralnetworks::V1_2::MeasureTiming::NO; 33 TimePoint driverStart; 34 TimePoint driverEnd; 35 TimePoint deviceStart; 36 TimePoint deviceEnd; 37 }; 38 39 using CallbackContext_1_3 = CallbackContext<CallbackAsync_1_3, ExecutionContext_1_3>; 40 41 using executeFenced_cb = std::function<void(::android::hardware::neuralnetworks::V1_3::ErrorStatus status, 42 const ::android::hardware::hidl_handle& syncFence, 43 const ::android::sp<::android::hardware::neuralnetworks::V1_3::IFencedExecutionCallback>& callback)>; 44 45 template <typename HalVersion> 46 class ArmnnPreparedModel_1_3 : public V1_3::IPreparedModel 47 { 48 public: 49 using HalModel = typename V1_3::Model; 50 51 ArmnnPreparedModel_1_3(armnn::NetworkId networkId, 52 armnn::IRuntime* runtime, 53 const HalModel& model, 54 const std::string& requestInputsAndOutputsDumpDir, 55 const bool gpuProfilingEnabled, 56 V1_3::Priority priority = V1_3::Priority::MEDIUM, 57 const bool asyncModelExecutionEnabled = false, 58 const unsigned int numberOfThreads = 1, 59 const bool importEnabled = false, 60 const bool exportEnabled = false); 61 62 ArmnnPreparedModel_1_3(armnn::NetworkId networkId, 63 armnn::IRuntime* runtime, 64 const std::string& requestInputsAndOutputsDumpDir, 65 const bool gpuProfilingEnabled, 66 V1_3::Priority priority = V1_3::Priority::MEDIUM, 67 const bool asyncModelExecutionEnabled = false, 68 const unsigned int numberOfThreads = 1, 69 const bool importEnabled = false, 70 const bool exportEnabled = false, 71 const bool preparedFromCache = false); 72 73 virtual ~ArmnnPreparedModel_1_3(); 74 75 Return<V1_0::ErrorStatus> execute(const V1_0::Request& request, 76 const ::android::sp<V1_0::IExecutionCallback>& callback) override; 77 78 Return<V1_0::ErrorStatus> execute_1_2(const V1_0::Request& request, V1_2::MeasureTiming measure, 79 const ::android::sp<V1_2::IExecutionCallback>& callback) override; 80 81 Return<V1_3::ErrorStatus> execute_1_3(const V1_3::Request& request, 82 V1_2::MeasureTiming measure, 83 const V1_3::OptionalTimePoint&, 84 const V1_3::OptionalTimeoutDuration&, 85 const ::android::sp<V1_3::IExecutionCallback>& callback) override; 86 87 Return<void> executeSynchronously(const V1_0::Request &request, 88 V1_2::MeasureTiming measure, 89 V1_3::IPreparedModel::executeSynchronously_cb cb) override; 90 91 Return<void> executeSynchronously_1_3(const V1_3::Request &request, 92 V1_2::MeasureTiming measure, 93 const V1_3::OptionalTimePoint& deadline, 94 const V1_3::OptionalTimeoutDuration& loopTimeoutDuration, 95 V1_3::IPreparedModel::executeSynchronously_1_3_cb cb) override; 96 97 Return<void> executeFenced(const V1_3::Request& request, 98 const android::hardware::hidl_vec<android::hardware::hidl_handle>& fenceWaitFor, 99 V1_2::MeasureTiming measure, 100 const V1_3::OptionalTimePoint& deadline, 101 const V1_3::OptionalTimeoutDuration& loopTimeoutDuration, 102 const V1_3::OptionalTimeoutDuration& duration, 103 executeFenced_cb callback) override; 104 105 Return<void> configureExecutionBurst( 106 const ::android::sp<V1_2::IBurstCallback>& callback, 107 const android::hardware::MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel, 108 const android::hardware::MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel, 109 configureExecutionBurst_cb cb) override; 110 111 template<typename CallbackContext> 112 Return<void> ExecuteSynchronously(const V1_3::Request& request, CallbackContext cbCtx); 113 114 /// execute the graph prepared from the request 115 template<typename CallbackContext> 116 Return <V1_3::ErrorStatus> ExecuteGraph( 117 std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools, 118 armnn::InputTensors& inputTensors, 119 armnn::OutputTensors& outputTensors, 120 CallbackContext callback); 121 122 /// Executes this model with dummy inputs (e.g. all zeroes). 123 /// \return false on failure, otherwise true 124 bool ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs); 125 126 V1_3::Priority GetModelPriority(); 127 128 private: 129 130 template<typename CallbackContext> 131 class ArmnnThreadPoolCallback_1_3 : public armnn::IAsyncExecutionCallback 132 { 133 public: ArmnnThreadPoolCallback_1_3(ArmnnPreparedModel_1_3<HalVersion> * model,std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> & pMemPools,std::vector<V1_2::OutputShape> outputShapes,std::shared_ptr<armnn::InputTensors> & inputTensors,std::shared_ptr<armnn::OutputTensors> & outputTensors,CallbackContext callbackContext)134 ArmnnThreadPoolCallback_1_3(ArmnnPreparedModel_1_3<HalVersion>* model, 135 std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools, 136 std::vector<V1_2::OutputShape> outputShapes, 137 std::shared_ptr<armnn::InputTensors>& inputTensors, 138 std::shared_ptr<armnn::OutputTensors>& outputTensors, 139 CallbackContext callbackContext) : 140 m_Model(model), 141 m_MemPools(pMemPools), 142 m_OutputShapes(outputShapes), 143 m_InputTensors(inputTensors), 144 m_OutputTensors(outputTensors), 145 m_CallbackContext(callbackContext) 146 {} 147 148 void Notify(armnn::Status status, armnn::InferenceTimingPair timeTaken) override; 149 150 ArmnnPreparedModel_1_3<HalVersion>* m_Model; 151 std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> m_MemPools; 152 std::vector<V1_2::OutputShape> m_OutputShapes; 153 std::shared_ptr<armnn::InputTensors> m_InputTensors; 154 std::shared_ptr<armnn::OutputTensors> m_OutputTensors; 155 CallbackContext m_CallbackContext; 156 }; 157 158 Return <V1_3::ErrorStatus> Execute(const V1_3::Request& request, 159 V1_2::MeasureTiming measureTiming, 160 CallbackAsync_1_3 callback); 161 162 Return<V1_3::ErrorStatus> PrepareMemoryForInputs( 163 armnn::InputTensors& inputs, 164 const V1_3::Request& request, 165 const std::vector<android::nn::RunTimePoolInfo>& memPools); 166 167 Return<V1_3::ErrorStatus> PrepareMemoryForOutputs( 168 armnn::OutputTensors& outputs, 169 std::vector<V1_2::OutputShape> &outputShapes, 170 const V1_3::Request& request, 171 const std::vector<android::nn::RunTimePoolInfo>& memPools); 172 173 std::tuple<V1_3::ErrorStatus, hidl_vec<V1_2::OutputShape>, V1_2::Timing, std::string> PrepareMemoryForIO( 174 armnn::InputTensors& inputs, 175 armnn::OutputTensors& outputs, 176 std::vector<android::nn::RunTimePoolInfo>& memPools, 177 const V1_3::Request& request); 178 179 template <typename TensorBindingCollection> 180 void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings); 181 182 /// schedule the graph prepared from the request for execution 183 template<typename CallbackContext> 184 void ScheduleGraphForExecution( 185 std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools, 186 std::shared_ptr<armnn::InputTensors>& inputTensors, 187 std::shared_ptr<armnn::OutputTensors>& outputTensors, 188 CallbackContext m_CallbackContext, 189 armnn::QosExecPriority priority); 190 191 armnn::NetworkId m_NetworkId; 192 armnn::IRuntime* m_Runtime; 193 V1_3::Model m_Model; 194 // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads 195 // It is specific to this class, so it is declared as static here 196 static RequestThread_1_3<ArmnnPreparedModel_1_3, 197 HalVersion, 198 CallbackContext_1_3> m_RequestThread; 199 uint32_t m_RequestCount; 200 const std::string& m_RequestInputsAndOutputsDumpDir; 201 const bool m_GpuProfilingEnabled; 202 V1_3::Priority m_ModelPriority; 203 204 // Static to allow sharing of threadpool between ArmnnPreparedModel instances 205 static std::unique_ptr<armnn::Threadpool> m_Threadpool; 206 std::shared_ptr<IWorkingMemHandle> m_WorkingMemHandle; 207 const bool m_AsyncModelExecutionEnabled; 208 const bool m_EnableImport; 209 const bool m_EnableExport; 210 const bool m_PreparedFromCache; 211 }; 212 213 } 214