1*3e777be0SXin Li //
2*3e777be0SXin Li // Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
3*3e777be0SXin Li // SPDX-License-Identifier: MIT
4*3e777be0SXin Li //
5*3e777be0SXin Li
6*3e777be0SXin Li #define LOG_TAG "ArmnnDriver"
7*3e777be0SXin Li
8*3e777be0SXin Li #include "ArmnnPreparedModel.hpp"
9*3e777be0SXin Li #include "Utils.hpp"
10*3e777be0SXin Li
11*3e777be0SXin Li #include <armnn/Types.hpp>
12*3e777be0SXin Li
13*3e777be0SXin Li #include <log/log.h>
14*3e777be0SXin Li #include <OperationsUtils.h>
15*3e777be0SXin Li #include <ValidateHal.h>
16*3e777be0SXin Li
17*3e777be0SXin Li #include <chrono>
18*3e777be0SXin Li #include <cinttypes>
19*3e777be0SXin Li
20*3e777be0SXin Li #ifdef ARMNN_ANDROID_S
21*3e777be0SXin Li #include <LegacyUtils.h>
22*3e777be0SXin Li #endif
23*3e777be0SXin Li
24*3e777be0SXin Li using namespace android;
25*3e777be0SXin Li
26*3e777be0SXin Li namespace
27*3e777be0SXin Li {
28*3e777be0SXin Li using namespace armnn_driver;
29*3e777be0SXin Li
NotifyCallbackAndCheck(const::android::sp<V1_0::IExecutionCallback> & callback,V1_0::ErrorStatus errorStatus,std::string callingFunction)30*3e777be0SXin Li void NotifyCallbackAndCheck(const ::android::sp<V1_0::IExecutionCallback>& callback, V1_0::ErrorStatus errorStatus,
31*3e777be0SXin Li std::string callingFunction)
32*3e777be0SXin Li {
33*3e777be0SXin Li Return<void> returned = callback->notify(errorStatus);
34*3e777be0SXin Li // This check is required, if the callback fails and it isn't checked it will bring down the service
35*3e777be0SXin Li if (!returned.isOk())
36*3e777be0SXin Li {
37*3e777be0SXin Li ALOGE("ArmnnDriver::%s: hidl callback failed to return properly: %s",
38*3e777be0SXin Li callingFunction.c_str(), returned.description().c_str());
39*3e777be0SXin Li }
40*3e777be0SXin Li }
41*3e777be0SXin Li
ValidateRequestArgument(const V1_0::RequestArgument & requestArg,const armnn::TensorInfo & tensorInfo)42*3e777be0SXin Li bool ValidateRequestArgument(const V1_0::RequestArgument& requestArg, const armnn::TensorInfo& tensorInfo)
43*3e777be0SXin Li {
44*3e777be0SXin Li if (requestArg.dimensions.size() != 0)
45*3e777be0SXin Li {
46*3e777be0SXin Li if (requestArg.dimensions.size() != tensorInfo.GetNumDimensions())
47*3e777be0SXin Li {
48*3e777be0SXin Li ALOGE("Mismatched dimensions (request argument: %zu, expected: %u)",
49*3e777be0SXin Li requestArg.dimensions.size(), tensorInfo.GetNumDimensions());
50*3e777be0SXin Li return false;
51*3e777be0SXin Li }
52*3e777be0SXin Li
53*3e777be0SXin Li for (unsigned int d = 0; d < tensorInfo.GetNumDimensions(); ++d)
54*3e777be0SXin Li {
55*3e777be0SXin Li if (requestArg.dimensions[d] != 0 && requestArg.dimensions[d] != tensorInfo.GetShape()[d])
56*3e777be0SXin Li {
57*3e777be0SXin Li ALOGE("Mismatched size for dimension %d (request argument: %u, expected %u)",
58*3e777be0SXin Li d, requestArg.dimensions[d], tensorInfo.GetShape()[d]);
59*3e777be0SXin Li return false;
60*3e777be0SXin Li }
61*3e777be0SXin Li }
62*3e777be0SXin Li }
63*3e777be0SXin Li
64*3e777be0SXin Li return true;
65*3e777be0SXin Li }
66*3e777be0SXin Li
GetTensorForRequestArgument(const V1_0::RequestArgument & requestArg,const armnn::TensorInfo & tensorInfo,const std::vector<::android::nn::RunTimePoolInfo> & requestPools)67*3e777be0SXin Li armnn::Tensor GetTensorForRequestArgument(const V1_0::RequestArgument& requestArg,
68*3e777be0SXin Li const armnn::TensorInfo& tensorInfo,
69*3e777be0SXin Li const std::vector<::android::nn::RunTimePoolInfo>& requestPools)
70*3e777be0SXin Li {
71*3e777be0SXin Li if (!ValidateRequestArgument(requestArg, tensorInfo))
72*3e777be0SXin Li {
73*3e777be0SXin Li return armnn::Tensor();
74*3e777be0SXin Li }
75*3e777be0SXin Li
76*3e777be0SXin Li return armnn::Tensor(tensorInfo, GetMemoryFromPool(requestArg.location, requestPools));
77*3e777be0SXin Li }
78*3e777be0SXin Li
// Concatenates a tensor name prefix with a numeric index, e.g. ("Input", 2) -> "Input2".
inline std::string BuildTensorName(const char* tensorNamePrefix, std::size_t index)
{
    std::string name(tensorNamePrefix);
    name += std::to_string(index);
    return name;
}
83*3e777be0SXin Li
84*3e777be0SXin Li } // anonymous namespace
85*3e777be0SXin Li
86*3e777be0SXin Li using namespace android::hardware;
87*3e777be0SXin Li
88*3e777be0SXin Li namespace armnn_driver
89*3e777be0SXin Li {
// Request thread shared by every ArmnnPreparedModel of this HAL version;
// execute() posts work to it for serialized, asynchronous processing.
template<typename HalVersion>
RequestThread<ArmnnPreparedModel, HalVersion, CallbackContext_1_0>
ArmnnPreparedModel<HalVersion>::m_RequestThread;

// Threadpool used when asynchronous model execution is enabled. Static, so it is
// shared across all prepared-model instances; the first async-enabled model
// creates it and later models only load their working-memory handles into it.
template<typename HalVersion>
std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel<HalVersion>::m_Threadpool(nullptr);
96*3e777be0SXin Li
97*3e777be0SXin Li template<typename HalVersion>
98*3e777be0SXin Li template <typename TensorBindingCollection>
DumpTensorsIfRequired(char const * tensorNamePrefix,const TensorBindingCollection & tensorBindings)99*3e777be0SXin Li void ArmnnPreparedModel<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
100*3e777be0SXin Li const TensorBindingCollection& tensorBindings)
101*3e777be0SXin Li {
102*3e777be0SXin Li if (!m_RequestInputsAndOutputsDumpDir.empty())
103*3e777be0SXin Li {
104*3e777be0SXin Li const std::string requestName = std::to_string(m_NetworkId) + "_" + std::to_string(m_RequestCount) + ".dump";
105*3e777be0SXin Li for (std::size_t i = 0u; i < tensorBindings.size(); ++i)
106*3e777be0SXin Li {
107*3e777be0SXin Li DumpTensor(m_RequestInputsAndOutputsDumpDir,
108*3e777be0SXin Li requestName,
109*3e777be0SXin Li BuildTensorName(tensorNamePrefix, i),
110*3e777be0SXin Li tensorBindings[i].second);
111*3e777be0SXin Li }
112*3e777be0SXin Li }
113*3e777be0SXin Li }
114*3e777be0SXin Li
// Constructs a prepared model around a network already loaded into the runtime.
// @param networkId                      id of the loaded network (owned: unloaded in the destructor).
// @param runtime                        non-owning pointer to the ArmNN runtime.
// @param model                          the HAL model this network was prepared from.
// @param requestInputsAndOutputsDumpDir if non-empty, enables tensor/profiling dumps to this directory.
// @param gpuProfilingEnabled            enables the per-network profiler.
// @param asyncModelExecutionEnabled     if true, executions go through the shared threadpool.
// @param numberOfThreads                number of working-memory handles (and threadpool threads) to create.
// @param importEnabled / exportEnabled  enable zero-copy import/export of input/output buffers.
template<typename HalVersion>
ArmnnPreparedModel<HalVersion>::ArmnnPreparedModel(armnn::NetworkId networkId,
                                                   armnn::IRuntime* runtime,
                                                   const HalModel& model,
                                                   const std::string& requestInputsAndOutputsDumpDir,
                                                   const bool gpuProfilingEnabled,
                                                   const bool asyncModelExecutionEnabled,
                                                   const unsigned int numberOfThreads,
                                                   const bool importEnabled,
                                                   const bool exportEnabled)
    : m_NetworkId(networkId)
    , m_Runtime(runtime)
    , m_Model(model)
    , m_RequestCount(0)
    , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
    , m_GpuProfilingEnabled(gpuProfilingEnabled)
    , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
    , m_EnableImport(importEnabled)
    , m_EnableExport(exportEnabled)
{
    // Enable profiling if required.
    m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);

    if (m_AsyncModelExecutionEnabled)
    {
        // Create one working-memory handle per threadpool thread for this network.
        std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
        for (unsigned int i=0; i < numberOfThreads; ++i)
        {
            memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
        }

        // m_Threadpool is static (shared across instances): the first async-enabled
        // model creates it, later models only register their handles with it.
        if (!m_Threadpool)
        {
            m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
        }
        else
        {
            m_Threadpool->LoadMemHandles(memHandles);
        }

        // Keep one handle for the synchronous Execute() path used by ExecuteGraph /
        // ExecuteWithDummyInputs when async mode is on.
        m_WorkingMemHandle = memHandles.back();
    }
}
158*3e777be0SXin Li
// Tears down the prepared model: dumps profiling data (if enabled), unloads the
// network from the runtime and releases this network's working-memory handles
// from the shared threadpool.
template<typename HalVersion>
ArmnnPreparedModel<HalVersion>::~ArmnnPreparedModel()
{
    // Get a hold of the profiler used by this model.
    std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
    if (profiler && m_GpuProfilingEnabled)
    {
        // Dump the profiling info to a file if required.
        DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId,
                                    profiler.get());
    }

    // Unload the network associated with this model.
    m_Runtime->UnloadNetwork(m_NetworkId);

    // Unload the network memhandles from the threadpool
    // (m_Threadpool is non-null here: it is created in the constructor whenever
    // m_AsyncModelExecutionEnabled is set).
    if (m_AsyncModelExecutionEnabled)
    {
        m_Threadpool->UnloadMemHandles(m_NetworkId);
    }
}
180*3e777be0SXin Li
// Queues an inference for asynchronous execution (V1_0::IPreparedModel::execute).
// Validates the request, maps its memory pools, wraps the inputs/outputs as ArmNN
// tensors, then either schedules the graph on the shared threadpool (async mode)
// or posts it to the request thread. The returned status reports queuing errors;
// execution results are delivered through the callback.
template<typename HalVersion>
Return<V1_0::ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(
    const V1_0::Request& request,
    const ::android::sp<V1_0::IExecutionCallback>& callback)
{
    ALOGV("ArmnnPreparedModel::execute(): %s", GetModelSummary(m_Model).c_str());
    m_RequestCount++;

    // A null callback cannot be notified, so only the return status reports this error.
    if (callback.get() == nullptr) {
        ALOGE("ArmnnPreparedModel::execute invalid callback passed");
        return V1_0::ErrorStatus::INVALID_ARGUMENT;
    }

    if (!android::nn::validateRequest(request, m_Model))
    {
        NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::INVALID_ARGUMENT, "ArmnnPreparedModel::execute");
        return V1_0::ErrorStatus::INVALID_ARGUMENT;
    }

    if (!m_RequestInputsAndOutputsDumpDir.empty())
    {
        // The callback pointer value is used as a correlator for the dump logs.
        ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast<std::uintptr_t>(callback.get()));
    }

    // allocate the tensors on the heap, as they are passed to the request thread
    auto pInputTensors = std::make_shared<armnn::InputTensors>();
    auto pOutputTensors = std::make_shared<armnn::OutputTensors>();

    // map the memory pool into shared pointers
    // use a shared memory pools vector on the heap, as it is passed to the request thread
    auto pMemPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();
// Android S replaced the HIDL-memory pool helper with a canonical-memory one.
#if !defined(ARMNN_ANDROID_S)
    if (!setRunTimePoolInfosFromHidlMemories(pMemPools.get(), request.pools))
#else
    if (!setRunTimePoolInfosFromCanonicalMemories(pMemPools.get(), uncheckedConvert(request.pools)))
#endif
    {
        NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
        return V1_0::ErrorStatus::GENERAL_FAILURE;
    }

    // add the inputs and outputs with their data
    try
    {
        pInputTensors->reserve(request.inputs.size());
        for (unsigned int i = 0; i < request.inputs.size(); i++)
        {
            const auto& inputArg = request.inputs[i];
            armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
            // pInputTensors (of type InputTensors) is composed of a vector of ConstTensors.
            // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
            inputTensorInfo.SetConstant();
            auto result = ValidateRequestArgument<V1_0::ErrorStatus, V1_0::Request>(request,
                                                                                   inputTensorInfo,
                                                                                   inputArg,
                                                                                   "input");
            if (result != V1_0::ErrorStatus::NONE)
            {
                // NOTE(review): this path returns without notifying the callback,
                // unlike the other error paths above — confirm this is intended.
                return result;
            }

            const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, *pMemPools);
            if (inputTensor.GetMemoryArea() == nullptr)
            {
                // NOTE(review): also returns without callback notification.
                ALOGE("Cannot execute request. Error converting request input %u to tensor", i);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            }

            pInputTensors->emplace_back(i, inputTensor);
        }

        pOutputTensors->reserve(request.outputs.size());
        for (unsigned int i = 0; i < request.outputs.size(); i++)
        {
            const auto& outputArg = request.outputs[i];
            const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
            auto result = ValidateRequestArgument<V1_0::ErrorStatus, V1_0::Request>(request,
                                                                                   outputTensorInfo,
                                                                                   outputArg,
                                                                                   "output");

            if (result != V1_0::ErrorStatus::NONE)
            {
                // NOTE(review): see input-validation note above — no callback notification.
                return result;
            }

            const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, *pMemPools);
            if (outputTensor.GetMemoryArea() == nullptr)
            {
                ALOGE("Cannot execute request. Error converting request output %u to tensor", i);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            }

            pOutputTensors->emplace_back(i, outputTensor);
        }
    }
    catch (armnn::Exception& e)
    {
        ALOGW("armnn::Exception caught while preparing for EnqueueWorkload: %s", e.what());
        NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
        return V1_0::ErrorStatus::GENERAL_FAILURE;
    }
    catch (std::exception& e)
    {
        ALOGE("std::exception caught while preparing for EnqueueWorkload: %s", e.what());
        NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
        return V1_0::ErrorStatus::GENERAL_FAILURE;
    }

    // Wrap the HIDL callback so the execution path only deals with CallbackContext_1_0.
    auto cb = [callback](V1_0::ErrorStatus errorStatus, std::string callingFunction)
    {
        NotifyCallbackAndCheck(callback, errorStatus, callingFunction);
    };

    CallbackContext_1_0 armnnCb;
    armnnCb.callback = cb;

    if (m_AsyncModelExecutionEnabled)
    {
        ALOGV("ArmnnPreparedModel::execute(...) before ScheduleGraphForExecution");
        ScheduleGraphForExecution(pMemPools, pInputTensors, pOutputTensors, armnnCb);
        ALOGV("ArmnnPreparedModel::execute(...) after ScheduleGraphForExecution");
        return V1_0::ErrorStatus::NONE;
    }

    // post the request for asynchronous execution
    ALOGV("ArmnnPreparedModel::execute(...) before PostMsg");
    m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, armnnCb);
    ALOGV("ArmnnPreparedModel::execute(...) after PostMsg");
    return V1_0::ErrorStatus::NONE; // successfully queued
}
312*3e777be0SXin Li
// Runs the network for one request (called from the request thread, or directly).
// Dumps input/output tensors when a dump directory is configured, executes the
// graph (threadpool working-memory path when async is enabled, otherwise
// EnqueueWorkload with optional buffer import/export), commits the memory pools
// back to the client, and invokes cb.callback exactly once on every path.
template<typename HalVersion>
void ArmnnPreparedModel<HalVersion>::ExecuteGraph(
    std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
    armnn::InputTensors& inputTensors,
    armnn::OutputTensors& outputTensors,
    CallbackContext_1_0 cb)
{
    ALOGV("ArmnnPreparedModel::ExecuteGraph(...)");
    // Capture the graph execution start time.
    std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now();

    DumpTensorsIfRequired("Input", inputTensors);

    // run it
    try
    {
        armnn::Status status;
        if (m_AsyncModelExecutionEnabled)
        {
            ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled true");
            status = m_Runtime->Execute(*m_WorkingMemHandle, inputTensors, outputTensors);
        }
        else
        {
            ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled false");
            // Create a vector of Input and Output Ids which can be imported. An empty vector means all will be copied.
            std::vector<armnn::ImportedInputId> importedInputIds;
            if (m_EnableImport)
            {
                importedInputIds = m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
            }
            std::vector<armnn::ImportedOutputId> importedOutputIds;
            if (m_EnableExport)
            {
                importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
            }
            status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
                                                importedInputIds, importedOutputIds);
        }
        if (status != armnn::Status::Success)
        {
            ALOGW("EnqueueWorkload failed");
            cb.callback(V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
            return;
        }
    }
    catch (armnn::Exception& e)
    {
        ALOGW("armnn::Exception caught from EnqueueWorkload: %s", e.what());
        cb.callback(V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
        return;
    }
    catch (std::exception& e)
    {
        ALOGE("std::exception caught from EnqueueWorkload: %s", e.what());
        cb.callback(V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
        return;
    }

    DumpTensorsIfRequired("Output", outputTensors);

    // Commit output buffers.
    // Note that we update *all* pools, even if they aren't actually used as outputs -
    // this is simpler and is what the CpuExecutor does.
    for (android::nn::RunTimePoolInfo& pool : *pMemPools)
    {
        // Type android::nn::RunTimePoolInfo has changed between Android P & Q and Android R, where
        // update() has been removed and flush() added.
#if defined(ARMNN_ANDROID_R) || defined(ARMNN_ANDROID_S) // Use the new Android implementation.
        pool.flush();
#else
        pool.update();
#endif
    }

    // Log the total time in this call. This is a good number to compare to that printed out by
    // RuntimeImpl::EnqueueWorkload. The difference should be the execution overhead of the driver.
    ALOGI("ArmnnPreparedModel::ExecuteGraph Execution time = %lld µs",
          std::chrono::duration_cast<std::chrono::microseconds>
          (std::chrono::system_clock::now() - graphExecutionStart).count());

    cb.callback(V1_0::ErrorStatus::NONE, "ExecuteGraph");
}
396*3e777be0SXin Li
// Executes the network once with driver-allocated dummy buffers (zero-filled,
// since std::vector value-initializes its elements). Mirrors the execution logic
// of ExecuteGraph but reports success/failure via the return value instead of a
// callback — presumably used to verify the network actually runs; confirm with callers.
// @return true if the execution completed successfully, false otherwise.
template<typename HalVersion>
bool ArmnnPreparedModel<HalVersion>::ExecuteWithDummyInputs()
{
    // Backing storage for all dummy tensors. Growing the outer vector moves the
    // inner vectors but does not invalidate their heap buffers, so data pointers
    // captured by earlier tensors remain valid.
    std::vector<std::vector<char>> storage;
    armnn::InputTensors inputTensors;
    for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++)
    {
        armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
        // pInputTensors (of type InputTensors) is composed of a vector of ConstTensors.
        // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
        inputTensorInfo.SetConstant();

        storage.emplace_back(inputTensorInfo.GetNumBytes());
        const armnn::ConstTensor inputTensor(inputTensorInfo, storage.back().data());

        inputTensors.emplace_back(i, inputTensor);
    }

    armnn::OutputTensors outputTensors;
    for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++)
    {
        const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
        storage.emplace_back(outputTensorInfo.GetNumBytes());
        const armnn::Tensor outputTensor(outputTensorInfo, storage.back().data());

        outputTensors.emplace_back(i, outputTensor);
    }

    try
    {
        armnn::Status status;
        if (m_AsyncModelExecutionEnabled)
        {
            ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled true");
            status = m_Runtime->Execute(*m_WorkingMemHandle, inputTensors, outputTensors);
        }
        else
        {
            ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled false");
            // Create a vector of Input and Output Ids which can be imported. An empty vector means all will be copied.
            std::vector<armnn::ImportedInputId> importedInputIds;
            if (m_EnableImport)
            {
                importedInputIds = m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
            }
            std::vector<armnn::ImportedOutputId> importedOutputIds;
            if (m_EnableExport)
            {
                importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
            }
            status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
                                                importedInputIds, importedOutputIds);
        }
        if (status != armnn::Status::Success)
        {
            ALOGW("ExecuteWithDummyInputs: EnqueueWorkload failed");
            return false;
        }
    }
    catch (armnn::Exception& e)
    {
        ALOGW("ExecuteWithDummyInputs: armnn::Exception caught from EnqueueWorkload: %s", e.what());
        return false;
    }
    catch (std::exception& e)
    {
        ALOGE("ExecuteWithDummyInputs: std::exception caught from EnqueueWorkload: %s", e.what());
        return false;
    }
    return true;
}
468*3e777be0SXin Li
469*3e777be0SXin Li /// Schedule the graph prepared from the request for execution
470*3e777be0SXin Li template<typename HalVersion>
471*3e777be0SXin Li template<typename CallbackContext>
ScheduleGraphForExecution(std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> & pMemPools,std::shared_ptr<armnn::InputTensors> & inputTensors,std::shared_ptr<armnn::OutputTensors> & outputTensors,CallbackContext callbackContext)472*3e777be0SXin Li void ArmnnPreparedModel<HalVersion>::ScheduleGraphForExecution(
473*3e777be0SXin Li std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
474*3e777be0SXin Li std::shared_ptr<armnn::InputTensors>& inputTensors,
475*3e777be0SXin Li std::shared_ptr<armnn::OutputTensors>& outputTensors,
476*3e777be0SXin Li CallbackContext callbackContext)
477*3e777be0SXin Li {
478*3e777be0SXin Li ALOGV("ArmnnPreparedModel::ScheduleGraphForExecution(...)");
479*3e777be0SXin Li
480*3e777be0SXin Li DumpTensorsIfRequired("Input", *inputTensors);
481*3e777be0SXin Li
482*3e777be0SXin Li
483*3e777be0SXin Li auto tpCb = std::make_shared<
484*3e777be0SXin Li ArmnnThreadPoolCallback<CallbackContext_1_0>>(this,
485*3e777be0SXin Li pMemPools,
486*3e777be0SXin Li inputTensors,
487*3e777be0SXin Li outputTensors,
488*3e777be0SXin Li callbackContext);
489*3e777be0SXin Li
490*3e777be0SXin Li m_Threadpool->Schedule(m_NetworkId,
491*3e777be0SXin Li *tpCb->m_InputTensors,
492*3e777be0SXin Li *tpCb->m_OutputTensors,
493*3e777be0SXin Li armnn::QosExecPriority::Medium,
494*3e777be0SXin Li tpCb);
495*3e777be0SXin Li ALOGV("ArmnnPreparedModel::ScheduleGraphForExecution end");
496*3e777be0SXin Li }
497*3e777be0SXin Li
// Threadpool completion hook: called by armnn::Threadpool when the scheduled
// inference finishes. Dumps output tensors (if configured), commits all memory
// pools back to the client, and fires the stored callback with status NONE.
// The armnn execution status and timing are currently ignored.
template<typename HalVersion>
template <typename CallbackContext>
void ArmnnPreparedModel<HalVersion>::ArmnnThreadPoolCallback<CallbackContext>::Notify(
    armnn::Status status, armnn::InferenceTimingPair timeTaken)
{
    armnn::IgnoreUnused(status, timeTaken);
    // NOTE(review): the "_1_2" suffix in these log messages looks like a
    // copy-paste remnant from the 1.2 variant of this class — confirm.
    ALOGV("ArmnnPreparedModel::ArmnnThreadPoolCallback_1_2 Notify");

    m_Model->DumpTensorsIfRequired("Output", *m_OutputTensors);

    // Commit output buffers.
    // Note that we update *all* pools, even if they aren't actually used as outputs -
    // this is simpler and is what the CpuExecutor does.
    for (android::nn::RunTimePoolInfo& pool : *m_MemPools)
    {
        // Type android::nn::RunTimePoolInfo has changed between Android P & Q and Android R, where
        // update() has been removed and flush() added.
#if defined(ARMNN_ANDROID_R) || defined(ARMNN_ANDROID_S) // Use the new Android implementation.
        pool.flush();
#else
        pool.update();
#endif
    }

    m_CallbackContext.callback(V1_0::ErrorStatus::NONE, "ArmnnPreparedModel::ArmnnThreadPoolCallback_1_2 Notify");
    return;
}
525*3e777be0SXin Li
526*3e777be0SXin Li ///
527*3e777be0SXin Li /// Class template specializations
528*3e777be0SXin Li ///
529*3e777be0SXin Li
// HAL 1.0 is always built; also instantiate the threadpool scheduling path for it.
template class ArmnnPreparedModel<hal_1_0::HalPolicy>;
template void ArmnnPreparedModel<hal_1_0::HalPolicy>::ScheduleGraphForExecution<CallbackContext_1_0>(
    std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
    std::shared_ptr<armnn::InputTensors>& inputTensors,
    std::shared_ptr<armnn::OutputTensors>& outputTensors,
    CallbackContext_1_0 callbackContext);

// Later HAL versions fall back to this 1.0 prepared model for their older
// policies; each build flavour instantiates every policy it supports.
#ifdef ARMNN_ANDROID_NN_V1_1
template class ArmnnPreparedModel<hal_1_1::HalPolicy>;
#endif

#ifdef ARMNN_ANDROID_NN_V1_2
template class ArmnnPreparedModel<hal_1_1::HalPolicy>;
template class ArmnnPreparedModel<hal_1_2::HalPolicy>;
#endif

#ifdef ARMNN_ANDROID_NN_V1_3
template class ArmnnPreparedModel<hal_1_1::HalPolicy>;
template class ArmnnPreparedModel<hal_1_2::HalPolicy>;
template class ArmnnPreparedModel<hal_1_3::HalPolicy>;
#endif
551*3e777be0SXin Li } // namespace armnn_driver
552