//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#define LOG_TAG "arm-armnn-sl"

#include "ArmnnPreparedModel.hpp"
#include "CanonicalUtils.hpp"

#include <DefaultExecution.h>
#include <LegacyUtils.h>
#include <nnapi/IBurst.h>
#include <nnapi/IPreparedModel.h>
#include <nnapi/Result.h>
#include <nnapi/SharedMemory.h>
#include <nnapi/TypeUtils.h>
#include <nnapi/Types.h>
#include <nnapi/Validation.h>

#include <algorithm>
#include <memory>
#include <tuple>
#include <utility>
#include <vector>

using namespace android;
using namespace android::nn;

static const Timing g_NoTiming = {};

namespace {

using namespace armnn_driver;

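// Returns the interval between two android::nn::TimePoints as a whole number of microseconds.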
unsigned long MicrosecondsDuration(android::nn::TimePoint endPoint, android::nn::TimePoint startPoint)
{
    return static_cast<unsigned long>(std::chrono::duration_cast<std::chrono::microseconds>(
                                      endPoint - startPoint).count());
}

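// Checks that the dimensions supplied in a request argument are compatible with the tensor info
// registered for that input/output. Dimensions of 0 in the request are treated as unspecified
// and accepted.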
bool ValidateRequestArgument(const Request::Argument& requestArg, const armnn::TensorInfo& tensorInfo)
{
    if (requestArg.dimensions.size() != 0)
    {
        if (requestArg.dimensions.size() != tensorInfo.GetNumDimensions())
        {
            VLOG(DRIVER) << "Mismatched dimensions (request argument: "
                         << requestArg.dimensions.size() << " expected: " << tensorInfo.GetNumDimensions() << ")";
            return false;
        }

        for (unsigned int d = 0; d < tensorInfo.GetNumDimensions(); ++d)
        {
            if (requestArg.dimensions[d] != 0 && requestArg.dimensions[d] != tensorInfo.GetShape()[d])
            {
                VLOG(DRIVER) << "Mismatched dimensions " << d
                             << " (request argument: " << requestArg.dimensions[d]
                             << " expected: " << tensorInfo.GetShape()[d] << ")";
                return false;
            }
        }
    }

    return true;
}

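// Builds an armnn::Tensor for a request argument, resolving its memory either from a raw pointer
// (LifeTime::POINTER) or from one of the request's memory pools (LifeTime::POOL).
// Returns an empty tensor if validation fails or the lifetime is unsupported.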
armnn::Tensor GetTensorForRequestArgument(const Request::Argument& requestArg,
                                          const armnn::TensorInfo& tensorInfo,
                                          const std::vector<::android::nn::RunTimePoolInfo>& requestPools)
{
    if (!ValidateRequestArgument(requestArg, tensorInfo))
    {
        return armnn::Tensor();
    }

    if (requestArg.lifetime == Request::Argument::LifeTime::POINTER)
    {
        return armnn::Tensor(tensorInfo, GetMemoryFromPointer(requestArg));
    }
    else if (requestArg.lifetime == Request::Argument::LifeTime::POOL)
    {
        return armnn::Tensor(tensorInfo, GetMemoryFromPool(requestArg.location, requestPools));
    }
    return armnn::Tensor();
}

inline std::string BuildTensorName(const char* tensorNamePrefix, std::size_t index)
{
    return tensorNamePrefix + std::to_string(index);
}

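// Returns true only if every input and output of the request uses POINTER lifetime,
// i.e. no shared memory pools need to be mapped for this execution.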
bool IsPointerTypeMemory(const Request& request)
{
    for (auto& input : request.inputs)
    {
        if (input.lifetime != Request::Argument::LifeTime::POINTER)
        {
            return false;
        }
    }

    for (auto& output : request.outputs)
    {
        if (output.lifetime != Request::Argument::LifeTime::POINTER)
        {
            return false;
        }
    }

    return true;
}

} // anonymous namespace

using namespace android::nn;

namespace armnn_driver
{

void ArmnnPreparedModel::Init()
{
    // Enable profiling if required.
    m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
}

ArmnnPreparedModel::ArmnnPreparedModel(armnn::NetworkId networkId,
                                       armnn::IRuntime* runtime,
                                       const Model& model,
                                       const std::string& requestInputsAndOutputsDumpDir,
                                       const bool gpuProfilingEnabled,
                                       Priority priority)
    : m_NetworkId(networkId)
    , m_Runtime(runtime)
    , m_Model(model)
    , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
    , m_GpuProfilingEnabled(gpuProfilingEnabled)
    , m_ModelPriority(priority)
    , m_PrepareFromCache(false)
{
    Init();
}

ArmnnPreparedModel::ArmnnPreparedModel(armnn::NetworkId networkId,
                                       armnn::IRuntime* runtime,
                                       const std::string& requestInputsAndOutputsDumpDir,
                                       const bool gpuProfilingEnabled,
                                       Priority priority,
                                       const bool prepareModelFromCache)
    : m_NetworkId(networkId)
    , m_Runtime(runtime)
    , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
    , m_GpuProfilingEnabled(gpuProfilingEnabled)
    , m_ModelPriority(priority)
    , m_PrepareFromCache(prepareModelFromCache)
{
    Init();
}

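// Resolves each request input into an armnn ConstTensor backed by the caller's memory
// (pointer or pool) and appends it to 'inputs', using the input index as the layer binding id.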
ErrorStatus ArmnnPreparedModel::PrepareMemoryForInputs(
    armnn::InputTensors& inputs,
    const Request& request,
    const std::vector<android::nn::RunTimePoolInfo>& memPools) const
{
    inputs.reserve(request.inputs.size());
    for (unsigned int i = 0; i < request.inputs.size(); i++)
    {
        const auto& inputArg = request.inputs[i];

        armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
        // inputs (of type InputTensors) is composed of a vector of ConstTensors.
        // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
        inputTensorInfo.SetConstant();
        const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, memPools);

        if (inputTensor.GetMemoryArea() == nullptr)
        {
            VLOG(DRIVER) << "Cannot execute request. Error converting request input " << i << " to tensor.";
            return ErrorStatus::GENERAL_FAILURE;
        }
        inputs.emplace_back(i, inputTensor);
    }

    return ErrorStatus::NONE;
}

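// Resolves each request output into an armnn Tensor, records its (possibly updated) shape in
// 'outputShapes', and verifies that the caller-provided buffer is large enough; returns
// OUTPUT_INSUFFICIENT_SIZE otherwise.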
ErrorStatus ArmnnPreparedModel::PrepareMemoryForOutputs(
    armnn::OutputTensors& outputs,
    std::vector<OutputShape>& outputShapes,
    const Request& request,
    const std::vector<android::nn::RunTimePoolInfo>& memPools) const
{
    outputs.reserve(request.outputs.size());
    for (unsigned int i = 0; i < request.outputs.size(); i++)
    {
        auto& outputArg = request.outputs[i];

        armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
        armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, memPools);
        if (outputTensor.GetMemoryArea() == nullptr)
        {
            VLOG(DRIVER) << "Cannot execute request. Error converting request output " << i << " to tensor.";
            return ErrorStatus::GENERAL_FAILURE;
        }

        const size_t outputSize = outputTensorInfo.GetNumBytes();

        unsigned int count = 0;
        std::for_each(outputArg.dimensions.begin(), outputArg.dimensions.end(), [&](auto dim)
        {
            if (dim != 0)
            {
                outputTensorInfo.GetShape()[count] = dim;
            }
            else
            {
                outputTensorInfo.GetShape()[count] = outputArg.dimensions.size();
            }

            count++;
        });

        outputs.emplace_back(i, outputTensor);
        outputShapes[i] = ComputeShape(outputTensorInfo);

        if (outputArg.location.length < outputSize)
        {
            VLOG(DRIVER) << "ArmnnPreparedModel::Execute failed outputArg.location.length "
                         << std::to_string(outputArg.location.length).c_str()
                         << " < outputSize " << std::to_string(outputSize).c_str();
            outputShapes[i].isSufficient = false;
            return ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
        }

        // TODO: Need to check for Request::Argument::LifeTime::POINTER
        if (outputArg.lifetime == Request::Argument::LifeTime::POOL)
        {
            size_t bufferSize = memPools.at(outputArg.location.poolIndex).getSize();
            if (bufferSize < outputSize)
            {
                VLOG(DRIVER) << "ArmnnPreparedModel::Execute failed bufferSize "
                             << std::to_string(bufferSize).c_str()
                             << " < outputSize " << std::to_string(outputSize).c_str();
                outputShapes[i].isSufficient = false;
                return ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
            }
        }
    }
    return ErrorStatus::NONE;
}

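// Maps the request's memory pools (unless all arguments use pointer memory) and prepares the
// input and output tensors for execution. Any armnn or std exception is translated into
// GENERAL_FAILURE.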
ErrorStatus ArmnnPreparedModel::PrepareMemoryForIO(armnn::InputTensors& inputs,
                                                   armnn::OutputTensors& outputs,
                                                   std::vector<android::nn::RunTimePoolInfo>& memPools,
                                                   const Request& request,
                                                   const bool pointerMemory) const
{
    // Check that the memory pools are not empty, then add the inputs and outputs with their data.
    try
    {
        if (!pointerMemory && !setRunTimePoolInfosFromMemoryPools(&memPools, request.pools))
        {
            return ErrorStatus::INVALID_ARGUMENT;
        }

        if (PrepareMemoryForInputs(inputs, request, memPools) != ErrorStatus::NONE)
        {
            VLOG(DRIVER) << "Failed when preparing memory for Inputs";
            return ErrorStatus::GENERAL_FAILURE;
        }

        std::vector<OutputShape> outputShapes(request.outputs.size());

        auto errorStatus = PrepareMemoryForOutputs(outputs, outputShapes, request, memPools);
        if (errorStatus != ErrorStatus::NONE)
        {
            return errorStatus;
        }
    }
    catch (armnn::Exception& e)
    {
        VLOG(DRIVER) << "armnn::Exception caught while preparing for EnqueueWorkload: " << e.what();
        return ErrorStatus::GENERAL_FAILURE;
    }
    catch (std::exception& e)
    {
        VLOG(DRIVER) << "std::exception caught while preparing for EnqueueWorkload: " << e.what();
        return ErrorStatus::GENERAL_FAILURE;
    }

    return ErrorStatus::NONE;
}

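// Synchronous execution entry point: validates the request against the model (unless the model
// was prepared from cache), fails fast if the deadline has already passed, prepares input/output
// memory, runs the graph via ExecuteGraph and returns the computed output shapes.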
ExecutionResult<std::pair<std::vector<OutputShape>, Timing>> ArmnnPreparedModel::execute(
    const Request& request,
    MeasureTiming measureTiming,
    const OptionalTimePoint& deadline,
    const OptionalDuration&,
    const std::vector<android::nn::TokenValuePair>& hints,
    const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix) const
{
    VLOG(DRIVER) << "CanonicalDriver::PreparedModel::execute()";

    CanonicalExecutionContext ctx;
    if (measureTiming == MeasureTiming::YES)
    {
        ctx.measureTimings = measureTiming;
        ctx.driverStart = Clock::now();
    }

    if (!m_PrepareFromCache)
    {
        const auto modelRequest = validateRequestForModel(request, m_Model);
        if (!modelRequest.ok())
        {
            return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << modelRequest.error();
        }
        VLOG(DRIVER) << "ArmnnPreparedModel::execute(): " << GetModelSummary(m_Model).c_str();
    }
    if (hasDeadlinePassed(deadline))
    {
        return NN_ERROR(ErrorStatus::MISSED_DEADLINE_PERSISTENT);
    }

    // map the memory pool into shared pointers
    // use a shared memory pools vector on the heap, as it is passed to the request thread
    auto memPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();

    // allocate the tensors on the heap, as they are passed to the request thread
    auto inputTensors = std::make_shared<armnn::InputTensors>();
    auto outputTensors = std::make_shared<armnn::OutputTensors>();

    auto isPointerTypeMemory = IsPointerTypeMemory(request);
    ErrorStatus theErrorStatus = PrepareMemoryForIO(*inputTensors,
                                                    *outputTensors,
                                                    *memPools,
                                                    request,
                                                    isPointerTypeMemory);

    switch (theErrorStatus)
    {
        case ErrorStatus::OUTPUT_INSUFFICIENT_SIZE:
            return NN_ERROR(ErrorStatus::OUTPUT_INSUFFICIENT_SIZE);
        case ErrorStatus::GENERAL_FAILURE:
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE);
        case ErrorStatus::INVALID_ARGUMENT:
            return NN_ERROR(ErrorStatus::INVALID_ARGUMENT);
        default:
        {}
    }

    std::vector<OutputShape> outputShapes(outputTensors->size());
    for (unsigned int i = 0; i < outputTensors->size(); i++)
    {
        std::pair<int, armnn::Tensor> outputTensorPair = (*outputTensors)[i];
        const armnn::Tensor outputTensor = outputTensorPair.second;
        const armnn::TensorInfo outputTensorInfo = outputTensor.GetInfo();

        outputShapes[i] = ComputeShape(outputTensorInfo);
    }
    Timing theTiming;

    VLOG(DRIVER) << "ArmnnPreparedModel::execute(...) before ExecuteGraph";
    auto errorStatus = ExecuteGraph(memPools, *inputTensors, *outputTensors, ctx, isPointerTypeMemory);
    if (errorStatus != ErrorStatus::NONE)
    {
        return NN_ERROR(errorStatus) << "execute() failed";
    }
    VLOG(DRIVER) << "ArmnnPreparedModel::execute(...) after ExecuteGraph";

    return std::make_pair(outputShapes, theTiming);
}

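// Runs the loaded network. Input and output tensors that the runtime can import
// (armnn::MemorySource::Malloc) are removed from the tensor vectors and passed to
// EnqueueWorkload by their imported ids; the remaining tensors are passed directly.
// When non-pointer memory was used and tensors were imported, the memory pools are
// committed back to the caller afterwards.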
ErrorStatus ArmnnPreparedModel::ExecuteGraph(
    std::shared_ptr<std::vector<android::nn::RunTimePoolInfo>>& pMemPools,
    armnn::InputTensors& inputTensors,
    armnn::OutputTensors& outputTensors,
    CanonicalExecutionContext ctx,
    const bool pointerMemory) const
{
    VLOG(DRIVER) << "ArmnnPreparedModel::ExecuteGraph(...)";

    DumpTensorsIfRequired("Input", inputTensors);
    std::vector<armnn::ImportedInputId> importedInputIds;
    std::vector<armnn::ImportedOutputId> importedOutputIds;
    try
    {
        if (ctx.measureTimings == MeasureTiming::YES)
        {
            ctx.deviceStart = Clock::now();
        }
        armnn::Status status;
        VLOG(DRIVER) << "ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled false";
        importedInputIds = m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
        if (!importedInputIds.empty())
        {
            // Some or all of the input tensors have been imported. We need to remove the
            // imported ones from inputTensors.
            for (armnn::ImportedInputId& importedId : importedInputIds)
            {
                inputTensors.erase(
                        std::remove_if(
                                inputTensors.begin(), inputTensors.end(),
                                [&importedId](std::pair<armnn::LayerBindingId, class armnn::ConstTensor>& element) {
                                    return (element.first == static_cast<int>(importedId));
                                }),
                        inputTensors.end());
            }
        }
        importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
        if (!importedOutputIds.empty())
        {
            // Some or all of the output tensors have been imported. We need to remove the
            // imported ones from outputTensors.
            for (armnn::ImportedOutputId& importedId : importedOutputIds)
            {
                outputTensors.erase(
                        std::remove_if(
                                outputTensors.begin(), outputTensors.end(),
                                [&importedId](std::pair<armnn::LayerBindingId, class armnn::Tensor>& element) {
                                    return (element.first == static_cast<int>(importedId));
                                }),
                        outputTensors.end());
            }
        }
        status = m_Runtime->EnqueueWorkload(m_NetworkId,
                                            inputTensors,
                                            outputTensors,
                                            importedInputIds,
                                            importedOutputIds);

        if (ctx.measureTimings == MeasureTiming::YES)
        {
            ctx.deviceEnd = Clock::now();
        }
        if (status != armnn::Status::Success)
        {
            VLOG(DRIVER) << "ArmnnPreparedModel::ExecuteGraph EnqueueWorkload failed";
            return ErrorStatus::GENERAL_FAILURE;
        }
    }
    catch (armnn::Exception& e)
    {
        VLOG(DRIVER) << "armnn::Exception caught from EnqueueWorkload: " << e.what();
        return ErrorStatus::GENERAL_FAILURE;
    }
    catch (std::exception& e)
    {
        VLOG(DRIVER) << "std::exception caught from EnqueueWorkload: " << e.what();
        return ErrorStatus::GENERAL_FAILURE;
    }

    if (!pointerMemory && (!importedInputIds.empty() || !importedOutputIds.empty()))
    {
        CommitPools(*pMemPools);
    }
    DumpTensorsIfRequired("Output", outputTensors);

    if (ctx.measureTimings == MeasureTiming::YES)
    {
        ctx.driverEnd = Clock::now();
        Timing timing;
        timing.timeOnDevice = ctx.deviceEnd - ctx.deviceStart;
        timing.timeInDriver = ctx.driverEnd - ctx.driverStart;
        VLOG(DRIVER) << "ArmnnPreparedModel::execute timing - Device = "
                     << timing.timeOnDevice << " Driver = " << timing.timeInDriver;
    }
    return ErrorStatus::NONE;
}

Priority ArmnnPreparedModel::GetModelPriority() const
{
    return m_ModelPriority;
}

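// Fenced execution entry point: waits for all dependency fences to signal before running the
// graph synchronously, then returns an already-signalled sync fence together with a callback
// that reports either the execution error status or the measured timings.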
GeneralResult<std::pair<SyncFence, ExecuteFencedInfoCallback>> ArmnnPreparedModel::executeFenced(
    const Request& request,
    const std::vector<SyncFence>& waitFor,
    MeasureTiming measureTiming,
    const OptionalTimePoint& deadline,
    const OptionalDuration&,
    const OptionalDuration&,
    const std::vector<android::nn::TokenValuePair>& hints,
    const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix) const
{
    VLOG(DRIVER) << "ArmnnPreparedModel::executeFenced()";

    if (!m_PrepareFromCache)
    {
        const auto modelRequest = validateRequestForModel(request, m_Model);
        if (!modelRequest.ok())
        {
            return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << modelRequest.error();
        }
        VLOG(DRIVER) << "ArmnnPreparedModel::executeFenced(): " << GetModelSummary(m_Model).c_str();
    }
    if (hasDeadlinePassed(deadline))
    {
        return NN_ERROR(ErrorStatus::MISSED_DEADLINE_PERSISTENT);
    }

    CanonicalExecutionContext ctx;
    if (measureTiming == MeasureTiming::YES)
    {
        ctx.measureTimings = measureTiming;
        ctx.driverStart = Clock::now();
    }

    // Wait for the dependent events to signal
    for (const auto& syncFence : waitFor)
    {
        if (!syncFence.getSharedHandle())
        {
            return NN_ERROR(ErrorStatus::INVALID_ARGUMENT);
        }
        if (syncFence.syncWait({}) != SyncFence::FenceState::SIGNALED)
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "syncWait failed";
        }
    }

    android::nn::TimePoint fenceExecutionStart;
    if (measureTiming == MeasureTiming::YES)
    {
        fenceExecutionStart = Clock::now();
    }

    // map the memory pool into shared pointers
    // use a shared memory pools vector on the heap, as it is passed to the request thread
    auto memPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();

    // allocate the tensors on the heap, as they are passed to the request thread
    auto inputTensors = std::make_shared<armnn::InputTensors>();
    auto outputTensors = std::make_shared<armnn::OutputTensors>();

    auto isPointerTypeMemory = IsPointerTypeMemory(request);
    ErrorStatus theErrorStatus = PrepareMemoryForIO(*inputTensors,
                                                    *outputTensors,
                                                    *memPools,
                                                    request,
                                                    isPointerTypeMemory);

    if (theErrorStatus != ErrorStatus::NONE)
    {
        return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << "executeFenced() failed";
    }

    Timing timingSinceLaunch = {};
    Timing timingAfterFence  = {};
    if (measureTiming == MeasureTiming::YES)
    {
        timingAfterFence.timeOnDevice = ctx.deviceEnd - ctx.deviceStart;
        timingAfterFence.timeInDriver = ctx.driverEnd - fenceExecutionStart;
        VLOG(DRIVER) << "executeFenced timingSinceLaunch = " << timingAfterFence.timeOnDevice;
        VLOG(DRIVER) << "executeFenced timingAfterFence = " << timingAfterFence.timeInDriver;
    }

    VLOG(DRIVER) << "ArmnnCanonicalPreparedModel::executeFenced(...) before ExecuteGraph";
    auto errorStatus = ExecuteGraph(memPools, *inputTensors, *outputTensors, ctx, isPointerTypeMemory);
    VLOG(DRIVER) << "ArmnnCanonicalPreparedModel::executeFenced(...) after ExecuteGraph";

    ExecuteFencedInfoCallback armnnFencedExecutionCallback =
            [timingSinceLaunch, timingAfterFence, errorStatus]() {

                GeneralResult<std::pair<Timing, Timing>> result;

                switch (errorStatus)
                {
                    case ErrorStatus::OUTPUT_INSUFFICIENT_SIZE:
                        result.error().code = (ErrorStatus::OUTPUT_INSUFFICIENT_SIZE);
                        [[fallthrough]];
                    case ErrorStatus::GENERAL_FAILURE:
                        result.error().code = (ErrorStatus::GENERAL_FAILURE);
                        [[fallthrough]];
                    case ErrorStatus::INVALID_ARGUMENT:
                        result.error().code = (ErrorStatus::INVALID_ARGUMENT);
                        [[fallthrough]];
                    default:
                    {
                        result.value() = std::make_pair(timingSinceLaunch, timingAfterFence);
                    }
                }
                return result;
            };
    return std::make_pair(SyncFence::createAsSignaled(), std::move(armnnFencedExecutionCallback));
}

GeneralResult<SharedExecution> ArmnnPreparedModel::createReusableExecution(
    const Request& request,
    MeasureTiming measureTiming,
    const OptionalDuration& loopTimeoutDuration,
    const std::vector<android::nn::TokenValuePair>& hints,
    const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix) const
{
    VLOG(DRIVER) << "ArmnnPreparedModel::createReusableExecution()";
    return std::make_shared<DefaultExecution>(shared_from_this(),
                                              request,
                                              measureTiming,
                                              loopTimeoutDuration);
}

GeneralResult<SharedBurst> ArmnnPreparedModel::configureExecutionBurst() const
{
    // TODO: Implement BURST
    return nullptr;
}

std::any ArmnnPreparedModel::getUnderlyingResource() const
{
    return &m_Model;
}

template<typename TensorBindingCollection>
void ArmnnPreparedModel::DumpTensorsIfRequired(char const* tensorNamePrefix,
                                               const TensorBindingCollection& tensorBindings) const
{
    if (!m_RequestInputsAndOutputsDumpDir.empty())
    {
        const std::string requestName = std::to_string(m_NetworkId) + ".dump";
        for (std::size_t i = 0u; i < tensorBindings.size(); ++i)
        {
            DumpTensor(m_RequestInputsAndOutputsDumpDir,
                       requestName,
                       BuildTensorName(tensorNamePrefix, i),
                       tensorBindings[i].second);
        }
    }
}

ArmnnPreparedModel::~ArmnnPreparedModel()
{
    VLOG(DRIVER) << "ArmnnPreparedModel::~ArmnnPreparedModel()";
    // Get a hold of the profiler used by this model.
    if (m_GpuProfilingEnabled)
    {
        auto profiler = m_Runtime->GetProfiler(m_NetworkId);
        if (profiler)
        {
            // Dump the profiling info to a file if required.
            DumpJsonProfilingIfRequired(m_GpuProfilingEnabled,
                                        m_RequestInputsAndOutputsDumpDir,
                                        m_NetworkId,
                                        profiler.get());
        }
    }
    // Unload the network associated with this model
    m_Runtime->UnloadNetwork(m_NetworkId);
}

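// Executes the network once with zero-initialised dummy input and output buffers of the correct
// sizes; returns true if the run succeeds.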
bool ArmnnPreparedModel::ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs) const
{
    std::vector<std::vector<char>> storage;
    armnn::InputTensors inputTensors;
    for (unsigned int i = 0; i < numInputs; i++)
    {
        armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
        // inputTensors (of type InputTensors) is composed of a vector of ConstTensors.
        // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
        inputTensorInfo.SetConstant();
        storage.emplace_back(inputTensorInfo.GetNumBytes());
        const armnn::ConstTensor inputTensor(inputTensorInfo, storage.back().data());

        inputTensors.emplace_back(i, inputTensor);
    }

    armnn::OutputTensors outputTensors;
    for (unsigned int i = 0; i < numOutputs; i++)
    {
        const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
        storage.emplace_back(outputTensorInfo.GetNumBytes());
        const armnn::Tensor outputTensor(outputTensorInfo, storage.back().data());

        outputTensors.emplace_back(i, outputTensor);
    }
    CanonicalExecutionContext ctx;
    ctx.measureTimings = MeasureTiming::NO;
    auto memPools = std::make_shared<std::vector<::android::nn::RunTimePoolInfo>>();

    auto errorStatus = ExecuteGraph(memPools,
                                    inputTensors,
                                    outputTensors,
                                    ctx);

    return errorStatus == ErrorStatus::NONE;
}

} // namespace armnn_driver