//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#define LOG_TAG "arm-armnn-sl"

#include "ArmnnPreparedModel.hpp"
#include "CanonicalUtils.hpp"

#include <DefaultExecution.h>
#include <LegacyUtils.h>
#include <nnapi/IBurst.h>
#include <nnapi/IPreparedModel.h>
#include <nnapi/Result.h>
#include <nnapi/SharedMemory.h>
#include <nnapi/TypeUtils.h>
#include <nnapi/Types.h>
#include <nnapi/Validation.h>

#include <memory>
#include <tuple>
#include <utility>
#include <vector>

using namespace android;
using namespace android::nn;

static const Timing g_NoTiming = {};

namespace {

using namespace armnn_driver;

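// Helper returning the elapsed time between two time points, in microseconds.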
unsigned long MicrosecondsDuration(android::nn::TimePoint endPoint, android::nn::TimePoint startPoint)
{
    return static_cast<unsigned long>(std::chrono::duration_cast<std::chrono::microseconds>(
        endPoint - startPoint).count());
}

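// Validates that any dimensions supplied with a request argument are consistent with the
// TensorInfo the network expects: the rank must match and each non-zero dimension must equal
// the corresponding dimension of the tensor shape.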
bool ValidateRequestArgument(const Request::Argument& requestArg, const armnn::TensorInfo& tensorInfo)
{
    if (requestArg.dimensions.size() != 0)
    {
        if (requestArg.dimensions.size() != tensorInfo.GetNumDimensions())
        {
            VLOG(DRIVER) << "Mismatched dimensions (request argument: "
                         << requestArg.dimensions.size()
                         << " expected: " << tensorInfo.GetNumDimensions() << ")";
            return false;
        }

        for (unsigned int d = 0; d < tensorInfo.GetNumDimensions(); ++d)
        {
            if (requestArg.dimensions[d] != 0 && requestArg.dimensions[d] != tensorInfo.GetShape()[d])
            {
                VLOG(DRIVER) << "Mismatched dimensions " << d
                             << " (request argument: " << requestArg.dimensions[d]
                             << " expected: " << tensorInfo.GetShape()[d] << ")";
                return false;
            }
        }
    }

    return true;
}

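// Builds an armnn::Tensor for a request argument. The backing memory is taken either directly
// from the client pointer (POINTER lifetime) or from the mapped request memory pools (POOL
// lifetime). An empty Tensor is returned if validation fails or the lifetime is unsupported.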
armnn::Tensor GetTensorForRequestArgument(const Request::Argument& requestArg,
                                          const armnn::TensorInfo& tensorInfo,
                                          const std::vector<::android::nn::RunTimePoolInfo>& requestPools)
{
    if (!ValidateRequestArgument(requestArg, tensorInfo))
    {
        return armnn::Tensor();
    }

    if (requestArg.lifetime == Request::Argument::LifeTime::POINTER)
    {
        return armnn::Tensor(tensorInfo, GetMemoryFromPointer(requestArg));
    }
    else if (requestArg.lifetime == Request::Argument::LifeTime::POOL)
    {
        return armnn::Tensor(tensorInfo, GetMemoryFromPool(requestArg.location, requestPools));
    }
    return armnn::Tensor();
}

inline std::string BuildTensorName(const char* tensorNamePrefix, std::size_t index)
{
    return tensorNamePrefix + std::to_string(index);
}

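// Returns true only when every input and every output of the request uses POINTER lifetime,
// i.e. the client supplied raw pointers rather than memory pools.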
bool IsPointerTypeMemory(const Request& request)
{
    for (auto& input : request.inputs)
    {
        if (input.lifetime != Request::Argument::LifeTime::POINTER)
        {
            return false;
        }
    }

    for (auto& output : request.outputs)
    {
        if (output.lifetime != Request::Argument::LifeTime::POINTER)
        {
            return false;
        }
    }

    return true;
}

} // anonymous namespace

using namespace android::nn;

namespace armnn_driver
{

void ArmnnPreparedModel::Init()
{
    // Enable profiling if required.
    m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
}

ArmnnPreparedModel::ArmnnPreparedModel(armnn::NetworkId networkId,
                                       armnn::IRuntime* runtime,
                                       const Model& model,
                                       const std::string& requestInputsAndOutputsDumpDir,
                                       const bool gpuProfilingEnabled,
                                       Priority priority)
    : m_NetworkId(networkId)
    , m_Runtime(runtime)
    , m_Model(model)
    , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
    , m_GpuProfilingEnabled(gpuProfilingEnabled)
    , m_ModelPriority(priority)
    , m_PrepareFromCache(false)
{
    Init();
}

ArmnnPreparedModel::ArmnnPreparedModel(armnn::NetworkId networkId,
                                       armnn::IRuntime* runtime,
                                       const std::string& requestInputsAndOutputsDumpDir,
                                       const bool gpuProfilingEnabled,
                                       Priority priority,
                                       const bool prepareModelFromCache)
    : m_NetworkId(networkId)
    , m_Runtime(runtime)
    , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
    , m_GpuProfilingEnabled(gpuProfilingEnabled)
    , m_ModelPriority(priority)
    , m_PrepareFromCache(prepareModelFromCache)
{
    Init();
}

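// Wraps each request input in a tensor bound to the corresponding network input binding.
// Fails with GENERAL_FAILURE if any input cannot be mapped to memory.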
ErrorStatus ArmnnPreparedModel::PrepareMemoryForInputs(
    armnn::InputTensors& inputs,
    const Request& request,
    const std::vector<android::nn::RunTimePoolInfo>& memPools) const
{
    inputs.reserve(request.inputs.size());
    for (unsigned int i = 0; i < request.inputs.size(); i++)
    {
        const auto& inputArg = request.inputs[i];

        armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
        // inputs (of type InputTensors) is composed of a vector of ConstTensors.
        // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
        inputTensorInfo.SetConstant();
        const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, memPools);

        if (inputTensor.GetMemoryArea() == nullptr)
        {
            VLOG(DRIVER) << "Cannot execute request. Error converting request input " << i << " to tensor.";
            return ErrorStatus::GENERAL_FAILURE;
        }
        inputs.emplace_back(i, inputTensor);
    }

    return ErrorStatus::NONE;
}

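// Wraps each request output in an armnn::Tensor bound to the corresponding network output and
// records the resulting output shapes. Returns OUTPUT_INSUFFICIENT_SIZE when a client buffer
// (or its backing pool) is smaller than the tensor the network will produce.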
ErrorStatus ArmnnPreparedModel::PrepareMemoryForOutputs(
    armnn::OutputTensors& outputs,
    std::vector<OutputShape>& outputShapes,
    const Request& request,
    const std::vector<android::nn::RunTimePoolInfo>& memPools) const
{
    outputs.reserve(request.outputs.size());
    for (unsigned int i = 0; i < request.outputs.size(); i++)
    {
        auto& outputArg = request.outputs[i];

        armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
        armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, memPools);
        if (outputTensor.GetMemoryArea() == nullptr)
        {
            VLOG(DRIVER) << "Cannot execute request. Error converting request output " << i << " to tensor.";
            return ErrorStatus::GENERAL_FAILURE;
        }

        const size_t outputSize = outputTensorInfo.GetNumBytes();

        // Copy any non-zero dimensions supplied with the request into the output shape.
        unsigned int count = 0;
        std::for_each(outputArg.dimensions.begin(), outputArg.dimensions.end(), [&](auto dim)
        {
            if (dim != 0)
            {
                outputTensorInfo.GetShape()[count] = dim;
            }
            else
            {
                outputTensorInfo.GetShape()[count] = outputArg.dimensions.size();
            }

            count++;
        });

        outputs.emplace_back(i, outputTensor);
        outputShapes[i] = ComputeShape(outputTensorInfo);

        if (outputArg.location.length < outputSize)
        {
            VLOG(DRIVER) << "ArmnnPreparedModel::Execute failed outputArg.location.length "
                         << std::to_string(outputArg.location.length).c_str()
                         << " < outputSize " << std::to_string(outputSize).c_str();
            outputShapes[i].isSufficient = false;
            return ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
        }

        //TODO: Need to check for Request::Argument::LifeTime::POINTER
        if (outputArg.lifetime == Request::Argument::LifeTime::POOL)
        {
            size_t bufferSize = memPools.at(outputArg.location.poolIndex).getSize();
            if (bufferSize < outputSize)
            {
                VLOG(DRIVER) << "ArmnnPreparedModel::Execute failed bufferSize "
                             << std::to_string(bufferSize).c_str()
                             << " < outputSize " << std::to_string(outputSize).c_str();
                outputShapes[i].isSufficient = false;
                return ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
            }
        }
    }
    return ErrorStatus::NONE;
}

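// Maps the request memory pools (unless the client passed raw pointers) and prepares both the
// input and output tensors for execution, translating any ArmNN or standard exception into
// GENERAL_FAILURE.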
ErrorStatus ArmnnPreparedModel::PrepareMemoryForIO(armnn::InputTensors& inputs,
                                                   armnn::OutputTensors& outputs,
                                                   std::vector<android::nn::RunTimePoolInfo>& memPools,
                                                   const Request& request,
                                                   const bool pointerMemory) const
{
    // Check that the memory pools are not empty, then add the inputs and outputs with their data.
    try
    {
        if (!pointerMemory && !setRunTimePoolInfosFromMemoryPools(&memPools, request.pools))
        {
            return ErrorStatus::INVALID_ARGUMENT;
        }

        if (PrepareMemoryForInputs(inputs, request, memPools) != ErrorStatus::NONE)
        {
            VLOG(DRIVER) << "Failed when preparing memory for Inputs";
            return ErrorStatus::GENERAL_FAILURE;
        }

        std::vector<OutputShape> outputShapes(request.outputs.size());

        auto errorStatus = PrepareMemoryForOutputs(outputs, outputShapes, request, memPools);
        if (errorStatus != ErrorStatus::NONE)
        {
            return errorStatus;
        }
    }
    catch (armnn::Exception& e)
    {
        VLOG(DRIVER) << "armnn::Exception caught while preparing for EnqueueWorkload: " << e.what();
        return ErrorStatus::GENERAL_FAILURE;
    }
    catch (std::exception& e)
    {
        VLOG(DRIVER) << "std::exception caught while preparing for EnqueueWorkload: " << e.what();
        return ErrorStatus::GENERAL_FAILURE;
    }

    return ErrorStatus::NONE;
}

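// Synchronous execution entry point of the NNAPI canonical interface. Validates the request
// against the model, prepares input/output memory, runs the graph and returns the output
// shapes together with timing information.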
ExecutionResult<std::pair<std::vector<OutputShape>, Timing>> ArmnnPreparedModel::execute(
    const Request& request,
    MeasureTiming measureTiming,
    const OptionalTimePoint& deadline,
    const OptionalDuration&,
    const std::vector<android::nn::TokenValuePair>& hints,
    const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix) const
{
    VLOG(DRIVER) << "CanonicalDriver::PreparedModel::execute()";

    CanonicalExecutionContext ctx;
    if (measureTiming == MeasureTiming::YES)
    {
        ctx.measureTimings = measureTiming;
        ctx.driverStart = Clock::now();
    }

    if (!m_PrepareFromCache)
    {
        const auto modelRequest = validateRequestForModel(request, m_Model);
        if (!modelRequest.ok())
        {
            return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << modelRequest.error();
        }
        VLOG(DRIVER) << "ArmnnPreparedModel::execute(): " << GetModelSummary(m_Model).c_str();
    }
    if (hasDeadlinePassed(deadline))
    {
        return NN_ERROR(ErrorStatus::MISSED_DEADLINE_PERSISTENT);
    }

    // Map the memory pools into shared pointers.
    // Use a shared memory pools vector on the heap, as it is passed to the request thread.
    auto memPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();

    // Allocate the tensors on the heap, as they are passed to the request thread.
    auto inputTensors = std::make_shared<armnn::InputTensors>();
    auto outputTensors = std::make_shared<armnn::OutputTensors>();

    auto isPointerTypeMemory = IsPointerTypeMemory(request);
    ErrorStatus theErrorStatus = PrepareMemoryForIO(*inputTensors,
                                                    *outputTensors,
                                                    *memPools,
                                                    request,
                                                    isPointerTypeMemory);

    switch (theErrorStatus)
    {
        case ErrorStatus::OUTPUT_INSUFFICIENT_SIZE:
            return NN_ERROR(ErrorStatus::OUTPUT_INSUFFICIENT_SIZE);
        case ErrorStatus::GENERAL_FAILURE:
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE);
        case ErrorStatus::INVALID_ARGUMENT:
            return NN_ERROR(ErrorStatus::INVALID_ARGUMENT);
        default:
        {}
    }

    std::vector<OutputShape> outputShapes(outputTensors->size());
    for (unsigned int i = 0; i < outputTensors->size(); i++)
    {
        std::pair<int, armnn::Tensor> outputTensorPair = (*outputTensors)[i];
        const armnn::Tensor outputTensor = outputTensorPair.second;
        const armnn::TensorInfo outputTensorInfo = outputTensor.GetInfo();

        outputShapes[i] = ComputeShape(outputTensorInfo);
    }
    Timing theTiming;

    VLOG(DRIVER) << "ArmnnPreparedModel::execute(...) before ExecuteGraph";
    auto errorStatus = ExecuteGraph(memPools, *inputTensors, *outputTensors, ctx, isPointerTypeMemory);
    if (errorStatus != ErrorStatus::NONE)
    {
        return NN_ERROR(errorStatus) << "execute() failed";
    }
    VLOG(DRIVER) << "ArmnnPreparedModel::execute(...) after ExecuteGraph";

    return std::make_pair(outputShapes, theTiming);
}

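// Runs the loaded network. Tensors whose memory can be imported into the backend
// (MemorySource::Malloc) are removed from the tensor vectors and passed to EnqueueWorkload by
// imported id instead; everything else goes through the normal copy path.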
ErrorStatus ArmnnPreparedModel::ExecuteGraph(
    std::shared_ptr<std::vector<android::nn::RunTimePoolInfo>>& pMemPools,
    armnn::InputTensors& inputTensors,
    armnn::OutputTensors& outputTensors,
    CanonicalExecutionContext ctx,
    const bool pointerMemory) const
{
    VLOG(DRIVER) << "ArmnnPreparedModel::ExecuteGraph(...)";

    DumpTensorsIfRequired("Input", inputTensors);
    std::vector<armnn::ImportedInputId> importedInputIds;
    std::vector<armnn::ImportedOutputId> importedOutputIds;
    try
    {
        if (ctx.measureTimings == MeasureTiming::YES)
        {
            ctx.deviceStart = Clock::now();
        }
        armnn::Status status;
        VLOG(DRIVER) << "ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled false";
        importedInputIds = m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
        if (!importedInputIds.empty())
        {
            // Some or all of the input tensors have been imported. Remove the imported ones
            // from inputTensors so they are not also passed to EnqueueWorkload by value.
            for (armnn::ImportedInputId& importedId : importedInputIds)
            {
                inputTensors.erase(
                    std::remove_if(
                        inputTensors.begin(), inputTensors.end(),
                        [&importedId](std::pair<armnn::LayerBindingId, class armnn::ConstTensor>& element) {
                            return (element.first == static_cast<int>(importedId));
                        }),
                    inputTensors.end());
            }
        }
        importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
        if (!importedOutputIds.empty())
        {
            // Some or all of the output tensors have been imported. Remove the imported ones
            // from outputTensors so they are not also passed to EnqueueWorkload by value.
            for (armnn::ImportedOutputId& importedId : importedOutputIds)
            {
                outputTensors.erase(
                    std::remove_if(
                        outputTensors.begin(), outputTensors.end(),
                        [&importedId](std::pair<armnn::LayerBindingId, class armnn::Tensor>& element) {
                            return (element.first == static_cast<int>(importedId));
                        }),
                    outputTensors.end());
            }
        }
        status = m_Runtime->EnqueueWorkload(m_NetworkId,
                                            inputTensors,
                                            outputTensors,
                                            importedInputIds,
                                            importedOutputIds);

        if (ctx.measureTimings == MeasureTiming::YES)
        {
            ctx.deviceEnd = Clock::now();
        }
        if (status != armnn::Status::Success)
        {
            VLOG(DRIVER) << "ArmnnPreparedModel::ExecuteGraph EnqueueWorkload failed";
            return ErrorStatus::GENERAL_FAILURE;
        }
    }
    catch (armnn::Exception& e)
    {
        VLOG(DRIVER) << "armnn::Exception caught from EnqueueWorkload: " << e.what();
        return ErrorStatus::GENERAL_FAILURE;
    }
    catch (std::exception& e)
    {
        VLOG(DRIVER) << "std::exception caught from EnqueueWorkload: " << e.what();
        return ErrorStatus::GENERAL_FAILURE;
    }

    if (!pointerMemory && (!importedInputIds.empty() || !importedOutputIds.empty()))
    {
        CommitPools(*pMemPools);
    }
    DumpTensorsIfRequired("Output", outputTensors);

    if (ctx.measureTimings == MeasureTiming::YES)
    {
        ctx.driverEnd = Clock::now();
        Timing timing;
        timing.timeOnDevice = ctx.deviceEnd - ctx.deviceStart;
        timing.timeInDriver = ctx.driverEnd - ctx.driverStart;
        VLOG(DRIVER) << "ArmnnPreparedModel::execute timing - Device = "
                     << timing.timeOnDevice << " Driver = " << timing.timeInDriver;
    }
    return ErrorStatus::NONE;
}

Priority ArmnnPreparedModel::GetModelPriority() const
{
    return m_ModelPriority;
}

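// Fenced execution entry point. Waits for all dependency sync fences to signal, then runs the
// graph synchronously and returns an already-signalled fence plus a callback that reports the
// execution timings (or the error status) when queried.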
GeneralResult<std::pair<SyncFence, ExecuteFencedInfoCallback>> ArmnnPreparedModel::executeFenced(
    const Request& request,
    const std::vector<SyncFence>& waitFor,
    MeasureTiming measureTiming,
    const OptionalTimePoint& deadline,
    const OptionalDuration&,
    const OptionalDuration&,
    const std::vector<android::nn::TokenValuePair>& hints,
    const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix) const
{
    VLOG(DRIVER) << "ArmnnPreparedModel::executeFenced()";

    if (!m_PrepareFromCache)
    {
        const auto modelRequest = validateRequestForModel(request, m_Model);
        if (!modelRequest.ok())
        {
            return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << modelRequest.error();
        }
        VLOG(DRIVER) << "ArmnnPreparedModel::executeFenced(): " << GetModelSummary(m_Model).c_str();
    }
    if (hasDeadlinePassed(deadline))
    {
        return NN_ERROR(ErrorStatus::MISSED_DEADLINE_PERSISTENT);
    }

    CanonicalExecutionContext ctx;
    if (measureTiming == MeasureTiming::YES)
    {
        ctx.measureTimings = measureTiming;
        ctx.driverStart = Clock::now();
    }

    // Wait for the dependent events to signal.
    for (const auto& syncFence : waitFor)
    {
        if (!syncFence.getSharedHandle())
        {
            return NN_ERROR(ErrorStatus::INVALID_ARGUMENT);
        }
        if (syncFence.syncWait({}) != SyncFence::FenceState::SIGNALED)
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "syncWait failed";
        }
    }

    android::nn::TimePoint fenceExecutionStart;
    if (measureTiming == MeasureTiming::YES)
    {
        fenceExecutionStart = Clock::now();
    }

    // Map the memory pools into shared pointers.
    // Use a shared memory pools vector on the heap, as it is passed to the request thread.
    auto memPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();

    // Allocate the tensors on the heap, as they are passed to the request thread.
    auto inputTensors = std::make_shared<armnn::InputTensors>();
    auto outputTensors = std::make_shared<armnn::OutputTensors>();

    auto isPointerTypeMemory = IsPointerTypeMemory(request);
    ErrorStatus theErrorStatus = PrepareMemoryForIO(*inputTensors,
                                                    *outputTensors,
                                                    *memPools,
                                                    request,
                                                    isPointerTypeMemory);

    if (theErrorStatus != ErrorStatus::NONE)
    {
        return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << "executeFenced() failed";
    }

    Timing timingSinceLaunch = {};
    Timing timingAfterFence = {};
    if (measureTiming == MeasureTiming::YES)
    {
        timingAfterFence.timeOnDevice = ctx.deviceEnd - ctx.deviceStart;
        timingAfterFence.timeInDriver = ctx.driverEnd - fenceExecutionStart;
        VLOG(DRIVER) << "executeFenced timingSinceLaunch = " << timingAfterFence.timeOnDevice;
        VLOG(DRIVER) << "executeFenced timingAfterFence = " << timingAfterFence.timeInDriver;
    }

    VLOG(DRIVER) << "ArmnnCanonicalPreparedModel::executeFenced(...) before ExecuteGraph";
    auto errorStatus = ExecuteGraph(memPools, *inputTensors, *outputTensors, ctx, isPointerTypeMemory);
    VLOG(DRIVER) << "ArmnnCanonicalPreparedModel::executeFenced(...) after ExecuteGraph";

    // The callback reports the error status of the execution, or the captured timings on
    // success. Each error case sets its own code; only the success path returns a value.
    ExecuteFencedInfoCallback armnnFencedExecutionCallback =
        [timingSinceLaunch, timingAfterFence, errorStatus]() {

            GeneralResult<std::pair<Timing, Timing>> result;

            switch (errorStatus)
            {
                case ErrorStatus::OUTPUT_INSUFFICIENT_SIZE:
                    result.error().code = ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
                    break;
                case ErrorStatus::GENERAL_FAILURE:
                    result.error().code = ErrorStatus::GENERAL_FAILURE;
                    break;
                case ErrorStatus::INVALID_ARGUMENT:
                    result.error().code = ErrorStatus::INVALID_ARGUMENT;
                    break;
                default:
                {
                    result.value() = std::make_pair(timingSinceLaunch, timingAfterFence);
                }
            }
            return result;
        };
    return std::make_pair(SyncFence::createAsSignaled(), std::move(armnnFencedExecutionCallback));
}

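// Creates a reusable execution which forwards each computation to this prepared model's
// execute() via DefaultExecution.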
GeneralResult<SharedExecution> ArmnnPreparedModel::createReusableExecution(
    const Request& request,
    MeasureTiming measureTiming,
    const OptionalDuration& loopTimeoutDuration,
    const std::vector<android::nn::TokenValuePair>& hints,
    const std::vector<android::nn::ExtensionNameAndPrefix>& extensionNameToPrefix) const
{
    VLOG(DRIVER) << "ArmnnPreparedModel::createReusableExecution()";
    return std::make_shared<DefaultExecution>(shared_from_this(),
                                              request,
                                              measureTiming,
                                              loopTimeoutDuration);
}

GeneralResult<SharedBurst> ArmnnPreparedModel::configureExecutionBurst() const
{
    // TODO: Implement BURST
    return nullptr;
}

std::any ArmnnPreparedModel::getUnderlyingResource() const
{
    return &m_Model;
}

template<typename TensorBindingCollection>
void ArmnnPreparedModel::DumpTensorsIfRequired(char const* tensorNamePrefix,
                                               const TensorBindingCollection& tensorBindings) const
{
    if (!m_RequestInputsAndOutputsDumpDir.empty())
    {
        const std::string requestName = std::to_string(m_NetworkId) + ".dump";
        for (std::size_t i = 0u; i < tensorBindings.size(); ++i)
        {
            DumpTensor(m_RequestInputsAndOutputsDumpDir,
                       requestName,
                       BuildTensorName(tensorNamePrefix, i),
                       tensorBindings[i].second);
        }
    }
}

ArmnnPreparedModel::~ArmnnPreparedModel()
{
    VLOG(DRIVER) << "ArmnnPreparedModel::~ArmnnPreparedModel()";
    // Get a hold of the profiler used by this model.
    if (m_GpuProfilingEnabled)
    {
        auto profiler = m_Runtime->GetProfiler(m_NetworkId);
        if (profiler)
        {
            // Dump the profiling info to a file if required.
            DumpJsonProfilingIfRequired(m_GpuProfilingEnabled,
                                        m_RequestInputsAndOutputsDumpDir,
                                        m_NetworkId,
                                        profiler.get());
        }
    }
    // Unload the network associated with this model.
    m_Runtime->UnloadNetwork(m_NetworkId);
}

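// Runs the loaded network once with dummy, zero-initialised inputs and outputs, returning true
// on success.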
bool ArmnnPreparedModel::ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs) const
{
    std::vector<std::vector<char>> storage;
    // Reserve up front so the data() pointers taken below are not invalidated by reallocation.
    storage.reserve(numInputs + numOutputs);
    armnn::InputTensors inputTensors;
    for (unsigned int i = 0; i < numInputs; i++)
    {
        armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
        // inputTensors (of type InputTensors) is composed of a vector of ConstTensors.
        // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
        inputTensorInfo.SetConstant();
        storage.emplace_back(inputTensorInfo.GetNumBytes());
        const armnn::ConstTensor inputTensor(inputTensorInfo, storage.back().data());

        inputTensors.emplace_back(i, inputTensor);
    }

    armnn::OutputTensors outputTensors;
    for (unsigned int i = 0; i < numOutputs; i++)
    {
        const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
        storage.emplace_back(outputTensorInfo.GetNumBytes());
        const armnn::Tensor outputTensor(outputTensorInfo, storage.back().data());

        outputTensors.emplace_back(i, outputTensor);
    }
    CanonicalExecutionContext ctx;
    ctx.measureTimings = MeasureTiming::NO;
    auto memPools = std::make_shared<std::vector<::android::nn::RunTimePoolInfo>>();

    auto errorStatus = ExecuteGraph(memPools,
                                    inputTensors,
                                    outputTensors,
                                    ctx);

    return errorStatus == ErrorStatus::NONE;
}

} // namespace armnn_driver