//
// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ArmnnDriverImpl.hpp"
#include "ArmnnPreparedModel.hpp"
#include "ModelToINetworkTransformer.hpp"
#include "SystemPropertiesUtils.hpp"

#include <armnnDeserializer/IDeserializer.hpp>

#include <log/log.h>
#include <sys/stat.h>

namespace
{

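// Every operand type listed below is reported with the same default PerformanceInfo
// (execTime and powerUsage both 0.1); no per-type measurement is attempted here.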
Capabilities GenerateCapabilities()
{
    VLOG(DRIVER) << "ArmnnDriverImpl::GenerateCapabilities()";

    float defaultPerfValue = .1f;
    const Capabilities::PerformanceInfo defaultPerfInfo = { /* execTime */ defaultPerfValue,
                                                            /* powerUsage */ defaultPerfValue
                                                          };
    std::vector<OperandType> operandsTypes({
                OperandType::FLOAT32,
                OperandType::INT32,
                OperandType::UINT32,
                OperandType::TENSOR_FLOAT32,
                OperandType::TENSOR_INT32,
                OperandType::TENSOR_QUANT8_ASYMM,
                OperandType::BOOL,
                OperandType::TENSOR_QUANT16_SYMM,
                OperandType::TENSOR_FLOAT16,
                OperandType::TENSOR_BOOL8,
                OperandType::FLOAT16,
                OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
                OperandType::TENSOR_QUANT16_ASYMM,
                OperandType::TENSOR_QUANT8_SYMM,
                OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
    });

    std::vector<Capabilities::OperandPerformance> operandPerformances;
    operandPerformances.reserve(operandsTypes.size());

    for (auto opType : operandsTypes)
    {
        operandPerformances.push_back(
                Capabilities::OperandPerformance{ /* type */ opType, /* info */ defaultPerfInfo });
    }

    auto operandPerformanceTable =
               Capabilities::OperandPerformanceTable::create(std::move(operandPerformances)).value();

    return { /* relaxedFloat32toFloat16PerformanceScalar */ defaultPerfInfo,
             /* relaxedFloat32toFloat16PerformanceTensor */ defaultPerfInfo,
             /* operandPerformance */ std::move(operandPerformanceTable),
             /* ifPerformance */ defaultPerfInfo,
             /* whilePerformance */ defaultPerfInfo };
}

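// Simple non-cryptographic rolling hash (hash = hash * 31 + byte, seeded with the buffer
// size) used to fingerprint the serialized network and the backend model cache blobs.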
size_t Hash(std::vector<uint8_t>& cacheData)
{
    std::size_t hash = cacheData.size();
    for (auto& i : cacheData)
    {
        hash = ((hash << 5) - hash) + i;
    }
    return hash;
}

} // anonymous namespace

using namespace android::nn;

namespace armnn_driver
{

bool ArmnnDriverImpl::ValidateSharedHandle(const SharedHandle& sharedHandle)
{
    bool valid = true;

    if (*sharedHandle < 0)
    {
        return !valid;
    }

    int dataCacheFileAccessMode = fcntl(*sharedHandle, F_GETFL) & O_ACCMODE;
    if (dataCacheFileAccessMode != O_RDWR)
    {
        return !valid;
    }

    return valid;
}

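// Converts the NNAPI model to an Arm NN INetwork, optimizes it for the requested backends,
// loads it into the runtime and, when cache handles are supplied, writes the serialized
// network (plus a hash) to the data cache so PrepareArmnnModelFromCache can restore it.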
GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModel(
    const armnn::IRuntimePtr& runtime,
    const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
    const DriverOptions& options,
    const Model& model,
    const std::vector<SharedHandle>& modelCacheHandle,
    const std::vector<SharedHandle>& dataCacheHandle,
    const CacheToken& token,
    bool float32ToFloat16,
    Priority priority)
{
    VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModel()";

    if (!runtime)
    {
        return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE) << "Device unavailable";
    }

    if (const auto result = validate(model); !result.ok())
    {
        return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << "Invalid model passed as input";
    }

    // Deliberately ignore any unsupported operations requested by the options -
    // at this point we're being asked to prepare a model that we've already declared support for
    // and the operation indices may be different to those in getSupportedOperations anyway.
    std::set<unsigned int> unsupportedOperations;
    ModelToINetworkTransformer modelConverter(options.GetBackends(),
                                              model,
                                              unsupportedOperations);

    if (modelConverter.GetConversionResult() != ConversionResult::Success)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "ModelToINetworkConverter failed";
    }

    // Serialize the network graph to a .armnn file if an output directory
    // has been specified in the driver's arguments.
    std::vector<uint8_t> dataCacheData;
    bool serializeToFile = !dataCacheHandle.empty();
    auto serializedNetworkFileName =
            SerializeNetwork(*modelConverter.GetINetwork(),
                             options.GetRequestInputsAndOutputsDumpDir(),
                             dataCacheData,
                             serializeToFile);

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptionsOpaque OptOptions;
    OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
    OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());

    int cachedFd = -1;
    bool saveCachedNetwork = options.SaveCachedNetwork();

    unsigned int numberOfCachedModelFiles = 0;
    if (modelCacheHandle.size() > 0)
    {
        unsigned int index = 0;
        for (auto& backend : options.GetBackends())
        {
            // modelCacheHandle size should be equal to numberOfCachedModelFiles
            // modelCacheHandle vector should be in same order as backends
            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
            if (numberOfCacheFiles > 0)
            {
                numberOfCachedModelFiles += numberOfCacheFiles;
                // For GpuAcc numberOfCachedFiles is 1
                if (backend == armnn::Compute::GpuAcc)
                {
                    cachedFd = *modelCacheHandle[index];
                    saveCachedNetwork = true;
                }
                // Advance by this backend's own cache file count so the index stays aligned
                // with the per-backend layout of modelCacheHandle.
                index += numberOfCacheFiles;
            }
        }
    }

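    // The file descriptor collected above is passed to the GpuAcc backend via the
    // "CachedFileDescriptor" option, so its compiled network cache is written directly
    // into the NNAPI-provided model cache file.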
    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "SaveCachedNetwork", saveCachedNetwork },
        { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
        { "CachedFileDescriptor", cachedFd }
    });

    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "NumberOfThreads", options.GetNumberOfThreads() }
    });
    OptOptions.AddModelOption(gpuAcc);
    OptOptions.AddModelOption(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*modelConverter.GetINetwork(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    std::string msg;
    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                MemorySource::Undefined,
                                                MemorySource::Undefined,
                                                options.IsGpuProfilingEnabled());
    auto numInputs  = getMainModel(model).inputIndexes.size();
    auto numOutputs = getMainModel(model).outputIndexes.size();
    try
    {
        if (runtime->LoadNetwork(netId, std::move(optNet), msg, networkProperties) != armnn::Status::Success)
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    // Now that we have a networkId for the graph, rename the exported files to use it
    // so that the graph file can be associated with the exported input/output tensor files.
    RenameExportedFiles(serializedNetworkFileName,
                        dotGraphFileName,
                        options.GetRequestInputsAndOutputsDumpDir(),
                        netId);

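    // Cache layout: the data cache file holds sizeof(size_t) bytes of hash followed by the
    // serialized network; the hash also folds in each backend model cache blob so that
    // PrepareArmnnModelFromCache can detect stale or mismatched caches.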
    // Hash the serialized network data.
    size_t hashValue = 0;
    if (dataCacheHandle.size() == 1)
    {
        hashValue = Hash(dataCacheData);
    }

    // Fold the backend model cache files into the hash.
    if (modelCacheHandle.size() > 0)
    {
        if (modelCacheHandle.size() == numberOfCachedModelFiles)
        {
            for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
            {
                int modelCacheFileAccessMode = fcntl(*modelCacheHandle[i], F_GETFL) & O_ACCMODE;
                if (modelCacheFileAccessMode != O_RDONLY)
                {
                    struct stat statBuffer;
                    if (fstat(*modelCacheHandle[i], &statBuffer) == 0)
                    {
                        long modelDataSize = statBuffer.st_size;
                        if (modelDataSize > 0)
                        {
                            std::vector<uint8_t> modelData(modelDataSize);
                            pread(*modelCacheHandle[i], modelData.data(), modelData.size(), 0);
                            hashValue ^= Hash(modelData);
                        }
                    }
                }
            }
        }
    }

    // Write the hash followed by the serialized network to the data cache.
    if (dataCacheHandle.size() == 1 && hashValue != 0)
    {
        std::vector<uint8_t> theHashValue(sizeof(hashValue));
        ::memcpy(theHashValue.data(), &hashValue, sizeof(hashValue));

        write(*dataCacheHandle[0], theHashValue.data(), theHashValue.size());
        pwrite(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size(), theHashValue.size());
    }

    bool executeWithDummyInputs = (std::find(options.GetBackends().begin(),
                                            options.GetBackends().end(),
                                            armnn::Compute::GpuAcc) != options.GetBackends().end());

    auto preparedModel = std::make_shared<const ArmnnPreparedModel>(netId,
                                                                    runtime.get(),
                                                                    model,
                                                                    options.GetRequestInputsAndOutputsDumpDir(),
                                                                    options.IsGpuProfilingEnabled(),
                                                                    priority);

    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
    // this is enabled) before the first 'real' inference, which removes the overhead of the first inference.
    // Only run this if the GpuAcc backend has been added to the options.
    if (executeWithDummyInputs)
    {
        if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be executed";
        }

        if (clTunedParameters &&
            options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
        {
            // Now that we've done one inference the CL kernel parameters will have been tuned,
            // so save the updated file.
            try
            {
                clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
            }
            catch (std::exception& error)
            {
                VLOG(DRIVER) << "ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file "
                             << options.GetClTunedParametersFile().c_str() << ": " << error.what();
            }
        }
    }
    return std::move(preparedModel);
}

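// Restores a previously prepared model: validates the cache token and handles, checks the
// stored hash against the current cache contents, deserializes the cached network and loads
// it into the runtime without going through model conversion again.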
GeneralResult<SharedPreparedModel> ArmnnDriverImpl::PrepareArmnnModelFromCache(
    const armnn::IRuntimePtr& runtime,
    const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
    const DriverOptions& options,
    const std::vector<SharedHandle>& modelCacheHandle,
    const std::vector<SharedHandle>& dataCacheHandle,
    const CacheToken& token,
    bool float32ToFloat16)
{
    VLOG(DRIVER) << "ArmnnDriverImpl::PrepareArmnnModelFromCache()";

    if (!runtime)
    {
        return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE)
                            << "ArmnnDriverImpl::prepareModelFromCache(): Device unavailable";
    }

    if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                            << "ArmnnDriverImpl::prepareModelFromCache(): Token size does not match!";
    }

    // Validate dataCacheHandle
    if (dataCacheHandle.size() != 1)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                            << "ArmnnDriverImpl::prepareModelFromCache(): Invalid data cache handle!";
    }

    if (!ValidateSharedHandle(dataCacheHandle[0]))
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                << "ArmnnDriverImpl::prepareModelFromCache(): Invalid data cache handle!";
    }

    size_t cachedDataSize = 0;
    struct stat dataStatBuffer;
    if (fstat(*dataCacheHandle[0], &dataStatBuffer) == 0)
    {
        cachedDataSize = dataStatBuffer.st_size;
    }
    if (cachedDataSize == 0)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                << "ArmnnDriverImpl::prepareModelFromCache(): Invalid cached data!";
    }

    // Check that the number of cached model files matches the expected value.
    unsigned int numberOfCachedModelFiles = 0;
    for (auto& backend : options.GetBackends())
    {
        numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
    }
    if (modelCacheHandle.size() != numberOfCachedModelFiles)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                           << "ArmnnDriverImpl::prepareModelFromCache(): Model cache handle size does not match.";
    }

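    // The data cache was written by PrepareArmnnModel as a hash followed by the serialized
    // network, so read the stored hash back first and the network data after it.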
    // Read the hash value.
    std::vector<uint8_t> hashValue(sizeof(size_t));
    pread(*dataCacheHandle[0], hashValue.data(), hashValue.size(), 0);

    // Read the model.
    if (cachedDataSize < hashValue.size())
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                << "ArmnnDriverImpl::prepareModelFromCache(): cachedDataSize is less than hashValue!";
    }
    std::vector<uint8_t> dataCacheData(cachedDataSize - hashValue.size());
    pread(*dataCacheHandle[0], dataCacheData.data(), dataCacheData.size(), hashValue.size());
    auto calculatedHashValue = Hash(dataCacheData);

    int gpuAccCachedFd = -1;
    if (modelCacheHandle.size() > 0)
    {
        unsigned int index = 0;
        for (auto& backend : options.GetBackends())
        {
            // modelCacheHandle size should be equal to numberOfCachedModelFiles
            // modelCacheHandle vector should be in same order as backends
            auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
            if (numberOfCacheFiles > 0)
            {
                if (!ValidateSharedHandle(modelCacheHandle[index]))
                {
                    return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                            << "ArmnnDriverImpl::prepareModelFromCache(): Invalid model cache handle!";
                }
                int cachedFd = *modelCacheHandle[index];
                struct stat statBuffer;
                if (fstat(cachedFd, &statBuffer) == 0)
                {
                    long modelDataSize = statBuffer.st_size;
                    if (modelDataSize > 0)
                    {
                        std::vector<uint8_t> modelData(modelDataSize);
                        pread(cachedFd, modelData.data(), modelData.size(), 0);
                        calculatedHashValue ^= Hash(modelData);

                        if (backend == armnn::Compute::GpuAcc)
                        {
                            gpuAccCachedFd = cachedFd;
                        }
                    }
                }
                index += numberOfCacheFiles;
            }
        }
    }

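    // The recombined hash (serialized network XOR each model cache blob) must match the hash
    // stored in the data cache; any mismatch means the caches are stale or inconsistent.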
    std::vector<uint8_t> calculatedHashData(sizeof(calculatedHashValue));
    ::memcpy(calculatedHashData.data(), &calculatedHashValue, sizeof(calculatedHashValue));
    if (hashValue != calculatedHashData)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                << "ArmnnDriverImpl::prepareModelFromCache(): ValidateHash() failed!";
    }

    // Deserialize the network.
    armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
    try
    {
        network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
    }
    catch (std::exception&)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                << "ArmnnDriverImpl::prepareModelFromCache(): Exception caught from Deserializer!";
    }

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptionsOpaque OptOptions;
    OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
    OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());

    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "SaveCachedNetwork", false },
        { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
        { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
        { "CachedFileDescriptor", gpuAccCachedFd }
    });

    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() },
        { "NumberOfThreads", options.GetNumberOfThreads() }
    });
    OptOptions.AddModelOption(gpuAcc);
    OptOptions.AddModelOption(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*network.get(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << e.what();
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    std::string msg;
    armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
                                                MemorySource::Undefined,
                                                MemorySource::Undefined,
                                                options.IsGpuProfilingEnabled());
    try
    {
        if (runtime->LoadNetwork(netId, std::move(optNet), msg, networkProperties) != armnn::Status::Success)
        {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << "Network could not be loaded";
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE) << message.str();
    }

    auto preparedModel = std::make_shared<const ArmnnPreparedModel>(netId,
                                                      runtime.get(),
                                                      options.GetRequestInputsAndOutputsDumpDir(),
                                                      options.IsGpuProfilingEnabled(),
                                                      Priority::MEDIUM,
                                                      true);
    return std::move(preparedModel);
}

const Capabilities& ArmnnDriverImpl::GetCapabilities(const armnn::IRuntimePtr& runtime)
{
    VLOG(DRIVER) << "ArmnnDriverImpl::GetCapabilities()";
    static const Capabilities theCapabilities = GenerateCapabilities();
    return theCapabilities;
}

} // namespace armnn_driver