1 /* 2 * Copyright (c) 2024 MediaTek Inc. 3 * 4 * Licensed under the BSD License (the "License"); you may not use this file 5 * except in compliance with the License. See the license file in the root 6 * directory of this source tree for more details. 7 */ 8 9 #pragma once 10 11 #include "NeuronBufferAllocator.h" 12 #include "NeuronExecutor.h" 13 #include "NeuronLog.h" 14 #include "NeuronPayloadHeader.h" 15 #include "api/APUWareUtilsLib.h" 16 #include "api/NeuronAdapter.h" 17 18 #include <executorch/runtime/backend/interface.h> 19 #include <executorch/runtime/core/error.h> 20 #include <executorch/runtime/core/evalue.h> 21 22 #include <memory> 23 #include <unordered_map> 24 #include <unordered_set> 25 26 namespace executorch { 27 namespace backends { 28 namespace neuron { 29 30 class NeuronBackend final : public ::executorch::runtime::BackendInterface { 31 public: 32 ::executorch::runtime::Result<::executorch::runtime::DelegateHandle*> init( 33 ::executorch::runtime::BackendInitContext& context, 34 ::executorch::runtime::FreeableBuffer* processed, 35 ::executorch::runtime::ArrayRef<::executorch::runtime::CompileSpec> 36 compile_specs) const override; 37 38 ::executorch::runtime::Error execute( 39 ET_UNUSED ::executorch::runtime::BackendExecutionContext& context, 40 ::executorch::runtime::DelegateHandle* handle, 41 ::executorch::runtime::EValue** args) const override; 42 43 void destroy(::executorch::runtime::DelegateHandle* handle) const override; 44 45 bool is_available() const override; 46 }; 47 48 extern const char kHighAddrKey[]; 49 extern const char kImportForeverKey[]; 50 51 struct NeuronDelegateSetting { 52 bool mHighAddr = false; 53 54 bool mImportForever = false; 55 ToRuntimeOptionNeuronDelegateSetting56 std::string ToRuntimeOption() { 57 if (mHighAddr && mImportForever) { 58 return "--apusys-config \"{ \\\"high_addr\\\": true, \\\"import_forever\\\": true }\""; 59 } else if (mHighAddr) { 60 return "--apusys-config \"{ \\\"high_addr\\\": true }\""; 61 } else if (mImportForever) { 62 return "--apusys-config \"{ \\\"import_forever\\\": true }\""; 63 } else { 64 return ""; 65 } 66 } 67 }; 68 69 class NeuronExecuTorchDelegate { 70 public: 71 class MemoryCache { 72 public: 73 template <bool isInput> IsCached(int i,void * ptr)74 bool IsCached(int i, void* ptr) { 75 const auto& cache = isInput ? mInputCache : mOutputCache; 76 auto it = cache.find(i); 77 return (it != cache.end()) && (ptr == it->second); 78 } 79 80 template <bool isInput> UpdateCache(int i,void * ptr)81 void UpdateCache(int i, void* ptr) { 82 (isInput ? mInputCache[i] : mOutputCache[i]) = ptr; 83 return; 84 } 85 86 private: 87 std::unordered_map<int, void*> mInputCache; 88 89 std::unordered_map<int, void*> mOutputCache; 90 }; 91 NeuronExecuTorchDelegate()92 NeuronExecuTorchDelegate() {} 93 ~NeuronExecuTorchDelegate()94 ~NeuronExecuTorchDelegate() { 95 mPLock->Stop(); 96 } 97 LoadCompiledNetwork(NeuronPayload payload,NeuronDelegateSetting options)98 int LoadCompiledNetwork( 99 NeuronPayload payload, 100 NeuronDelegateSetting options) { 101 mSettings = options; 102 auto runtimeOption = mSettings.ToRuntimeOption(); 103 auto res = mExecutor.LoadFromCompiledNetwork( 104 payload.CompiledNetwork, 105 payload.Header.DataLen, 106 payload.Header.InputCount, 107 payload.Header.OutputCount, 108 runtimeOption); 109 CHECK_NO_ERROR(res); 110 CHECK_TRUE(mExecutor.IsValid()); 111 SummaryIoCounts(); 112 mPLock = std::unique_ptr<ScopePerformancer>(new ScopePerformancer); 113 return NEURON_NO_ERROR; 114 } 115 116 ::executorch::runtime::Error execute( 117 ET_UNUSED ::executorch::runtime::BackendExecutionContext& context, 118 ::executorch::runtime::EValue** args) const; 119 120 private: 121 template <bool isInput> IsCached(int index,void * ptr)122 bool IsCached(int index, void* ptr) const { 123 return mCache.IsCached</*isInput=*/isInput>(index, ptr); 124 } 125 126 template <bool isInput> UpdateCache(int index,void * ptr)127 void UpdateCache(int index, void* ptr) const { 128 mCache.UpdateCache<isInput>(index, ptr); 129 } 130 SummaryIoCounts()131 int SummaryIoCounts() { 132 for (int i = 0;; i++) { 133 size_t size = mExecutor.GetInputOutputPaddedSize</*isInput*/ true>(i); 134 if (size == 0) { 135 break; 136 } 137 LogInfo("NeuronBackend", "Model input:%d size: %lu", i, size); 138 mInputSizes.push_back(size); 139 } 140 for (int o = 0;; o++) { 141 size_t size = mExecutor.GetInputOutputPaddedSize</*isInput*/ false>(o); 142 if (size == 0) { 143 break; 144 } 145 LogInfo("NeuronBackend", "Model output:%d size: %lu", o, size); 146 mOutputSizes.push_back(size); 147 } 148 return NEURON_NO_ERROR; 149 } 150 151 int HintNeuronBackend(::executorch::runtime::EValue** args) const; 152 153 private: 154 std::vector<size_t> mInputSizes; 155 156 std::vector<size_t> mOutputSizes; 157 158 mutable MemoryCache mCache; 159 160 std::unique_ptr<ScopePerformancer> mPLock; 161 162 neuron::NeuronExecutor mExecutor; 163 164 NeuronDelegateSetting mSettings; 165 166 mutable std::unordered_set<const void*> mHasImported; 167 168 private: 169 NeuronExecuTorchDelegate(const NeuronExecuTorchDelegate&); 170 171 NeuronExecuTorchDelegate operator=(const NeuronExecuTorchDelegate&); 172 }; 173 174 } // namespace neuron 175 } // namespace backends 176 } // namespace executorch 177