xref: /aosp_15_r20/external/executorch/backends/mediatek/runtime/include/NeuronBackend.h (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1 /*
2  * Copyright (c) 2024 MediaTek Inc.
3  *
4  * Licensed under the BSD License (the "License"); you may not use this file
5  * except in compliance with the License. See the license file in the root
6  * directory of this source tree for more details.
7  */
8 
9 #pragma once
10 
11 #include "NeuronBufferAllocator.h"
12 #include "NeuronExecutor.h"
13 #include "NeuronLog.h"
14 #include "NeuronPayloadHeader.h"
15 #include "api/APUWareUtilsLib.h"
16 #include "api/NeuronAdapter.h"
17 
18 #include <executorch/runtime/backend/interface.h>
19 #include <executorch/runtime/core/error.h>
20 #include <executorch/runtime/core/evalue.h>
21 
22 #include <memory>
23 #include <unordered_map>
24 #include <unordered_set>
25 
26 namespace executorch {
27 namespace backends {
28 namespace neuron {
29 
// ExecuTorch backend entry point for the MediaTek Neuron delegate.
// Stateless (no data members); all per-model state lives behind the
// DelegateHandle returned by init().
class NeuronBackend final : public ::executorch::runtime::BackendInterface {
 public:
  // Loads the delegate payload in `processed` into a runnable network and
  // returns an opaque handle for execute()/destroy().
  // NOTE(review): exact payload format and use of `compile_specs` are
  // implemented out-of-line — see the corresponding .cpp.
  ::executorch::runtime::Result<::executorch::runtime::DelegateHandle*> init(
      ::executorch::runtime::BackendInitContext& context,
      ::executorch::runtime::FreeableBuffer* processed,
      ::executorch::runtime::ArrayRef<::executorch::runtime::CompileSpec>
          compile_specs) const override;

  // Runs one inference on the network behind `handle`, with inputs/outputs
  // passed as EValues in `args`.
  ::executorch::runtime::Error execute(
      ET_UNUSED ::executorch::runtime::BackendExecutionContext& context,
      ::executorch::runtime::DelegateHandle* handle,
      ::executorch::runtime::EValue** args) const override;

  // Releases all resources owned by `handle`.
  void destroy(::executorch::runtime::DelegateHandle* handle) const override;

  // Whether the Neuron runtime is usable in the current environment.
  bool is_available() const override;
};
47 
48 extern const char kHighAddrKey[];
49 extern const char kImportForeverKey[];
50 
// Per-delegate options parsed from compile specs (see kHighAddrKey /
// kImportForeverKey above) and forwarded to the Neuron runtime.
struct NeuronDelegateSetting {
  // Emit the APUSys "high_addr" option in the runtime config.
  bool mHighAddr = false;

  // Emit the APUSys "import_forever" option in the runtime config.
  bool mImportForever = false;

  // Serializes the enabled flags into an "--apusys-config" runtime option
  // string. Returns an empty string when no flag is set so callers may pass
  // the result through unconditionally.
  // Fixed: marked const — this is a pure query and must be callable on a
  // const settings object.
  std::string ToRuntimeOption() const {
    if (mHighAddr && mImportForever) {
      return "--apusys-config \"{ \\\"high_addr\\\": true, \\\"import_forever\\\": true }\"";
    } else if (mHighAddr) {
      return "--apusys-config \"{ \\\"high_addr\\\": true }\"";
    } else if (mImportForever) {
      return "--apusys-config \"{ \\\"import_forever\\\": true }\"";
    } else {
      return "";
    }
  }
};
68 
69 class NeuronExecuTorchDelegate {
70  public:
71   class MemoryCache {
72    public:
73     template <bool isInput>
IsCached(int i,void * ptr)74     bool IsCached(int i, void* ptr) {
75       const auto& cache = isInput ? mInputCache : mOutputCache;
76       auto it = cache.find(i);
77       return (it != cache.end()) && (ptr == it->second);
78     }
79 
80     template <bool isInput>
UpdateCache(int i,void * ptr)81     void UpdateCache(int i, void* ptr) {
82       (isInput ? mInputCache[i] : mOutputCache[i]) = ptr;
83       return;
84     }
85 
86    private:
87     std::unordered_map<int, void*> mInputCache;
88 
89     std::unordered_map<int, void*> mOutputCache;
90   };
91 
NeuronExecuTorchDelegate()92   NeuronExecuTorchDelegate() {}
93 
~NeuronExecuTorchDelegate()94   ~NeuronExecuTorchDelegate() {
95     mPLock->Stop();
96   }
97 
LoadCompiledNetwork(NeuronPayload payload,NeuronDelegateSetting options)98   int LoadCompiledNetwork(
99       NeuronPayload payload,
100       NeuronDelegateSetting options) {
101     mSettings = options;
102     auto runtimeOption = mSettings.ToRuntimeOption();
103     auto res = mExecutor.LoadFromCompiledNetwork(
104         payload.CompiledNetwork,
105         payload.Header.DataLen,
106         payload.Header.InputCount,
107         payload.Header.OutputCount,
108         runtimeOption);
109     CHECK_NO_ERROR(res);
110     CHECK_TRUE(mExecutor.IsValid());
111     SummaryIoCounts();
112     mPLock = std::unique_ptr<ScopePerformancer>(new ScopePerformancer);
113     return NEURON_NO_ERROR;
114   }
115 
116   ::executorch::runtime::Error execute(
117       ET_UNUSED ::executorch::runtime::BackendExecutionContext& context,
118       ::executorch::runtime::EValue** args) const;
119 
120  private:
121   template <bool isInput>
IsCached(int index,void * ptr)122   bool IsCached(int index, void* ptr) const {
123     return mCache.IsCached</*isInput=*/isInput>(index, ptr);
124   }
125 
126   template <bool isInput>
UpdateCache(int index,void * ptr)127   void UpdateCache(int index, void* ptr) const {
128     mCache.UpdateCache<isInput>(index, ptr);
129   }
130 
SummaryIoCounts()131   int SummaryIoCounts() {
132     for (int i = 0;; i++) {
133       size_t size = mExecutor.GetInputOutputPaddedSize</*isInput*/ true>(i);
134       if (size == 0) {
135         break;
136       }
137       LogInfo("NeuronBackend", "Model input:%d size: %lu", i, size);
138       mInputSizes.push_back(size);
139     }
140     for (int o = 0;; o++) {
141       size_t size = mExecutor.GetInputOutputPaddedSize</*isInput*/ false>(o);
142       if (size == 0) {
143         break;
144       }
145       LogInfo("NeuronBackend", "Model output:%d size: %lu", o, size);
146       mOutputSizes.push_back(size);
147     }
148     return NEURON_NO_ERROR;
149   }
150 
151   int HintNeuronBackend(::executorch::runtime::EValue** args) const;
152 
153  private:
154   std::vector<size_t> mInputSizes;
155 
156   std::vector<size_t> mOutputSizes;
157 
158   mutable MemoryCache mCache;
159 
160   std::unique_ptr<ScopePerformancer> mPLock;
161 
162   neuron::NeuronExecutor mExecutor;
163 
164   NeuronDelegateSetting mSettings;
165 
166   mutable std::unordered_set<const void*> mHasImported;
167 
168  private:
169   NeuronExecuTorchDelegate(const NeuronExecuTorchDelegate&);
170 
171   NeuronExecuTorchDelegate operator=(const NeuronExecuTorchDelegate&);
172 };
173 
174 } // namespace neuron
175 } // namespace backends
176 } // namespace executorch
177