xref: /aosp_15_r20/external/executorch/examples/mediatek/executor_runner/mtk_oss_executor_runner.cpp (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * Copyright (c) 2024 MediaTek Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the BSD-style license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 /**
11  * @file
12  *
13  * This tool can run ExecuTorch model files that only use operators that
14  * are covered by the portable kernels, with possible delegate to the
15  * test_backend_compiler_lib.
16  *
17  * It reads input tensor data from the files listed in the input_list file,
18  * and writes each output tensor to the output folder as a raw binary file.
19  */
20 
#include <chrono>
#include <cinttypes>
#include <cstdlib>
#include <ctime>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <gflags/gflags.h>

#include <executorch/extension/data_loader/file_data_loader.h>
#include <executorch/extension/evalue_util/print_evalue.h>
#include <executorch/extension/runner_util/inputs.h>
#include <executorch/runtime/executor/method.h>
#include <executorch/runtime/executor/program.h>
#include <executorch/runtime/platform/log.h>
#include <executorch/runtime/platform/runtime.h>
37 
// Static pool backing the method allocator in main(). 8 MB is an empirical
// size for this example; method loading fails with an explicit error if the
// loaded method needs more metadata memory than this.
static uint8_t method_allocator_pool[8 * 1024U * 1024U]; // 8 MB

// Model Path
DEFINE_string(
    model_path,
    "model.pte",
    "Model serialized in flatbuffer format. Default to 'model.pte'");
// Text file with one inference per line; each line is a space-separated list
// of raw binary files, one per model input (see the input loop in main()).
DEFINE_string(
    input_list,
    "input_list.txt",
    "Model input list. Default to 'input_list.txt'");
// Destination folder for output_<inference>_<output>.bin files.
DEFINE_string(
    output_folder,
    "outputs",
    "Model output folder. Default to 'outputs'");

// Short aliases for the ExecuTorch runtime types used throughout main().
using executorch::aten::Tensor;
using executorch::aten::TensorImpl;
using executorch::extension::BufferCleanup;
using executorch::extension::FileDataLoader;
using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::HierarchicalAllocator;
using executorch::runtime::MemoryAllocator;
using executorch::runtime::MemoryManager;
using executorch::runtime::Method;
using executorch::runtime::MethodMeta;
using executorch::runtime::Program;
using executorch::runtime::Result;
using executorch::runtime::Span;
using executorch::runtime::Tag;
using executorch::runtime::TensorInfo;

// Pulls in create_directories() used to make the output folder.
using namespace std::filesystem;
72 
main(int argc,char ** argv)73 int main(int argc, char** argv) {
74   executorch::runtime::runtime_init();
75 
76   gflags::ParseCommandLineFlags(&argc, &argv, true);
77   if (argc != 1) {
78     std::string msg = "Extra commandline args:";
79     for (int i = 1 /* skip argv[0] (program name) */; i < argc; i++) {
80       msg += std::string(" ") + argv[i];
81     }
82     ET_LOG(Error, "%s", msg.c_str());
83     return 1;
84   }
85 
86   // Create output folder
87   create_directories(FLAGS_output_folder);
88 
89   // Create a loader to get the data of the program file. There are other
90   // DataLoaders that use mmap() or point to data that's already in memory, and
91   // users can create their own DataLoaders to load from arbitrary sources.
92   const char* model_path = FLAGS_model_path.c_str();
93   Result<FileDataLoader> loader = FileDataLoader::from(model_path);
94   ET_CHECK_MSG(
95       loader.ok(),
96       "FileDataLoader::from() failed: 0x%" PRIx32,
97       (uint32_t)loader.error());
98 
99   // Parse the program file. This is immutable, and can also be reused between
100   // multiple execution invocations across multiple threads.
101   Result<Program> program = Program::load(&loader.get());
102   if (!program.ok()) {
103     ET_LOG(Error, "Failed to parse model file %s", model_path);
104     return 1;
105   }
106   ET_LOG(Info, "Model file %s is loaded.", model_path);
107 
108   // Use the first method in the program.
109   const char* method_name = nullptr;
110   {
111     const auto method_name_result = program->get_method_name(0);
112     ET_CHECK_MSG(method_name_result.ok(), "Program has no methods");
113     method_name = *method_name_result;
114   }
115   ET_LOG(Info, "Using method %s", method_name);
116 
117   // MethodMeta describes the memory requirements of the method.
118   Result<MethodMeta> method_meta_result = program->method_meta(method_name);
119   ET_CHECK_MSG(
120       method_meta_result.ok(),
121       "Failed to get method_meta for %s: 0x%" PRIx32,
122       method_name,
123       (uint32_t)method_meta_result.error());
124 
125   //
126   // The runtime does not use malloc/new; it allocates all memory using the
127   // MemoryManger provided by the client. Clients are responsible for allocating
128   // the memory ahead of time, or providing MemoryAllocator subclasses that can
129   // do it dynamically.
130   //
131 
132   // The method allocator is used to allocate all dynamic C++ metadata/objects
133   // used to represent the loaded method. This allocator is only used during
134   // loading a method of the program, which will return an error if there was
135   // not enough memory.
136   //
137   // The amount of memory required depends on the loaded method and the runtime
138   // code itself. The amount of memory here is usually determined by running the
139   // method and seeing how much memory is actually used, though it's possible to
140   // subclass MemoryAllocator so that it calls malloc() under the hood (see
141   // MallocMemoryAllocator).
142   //
143   // In this example we use a statically allocated memory pool.
144   MemoryAllocator method_allocator{
145       MemoryAllocator(sizeof(method_allocator_pool), method_allocator_pool)};
146 
147   // The memory-planned buffers will back the mutable tensors used by the
148   // method. The sizes of these buffers were determined ahead of time during the
149   // memory-planning pasees.
150   //
151   // Each buffer typically corresponds to a different hardware memory bank. Most
152   // mobile environments will only have a single buffer. Some embedded
153   // environments may have more than one for, e.g., slow/large DRAM and
154   // fast/small SRAM, or for memory associated with particular cores.
155   std::vector<std::unique_ptr<uint8_t[]>> planned_buffers; // Owns the memory
156   std::vector<Span<uint8_t>> planned_spans; // Passed to the allocator
157   size_t num_memory_planned_buffers =
158       method_meta_result->num_memory_planned_buffers();
159   for (size_t id = 0; id < num_memory_planned_buffers; ++id) {
160     // .get() will always succeed because id < num_memory_planned_buffers.
161     size_t buffer_size = static_cast<size_t>(
162         method_meta_result->memory_planned_buffer_size(id).get());
163     ET_LOG(Info, "Setting up planned buffer %zu, size %zu.", id, buffer_size);
164     planned_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
165     planned_spans.push_back({planned_buffers.back().get(), buffer_size});
166   }
167   HierarchicalAllocator planned_memory(
168       {planned_spans.data(), planned_spans.size()});
169 
170   // Assemble all of the allocators into the MemoryManager that the Executor
171   // will use.
172   MemoryManager memory_manager(&method_allocator, &planned_memory);
173 
174   //
175   // Load the method from the program, using the provided allocators. Running
176   // the method can mutate the memory-planned buffers, so the method should only
177   // be used by a single thread at at time, but it can be reused.
178   //
179   Result<Method> method = program->load_method(method_name, &memory_manager);
180   ET_CHECK_MSG(
181       method.ok(),
182       "Loading of method %s failed with status 0x%" PRIx32,
183       method_name,
184       (uint32_t)method.error());
185   ET_LOG(Info, "Method loaded.");
186 
187   std::ifstream input_list(FLAGS_input_list);
188   ET_CHECK_MSG(
189       input_list.is_open(),
190       "Error: cannot open input file %s",
191       FLAGS_input_list.c_str());
192 
193   auto split = [](std::string s, std::string delimiter) {
194     size_t pos_start = 0, pos_end, delim_len = delimiter.length();
195     std::string token;
196     std::vector<std::string> res;
197 
198     while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) {
199       token = s.substr(pos_start, pos_end - pos_start);
200       pos_start = pos_end + delim_len;
201       res.push_back(token);
202     }
203     res.push_back(s.substr(pos_start));
204     return res;
205   };
206 
207   MethodMeta method_meta = method->method_meta();
208   size_t num_inputs = method_meta.num_inputs();
209   std::string file_path;
210   int inference_index = 0;
211   while (std::getline(input_list, file_path)) {
212     auto input_files = split(file_path, " ");
213     if (input_files.size() == 0) {
214       break;
215     }
216     ET_CHECK_MSG(
217         input_files.size() == num_inputs,
218         "Model expect %zu inputs but get %zu from input files",
219         num_inputs,
220         input_files.size());
221 
222     // Prepare the inputs.
223     size_t num_allocated = 0;
224     ET_LOG(Info, "Number of inputs: %zu", num_inputs);
225     void** inputs = (void**)malloc(num_inputs * sizeof(void*));
226 
227     for (size_t i = 0; i < num_inputs; i++) {
228       auto tag = method_meta.input_tag(i);
229       if (tag.get() != Tag::Tensor) {
230         ET_LOG(Debug, "Skipping malloc non-tensor input %zu", i);
231         continue;
232       }
233       Result<TensorInfo> tensor_meta = method_meta.input_tensor_meta(i);
234       const auto nbytes = tensor_meta->nbytes();
235       // This input is a tensor. Allocate a buffer for it.
236       void* data_ptr = malloc(nbytes);
237 
238       // Read data from file
239       std::ifstream fin(input_files[i], std::ios::binary);
240       fin.seekg(0, fin.end);
241       size_t file_size = fin.tellg();
242 
243       ET_CHECK_MSG(
244           file_size == nbytes,
245           "Input %zu size mismatch. file bytes: %zu, tensor bytes: %zu",
246           i,
247           file_size,
248           nbytes);
249 
250       fin.seekg(0, fin.beg);
251       fin.read(static_cast<char*>(data_ptr), file_size);
252       fin.close();
253       inputs[num_allocated++] = data_ptr;
254 
255       // Set backend input
256       auto scalar_type = tensor_meta->scalar_type();
257       auto sizes_raw = tensor_meta->sizes();
258       auto dim = sizes_raw.size();
259       auto dim_order_raw = tensor_meta->dim_order();
260       std::vector sizes(sizes_raw.begin(), sizes_raw.end());
261       std::vector dim_order(dim_order_raw.begin(), dim_order_raw.end());
262 
263       TensorImpl impl = TensorImpl(
264           scalar_type, dim, sizes.data(), data_ptr, dim_order.data());
265 
266       Tensor tensor(&impl);
267       Error ret = method->set_input(tensor, i);
268       if (ret != Error::Ok) {
269         ET_LOG(Error, "Failed to set input %zu: 0x%" PRIx32, i, (uint32_t)ret);
270         // The BufferCleanup will free the inputs when it goes out of scope.
271         BufferCleanup cleanup({inputs, num_allocated});
272         return 1;
273       }
274     }
275     BufferCleanup({inputs, num_allocated});
276     ET_LOG(Info, "Inputs prepared.");
277 
278     // Run the model.
279     auto before_exec = std::chrono::high_resolution_clock::now();
280     Error status = Error::Ok;
281     status = method->execute();
282     auto after_exec = std::chrono::high_resolution_clock::now();
283     double elapsed_time = std::chrono::duration_cast<std::chrono::microseconds>(
284                               after_exec - before_exec)
285                               .count() /
286         1000.0;
287 
288     ET_LOG(Info, "Inference took %f ms", elapsed_time);
289     ET_CHECK_MSG(
290         status == Error::Ok,
291         "Execution of method %s failed with status 0x%" PRIx32,
292         method_name,
293         (uint32_t)status);
294     ET_LOG(Info, "Model executed successfully.");
295 
296     // Get output data
297     size_t output_size = method->outputs_size();
298     ET_LOG(Info, "Number of outputs: %zu", output_size);
299     std::vector<EValue> outputs(output_size);
300     status = method->get_outputs(outputs.data(), output_size);
301     ET_CHECK(status == Error::Ok);
302     for (size_t i = 0; i < output_size; i++) {
303       auto output_tensor = outputs[i].toTensor();
304       auto output_file_name = FLAGS_output_folder + "/output_" +
305           std::to_string(inference_index) + "_" + std::to_string(i) + ".bin";
306       std::ofstream fout(output_file_name.c_str(), std::ios::binary);
307       fout.write(output_tensor.const_data_ptr<char>(), output_tensor.nbytes());
308       fout.close();
309     }
310 
311     inference_index++;
312   }
313 
314   return 0;
315 }
316