/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * Copyright (c) 2024 MediaTek Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */
9
/**
 * @file
 *
 * This tool can run ExecuTorch model files that only use operators that
 * are covered by the portable kernels, with possible delegation to the
 * test_backend_compiler_lib.
 *
 * It sets all input tensor data to ones, and assumes that the outputs are
 * all fp32 tensors.
 */
20
#include <chrono>
#include <cinttypes>
#include <cstdint>
#include <ctime>
#include <iostream>
#include <memory>
#include <vector>

#include <gflags/gflags.h>

#include <executorch/extension/data_loader/file_data_loader.h>
#include <executorch/extension/evalue_util/print_evalue.h>
#include <executorch/extension/runner_util/inputs.h>
#include <executorch/runtime/executor/method.h>
#include <executorch/runtime/executor/program.h>
#include <executorch/runtime/platform/log.h>
#include <executorch/runtime/platform/runtime.h>
35
36 static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4 MB
37
38 DEFINE_string(
39 model_path,
40 "model.pte",
41 "Model serialized in flatbuffer format.");
42 DEFINE_int32(iteration, 1, "Iterations of inference.");
43
44 using executorch::extension::FileDataLoader;
45 using executorch::extension::prepare_input_tensors;
46 using executorch::runtime::Error;
47 using executorch::runtime::EValue;
48 using executorch::runtime::HierarchicalAllocator;
49 using executorch::runtime::MemoryAllocator;
50 using executorch::runtime::MemoryManager;
51 using executorch::runtime::Method;
52 using executorch::runtime::MethodMeta;
53 using executorch::runtime::Program;
54 using executorch::runtime::Result;
55 using executorch::runtime::Span;
56
main(int argc,char ** argv)57 int main(int argc, char** argv) {
58 executorch::runtime::runtime_init();
59
60 gflags::ParseCommandLineFlags(&argc, &argv, true);
61 if (argc != 1) {
62 std::string msg = "Extra commandline args:";
63 for (int i = 1 /* skip argv[0] (program name) */; i < argc; i++) {
64 msg += std::string(" ") + argv[i];
65 }
66 ET_LOG(Error, "%s", msg.c_str());
67 return 1;
68 }
69
70 // Create a loader to get the data of the program file. There are other
71 // DataLoaders that use mmap() or point to data that's already in memory, and
72 // users can create their own DataLoaders to load from arbitrary sources.
73 const char* model_path = FLAGS_model_path.c_str();
74 Result<FileDataLoader> loader = FileDataLoader::from(model_path);
75 ET_CHECK_MSG(
76 loader.ok(),
77 "FileDataLoader::from() failed: 0x%" PRIx32,
78 (uint32_t)loader.error());
79
80 // Parse the program file. This is immutable, and can also be reused between
81 // multiple execution invocations across multiple threads.
82 Result<Program> program = Program::load(&loader.get());
83 if (!program.ok()) {
84 ET_LOG(Error, "Failed to parse model file %s", model_path);
85 return 1;
86 }
87 ET_LOG(Info, "Model file %s is loaded.", model_path);
88
89 // Use the first method in the program.
90 const char* method_name = nullptr;
91 {
92 const auto method_name_result = program->get_method_name(0);
93 ET_CHECK_MSG(method_name_result.ok(), "Program has no methods");
94 method_name = *method_name_result;
95 }
96 ET_LOG(Info, "Using method %s", method_name);
97
98 // MethodMeta describes the memory requirements of the method.
99 Result<MethodMeta> method_meta = program->method_meta(method_name);
100 ET_CHECK_MSG(
101 method_meta.ok(),
102 "Failed to get method_meta for %s: 0x%" PRIx32,
103 method_name,
104 (uint32_t)method_meta.error());
105
106 //
107 // The runtime does not use malloc/new; it allocates all memory using the
108 // MemoryManger provided by the client. Clients are responsible for allocating
109 // the memory ahead of time, or providing MemoryAllocator subclasses that can
110 // do it dynamically.
111 //
112
113 // The method allocator is used to allocate all dynamic C++ metadata/objects
114 // used to represent the loaded method. This allocator is only used during
115 // loading a method of the program, which will return an error if there was
116 // not enough memory.
117 //
118 // The amount of memory required depends on the loaded method and the runtime
119 // code itself. The amount of memory here is usually determined by running the
120 // method and seeing how much memory is actually used, though it's possible to
121 // subclass MemoryAllocator so that it calls malloc() under the hood (see
122 // MallocMemoryAllocator).
123 //
124 // In this example we use a statically allocated memory pool.
125 MemoryAllocator method_allocator{
126 MemoryAllocator(sizeof(method_allocator_pool), method_allocator_pool)};
127
128 // The memory-planned buffers will back the mutable tensors used by the
129 // method. The sizes of these buffers were determined ahead of time during the
130 // memory-planning pasees.
131 //
132 // Each buffer typically corresponds to a different hardware memory bank. Most
133 // mobile environments will only have a single buffer. Some embedded
134 // environments may have more than one for, e.g., slow/large DRAM and
135 // fast/small SRAM, or for memory associated with particular cores.
136 std::vector<std::unique_ptr<uint8_t[]>> planned_buffers; // Owns the memory
137 std::vector<Span<uint8_t>> planned_spans; // Passed to the allocator
138 size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers();
139 for (size_t id = 0; id < num_memory_planned_buffers; ++id) {
140 // .get() will always succeed because id < num_memory_planned_buffers.
141 size_t buffer_size =
142 static_cast<size_t>(method_meta->memory_planned_buffer_size(id).get());
143 ET_LOG(Info, "Setting up planned buffer %zu, size %zu.", id, buffer_size);
144 planned_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
145 planned_spans.push_back({planned_buffers.back().get(), buffer_size});
146 }
147 HierarchicalAllocator planned_memory(
148 {planned_spans.data(), planned_spans.size()});
149
150 // Assemble all of the allocators into the MemoryManager that the Executor
151 // will use.
152 MemoryManager memory_manager(&method_allocator, &planned_memory);
153
154 //
155 // Load the method from the program, using the provided allocators. Running
156 // the method can mutate the memory-planned buffers, so the method should only
157 // be used by a single thread at at time, but it can be reused.
158 //
159
160 Result<Method> method = program->load_method(method_name, &memory_manager);
161 ET_CHECK_MSG(
162 method.ok(),
163 "Loading of method %s failed with status 0x%" PRIx32,
164 method_name,
165 (uint32_t)method.error());
166 ET_LOG(Info, "Method loaded.");
167
168 // Allocate input tensors and set all of their elements to 1. The `inputs`
169 // variable owns the allocated memory and must live past the last call to
170 // `execute()`.
171 auto inputs = prepare_input_tensors(*method);
172 ET_CHECK_MSG(
173 inputs.ok(),
174 "Could not prepare inputs: 0x%" PRIx32,
175 (uint32_t)inputs.error());
176 ET_LOG(Info, "Inputs prepared.");
177
178 // Run the model.
179 auto before_exec = std::chrono::high_resolution_clock::now();
180 Error status = Error::Ok;
181 for (int i = 0; i < FLAGS_iteration; ++i) {
182 status = method->execute();
183 }
184 auto after_exec = std::chrono::high_resolution_clock::now();
185 double elapsed_time = std::chrono::duration_cast<std::chrono::microseconds>(
186 after_exec - before_exec)
187 .count() /
188 1000.0;
189
190 ET_LOG(
191 Info,
192 "%d inference took %f ms, avg %f ms",
193 FLAGS_iteration,
194 elapsed_time,
195 elapsed_time / (float)FLAGS_iteration);
196 ET_CHECK_MSG(
197 status == Error::Ok,
198 "Execution of method %s failed with status 0x%" PRIx32,
199 method_name,
200 (uint32_t)status);
201 ET_LOG(Info, "Model executed successfully.");
202
203 // Print the outputs.
204 std::vector<EValue> outputs(method->outputs_size());
205 ET_LOG(Info, "%zu outputs: ", outputs.size());
206 status = method->get_outputs(outputs.data(), outputs.size());
207 ET_CHECK(status == Error::Ok);
208 // Print the first and last 100 elements of long lists of scalars.
209 std::cout << executorch::extension::evalue_edge_items(100);
210 for (int i = 0; i < outputs.size(); ++i) {
211 std::cout << "Output " << i << ": " << outputs[i] << std::endl;
212 }
213
214 return 0;
215 }
216