xref: /aosp_15_r20/external/executorch/backends/xnnpack/runtime/XNNExecutor.cpp (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 #include <executorch/backends/xnnpack/runtime/XNNExecutor.h>
10 
11 namespace executorch {
12 namespace backends {
13 namespace xnnpack {
14 namespace delegate {
15 
16 using executorch::aten::ScalarType;
17 using executorch::aten::SizesType;
18 using executorch::aten::Tensor;
19 using executorch::runtime::BackendExecutionContext;
20 using executorch::runtime::Error;
21 using executorch::runtime::EValue;
22 using executorch::runtime::is_contiguous_dim_order;
23 using executorch::runtime::kTensorDimensionLimit;
24 
25 /**
26  * Initializes the XNNExecutor with the runtime and given number of
27  * inputs/outputs externals_ is resized to the total number of inputs and
28  * outputs
29  */
initialize(xnn_runtime_t runtime,std::vector<uint32_t> && input_ids,std::vector<uint32_t> && output_ids)30 ET_NODISCARD Error XNNExecutor::initialize(
31     xnn_runtime_t runtime,
32     std::vector<uint32_t>&& input_ids,
33     std::vector<uint32_t>&& output_ids) {
34   runtime_ = std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)>(
35       runtime, xnn_delete_runtime);
36 
37   auto error = profiler_.initialize(runtime);
38   if (error != Error::Ok) {
39     ET_LOG(
40         Error,
41         "Failed to start profiling: %u.",
42         static_cast<unsigned int>(error));
43   }
44 
45   // Initialize the external values for inputs and outputs
46   // mapping the executorch arg idx to external IDs
47   input_ids_ = std::move(input_ids);
48   std::sort(input_ids_.begin(), input_ids_.end());
49 
50   output_ids_ = std::move(output_ids);
51   std::sort(output_ids_.begin(), output_ids_.end());
52 
53   externals_.resize(input_ids_.size() + output_ids_.size());
54 
55   return Error::Ok;
56 }
57 
58 /**
59  * Prepares the args for XNNPACK Runtime.
60  *
61  * Creates an array of xnn_externals_values from the EValues passed in.
62  * Reshapes all the external input tensors, in case any input shapes have
63  * changed. The reshapes the entire runtime, propagating shape information
64  * through the runtime.
65  *
66  * Note: the external ids given to the external tensors in the XNNPACK
67  * runtime correspond to their index in the list of arg passed into
68  * delegate->execute()
69  */
prepare_args(EValue ** args)70 ET_NODISCARD Error XNNExecutor::prepare_args(EValue** args) {
71   // Create xnn_externals_value from evalue args
72   xnn_status status;
73   for (uint32_t i = 0; i < externals_.size(); ++i) {
74     if (i < input_ids_.size()) {
75       externals_[i].id = input_ids_[i];
76     } else {
77       externals_[i].id = output_ids_[i - input_ids_.size()];
78     }
79     uint32_t ext_id = externals_[i].id;
80 
81     ET_CHECK_OR_RETURN_ERROR(
82         args[ext_id]->isTensor(),
83         InvalidArgument,
84         "Expected argument to delegate at index %u to be a Tensor, but got %" PRIu32,
85         i,
86         static_cast<uint32_t>(args[ext_id]->tag));
87 
88     Tensor* tensor = &args[ext_id]->toTensor();
89     externals_[i].data = tensor->mutable_data_ptr<float>();
90 
91     // Reshape runtime inputs
92     if (i < input_ids_.size()) {
93       size_t num_dims = tensor->dim();
94       ET_CHECK_OR_RETURN_ERROR(
95           is_contiguous_dim_order(tensor->dim_order().data(), tensor->dim()),
96           Internal,
97           "Expecting default dim_order but got a non default dim_order tensor for external input %u",
98           i);
99       size_t dims[XNN_MAX_TENSOR_DIMS];
100       ET_CHECK_OR_RETURN_ERROR(
101           num_dims <= XNN_MAX_TENSOR_DIMS,
102           InvalidArgument,
103           "XNNPACK backend accepts tensors with at most %d dims, but got %zu",
104           XNN_MAX_TENSOR_DIMS,
105           num_dims);
106       for (int d = 0; d < num_dims; ++d) {
107         dims[d] = tensor->size(d);
108       }
109       status =
110           xnn_reshape_external_value(runtime_.get(), ext_id, num_dims, dims);
111       ET_CHECK_OR_RETURN_ERROR(
112           status == xnn_status_success,
113           Internal,
114           "Internal Error: Reshape Input Tensor Failed with code: %s",
115           xnn_status_to_string(status));
116     }
117   }
118   // // Propagate Input Shape and Memory Plan for increased allocation
119   status = xnn_reshape_runtime(runtime_.get());
120 
121   ET_CHECK_OR_RETURN_ERROR(
122       status == xnn_status_success,
123       Internal,
124       "Internal Error: Propagating input shapes failed with code: %s",
125       xnn_status_to_string(status));
126 
127   return Error::Ok;
128 }
129 
130 /**
131  * Runs the XNNPACK Runtime.
132  *
133  * We first setup the runtime by feeding the externals_ to runtime setup.
134  * After which we then execute the runtime through invoke_runtime.
135  */
forward(BackendExecutionContext & context)136 ET_NODISCARD Error XNNExecutor::forward(BackendExecutionContext& context) {
137   ET_CHECK_OR_RETURN_ERROR(
138       runtime_ != nullptr,
139       Internal,
140       "XNNPACK Delegate did not compile correctly");
141 
142   xnn_status status = xnn_setup_runtime_v2(
143       runtime_.get(), externals_.size(), externals_.data());
144 
145   ET_CHECK_OR_RETURN_ERROR(
146       status == xnn_status_success,
147       Internal,
148       "Internal Error: Setting up the runtime failed with code: %s",
149       xnn_status_to_string(status));
150 
151   auto error = profiler_.start(context.event_tracer());
152   if (error != Error::Ok) {
153     ET_LOG(
154         Error,
155         "Failed to start profiling: %u.",
156         static_cast<unsigned int>(error));
157   }
158 
159   status = xnn_invoke_runtime(runtime_.get());
160 
161   error = profiler_.end();
162   if (error != Error::Ok) {
163     ET_LOG(
164         Error,
165         "Failed to end profiling: %u.",
166         static_cast<unsigned int>(error));
167   }
168 
169   ET_CHECK_OR_RETURN_ERROR(
170       status == xnn_status_success,
171       Internal,
172       "XNN Runtime invoke failed with code: %s",
173       xnn_status_to_string(status));
174 
175   return Error::Ok;
176 }
177 
178 /**
179  * Prepares the outputs for ExecuTorch
180  *
181  * Resizes the output tensors based on the output shapes returned by
182  * the xnnpack runtime.
183  *
184  * Note: For arg_max pooling, we recast the output index tensor. Since
185  * XNNPACK gives the index tensor to us as int32, we need to convert it
186  * back to int64 for ExecuTorch.
187  */
resize_outputs(EValue ** args) const188 ET_NODISCARD Error XNNExecutor::resize_outputs(EValue** args) const {
189   size_t output_idx_start = input_ids_.size();
190   for (size_t i = output_idx_start; i < externals_.size(); ++i) {
191     uint32_t ext_id = externals_[i].id;
192     Tensor* out_tensor = &args[ext_id]->toTensor();
193 
194     size_t num_dim;
195     size_t dims[XNN_MAX_TENSOR_DIMS];
196 
197     // Fetch the updated output shapes from xnnpack runtime
198     xnn_status status =
199         xnn_get_external_value_shape(runtime_.get(), ext_id, &num_dim, dims);
200 
201     ET_CHECK_OR_RETURN_ERROR(
202         status == xnn_status_success,
203         Internal,
204         "Internal Error: Failed to retrieve graph output shapes");
205 
206     // Convert new output shape into SizesType
207     SizesType expected_output_size[kTensorDimensionLimit];
208     for (size_t d = 0; d < num_dim; ++d) {
209       expected_output_size[d] = static_cast<SizesType>(dims[d]);
210     }
211 
212     executorch::aten::ArrayRef<SizesType> output_size{
213         expected_output_size, static_cast<size_t>(num_dim)};
214 
215     ET_LOG(Debug, "Resizing output tensor to a new shape");
216     Error err = resize_tensor(*out_tensor, output_size);
217     if (err != Error::Ok) {
218       ET_LOG(Error, "Failed to resize output tensor for XNNExecutor");
219       return err;
220     }
221 
222     // Output datatype is int64. However, XNNPACK doesn't support
223     // int64. This means that the data was put into this tensor
224     // by XNNPACK as int32 and needs to be copied to int64 form
225     if (out_tensor->scalar_type() == ScalarType::Long) {
226       int64_t* data_64 = out_tensor->mutable_data_ptr<int64_t>();
227       const int32_t* data_32 = out_tensor->const_data_ptr<int32_t>();
228       for (size_t j = out_tensor->numel() - 1; j >= 0; --j) {
229         data_64[j] = data_32[j];
230       }
231     }
232   }
233 
234   return Error::Ok;
235 }
236 
237 } // namespace delegate
238 } // namespace xnnpack
239 } // namespace backends
240 } // namespace executorch
241