/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/backends/xnnpack/runtime/XNNExecutor.h>

namespace executorch {
namespace backends {
namespace xnnpack {
namespace delegate {

using executorch::aten::ScalarType;
using executorch::aten::SizesType;
using executorch::aten::Tensor;
using executorch::runtime::BackendExecutionContext;
using executorch::runtime::Error;
using executorch::runtime::EValue;
using executorch::runtime::is_contiguous_dim_order;
using executorch::runtime::kTensorDimensionLimit;

/**
 * Initializes the XNNExecutor with the given runtime and the external IDs of
 * its inputs and outputs. externals_ is resized to the total number of inputs
 * and outputs.
 */
ET_NODISCARD Error XNNExecutor::initialize(
    xnn_runtime_t runtime,
    std::vector<uint32_t>&& input_ids,
    std::vector<uint32_t>&& output_ids) {
  runtime_ = std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)>(
      runtime, xnn_delete_runtime);

  auto error = profiler_.initialize(runtime);
  if (error != Error::Ok) {
    ET_LOG(
        Error,
        "Failed to initialize profiler: %u.",
        static_cast<unsigned int>(error));
  }

  // Initialize the external values for inputs and outputs
  // mapping the executorch arg idx to external IDs
  input_ids_ = std::move(input_ids);
  std::sort(input_ids_.begin(), input_ids_.end());

  output_ids_ = std::move(output_ids);
  std::sort(output_ids_.begin(), output_ids_.end());

  externals_.resize(input_ids_.size() + output_ids_.size());

  return Error::Ok;
}
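
/*
 * Illustrative usage (a sketch, not code from this file; local names are
 * hypothetical): after the XNNPACK graph has been built into an
 * xnn_runtime_t, the compile path hands ownership to the executor along with
 * the external-value IDs of the graph inputs and outputs.
 *
 *   XNNExecutor* executor = ...;  // handle owned by the backend
 *   Error err = executor->initialize(
 *       runtime,                       // already-created xnn_runtime_t
 *       std::move(graph_input_ids),    // external IDs of graph inputs
 *       std::move(graph_output_ids));  // external IDs of graph outputs
 */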

/**
 * Prepares the args for the XNNPACK Runtime.
 *
 * Creates an array of xnn_external_value entries from the EValues passed in,
 * and reshapes all the external input tensors in case any input shapes have
 * changed. It then reshapes the entire runtime, propagating the new shape
 * information through the graph.
 *
 * Note: the external IDs given to the external tensors in the XNNPACK
 * runtime correspond to their index in the list of args passed into
 * delegate->execute().
 */
ET_NODISCARD Error XNNExecutor::prepare_args(EValue** args) {
  // Create xnn_external_value entries from the EValue args
  xnn_status status;
  for (uint32_t i = 0; i < externals_.size(); ++i) {
    if (i < input_ids_.size()) {
      externals_[i].id = input_ids_[i];
    } else {
      externals_[i].id = output_ids_[i - input_ids_.size()];
    }
    uint32_t ext_id = externals_[i].id;

    ET_CHECK_OR_RETURN_ERROR(
        args[ext_id]->isTensor(),
        InvalidArgument,
        "Expected argument to delegate at index %u to be a Tensor, but got %" PRIu32,
        i,
        static_cast<uint32_t>(args[ext_id]->tag));

    Tensor* tensor = &args[ext_id]->toTensor();
    externals_[i].data = tensor->mutable_data_ptr<float>();

    // Reshape runtime inputs
    if (i < input_ids_.size()) {
      size_t num_dims = tensor->dim();
      ET_CHECK_OR_RETURN_ERROR(
          is_contiguous_dim_order(tensor->dim_order().data(), tensor->dim()),
          Internal,
          "Expecting default dim_order but got a non default dim_order tensor for external input %u",
          i);
      size_t dims[XNN_MAX_TENSOR_DIMS];
      ET_CHECK_OR_RETURN_ERROR(
          num_dims <= XNN_MAX_TENSOR_DIMS,
          InvalidArgument,
          "XNNPACK backend accepts tensors with at most %d dims, but got %zu",
          XNN_MAX_TENSOR_DIMS,
          num_dims);
      for (size_t d = 0; d < num_dims; ++d) {
        dims[d] = tensor->size(d);
      }
      status =
          xnn_reshape_external_value(runtime_.get(), ext_id, num_dims, dims);
      ET_CHECK_OR_RETURN_ERROR(
          status == xnn_status_success,
          Internal,
          "Internal Error: Reshape Input Tensor Failed with code: %s",
          xnn_status_to_string(status));
    }
  }
  // Propagate the input shapes through the runtime and re-plan memory in
  // case allocations need to grow
  status = xnn_reshape_runtime(runtime_.get());

  ET_CHECK_OR_RETURN_ERROR(
      status == xnn_status_success,
      Internal,
      "Internal Error: Propagating input shapes failed with code: %s",
      xnn_status_to_string(status));

  return Error::Ok;
}
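
/*
 * Mapping example (illustrative only): for a delegate invoked as
 * delegate->execute(args) with args = {input0, input1, output0}, the
 * external IDs recorded for the XNNPACK graph are the arg indices, so
 *
 *   input_ids_  == {0, 1}  ->  externals_[0].id == 0, externals_[1].id == 1
 *   output_ids_ == {2}     ->  externals_[2].id == 2
 *
 * and externals_[i].data points at the data of args[externals_[i].id].
 */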

/**
 * Runs the XNNPACK Runtime.
 *
 * We first set up the runtime by feeding externals_ to xnn_setup_runtime_v2,
 * and then execute the runtime through xnn_invoke_runtime.
 */
ET_NODISCARD Error XNNExecutor::forward(BackendExecutionContext& context) {
  ET_CHECK_OR_RETURN_ERROR(
      runtime_ != nullptr,
      Internal,
      "XNNPACK Delegate did not compile correctly");

  xnn_status status = xnn_setup_runtime_v2(
      runtime_.get(), externals_.size(), externals_.data());

  ET_CHECK_OR_RETURN_ERROR(
      status == xnn_status_success,
      Internal,
      "Internal Error: Setting up the runtime failed with code: %s",
      xnn_status_to_string(status));

  auto error = profiler_.start(context.event_tracer());
  if (error != Error::Ok) {
    ET_LOG(
        Error,
        "Failed to start profiling: %u.",
        static_cast<unsigned int>(error));
  }

  status = xnn_invoke_runtime(runtime_.get());

  error = profiler_.end();
  if (error != Error::Ok) {
    ET_LOG(
        Error,
        "Failed to end profiling: %u.",
        static_cast<unsigned int>(error));
  }

  ET_CHECK_OR_RETURN_ERROR(
      status == xnn_status_success,
      Internal,
      "XNN Runtime invoke failed with code: %s",
      xnn_status_to_string(status));

  return Error::Ok;
}
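
/*
 * Illustrative call sequence (a sketch of how the owning backend is expected
 * to drive this class for each inference; error handling elided, local names
 * hypothetical):
 *
 *   Error err = executor->prepare_args(args);  // bind EValues, reshape inputs
 *   err = executor->forward(context);          // set up and invoke the runtime
 *   err = executor->resize_outputs(args);      // push output shapes back out
 */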

/**
 * Prepares the outputs for ExecuTorch.
 *
 * Resizes the output tensors based on the output shapes returned by
 * the XNNPACK runtime.
 *
 * Note: For arg_max pooling, we recast the output index tensor. Since
 * XNNPACK gives us the index tensor as int32, we need to convert it
 * back to int64 for ExecuTorch.
 */
ET_NODISCARD Error XNNExecutor::resize_outputs(EValue** args) const {
  size_t output_idx_start = input_ids_.size();
  for (size_t i = output_idx_start; i < externals_.size(); ++i) {
    uint32_t ext_id = externals_[i].id;
    Tensor* out_tensor = &args[ext_id]->toTensor();

    size_t num_dim;
    size_t dims[XNN_MAX_TENSOR_DIMS];

    // Fetch the updated output shapes from xnnpack runtime
    xnn_status status =
        xnn_get_external_value_shape(runtime_.get(), ext_id, &num_dim, dims);

    ET_CHECK_OR_RETURN_ERROR(
        status == xnn_status_success,
        Internal,
        "Internal Error: Failed to retrieve graph output shapes");

    // Convert new output shape into SizesType
    SizesType expected_output_size[kTensorDimensionLimit];
    for (size_t d = 0; d < num_dim; ++d) {
      expected_output_size[d] = static_cast<SizesType>(dims[d]);
    }

    executorch::aten::ArrayRef<SizesType> output_size{
        expected_output_size, static_cast<size_t>(num_dim)};

    ET_LOG(Debug, "Resizing output tensor to a new shape");
    Error err = resize_tensor(*out_tensor, output_size);
    if (err != Error::Ok) {
      ET_LOG(Error, "Failed to resize output tensor for XNNExecutor");
      return err;
    }

    // Output datatype is int64, but XNNPACK doesn't support int64. This
    // means the data was written into this tensor by XNNPACK as int32 and
    // needs to be widened to int64 in place. Copy backwards so each int32
    // element is read before its slot is overwritten by the wider int64
    // write.
    if (out_tensor->scalar_type() == ScalarType::Long) {
      int64_t* data_64 = out_tensor->mutable_data_ptr<int64_t>();
      const int32_t* data_32 = out_tensor->const_data_ptr<int32_t>();
      for (size_t j = out_tensor->numel(); j > 0; --j) {
        data_64[j - 1] = data_32[j - 1];
      }
    }
  }

  return Error::Ok;
}
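
/*
 * Worked example of the in-place int32 -> int64 widening above (illustrative
 * only): for a 3-element output holding int32 values {a, b, c}, the int64
 * view of the same buffer overlaps the int32 view, so writing data_64[0]
 * first would clobber the bytes holding b before it is read. Copying from
 * the last element down writes data_64[2], then data_64[1], then data_64[0],
 * and each source element is read before its bytes are overwritten.
 */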

} // namespace delegate
} // namespace xnnpack
} // namespace backends
} // namespace executorch