/*
 * Copyright (c) 2023-2024 Tomeu Vizoso <[email protected]>
 * SPDX-License-Identifier: MIT
 */

#include "pipe-loader/pipe_loader.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"

#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/builtin_ops.h"
#include "tensorflow/lite/core/c/builtin_op_data.h"

/* TODO: Move to TfLiteAsyncKernel for zero-copy of buffers */

enum teflon_debug_flags {
   TEFLON_DEBUG_VERBOSE = 1 << 1,
};

static const struct debug_named_value teflon_debug_flags[] = {
   { "verbose", TEFLON_DEBUG_VERBOSE, "Verbose logging." },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_teflon, "TEFLON_DEBUG", teflon_debug_flags, 0)

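/*
 * Debug logging helper; it only prints when verbose logging has been
 * enabled through the TEFLON_DEBUG environment variable declared above,
 * e.g. (the exact command is illustrative):
 *
 *    TEFLON_DEBUG=verbose python3 classify.py
 */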
static inline void
teflon_debug(const char *format, ...)
{
   if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
      va_list ap;
      va_start(ap, format);
      _debug_vprintf(format, ap);
      va_end(ap);
   }
}

struct teflon_delegate
{
   TfLiteDelegate base;
   struct pipe_loader_device *dev;
   struct pipe_context *context;
};

struct teflon_subgraph
{
   struct pipe_ml_subgraph *base;

   unsigned *input_tensors;
   unsigned input_count;

   unsigned *output_tensors;
   unsigned output_count;
};

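/*
 * Copies the data of a TfLiteTensor into a newly created pipe_resource.
 * The buffer size is the number of elements (the product of all
 * dimensions) times the element size implied by the TFLite type.
 */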
static struct pipe_resource *
create_resource(struct pipe_context *context, TfLiteTensor tensor)
{
   unsigned bytes;
   unsigned size = 1;

   for (int i = 0; i < tensor.dims->size; i++)
      size *= tensor.dims->data[i];

   switch(tensor.type) {
   case kTfLiteInt8:
   case kTfLiteUInt8:
      bytes = 1;
      break;
   case kTfLiteInt16:
   case kTfLiteUInt16:
   case kTfLiteFloat16:
      bytes = 2;
      break;
   case kTfLiteInt32:
   case kTfLiteUInt32:
   case kTfLiteFloat32:
      bytes = 4;
      break;
   case kTfLiteInt64:
   case kTfLiteUInt64:
   case kTfLiteFloat64:
   case kTfLiteComplex64:
      bytes = 8;
      break;
   default:
      unreachable("Unsupported TF type");
   }

   return pipe_buffer_create_with_data(context, 0, PIPE_USAGE_DEFAULT, size * bytes, tensor.data.data);
}

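/*
 * Translates one delegated TfLiteNode into the corresponding
 * pipe_ml_operation, pointing it at the pipe_tensors referenced by the
 * node's input and output indices.
 */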
static void
fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLiteNode *node, TfLiteRegistration *node_registration, struct pipe_ml_operation *operation, struct pipe_tensor *tensors)
{
   TfLiteConvParams *params = (TfLiteConvParams *)node->builtin_data;

   operation->input_tensor = &tensors[node->inputs->data[0]];
   operation->output_tensor = &tensors[node->outputs->data[0]];

   switch(node_registration->builtin_code) {
   case kTfLiteBuiltinConv2d:
   case kTfLiteBuiltinDepthwiseConv2d:
      operation->type = PIPE_ML_OPERATION_TYPE_CONVOLUTION;
      operation->conv.weight_tensor = &tensors[node->inputs->data[1]];
      operation->conv.bias_tensor = &tensors[node->inputs->data[2]];
      operation->conv.stride_x = params->stride_width;
      operation->conv.stride_y = params->stride_height;
      operation->conv.padding_same = params->padding == kTfLitePaddingSame;
      operation->conv.depthwise = node_registration->builtin_code == kTfLiteBuiltinDepthwiseConv2d;
      operation->conv.pointwise = operation->conv.weight_tensor->dims[1] == 1 &&
                                  operation->conv.weight_tensor->dims[2] == 1;
      break;
   case kTfLiteBuiltinAveragePool2d:
      operation->type = PIPE_ML_OPERATION_TYPE_POOLING;
      break;
   case kTfLiteBuiltinAdd:
      operation->type = PIPE_ML_OPERATION_TYPE_ADD;
      operation->add.input_tensor = &tensors[node->inputs->data[1]];
      break;
   default:
      unreachable("Unsupported ML operation type");
   }
}

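/*
 * Populates a pipe_tensor from the TfLiteTensor at the given index:
 * dimensions, quantization parameters, signedness, and, if the TFLite
 * tensor carries data, a pipe_resource holding a copy of it.
 */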
static void
fill_tensor(struct teflon_delegate *delegate, TfLiteContext *tf_context, struct pipe_tensor *tensor, unsigned index)
{
   struct pipe_context *context = delegate->context;
   TfLiteTensor tf_tensor = tf_context->tensors[index];
   const TfLiteAffineQuantization *quant = (const TfLiteAffineQuantization *)tf_tensor.quantization.params;

   if (tf_tensor.type == kTfLiteNoType)
      return; /* Placeholder tensor */

   if (tf_tensor.data.data)
      tensor->resource = create_resource(context, tf_tensor);

   tensor->index = index;
   memcpy(tensor->dims, tf_tensor.dims->data, tf_tensor.dims->size * sizeof(*tensor->dims));
   tensor->scale = quant->scale->data[0];
   tensor->zero_point = quant->zero_point->data[0];

   switch(tf_tensor.type) {
   case kTfLiteUInt8:
   case kTfLiteUInt16:
   case kTfLiteUInt32:
   case kTfLiteUInt64:
      tensor->is_signed = false;
      break;
   default:
      tensor->is_signed = true;
   }
}

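/*
 * Prints a human-readable summary of the graph about to be compiled:
 * one table for the tensors and one for the operations. Only emitted
 * when verbose logging is enabled.
 */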
static void
dump_graph(struct pipe_tensor *tensors, unsigned tensor_count, struct pipe_ml_operation *operations, unsigned operation_count)
{
   teflon_debug("\n");
   teflon_debug("teflon: compiling graph: %d tensors %d operations\n",
                tensor_count, operation_count);

   teflon_debug("%3s %-8s %3s %s %-12s\n", "idx", "scale", "zp", "has_data", "size");
   teflon_debug("=======================================\n");
   for (int i = 0; i < tensor_count; i++) {
      teflon_debug("%3d %6f %3x %-8s %dx%dx%dx%d\n",
                   tensors[i].index,
                   tensors[i].scale,
                   tensors[i].zero_point,
                   tensors[i].resource == NULL ? "no" : "yes",
                   tensors[i].dims[0], tensors[i].dims[1], tensors[i].dims[2], tensors[i].dims[3]);
   }

   teflon_debug("\n");
   teflon_debug("%3s %-6s %3s %3s %s\n", "idx", "type", "in", "out", "operation type-specific");
   teflon_debug("================================================================================================\n");
   for (int i = 0; i < operation_count; i++) {
      switch(operations[i].type) {
      case PIPE_ML_OPERATION_TYPE_ADD:
         teflon_debug("%3d %-6s %3d %3d in: %d",
                      i,
                      "ADD",
                      operations[i].input_tensor->index,
                      operations[i].output_tensor->index,
                      operations[i].add.input_tensor->index);
         break;
      case PIPE_ML_OPERATION_TYPE_CONVOLUTION:
         teflon_debug("%3d %-6s %3d %3d w: %d b: %d stride: %d pad: %s",
                      i,
                      operations[i].conv.depthwise ? "DWCONV" : "CONV",
                      operations[i].input_tensor->index,
                      operations[i].output_tensor->index,
                      operations[i].conv.weight_tensor->index,
                      operations[i].conv.bias_tensor->index,
                      operations[i].conv.stride_x,
                      operations[i].conv.padding_same ? "SAME" : "VALID");
         break;
      case PIPE_ML_OPERATION_TYPE_POOLING:
         teflon_debug("%3d %-6s %3d %3d filter: %dx%d stride: %d pad: %s",
                      i,
                      "POOL",
                      operations[i].input_tensor->index,
                      operations[i].output_tensor->index,
                      operations[i].pooling.filter_height,
                      operations[i].pooling.filter_width,
                      operations[i].pooling.stride_x,
                      operations[i].pooling.padding_same ? "SAME" : "VALID");
         break;
      }

      teflon_debug("\n");
   }
   teflon_debug("\n");
}

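/*
 * Called by TFLite once per delegated partition. Gathers the tensors and
 * operations of the partition, hands them to the Gallium driver for
 * compilation via ml_subgraph_create(), and returns a teflon_subgraph
 * that records the non-constant input and the output tensor indices for
 * use at invoke time.
 */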
static void *
partition_init(TfLiteContext *tf_context, const char *buffer, size_t length)
{
   const TfLiteDelegateParams *params = (const TfLiteDelegateParams *)buffer;
   struct teflon_delegate *delegate = (struct teflon_delegate *)params->delegate;
   struct pipe_context *context = delegate->context;
   struct pipe_ml_operation operations[params->nodes_to_replace->size];
   struct pipe_tensor tensors[tf_context->tensors_size];
   long start = 0, end = 0;

   memset(operations, 0, sizeof(operations));
   memset(tensors, 0, sizeof(tensors));

   if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
      struct timespec time;
      clock_gettime(CLOCK_MONOTONIC, &time);
      start = (long)time.tv_sec * 1000 + (long)time.tv_nsec / 1000000;
   }

   for (int i = 0; i < tf_context->tensors_size; i++)
      fill_tensor(delegate, tf_context, &tensors[i], i);

   for (int i = 0; i < params->nodes_to_replace->size; i++) {
      const int node_index = params->nodes_to_replace->data[i];
      TfLiteNode *delegated_node = NULL;
      TfLiteRegistration *delegated_node_registration = NULL;
      tf_context->GetNodeAndRegistration(tf_context, node_index, &delegated_node,
                                         &delegated_node_registration);

      fill_operation(delegate, tf_context, delegated_node, delegated_node_registration, &operations[i], tensors);
   }

   if (debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)
      dump_graph(tensors, tf_context->tensors_size, operations, params->nodes_to_replace->size);

   struct pipe_ml_subgraph *subgraph;
   subgraph = context->ml_subgraph_create(context,
                                          operations,
                                          params->nodes_to_replace->size);

   for (int i = 0; i < tf_context->tensors_size; i++)
      pipe_resource_reference(&tensors[i].resource, NULL);

   struct teflon_subgraph *tsubgraph = calloc(1, sizeof(*tsubgraph));
   tsubgraph->base = subgraph;

   tsubgraph->input_tensors = malloc(params->input_tensors->size * sizeof(*tsubgraph->input_tensors));
   for (int i = 0; i < params->input_tensors->size; i++) {
      unsigned tensor_idx = params->input_tensors->data[i];
      TfLiteTensor *tensor = &tf_context->tensors[tensor_idx];
      if (tensor->allocation_type == kTfLiteMmapRo)
         continue; /* Skip constant (read-only, memory-mapped) tensors */
      tsubgraph->input_tensors[tsubgraph->input_count] = tensor_idx;
      tsubgraph->input_count++;
   }

   tsubgraph->output_count = params->output_tensors->size;
   tsubgraph->output_tensors = malloc(params->output_tensors->size * sizeof(*tsubgraph->output_tensors));
   memcpy(tsubgraph->output_tensors, params->output_tensors->data,
          params->output_tensors->size * sizeof(*tsubgraph->output_tensors));

   if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
      struct timespec time;
      clock_gettime(CLOCK_MONOTONIC, &time);
      end = (long)time.tv_sec * 1000 + (long)time.tv_nsec / 1000000;
      teflon_debug("teflon: compiled graph, took %ld ms\n", (end - start));
   }

   return tsubgraph;
}

static TfLiteStatus
partition_prepare(TfLiteContext *context, TfLiteNode *node)
{
   // TODO: If input size has changed, resize input, intermediate and output buffers

   return kTfLiteOk;
}

// De-allocates the per-node-and-Interpreter custom data.
static void
partition_free(TfLiteContext *tf_context, void *buffer)
{
   struct teflon_subgraph *tsubgraph = (struct teflon_subgraph *)buffer;
   struct pipe_ml_subgraph *subgraph = tsubgraph->base;
   struct pipe_context *context = subgraph->context;

   context->ml_subgraph_destroy(context, subgraph);
   free(tsubgraph->input_tensors);
   free(tsubgraph->output_tensors);
   free(tsubgraph);
}

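/*
 * Called by TFLite for every inference. Uploads the (single, for now)
 * input tensor, asks the driver to run the compiled subgraph, and reads
 * the outputs back into the TfLiteTensor buffers.
 */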
static TfLiteStatus
partition_invoke(TfLiteContext *tf_context, TfLiteNode *node)
{
   struct teflon_delegate *delegate = (struct teflon_delegate *)node->delegate;
   struct teflon_subgraph *tsubgraph = (struct teflon_subgraph *)node->user_data;
   struct pipe_ml_subgraph *subgraph = tsubgraph->base;
   struct pipe_context *context = delegate->context;
   long start = 0, end = 0;

   if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
      struct timespec time;
      clock_gettime(CLOCK_MONOTONIC, &time);
      start = (long)time.tv_sec * 1000 + (long)time.tv_nsec / 1000000;
   }

   struct pipe_tensor input = {0};
   /* FIXME: Support multiple inputs */
   fill_tensor(delegate, tf_context, &input, tsubgraph->input_tensors[0]);
   context->ml_subgraph_invoke(context, subgraph, &input);

   void **buffers = malloc(tsubgraph->output_count * sizeof(*buffers));
   for (unsigned i = 0; i < tsubgraph->output_count; i++)
      buffers[i] = tf_context->tensors[tsubgraph->output_tensors[i]].data.data;
   context->ml_subgraph_read_output(context, subgraph, tsubgraph->output_count, tsubgraph->output_tensors, buffers);
   free(buffers);

   pipe_resource_reference(&input.resource, NULL);

   if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
      struct timespec time;
      clock_gettime(CLOCK_MONOTONIC, &time);
      end = (long)time.tv_sec * 1000 + (long)time.tv_nsec / 1000000;
      teflon_debug("teflon: invoked graph, took %ld ms\n", (end - start));
   }

   return kTfLiteOk;
}

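/*
 * Walks the interpreter's execution plan, collects the nodes whose
 * builtin code this delegate can handle, and asks TFLite to replace
 * those node subsets with delegate kernels backed by the partition_*
 * callbacks above.
 */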
static TfLiteStatus
PrepareDelegate(TfLiteContext *context, TfLiteDelegate *delegate)
{
   TfLiteIntArray *plan;
   TfLiteNode *node;
   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));

   // Get a list of supported nodes.
   TfLiteIntArray *supported_nodes = malloc(plan->size * sizeof(int) + sizeof(*supported_nodes));
   supported_nodes->size = plan->size;
   unsigned node_count = 0;
   for (int i = 0; i < plan->size; i++) {
      int node_index = plan->data[i];
      bool supported = false;
      TfLiteRegistration *registration;
      TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
         context, node_index, &node, &registration));

      switch(registration->builtin_code) {
      case kTfLiteBuiltinConv2d:
      case kTfLiteBuiltinDepthwiseConv2d:
      case kTfLiteBuiltinAdd:
         supported = true;
         break;
      }

      if (supported)
         supported_nodes->data[node_count++] = node_index;
   }
   supported_nodes->size = node_count;

   TfLiteRegistration registration;

   registration.init = partition_init;
   registration.free = partition_free;
   registration.prepare = partition_prepare;
   registration.invoke = partition_invoke;

   registration.profiling_string = NULL;
   registration.builtin_code = kTfLiteBuiltinDelegate;
   registration.version = 1;
   registration.registration_external = NULL;
   registration.custom_name = "Teflon Delegate";

   // Replace supported subgraphs.
   TfLiteStatus status = context->ReplaceNodeSubsetsWithDelegateKernels(
      context,
      registration,
      supported_nodes,
      delegate);

   free(supported_nodes);

   return status;
}

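/*
 * Buffer handles are not used by this delegate yet, so the two hooks
 * below are intentionally no-ops.
 */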
static TfLiteStatus
CopyFromBufferHandle(TfLiteContext *context,
                     TfLiteDelegate *delegate,
                     TfLiteBufferHandle buffer_handle,
                     TfLiteTensor *tensor)
{
   return kTfLiteOk;
}

static void
FreeBufferHandle(TfLiteContext *context,
                 TfLiteDelegate *delegate,
                 TfLiteBufferHandle *handle)
{
}

TfLiteDelegate *tflite_plugin_create_delegate(char **options_keys,
                                              char **options_values,
                                              size_t num_options,
                                              void (*report_error)(const char *));

void tflite_plugin_destroy_delegate(TfLiteDelegate *delegate);

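/*
 * tflite_plugin_create_delegate() and tflite_plugin_destroy_delegate()
 * follow TensorFlow Lite's external-delegate plugin convention, so the
 * resulting shared object can be loaded by any TFLite frontend. A minimal
 * sketch of loading it through the TFLite C API, assuming the plugin is
 * built as libteflon.so (library and model paths are illustrative):
 *
 *    void *plugin = dlopen("libteflon.so", RTLD_NOW);
 *    TfLiteDelegate *(*create)(char **, char **, size_t,
 *                              void (*)(const char *)) =
 *       dlsym(plugin, "tflite_plugin_create_delegate");
 *
 *    TfLiteDelegate *delegate = create(NULL, NULL, 0, NULL);
 *    TfLiteModel *model = TfLiteModelCreateFromFile("model.tflite");
 *    TfLiteInterpreterOptions *options = TfLiteInterpreterOptionsCreate();
 *    TfLiteInterpreterOptionsAddDelegate(options, delegate);
 *    TfLiteInterpreter *interpreter = TfLiteInterpreterCreate(model, options);
 */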
__attribute__((visibility("default"))) TfLiteDelegate *tflite_plugin_create_delegate(char **options_keys,
                                                                                     char **options_values,
                                                                                     size_t num_options,
                                                                                     void (*report_error)(const char *))
{
   struct teflon_delegate *delegate = (struct teflon_delegate *)calloc(1, sizeof(*delegate));
   struct pipe_screen *screen;
   struct pipe_loader_device **devs;

   delegate->base.flags = kTfLiteDelegateFlagsAllowDynamicTensors | kTfLiteDelegateFlagsRequirePropagatedShapes;
   delegate->base.Prepare = &PrepareDelegate;
   delegate->base.CopyFromBufferHandle = &CopyFromBufferHandle;
   delegate->base.FreeBufferHandle = &FreeBufferHandle;

   int n = pipe_loader_probe(NULL, 0, false);
   devs = (struct pipe_loader_device **)malloc(sizeof(*devs) * n);
   pipe_loader_probe(devs, n, false);

   for (int i = 0; i < n; i++) {
      if (strstr("etnaviv", devs[i]->driver_name))
         delegate->dev = devs[i];
      else
         pipe_loader_release(&devs[i], 1);
   }
   free(devs);

   if (delegate->dev == NULL) {
      fprintf(stderr, "Couldn't open kernel device\n");
      return NULL;
   }

   teflon_debug("Teflon delegate: loaded %s driver\n", delegate->dev->driver_name);

   screen = pipe_loader_create_screen(delegate->dev, false);
   delegate->context = screen->context_create(screen, NULL, PIPE_CONTEXT_COMPUTE_ONLY);

   return &delegate->base;
}

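/*
 * Tears down the context, screen and loader device created in
 * tflite_plugin_create_delegate().
 */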
__attribute__((visibility("default"))) void tflite_plugin_destroy_delegate(TfLiteDelegate *tflite_delegate)
{
   struct teflon_delegate *delegate = (struct teflon_delegate *)tflite_delegate;
   struct pipe_screen *screen;

   if (tflite_delegate == NULL) {
      fprintf(stderr, "tflite_plugin_destroy_delegate: NULL delegate!\n");
      return;
   }

   screen = delegate->context->screen;
   delegate->context->destroy(delegate->context);
   screen->destroy(screen);
   pipe_loader_release(&delegate->dev, 1);
   free(delegate);
}