1 /*
2 * Copyright © 2024 Intel Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include <fcntl.h>
7 #include <stdio.h>
8 #include <string.h>
9 #include <sys/mman.h>
10
11 #include <lua.h>
12 #include <lualib.h>
13 #include <lauxlib.h>
14
15 #include "util/ralloc.h"
16
17 #include <xf86drm.h>
18 #include "drm-uapi/i915_drm.h"
19 #include "drm-uapi/xe_drm.h"
20
21 #include "intel/compiler/brw_asm.h"
22 #include "intel/compiler/brw_isa_info.h"
23 #include "intel/common/intel_gem.h"
24 #include "intel/common/xe/intel_engine.h"
25 #include "intel/decoder/intel_decoder.h"
26 #include "intel/dev/intel_debug.h"
27
28 #include "executor.h"
29
/* Fixed GPU virtual addresses and size for the three buffer objects used
 * by the tool (batch, extra, data).  The addresses are also what the
 * shaders see; the data BO address is advertised in the help text.
 */
enum {
   /* Predictable base addresses here make it easier to spot errors. */
   EXECUTOR_BO_BATCH_ADDR = 0x10000000,
   EXECUTOR_BO_EXTRA_ADDR = 0x20000000,
   EXECUTOR_BO_DATA_ADDR  = 0x30000000,

   /* Apply to all BOs. */
   EXECUTOR_BO_SIZE = 10 * 1024 * 1024,
};
39
40 static void
print_help()41 print_help()
42 {
43 printf(
44 "Executes shaders written for Intel GPUs\n"
45 "usage: executor FILENAME\n"
46 "\n"
47 "The input is a Lua script that can perform data manipulation\n"
48 "and dispatch execution of compute shaders, written in Xe assembly,\n"
49 "the same format used by the brw_asm assembler or when dumping\n"
50 "shaders in debug mode.\n"
51 "\n"
52 "The goal is to have a tool to experiment directly with certain\n"
53 "assembly instructions and the shared units without having to\n"
54 "instrument the drivers.\n"
55 "\n"
56 "EXECUTION CONTEXT\n"
57 "\n"
58 "By default compute shaders are used with SIMD8 for Gfx9-125 and SIMD16\n"
59 "for Xe2. Only a single thread is dispatched. A data buffer is used to\n"
60 "pipe data into the shader and out of it, it is bound to the graphics\n"
61 "address 0x%08x.\n"
62 "\n"
63 "The Gfx versions have differences in their assembly and shared units, so\n"
64 "other than very simple examples, scripts for this program will be either\n"
65 "specific to a version or provide shader variants for multiple versions.\n"
66 "\n"
67 "ASSEMBLY MACROS\n"
68 "\n"
69 "In addition to regular instructions, the follow macros will generate\n"
70 "assembly code based on the Gfx version being executed. Unlike in regular\n"
71 "instructions, REGs don't use regions and can't be immediates.\n"
72 "\n"
73 "- @eot\n"
74 " Send an EOT message.\n"
75 "\n"
76 "- @mov REG IMM\n"
77 " Like a regular MOV but accepts numbers in both decimal and\n"
78 " floating-point.\n"
79 "\n"
80 "- @id REG\n"
81 " Write a local invocation index into REG.\n"
82 "\n"
83 "- @read DST_REG OFFSET_REG\n"
84 " Read 32-bit values from the memory buffer at OFFSET_REG into DST_REG.\n"
85 "\n"
86 "- @write OFFSET_REG SRC_REG\n"
87 " Write 32-bit values from SRC_REG to the memory buffer at OFFSET_REG.\n"
88 "\n"
89 "- @syncnop\n"
90 " Produce a coarse grained sync.nop (when applicable) to ensure data from\n"
91 " macros above are read/written.\n"
92 "\n"
93 "LUA ENVIRONMENT\n"
94 "\n"
95 "In addition to the regular Lua standard library the following variables and.\n"
96 "functions are available.\n"
97 "\n"
98 "- execute({src=STR, data=ARRAY}) -> ARRAY\n"
99 " Takes a table as argument. The 'src' in the table contains the shader to be\n"
100 " executed. The 'data' argument will be used to fill the data buffer with 32-bit\n"
101 " values. The function returns an ARRAY with the contents of the data buffer\n"
102 " after the shader completes.\n"
103 "\n"
104 "- dump(ARRAY, COUNT)\n"
105 " Pretty print the COUNT first elements of an array of 32-bit values.\n"
106 "\n"
107 "- check_ver(V, ...), check_verx10(V, ...)\n"
108 " Exit if the Gfx version being executed isn't in the arguments list.\n"
109 "\n"
110 "- ver, verx10\n"
111 " Variables containing the Gfx version being executed.\n"
112 "\n"
113 "This program was compiled with %s.\n"
114 "\n"
115 "ENVIRONMENT VARIABLES\n"
116 "\n"
117 "The following INTEL_DEBUG values (comma separated) are used:\n"
118 "\n"
119 " - bat Dumps the batch buffer.\n"
120 " - color Uses colors for the batch buffer dump.\n"
121 " - cs Dumps the assembly after macro processing.\n"
122 "\n"
123 "EXAMPLE\n"
124 "\n"
125 "The following script\n"
126 "\n"
127 " local r = execute {\n"
128 " data={ [42] = 0x100 },\n"
129 " src=[[\n"
130 " @mov g1 42\n"
131 " @read g2 g1\n"
132 "\n"
133 " @id g3\n"
134 "\n"
135 " add(8) g4<1>UD g2<8,8,1>UD g3<8,8,1>UD { align1 @1 1Q };\n"
136 "\n"
137 " @write g3 g4\n"
138 " @eot\n"
139 " ]]\n"
140 " }\n"
141 "\n"
142 " dump(r, 4)\n"
143 "\n"
144 "Will produce the following output\n"
145 "\n"
146 " [0x00000000] 0x00000100 0x00000101 0x00000102 0x00000103\n"
147 "\n"
148 "More examples can be found in the examples/ directory in the source code.\n"
149 "\n", EXECUTOR_BO_DATA_ADDR, LUA_RELEASE);
150 }
151
/* Global per-device state, initialized once in main() and shared by the
 * Lua-callable entry points (l_execute, l_check_ver, ...).
 */
static struct {
   struct intel_device_info devinfo;
   struct isl_device isl_dev;
   struct brw_isa_info isa;
   int fd;  /* render-node fd returned by get_drm_device() */
} E;
158
/* Dispatches `func` to the gfxN_-prefixed implementation matching the
 * hardware generation of the opened device (E.devinfo.verx10).
 */
#define genX_call(func, ...)                                  \
   switch (E.devinfo.verx10) {                                \
   case 90:  gfx9_  ##func(__VA_ARGS__); break;               \
   case 110: gfx11_ ##func(__VA_ARGS__); break;               \
   case 120: gfx12_ ##func(__VA_ARGS__); break;               \
   case 125: gfx125_##func(__VA_ARGS__); break;               \
   case 200: gfx20_ ##func(__VA_ARGS__); break;               \
   default: unreachable("Unsupported hardware generation");   \
   }
168
169 static void
executor_create_bo(executor_context * ec,executor_bo * bo,uint64_t addr,uint32_t size_in_bytes)170 executor_create_bo(executor_context *ec, executor_bo *bo, uint64_t addr, uint32_t size_in_bytes)
171 {
172 if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
173 struct drm_i915_gem_create gem_create = {
174 .size = size_in_bytes,
175 };
176
177 int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
178 if (err)
179 failf("i915_gem_create");
180
181 struct drm_i915_gem_mmap_offset mm = {
182 .handle = gem_create.handle,
183 .flags = ec->devinfo->has_local_mem ? I915_MMAP_OFFSET_FIXED
184 : I915_MMAP_OFFSET_WC,
185 };
186
187 err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mm);
188 if (err)
189 failf("i915_gem_mmap_offset");
190
191 bo->handle = gem_create.handle;
192 bo->map = mmap(NULL, size_in_bytes, PROT_READ | PROT_WRITE,
193 MAP_SHARED, ec->fd, mm.offset);
194 if (!bo->map)
195 failf("mmap");
196 } else {
197 assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);
198
199 struct drm_xe_gem_create gem_create = {
200 .size = size_in_bytes,
201 .cpu_caching = DRM_XE_GEM_CPU_CACHING_WB,
202 .placement = 1u << ec->devinfo->mem.sram.mem.instance,
203 };
204
205 int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create);
206 if (err)
207 failf("xe_gem_create");
208
209 struct drm_xe_gem_mmap_offset mm = {
210 .handle = gem_create.handle,
211 };
212
213 err = intel_ioctl(ec->fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mm);
214 if (err)
215 failf("xe_gem_mmap_offset");
216
217 bo->handle = gem_create.handle;
218 bo->map = mmap(NULL, size_in_bytes, PROT_READ | PROT_WRITE,
219 MAP_SHARED, ec->fd, mm.offset);
220 if (!bo->map)
221 failf("mmap");
222 }
223
224 bo->size = size_in_bytes;
225 bo->addr = addr;
226 bo->cursor = bo->map;
227 }
228
229 static void
executor_destroy_bo(executor_context * ec,executor_bo * bo)230 executor_destroy_bo(executor_context *ec, executor_bo *bo)
231 {
232 struct drm_gem_close gem_close = {
233 .handle = bo->handle,
234 };
235
236 int err = munmap(bo->map, bo->size);
237 if (err)
238 failf("munmap");
239
240 err = intel_ioctl(ec->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
241 if (err)
242 failf("gem_close");
243
244 memset(bo, 0, sizeof(*bo));
245 }
246
247 static void
executor_print_bo(executor_bo * bo,const char * name)248 executor_print_bo(executor_bo *bo, const char *name)
249 {
250 assert((bo->cursor - bo->map) % 4 == 0);
251 uint32_t *dw = bo->map;
252 uint32_t len = (uint32_t *)bo->cursor - dw;
253
254 printf("=== %s (0x%08"PRIx64", %td bytes) ===\n", name, bo->addr, bo->cursor - bo->map);
255
256 for (int i = 0; i < len; i++) {
257 if ((i % 8) == 0) printf("[0x%08x] ", (i*4) + (uint32_t)bo->addr);
258 printf("0x%08x ", dw[i]);
259 if ((i % 8) == 7) printf("\n");
260 }
261 printf("\n");
262 }
263
264 void *
executor_alloc_bytes(executor_bo * bo,uint32_t size)265 executor_alloc_bytes(executor_bo *bo, uint32_t size)
266 {
267 return executor_alloc_bytes_aligned(bo, size, 0);
268 }
269
270 void *
executor_alloc_bytes_aligned(executor_bo * bo,uint32_t size,uint32_t alignment)271 executor_alloc_bytes_aligned(executor_bo *bo, uint32_t size, uint32_t alignment)
272 {
273 void *r = bo->cursor;
274 if (alignment) {
275 r = (void *)(((uintptr_t)r + alignment-1) & ~((uintptr_t)alignment-1));
276 }
277 bo->cursor = r + size;
278 return r;
279 }
280
281 executor_address
executor_address_of_ptr(executor_bo * bo,void * ptr)282 executor_address_of_ptr(executor_bo *bo, void *ptr)
283 {
284 return (executor_address){ptr - bo->map + bo->addr};
285 }
286
287 static int
get_drm_device(struct intel_device_info * devinfo)288 get_drm_device(struct intel_device_info *devinfo)
289 {
290 drmDevicePtr devices[8];
291 int max_devices = drmGetDevices2(0, devices, 8);
292
293 int i, fd = -1;
294 for (i = 0; i < max_devices; i++) {
295 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
296 devices[i]->bustype == DRM_BUS_PCI &&
297 devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
298 fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
299 if (fd < 0)
300 continue;
301
302 if (!intel_get_device_info_from_fd(fd, devinfo, -1, -1) ||
303 devinfo->ver < 8) {
304 close(fd);
305 fd = -1;
306 continue;
307 }
308
309 /* Found a device! */
310 break;
311 }
312 }
313 drmFreeDevices(devices, max_devices);
314
315 return fd;
316 }
317
318 static struct intel_batch_decode_bo
decode_get_bo(void * _ec,bool ppgtt,uint64_t address)319 decode_get_bo(void *_ec, bool ppgtt, uint64_t address)
320 {
321 executor_context *ec = _ec;
322 struct intel_batch_decode_bo bo = {0};
323
324 if (address >= ec->bo.batch.addr && address < ec->bo.batch.addr + ec->bo.batch.size) {
325 bo.addr = ec->bo.batch.addr;
326 bo.size = ec->bo.batch.size;
327 bo.map = ec->bo.batch.map;
328 } else if (address >= ec->bo.extra.addr && address < ec->bo.extra.addr + ec->bo.extra.size) {
329 bo.addr = ec->bo.extra.addr;
330 bo.size = ec->bo.extra.size;
331 bo.map = ec->bo.extra.map;
332 } else if (address >= ec->bo.data.addr && address < ec->bo.data.addr + ec->bo.data.size) {
333 bo.addr = ec->bo.data.addr;
334 bo.size = ec->bo.data.size;
335 bo.map = ec->bo.data.map;
336 }
337
338 return bo;
339 }
340
341 static unsigned
decode_get_state_size(void * _ec,uint64_t address,uint64_t base_address)342 decode_get_state_size(void *_ec, uint64_t address, uint64_t base_address)
343 {
344 return EXECUTOR_BO_SIZE;
345 }
346
347 static void
parse_execute_data(executor_context * ec,lua_State * L,int table_idx)348 parse_execute_data(executor_context *ec, lua_State *L, int table_idx)
349 {
350 uint32_t *data = ec->bo.data.map;
351
352 lua_pushvalue(L, table_idx);
353
354 lua_pushnil(L);
355 while (lua_next(L, -2) != 0) {
356 int val_idx = lua_gettop(L);
357 int key_idx = val_idx - 1;
358
359 if (lua_type(L, key_idx) != LUA_TNUMBER || !lua_isinteger(L, key_idx))
360 failf("invalid key for data in execute call");
361
362 lua_Integer key = lua_tointeger(L, key_idx);
363 assert(key <= 10 * 1024 * 1024 / 4);
364 lua_Integer val = lua_tointeger(L, val_idx);
365 data[key] = val;
366
367 lua_pop(L, 1);
368 }
369
370 lua_pop(L, 1);
371 }
372
373 static void
parse_execute_args(executor_context * ec,lua_State * L,executor_params * params)374 parse_execute_args(executor_context *ec, lua_State *L, executor_params *params)
375 {
376 int opts = lua_gettop(L);
377
378 lua_pushnil(L);
379
380 while (lua_next(L, opts) != 0) {
381 int val_idx = lua_gettop(L);
382 int key_idx = val_idx - 1;
383
384 if (lua_type(L, key_idx) != LUA_TSTRING) {
385 lua_pop(L, 1);
386 continue;
387 }
388
389 const char *key = lua_tostring(L, key_idx);
390
391 if (!strcmp(key, "src")) {
392 params->original_src = ralloc_strdup(ec->mem_ctx, luaL_checkstring(L, val_idx));
393 } else if (!strcmp(key, "data")) {
394 parse_execute_data(ec, L, val_idx);
395 } else {
396 failf("unknown parameter '%s' for execute()", key);
397 }
398
399 lua_pop(L, 1);
400 }
401 }
402
403 static void
executor_context_setup(executor_context * ec)404 executor_context_setup(executor_context *ec)
405 {
406 if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
407 struct drm_i915_gem_context_create create = {0};
408 int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
409 if (err)
410 failf("i915_gem_context_create");
411 ec->i915.ctx_id = create.ctx_id;
412 } else {
413 assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);
414
415 struct drm_xe_vm_create create = {
416 .flags = DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
417 };
418 int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_VM_CREATE, &create);
419 if (err)
420 failf("xe_vm_create");
421 ec->xe.vm_id = create.vm_id;
422
423 struct drm_xe_engine_class_instance instance = {0};
424
425 struct intel_query_engine_info *engines_info = xe_engine_get_info(ec->fd);
426 assert(engines_info);
427
428 bool found_engine = false;
429 for (int i = 0; i < engines_info->num_engines; i++) {
430 struct intel_engine_class_instance *e = &engines_info->engines[i];
431 if (e->engine_class == INTEL_ENGINE_CLASS_RENDER) {
432 instance.engine_class = DRM_XE_ENGINE_CLASS_RENDER;
433 instance.engine_instance = e->engine_instance;
434 instance.gt_id = e->gt_id;
435 found_engine = true;
436 break;
437 }
438 }
439 assert(found_engine);
440 free(engines_info);
441
442 struct drm_xe_exec_queue_create queue_create = {
443 .vm_id = ec->xe.vm_id,
444 .width = 1,
445 .num_placements = 1,
446 .instances = (uintptr_t)&instance,
447 };
448 err = intel_ioctl(ec->fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &queue_create);
449 if (err)
450 failf("xe_exec_queue_create");
451 ec->xe.queue_id = queue_create.exec_queue_id;
452 }
453
454 executor_create_bo(ec, &ec->bo.batch, EXECUTOR_BO_BATCH_ADDR, EXECUTOR_BO_SIZE);
455 executor_create_bo(ec, &ec->bo.extra, EXECUTOR_BO_EXTRA_ADDR, EXECUTOR_BO_SIZE);
456 executor_create_bo(ec, &ec->bo.data, EXECUTOR_BO_DATA_ADDR, EXECUTOR_BO_SIZE);
457
458 uint32_t *data = ec->bo.data.map;
459 for (int i = 0; i < EXECUTOR_BO_SIZE / 4; i++)
460 data[i] = 0xABABABAB;
461 }
462
463 static void
executor_context_dispatch(executor_context * ec)464 executor_context_dispatch(executor_context *ec)
465 {
466 if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
467 struct drm_i915_gem_exec_object2 objs[] = {
468 {
469 .handle = ec->bo.batch.handle,
470 .offset = ec->bo.batch.addr,
471 .flags = EXEC_OBJECT_PINNED,
472 },
473 {
474 .handle = ec->bo.extra.handle,
475 .offset = ec->bo.extra.addr,
476 .flags = EXEC_OBJECT_PINNED,
477 },
478 {
479 .handle = ec->bo.data.handle,
480 .offset = ec->bo.data.addr,
481 .flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE,
482 },
483 };
484
485 struct drm_i915_gem_execbuffer2 exec = {0};
486 exec.buffers_ptr = (uintptr_t)objs;
487 exec.buffer_count = ARRAY_SIZE(objs);
488 exec.batch_start_offset = ec->batch_start - ec->bo.batch.addr;
489 exec.flags = I915_EXEC_BATCH_FIRST;
490 exec.rsvd1 = ec->i915.ctx_id;
491
492 int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
493 if (err)
494 failf("i915_gem_execbuffer2");
495
496 struct drm_i915_gem_wait wait = {0};
497 wait.bo_handle = ec->bo.batch.handle;
498 wait.timeout_ns = INT64_MAX;
499
500 err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
501 if (err)
502 failf("i915_gem_wait");
503 } else {
504 assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);
505
506 /* First syncobj is signalled by the binding operation and waited by the
507 * execution of the batch buffer.
508 *
509 * Second syncobj is singalled by the execution of batch buffer and
510 * waited at the end.
511 */
512 uint32_t sync_handles[2] = {0};
513 for (int i = 0; i < 2; i++) {
514 struct drm_syncobj_create sync_create = {0};
515 int err = intel_ioctl(ec->fd, DRM_IOCTL_SYNCOBJ_CREATE, &sync_create);
516 if (err)
517 failf("syncobj_create");
518 sync_handles[i] = sync_create.handle;
519 }
520
521 struct drm_xe_vm_bind_op bind_ops[] = {
522 {
523 .op = DRM_XE_VM_BIND_OP_MAP,
524 .obj = ec->bo.batch.handle,
525 .addr = ec->bo.batch.addr,
526 .range = EXECUTOR_BO_SIZE,
527 .pat_index = ec->devinfo->pat.cached_coherent.index,
528 },
529 {
530 .op = DRM_XE_VM_BIND_OP_MAP,
531 .obj = ec->bo.extra.handle,
532 .addr = ec->bo.extra.addr,
533 .range = EXECUTOR_BO_SIZE,
534 .pat_index = ec->devinfo->pat.cached_coherent.index,
535 },
536 {
537 .op = DRM_XE_VM_BIND_OP_MAP,
538 .obj = ec->bo.data.handle,
539 .addr = ec->bo.data.addr,
540 .range = EXECUTOR_BO_SIZE,
541 .pat_index = ec->devinfo->pat.cached_coherent.index,
542 },
543 };
544
545 struct drm_xe_sync bind_syncs[] = {
546 {
547 .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
548 .handle = sync_handles[0],
549 .flags = DRM_XE_SYNC_FLAG_SIGNAL,
550 },
551 };
552
553 struct drm_xe_vm_bind bind = {
554 .vm_id = ec->xe.vm_id,
555 .num_binds = ARRAY_SIZE(bind_ops),
556 .vector_of_binds = (uintptr_t)bind_ops,
557 .num_syncs = 1,
558 .syncs = (uintptr_t)bind_syncs,
559 };
560
561 int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_VM_BIND, &bind);
562 if (err)
563 failf("xe_vm_bind");
564
565 struct drm_xe_sync exec_syncs[] = {
566 {
567 .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
568 .handle = sync_handles[0],
569 },
570 {
571 .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
572 .handle = sync_handles[1],
573 .flags = DRM_XE_SYNC_FLAG_SIGNAL,
574 }
575 };
576
577 struct drm_xe_exec exec = {
578 .exec_queue_id = ec->xe.queue_id,
579 .num_batch_buffer = 1,
580 .address = ec->batch_start,
581 .num_syncs = 2,
582 .syncs = (uintptr_t)exec_syncs,
583 };
584 err = intel_ioctl(ec->fd, DRM_IOCTL_XE_EXEC, &exec);
585 if (err)
586 failf("xe_exec");
587
588 struct drm_syncobj_wait wait = {
589 .count_handles = 1,
590 .handles = (uintptr_t)&sync_handles[1],
591 .timeout_nsec = INT64_MAX,
592 };
593 err = intel_ioctl(ec->fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
594 if (err)
595 failf("syncobj_wait");
596 }
597 }
598
599 static void
executor_context_teardown(executor_context * ec)600 executor_context_teardown(executor_context *ec)
601 {
602 executor_destroy_bo(ec, &ec->bo.batch);
603 executor_destroy_bo(ec, &ec->bo.extra);
604 executor_destroy_bo(ec, &ec->bo.data);
605
606 if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
607 struct drm_i915_gem_context_destroy destroy = {
608 .ctx_id = ec->i915.ctx_id,
609 };
610 int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
611 if (err)
612 failf("i915_gem_context_destroy");
613 } else {
614 assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);
615
616 struct drm_xe_exec_queue_destroy queue_destroy = {
617 .exec_queue_id = ec->xe.queue_id,
618 };
619 int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &queue_destroy);
620 if (err)
621 failf("xe_exec_queue_destroy");
622
623 struct drm_xe_vm_destroy destroy = {
624 .vm_id = ec->xe.vm_id,
625 };
626 err = intel_ioctl(ec->fd, DRM_IOCTL_XE_VM_DESTROY, &destroy);
627 if (err)
628 failf("xe_vm_destroy");
629 }
630 }
631
632 static int
l_execute(lua_State * L)633 l_execute(lua_State *L)
634 {
635 executor_context ec = {
636 .mem_ctx = ralloc_context(NULL),
637 .devinfo = &E.devinfo,
638 .isl_dev = &E.isl_dev,
639 .fd = E.fd,
640 };
641
642 executor_context_setup(&ec);
643
644 executor_params params = {0};
645
646 {
647 if (lua_gettop(L) != 1)
648 failf("execute() must have a single table argument");
649
650 parse_execute_args(&ec, L, ¶ms);
651
652 const char *src = executor_apply_macros(&ec, params.original_src);
653
654 FILE *f = fmemopen((void *)src, strlen(src), "r");
655 brw_assemble_result asm = brw_assemble(ec.mem_ctx, ec.devinfo, f, "", 0);
656 fclose(f);
657
658 if (INTEL_DEBUG(DEBUG_CS) || !asm.bin) {
659 printf("=== Processed assembly source ===\n"
660 "%s"
661 "=================================\n\n", src);
662 }
663
664 if (!asm.bin)
665 failf("assembler failure");
666
667 params.kernel_bin = asm.bin;
668 params.kernel_size = asm.bin_size;
669 }
670
671 genX_call(emit_execute, &ec, ¶ms);
672
673 if (INTEL_DEBUG(DEBUG_BATCH)) {
674 struct intel_batch_decode_ctx decoder;
675 enum intel_batch_decode_flags flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;
676 if (INTEL_DEBUG(DEBUG_COLOR))
677 flags |= INTEL_BATCH_DECODE_IN_COLOR;
678
679 intel_batch_decode_ctx_init_brw(&decoder, &E.isa, &E.devinfo, stdout,
680 flags, NULL, decode_get_bo, decode_get_state_size, &ec);
681
682 assert(ec.bo.batch.cursor > ec.bo.batch.map);
683 const int batch_offset = ec.batch_start - ec.bo.batch.addr;
684 const int batch_size = (ec.bo.batch.cursor - ec.bo.batch.map) - batch_offset;
685 assert(batch_offset < batch_size);
686
687 intel_print_batch(&decoder, ec.bo.batch.map, batch_size, ec.batch_start, false);
688
689 intel_batch_decode_ctx_finish(&decoder);
690 }
691
692 executor_context_dispatch(&ec);
693
694 {
695 /* TODO: Use userdata to return a wrapped C array instead of building
696 * values. Could make integration with array operations better.
697 */
698 uint32_t *data = ec.bo.data.map;
699 const int n = ec.bo.data.size / 4;
700 lua_createtable(L, n, 0);
701 for (int i = 0; i < 8; i++) {
702 lua_pushinteger(L, data[i]);
703 lua_seti(L, -2, i);
704 }
705 }
706
707 executor_context_teardown(&ec);
708 ralloc_free(ec.mem_ctx);
709
710 return 1;
711 }
712
713 static int
l_dump(lua_State * L)714 l_dump(lua_State *L)
715 {
716 /* TODO: Use a table to add options for the dump, e.g.
717 * starting offset, format, etc.
718 */
719
720 assert(lua_type(L, 1) == LUA_TTABLE);
721 assert(lua_type(L, 2) == LUA_TNUMBER);
722 assert(lua_isinteger(L, 2));
723
724 lua_Integer len_ = lua_tointeger(L, 2);
725 assert(len_ >= 0 && len_ <= INT_MAX);
726 int len = len_;
727
728 int i;
729 for (i = 0; i < len; i++) {
730 if (i%8 == 0) printf("[0x%08x]", i * 4);
731 lua_rawgeti(L, 1, i);
732 lua_Integer val = lua_tointeger(L, -1);
733 printf(" 0x%08x", (uint32_t)val);
734 lua_pop(L, 1);
735 if (i%8 == 7) printf("\n");
736 }
737 if (i%8 != 0) printf("\n");
738 return 0;
739 }
740
741 static int
l_check_ver(lua_State * L)742 l_check_ver(lua_State *L)
743 {
744 int top = lua_gettop(L);
745 for (int i = 1; i <= top; i++) {
746 lua_Integer v = luaL_checknumber(L, i);
747 if (E.devinfo.ver == v) {
748 return 0;
749 }
750 }
751 failf("script doesn't support version=%d verx10=%d\n",
752 E.devinfo.ver, E.devinfo.verx10);
753 return 0;
754 }
755
756 static int
l_check_verx10(lua_State * L)757 l_check_verx10(lua_State *L)
758 {
759 int top = lua_gettop(L);
760 for (int i = 1; i <= top; i++) {
761 lua_Integer v = luaL_checknumber(L, i);
762 if (E.devinfo.verx10 == v) {
763 return 0;
764 }
765 }
766 failf("script doesn't support version=%d verx10=%d\n",
767 E.devinfo.ver, E.devinfo.verx10);
768 return 0;
769 }
770
771 /* TODO: Review numeric limits in the code, specially around Lua integer
772 * conversion.
773 */
774
775 int
main(int argc,char * argv[])776 main(int argc, char *argv[])
777 {
778 if (argc < 2 ||
779 !strcmp(argv[1], "--help") ||
780 !strcmp(argv[1], "-help") ||
781 !strcmp(argv[1], "-h") ||
782 !strcmp(argv[1], "help")) {
783 print_help();
784 return 0;
785 }
786
787 if (argc > 2) {
788 /* TODO: Expose extra arguments to the script as a variable. */
789 failf("invalid extra arguments\nusage: executor FILENAME");
790 return 1;
791 }
792
793 process_intel_debug_variable();
794
795 E.fd = get_drm_device(&E.devinfo);
796 isl_device_init(&E.isl_dev, &E.devinfo);
797 brw_init_isa_info(&E.isa, &E.devinfo);
798 assert(E.devinfo.kmd_type == INTEL_KMD_TYPE_I915 ||
799 E.devinfo.kmd_type == INTEL_KMD_TYPE_XE);
800
801 lua_State *L = luaL_newstate();
802
803 /* TODO: Could be nice to export some kind of builder interface,
804 * maybe even let the script construct a shader at the BRW IR
805 * level and let the later passes kick in.
806 */
807
808 luaL_openlibs(L);
809
810 lua_pushinteger(L, E.devinfo.ver);
811 lua_setglobal(L, "ver");
812
813 lua_pushinteger(L, E.devinfo.verx10);
814 lua_setglobal(L, "verx10");
815
816 lua_pushcfunction(L, l_execute);
817 lua_setglobal(L, "execute");
818
819 lua_pushcfunction(L, l_dump);
820 lua_setglobal(L, "dump");
821
822 lua_pushcfunction(L, l_check_ver);
823 lua_setglobal(L, "check_ver");
824
825 lua_pushcfunction(L, l_check_verx10);
826 lua_setglobal(L, "check_verx10");
827
828 const char *filename = argv[1];
829 int err = luaL_loadfile(L, filename);
830 if (err)
831 failf("failed to load script: %s", lua_tostring(L, -1));
832
833 err = lua_pcall(L, 0, 0, 0);
834 if (err)
835 failf("failed to run script: %s", lua_tostring(L, -1));
836
837 lua_close(L);
838 close(E.fd);
839
840 return 0;
841 }
842
843 void
failf(const char * fmt,...)844 failf(const char *fmt, ...)
845 {
846 va_list args;
847 va_start(args, fmt);
848 fprintf(stderr, "ERROR: ");
849 vfprintf(stderr, fmt, args);
850 fprintf(stderr, "\n");
851 va_end(args);
852 exit(1);
853 }
854