xref: /aosp_15_r20/external/mesa3d/src/intel/executor/executor_main.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2024 Intel Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include <fcntl.h>
7 #include <stdio.h>
8 #include <string.h>
9 #include <sys/mman.h>
10 
11 #include <lua.h>
12 #include <lualib.h>
13 #include <lauxlib.h>
14 
15 #include "util/ralloc.h"
16 
17 #include <xf86drm.h>
18 #include "drm-uapi/i915_drm.h"
19 #include "drm-uapi/xe_drm.h"
20 
21 #include "intel/compiler/brw_asm.h"
22 #include "intel/compiler/brw_isa_info.h"
23 #include "intel/common/intel_gem.h"
24 #include "intel/common/xe/intel_engine.h"
25 #include "intel/decoder/intel_decoder.h"
26 #include "intel/dev/intel_debug.h"
27 
28 #include "executor.h"
29 
/* Fixed layout shared by the buffer objects (BOs) created by the executor. */
enum {
   /* Size in bytes used for every BO. */
   EXECUTOR_BO_SIZE = 10 * 1024 * 1024,

   /* Graphics addresses of the BOs.  Round, well separated values are
    * easy to recognize, which helps spotting errors.
    */
   EXECUTOR_BO_BATCH_ADDR = 0x10000000,
   EXECUTOR_BO_EXTRA_ADDR = 0x20000000,
   EXECUTOR_BO_DATA_ADDR  = 0x30000000,
};
39 
40 static void
print_help()41 print_help()
42 {
43    printf(
44       "Executes shaders written for Intel GPUs\n"
45       "usage: executor FILENAME\n"
46       "\n"
47       "The input is a Lua script that can perform data manipulation\n"
48       "and dispatch execution of compute shaders, written in Xe assembly,\n"
49       "the same format used by the brw_asm assembler or when dumping\n"
50       "shaders in debug mode.\n"
51       "\n"
52       "The goal is to have a tool to experiment directly with certain\n"
53       "assembly instructions and the shared units without having to\n"
54       "instrument the drivers.\n"
55       "\n"
56       "EXECUTION CONTEXT\n"
57       "\n"
58       "By default compute shaders are used with SIMD8 for Gfx9-125 and SIMD16\n"
59       "for Xe2.  Only a single thread is dispatched.  A data buffer is used to\n"
60       "pipe data into the shader and out of it, it is bound to the graphics\n"
61       "address 0x%08x.\n"
62       "\n"
63       "The Gfx versions have differences in their assembly and shared units, so\n"
64       "other than very simple examples, scripts for this program will be either\n"
65       "specific to a version or provide shader variants for multiple versions.\n"
66       "\n"
67       "ASSEMBLY MACROS\n"
68       "\n"
69       "In addition to regular instructions, the follow macros will generate\n"
70       "assembly code based on the Gfx version being executed.  Unlike in regular\n"
71       "instructions, REGs don't use regions and can't be immediates.\n"
72       "\n"
73       "- @eot\n"
74       "  Send an EOT message.\n"
75       "\n"
76       "- @mov REG IMM\n"
77       "  Like a regular MOV but accepts numbers in both decimal and\n"
78       "  floating-point.\n"
79       "\n"
80       "- @id REG\n"
81       "  Write a local invocation index into REG.\n"
82       "\n"
83       "- @read DST_REG OFFSET_REG\n"
84       "  Read 32-bit values from the memory buffer at OFFSET_REG into DST_REG.\n"
85       "\n"
86       "- @write OFFSET_REG SRC_REG\n"
87       "  Write 32-bit values from SRC_REG to the memory buffer at OFFSET_REG.\n"
88       "\n"
89       "- @syncnop\n"
90       "  Produce a coarse grained sync.nop (when applicable) to ensure data from\n"
91       "  macros above are read/written.\n"
92       "\n"
93       "LUA ENVIRONMENT\n"
94       "\n"
95       "In addition to the regular Lua standard library the following variables and.\n"
96       "functions are available.\n"
97       "\n"
98       "- execute({src=STR, data=ARRAY}) -> ARRAY\n"
99       "  Takes a table as argument.  The 'src' in the table contains the shader to be\n"
100       "  executed.  The 'data' argument will be used to fill the data buffer with 32-bit\n"
101       "  values.  The function returns an ARRAY with the contents of the data buffer\n"
102       "  after the shader completes.\n"
103       "\n"
104       "- dump(ARRAY, COUNT)\n"
105       "  Pretty print the COUNT first elements of an array of 32-bit values.\n"
106       "\n"
107       "- check_ver(V, ...), check_verx10(V, ...)\n"
108       "  Exit if the Gfx version being executed isn't in the arguments list.\n"
109       "\n"
110       "- ver, verx10\n"
111       "  Variables containing the Gfx version being executed.\n"
112       "\n"
113       "This program was compiled with %s.\n"
114       "\n"
115       "ENVIRONMENT VARIABLES\n"
116       "\n"
117       "The following INTEL_DEBUG values (comma separated) are used:\n"
118       "\n"
119       " - bat             Dumps the batch buffer.\n"
120       " - color           Uses colors for the batch buffer dump.\n"
121       " - cs              Dumps the assembly after macro processing.\n"
122       "\n"
123       "EXAMPLE\n"
124       "\n"
125       "The following script\n"
126       "\n"
127       "  local r = execute {\n"
128       "    data={ [42] = 0x100 },\n"
129       "    src=[[\n"
130       "      @mov     g1      42\n"
131       "      @read    g2      g1\n"
132       "\n"
133       "      @id      g3\n"
134       "\n"
135       "      add(8)   g4<1>UD  g2<8,8,1>UD  g3<8,8,1>UD  { align1 @1 1Q };\n"
136       "\n"
137       "      @write   g3       g4\n"
138       "      @eot\n"
139       "    ]]\n"
140       "  }\n"
141       "\n"
142       "  dump(r, 4)\n"
143       "\n"
144       "Will produce the following output\n"
145       "\n"
146       "   [0x00000000] 0x00000100 0x00000101 0x00000102 0x00000103\n"
147       "\n"
148       "More examples can be found in the examples/ directory in the source code.\n"
149       "\n", EXECUTOR_BO_DATA_ADDR, LUA_RELEASE);
150 }
151 
152 static struct {
153    struct intel_device_info devinfo;
154    struct isl_device isl_dev;
155    struct brw_isa_info isa;
156    int fd;
157 } E;
158 
159 #define genX_call(func, ...)                                \
160    switch (E.devinfo.verx10) {                              \
161    case 90:  gfx9_  ##func(__VA_ARGS__); break;             \
162    case 110: gfx11_ ##func(__VA_ARGS__); break;             \
163    case 120: gfx12_ ##func(__VA_ARGS__); break;             \
164    case 125: gfx125_##func(__VA_ARGS__); break;             \
165    case 200: gfx20_ ##func(__VA_ARGS__); break;             \
166    default: unreachable("Unsupported hardware generation"); \
167    }
168 
169 static void
executor_create_bo(executor_context * ec,executor_bo * bo,uint64_t addr,uint32_t size_in_bytes)170 executor_create_bo(executor_context *ec, executor_bo *bo, uint64_t addr, uint32_t size_in_bytes)
171 {
172    if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
173       struct drm_i915_gem_create gem_create = {
174          .size = size_in_bytes,
175       };
176 
177       int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
178       if (err)
179          failf("i915_gem_create");
180 
181       struct drm_i915_gem_mmap_offset mm = {
182          .handle = gem_create.handle,
183          .flags  = ec->devinfo->has_local_mem ? I915_MMAP_OFFSET_FIXED
184                                               : I915_MMAP_OFFSET_WC,
185       };
186 
187       err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mm);
188       if (err)
189          failf("i915_gem_mmap_offset");
190 
191       bo->handle = gem_create.handle;
192       bo->map    = mmap(NULL, size_in_bytes, PROT_READ | PROT_WRITE,
193                         MAP_SHARED, ec->fd, mm.offset);
194       if (!bo->map)
195          failf("mmap");
196    } else {
197       assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);
198 
199       struct drm_xe_gem_create gem_create = {
200          .size        = size_in_bytes,
201          .cpu_caching = DRM_XE_GEM_CPU_CACHING_WB,
202          .placement   = 1u << ec->devinfo->mem.sram.mem.instance,
203       };
204 
205       int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_GEM_CREATE, &gem_create);
206       if (err)
207          failf("xe_gem_create");
208 
209       struct drm_xe_gem_mmap_offset mm = {
210          .handle = gem_create.handle,
211       };
212 
213       err = intel_ioctl(ec->fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mm);
214       if (err)
215          failf("xe_gem_mmap_offset");
216 
217       bo->handle = gem_create.handle;
218       bo->map    = mmap(NULL, size_in_bytes, PROT_READ | PROT_WRITE,
219                         MAP_SHARED, ec->fd, mm.offset);
220       if (!bo->map)
221          failf("mmap");
222    }
223 
224    bo->size   = size_in_bytes;
225    bo->addr   = addr;
226    bo->cursor = bo->map;
227 }
228 
229 static void
executor_destroy_bo(executor_context * ec,executor_bo * bo)230 executor_destroy_bo(executor_context *ec, executor_bo *bo)
231 {
232    struct drm_gem_close gem_close = {
233       .handle = bo->handle,
234    };
235 
236    int err = munmap(bo->map, bo->size);
237    if (err)
238       failf("munmap");
239 
240    err = intel_ioctl(ec->fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
241    if (err)
242       failf("gem_close");
243 
244    memset(bo, 0, sizeof(*bo));
245 }
246 
247 static void
executor_print_bo(executor_bo * bo,const char * name)248 executor_print_bo(executor_bo *bo, const char *name)
249 {
250    assert((bo->cursor - bo->map) % 4 == 0);
251    uint32_t *dw = bo->map;
252    uint32_t len = (uint32_t *)bo->cursor - dw;
253 
254    printf("=== %s (0x%08"PRIx64", %td bytes) ===\n", name, bo->addr, bo->cursor - bo->map);
255 
256    for (int i = 0; i < len; i++) {
257       if ((i % 8) == 0) printf("[0x%08x] ", (i*4) + (uint32_t)bo->addr);
258       printf("0x%08x ", dw[i]);
259       if ((i % 8) == 7) printf("\n");
260    }
261    printf("\n");
262 }
263 
264 void *
executor_alloc_bytes(executor_bo * bo,uint32_t size)265 executor_alloc_bytes(executor_bo *bo, uint32_t size)
266 {
267    return executor_alloc_bytes_aligned(bo, size, 0);
268 }
269 
270 void *
executor_alloc_bytes_aligned(executor_bo * bo,uint32_t size,uint32_t alignment)271 executor_alloc_bytes_aligned(executor_bo *bo, uint32_t size, uint32_t alignment)
272 {
273    void *r = bo->cursor;
274    if (alignment) {
275       r = (void *)(((uintptr_t)r + alignment-1) & ~((uintptr_t)alignment-1));
276    }
277    bo->cursor = r + size;
278    return r;
279 }
280 
281 executor_address
executor_address_of_ptr(executor_bo * bo,void * ptr)282 executor_address_of_ptr(executor_bo *bo, void *ptr)
283 {
284    return (executor_address){ptr - bo->map + bo->addr};
285 }
286 
287 static int
get_drm_device(struct intel_device_info * devinfo)288 get_drm_device(struct intel_device_info *devinfo)
289 {
290    drmDevicePtr devices[8];
291    int max_devices = drmGetDevices2(0, devices, 8);
292 
293    int i, fd = -1;
294    for (i = 0; i < max_devices; i++) {
295       if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
296           devices[i]->bustype == DRM_BUS_PCI &&
297           devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
298          fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
299          if (fd < 0)
300             continue;
301 
302          if (!intel_get_device_info_from_fd(fd, devinfo, -1, -1) ||
303              devinfo->ver < 8) {
304             close(fd);
305             fd = -1;
306             continue;
307          }
308 
309          /* Found a device! */
310          break;
311       }
312    }
313    drmFreeDevices(devices, max_devices);
314 
315    return fd;
316 }
317 
318 static struct intel_batch_decode_bo
decode_get_bo(void * _ec,bool ppgtt,uint64_t address)319 decode_get_bo(void *_ec, bool ppgtt, uint64_t address)
320 {
321    executor_context *ec = _ec;
322    struct intel_batch_decode_bo bo = {0};
323 
324    if (address >= ec->bo.batch.addr && address < ec->bo.batch.addr + ec->bo.batch.size) {
325       bo.addr = ec->bo.batch.addr;
326       bo.size = ec->bo.batch.size;
327       bo.map  = ec->bo.batch.map;
328    } else if (address >= ec->bo.extra.addr && address < ec->bo.extra.addr + ec->bo.extra.size) {
329       bo.addr = ec->bo.extra.addr;
330       bo.size = ec->bo.extra.size;
331       bo.map  = ec->bo.extra.map;
332    } else if (address >= ec->bo.data.addr && address < ec->bo.data.addr + ec->bo.data.size) {
333       bo.addr = ec->bo.data.addr;
334       bo.size = ec->bo.data.size;
335       bo.map  = ec->bo.data.map;
336    }
337 
338    return bo;
339 }
340 
341 static unsigned
decode_get_state_size(void * _ec,uint64_t address,uint64_t base_address)342 decode_get_state_size(void *_ec, uint64_t address, uint64_t base_address)
343 {
344    return EXECUTOR_BO_SIZE;
345 }
346 
347 static void
parse_execute_data(executor_context * ec,lua_State * L,int table_idx)348 parse_execute_data(executor_context *ec, lua_State *L, int table_idx)
349 {
350    uint32_t *data = ec->bo.data.map;
351 
352    lua_pushvalue(L, table_idx);
353 
354    lua_pushnil(L);
355    while (lua_next(L, -2) != 0) {
356       int val_idx = lua_gettop(L);
357       int key_idx = val_idx - 1;
358 
359       if (lua_type(L, key_idx) != LUA_TNUMBER || !lua_isinteger(L, key_idx))
360          failf("invalid key for data in execute call");
361 
362       lua_Integer key = lua_tointeger(L, key_idx);
363       assert(key <= 10 * 1024 * 1024 / 4);
364       lua_Integer val = lua_tointeger(L, val_idx);
365       data[key] = val;
366 
367       lua_pop(L, 1);
368    }
369 
370    lua_pop(L, 1);
371 }
372 
373 static void
parse_execute_args(executor_context * ec,lua_State * L,executor_params * params)374 parse_execute_args(executor_context *ec, lua_State *L, executor_params *params)
375 {
376    int opts = lua_gettop(L);
377 
378    lua_pushnil(L);
379 
380    while (lua_next(L, opts) != 0) {
381       int val_idx = lua_gettop(L);
382       int key_idx = val_idx - 1;
383 
384       if (lua_type(L, key_idx) != LUA_TSTRING) {
385          lua_pop(L, 1);
386          continue;
387       }
388 
389       const char *key = lua_tostring(L, key_idx);
390 
391       if (!strcmp(key, "src")) {
392          params->original_src = ralloc_strdup(ec->mem_ctx, luaL_checkstring(L, val_idx));
393       } else if (!strcmp(key, "data")) {
394          parse_execute_data(ec, L, val_idx);
395       } else {
396          failf("unknown parameter '%s' for execute()", key);
397       }
398 
399       lua_pop(L, 1);
400    }
401 }
402 
403 static void
executor_context_setup(executor_context * ec)404 executor_context_setup(executor_context *ec)
405 {
406    if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
407       struct drm_i915_gem_context_create create = {0};
408       int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
409       if (err)
410          failf("i915_gem_context_create");
411       ec->i915.ctx_id = create.ctx_id;
412    } else {
413       assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);
414 
415       struct drm_xe_vm_create create = {
416          .flags = DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE,
417       };
418       int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_VM_CREATE, &create);
419       if (err)
420          failf("xe_vm_create");
421       ec->xe.vm_id = create.vm_id;
422 
423       struct drm_xe_engine_class_instance instance = {0};
424 
425       struct intel_query_engine_info *engines_info = xe_engine_get_info(ec->fd);
426       assert(engines_info);
427 
428       bool found_engine = false;
429       for (int i = 0; i < engines_info->num_engines; i++) {
430          struct intel_engine_class_instance *e = &engines_info->engines[i];
431          if (e->engine_class == INTEL_ENGINE_CLASS_RENDER) {
432             instance.engine_class = DRM_XE_ENGINE_CLASS_RENDER;
433             instance.engine_instance = e->engine_instance;
434             instance.gt_id = e->gt_id;
435             found_engine = true;
436             break;
437          }
438       }
439       assert(found_engine);
440       free(engines_info);
441 
442       struct drm_xe_exec_queue_create queue_create = {
443          .vm_id          = ec->xe.vm_id,
444          .width          = 1,
445          .num_placements = 1,
446          .instances      = (uintptr_t)&instance,
447       };
448       err = intel_ioctl(ec->fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &queue_create);
449       if (err)
450          failf("xe_exec_queue_create");
451       ec->xe.queue_id = queue_create.exec_queue_id;
452    }
453 
454    executor_create_bo(ec, &ec->bo.batch, EXECUTOR_BO_BATCH_ADDR, EXECUTOR_BO_SIZE);
455    executor_create_bo(ec, &ec->bo.extra, EXECUTOR_BO_EXTRA_ADDR, EXECUTOR_BO_SIZE);
456    executor_create_bo(ec, &ec->bo.data,  EXECUTOR_BO_DATA_ADDR, EXECUTOR_BO_SIZE);
457 
458    uint32_t *data = ec->bo.data.map;
459    for (int i = 0; i < EXECUTOR_BO_SIZE / 4; i++)
460       data[i] = 0xABABABAB;
461 }
462 
463 static void
executor_context_dispatch(executor_context * ec)464 executor_context_dispatch(executor_context *ec)
465 {
466    if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
467       struct drm_i915_gem_exec_object2 objs[] = {
468          {
469             .handle = ec->bo.batch.handle,
470             .offset = ec->bo.batch.addr,
471             .flags  = EXEC_OBJECT_PINNED,
472          },
473          {
474             .handle = ec->bo.extra.handle,
475             .offset = ec->bo.extra.addr,
476             .flags  = EXEC_OBJECT_PINNED,
477          },
478          {
479             .handle = ec->bo.data.handle,
480             .offset = ec->bo.data.addr,
481             .flags  = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE,
482          },
483       };
484 
485       struct drm_i915_gem_execbuffer2 exec = {0};
486       exec.buffers_ptr = (uintptr_t)objs;
487       exec.buffer_count = ARRAY_SIZE(objs);
488       exec.batch_start_offset = ec->batch_start - ec->bo.batch.addr;
489       exec.flags = I915_EXEC_BATCH_FIRST;
490       exec.rsvd1 = ec->i915.ctx_id;
491 
492       int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec);
493       if (err)
494           failf("i915_gem_execbuffer2");
495 
496       struct drm_i915_gem_wait wait = {0};
497       wait.bo_handle = ec->bo.batch.handle;
498       wait.timeout_ns = INT64_MAX;
499 
500       err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
501       if (err)
502          failf("i915_gem_wait");
503    } else {
504       assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);
505 
506       /* First syncobj is signalled by the binding operation and waited by the
507        * execution of the batch buffer.
508        *
509        * Second syncobj is singalled by the execution of batch buffer and
510        * waited at the end.
511        */
512       uint32_t sync_handles[2] = {0};
513       for (int i = 0; i < 2; i++) {
514          struct drm_syncobj_create sync_create = {0};
515          int err = intel_ioctl(ec->fd, DRM_IOCTL_SYNCOBJ_CREATE, &sync_create);
516          if (err)
517             failf("syncobj_create");
518          sync_handles[i] = sync_create.handle;
519       }
520 
521       struct drm_xe_vm_bind_op bind_ops[] = {
522          {
523             .op        = DRM_XE_VM_BIND_OP_MAP,
524             .obj       = ec->bo.batch.handle,
525             .addr      = ec->bo.batch.addr,
526             .range     = EXECUTOR_BO_SIZE,
527             .pat_index = ec->devinfo->pat.cached_coherent.index,
528          },
529          {
530             .op        = DRM_XE_VM_BIND_OP_MAP,
531             .obj       = ec->bo.extra.handle,
532             .addr      = ec->bo.extra.addr,
533             .range     = EXECUTOR_BO_SIZE,
534             .pat_index = ec->devinfo->pat.cached_coherent.index,
535          },
536          {
537             .op        = DRM_XE_VM_BIND_OP_MAP,
538             .obj       = ec->bo.data.handle,
539             .addr      = ec->bo.data.addr,
540             .range     = EXECUTOR_BO_SIZE,
541             .pat_index = ec->devinfo->pat.cached_coherent.index,
542          },
543       };
544 
545       struct drm_xe_sync bind_syncs[] = {
546          {
547             .type   = DRM_XE_SYNC_TYPE_SYNCOBJ,
548             .handle = sync_handles[0],
549             .flags  = DRM_XE_SYNC_FLAG_SIGNAL,
550          },
551       };
552 
553       struct drm_xe_vm_bind bind = {
554          .vm_id           = ec->xe.vm_id,
555          .num_binds       = ARRAY_SIZE(bind_ops),
556          .vector_of_binds = (uintptr_t)bind_ops,
557          .num_syncs       = 1,
558          .syncs           = (uintptr_t)bind_syncs,
559       };
560 
561       int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_VM_BIND, &bind);
562       if (err)
563          failf("xe_vm_bind");
564 
565       struct drm_xe_sync exec_syncs[] = {
566          {
567             .type   = DRM_XE_SYNC_TYPE_SYNCOBJ,
568             .handle = sync_handles[0],
569          },
570          {
571             .type   = DRM_XE_SYNC_TYPE_SYNCOBJ,
572             .handle = sync_handles[1],
573             .flags  = DRM_XE_SYNC_FLAG_SIGNAL,
574          }
575       };
576 
577       struct drm_xe_exec exec = {
578          .exec_queue_id    = ec->xe.queue_id,
579          .num_batch_buffer = 1,
580          .address          = ec->batch_start,
581          .num_syncs        = 2,
582          .syncs            = (uintptr_t)exec_syncs,
583       };
584       err = intel_ioctl(ec->fd, DRM_IOCTL_XE_EXEC, &exec);
585       if (err)
586          failf("xe_exec");
587 
588       struct drm_syncobj_wait wait = {
589          .count_handles = 1,
590          .handles       = (uintptr_t)&sync_handles[1],
591          .timeout_nsec  = INT64_MAX,
592       };
593       err = intel_ioctl(ec->fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);
594       if (err)
595          failf("syncobj_wait");
596    }
597 }
598 
599 static void
executor_context_teardown(executor_context * ec)600 executor_context_teardown(executor_context *ec)
601 {
602    executor_destroy_bo(ec, &ec->bo.batch);
603    executor_destroy_bo(ec, &ec->bo.extra);
604    executor_destroy_bo(ec, &ec->bo.data);
605 
606    if (ec->devinfo->kmd_type == INTEL_KMD_TYPE_I915) {
607       struct drm_i915_gem_context_destroy destroy = {
608          .ctx_id = ec->i915.ctx_id,
609       };
610       int err = intel_ioctl(ec->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &destroy);
611       if (err)
612          failf("i915_gem_context_destroy");
613    } else {
614       assert(ec->devinfo->kmd_type == INTEL_KMD_TYPE_XE);
615 
616       struct drm_xe_exec_queue_destroy queue_destroy = {
617          .exec_queue_id = ec->xe.queue_id,
618       };
619       int err = intel_ioctl(ec->fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &queue_destroy);
620       if (err)
621          failf("xe_exec_queue_destroy");
622 
623       struct drm_xe_vm_destroy destroy = {
624          .vm_id =  ec->xe.vm_id,
625       };
626       err = intel_ioctl(ec->fd, DRM_IOCTL_XE_VM_DESTROY, &destroy);
627       if (err)
628          failf("xe_vm_destroy");
629    }
630 }
631 
632 static int
l_execute(lua_State * L)633 l_execute(lua_State *L)
634 {
635    executor_context ec = {
636       .mem_ctx = ralloc_context(NULL),
637       .devinfo = &E.devinfo,
638       .isl_dev = &E.isl_dev,
639       .fd      = E.fd,
640    };
641 
642    executor_context_setup(&ec);
643 
644    executor_params params = {0};
645 
646    {
647       if (lua_gettop(L) != 1)
648          failf("execute() must have a single table argument");
649 
650       parse_execute_args(&ec, L, &params);
651 
652       const char *src = executor_apply_macros(&ec, params.original_src);
653 
654       FILE *f = fmemopen((void *)src, strlen(src), "r");
655       brw_assemble_result asm = brw_assemble(ec.mem_ctx, ec.devinfo, f, "", 0);
656       fclose(f);
657 
658       if (INTEL_DEBUG(DEBUG_CS) || !asm.bin) {
659          printf("=== Processed assembly source ===\n"
660                 "%s"
661                 "=================================\n\n", src);
662       }
663 
664       if (!asm.bin)
665          failf("assembler failure");
666 
667       params.kernel_bin = asm.bin;
668       params.kernel_size = asm.bin_size;
669    }
670 
671    genX_call(emit_execute, &ec, &params);
672 
673    if (INTEL_DEBUG(DEBUG_BATCH)) {
674       struct intel_batch_decode_ctx decoder;
675       enum intel_batch_decode_flags flags = INTEL_BATCH_DECODE_DEFAULT_FLAGS;
676       if (INTEL_DEBUG(DEBUG_COLOR))
677          flags |= INTEL_BATCH_DECODE_IN_COLOR;
678 
679       intel_batch_decode_ctx_init_brw(&decoder, &E.isa, &E.devinfo, stdout,
680                                       flags, NULL, decode_get_bo, decode_get_state_size, &ec);
681 
682       assert(ec.bo.batch.cursor > ec.bo.batch.map);
683       const int batch_offset = ec.batch_start - ec.bo.batch.addr;
684       const int batch_size = (ec.bo.batch.cursor - ec.bo.batch.map) - batch_offset;
685       assert(batch_offset < batch_size);
686 
687       intel_print_batch(&decoder, ec.bo.batch.map, batch_size, ec.batch_start, false);
688 
689       intel_batch_decode_ctx_finish(&decoder);
690    }
691 
692    executor_context_dispatch(&ec);
693 
694    {
695       /* TODO: Use userdata to return a wrapped C array instead of building
696        * values.  Could make integration with array operations better.
697        */
698       uint32_t *data = ec.bo.data.map;
699       const int n = ec.bo.data.size / 4;
700       lua_createtable(L, n, 0);
701       for (int i = 0; i < 8; i++) {
702          lua_pushinteger(L, data[i]);
703          lua_seti(L, -2, i);
704       }
705    }
706 
707    executor_context_teardown(&ec);
708    ralloc_free(ec.mem_ctx);
709 
710    return 1;
711 }
712 
713 static int
l_dump(lua_State * L)714 l_dump(lua_State *L)
715 {
716    /* TODO: Use a table to add options for the dump, e.g.
717     * starting offset, format, etc.
718     */
719 
720    assert(lua_type(L, 1) == LUA_TTABLE);
721    assert(lua_type(L, 2) == LUA_TNUMBER);
722    assert(lua_isinteger(L, 2));
723 
724    lua_Integer len_ = lua_tointeger(L, 2);
725    assert(len_ >= 0 && len_ <= INT_MAX);
726    int len = len_;
727 
728    int i;
729    for (i = 0; i < len; i++) {
730       if (i%8 == 0) printf("[0x%08x]", i * 4);
731       lua_rawgeti(L, 1, i);
732       lua_Integer val = lua_tointeger(L, -1);
733       printf(" 0x%08x", (uint32_t)val);
734       lua_pop(L, 1);
735       if (i%8 == 7) printf("\n");
736    }
737    if (i%8 != 0) printf("\n");
738    return 0;
739 }
740 
741 static int
l_check_ver(lua_State * L)742 l_check_ver(lua_State *L)
743 {
744    int top = lua_gettop(L);
745    for (int i = 1; i <= top; i++) {
746       lua_Integer v = luaL_checknumber(L, i);
747       if (E.devinfo.ver == v) {
748          return 0;
749       }
750    }
751    failf("script doesn't support version=%d verx10=%d\n",
752          E.devinfo.ver, E.devinfo.verx10);
753    return 0;
754 }
755 
756 static int
l_check_verx10(lua_State * L)757 l_check_verx10(lua_State *L)
758 {
759    int top = lua_gettop(L);
760    for (int i = 1; i <= top; i++) {
761       lua_Integer v = luaL_checknumber(L, i);
762       if (E.devinfo.verx10 == v) {
763          return 0;
764       }
765    }
766    failf("script doesn't support version=%d verx10=%d\n",
767          E.devinfo.ver, E.devinfo.verx10);
768    return 0;
769 }
770 
/* TODO: Review numeric limits in the code, especially around Lua integer
 * conversion.
 */
774 
775 int
main(int argc,char * argv[])776 main(int argc, char *argv[])
777 {
778    if (argc < 2 ||
779        !strcmp(argv[1], "--help") ||
780        !strcmp(argv[1], "-help") ||
781        !strcmp(argv[1], "-h") ||
782        !strcmp(argv[1], "help")) {
783       print_help();
784       return 0;
785    }
786 
787    if (argc > 2) {
788       /* TODO: Expose extra arguments to the script as a variable. */
789       failf("invalid extra arguments\nusage: executor FILENAME");
790       return 1;
791    }
792 
793    process_intel_debug_variable();
794 
795    E.fd = get_drm_device(&E.devinfo);
796    isl_device_init(&E.isl_dev, &E.devinfo);
797    brw_init_isa_info(&E.isa, &E.devinfo);
798    assert(E.devinfo.kmd_type == INTEL_KMD_TYPE_I915 ||
799           E.devinfo.kmd_type == INTEL_KMD_TYPE_XE);
800 
801    lua_State *L = luaL_newstate();
802 
803    /* TODO: Could be nice to export some kind of builder interface,
804     * maybe even let the script construct a shader at the BRW IR
805     * level and let the later passes kick in.
806     */
807 
808    luaL_openlibs(L);
809 
810    lua_pushinteger(L, E.devinfo.ver);
811    lua_setglobal(L, "ver");
812 
813    lua_pushinteger(L, E.devinfo.verx10);
814    lua_setglobal(L, "verx10");
815 
816    lua_pushcfunction(L, l_execute);
817    lua_setglobal(L, "execute");
818 
819    lua_pushcfunction(L, l_dump);
820    lua_setglobal(L, "dump");
821 
822    lua_pushcfunction(L, l_check_ver);
823    lua_setglobal(L, "check_ver");
824 
825    lua_pushcfunction(L, l_check_verx10);
826    lua_setglobal(L, "check_verx10");
827 
828    const char *filename = argv[1];
829    int err = luaL_loadfile(L, filename);
830    if (err)
831       failf("failed to load script: %s", lua_tostring(L, -1));
832 
833    err = lua_pcall(L, 0, 0, 0);
834    if (err)
835       failf("failed to run script: %s", lua_tostring(L, -1));
836 
837    lua_close(L);
838    close(E.fd);
839 
840    return 0;
841 }
842 
/* Report a fatal error and terminate the process.
 *
 * Writes "ERROR: ", the printf-style message built from `fmt` and its
 * arguments, and a newline to stderr, then exits with status 1.
 */
void
failf(const char *fmt, ...)
{
   va_list ap;

   fputs("ERROR: ", stderr);

   va_start(ap, fmt);
   vfprintf(stderr, fmt, ap);
   va_end(ap);

   fputs("\n", stderr);
   exit(1);
}
853 }
854