xref: /aosp_15_r20/external/mesa3d/src/asahi/lib/shaders/helper.cl (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1/*
2 * Copyright 2023 Asahi Lina
3 * SPDX-License-Identifier: MIT
4 */
5#include "helper.h"
6#include "libagx.h"
7
/*
 * Doorbell indices passed to nir_doorbell_agx() by the helper loop in this
 * file.
 */
enum {
   /* Rung at the top of every loop iteration, before the opcode is read. */
   DB_NEXT = 32,
   /* Rung after an operation has been carried out. */
   DB_ACK = 48,
   /* Rung when an operation is refused or not implemented. */
   DB_NACK = 49,
};
11
/*
 * Operation identifiers returned by nir_load_helper_op_id_agx() and
 * dispatched on by libagx_helper().
 */
enum helper_op {
   /* Map the next unused block list entry as a stack. */
   OP_STACK_ALLOC = 0x0,
   /* Unmap the stack and store its blocks back into the block list. */
   OP_STACK_FREE = 0x1,
   /* Threadgroup operations: currently always answered with a NACK. */
   OP_THREADGROUP_ALLOC = 0x4,
   OP_THREADGROUP_FREE = 0x5,
   /* Leave the helper loop. */
   OP_END = 0xf,
};
19
20void
21libagx_helper(void)
22{
23   uint64_t arg =
24      nir_load_helper_arg_lo_agx() | (((uint64_t)nir_load_helper_arg_hi_agx()) << 32);
25
26   global struct agx_helper_header *hdr =
27      (global struct agx_helper_header *)arg;
28
29   uint32_t core_index = nir_load_core_id_agx();
30   uint32_t subgroups = hdr->subgroups;
31   global struct agx_helper_core *core = &hdr->cores[core_index];
32
33   while (1) {
34      nir_doorbell_agx(DB_NEXT);
35      uint32_t op = nir_load_helper_op_id_agx();
36      uint32_t arg = nir_load_helper_arg_lo_agx();
37
38      switch (op) {
39      case OP_STACK_ALLOC: {
40         uint32_t idx = core->alloc_cur;
41         if (idx >= subgroups) {
42            core->alloc_failed++;
43            nir_doorbell_agx(DB_NACK);
44            break;
45         }
46         core->alloc_max = max(core->alloc_max, ++core->alloc_cur);
47         core->alloc_count[arg]++;
48
49         nir_stack_map_agx(0, core->blocklist[idx].blocks[0]);
50         nir_stack_map_agx(1, core->blocklist[idx].blocks[1]);
51         nir_stack_map_agx(2, core->blocklist[idx].blocks[2]);
52         nir_stack_map_agx(3, core->blocklist[idx].blocks[3]);
53         nir_doorbell_agx(DB_ACK);
54         break;
55      }
56
57      case OP_STACK_FREE: {
58         if (!core->alloc_cur) { // underflow
59            nir_doorbell_agx(DB_NACK);
60            break;
61         }
62         uint32_t idx = --core->alloc_cur;
63         core->blocklist[idx].blocks[0] = nir_stack_unmap_agx(0);
64         core->blocklist[idx].blocks[1] = nir_stack_unmap_agx(1);
65         core->blocklist[idx].blocks[2] = nir_stack_unmap_agx(2);
66         core->blocklist[idx].blocks[3] = nir_stack_unmap_agx(3);
67         nir_doorbell_agx(DB_ACK);
68         break;
69      }
70
71      // TODO: Implement threadgroup allocs (for compute preemption)
72      case OP_THREADGROUP_ALLOC: {
73         nir_doorbell_agx(DB_NACK);
74         break;
75      }
76
77      case OP_THREADGROUP_FREE: {
78         nir_doorbell_agx(DB_NACK);
79         break;
80      }
81
82      case OP_END: {
83         nir_fence_helper_exit_agx();
84         return;
85      }
86
87      default:
88         *(global uint32_t *)(0xdead0000 | (op << 8)) = 0;
89         nir_fence_helper_exit_agx();
90         return;
91      }
92   }
93}
94