/*
 * Copyright 2023 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */
5
6#include "geometry.h"
7#include "tessellator.h"
8#include <agx_pack.h>
9
/* Vertex count of each input (TCS) patch, from the tess argument buffer. */
uint
libagx_tcs_patch_vertices_in(constant struct libagx_tess_args *p)
{
   return p->input_patch_size;
}
15
/* Vertex count of each output (TES) patch, from the tess argument buffer. */
uint
libagx_tes_patch_vertices_in(constant struct libagx_tess_args *p)
{
   return p->output_patch_size;
}
21
/*
 * Flatten a (patch, instance) workgroup ID into a single linear patch index:
 * x is the patch within the instance, y is the instance.
 */
uint
libagx_tcs_unrolled_id(constant struct libagx_tess_args *p, uint3 wg_id)
{
   uint instance = wg_id.y;
   uint patch_in_instance = wg_id.x;

   return (instance * p->patches_per_instance) + patch_in_instance;
}
27
/* Address of the TES buffer recorded in the tess argument buffer. */
uint64_t
libagx_tes_buffer(constant struct libagx_tess_args *p)
{
   return p->tes_buffer;
}
33
/*
 * Lower indexing for a tess eval shader that runs as a compute shader (the
 * tess+geom case). Unlike general input assembly lowering, here we know that:
 *
 * 1. the index buffer is U32
 * 2. the index is in bounds
 *
 * so a plain load suffices — no bounds checking required.
 */
uint32_t
libagx_load_tes_index(constant struct libagx_tess_args *p, uint32_t index)
{
   return p->index_buffer[index];
}
49
/*
 * Byte offset of a TCS input. Thin wrapper over the shared helper so the
 * NIR lowering can call it directly.
 */
ushort
libagx_tcs_in_offset(uint vtx, gl_varying_slot location,
                     uint64_t crosslane_vs_out_mask)
{
   return libagx_tcs_in_offs(vtx, location, crosslane_vs_out_mask);
}
56
/*
 * Address of a TCS output in the TCS output buffer: base + patch * stride +
 * the output's offset within the patch record.
 */
uintptr_t
libagx_tcs_out_address(constant struct libagx_tess_args *p, uint patch_id,
                       uint vtx_id, gl_varying_slot location, uint nr_patch_out,
                       uint out_patch_size, uint64_t vtx_out_mask)
{
   /* Offset of this output inside a single patch's record */
   uint offs = libagx_tcs_out_offs(vtx_id, location, nr_patch_out,
                                   out_patch_size, vtx_out_mask);

   /* Size of one patch's record */
   uint stride =
      libagx_tcs_out_stride(nr_patch_out, out_patch_size, vtx_out_mask);

   return (uintptr_t)(p->tcs_buffer) + (patch_id * stride) + offs;
}
70
/* Recover the unrolled patch index from a raw TES invocation ID. */
static uint
libagx_tes_unrolled_patch_id(uint raw_id)
{
   return raw_id / LIBAGX_TES_PATCH_ID_STRIDE;
}
76
/* Patch ID within the current instance, for a raw TES invocation ID. */
uint
libagx_tes_patch_id(constant struct libagx_tess_args *p, uint raw_id)
{
   uint unrolled = libagx_tes_unrolled_patch_id(raw_id);

   return unrolled % p->patches_per_instance;
}
82
/* Recover the vertex index within its patch from a raw TES invocation ID. */
static uint
tes_vertex_id_in_patch(uint raw_id)
{
   return raw_id % LIBAGX_TES_PATCH_ID_STRIDE;
}
88
/*
 * Load the (u, v) tessellation coordinate of a TES invocation from the
 * patch coordinate buffer, using the per-patch allocation table.
 */
float2
libagx_load_tess_coord(constant struct libagx_tess_args *p, uint raw_id)
{
   uint patch_idx = libagx_tes_unrolled_patch_id(raw_id);
   uint vtx_idx = tes_vertex_id_in_patch(raw_id);

   global struct libagx_tess_point *point =
      &p->patch_coord_buffer[p->coord_allocs[patch_idx] + vtx_idx];

   /* Load as a plain float2 rather than the struct, because NIR struggles
    * with loads of structs.
    */
   return *((global float2 *)point);
}
101
/*
 * Address of a TES input. TES inputs are the TCS outputs of the invocation's
 * patch, so this resolves the patch and defers to the TCS output addressing.
 */
uintptr_t
libagx_tes_in_address(constant struct libagx_tess_args *p, uint raw_id,
                      uint vtx_id, gl_varying_slot location)
{
   uint patch_idx = libagx_tes_unrolled_patch_id(raw_id);

   return libagx_tcs_out_address(p, patch_idx, vtx_id, location,
                                 p->tcs_patch_constants, p->output_patch_size,
                                 p->tcs_per_vertex_outputs);
}
112
/* Default outer tessellation levels, gathered into a vector. */
float4
libagx_tess_level_outer_default(constant struct libagx_tess_args *p)
{
   return (float4)(p->tess_level_outer_default[0],
                   p->tess_level_outer_default[1],
                   p->tess_level_outer_default[2],
                   p->tess_level_outer_default[3]);
}
120
/* Default inner tessellation levels, gathered into a vector. */
float2
libagx_tess_level_inner_default(constant struct libagx_tess_args *p)
{
   return (
      float2)(p->tess_level_inner_default[0], p->tess_level_inner_default[1]);
}
127
/*
 * Set up tessellation for an indirect draw, run as a single compute
 * invocation. Reads the application's indirect parameters (vertex count,
 * instance count), carves one heap allocation into the intermediate buffers
 * needed by the tessellation pipeline (TCS outputs, per-patch coordinate
 * allocations, optional counts, VS outputs), publishes their addresses in
 * the argument buffer, and writes the dispatch grids for the VS, TCS, and
 * tessellator passes.
 */
void
libagx_tess_setup_indirect(global struct libagx_tess_args *p, bool with_counts,
                           bool point_mode)
{
   /* Indirect draw descriptor: word 0 is the vertex count, word 1 the
    * instance count.
    */
   uint count = p->indirect[0], instance_count = p->indirect[1];
   unsigned in_patches = count / p->input_patch_size;

   /* TCS invocation counter increments once per-patch */
   if (p->tcs_statistic) {
      *(p->tcs_statistic) += in_patches;
   }

   /* Words per generated draw: 4 for point mode without counts, else 6.
    * NOTE(review): only referenced by the disabled block below; kept so that
    * block still compiles if re-enabled.
    */
   size_t draw_stride =
      ((!with_counts && point_mode) ? 4 : 6) * sizeof(uint32_t);

   unsigned unrolled_patches = in_patches * instance_count;

   /* Lay out the heap allocation as a running byte total. */
   uint32_t alloc = 0;

   /* TCS output buffer: tcs_stride_el elements per unrolled patch
    * (presumably 4 bytes each, hence the * 4 — TODO confirm).
    */
   uint32_t tcs_out_offs = alloc;
   alloc += unrolled_patches * p->tcs_stride_el * 4;

   /* Per-patch coordinate allocation indices, one 4-byte entry per patch. */
   uint32_t patch_coord_offs = alloc;
   alloc += unrolled_patches * 4;

   /* Optional per-patch counts, only when running with counts. */
   uint32_t count_offs = alloc;
   if (with_counts)
      alloc += unrolled_patches * sizeof(uint32_t);

   /* Vertex shader output buffer for every vertex of every instance. */
   uint vb_offs = alloc;
   uint vb_size = libagx_tcs_in_size(count * instance_count, p->vertex_outputs);
   alloc += vb_size;

   /* Allocate all patch calculations in one go */
   global uchar *blob = p->heap->heap + p->heap->heap_bottom;
   p->heap->heap_bottom += alloc;

   /* Publish the carved-up regions via the argument buffer. */
   p->tcs_buffer = (global float *)(blob + tcs_out_offs);
   p->patches_per_instance = in_patches;
   p->coord_allocs = (global uint *)(blob + patch_coord_offs);
   p->nr_patches = unrolled_patches;

   *(p->vertex_output_buffer_ptr) = (uintptr_t)(blob + vb_offs);

   if (with_counts) {
      p->counts = (global uint32_t *)(blob + count_offs);
   } else {
#if 0
      /* Arrange so we return after all generated draws. agx_pack would be nicer
       * here but designated initializers lead to scratch access...
       */
      global uint32_t *ret =
         (global uint32_t *)(blob + draw_offs +
                             (draw_stride * unrolled_patches));

      *ret = (AGX_VDM_BLOCK_TYPE_BARRIER << 29) | /* with return */ (1u << 27);
#endif
      /* TODO */
   }

   /* VS grid size: one invocation per vertex per instance. */
   p->grids[0] = count;
   p->grids[1] = instance_count;
   p->grids[2] = 1;

   /* VS workgroup size */
   p->grids[3] = 64;
   p->grids[4] = 1;
   p->grids[5] = 1;

   /* TCS grid size: one invocation per output vertex per input patch. */
   p->grids[6] = in_patches * p->output_patch_size;
   p->grids[7] = instance_count;
   p->grids[8] = 1;

   /* TCS workgroup size: one workgroup per patch. */
   p->grids[9] = p->output_patch_size;
   p->grids[10] = 1;
   p->grids[11] = 1;

   /* Tess grid size: one invocation per unrolled patch. */
   p->grids[12] = unrolled_patches;
   p->grids[13] = 1;
   p->grids[14] = 1;

   /* Tess workgroup size */
   p->grids[15] = 64;
   p->grids[16] = 1;
   p->grids[17] = 1;
}
217