/*
 * Copyright 2023 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include "shaders/geometry.h"
#include "util/bitscan.h"
#include "util/macros.h"
#include "agx_nir_lower_gs.h"
#include "libagx_shaders.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_builder_opcodes.h"
#include "nir_intrinsics.h"
#include "nir_intrinsics_indices.h"
#include "shader_enums.h"

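/* The TCS is dispatched as a compute shader. Judging from the system value
 * lowering below, the workgroup ID encodes the patch: x is the patch within
 * the instance and y is the instance.
 */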
static nir_def *
tcs_patch_id(nir_builder *b)
{
   return nir_channel(b, nir_load_workgroup_id(b), 0);
}

static nir_def *
tcs_instance_id(nir_builder *b)
{
   return nir_channel(b, nir_load_workgroup_id(b), 1);
}

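/* Patch index flattened across all instances, used below to index the TCS
 * input and output buffers.
 */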
static nir_def *
tcs_unrolled_id(nir_builder *b)
{
   return libagx_tcs_unrolled_id(b, nir_load_tess_param_buffer_agx(b),
                                 nir_load_workgroup_id(b));
}

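/* Mask of TCS outputs stored per-vertex: everything written except the
 * per-patch tessellation levels and bounding box.
 */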
uint64_t
agx_tcs_per_vertex_outputs(const nir_shader *nir)
{
   return nir->info.outputs_written &
          ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER |
            VARYING_BIT_BOUNDING_BOX0 | VARYING_BIT_BOUNDING_BOX1);
}

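/* Stride of a single patch in the TCS output buffer (presumably in bytes,
 * given the byte addressing below), matching the layout assumed by the libagx
 * addressing helpers.
 */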
unsigned
agx_tcs_output_stride(const nir_shader *nir)
{
   return libagx_tcs_out_stride(util_last_bit(nir->info.patch_outputs_written),
                                nir->info.tess.tcs_vertices_out,
                                agx_tcs_per_vertex_outputs(nir));
}

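/* Address of a TCS output in the TCS output buffer. vertex_id is only
 * meaningful for per-vertex outputs; callers pass undef for per-patch
 * outputs.
 */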
static nir_def *
tcs_out_addr(nir_builder *b, nir_intrinsic_instr *intr, nir_def *vertex_id)
{
   nir_io_semantics sem = nir_intrinsic_io_semantics(intr);

   nir_def *offset = nir_get_io_offset_src(intr)->ssa;
   nir_def *addr = libagx_tcs_out_address(
      b, nir_load_tess_param_buffer_agx(b), tcs_unrolled_id(b), vertex_id,
      nir_iadd_imm(b, offset, sem.location),
      nir_imm_int(b, util_last_bit(b->shader->info.patch_outputs_written)),
      nir_imm_int(b, b->shader->info.tess.tcs_vertices_out),
      nir_imm_int64(b, agx_tcs_per_vertex_outputs(b->shader)));

   addr = nir_iadd_imm(b, addr, nir_intrinsic_component(intr) * 4);

   return addr;
}

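/* Lower TES loads of inputs and tessellation levels to constant global loads
 * from the buffer written by the TCS, addressed via the libagx helper.
 */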
static nir_def *
lower_tes_load(nir_builder *b, nir_intrinsic_instr *intr)
{
   gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
   nir_src *offset_src = nir_get_io_offset_src(intr);

   nir_def *vertex = nir_imm_int(b, 0);
   nir_def *offset = offset_src ? offset_src->ssa : nir_imm_int(b, 0);

   if (intr->intrinsic == nir_intrinsic_load_per_vertex_input)
      vertex = intr->src[0].ssa;

   nir_def *addr = libagx_tes_in_address(b, nir_load_tess_param_buffer_agx(b),
                                         nir_load_vertex_id(b), vertex,
                                         nir_iadd_imm(b, offset, location));

   if (nir_intrinsic_has_component(intr))
      addr = nir_iadd_imm(b, addr, nir_intrinsic_component(intr) * 4);

   return nir_load_global_constant(b, addr, 4, intr->def.num_components,
                                   intr->def.bit_size);
}

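/* TCS inputs are the outputs of the last vertex stage, indexed by a flat
 * vertex ID, so flatten the (patch, vertex-in-patch) pair accordingly.
 */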
static nir_def *
tcs_load_input(nir_builder *b, nir_intrinsic_instr *intr)
{
   nir_def *base = nir_imul(
      b, tcs_unrolled_id(b),
      libagx_tcs_patch_vertices_in(b, nir_load_tess_param_buffer_agx(b)));
   nir_def *vertex = nir_iadd(b, base, intr->src[0].ssa);

   return agx_load_per_vertex_input(b, intr, vertex);
}

static nir_def *
lower_tcs_impl(nir_builder *b, nir_intrinsic_instr *intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_barrier:
      /* A patch fits in a subgroup, so the barrier is unnecessary. */
      return NIR_LOWER_INSTR_PROGRESS_REPLACE;

   case nir_intrinsic_load_primitive_id:
      return tcs_patch_id(b);

   case nir_intrinsic_load_instance_id:
      return tcs_instance_id(b);

   case nir_intrinsic_load_invocation_id:
      return nir_channel(b, nir_load_local_invocation_id(b), 0);

   case nir_intrinsic_load_per_vertex_input:
      return tcs_load_input(b, intr);

   case nir_intrinsic_load_patch_vertices_in:
      return libagx_tcs_patch_vertices_in(b, nir_load_tess_param_buffer_agx(b));

   case nir_intrinsic_load_tess_level_outer_default:
      return libagx_tess_level_outer_default(b,
                                             nir_load_tess_param_buffer_agx(b));

   case nir_intrinsic_load_tess_level_inner_default:
      return libagx_tess_level_inner_default(b,
                                             nir_load_tess_param_buffer_agx(b));

   case nir_intrinsic_load_output: {
      nir_def *addr = tcs_out_addr(b, intr, nir_undef(b, 1, 32));
      return nir_load_global(b, addr, 4, intr->def.num_components,
                             intr->def.bit_size);
   }

   case nir_intrinsic_load_per_vertex_output: {
      nir_def *addr = tcs_out_addr(b, intr, intr->src[0].ssa);
      return nir_load_global(b, addr, 4, intr->def.num_components,
                             intr->def.bit_size);
   }

   case nir_intrinsic_store_output: {
      nir_store_global(b, tcs_out_addr(b, intr, nir_undef(b, 1, 32)), 4,
                       intr->src[0].ssa, nir_intrinsic_write_mask(intr));
      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
   }

   case nir_intrinsic_store_per_vertex_output: {
      nir_store_global(b, tcs_out_addr(b, intr, intr->src[1].ssa), 4,
                       intr->src[0].ssa, nir_intrinsic_write_mask(intr));
      return NIR_LOWER_INSTR_PROGRESS_REPLACE;
   }

   default:
      return NULL;
   }
}

static bool
lower_tcs(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   b->cursor = nir_before_instr(&intr->instr);

   nir_def *repl = lower_tcs_impl(b, intr);
   if (!repl)
      return false;

   if (repl != NIR_LOWER_INSTR_PROGRESS_REPLACE)
      nir_def_rewrite_uses(&intr->def, repl);

   nir_instr_remove(&intr->instr);
   return true;
}

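/* Link the libagx helper library into the shader: inline the called
 * functions, then lower the resulting variables and derefs until only
 * explicit memory access remains.
 */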
static void
link_libagx(nir_shader *nir, const nir_shader *libagx)
{
   nir_link_shader_functions(nir, libagx);
   NIR_PASS(_, nir, nir_inline_functions);
   nir_remove_non_entrypoints(nir);
   NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_function_temp, 64);
   NIR_PASS(_, nir, nir_opt_dce);
   NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_function_temp,
            glsl_get_cl_type_size_align);
   NIR_PASS(_, nir, nir_opt_deref);
   NIR_PASS(_, nir, nir_lower_vars_to_ssa);
   NIR_PASS(_, nir, nir_lower_explicit_io,
            nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared |
               nir_var_mem_global,
            nir_address_format_62bit_generic);
}

bool
agx_nir_lower_tcs(nir_shader *tcs, const struct nir_shader *libagx)
{
   nir_shader_intrinsics_pass(tcs, lower_tcs, nir_metadata_control_flow, NULL);

   link_libagx(tcs, libagx);
   return true;
}

static nir_def *
lower_tes_impl(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_tess_coord_xy:
      return libagx_load_tess_coord(b, nir_load_tess_param_buffer_agx(b),
                                    nir_load_vertex_id(b));

   case nir_intrinsic_load_primitive_id:
      return libagx_tes_patch_id(b, nir_load_tess_param_buffer_agx(b),
                                 nir_load_vertex_id(b));

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
      return lower_tes_load(b, intr);

   case nir_intrinsic_load_patch_vertices_in:
      return libagx_tes_patch_vertices_in(b, nir_load_tess_param_buffer_agx(b));

   default:
      return NULL;
   }
}

static bool
lower_tes(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   b->cursor = nir_before_instr(&intr->instr);
   nir_def *repl = lower_tes_impl(b, intr, data);

   if (repl) {
      nir_def_replace(&intr->def, repl);
      return true;
   } else {
      return false;
   }
}

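/* When the TES runs as a compute shader, there is no hardware vertex fetch,
 * so vertex IDs must be fetched from the index buffer by hand.
 */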
static bool
lower_tes_indexing(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   if (intr->intrinsic == nir_intrinsic_load_instance_id)
      unreachable("todo");

   if (intr->intrinsic != nir_intrinsic_load_vertex_id)
      return false;

   b->cursor = nir_before_instr(&intr->instr);
   nir_def *p = nir_load_tess_param_buffer_agx(b);
   nir_def *id = nir_channel(b, nir_load_global_invocation_id(b, 32), 0);
   nir_def_replace(&intr->def, libagx_load_tes_index(b, p, id));
   return true;
}

bool
agx_nir_lower_tes(nir_shader *tes, const nir_shader *libagx, bool to_hw_vs)
{
   nir_lower_tess_coord_z(
      tes, tes->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES);

   nir_shader_intrinsics_pass(tes, lower_tes, nir_metadata_control_flow, NULL);

   /* Point mode renders as points, so make sure we write a point size for the
    * HW.
    */
   if (tes->info.tess.point_mode &&
       !(tes->info.outputs_written & VARYING_BIT_PSIZ) && to_hw_vs) {

      nir_function_impl *impl = nir_shader_get_entrypoint(tes);
      nir_builder b = nir_builder_at(nir_after_impl(impl));

      nir_store_output(&b, nir_imm_float(&b, 1.0), nir_imm_int(&b, 0),
                       .io_semantics.location = VARYING_SLOT_PSIZ,
                       .write_mask = nir_component_mask(1), .range = 1,
                       .src_type = nir_type_float32);

      tes->info.outputs_written |= VARYING_BIT_PSIZ;
   }

   if (to_hw_vs) {
      /* We lower to a HW VS, so update the shader info so the compiler does
       * the right thing.
       */
      tes->info.stage = MESA_SHADER_VERTEX;
      memset(&tes->info.vs, 0, sizeof(tes->info.vs));
      tes->info.vs.tes_agx = true;
   } else {
      /* If we're running as a compute shader, we need to load from the index
       * buffer manually. Fortunately, this doesn't require a shader key:
       * tess-as-compute always uses U32 index buffers.
       */
      nir_shader_intrinsics_pass(tes, lower_tes_indexing,
                                 nir_metadata_control_flow, NULL);
   }

   link_libagx(tes, libagx);
   nir_lower_idiv(tes, &(nir_lower_idiv_options){.allow_fp16 = true});
   nir_metadata_preserve(nir_shader_get_entrypoint(tes), nir_metadata_none);
   return true;
}