/*
 * Copyright (c) 2021 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_nir_rt.h"
#include "brw_nir_rt_builder.h"

#include "nir_deref.h"

#include "util/macros.h"

struct lowering_state {
   const struct intel_device_info *devinfo;

   nir_function_impl *impl;

   struct hash_table *queries;
   uint32_t n_queries;

   struct brw_nir_rt_globals_defs globals;
   nir_def *rq_globals;
};

struct brw_ray_query {
   nir_variable *opaque_var;
   nir_variable *internal_var;
   uint32_t id;
};

#define SIZEOF_QUERY_STATE (sizeof(uint32_t))

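/* When a shader uses more than one ray query, each query gets its own shadow
 * location in global memory: the active query is copied (filled) into the HW
 * synchronous stack before tracing and copied back (spilled) afterwards.
 * With a single query the HW stack is used directly and no spill/fill is
 * needed.
 */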
static bool
need_spill_fill(struct lowering_state *state)
{
   return state->n_queries > 1;
}

/**
 * This pass lowers the opaque RayQuery structures coming from SPIR-V. Each
 * query is backed by a small 16-bit state variable holding the trace control
 * value in its upper bits and the BVH level in its 2 lower bits, plus (when
 * the shader uses more than one query) a shadow copy of the HW ray-query
 * stack in global memory.
 */

static void
register_opaque_var(nir_variable *opaque_var, struct lowering_state *state)
{
   struct hash_entry *entry = _mesa_hash_table_search(state->queries, opaque_var);
   assert(entry == NULL);

   struct brw_ray_query *rq = rzalloc(state->queries, struct brw_ray_query);
   rq->opaque_var = opaque_var;
   rq->id = state->n_queries;

   unsigned aoa_size = glsl_get_aoa_size(opaque_var->type);
   state->n_queries += MAX2(1, aoa_size);

   _mesa_hash_table_insert(state->queries, opaque_var, rq);
}

static void
create_internal_var(struct brw_ray_query *rq, struct lowering_state *state)
{
   const struct glsl_type *opaque_type = rq->opaque_var->type;
   const struct glsl_type *internal_type = glsl_uint16_t_type();

   while (glsl_type_is_array(opaque_type)) {
      assert(!glsl_type_is_unsized_array(opaque_type));
      internal_type = glsl_array_type(internal_type,
                                      glsl_array_size(opaque_type),
                                      0);
      opaque_type = glsl_get_array_element(opaque_type);
   }

   rq->internal_var = nir_local_variable_create(state->impl,
                                                internal_type,
                                                NULL);
}

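/* Compute the shadow-memory address backing the ray query referenced by
 * deref and return the deref of its internal 16-bit state variable through
 * out_state_deref. Returns NULL when only one query exists and the HW stack
 * can be used directly (no shadow copy needed).
 */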
static nir_def *
get_ray_query_shadow_addr(nir_builder *b,
                          nir_deref_instr *deref,
                          struct lowering_state *state,
                          nir_deref_instr **out_state_deref)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);
   assert(path.path[0]->deref_type == nir_deref_type_var);

   nir_variable *opaque_var = nir_deref_instr_get_variable(path.path[0]);
   struct hash_entry *entry = _mesa_hash_table_search(state->queries, opaque_var);
   assert(entry);

   struct brw_ray_query *rq = entry->data;

   /* Base address in the shadow memory of the variable associated with this
    * ray query variable.
    */
   nir_def *base_addr =
      nir_iadd_imm(b, state->globals.resume_sbt_addr,
                   brw_rt_ray_queries_shadow_stack_size(state->devinfo) * rq->id);

   bool spill_fill = need_spill_fill(state);
   *out_state_deref = nir_build_deref_var(b, rq->internal_var);

   if (!spill_fill)
      return NULL;

   /* Just emit code and let constant-folding go to town */
   nir_deref_instr **p = &path.path[1];
   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         nir_def *index = (*p)->arr.index.ssa;

         /* Walk the same array index on the internal state variable. */
         *out_state_deref = nir_build_deref_array(b, *out_state_deref, index);

         /* Advance the shadow address by the array stride (one shadow stack
          * per remaining array element).
          */
         uint64_t size = MAX2(1, glsl_get_aoa_size((*p)->type)) *
            brw_rt_ray_queries_shadow_stack_size(state->devinfo);

         nir_def *mul = nir_amul_imm(b, nir_i2i64(b, index), size);

         base_addr = nir_iadd(b, base_addr, mul);
      } else {
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   /* Add the lane offset to the shadow memory address */
   nir_def *lane_offset =
      nir_imul_imm(
         b,
         nir_iadd(
            b,
            nir_imul(
               b,
               brw_load_btd_dss_id(b),
               brw_nir_rt_load_num_simd_lanes_per_dss(b, state->devinfo)),
            brw_nir_rt_sync_stack_id(b)),
         BRW_RT_SIZEOF_SHADOW_RAY_QUERY);

   return nir_iadd(b, base_addr, nir_i2i64(b, lane_offset));
}

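/* The internal 16-bit state variable packs the trace control value in bits 2
 * and above and the BVH level in the 2 lower bits. This helper reads back
 * the current pair and optionally stores a new one.
 */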
static void
update_trace_ctrl_level(nir_builder *b,
                        nir_deref_instr *state_deref,
                        nir_def **out_old_ctrl,
                        nir_def **out_old_level,
                        nir_def *new_ctrl,
                        nir_def *new_level)
{
   nir_def *old_value = nir_load_deref(b, state_deref);
   nir_def *old_ctrl = nir_ishr_imm(b, old_value, 2);
   nir_def *old_level = nir_iand_imm(b, old_value, 0x3);

   if (out_old_ctrl)
      *out_old_ctrl = old_ctrl;
   if (out_old_level)
      *out_old_level = old_level;

   if (new_ctrl)
      new_ctrl = nir_i2i16(b, new_ctrl);
   if (new_level)
      new_level = nir_i2i16(b, new_level);

   if (new_ctrl || new_level) {
      if (!new_ctrl)
         new_ctrl = old_ctrl;
      if (!new_level)
         new_level = old_level;

      nir_def *new_value = nir_ior(b, nir_ishl_imm(b, new_ctrl, 2), new_level);
      nir_store_deref(b, state_deref, new_value, 0x1);
   }
}

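/* fill_query() copies a query's shadow state into the HW synchronous stack
 * before handing it to the HW; spill_query() copies the (possibly updated)
 * HW stack back to shadow memory afterwards.
 */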
static void
fill_query(nir_builder *b,
           nir_def *hw_stack_addr,
           nir_def *shadow_stack_addr,
           nir_def *ctrl)
{
   brw_nir_memcpy_global(b, hw_stack_addr, 64, shadow_stack_addr, 64,
                         BRW_RT_SIZEOF_RAY_QUERY);
}

static void
spill_query(nir_builder *b,
            nir_def *hw_stack_addr,
            nir_def *shadow_stack_addr)
{
   brw_nir_memcpy_global(b, shadow_stack_addr, 64, hw_stack_addr, 64,
                         BRW_RT_SIZEOF_RAY_QUERY);
}

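/* Lower a single rq_* intrinsic. When several queries exist, their data
 * lives at shadow_stack_addr and is filled/spilled around the trace;
 * otherwise everything operates directly on the HW stack address.
 */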
static void
lower_ray_query_intrinsic(nir_builder *b,
                          nir_intrinsic_instr *intrin,
                          struct lowering_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_deref_instr *ctrl_level_deref;
   nir_def *shadow_stack_addr =
      get_ray_query_shadow_addr(b, deref, state, &ctrl_level_deref);
   nir_def *hw_stack_addr =
      brw_nir_rt_sync_stack_addr(b, state->globals.base_mem_addr, state->devinfo);
   nir_def *stack_addr = shadow_stack_addr ? shadow_stack_addr : hw_stack_addr;

   switch (intrin->intrinsic) {
   case nir_intrinsic_rq_initialize: {
      nir_def *as_addr = intrin->src[1].ssa;
      nir_def *ray_flags = intrin->src[2].ssa;
      /* From the SPIR-V spec:
       *
       *    "Only the 8 least-significant bits of Cull Mask are used by
       *    this instruction - other bits are ignored.
       *
       *    Only the 16 least-significant bits of Miss Index are used by
       *    this instruction - other bits are ignored."
       */
      nir_def *cull_mask = nir_iand_imm(b, intrin->src[3].ssa, 0xff);
      nir_def *ray_orig = intrin->src[4].ssa;
      nir_def *ray_t_min = intrin->src[5].ssa;
      nir_def *ray_dir = intrin->src[6].ssa;
      nir_def *ray_t_max = intrin->src[7].ssa;

      nir_def *root_node_ptr =
         brw_nir_rt_acceleration_structure_to_root_node(b, as_addr);

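      /* Write the ray into the world-level MemRay slot of the query stack,
       * mark the query as initialized, and record the INITIAL control value
       * and WORLD BVH level in the internal state for the first rq_proceed.
       */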
      struct brw_nir_rt_mem_ray_defs ray_defs = {
         .root_node_ptr = root_node_ptr,
         .ray_flags = nir_u2u16(b, ray_flags),
         .ray_mask = cull_mask,
         .orig = ray_orig,
         .t_near = ray_t_min,
         .dir = ray_dir,
         .t_far = ray_t_max,
      };

      nir_def *ray_addr =
         brw_nir_rt_mem_ray_addr(b, stack_addr, BRW_RT_BVH_LEVEL_WORLD);

      brw_nir_rt_query_mark_init(b, stack_addr);
      brw_nir_rt_store_mem_ray_query_at_addr(b, ray_addr, &ray_defs);

      update_trace_ctrl_level(b, ctrl_level_deref,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_INITAL),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_WORLD));
      break;
   }

   case nir_intrinsic_rq_proceed: {
      nir_def *not_done =
         nir_inot(b, brw_nir_rt_query_done(b, stack_addr));
      nir_def *not_done_then, *not_done_else;

      nir_push_if(b, not_done);
      {
         nir_def *ctrl, *level;
         update_trace_ctrl_level(b, ctrl_level_deref,
                                 &ctrl, &level,
                                 NULL,
                                 NULL);

         /* Mark the query as done before handing it over to the HW for
          * processing. If the HW makes any progress, it will write back some
          * data and, as a side effect, clear the "done" bit. If no progress
          * is made, the HW does not write anything back and we can use this
          * bit to detect that.
          */
         brw_nir_rt_query_mark_done(b, stack_addr);

         if (shadow_stack_addr)
            fill_query(b, hw_stack_addr, shadow_stack_addr, ctrl);

         nir_trace_ray_intel(b, state->rq_globals, level, ctrl, .synchronous = true);

         struct brw_nir_rt_mem_hit_defs hit_in = {};
         brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, hw_stack_addr, false);

         if (shadow_stack_addr)
            spill_query(b, hw_stack_addr, shadow_stack_addr);

         update_trace_ctrl_level(b, ctrl_level_deref,
                                 NULL, NULL,
                                 nir_imm_int(b, GEN_RT_TRACE_RAY_CONTINUE),
                                 hit_in.bvh_level);

         not_done_then = nir_inot(b, hit_in.done);
      }
      nir_push_else(b, NULL);
      {
         not_done_else = nir_imm_false(b);
      }
      nir_pop_if(b, NULL);
      not_done = nir_if_phi(b, not_done_then, not_done_else);
      nir_def_rewrite_uses(&intrin->def, not_done);
      break;
   }

   case nir_intrinsic_rq_confirm_intersection: {
      brw_nir_memcpy_global(b,
                            brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, true), 16,
                            brw_nir_rt_mem_hit_addr_from_addr(b, stack_addr, false), 16,
                            BRW_RT_SIZEOF_HIT_INFO);
      update_trace_ctrl_level(b, ctrl_level_deref,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_OBJECT));
      break;
   }

   case nir_intrinsic_rq_generate_intersection: {
      brw_nir_rt_generate_hit_addr(b, stack_addr, intrin->src[1].ssa);
      update_trace_ctrl_level(b, ctrl_level_deref,
                              NULL, NULL,
                              nir_imm_int(b, GEN_RT_TRACE_RAY_COMMIT),
                              nir_imm_int(b, BRW_RT_BVH_LEVEL_OBJECT));
      break;
   }

   case nir_intrinsic_rq_terminate: {
      brw_nir_rt_query_mark_done(b, stack_addr);
      break;
   }

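   /* rq_load reads back ray/hit data from the query stack (shadow or HW)
    * and converts it into the system value requested by the intrinsic.
    */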
   case nir_intrinsic_rq_load: {
      const bool committed = nir_intrinsic_committed(intrin);

      struct brw_nir_rt_mem_ray_defs world_ray_in = {};
      struct brw_nir_rt_mem_ray_defs object_ray_in = {};
      struct brw_nir_rt_mem_hit_defs hit_in = {};
      brw_nir_rt_load_mem_ray_from_addr(b, &world_ray_in, stack_addr,
                                        BRW_RT_BVH_LEVEL_WORLD);
      brw_nir_rt_load_mem_ray_from_addr(b, &object_ray_in, stack_addr,
                                        BRW_RT_BVH_LEVEL_OBJECT);
      brw_nir_rt_load_mem_hit_from_addr(b, &hit_in, stack_addr, committed);

      nir_def *sysval = NULL;
      switch (nir_intrinsic_ray_query_value(intrin)) {
      case nir_ray_query_value_intersection_type:
         if (committed) {
            /* Values we want to generate :
             *
             * RayQueryCommittedIntersectionNoneEXT = 0U        <= hit_in.valid == false
             * RayQueryCommittedIntersectionTriangleEXT = 1U    <= hit_in.leaf_type == BRW_RT_BVH_NODE_TYPE_QUAD (4)
             * RayQueryCommittedIntersectionGeneratedEXT = 2U   <= hit_in.leaf_type == BRW_RT_BVH_NODE_TYPE_PROCEDURAL (3)
             */
            sysval =
               nir_bcsel(b, nir_ieq_imm(b, hit_in.leaf_type, 4),
                         nir_imm_int(b, 1), nir_imm_int(b, 2));
            sysval =
               nir_bcsel(b, hit_in.valid,
                         sysval, nir_imm_int(b, 0));
         } else {
            /* 0 -> triangle, 1 -> AABB */
            sysval =
               nir_b2i32(b,
                         nir_ieq_imm(b, hit_in.leaf_type,
                                     BRW_RT_BVH_NODE_TYPE_PROCEDURAL));
         }
         break;

      case nir_ray_query_value_intersection_t:
         sysval = hit_in.t;
         break;

      case nir_ray_query_value_intersection_instance_custom_index: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.instance_id;
         break;
      }

      case nir_ray_query_value_intersection_instance_id: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.instance_index;
         break;
      }

      case nir_ray_query_value_intersection_instance_sbt_index: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.contribution_to_hit_group_index;
         break;
      }

      case nir_ray_query_value_intersection_geometry_index: {
         nir_def *geometry_index_dw =
            nir_load_global(b, nir_iadd_imm(b, hit_in.prim_leaf_ptr, 4), 4,
                            1, 32);
         sysval = nir_iand_imm(b, geometry_index_dw, BITFIELD_MASK(29));
         break;
      }

      case nir_ray_query_value_intersection_primitive_index:
         sysval = brw_nir_rt_load_primitive_id_from_hit(b, NULL /* is_procedural */, &hit_in);
         break;

      case nir_ray_query_value_intersection_barycentrics:
         sysval = hit_in.tri_bary;
         break;

      case nir_ray_query_value_intersection_front_face:
         sysval = hit_in.front_face;
         break;

      case nir_ray_query_value_intersection_object_ray_direction:
         sysval = world_ray_in.dir;
         break;

      case nir_ray_query_value_intersection_object_ray_origin:
         sysval = world_ray_in.orig;
         break;

      case nir_ray_query_value_intersection_object_to_world: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.object_to_world[nir_intrinsic_column(intrin)];
         break;
      }

      case nir_ray_query_value_intersection_world_to_object: {
         struct brw_nir_rt_bvh_instance_leaf_defs leaf;
         brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
         sysval = leaf.world_to_object[nir_intrinsic_column(intrin)];
         break;
      }

      case nir_ray_query_value_intersection_candidate_aabb_opaque:
         sysval = hit_in.front_face;
         break;

      case nir_ray_query_value_tmin:
         sysval = world_ray_in.t_near;
         break;

      case nir_ray_query_value_flags:
         sysval = nir_u2u32(b, world_ray_in.ray_flags);
         break;

      case nir_ray_query_value_world_ray_direction:
         sysval = world_ray_in.dir;
         break;

      case nir_ray_query_value_world_ray_origin:
         sysval = world_ray_in.orig;
         break;

      case nir_ray_query_value_intersection_triangle_vertex_positions: {
         struct brw_nir_rt_bvh_primitive_leaf_positions_defs pos;
         brw_nir_rt_load_bvh_primitive_leaf_positions(b, &pos, hit_in.prim_leaf_ptr);
         sysval = pos.positions[nir_intrinsic_column(intrin)];
         break;
      }

      default:
         unreachable("Invalid ray query");
      }

      assert(sysval);
      nir_def_rewrite_uses(&intrin->def, sysval);
      break;
   }

   default:
      unreachable("Invalid intrinsic");
   }
}

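/* Lower all rq_* intrinsics of an impl: load the ray-query globals once at
 * the top of the function, then rewrite every ray-query intrinsic in place.
 */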
static void
lower_ray_query_impl(nir_function_impl *impl, struct lowering_state *state)
{
   nir_builder _b, *b = &_b;
   _b = nir_builder_at(nir_before_impl(impl));

   state->rq_globals = nir_load_ray_query_global_intel(b);

   brw_nir_rt_load_globals_addr(b, &state->globals, state->rq_globals);

   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic != nir_intrinsic_rq_initialize &&
             intrin->intrinsic != nir_intrinsic_rq_terminate &&
             intrin->intrinsic != nir_intrinsic_rq_proceed &&
             intrin->intrinsic != nir_intrinsic_rq_generate_intersection &&
             intrin->intrinsic != nir_intrinsic_rq_confirm_intersection &&
             intrin->intrinsic != nir_intrinsic_rq_load)
            continue;

         lower_ray_query_intrinsic(b, intrin, state);
      }
   }

   nir_metadata_preserve(impl, nir_metadata_none);
}

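/* Entry point: gather all ray-query variables of the shader's entrypoint,
 * create their internal state variables, and lower the intrinsics. Returns
 * true if any query was found and the shader was modified.
 */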
bool
brw_nir_lower_ray_queries(nir_shader *shader,
                          const struct intel_device_info *devinfo)
{
   assert(exec_list_length(&shader->functions) == 1);

   struct lowering_state state = {
      .devinfo = devinfo,
      .impl = nir_shader_get_entrypoint(shader),
      .queries = _mesa_pointer_hash_table_create(NULL),
   };

   /* Map all query variables to internal type variables */
   nir_foreach_function_temp_variable(var, state.impl) {
      if (!var->data.ray_query)
         continue;
      register_opaque_var(var, &state);
   }
   hash_table_foreach(state.queries, entry)
      create_internal_var(entry->data, &state);

   bool progress = state.n_queries > 0;

   if (progress) {
      lower_ray_query_impl(state.impl, &state);

      nir_remove_dead_derefs(shader);
      nir_remove_dead_variables(shader,
                                nir_var_shader_temp | nir_var_function_temp,
                                NULL);

      nir_metadata_preserve(state.impl, nir_metadata_none);
   }

   ralloc_free(state.queries);

   return progress;
}