xref: /aosp_15_r20/external/mesa3d/src/asahi/compiler/agx_nir_lower_sample_mask.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright 2023 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */
5 
6 #include "compiler/glsl/list.h"
7 #include "compiler/nir/nir_builder.h"
8 #include "agx_compiler.h"
9 #include "nir.h"
10 #include "nir_builder_opcodes.h"
11 #include "nir_intrinsics.h"
12 
/*
 * sample_mask takes two bitmasks as arguments, TARGET and LIVE. Each bit refers
 * to an indexed sample. Roughly, the instruction does:
 *
 *    foreach sample in TARGET {
 *       if sample in LIVE {
 *          run depth/stencil/occlusion test/update
 *       } else {
 *          kill sample
 *       }
 *    }
 *
 * As a special case, TARGET may be set to all-1s (~0) to refer to all samples
 * regardless of the framebuffer sample count.
 *
 * For example, to discard an entire pixel unconditionally, we could run:
 *
 *    sample_mask ~0, 0
 *
 * sample_mask must follow these rules:
 *
 * 1. All sample_mask instructions affecting a sample must execute before a
 *    local_store_pixel instruction targeting that sample. This ensures that
 *    nothing is written for discarded samples (whether discarded in shader or
 *    due to a failed depth/stencil test).
 *
 * 2. If sample_mask is used anywhere in a shader, then on every execution path,
 *    every sample must be killed or else run depth/stencil tests exactly ONCE.
 *
 * 3. If a sample is killed, future sample_mask instructions have
 *    no effect on that sample. The following code sequence correctly implements
 *    a conditional discard (if there are no other sample_mask instructions in
 *    the shader):
 *
 *       sample_mask discarded, 0
 *       sample_mask ~0, ~0
 *
 *    but this sequence is incorrect:
 *
 *       sample_mask ~0, ~discarded
 *       sample_mask ~0, ~0         <-- incorrect: depth/stencil tests run twice
 *
 * 4. Conversely, if a sample is tested, future sample_mask instructions may not
 *    discard that sample. The following code is invalid:
 *
 *      sample_mask ~0, ~0
 *      sample_mask discarded, 0
 *      st_tile
 *
 *    To implement the semantic of "force early tests with discard", manual
 *    colour masking must be used. It's a weird case, but CTS does it.
 *
 * 5. zs_emit may be used in the shader exactly once to trigger tests.
 *    sample_mask with 0 may be used to discard early.
 *
 * This pass lowers discard_agx to sample_mask instructions satisfying these
 * rules. Other passes should not generate sample_mask instructions, as there
 * are too many footguns.
 */
72 
73 #define ALL_SAMPLES (0xFF)
74 #define BASE_Z      1
75 #define BASE_S      2
76 
77 static bool
lower_discard_to_sample_mask_0(nir_builder * b,nir_intrinsic_instr * intr,UNUSED void * data)78 lower_discard_to_sample_mask_0(nir_builder *b, nir_intrinsic_instr *intr,
79                                UNUSED void *data)
80 {
81    if (intr->intrinsic != nir_intrinsic_discard_agx)
82       return false;
83 
84    b->cursor = nir_before_instr(&intr->instr);
85    nir_sample_mask_agx(b, intr->src[0].ssa, nir_imm_intN_t(b, 0, 16));
86    nir_instr_remove(&intr->instr);
87    return true;
88 }
89 
90 static nir_intrinsic_instr *
last_discard_in_block(nir_block * block)91 last_discard_in_block(nir_block *block)
92 {
93    nir_foreach_instr_reverse(instr, block) {
94       if (instr->type != nir_instr_type_intrinsic)
95          continue;
96 
97       nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
98       if (intr->intrinsic == nir_intrinsic_discard_agx)
99          return intr;
100    }
101 
102    return NULL;
103 }
104 
105 static bool
cf_node_contains_discard(nir_cf_node * node)106 cf_node_contains_discard(nir_cf_node *node)
107 {
108    nir_foreach_block_in_cf_node(block, node) {
109       if (last_discard_in_block(block))
110          return true;
111    }
112 
113    return false;
114 }
115 
/*
 * We want to run depth/stencil tests as early as possible, but we have to
 * wait until after the last discard. We find the last discard and
 * execute depth/stencil tests in the first unconditional block after (if
 * in conditional control flow), or fuse depth/stencil tests into the
 * sample instruction (if in unconditional control flow).
 *
 * To do so, we walk the root control flow list backwards, looking for the
 * earliest unconditionally executed instruction after all discard.
 */
126 static void
run_tests_after_last_discard(nir_builder * b)127 run_tests_after_last_discard(nir_builder *b)
128 {
129    foreach_list_typed_reverse(nir_cf_node, node, node, &b->impl->body) {
130       if (node->type == nir_cf_node_block) {
131          /* Unconditionally executed block */
132          nir_block *block = nir_cf_node_as_block(node);
133          nir_intrinsic_instr *intr = last_discard_in_block(block);
134 
135          if (intr) {
136             /* Last discard is executed unconditionally, so fuse tests:
137              *
138              *    sample_mask (testing | killed), ~killed
139              *
140              * When testing, this is `sample_mask ~0, ~killed` which kills the
141              * kill set and triggers tests on the rest.
142              *
143              * When not testing, this is `sample_mask killed, ~killed` which is
144              * equivalent to `sample_mask killed, 0`, killing without testing.
145              */
146             b->cursor = nir_before_instr(&intr->instr);
147 
148             nir_def *all_samples = nir_imm_intN_t(b, ALL_SAMPLES, 16);
149             nir_def *killed = intr->src[0].ssa;
150             nir_def *live = nir_ixor(b, killed, all_samples);
151 
152             nir_def *testing = nir_load_shader_part_tests_zs_agx(b);
153             nir_def *affected = nir_ior(b, testing, killed);
154 
155             nir_sample_mask_agx(b, affected, live);
156             nir_instr_remove(&intr->instr);
157             return;
158          } else {
159             /* Set cursor for insertion due to a preceding conditionally
160              * executed discard.
161              */
162             b->cursor = nir_before_block_after_phis(block);
163          }
164       } else if (cf_node_contains_discard(node)) {
165          /* Conditionally executed block contains the last discard. Test
166           * depth/stencil for remaining samples in unconditional code after.
167           *
168           * If we're not testing, this turns into sample_mask(0, ~0) which is a
169           * no-op.
170           */
171          nir_sample_mask_agx(b, nir_load_shader_part_tests_zs_agx(b),
172                              nir_imm_intN_t(b, ALL_SAMPLES, 16));
173          return;
174       }
175    }
176 }
177 
178 static void
run_tests_at_start(nir_shader * shader)179 run_tests_at_start(nir_shader *shader)
180 {
181    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
182    nir_builder b = nir_builder_at(nir_before_impl(impl));
183 
184    nir_sample_mask_agx(&b, nir_imm_intN_t(&b, ALL_SAMPLES, 16),
185                        nir_imm_intN_t(&b, ALL_SAMPLES, 16));
186 }
187 
188 bool
agx_nir_lower_sample_mask(nir_shader * shader)189 agx_nir_lower_sample_mask(nir_shader *shader)
190 {
191    nir_function_impl *impl = nir_shader_get_entrypoint(shader);
192 
193    bool writes_zs =
194       shader->info.outputs_written &
195       (BITFIELD64_BIT(FRAG_RESULT_STENCIL) | BITFIELD64_BIT(FRAG_RESULT_DEPTH));
196 
197    if (shader->info.fs.early_fragment_tests) {
198       /* run tests early, if we need testing */
199       if (shader->info.fs.uses_discard || writes_zs ||
200           shader->info.writes_memory) {
201 
202          run_tests_at_start(shader);
203       }
204    } else if (shader->info.fs.uses_discard) {
205       /* If we have zs_emit, the tests will be triggered by zs_emit, otherwise
206        * we need to trigger tests explicitly. Allow sample_mask with zs_emit.
207        */
208       if (!writes_zs) {
209          nir_builder b = nir_builder_create(impl);
210 
211          /* run tests late */
212          run_tests_after_last_discard(&b);
213       }
214    } else {
215       /* regular shaders that don't use discard have nothing to lower */
216       nir_metadata_preserve(impl, nir_metadata_all);
217       return false;
218    }
219 
220    nir_metadata_preserve(impl, nir_metadata_control_flow);
221 
222    nir_shader_intrinsics_pass(shader, lower_discard_to_sample_mask_0,
223                               nir_metadata_control_flow, NULL);
224 
225    return true;
226 }
227