/*
 * Copyright 2023 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include "compiler/glsl/list.h"
#include "compiler/nir/nir_builder.h"
#include "agx_compiler.h"
#include "nir.h"
#include "nir_builder_opcodes.h"
#include "nir_intrinsics.h"

/*
 * sample_mask takes two bitmasks as arguments, TARGET and LIVE. Each bit
 * refers to an indexed sample. Roughly, the instruction does:
 *
 *    foreach sample in TARGET {
 *       if sample in LIVE {
 *          run depth/stencil/occlusion test/update
 *       } else {
 *          kill sample
 *       }
 *    }
 *
 * As a special case, TARGET may be set to all-1s (~0) to refer to all samples
 * regardless of the framebuffer sample count.
 *
 * For example, to discard an entire pixel unconditionally, we could run:
 *
 *    sample_mask ~0, 0
 *
 * sample_mask must follow these rules:
 *
 * 1. All sample_mask instructions affecting a sample must execute before a
 *    local_store_pixel instruction targeting that sample. This ensures that
 *    nothing is written for discarded samples (whether discarded in shader or
 *    due to a failed depth/stencil test).
 *
 * 2. If sample_mask is used anywhere in a shader, then on every execution
 *    path, every sample must be killed or else run depth/stencil tests
 *    exactly ONCE.
 *
 * 3. If a sample is killed, future sample_mask instructions have no effect on
 *    that sample. The following code sequence correctly implements a
 *    conditional discard (if there are no other sample_mask instructions in
 *    the shader):
 *
 *       sample_mask discarded, 0
 *       sample_mask ~0, ~0
 *
 *    but this sequence is incorrect:
 *
 *       sample_mask ~0, ~discarded
 *       sample_mask ~0, ~0 <-- incorrect: depth/stencil tests run twice
 *
 * 4. Conversely, if a sample is tested, future sample_mask instructions may
 *    not discard that sample. The following code is invalid:
 *
 *       sample_mask ~0, ~0
 *       sample_mask discarded, 0
 *       st_tile
 *
 *    To implement the semantics of "force early tests with discard", manual
 *    colour masking must be used instead. It's a weird case, but the CTS
 *    does it.
 *
 * 5. zs_emit may be used in the shader exactly once to trigger tests.
 *    sample_mask with LIVE = 0 may be used to discard early.
 *
 * This pass lowers discard_agx to sample_mask instructions satisfying these
 * rules. Other passes should not generate sample_mask instructions, as there
 * are too many footguns.
 */
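
/*
 * As an illustrative sketch of the lowering (the exact NIR depends on the
 * shader), a fragment shader with a single conditional discard and no
 * depth/stencil writes:
 *
 *    if (cond) {
 *       discard_agx killed
 *    }
 *    local_store_pixel ...
 *
 * becomes:
 *
 *    if (cond) {
 *       sample_mask killed, 0      <-- kill, do not test
 *    }
 *    sample_mask tests_zs, ~0      <-- test surviving samples once
 *    local_store_pixel ...
 *
 * where tests_zs stands for the load_shader_part_tests_zs_agx value. Killed
 * samples are unaffected by the second sample_mask (rule 3), so every sample
 * is either killed or tested exactly once (rule 2), before the store (rule 1).
 */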

#define ALL_SAMPLES (0xFF)
#define BASE_Z 1
#define BASE_S 2

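/*
 * Lower discard_agx to sample_mask. discard_agx takes the mask of samples to
 * kill, so `discard_agx killed` becomes `sample_mask killed, 0`: the killed
 * samples are discarded without running depth/stencil tests (rule 3).
 */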
static bool
lower_discard_to_sample_mask_0(nir_builder *b, nir_intrinsic_instr *intr,
                               UNUSED void *data)
{
   if (intr->intrinsic != nir_intrinsic_discard_agx)
      return false;

   b->cursor = nir_before_instr(&intr->instr);
   nir_sample_mask_agx(b, intr->src[0].ssa, nir_imm_intN_t(b, 0, 16));
   nir_instr_remove(&intr->instr);
   return true;
}

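/* Return the last discard_agx intrinsic in the block, or NULL if the block
 * does not contain one.
 */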
static nir_intrinsic_instr *
last_discard_in_block(nir_block *block)
{
   nir_foreach_instr_reverse(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
      if (intr->intrinsic == nir_intrinsic_discard_agx)
         return intr;
   }

   return NULL;
}

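/* Check whether any block in the control flow node contains a discard_agx */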
static bool
cf_node_contains_discard(nir_cf_node *node)
{
   nir_foreach_block_in_cf_node(block, node) {
      if (last_discard_in_block(block))
         return true;
   }

   return false;
}

/*
 * We want to run depth/stencil tests as early as possible, but we have to
 * wait until after the last discard. We find the last discard and execute
 * depth/stencil tests in the first unconditional block after it (if the
 * discard is in conditional control flow), or fuse depth/stencil tests into
 * the sample_mask instruction itself (if it is in unconditional control
 * flow).
 *
 * To do so, we walk the root control flow list backwards, looking for the
 * earliest unconditionally executed instruction after all discards.
 */
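
/*
 * For example (an illustrative sketch), if the last discard executes
 * unconditionally:
 *
 *    discard_agx killed
 *    local_store_pixel ...
 *
 * the tests are fused into the resulting sample_mask instead of emitted
 * separately:
 *
 *    sample_mask (tests_zs | killed), ~killed
 *    local_store_pixel ...
 *
 * where tests_zs stands for the load_shader_part_tests_zs_agx value.
 */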
static void
run_tests_after_last_discard(nir_builder *b)
{
   foreach_list_typed_reverse(nir_cf_node, node, node, &b->impl->body) {
      if (node->type == nir_cf_node_block) {
         /* Unconditionally executed block */
         nir_block *block = nir_cf_node_as_block(node);
         nir_intrinsic_instr *intr = last_discard_in_block(block);

         if (intr) {
            /* The last discard is executed unconditionally, so fuse tests:
             *
             *    sample_mask (testing | killed), ~killed
             *
             * When testing, this is `sample_mask ~0, ~killed`, which kills
             * the kill set and triggers tests on the rest.
             *
             * When not testing, this is `sample_mask killed, ~killed`, which
             * is equivalent to `sample_mask killed, 0`, killing without
             * testing.
             */
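
            /* For example (illustrative values): with killed = 0x03 and
             * testing = 0xFF, this emits sample_mask 0xFF, 0xFC, killing
             * samples 0-1 and testing the remaining six. With testing = 0,
             * it emits sample_mask 0x03, 0xFC, killing samples 0-1 and
             * leaving the rest untouched.
             */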
            b->cursor = nir_before_instr(&intr->instr);

            nir_def *all_samples = nir_imm_intN_t(b, ALL_SAMPLES, 16);
            nir_def *killed = intr->src[0].ssa;
            nir_def *live = nir_ixor(b, killed, all_samples);

            nir_def *testing = nir_load_shader_part_tests_zs_agx(b);
            nir_def *affected = nir_ior(b, testing, killed);

            nir_sample_mask_agx(b, affected, live);
            nir_instr_remove(&intr->instr);
            return;
         } else {
            /* Set the cursor for insertion due to a preceding conditionally
             * executed discard.
             */
            b->cursor = nir_before_block_after_phis(block);
         }
      } else if (cf_node_contains_discard(node)) {
         /* A conditionally executed node contains the last discard. Test
          * depth/stencil for the remaining samples in unconditional code
          * after it.
          *
          * If we're not testing, this turns into sample_mask(0, ~0), which
          * is a no-op.
          */
         nir_sample_mask_agx(b, nir_load_shader_part_tests_zs_agx(b),
                             nir_imm_intN_t(b, ALL_SAMPLES, 16));
         return;
      }
   }
}

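/*
 * For early tests, run depth/stencil tests for all samples at the very start
 * of the shader: `sample_mask ~0, ~0` tests every sample exactly once
 * (rule 2) and kills nothing.
 */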
static void
run_tests_at_start(nir_shader *shader)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   nir_builder b = nir_builder_at(nir_before_impl(impl));

   nir_sample_mask_agx(&b, nir_imm_intN_t(&b, ALL_SAMPLES, 16),
                       nir_imm_intN_t(&b, ALL_SAMPLES, 16));
}

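/*
 * Entry point: lower discard_agx in a fragment shader to sample_mask and
 * decide where depth/stencil tests run. Returns true if the shader changed.
 * A typical invocation from a driver compile pipeline might look like (a
 * sketch; pass ordering is up to the caller):
 *
 *    NIR_PASS(progress, nir, agx_nir_lower_sample_mask);
 */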
bool
agx_nir_lower_sample_mask(nir_shader *shader)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   bool writes_zs =
      shader->info.outputs_written &
      (BITFIELD64_BIT(FRAG_RESULT_STENCIL) | BITFIELD64_BIT(FRAG_RESULT_DEPTH));

   if (shader->info.fs.early_fragment_tests) {
      /* Run tests early if we need testing at all */
      if (shader->info.fs.uses_discard || writes_zs ||
          shader->info.writes_memory) {

         run_tests_at_start(shader);
      }
   } else if (shader->info.fs.uses_discard) {
      /* If we have zs_emit, the tests will be triggered by zs_emit; otherwise
       * we need to trigger tests explicitly. Allow sample_mask with zs_emit.
       */
      if (!writes_zs) {
         nir_builder b = nir_builder_create(impl);

         /* Run tests late */
         run_tests_after_last_discard(&b);
      }
   } else {
      /* Regular shaders that don't use discard have nothing to lower */
      nir_metadata_preserve(impl, nir_metadata_all);
      return false;
   }

   nir_metadata_preserve(impl, nir_metadata_control_flow);

   nir_shader_intrinsics_pass(shader, lower_discard_to_sample_mask_0,
                              nir_metadata_control_flow, NULL);

   return true;
}