/*
 * Copyright 2023 Valve Corporation
 * Copyright 2023 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include "agx_tilebuffer.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_builder_opcodes.h"
#include "nir_intrinsics.h"

static nir_def *
select_if_msaa_else_0(nir_builder *b, nir_def *x)
{
   /* Sample count > 1 <==> log2(Sample count) > 0 */
   nir_def *msaa = nir_ugt_imm(b, nir_load_samples_log2_agx(b), 0);

   return nir_bcsel(b, msaa, x, nir_imm_intN_t(b, 0, x->bit_size));
}

static bool
lower(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   b->cursor = nir_before_instr(&intr->instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_pos_or_center: {
      /* Handle the center special case */
      if (!b->shader->info.fs.uses_sample_shading) {
         assert(intr->intrinsic == nir_intrinsic_load_sample_pos_or_center);
         nir_def_replace(&intr->def, nir_imm_vec2(b, 0.5, 0.5));
         return true;
      }

      /* Lower sample positions to decode the packed fixed-point register:
       *
       *    uint32_t packed = load_sample_positions();
       *    uint32_t shifted = packed >> (sample_id * 8);
       *
       *    for (i = 0; i < 2; ++i) {
       *       uint8_t nibble = (shifted >> (i * 4)) & 0xF;
       *       xy[i] = ((float)nibble) / 16.0;
       *    }
       */
      nir_def *packed = nir_load_sample_positions_agx(b);

      /* The n'th sample is in the n'th byte of the register */
      nir_def *shifted = nir_ushr(
         b, packed, nir_u2u32(b, nir_imul_imm(b, nir_load_sample_id(b), 8)));

      nir_def *xy[2];
      for (unsigned i = 0; i < 2; ++i) {
         /* Get the appropriate nibble */
         nir_def *nibble =
            nir_iand_imm(b, nir_ushr_imm(b, shifted, i * 4), 0xF);

         /* Convert it from fixed point to float */
         xy[i] = nir_fmul_imm(b, nir_u2f16(b, nibble), 1.0 / 16.0);

         /* Upconvert if necessary */
         xy[i] = nir_f2fN(b, xy[i], intr->def.bit_size);
      }

      /* Collect and rewrite */
      nir_def_replace(&intr->def, nir_vec2(b, xy[0], xy[1]));
      return true;
   }

   case nir_intrinsic_load_sample_mask_in: {
      /* Apply the API sample mask to sample mask inputs, lowering:
       *
       *    sample_mask_in --> sample_mask_in & api_sample_mask
       *
       * Furthermore, in OpenGL, gl_SampleMaskIn is only supposed to have the
       * bit set for the sample currently being shaded when sample shading is
       * used. Mask by the sample ID to make that happen.
       */
      b->cursor = nir_after_instr(&intr->instr);
      nir_def *old = &intr->def;
      nir_def *lowered = nir_iand(
         b, old, nir_u2uN(b, nir_load_api_sample_mask_agx(b), old->bit_size));

      if (b->shader->info.fs.uses_sample_shading) {
         nir_def *bit = nir_load_active_samples_agx(b);
         lowered = nir_iand(b, lowered, nir_u2uN(b, bit, old->bit_size));
      }

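      /* Rewrite every use of the original def except the iands emitted above,
       * which must keep consuming the original (unmasked) value.
       */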
      nir_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
      return true;
   }

   case nir_intrinsic_load_helper_invocation: {
      /* When sample shading is enabled, we may execute helper invocations for
       * samples that are not covered. Mask so that load_helper_invocation
       * returns the right thing. By extension, this ensures we don't execute
       * stores for non-covered samples.
       */
      if (!b->shader->info.fs.uses_sample_shading)
         return false;

      b->cursor = nir_instr_remove(&intr->instr);
      nir_def *active = nir_load_active_samples_agx(b);
      nir_def *mask = nir_u2uN(b, nir_load_sample_mask(b), active->bit_size);
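      /* This invocation is a helper iff none of its assigned samples are
       * covered.
       */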
      nir_def *def = nir_ieq_imm(b, nir_iand(b, mask, active), 0);
      nir_def_rewrite_uses(&intr->def, def);
      return true;
   }

   case nir_intrinsic_load_barycentric_sample: {
      /* Lower fragment varyings with "sample" interpolation to
       * interpolateAtSample() with the sample ID. If multisampling is
       * disabled, the sample ID is 0, so we don't need to mask unlike for
       * load_barycentric_at_sample.
       */
      b->cursor = nir_after_instr(&intr->instr);
      nir_def *old = &intr->def;

      nir_def *lowered = nir_load_barycentric_at_sample(
         b, intr->def.bit_size, nir_load_sample_id(b),
         .interp_mode = nir_intrinsic_interp_mode(intr));

      nir_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
      return true;
   }

   case nir_intrinsic_load_barycentric_at_sample: {
      /*
       * In OpenGL, interpolateAtSample interpolates at the centre when
       * multisampling is disabled. Furthermore, results are undefined when
       * multisampling is enabled but the sample ID is out-of-bounds.
       *
       * To handle the former case, we force the sample ID to 0 when
       * multisampling is disabled. To optimize the latter case, we force the
       * sample ID to 0 when the requested sample is definitively
       * out-of-bounds.
       */
      b->cursor = nir_before_instr(&intr->instr);

      nir_src *src = &intr->src[0];
      nir_def *sample = src->ssa;

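      /* The hardware supports at most 4 samples per pixel, so a constant
       * sample ID of 4 or greater can never be in-bounds.
       */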
      if (nir_src_is_const(*src) && nir_src_as_uint(*src) >= 4) {
         sample = nir_imm_int(b, 0);
      } else {
         sample = select_if_msaa_else_0(b, sample);
      }

      nir_src_rewrite(src, sample);
      return true;
   }

   case nir_intrinsic_store_output: {
      /*
       * In OpenGL, sample mask writes are ignored unless multisampling is
       * used. This is not the case in Vulkan, disambiguated by the
       * ignore_sample_mask_without_msaa flag.
       *
       * If it is used, the Vulkan spec says:
       *
       *    If sample shading is enabled, bits written to SampleMask
       *    corresponding to samples that are not being shaded by the fragment
       *    shader invocation are ignored.
       *
       * That will be satisfied by outputting gl_SampleMask for the whole pixel
       * and then lowering sample shading after (splitting up discard targets).
       */
      nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
      if (sem.location != FRAG_RESULT_SAMPLE_MASK)
         return false;

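      /* discard_agx kills the samples whose bits are set, so discard exactly
       * the samples that the shader cleared in gl_SampleMask.
       */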
      nir_def *mask = nir_inot(b, nir_u2u16(b, intr->src[0].ssa));
      bool *ignore_sample_mask_without_msaa = data;

      if (*ignore_sample_mask_without_msaa)
         mask = select_if_msaa_else_0(b, mask);

      nir_discard_agx(b, mask);
      nir_instr_remove(&intr->instr);

      b->shader->info.fs.uses_discard = true;
      return true;
   }

   default:
      return false;
   }
}

/*
 * In a fragment shader using sample shading, lower intrinsics like
 * load_sample_position to variants in terms of load_sample_id. Except for a
 * possible API bit to force sample shading in shaders that don't otherwise need
 * it, this pass does not depend on the shader key. In particular, it does not
 * depend on the sample count. So it runs on fragment shaders at compile-time.
 * The load_sample_id intrinsics themselves are lowered later, with different
 * lowerings for monolithic vs epilogs.
 *
 * Note that fragment I/O (like store_local_pixel_agx and discard_agx) does not
 * get lowered here, because that lowering is different for monolithic vs FS
 * epilogs even though there's no dependency on sample count.
 */
bool
agx_nir_lower_sample_intrinsics(nir_shader *shader,
                                bool ignore_sample_mask_without_msaa)
{
   return nir_shader_intrinsics_pass(shader, lower, nir_metadata_control_flow,
                                     &ignore_sample_mask_without_msaa);
}