/*
 * Copyright 2023 Valve Corporation
 * Copyright 2023 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include "agx_tilebuffer.h"
#include "nir.h"
#include "nir_builder.h"
#include "nir_builder_opcodes.h"
#include "nir_intrinsics.h"

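/*
 * Return x if MSAA is in use (sample count > 1), else an all-zero value of
 * the same bit size. Used below to force sample indices and sample masks to
 * zero in the single-sampled case.
 */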
static nir_def *
select_if_msaa_else_0(nir_builder *b, nir_def *x)
{
   /* Sample count > 1 <==> log2(Sample count) > 0 */
   nir_def *msaa = nir_ugt_imm(b, nir_load_samples_log2_agx(b), 0);

   return nir_bcsel(b, msaa, x, nir_imm_intN_t(b, 0, x->bit_size));
}

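/*
 * Per-intrinsic lowering callback for nir_shader_intrinsics_pass (see
 * agx_nir_lower_sample_intrinsics below). Returns true if the intrinsic was
 * rewritten.
 */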
static bool
lower(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
   b->cursor = nir_before_instr(&intr->instr);

   switch (intr->intrinsic) {
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_pos_or_center: {
      /* Handle the center special case */
      if (!b->shader->info.fs.uses_sample_shading) {
         assert(intr->intrinsic == nir_intrinsic_load_sample_pos_or_center);
         nir_def_replace(&intr->def, nir_imm_vec2(b, 0.5, 0.5));
         return true;
      }

      /* Lower sample positions to decode the packed fixed-point register:
       *
       *    uint32_t packed = load_sample_positions();
       *    uint32_t shifted = packed >> (sample_id * 8);
       *
       *    for (i = 0; i < 2; ++i) {
       *       uint8_t nibble = (shifted >> (i * 4)) & 0xF;
       *       xy[i] = ((float)nibble) / 16.0;
       *    }
       */
      nir_def *packed = nir_load_sample_positions_agx(b);

      /* The n'th sample is in the n'th byte of the register */
      nir_def *shifted = nir_ushr(
         b, packed, nir_u2u32(b, nir_imul_imm(b, nir_load_sample_id(b), 8)));

      nir_def *xy[2];
      for (unsigned i = 0; i < 2; ++i) {
         /* Get the appropriate nibble */
         nir_def *nibble =
            nir_iand_imm(b, nir_ushr_imm(b, shifted, i * 4), 0xF);

         /* Convert it from fixed point to float */
         xy[i] = nir_fmul_imm(b, nir_u2f16(b, nibble), 1.0 / 16.0);

         /* Upconvert if necessary */
         xy[i] = nir_f2fN(b, xy[i], intr->def.bit_size);
      }

      /* Collect and rewrite */
      nir_def_replace(&intr->def, nir_vec2(b, xy[0], xy[1]));
      return true;
   }

   case nir_intrinsic_load_sample_mask_in: {
      /* Apply API sample mask to sample mask inputs, lowering:
       *
       *     sample_mask_in --> sample_mask_in & api_sample_mask
       *
       * Furthermore in OpenGL, gl_SampleMaskIn is only supposed to have the
       * single bit set of the sample currently being shaded when sample shading
       * is used. Mask by the sample ID to make that happen.
       */
      b->cursor = nir_after_instr(&intr->instr);
      nir_def *old = &intr->def;
      nir_def *lowered = nir_iand(
         b, old, nir_u2uN(b, nir_load_api_sample_mask_agx(b), old->bit_size));

      if (b->shader->info.fs.uses_sample_shading) {
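         /* load_active_samples_agx gives the bit(s) of the sample(s) being
          * shaded by this invocation, implementing the masking described
          * above.
          */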
         nir_def *bit = nir_load_active_samples_agx(b);
         lowered = nir_iand(b, lowered, nir_u2uN(b, bit, old->bit_size));
      }

      nir_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
      return true;
   }

   case nir_intrinsic_load_helper_invocation: {
      /* When sample shading is enabled, we may execute helper invocations for
       * samples that are not covered. Mask so that load_helper_invocation
       * returns the right thing. By extension, this ensures we don't execute
       * stores for non-covered samples.
       */
      if (!b->shader->info.fs.uses_sample_shading)
         return false;

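      /* This invocation is a helper iff the coverage mask shares no bits
       * with the sample(s) being shaded by this invocation.
       */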
      b->cursor = nir_instr_remove(&intr->instr);
      nir_def *active = nir_load_active_samples_agx(b);
      nir_def *mask = nir_u2uN(b, nir_load_sample_mask(b), active->bit_size);
      nir_def *def = nir_ieq_imm(b, nir_iand(b, mask, active), 0);
      nir_def_rewrite_uses(&intr->def, def);
      return true;
   }

   case nir_intrinsic_load_barycentric_sample: {
      /* Lower fragment varyings with "sample" interpolation to
       * interpolateAtSample() with the sample ID. If multisampling is
       * disabled, the sample ID is 0, so we don't need to mask, unlike for
       * load_barycentric_at_sample.
       */
      b->cursor = nir_after_instr(&intr->instr);
      nir_def *old = &intr->def;

      nir_def *lowered = nir_load_barycentric_at_sample(
         b, intr->def.bit_size, nir_load_sample_id(b),
         .interp_mode = nir_intrinsic_interp_mode(intr));

      nir_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
      return true;
   }

   case nir_intrinsic_load_barycentric_at_sample: {
      /*
       * In OpenGL, interpolateAtSample interpolates at the centre when
       * multisampling is disabled. Furthermore, results are undefined when
       * multisampling is enabled but the sample ID is out-of-bounds.
       *
       * To handle the former case, we force the sample ID to 0 when
       * multisampling is disabled. To optimize the latter case, we force the
       * sample ID to 0 when the requested sample is definitively out-of-bounds.
       */
      b->cursor = nir_before_instr(&intr->instr);

      nir_src *src = &intr->src[0];
      nir_def *sample = src->ssa;

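      /* A constant sample index >= 4 is definitively out-of-bounds, since at
       * most 4 samples are supported. Otherwise, only force sample 0 when
       * multisampling is disabled.
       */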
      if (nir_src_is_const(*src) && nir_src_as_uint(*src) >= 4) {
         sample = nir_imm_int(b, 0);
      } else {
         sample = select_if_msaa_else_0(b, sample);
      }

      nir_src_rewrite(src, sample);
      return true;
   }

   case nir_intrinsic_store_output: {
      /*
       * In OpenGL, sample mask writes are ignored unless multisampling is used.
       * This is not the case in Vulkan; the two behaviours are disambiguated
       * by the ignore_sample_mask_without_msaa flag.
       *
       * If it is used, the Vulkan spec says:
       *
       *    If sample shading is enabled, bits written to SampleMask
       *    corresponding to samples that are not being shaded by the fragment
       *    shader invocation are ignored.
       *
       * That will be satisfied by outputting gl_SampleMask for the whole pixel
       * and then lowering sample shading after (splitting up discard targets).
       */
      nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
      if (sem.location != FRAG_RESULT_SAMPLE_MASK)
         return false;

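      /* discard_agx takes the set of samples to discard, so invert the
       * written gl_SampleMask (which holds the samples to keep).
       */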
      nir_def *mask = nir_inot(b, nir_u2u16(b, intr->src[0].ssa));
      bool *ignore_sample_mask_without_msaa = data;

      if (*ignore_sample_mask_without_msaa)
         mask = select_if_msaa_else_0(b, mask);

      nir_discard_agx(b, mask);
      nir_instr_remove(&intr->instr);

      b->shader->info.fs.uses_discard = true;
      return true;
   }

   default:
      return false;
   }
}

/*
 * In a fragment shader using sample shading, lower intrinsics like
 * load_sample_position to variants in terms of load_sample_id. Except for a
 * possible API bit to force sample shading in shaders that don't otherwise need
 * it, this pass does not depend on the shader key. In particular, it does not
 * depend on the sample count. So it runs on fragment shaders at compile-time.
 * The load_sample_id intrinsics themselves are lowered later, with different
 * lowerings for monolithic vs epilogs.
 *
 * Note that fragment I/O (like store_local_pixel_agx and discard_agx) does not
 * get lowered here, because that lowering is different for monolithic vs FS
 * epilogs even though there's no dependency on sample count.
 */
bool
agx_nir_lower_sample_intrinsics(nir_shader *shader,
                                bool ignore_sample_mask_without_msaa)
{
   return nir_shader_intrinsics_pass(shader, lower, nir_metadata_control_flow,
                                     &ignore_sample_mask_without_msaa);
}