1 /*
2 * Copyright 2019 Advanced Micro Devices, Inc.
3 * Copyright 2021 Valve Corporation
4 *
5 * SPDX-License-Identifier: MIT
6 */
7
8 #include "ac_nir.h"
9 #include "ac_nir_helpers.h"
10 #include "nir_builder.h"
11
12 /* This code is adapted from ac_llvm_cull.c, hence the copyright to AMD. */
13
14 typedef struct
15 {
16 nir_def *w_reflection;
17 nir_def *all_w_negative;
18 nir_def *any_w_negative;
19 } position_w_info;
20
21 static void
analyze_position_w(nir_builder * b,nir_def * pos[][4],unsigned num_vertices,position_w_info * w_info)22 analyze_position_w(nir_builder *b, nir_def *pos[][4], unsigned num_vertices,
23 position_w_info *w_info)
24 {
25 w_info->all_w_negative = nir_imm_true(b);
26 w_info->w_reflection = nir_imm_false(b);
27 w_info->any_w_negative = nir_imm_false(b);
28
29 for (unsigned i = 0; i < num_vertices; ++i) {
30 nir_def *neg_w = nir_flt_imm(b, pos[i][3], 0.0f);
31 w_info->w_reflection = nir_ixor(b, neg_w, w_info->w_reflection);
32 w_info->any_w_negative = nir_ior(b, neg_w, w_info->any_w_negative);
33 w_info->all_w_negative = nir_iand(b, neg_w, w_info->all_w_negative);
34 }
35 }
36
37 static nir_def *
cull_face_triangle(nir_builder * b,nir_def * pos[3][4],const position_w_info * w_info)38 cull_face_triangle(nir_builder *b, nir_def *pos[3][4], const position_w_info *w_info)
39 {
40 nir_def *det_t0 = nir_fsub(b, pos[2][0], pos[0][0]);
41 nir_def *det_t1 = nir_fsub(b, pos[1][1], pos[0][1]);
42 nir_def *det_t2 = nir_fsub(b, pos[0][0], pos[1][0]);
43 nir_def *det_t3 = nir_fsub(b, pos[0][1], pos[2][1]);
44 nir_def *det_p0 = nir_fmul(b, det_t0, det_t1);
45 nir_def *det_p1 = nir_fmul(b, det_t2, det_t3);
46 nir_def *det = nir_fsub(b, det_p0, det_p1);
47
48 det = nir_bcsel(b, w_info->w_reflection, nir_fneg(b, det), det);
49
50 nir_def *front_facing_ccw = nir_fgt_imm(b, det, 0.0f);
51 nir_def *zero_area = nir_feq_imm(b, det, 0.0f);
52 nir_def *ccw = nir_load_cull_ccw_amd(b);
53 nir_def *front_facing = nir_ieq(b, front_facing_ccw, ccw);
54 nir_def *cull_front = nir_load_cull_front_face_enabled_amd(b);
55 nir_def *cull_back = nir_load_cull_back_face_enabled_amd(b);
56
57 nir_def *face_culled = nir_bcsel(b, front_facing, cull_front, cull_back);
58 face_culled = nir_ior(b, face_culled, zero_area);
59
60 /* Don't reject NaN and +/-infinity, these are tricky.
61 * Just trust fixed-function HW to handle these cases correctly.
62 */
63 return nir_iand(b, face_culled, nir_fisfinite(b, det));
64 }
65
66 static void
calc_bbox_triangle(nir_builder * b,nir_def * pos[3][4],nir_def * bbox_min[2],nir_def * bbox_max[2])67 calc_bbox_triangle(nir_builder *b, nir_def *pos[3][4], nir_def *bbox_min[2], nir_def *bbox_max[2])
68 {
69 for (unsigned chan = 0; chan < 2; ++chan) {
70 bbox_min[chan] = nir_fmin(b, pos[0][chan], nir_fmin(b, pos[1][chan], pos[2][chan]));
71 bbox_max[chan] = nir_fmax(b, pos[0][chan], nir_fmax(b, pos[1][chan], pos[2][chan]));
72 }
73 }
74
75 static nir_def *
cull_frustrum(nir_builder * b,nir_def * bbox_min[2],nir_def * bbox_max[2])76 cull_frustrum(nir_builder *b, nir_def *bbox_min[2], nir_def *bbox_max[2])
77 {
78 nir_def *prim_outside_view = nir_imm_false(b);
79
80 for (unsigned chan = 0; chan < 2; ++chan) {
81 prim_outside_view = nir_ior(b, prim_outside_view, nir_flt_imm(b, bbox_max[chan], -1.0f));
82 prim_outside_view = nir_ior(b, prim_outside_view, nir_fgt_imm(b, bbox_min[chan], 1.0f));
83 }
84
85 return prim_outside_view;
86 }
87
88 static nir_def *
cull_small_primitive_triangle(nir_builder * b,nir_def * bbox_min[2],nir_def * bbox_max[2],nir_def * prim_is_small_else)89 cull_small_primitive_triangle(nir_builder *b, nir_def *bbox_min[2], nir_def *bbox_max[2],
90 nir_def *prim_is_small_else)
91 {
92 nir_def *prim_is_small = NULL;
93
94 nir_if *if_cull_small_prims = nir_push_if(b, nir_load_cull_small_primitives_enabled_amd(b));
95 {
96 nir_def *vp = nir_load_viewport_xy_scale_and_offset(b);
97 nir_def *small_prim_precision = nir_load_cull_small_prim_precision_amd(b);
98 prim_is_small = prim_is_small_else;
99
100 for (unsigned chan = 0; chan < 2; ++chan) {
101 nir_def *vp_scale = nir_channel(b, vp, chan);
102 nir_def *vp_translate = nir_channel(b, vp, 2 + chan);
103
104 /* Convert the position to screen-space coordinates. */
105 nir_def *min = nir_ffma(b, bbox_min[chan], vp_scale, vp_translate);
106 nir_def *max = nir_ffma(b, bbox_max[chan], vp_scale, vp_translate);
107
108 /* Scale the bounding box according to precision. */
109 min = nir_fsub(b, min, small_prim_precision);
110 max = nir_fadd(b, max, small_prim_precision);
111
112 /* Determine if the bbox intersects the sample point, by checking if the min and max round to the same int. */
113 min = nir_fround_even(b, min);
114 max = nir_fround_even(b, max);
115
116 nir_def *rounded_to_eq = nir_feq(b, min, max);
117 prim_is_small = nir_ior(b, prim_is_small, rounded_to_eq);
118 }
119 }
120 nir_pop_if(b, if_cull_small_prims);
121
122 return nir_if_phi(b, prim_is_small, prim_is_small_else);
123 }
124
125 static nir_def *
ac_nir_cull_triangle(nir_builder * b,nir_def * initially_accepted,nir_def * pos[3][4],position_w_info * w_info,ac_nir_cull_accepted accept_func,void * state)126 ac_nir_cull_triangle(nir_builder *b,
127 nir_def *initially_accepted,
128 nir_def *pos[3][4],
129 position_w_info *w_info,
130 ac_nir_cull_accepted accept_func,
131 void *state)
132 {
133 nir_def *accepted = initially_accepted;
134 accepted = nir_iand(b, accepted, nir_inot(b, w_info->all_w_negative));
135 accepted = nir_iand(b, accepted, nir_inot(b, cull_face_triangle(b, pos, w_info)));
136
137 nir_def *bbox_accepted = NULL;
138
139 nir_if *if_accepted = nir_push_if(b, accepted);
140 {
141 nir_def *bbox_min[2] = {0}, *bbox_max[2] = {0};
142 calc_bbox_triangle(b, pos, bbox_min, bbox_max);
143
144 nir_def *prim_outside_view = cull_frustrum(b, bbox_min, bbox_max);
145 nir_def *prim_invisible =
146 cull_small_primitive_triangle(b, bbox_min, bbox_max, prim_outside_view);
147
148 bbox_accepted = nir_ior(b, nir_inot(b, prim_invisible), w_info->any_w_negative);
149
150 /* for caller which need to react when primitive is accepted */
151 if (accept_func) {
152 nir_if *if_still_accepted = nir_push_if(b, bbox_accepted);
153 if_still_accepted->control = nir_selection_control_divergent_always_taken;
154 {
155 accept_func(b, state);
156 }
157 nir_pop_if(b, if_still_accepted);
158 }
159 }
160 nir_pop_if(b, if_accepted);
161
162 return nir_if_phi(b, bbox_accepted, accepted);
163 }
164
165 static void
rotate_45degrees(nir_builder * b,nir_def * v[2])166 rotate_45degrees(nir_builder *b, nir_def *v[2])
167 {
168 /* Rotating a triangle by 45 degrees:
169 *
170 * x2 = x*cos(45) - y*sin(45)
171 * y2 = x*sin(45) + y*cos(45)
172 *
173 * Since sin(45) == cos(45), we can write:
174 *
175 * x2 = x*cos(45) - y*cos(45) = (x - y) * cos(45)
176 * y2 = x*cos(45) + y*cos(45) = (x + y) * cos(45)
177 *
178 * The width of each square (rotated diamond) is sqrt(0.5), so we have to scale it to 1
179 * by multiplying by 1/sqrt(0.5) = sqrt(2) because we want round() to give us the position
180 * of the closest center of the square (rotated diamond). After scaling, we get:
181 *
182 * x2 = (x - y) * cos(45) * sqrt(2)
183 * y2 = (x + y) * cos(45) * sqrt(2)
184 *
185 * Since cos(45) * sqrt(2) = 1, we get:
186 *
187 * x2 = x - y
188 * y2 = x + y
189 */
190 nir_def *result[2];
191 result[0] = nir_fsub(b, v[0], v[1]);
192 result[1] = nir_fadd(b, v[0], v[1]);
193
194 memcpy(v, result, sizeof(result));
195 }
196
197 static void
calc_bbox_line(nir_builder * b,nir_def * pos[3][4],nir_def * bbox_min[2],nir_def * bbox_max[2])198 calc_bbox_line(nir_builder *b, nir_def *pos[3][4], nir_def *bbox_min[2], nir_def *bbox_max[2])
199 {
200 nir_def *clip_half_line_width = nir_load_clip_half_line_width_amd(b);
201
202 for (unsigned chan = 0; chan < 2; ++chan) {
203 bbox_min[chan] = nir_fmin(b, pos[0][chan], pos[1][chan]);
204 bbox_max[chan] = nir_fmax(b, pos[0][chan], pos[1][chan]);
205
206 nir_def *width = nir_channel(b, clip_half_line_width, chan);
207 bbox_min[chan] = nir_fsub(b, bbox_min[chan], width);
208 bbox_max[chan] = nir_fadd(b, bbox_max[chan], width);
209 }
210 }
211
212 static nir_def *
cull_small_primitive_line(nir_builder * b,nir_def * pos[3][4],nir_def * bbox_min[2],nir_def * bbox_max[2],nir_def * prim_is_small_else)213 cull_small_primitive_line(nir_builder *b, nir_def *pos[3][4],
214 nir_def *bbox_min[2], nir_def *bbox_max[2],
215 nir_def *prim_is_small_else)
216 {
217 nir_def *prim_is_small = NULL;
218
219 /* Small primitive filter - eliminate lines that are too small to affect a sample. */
220 nir_if *if_cull_small_prims = nir_push_if(b, nir_load_cull_small_primitives_enabled_amd(b));
221 {
222 /* This only works with lines without perpendicular end caps (lines with perpendicular
223 * end caps are rasterized as quads and thus can't be culled as small prims in 99% of
224 * cases because line_width >= 1).
225 *
226 * This takes advantage of the diamond exit rule, which says that every pixel
227 * has a diamond inside it touching the pixel boundary and only if a line exits
228 * the diamond, that pixel is filled. If a line enters the diamond or stays
229 * outside the diamond, the pixel isn't filled.
230 *
231 * This algorithm is a little simpler than that. The space outside all diamonds also
232 * has the same diamond shape, which we'll call corner diamonds.
233 *
234 * The idea is to cull all lines that are entirely inside a diamond, including
235 * corner diamonds. If a line is entirely inside a diamond, it can be culled because
236 * it doesn't exit it. If a line is entirely inside a corner diamond, it can be culled
237 * because it doesn't enter any diamond and thus can't exit any diamond.
238 *
239 * The viewport is rotated by 45 degrees to turn diamonds into squares, and a bounding
240 * box test is used to determine whether a line is entirely inside any square (diamond).
241 *
242 * The line width doesn't matter. Wide lines only duplicate filled pixels in either X or
243 * Y direction from the filled pixels. MSAA also doesn't matter. MSAA should ideally use
244 * perpendicular end caps that enable quad rasterization for lines. Thus, this should
245 * always use non-MSAA viewport transformation and non-MSAA small prim precision.
246 *
247 * A good test is piglit/lineloop because it draws 10k subpixel lines in a circle.
248 * It should contain no holes if this matches hw behavior.
249 */
250 nir_def *v0[2], *v1[2];
251 nir_def *vp = nir_load_viewport_xy_scale_and_offset(b);
252
253 /* Get vertex positions in pixels. */
254 for (unsigned chan = 0; chan < 2; chan++) {
255 nir_def *vp_scale = nir_channel(b, vp, chan);
256 nir_def *vp_translate = nir_channel(b, vp, 2 + chan);
257
258 v0[chan] = nir_ffma(b, pos[0][chan], vp_scale, vp_translate);
259 v1[chan] = nir_ffma(b, pos[1][chan], vp_scale, vp_translate);
260 }
261
262 /* Rotate the viewport by 45 degrees, so that diamonds become squares. */
263 rotate_45degrees(b, v0);
264 rotate_45degrees(b, v1);
265
266 nir_def *small_prim_precision = nir_load_cull_small_prim_precision_amd(b);
267
268 nir_def *rounded_to_eq[2];
269 for (unsigned chan = 0; chan < 2; chan++) {
270 /* Compute the bounding box around both vertices. We do this because we must
271 * enlarge the line area by the precision of the rasterizer.
272 */
273 nir_def *min = nir_fmin(b, v0[chan], v1[chan]);
274 nir_def *max = nir_fmax(b, v0[chan], v1[chan]);
275
276 /* Enlarge the bounding box by the precision of the rasterizer. */
277 min = nir_fsub(b, min, small_prim_precision);
278 max = nir_fadd(b, max, small_prim_precision);
279
280 /* Round the bounding box corners. If both rounded corners are equal,
281 * the bounding box is entirely inside a square (diamond).
282 */
283 min = nir_fround_even(b, min);
284 max = nir_fround_even(b, max);
285
286 rounded_to_eq[chan] = nir_feq(b, min, max);
287 }
288
289 prim_is_small = nir_iand(b, rounded_to_eq[0], rounded_to_eq[1]);
290 prim_is_small = nir_ior(b, prim_is_small, prim_is_small_else);
291 }
292 nir_pop_if(b, if_cull_small_prims);
293
294 return nir_if_phi(b, prim_is_small, prim_is_small_else);
295 }
296
297 static nir_def *
ac_nir_cull_line(nir_builder * b,nir_def * initially_accepted,nir_def * pos[3][4],position_w_info * w_info,ac_nir_cull_accepted accept_func,void * state)298 ac_nir_cull_line(nir_builder *b,
299 nir_def *initially_accepted,
300 nir_def *pos[3][4],
301 position_w_info *w_info,
302 ac_nir_cull_accepted accept_func,
303 void *state)
304 {
305 nir_def *accepted = initially_accepted;
306 accepted = nir_iand(b, accepted, nir_inot(b, w_info->all_w_negative));
307
308 nir_def *bbox_accepted = NULL;
309
310 nir_if *if_accepted = nir_push_if(b, accepted);
311 {
312 nir_def *bbox_min[2] = {0}, *bbox_max[2] = {0};
313 calc_bbox_line(b, pos, bbox_min, bbox_max);
314
315 /* Frustrum culling - eliminate lines that are fully outside the view. */
316 nir_def *prim_outside_view = cull_frustrum(b, bbox_min, bbox_max);
317 nir_def *prim_invisible =
318 cull_small_primitive_line(b, pos, bbox_min, bbox_max, prim_outside_view);
319
320 bbox_accepted = nir_ior(b, nir_inot(b, prim_invisible), w_info->any_w_negative);
321
322 /* for caller which need to react when primitive is accepted */
323 if (accept_func) {
324 nir_if *if_still_accepted = nir_push_if(b, bbox_accepted);
325 {
326 accept_func(b, state);
327 }
328 nir_pop_if(b, if_still_accepted);
329 }
330 }
331 nir_pop_if(b, if_accepted);
332
333 return nir_if_phi(b, bbox_accepted, accepted);
334 }
335
336 nir_def *
ac_nir_cull_primitive(nir_builder * b,nir_def * initially_accepted,nir_def * pos[3][4],unsigned num_vertices,ac_nir_cull_accepted accept_func,void * state)337 ac_nir_cull_primitive(nir_builder *b,
338 nir_def *initially_accepted,
339 nir_def *pos[3][4],
340 unsigned num_vertices,
341 ac_nir_cull_accepted accept_func,
342 void *state)
343 {
344 position_w_info w_info = {0};
345 analyze_position_w(b, pos, num_vertices, &w_info);
346
347 if (num_vertices == 3)
348 return ac_nir_cull_triangle(b, initially_accepted, pos, &w_info, accept_func, state);
349 else if (num_vertices == 2)
350 return ac_nir_cull_line(b, initially_accepted, pos, &w_info, accept_func, state);
351 else
352 unreachable("point culling not implemented");
353
354 return NULL;
355 }
356