/*
 * Copyright © 2016 Rob Clark <[email protected]>
 * Copyright © 2018 Google, Inc.
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#define FD_BO_NO_HARDPIN 1

#include "pipe/p_state.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "fd6_context.h"
#include "fd6_pack.h"
#include "fd6_zsa.h"

/* update lrz state based on stencil-test func:
 *
 * Conceptually the order of the pipeline is:
 *
 *
 *   FS -> Alpha-Test  ->  Stencil-Test  ->  Depth-Test
 *                              |                |
 *                       if wrmask != 0     if wrmask != 0
 *                              |                |
 *                              v                v
 *                        Stencil-Write      Depth-Write
 *
 * Because Stencil-Test can have side effects (Stencil-Write) prior
 * to depth test, in this case we potentially need to disable early
 * lrz-test. See:
 *
 * https://www.khronos.org/opengl/wiki/Per-Sample_Processing
 */
static void
update_lrz_stencil(struct fd6_zsa_stateobj *so, enum pipe_compare_func func,
                   bool stencil_write)
{
   switch (func) {
   case PIPE_FUNC_ALWAYS:
      /* nothing to do for LRZ, but for stencil test when stencil-
       * write is enabled, we need to disable lrz-test, since
       * conceptually stencil test and write happens before depth-
       * test:
       */
      if (stencil_write) {
         so->lrz.enable = false;
         so->lrz.test = false;
      }
      break;
   case PIPE_FUNC_NEVER:
      /* fragment never passes, disable lrz_write for this draw: */
      so->lrz.write = false;
      break;
   default:
      /* whether the fragment passes or not depends on result
       * of stencil test, which we cannot know when doing binning
       * pass:
       */
      so->lrz.write = false;
      /* similarly to the PIPE_FUNC_ALWAYS case, if there are side-
       * effects from stencil test we need to disable lrz-test.
       */
      if (stencil_write) {
         so->lrz.enable = false;
         so->lrz.test = false;
      }
      break;
   }
}

template <chip CHIP>
void *
fd6_zsa_state_create(struct pipe_context *pctx,
                     const struct pipe_depth_stencil_alpha_state *cso)
{
   struct fd_context *ctx = fd_context(pctx);
   struct fd6_zsa_stateobj *so;

   so = CALLOC_STRUCT(fd6_zsa_stateobj);
   if (!so)
      return NULL;

   so->base = *cso;

   so->writes_zs = util_writes_depth_stencil(cso);
   so->writes_z = util_writes_depth(cso);

   enum adreno_compare_func depth_func =
      (enum adreno_compare_func)cso->depth_func; /* maps 1:1 */

   /* On some GPUs it is necessary to enable z test for depth bounds test
    * when UBWC is enabled. Otherwise, the GPU would hang. FUNC_ALWAYS is
    * required to pass z test. Relevant tests:
    *  dEQP-VK.pipeline.extended_dynamic_state.two_draws_dynamic.depth_bounds_test_disable
    *  dEQP-VK.dynamic_state.ds_state.depth_bounds_1
    */
   if (cso->depth_bounds_test && !cso->depth_enabled &&
       ctx->screen->info->a6xx.depth_bounds_require_depth_test_quirk) {
      so->rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
      depth_func = FUNC_ALWAYS;
   }

   so->rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_ZFUNC(depth_func);

   if (cso->depth_enabled) {
      so->rb_depth_cntl |=
         A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE | A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE;

      so->lrz.test = true;

      if (cso->depth_writemask) {
         so->lrz.write = true;
      }

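      /* LRZ testing relies on knowing which direction the depth compare
       * goes, so only LESS/LEQUAL and GREATER/GEQUAL map directly to an
       * LRZ direction; the remaining funcs are handled case by case
       * below:
       */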
      switch (cso->depth_func) {
      case PIPE_FUNC_LESS:
      case PIPE_FUNC_LEQUAL:
         so->lrz.enable = true;
         so->lrz.direction = FD_LRZ_LESS;
         break;

      case PIPE_FUNC_GREATER:
      case PIPE_FUNC_GEQUAL:
         so->lrz.enable = true;
         so->lrz.direction = FD_LRZ_GREATER;
         break;

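      /* Nothing passes the depth test, so disable LRZ writes for this
       * draw; the existing LRZ buffer stays valid.
       */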
      case PIPE_FUNC_NEVER:
         so->lrz.enable = true;
         so->lrz.write = false;
         so->lrz.direction = FD_LRZ_LESS;
         break;

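      /* ALWAYS/NOTEQUAL can pass fragments regardless of depth
       * direction, so with depth writes enabled the LRZ buffer contents
       * become stale and must be invalidated; without depth writes, LRZ
       * is simply skipped for this draw.
       */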
      case PIPE_FUNC_ALWAYS:
      case PIPE_FUNC_NOTEQUAL:
         if (cso->depth_writemask) {
            perf_debug_ctx(ctx, "Invalidating LRZ due to ALWAYS/NOTEQUAL with depth write");
            so->lrz.write = false;
            so->invalidate_lrz = true;
         } else {
            perf_debug_ctx(ctx, "Skipping LRZ due to ALWAYS/NOTEQUAL");
            so->lrz.enable = false;
            so->lrz.write = false;
         }
         break;

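      /* EQUAL cannot be tested conservatively against a min/max depth
       * value, so skip LRZ for this draw; no invalidate is needed since
       * a passing EQUAL write stores the depth value that is already
       * there.
       */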
      case PIPE_FUNC_EQUAL:
         so->lrz.enable = false;
         so->lrz.write = false;
         break;
      }
   }

   if (cso->depth_writemask)
      so->rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;

   if (cso->stencil[0].enabled) {
      const struct pipe_stencil_state *s = &cso->stencil[0];

      /* stencil test happens before depth test, so without performing
       * stencil test we don't really know what the updates to the
       * depth buffer will be.
       */
      update_lrz_stencil(so, (enum pipe_compare_func)s->func, util_writes_stencil(s));

      so->rb_stencil_control |=
         A6XX_RB_STENCIL_CONTROL_STENCIL_READ |
         A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
         A6XX_RB_STENCIL_CONTROL_FUNC((enum adreno_compare_func)s->func) | /* maps 1:1 */
         A6XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
         A6XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
         A6XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));

      so->rb_stencilmask = A6XX_RB_STENCILMASK_MASK(s->valuemask);
      so->rb_stencilwrmask = A6XX_RB_STENCILWRMASK_WRMASK(s->writemask);

      if (cso->stencil[1].enabled) {
         const struct pipe_stencil_state *bs = &cso->stencil[1];

         update_lrz_stencil(so, (enum pipe_compare_func)bs->func, util_writes_stencil(bs));

         so->rb_stencil_control |=
            A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
            A6XX_RB_STENCIL_CONTROL_FUNC_BF((enum adreno_compare_func)bs->func) | /* maps 1:1 */
            A6XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
            A6XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
            A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));

         so->rb_stencilmask |= A6XX_RB_STENCILMASK_BFMASK(bs->valuemask);
         so->rb_stencilwrmask |= A6XX_RB_STENCILWRMASK_BFWRMASK(bs->writemask);
      }
   }

   if (cso->alpha_enabled) {
      /* Alpha test is functionally a conditional discard, so we can't
       * write LRZ before seeing if we end up discarding or not
       */
      if (cso->alpha_func != PIPE_FUNC_ALWAYS) {
         so->lrz.write = false;
         so->alpha_test = true;
      }

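      /* convert the [0, 1] float reference value to the 8-bit value the
       * ALPHA_REF field expects:
       */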
      uint32_t ref = cso->alpha_ref_value * 255.0f;
      so->rb_alpha_control =
         A6XX_RB_ALPHA_CONTROL_ALPHA_TEST |
         A6XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
         A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(
            (enum adreno_compare_func)cso->alpha_func);
   }

   if (cso->depth_bounds_test) {
      so->rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE |
                           A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE;
      so->lrz.z_bounds_enable = true;
   }

   /* Build the four state permutations (with/without alpha-test and
    * depth-clamp), indexed by the FD6_ZSA_NO_ALPHA and
    * FD6_ZSA_DEPTH_CLAMP bits:
    */
   for (int i = 0; i < 4; i++) {
      struct fd_ringbuffer *ring = fd_ringbuffer_new_object(ctx->pipe, 12 * 4);
      bool depth_clamp_enable = (i & FD6_ZSA_DEPTH_CLAMP);

      OUT_PKT4(ring, REG_A6XX_RB_ALPHA_CONTROL, 1);
      OUT_RING(ring,
               (i & FD6_ZSA_NO_ALPHA)
                  ? so->rb_alpha_control & ~A6XX_RB_ALPHA_CONTROL_ALPHA_TEST
                  : so->rb_alpha_control);

      OUT_PKT4(ring, REG_A6XX_RB_STENCIL_CONTROL, 1);
      OUT_RING(ring, so->rb_stencil_control);

      OUT_PKT4(ring, REG_A6XX_RB_DEPTH_CNTL, 1);
      OUT_RING(ring,
               so->rb_depth_cntl | COND(depth_clamp_enable || CHIP >= A7XX,
                                        A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE));

      OUT_PKT4(ring, REG_A6XX_RB_STENCILMASK, 2);
      OUT_RING(ring, so->rb_stencilmask);
      OUT_RING(ring, so->rb_stencilwrmask);

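      /* Z_CLAMP_ENABLE is always set on a7xx (see RB_DEPTH_CNTL above),
       * so when depth clamping isn't requested program the full [0, 1]
       * range here; otherwise use the range from the cso:
       */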
      if (CHIP >= A7XX && !depth_clamp_enable) {
         OUT_REG(ring,
            A6XX_RB_Z_BOUNDS_MIN(0.0f),
            A6XX_RB_Z_BOUNDS_MAX(1.0f),
         );
      } else {
         OUT_REG(ring,
            A6XX_RB_Z_BOUNDS_MIN(cso->depth_bounds_min),
            A6XX_RB_Z_BOUNDS_MAX(cso->depth_bounds_max),
         );
      }

      so->stateobj[i] = ring;
   }

   return so;
}
FD_GENX(fd6_zsa_state_create);

void
fd6_zsa_state_delete(struct pipe_context *pctx, void *hwcso)
{
   struct fd6_zsa_stateobj *so = (struct fd6_zsa_stateobj *)hwcso;

   for (int i = 0; i < ARRAY_SIZE(so->stateobj); i++)
      fd_ringbuffer_del(so->stateobj[i]);
   FREE(hwcso);
}