xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r300/r300_hyperz.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2008 Corbin Simpson <[email protected]>
3  * Copyright 2009 Marek Olšák <[email protected]>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "r300_context.h"
8 #include "r300_reg.h"
9 #include "r300_fs.h"
10 
11 #include "util/format/u_format.h"
12 
13 /*
14   HiZ rules - taken from various docs
15    1. HiZ only works on depth values
16    2. Cannot HiZ if stencil fail or zfail is !KEEP
17    3. on R300/400, HiZ is disabled if depth test is EQUAL
18    4. comparison changes without clears usually mean disabling HiZ
19 */
20 /*****************************************************************************/
21 /* The HyperZ setup                                                          */
22 /*****************************************************************************/
23 
r300_get_hiz_func(struct r300_context * r300)24 static enum r300_hiz_func r300_get_hiz_func(struct r300_context *r300)
25 {
26     struct r300_dsa_state *dsa = r300->dsa_state.state;
27 
28     switch (dsa->dsa.depth_func) {
29     case PIPE_FUNC_NEVER:
30     case PIPE_FUNC_EQUAL:
31     case PIPE_FUNC_NOTEQUAL:
32     case PIPE_FUNC_ALWAYS:
33     default:
34         /* Guess MAX for uncertain cases. */
35     case PIPE_FUNC_LESS:
36     case PIPE_FUNC_LEQUAL:
37         return HIZ_FUNC_MAX;
38 
39     case PIPE_FUNC_GREATER:
40     case PIPE_FUNC_GEQUAL:
41         return HIZ_FUNC_MIN;
42     }
43 }
44 
45 /* Return what's used for the depth test (either minimum or maximum). */
r300_get_sc_hz_max(struct r300_context * r300)46 static unsigned r300_get_sc_hz_max(struct r300_context *r300)
47 {
48     struct r300_dsa_state *dsa = r300->dsa_state.state;
49     unsigned func = dsa->dsa.depth_func;
50 
51     return func >= PIPE_FUNC_GREATER ? R300_SC_HYPERZ_MAX : R300_SC_HYPERZ_MIN;
52 }
53 
r300_is_hiz_func_valid(struct r300_context * r300)54 static bool r300_is_hiz_func_valid(struct r300_context *r300)
55 {
56     struct r300_dsa_state *dsa = r300->dsa_state.state;
57     unsigned func = dsa->dsa.depth_func;
58 
59     if (r300->hiz_func == HIZ_FUNC_NONE)
60         return true;
61 
62     /* func1 is less/lessthan */
63     if (r300->hiz_func == HIZ_FUNC_MAX &&
64         (func == PIPE_FUNC_GEQUAL || func == PIPE_FUNC_GREATER))
65         return false;
66 
67     /* func1 is greater/greaterthan */
68     if (r300->hiz_func == HIZ_FUNC_MIN &&
69         (func == PIPE_FUNC_LESS   || func == PIPE_FUNC_LEQUAL))
70         return false;
71 
72     return true;
73 }
74 
r300_dsa_stencil_op_not_keep(struct pipe_stencil_state * s)75 static bool r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s)
76 {
77     return s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP ||
78                           s->zfail_op != PIPE_STENCIL_OP_KEEP);
79 }
80 
r300_hiz_allowed(struct r300_context * r300)81 static bool r300_hiz_allowed(struct r300_context *r300)
82 {
83     struct r300_dsa_state *dsa = r300->dsa_state.state;
84     struct r300_screen *r300screen = r300->screen;
85 
86     if (r300_fragment_shader_writes_depth(r300_fs(r300)))
87         return false;
88 
89     if (r300->query_current)
90         return false;
91 
92     /* If the depth function is inverted, HiZ must be disabled. */
93     if (!r300_is_hiz_func_valid(r300))
94         return false;
95 
96     /* if stencil fail/zfail op is not KEEP */
97     if (r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[0]) ||
98         r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[1]))
99         return false;
100 
101     if (dsa->dsa.depth_enabled) {
102         /* if depth func is EQUAL pre-r500 */
103         if (dsa->dsa.depth_func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500)
104             return false;
105 
106         /* if depth func is NOTEQUAL */
107         if (dsa->dsa.depth_func == PIPE_FUNC_NOTEQUAL)
108             return false;
109     }
110     return true;
111 }
112 
r300_update_hyperz(struct r300_context * r300)113 static void r300_update_hyperz(struct r300_context* r300)
114 {
115     struct r300_hyperz_state *z =
116         (struct r300_hyperz_state*)r300->hyperz_state.state;
117     struct pipe_framebuffer_state *fb =
118         (struct pipe_framebuffer_state*)r300->fb_state.state;
119     struct r300_dsa_state *dsa = r300->dsa_state.state;
120     struct r300_resource *zstex =
121             fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL;
122 
123     z->gb_z_peq_config = 0;
124     z->zb_bw_cntl = 0;
125     z->sc_hyperz = R300_SC_HYPERZ_ADJ_2;
126     z->flush = 0;
127 
128     if (r300->cbzb_clear) {
129         z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY;
130         return;
131     }
132 
133     if (!zstex || !r300->hyperz_enabled)
134         return;
135 
136     /* Set the size of ZMASK tiles. */
137     if (zstex->tex.zcomp8x8[fb->zsbuf->u.tex.level]) {
138         z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;
139     }
140 
141     /* R500-specific features and optimizations. */
142     if (r300->screen->caps.is_r500) {
143         z->zb_bw_cntl |= R500_PEQ_PACKING_ENABLE |
144                          R500_COVERED_PTR_MASKING_ENABLE;
145     }
146 
147     /* Setup decompression if needed. No other HyperZ setting is required. */
148     if (r300->zmask_decompress) {
149         z->zb_bw_cntl |= R300_FAST_FILL_ENABLE |
150                          R300_RD_COMP_ENABLE;
151         return;
152     }
153 
154     /* Do not set anything if depth and stencil tests are off. */
155     if (!dsa->dsa.depth_enabled &&
156         !dsa->dsa.stencil[0].enabled &&
157         !dsa->dsa.stencil[1].enabled) {
158         assert(!dsa->dsa.depth_writemask);
159         return;
160     }
161 
162     /* Zbuffer compression. */
163     if (r300->zmask_in_use && !r300->locked_zbuffer) {
164         z->zb_bw_cntl |= R300_FAST_FILL_ENABLE |
165                          R300_RD_COMP_ENABLE |
166                          R300_WR_COMP_ENABLE;
167     }
168 
169     /* HiZ. */
170     if (r300->hiz_in_use && !r300->locked_zbuffer) {
171         /* HiZ cannot be used under some circumstances. */
172         if (!r300_hiz_allowed(r300)) {
173             /* If writemask is disabled, the HiZ memory will not be changed,
174              * so we can keep its content for later. */
175             if (dsa->dsa.depth_writemask) {
176                 r300->hiz_in_use = false;
177             }
178             return;
179         }
180         DBG(r300, DBG_HYPERZ, "r300: Z-func: %i\n", dsa->dsa.depth_func);
181 
182         /* Set the HiZ function if needed. */
183         if (r300->hiz_func == HIZ_FUNC_NONE) {
184             r300->hiz_func = r300_get_hiz_func(r300);
185         }
186 
187         /* Setup the HiZ bits. */
188         z->zb_bw_cntl |= R300_HIZ_ENABLE |
189                 (r300->hiz_func == HIZ_FUNC_MIN ? R300_HIZ_MIN : R300_HIZ_MAX);
190 
191         z->sc_hyperz |= R300_SC_HYPERZ_ENABLE |
192                         r300_get_sc_hz_max(r300);
193 
194         if (r300->screen->caps.is_r500) {
195             z->zb_bw_cntl |= R500_HIZ_EQUAL_REJECT_ENABLE;
196         }
197     }
198 }
199 
200 /*****************************************************************************/
201 /* The ZTOP state                                                            */
202 /*****************************************************************************/
203 
r300_dsa_alpha_test_enabled(struct pipe_depth_stencil_alpha_state * dsa)204 static bool r300_dsa_alpha_test_enabled(
205         struct pipe_depth_stencil_alpha_state *dsa)
206 {
207     /* We are interested only in the cases when alpha testing can kill
208      * a fragment. */
209 
210     return dsa->alpha_enabled && dsa->alpha_func != PIPE_FUNC_ALWAYS;
211 }
212 
r300_update_ztop(struct r300_context * r300)213 static void r300_update_ztop(struct r300_context* r300)
214 {
215     struct r300_ztop_state* ztop_state =
216         (struct r300_ztop_state*)r300->ztop_state.state;
217     uint32_t old_ztop = ztop_state->z_buffer_top;
218 
219     /* This is important enough that I felt it warranted a comment.
220      *
221      * According to the docs, these are the conditions where ZTOP must be
222      * disabled:
223      * 1) Alpha testing enabled
224      * 2) Texture kill instructions in fragment shader
225      * 3) Chroma key culling enabled
226      * 4) W-buffering enabled
227      *
228      * The docs claim that for the first three cases, if no ZS writes happen,
229      * then ZTOP can be used.
230      *
231      * (3) will never apply since we do not support chroma-keyed operations.
232      * (4) will need to be re-examined (and this comment updated) if/when
233      * Hyper-Z becomes supported.
234      *
235      * Additionally, the following conditions require disabled ZTOP:
236      * 5) Depth writes in fragment shader
237      * 6) Outstanding occlusion queries
238      *
239      * This register causes stalls all the way from SC to CB when changed,
240      * but it is buffered on-chip so it does not hurt to write it if it has
241      * not changed.
242      *
243      * ~C.
244      */
245 
246     /* ZS writes */
247     if (util_writes_depth_stencil(r300->dsa_state.state) &&
248            (r300_dsa_alpha_test_enabled(r300->dsa_state.state) ||  /* (1) */
249             r300_fs(r300)->shader->info.uses_kill)) {              /* (2) */
250         ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
251     } else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */
252         ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
253     } else if (r300->query_current) {                              /* (6) */
254         ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
255     } else {
256         ztop_state->z_buffer_top = R300_ZTOP_ENABLE;
257     }
258     if (ztop_state->z_buffer_top != old_ztop)
259         r300_mark_atom_dirty(r300, &r300->ztop_state);
260 }
261 
r300_update_hyperz_state(struct r300_context * r300)262 void r300_update_hyperz_state(struct r300_context* r300)
263 {
264     r300_update_ztop(r300);
265 
266     if (r300->hyperz_state.dirty) {
267         r300_update_hyperz(r300);
268     }
269 }
270