xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2010 Christoph Bumiller
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "pipe/p_context.h"
24 #include "pipe/p_defines.h"
25 #include "pipe/p_state.h"
26 #include "util/u_inlines.h"
27 
28 #include "nvc0/nvc0_context.h"
29 #include "nvc0/nvc0_query_hw.h"
30 
31 #include "nvc0/nvc0_compute.xml.h"
32 
33 static inline void
nvc0_program_update_context_state(struct nvc0_context * nvc0,struct nvc0_program * prog,int stage)34 nvc0_program_update_context_state(struct nvc0_context *nvc0,
35                                   struct nvc0_program *prog, int stage)
36 {
37    if (prog && prog->need_tls) {
38       const uint32_t flags = NV_VRAM_DOMAIN(&nvc0->screen->base) | NOUVEAU_BO_RDWR;
39       if (!nvc0->state.tls_required)
40          BCTX_REFN_bo(nvc0->bufctx_3d, 3D_TLS, flags, nvc0->screen->tls);
41       nvc0->state.tls_required |= 1 << stage;
42    } else {
43       if (nvc0->state.tls_required == (1 << stage))
44          nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_TLS);
45       nvc0->state.tls_required &= ~(1 << stage);
46    }
47 }
48 
49 static inline bool
nvc0_program_validate(struct nvc0_context * nvc0,struct nvc0_program * prog)50 nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
51 {
52    if (prog->mem)
53       return true;
54 
55    if (!prog->translated) {
56       prog->translated = nvc0_program_translate(
57          prog, nvc0->screen->base.device->chipset,
58          nvc0->screen->base.disk_shader_cache, &nvc0->base.debug);
59       if (!prog->translated)
60          return false;
61    }
62 
63    if (likely(prog->code_size))
64       return nvc0_program_upload(nvc0, prog);
65    return true; /* stream output info only */
66 }
67 
68 void
nvc0_program_sp_start_id(struct nvc0_context * nvc0,int stage,struct nvc0_program * prog)69 nvc0_program_sp_start_id(struct nvc0_context *nvc0, int stage,
70                          struct nvc0_program *prog)
71 {
72    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
73 
74    simple_mtx_assert_locked(&nvc0->screen->state_lock);
75    if (nvc0->screen->eng3d->oclass < GV100_3D_CLASS) {
76       BEGIN_NVC0(push, NVC0_3D(SP_START_ID(stage)), 1);
77       PUSH_DATA (push, prog->code_base);
78    } else {
79       BEGIN_NVC0(push, SUBC_3D(GV100_3D_SP_ADDRESS_HIGH(stage)), 2);
80       PUSH_DATAh(push, nvc0->screen->text->offset + prog->code_base);
81       PUSH_DATA (push, nvc0->screen->text->offset + prog->code_base);
82    }
83 }
84 
85 void
nvc0_vertprog_validate(struct nvc0_context * nvc0)86 nvc0_vertprog_validate(struct nvc0_context *nvc0)
87 {
88    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
89    struct nvc0_program *vp = nvc0->vertprog;
90 
91    if (!nvc0_program_validate(nvc0, vp))
92          return;
93    nvc0_program_update_context_state(nvc0, vp, 0);
94 
95    BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 1);
96    PUSH_DATA (push, 0x11);
97    nvc0_program_sp_start_id(nvc0, 1, vp);
98    BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1);
99    PUSH_DATA (push, vp->num_gprs);
100 
101    // BEGIN_NVC0(push, NVC0_3D_(0x163c), 1);
102    // PUSH_DATA (push, 0);
103 }
104 
105 void
nvc0_fragprog_validate(struct nvc0_context * nvc0)106 nvc0_fragprog_validate(struct nvc0_context *nvc0)
107 {
108    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
109    struct nvc0_program *fp = nvc0->fragprog;
110    struct pipe_rasterizer_state *rast = &nvc0->rast->pipe;
111 
112    if (fp->fp.force_persample_interp != rast->force_persample_interp) {
113       /* Force the program to be reuploaded, which will trigger interp fixups
114        * to get applied
115        */
116       if (fp->mem)
117          nouveau_heap_free(&fp->mem);
118 
119       fp->fp.force_persample_interp = rast->force_persample_interp;
120    }
121 
122    if (fp->fp.msaa != rast->multisample) {
123       /* Force the program to be reuploaded, which will trigger interp fixups
124        * to get applied
125        */
126       if (fp->mem)
127          nouveau_heap_free(&fp->mem);
128 
129       fp->fp.msaa = rast->multisample;
130    }
131 
132    /* Shade model works well enough when both colors follow it. However if one
133     * (or both) is explicitly set, then we have to go the patching route.
134     */
135    bool has_explicit_color = fp->fp.colors &&
136       (((fp->fp.colors & 1) && !fp->fp.color_interp[0]) ||
137        ((fp->fp.colors & 2) && !fp->fp.color_interp[1]));
138    bool hwflatshade = false;
139    if (has_explicit_color && fp->fp.flatshade != rast->flatshade) {
140       /* Force re-upload */
141       if (fp->mem)
142          nouveau_heap_free(&fp->mem);
143 
144       fp->fp.flatshade = rast->flatshade;
145 
146       /* Always smooth-shade in this mode, the shader will decide on its own
147        * when to flat-shade.
148        */
149    } else if (!has_explicit_color) {
150       hwflatshade = rast->flatshade;
151 
152       /* No need to binary-patch the shader each time, make sure that it's set
153        * up for the default behaviour.
154        */
155       fp->fp.flatshade = 0;
156    }
157 
158    if (hwflatshade != nvc0->state.flatshade) {
159       nvc0->state.flatshade = hwflatshade;
160       BEGIN_NVC0(push, NVC0_3D(SHADE_MODEL), 1);
161       PUSH_DATA (push, hwflatshade ? NVC0_3D_SHADE_MODEL_FLAT :
162                                      NVC0_3D_SHADE_MODEL_SMOOTH);
163    }
164 
165    if (fp->mem && !(nvc0->dirty_3d & NVC0_NEW_3D_FRAGPROG)) {
166       return;
167    }
168 
169    if (!nvc0_program_validate(nvc0, fp))
170          return;
171    nvc0_program_update_context_state(nvc0, fp, 4);
172 
173    if (fp->fp.early_z != nvc0->state.early_z_forced) {
174       nvc0->state.early_z_forced = fp->fp.early_z;
175       IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z);
176    }
177    if (fp->fp.post_depth_coverage != nvc0->state.post_depth_coverage) {
178       nvc0->state.post_depth_coverage = fp->fp.post_depth_coverage;
179       IMMED_NVC0(push, NVC0_3D(POST_DEPTH_COVERAGE),
180                  fp->fp.post_depth_coverage);
181    }
182 
183    BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 1);
184    PUSH_DATA (push, 0x51);
185    nvc0_program_sp_start_id(nvc0, 5, fp);
186    BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
187    PUSH_DATA (push, fp->num_gprs);
188 
189    BEGIN_NVC0(push, SUBC_3D(0x0360), 2);
190    PUSH_DATA (push, 0x20164010);
191    PUSH_DATA (push, 0x20);
192    BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1);
193    PUSH_DATA (push, fp->flags[0]);
194 }
195 
196 void
nvc0_tctlprog_validate(struct nvc0_context * nvc0)197 nvc0_tctlprog_validate(struct nvc0_context *nvc0)
198 {
199    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
200    struct nvc0_program *tp = nvc0->tctlprog;
201 
202    if (tp && nvc0_program_validate(nvc0, tp)) {
203       if (tp->tp.tess_mode != ~0) {
204          BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
205          PUSH_DATA (push, tp->tp.tess_mode);
206       }
207       BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
208       PUSH_DATA (push, 0x21);
209       nvc0_program_sp_start_id(nvc0, 2, tp);
210       BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
211       PUSH_DATA (push, tp->num_gprs);
212    } else {
213       tp = nvc0->tcp_empty;
214       /* not a whole lot we can do to handle this failure */
215       if (!nvc0_program_validate(nvc0, tp))
216          assert(!"unable to validate empty tcp");
217       BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
218       PUSH_DATA (push, 0x20);
219       nvc0_program_sp_start_id(nvc0, 2, tp);
220    }
221    nvc0_program_update_context_state(nvc0, tp, 1);
222 }
223 
224 void
nvc0_tevlprog_validate(struct nvc0_context * nvc0)225 nvc0_tevlprog_validate(struct nvc0_context *nvc0)
226 {
227    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
228    struct nvc0_program *tp = nvc0->tevlprog;
229 
230    if (tp && nvc0_program_validate(nvc0, tp)) {
231       if (tp->tp.tess_mode != ~0) {
232          BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
233          PUSH_DATA (push, tp->tp.tess_mode);
234       }
235       BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
236       PUSH_DATA (push, 0x31);
237       nvc0_program_sp_start_id(nvc0, 3, tp);
238       BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1);
239       PUSH_DATA (push, tp->num_gprs);
240    } else {
241       BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
242       PUSH_DATA (push, 0x30);
243    }
244    nvc0_program_update_context_state(nvc0, tp, 2);
245 }
246 
247 void
nvc0_gmtyprog_validate(struct nvc0_context * nvc0)248 nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
249 {
250    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
251    struct nvc0_program *gp = nvc0->gmtyprog;
252 
253    /* we allow GPs with no code for specifying stream output state only */
254    if (gp && nvc0_program_validate(nvc0, gp) && gp->code_size) {
255       BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
256       PUSH_DATA (push, 0x41);
257       nvc0_program_sp_start_id(nvc0, 4, gp);
258       BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1);
259       PUSH_DATA (push, gp->num_gprs);
260    } else {
261       BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
262       PUSH_DATA (push, 0x40);
263    }
264    nvc0_program_update_context_state(nvc0, gp, 3);
265 }
266 
267 void
nvc0_compprog_validate(struct nvc0_context * nvc0)268 nvc0_compprog_validate(struct nvc0_context *nvc0)
269 {
270    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
271    struct nvc0_program *cp = nvc0->compprog;
272 
273    if (cp && !nvc0_program_validate(nvc0, cp))
274       return;
275 
276    BEGIN_NVC0(push, NVC0_CP(FLUSH), 1);
277    PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
278 }
279 
280 void
nvc0_layer_validate(struct nvc0_context * nvc0)281 nvc0_layer_validate(struct nvc0_context *nvc0)
282 {
283    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
284    struct nvc0_program *last;
285    bool prog_selects_layer = false;
286    bool layer_viewport_relative = false;
287 
288    if (nvc0->gmtyprog)
289       last = nvc0->gmtyprog;
290    else if (nvc0->tevlprog)
291       last = nvc0->tevlprog;
292    else
293       last = nvc0->vertprog;
294 
295    if (last) {
296       prog_selects_layer = !!(last->hdr[13] & (1 << 9));
297       layer_viewport_relative = last->vp.layer_viewport_relative;
298    }
299 
300    BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
301    PUSH_DATA (push, prog_selects_layer ? NVC0_3D_LAYER_USE_GP : 0);
302    if (nvc0->screen->eng3d->oclass >= GM200_3D_CLASS) {
303       IMMED_NVC0(push, NVC0_3D(LAYER_VIEWPORT_RELATIVE),
304                  layer_viewport_relative);
305    }
306 }
307 
308 void
nvc0_tfb_validate(struct nvc0_context * nvc0)309 nvc0_tfb_validate(struct nvc0_context *nvc0)
310 {
311    struct nouveau_pushbuf *push = nvc0->base.pushbuf;
312    struct nvc0_transform_feedback_state *tfb;
313    unsigned b;
314 
315    if (nvc0->gmtyprog) tfb = nvc0->gmtyprog->tfb;
316    else
317    if (nvc0->tevlprog) tfb = nvc0->tevlprog->tfb;
318    else
319       tfb = nvc0->vertprog->tfb;
320 
321    IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 1 : 0);
322 
323    if (tfb && tfb != nvc0->state.tfb) {
324       for (b = 0; b < 4; ++b) {
325          if (tfb->varying_count[b]) {
326             unsigned n = (tfb->varying_count[b] + 3) / 4;
327 
328             BEGIN_NVC0(push, NVC0_3D(TFB_STREAM(b)), 3);
329             PUSH_DATA (push, tfb->stream[b]);
330             PUSH_DATA (push, tfb->varying_count[b]);
331             PUSH_DATA (push, tfb->stride[b]);
332             BEGIN_NVC0(push, NVC0_3D(TFB_VARYING_LOCS(b, 0)), n);
333             PUSH_DATAp(push, tfb->varying_index[b], n);
334 
335             if (nvc0->tfbbuf[b])
336                nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b];
337          } else {
338             IMMED_NVC0(push, NVC0_3D(TFB_VARYING_COUNT(b)), 0);
339          }
340       }
341    }
342 
343    simple_mtx_assert_locked(&nvc0->screen->state_lock);
344    nvc0->state.tfb = tfb;
345 
346    if (!(nvc0->dirty_3d & NVC0_NEW_3D_TFB_TARGETS))
347       return;
348 
349    for (b = 0; b < nvc0->num_tfbbufs; ++b) {
350       struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]);
351       struct nv04_resource *buf;
352 
353       if (targ && tfb)
354          targ->stride = tfb->stride[b];
355 
356       if (!targ || !targ->stride) {
357          IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);
358          continue;
359       }
360 
361       buf = nv04_resource(targ->pipe.buffer);
362 
363       BCTX_REFN(nvc0->bufctx_3d, 3D_TFB, buf, WR);
364 
365       if (!(nvc0->tfbbuf_dirty & (1 << b)))
366          continue;
367 
368       if (!targ->clean)
369          nvc0_hw_query_fifo_wait(nvc0, nvc0_query(targ->pq));
370       PUSH_SPACE_EX(push, 0, 0, 1);
371       BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
372       PUSH_DATA (push, 1);
373       PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
374       PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
375       PUSH_DATA (push, targ->pipe.buffer_size);
376       if (!targ->clean) {
377          nvc0_hw_query_pushbuf_submit(push, nvc0_query(targ->pq), 0x4);
378       } else {
379          PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */
380          targ->clean = false;
381       }
382    }
383    for (; b < 4; ++b)
384       IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);
385 }
386