xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/freedreno/a3xx/fd3_program.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright © 2013 Rob Clark <robclark@freedesktop.org>
 * SPDX-License-Identifier: MIT
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */
8 
9 #include "pipe/p_state.h"
10 #include "util/format/u_format.h"
11 #include "util/u_inlines.h"
12 #include "util/u_math.h"
13 #include "util/u_memory.h"
14 #include "util/u_string.h"
15 
16 #include "freedreno_program.h"
17 
18 #include "fd3_emit.h"
19 #include "fd3_format.h"
20 #include "fd3_program.h"
21 #include "fd3_texture.h"
22 
23 bool
fd3_needs_manual_clipping(const struct ir3_shader * shader,const struct pipe_rasterizer_state * rast)24 fd3_needs_manual_clipping(const struct ir3_shader *shader,
25                           const struct pipe_rasterizer_state *rast)
26 {
27    uint64_t outputs = ir3_shader_outputs(shader);
28 
29    return (!rast->depth_clip_near ||
30            util_bitcount(rast->clip_plane_enable) > 6 ||
31            outputs & ((1ULL << VARYING_SLOT_CLIP_VERTEX) |
32                       (1ULL << VARYING_SLOT_CLIP_DIST0) |
33                       (1ULL << VARYING_SLOT_CLIP_DIST1)));
34 }
35 
36 static void
emit_shader(struct fd_ringbuffer * ring,const struct ir3_shader_variant * so)37 emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
38 {
39    const struct ir3_info *si = &so->info;
40    enum adreno_state_block sb;
41    enum adreno_state_src src;
42    uint32_t i, sz, *bin;
43 
44    if (so->type == MESA_SHADER_VERTEX) {
45       sb = SB_VERT_SHADER;
46    } else {
47       sb = SB_FRAG_SHADER;
48    }
49 
50    if (FD_DBG(DIRECT)) {
51       sz = si->sizedwords;
52       src = SS_DIRECT;
53       bin = fd_bo_map(so->bo);
54    } else {
55       sz = 0;
56       src = SS_INDIRECT;
57       bin = NULL;
58    }
59 
60    OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
61    OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | CP_LOAD_STATE_0_STATE_SRC(src) |
62                      CP_LOAD_STATE_0_STATE_BLOCK(sb) |
63                      CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
64    if (bin) {
65       OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
66                         CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
67    } else {
68       OUT_RELOC(ring, so->bo, 0, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
69    }
70    for (i = 0; i < sz; i++) {
71       OUT_RING(ring, bin[i]);
72    }
73 }
74 
75 void
fd3_program_emit(struct fd_ringbuffer * ring,struct fd3_emit * emit,int nr,struct pipe_surface ** bufs)76 fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, int nr,
77                  struct pipe_surface **bufs)
78 {
79    const struct ir3_shader_variant *vp, *fp;
80    const struct ir3_info *vsi, *fsi;
81    enum a3xx_instrbuffermode fpbuffer, vpbuffer;
82    uint32_t fpbuffersz, vpbuffersz, fsoff;
83    uint32_t pos_regid, posz_regid, psize_regid;
84    uint32_t ij_regid[4], face_regid, coord_regid, zwcoord_regid;
85    uint32_t color_regid[4] = {0};
86    int constmode;
87    int i, j;
88 
89    assert(nr <= ARRAY_SIZE(color_regid));
90 
91    vp = fd3_emit_get_vp(emit);
92    fp = fd3_emit_get_fp(emit);
93 
94    vsi = &vp->info;
95    fsi = &fp->info;
96 
97    fpbuffer = BUFFER;
98    vpbuffer = BUFFER;
99    fpbuffersz = fp->instrlen;
100    vpbuffersz = vp->instrlen;
101 
102    /*
103     * Decide whether to use BUFFER or CACHE mode for VS and FS.  It
104     * appears like 256 is the hard limit, but when the combined size
105     * exceeds 128 then blob will try to keep FS in BUFFER mode and
106     * switch to CACHE for VS until VS is too large.  The blob seems
107     * to switch FS out of BUFFER mode at slightly under 128.  But
108     * a bit fuzzy on the decision tree, so use slightly conservative
109     * limits.
110     *
111     * TODO check if these thresholds for BUFFER vs CACHE mode are the
112     *      same for all a3xx or whether we need to consider the gpuid
113     */
114 
115    if ((fpbuffersz + vpbuffersz) > 128) {
116       if (fpbuffersz < 112) {
117          /* FP:BUFFER   VP:CACHE  */
118          vpbuffer = CACHE;
119          vpbuffersz = 256 - fpbuffersz;
120       } else if (vpbuffersz < 112) {
121          /* FP:CACHE    VP:BUFFER */
122          fpbuffer = CACHE;
123          fpbuffersz = 256 - vpbuffersz;
124       } else {
125          /* FP:CACHE    VP:CACHE  */
126          vpbuffer = fpbuffer = CACHE;
127          vpbuffersz = fpbuffersz = 192;
128       }
129    }
130 
131    if (fpbuffer == BUFFER) {
132       fsoff = 128 - fpbuffersz;
133    } else {
134       fsoff = 256 - fpbuffersz;
135    }
136 
137    /* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
138    constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;
139 
140    pos_regid = ir3_find_output_regid(vp, VARYING_SLOT_POS);
141    posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
142    psize_regid = ir3_find_output_regid(vp, VARYING_SLOT_PSIZ);
143    if (fp->color0_mrt) {
144       color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
145          ir3_find_output_regid(fp, FRAG_RESULT_COLOR);
146    } else {
147       color_regid[0] = ir3_find_output_regid(fp, FRAG_RESULT_DATA0);
148       color_regid[1] = ir3_find_output_regid(fp, FRAG_RESULT_DATA1);
149       color_regid[2] = ir3_find_output_regid(fp, FRAG_RESULT_DATA2);
150       color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3);
151    }
152 
153    face_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRONT_FACE);
154    coord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD);
155    zwcoord_regid =
156       (coord_regid == regid(63, 0)) ? regid(63, 0) : (coord_regid + 2);
157    ij_regid[0] =
158       ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
159    ij_regid[1] =
160       ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
161    ij_regid[2] =
162       ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
163    ij_regid[3] =
164       ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
165 
166    /* adjust regids for alpha output formats. there is no alpha render
167     * format, so it's just treated like red
168     */
169    for (i = 0; i < nr; i++)
170       if (util_format_is_alpha(pipe_surface_format(bufs[i])))
171          color_regid[i] += 3;
172 
173    /* we could probably divide this up into things that need to be
174     * emitted if frag-prog is dirty vs if vert-prog is dirty..
175     */
176 
177    OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
178    OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
179                      A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
180                      A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
181                      /* NOTE:  I guess SHADERRESTART and CONSTFULLUPDATE maybe
182                       * flush some caches? I think we only need to set those
183                       * bits if we have updated const or shader..
184                       */
185                      A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
186                      A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
187    OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
188                      A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
189                      A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(coord_regid) |
190                      A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(zwcoord_regid));
191    OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31) |
192                      A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(face_regid));
193    OUT_RING(ring,
194             A3XX_HLSQ_CONTROL_3_REG_IJPERSPCENTERREGID(ij_regid[0]) |
195                A3XX_HLSQ_CONTROL_3_REG_IJNONPERSPCENTERREGID(ij_regid[1]) |
196                A3XX_HLSQ_CONTROL_3_REG_IJPERSPCENTROIDREGID(ij_regid[2]) |
197                A3XX_HLSQ_CONTROL_3_REG_IJNONPERSPCENTROIDREGID(ij_regid[3]));
198    OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
199                      A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
200                      A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz));
201    OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) |
202                      A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
203                      A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz));
204 
205    OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
206    OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) |
207                      COND(emit->binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
208                      A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
209                      A3XX_SP_SP_CTRL_REG_L0MODE(0));
210 
211    OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
212    OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));
213 
214    OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
215    OUT_RING(ring,
216             A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
217                A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(vpbuffer) |
218                COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) |
219                A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
220                A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
221                A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
222                A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
223                A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz));
224    OUT_RING(ring,
225             A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
226                A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
227                A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen - 1, 0)));
228    OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
229                      A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
230                      A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->varying_in));
231 
232    struct ir3_shader_linkage l = {0};
233    ir3_link_shaders(&l, vp, fp, false);
234 
235    for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
236       uint32_t reg = 0;
237 
238       OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1);
239 
240       reg |= A3XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
241       reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
242       j++;
243 
244       reg |= A3XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
245       reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
246       j++;
247 
248       OUT_RING(ring, reg);
249    }
250 
251    for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) {
252       uint32_t reg = 0;
253 
254       OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1);
255 
256       reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8);
257       reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8);
258       reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8);
259       reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8);
260 
261       OUT_RING(ring, reg);
262    }
263 
264    OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
265    OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
266                      A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
267    OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
268 
269    if (emit->binning_pass) {
270       OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
271       OUT_RING(ring, 0x00000000);
272 
273       OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
274       OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
275                         A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER));
276       OUT_RING(ring, 0x00000000);
277 
278       OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 1);
279       OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
280                         A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
281    } else {
282       OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
283       OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
284 
285       OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
286       OUT_RING(ring,
287                A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
288                   A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(fpbuffer) |
289                   COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) |
290                   A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
291                   A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
292                   A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP |
293                   A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
294                   A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
295                   COND(fp->need_pixlod, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
296                   A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz));
297       OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
298                         A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->sysval_in) |
299                         A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(
300                            MAX2(fp->constlen - 1, 0)) |
301                         A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
302 
303       OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
304       OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(
305                         MAX2(128, vp->constlen)) |
306                         A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff));
307       OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
308    }
309 
310    OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
311    OUT_RING(ring, COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
312                      A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) |
313                      A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1));
314 
315    OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
316    for (i = 0; i < 4; i++) {
317       uint32_t mrt_reg =
318          A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
319          COND(color_regid[i] & HALF_REG_ID, A3XX_SP_FS_MRT_REG_HALF_PRECISION);
320 
321       if (i < nr) {
322          enum pipe_format fmt = pipe_surface_format(bufs[i]);
323          mrt_reg |=
324             COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) |
325             COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT);
326       }
327       OUT_RING(ring, mrt_reg);
328    }
329 
330    if (emit->binning_pass) {
331       OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
332       OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) | A3XX_VPC_ATTR_LMSIZE(1) |
333                         COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
334       OUT_RING(ring, 0x00000000);
335    } else {
336       uint32_t vinterp[4], flatshade[2], vpsrepl[4];
337 
338       memset(vinterp, 0, sizeof(vinterp));
339       memset(flatshade, 0, sizeof(flatshade));
340       memset(vpsrepl, 0, sizeof(vpsrepl));
341 
342       /* figure out VARYING_INTERP / FLAT_SHAD register values: */
343       for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count;) {
344          /* NOTE: varyings are packed, so if compmask is 0xb
345           * then first, third, and fourth component occupy
346           * three consecutive varying slots:
347           */
348          unsigned compmask = fp->inputs[j].compmask;
349 
350          uint32_t inloc = fp->inputs[j].inloc;
351 
352          if (fp->inputs[j].flat ||
353              (fp->inputs[j].rasterflat && emit->rasterflat)) {
354             uint32_t loc = inloc;
355 
356             for (i = 0; i < 4; i++) {
357                if (compmask & (1 << i)) {
358                   vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
359                   flatshade[loc / 32] |= 1 << (loc % 32);
360                   loc++;
361                }
362             }
363          }
364 
365          bool coord_mode = emit->sprite_coord_mode;
366          if (ir3_point_sprite(fp, j, emit->sprite_coord_enable, &coord_mode)) {
367             /* mask is two 2-bit fields, where:
368              *   '01' -> S
369              *   '10' -> T
370              *   '11' -> 1 - T  (flip mode)
371              */
372             unsigned mask = coord_mode ? 0b1101 : 0b1001;
373             uint32_t loc = inloc;
374             if (compmask & 0x1) {
375                vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
376                loc++;
377             }
378             if (compmask & 0x2) {
379                vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
380                loc++;
381             }
382             if (compmask & 0x4) {
383                /* .z <- 0.0f */
384                vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
385                loc++;
386             }
387             if (compmask & 0x8) {
388                /* .w <- 1.0f */
389                vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
390                loc++;
391             }
392          }
393       }
394 
395       OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
396       OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
397                         A3XX_VPC_ATTR_THRDASSIGN(1) | A3XX_VPC_ATTR_LMSIZE(1) |
398                         COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
399       OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
400                         A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
401 
402       OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
403       OUT_RING(ring, vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
404       OUT_RING(ring, vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
405       OUT_RING(ring, vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
406       OUT_RING(ring, vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
407 
408       OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
409       OUT_RING(ring, vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
410       OUT_RING(ring, vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
411       OUT_RING(ring, vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
412       OUT_RING(ring, vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
413 
414       OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
415       OUT_RING(ring, flatshade[0]); /* SP_FS_FLAT_SHAD_MODE_REG_0 */
416       OUT_RING(ring, flatshade[1]); /* SP_FS_FLAT_SHAD_MODE_REG_1 */
417    }
418 
419    if (vpbuffer == BUFFER)
420       emit_shader(ring, vp);
421 
422    OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
423    OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
424 
425    if (!emit->binning_pass) {
426       if (fpbuffer == BUFFER)
427          emit_shader(ring, fp);
428 
429       OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
430       OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
431    }
432 }
433 
434 static struct ir3_program_state *
fd3_program_create(void * data,const struct ir3_shader_variant * bs,const struct ir3_shader_variant * vs,const struct ir3_shader_variant * hs,const struct ir3_shader_variant * ds,const struct ir3_shader_variant * gs,const struct ir3_shader_variant * fs,const struct ir3_cache_key * key)435 fd3_program_create(void *data, const struct ir3_shader_variant *bs,
436                    const struct ir3_shader_variant *vs,
437                    const struct ir3_shader_variant *hs,
438                    const struct ir3_shader_variant *ds,
439                    const struct ir3_shader_variant *gs,
440                    const struct ir3_shader_variant *fs,
441                    const struct ir3_cache_key *key) in_dt
442 {
443    struct fd_context *ctx = fd_context(data);
444    struct fd3_program_state *state = CALLOC_STRUCT(fd3_program_state);
445 
446    tc_assert_driver_thread(ctx->tc);
447 
448    state->bs = bs;
449    state->vs = vs;
450    state->fs = fs;
451 
452    return &state->base;
453 }
454 
/**
 * Cache callback: free a program state.
 *
 * @param data   unused
 * @param state  base state embedded in the fd3_program_state to free
 */
static void
fd3_program_destroy(void *data, struct ir3_program_state *state)
{
   /* convert the base pointer back to the containing object and free it */
   free(fd3_program_state(state));
}
461 
462 static const struct ir3_cache_funcs cache_funcs = {
463    .create_state = fd3_program_create,
464    .destroy_state = fd3_program_destroy,
465 };
466 
467 void
fd3_prog_init(struct pipe_context * pctx)468 fd3_prog_init(struct pipe_context *pctx)
469 {
470    struct fd_context *ctx = fd_context(pctx);
471 
472    ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx);
473    ir3_prog_init(pctx);
474    fd_prog_init(pctx);
475 }
476