xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/v3d/v3dx_emit.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2014-2017 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "util/format/u_format.h"
25 #include "util/half_float.h"
26 #include "v3d_context.h"
27 #include "broadcom/common/v3d_macros.h"
28 #include "broadcom/cle/v3dx_pack.h"
29 #include "broadcom/common/v3d_util.h"
30 #include "broadcom/compiler/v3d_compiler.h"
31 
32 static uint8_t
v3d_factor(enum pipe_blendfactor factor,bool dst_alpha_one)33 v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
34 {
35         /* We may get a bad blendfactor when blending is disabled. */
36         if (factor == 0)
37                 return V3D_BLEND_FACTOR_ZERO;
38 
39         switch (factor) {
40         case PIPE_BLENDFACTOR_ZERO:
41                 return V3D_BLEND_FACTOR_ZERO;
42         case PIPE_BLENDFACTOR_ONE:
43                 return V3D_BLEND_FACTOR_ONE;
44         case PIPE_BLENDFACTOR_SRC_COLOR:
45                 return V3D_BLEND_FACTOR_SRC_COLOR;
46         case PIPE_BLENDFACTOR_INV_SRC_COLOR:
47                 return V3D_BLEND_FACTOR_INV_SRC_COLOR;
48         case PIPE_BLENDFACTOR_DST_COLOR:
49                 return V3D_BLEND_FACTOR_DST_COLOR;
50         case PIPE_BLENDFACTOR_INV_DST_COLOR:
51                 return V3D_BLEND_FACTOR_INV_DST_COLOR;
52         case PIPE_BLENDFACTOR_SRC_ALPHA:
53                 return V3D_BLEND_FACTOR_SRC_ALPHA;
54         case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
55                 return V3D_BLEND_FACTOR_INV_SRC_ALPHA;
56         case PIPE_BLENDFACTOR_DST_ALPHA:
57                 return (dst_alpha_one ?
58                         V3D_BLEND_FACTOR_ONE :
59                         V3D_BLEND_FACTOR_DST_ALPHA);
60         case PIPE_BLENDFACTOR_INV_DST_ALPHA:
61                 return (dst_alpha_one ?
62                         V3D_BLEND_FACTOR_ZERO :
63                         V3D_BLEND_FACTOR_INV_DST_ALPHA);
64         case PIPE_BLENDFACTOR_CONST_COLOR:
65                 return V3D_BLEND_FACTOR_CONST_COLOR;
66         case PIPE_BLENDFACTOR_INV_CONST_COLOR:
67                 return V3D_BLEND_FACTOR_INV_CONST_COLOR;
68         case PIPE_BLENDFACTOR_CONST_ALPHA:
69                 return V3D_BLEND_FACTOR_CONST_ALPHA;
70         case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
71                 return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
72         case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
73                 return (dst_alpha_one ?
74                         V3D_BLEND_FACTOR_ZERO :
75                         V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE);
76         default:
77                 unreachable("Bad blend factor");
78         }
79 }
80 
81 static uint32_t
translate_colormask(struct v3d_context * v3d,uint32_t colormask,int rt)82 translate_colormask(struct v3d_context *v3d, uint32_t colormask, int rt)
83 {
84         if (v3d->swap_color_rb & (1 << rt)) {
85                 colormask = ((colormask & (2 | 8)) |
86                              ((colormask & 1) << 2) |
87                              ((colormask & 4) >> 2));
88         }
89 
90         return (~colormask) & 0xf;
91 }
92 
93 static void
emit_rt_blend(struct v3d_context * v3d,struct v3d_job * job,struct pipe_blend_state * blend,int rt,uint8_t rt_mask,bool blend_dst_alpha_one)94 emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job,
95               struct pipe_blend_state *blend, int rt, uint8_t rt_mask,
96               bool blend_dst_alpha_one)
97 {
98         struct pipe_rt_blend_state *rtblend = &blend->rt[rt];
99 
100         /* We don't need to emit blend state for disabled RTs. */
101         if (!rtblend->blend_enable)
102                 return;
103 
104         cl_emit(&job->bcl, BLEND_CFG, config) {
105                 config.render_target_mask = rt_mask;
106 
107                 config.color_blend_mode = rtblend->rgb_func;
108                 config.color_blend_dst_factor =
109                         v3d_factor(rtblend->rgb_dst_factor,
110                                    blend_dst_alpha_one);
111                 config.color_blend_src_factor =
112                         v3d_factor(rtblend->rgb_src_factor,
113                                    blend_dst_alpha_one);
114 
115                 config.alpha_blend_mode = rtblend->alpha_func;
116                 config.alpha_blend_dst_factor =
117                         v3d_factor(rtblend->alpha_dst_factor,
118                                    blend_dst_alpha_one);
119                 config.alpha_blend_src_factor =
120                         v3d_factor(rtblend->alpha_src_factor,
121                                    blend_dst_alpha_one);
122         }
123 }
124 
125 static void
emit_flat_shade_flags(struct v3d_job * job,int varying_offset,uint32_t varyings,enum V3DX (Varying_Flags_Action)lower,enum V3DX (Varying_Flags_Action)higher)126 emit_flat_shade_flags(struct v3d_job *job,
127                       int varying_offset,
128                       uint32_t varyings,
129                       enum V3DX(Varying_Flags_Action) lower,
130                       enum V3DX(Varying_Flags_Action) higher)
131 {
132         cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
133                 flags.varying_offset_v0 = varying_offset;
134                 flags.flat_shade_flags_for_varyings_v024 = varyings;
135                 flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
136                         lower;
137                 flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
138                         higher;
139         }
140 }
141 
142 static void
emit_noperspective_flags(struct v3d_job * job,int varying_offset,uint32_t varyings,enum V3DX (Varying_Flags_Action)lower,enum V3DX (Varying_Flags_Action)higher)143 emit_noperspective_flags(struct v3d_job *job,
144                          int varying_offset,
145                          uint32_t varyings,
146                          enum V3DX(Varying_Flags_Action) lower,
147                          enum V3DX(Varying_Flags_Action) higher)
148 {
149         cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) {
150                 flags.varying_offset_v0 = varying_offset;
151                 flags.non_perspective_flags_for_varyings_v024 = varyings;
152                 flags.action_for_non_perspective_flags_of_lower_numbered_varyings =
153                         lower;
154                 flags.action_for_non_perspective_flags_of_higher_numbered_varyings =
155                         higher;
156         }
157 }
158 
159 static void
emit_centroid_flags(struct v3d_job * job,int varying_offset,uint32_t varyings,enum V3DX (Varying_Flags_Action)lower,enum V3DX (Varying_Flags_Action)higher)160 emit_centroid_flags(struct v3d_job *job,
161                     int varying_offset,
162                     uint32_t varyings,
163                     enum V3DX(Varying_Flags_Action) lower,
164                     enum V3DX(Varying_Flags_Action) higher)
165 {
166         cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
167                 flags.varying_offset_v0 = varying_offset;
168                 flags.centroid_flags_for_varyings_v024 = varyings;
169                 flags.action_for_centroid_flags_of_lower_numbered_varyings =
170                         lower;
171                 flags.action_for_centroid_flags_of_higher_numbered_varyings =
172                         higher;
173         }
174 }
175 
176 static bool
emit_varying_flags(struct v3d_job * job,uint32_t * flags,void (* flag_emit_callback)(struct v3d_job * job,int varying_offset,uint32_t flags,enum V3DX (Varying_Flags_Action)lower,enum V3DX (Varying_Flags_Action)higher))177 emit_varying_flags(struct v3d_job *job, uint32_t *flags,
178                    void (*flag_emit_callback)(struct v3d_job *job,
179                                               int varying_offset,
180                                               uint32_t flags,
181                                               enum V3DX(Varying_Flags_Action) lower,
182                                               enum V3DX(Varying_Flags_Action) higher))
183 {
184         struct v3d_context *v3d = job->v3d;
185         bool emitted_any = false;
186 
187         for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->flat_shade_flags); i++) {
188                 if (!flags[i])
189                         continue;
190 
191                 if (emitted_any) {
192                         flag_emit_callback(job, i, flags[i],
193                                            V3D_VARYING_FLAGS_ACTION_UNCHANGED,
194                                            V3D_VARYING_FLAGS_ACTION_UNCHANGED);
195                 } else if (i == 0) {
196                         flag_emit_callback(job, i, flags[i],
197                                            V3D_VARYING_FLAGS_ACTION_UNCHANGED,
198                                            V3D_VARYING_FLAGS_ACTION_ZEROED);
199                 } else {
200                         flag_emit_callback(job, i, flags[i],
201                                            V3D_VARYING_FLAGS_ACTION_ZEROED,
202                                            V3D_VARYING_FLAGS_ACTION_ZEROED);
203                 }
204                 emitted_any = true;
205         }
206 
207         return emitted_any;
208 }
209 
210 static inline struct v3d_uncompiled_shader *
get_tf_shader(struct v3d_context * v3d)211 get_tf_shader(struct v3d_context *v3d)
212 {
213         if (v3d->prog.bind_gs)
214                 return v3d->prog.bind_gs;
215         else
216                 return v3d->prog.bind_vs;
217 }
218 
219 void
v3dX(emit_state)220 v3dX(emit_state)(struct pipe_context *pctx)
221 {
222         struct v3d_context *v3d = v3d_context(pctx);
223         struct v3d_job *job = v3d->job;
224         bool rasterizer_discard = v3d->rasterizer->base.rasterizer_discard;
225 
226         if (v3d->dirty & (V3D_DIRTY_SCISSOR | V3D_DIRTY_VIEWPORT |
227                           V3D_DIRTY_RASTERIZER)) {
228                 float *vpscale = v3d->viewport.scale;
229                 float *vptranslate = v3d->viewport.translate;
230                 float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
231                 float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
232                 float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
233                 float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
234 
235                 /* Clip to the scissor if it's enabled, but still clip to the
236                  * drawable regardless since that controls where the binner
237                  * tries to put things.
238                  *
239                  * Additionally, always clip the rendering to the viewport,
240                  * since the hardware does guardband clipping, meaning
241                  * primitives would rasterize outside of the view volume.
242                  */
243                 uint32_t minx, miny, maxx, maxy;
244                 if (!v3d->rasterizer->base.scissor) {
245                         minx = MAX2(vp_minx, 0);
246                         miny = MAX2(vp_miny, 0);
247                         maxx = MIN2(vp_maxx, job->draw_width);
248                         maxy = MIN2(vp_maxy, job->draw_height);
249                 } else {
250                         minx = MAX2(vp_minx, v3d->scissor.minx);
251                         miny = MAX2(vp_miny, v3d->scissor.miny);
252                         maxx = MIN2(vp_maxx, v3d->scissor.maxx);
253                         maxy = MIN2(vp_maxy, v3d->scissor.maxy);
254                 }
255 
256                 cl_emit(&job->bcl, CLIP_WINDOW, clip) {
257                         clip.clip_window_left_pixel_coordinate = minx;
258                         clip.clip_window_bottom_pixel_coordinate = miny;
259                         if (maxx > minx && maxy > miny) {
260                                 clip.clip_window_width_in_pixels = maxx - minx;
261                                 clip.clip_window_height_in_pixels = maxy - miny;
262                         }
263                 }
264 
265                 job->draw_min_x = MIN2(job->draw_min_x, minx);
266                 job->draw_min_y = MIN2(job->draw_min_y, miny);
267                 job->draw_max_x = MAX2(job->draw_max_x, maxx);
268                 job->draw_max_y = MAX2(job->draw_max_y, maxy);
269 
270                 if (!v3d->rasterizer->base.scissor) {
271                     job->scissor.disabled = true;
272                 } else if (!job->scissor.disabled &&
273                            (v3d->dirty & V3D_DIRTY_SCISSOR)) {
274                         if (job->scissor.count < MAX_JOB_SCISSORS) {
275                                 job->scissor.rects[job->scissor.count].min_x =
276                                         v3d->scissor.minx;
277                                 job->scissor.rects[job->scissor.count].min_y =
278                                         v3d->scissor.miny;
279                                 job->scissor.rects[job->scissor.count].max_x =
280                                         v3d->scissor.maxx - 1;
281                                 job->scissor.rects[job->scissor.count].max_y =
282                                         v3d->scissor.maxy - 1;
283                                 job->scissor.count++;
284                         } else {
285                                 job->scissor.disabled = true;
286                                 perf_debug("Too many scissor rects.");
287                         }
288                 }
289         }
290 
291         if (v3d->dirty & (V3D_DIRTY_RASTERIZER |
292                           V3D_DIRTY_ZSA |
293                           V3D_DIRTY_BLEND |
294                           V3D_DIRTY_COMPILED_FS)) {
295                 cl_emit(&job->bcl, CFG_BITS, config) {
296                         config.enable_forward_facing_primitive =
297                                 !rasterizer_discard &&
298                                 !(v3d->rasterizer->base.cull_face &
299                                   PIPE_FACE_FRONT);
300                         config.enable_reverse_facing_primitive =
301                                 !rasterizer_discard &&
302                                 !(v3d->rasterizer->base.cull_face &
303                                   PIPE_FACE_BACK);
304                         /* This seems backwards, but it's what gets the
305                          * clipflat test to pass.
306                          */
307                         config.clockwise_primitives =
308                                 v3d->rasterizer->base.front_ccw;
309 
310                         config.enable_depth_offset =
311                                 v3d->rasterizer->base.offset_tri;
312 
313                         /* V3D follows GL behavior where the sample mask only
314                          * applies when MSAA is enabled.  Gallium has sample
315                          * mask apply anyway, and the MSAA blit shaders will
316                          * set sample mask without explicitly setting
317                          * rasterizer oversample.  Just force it on here,
318                          * since the blit shaders are the only way to have
319                          * !multisample && samplemask != 0xf.
320                          */
321                         config.rasterizer_oversample_mode =
322                                 v3d->rasterizer->base.multisample ||
323                                 v3d->sample_mask != 0xf;
324 
325                         config.direct3d_provoking_vertex =
326                                 v3d->rasterizer->base.flatshade_first;
327 
328                         config.blend_enable = v3d->blend->blend_enables;
329 
330                         /* Note: EZ state may update based on the compiled FS,
331                          * along with ZSA
332                          */
333 #if V3D_VERSION == 42
334                         config.early_z_updates_enable =
335                                 (job->ez_state != V3D_EZ_DISABLED);
336 #endif
337                         if (v3d->zsa->base.depth_enabled) {
338                                 config.z_updates_enable =
339                                         v3d->zsa->base.depth_writemask;
340 #if V3D_VERSION == 42
341                                 config.early_z_enable =
342                                         config.early_z_updates_enable;
343 #endif
344                                 config.depth_test_function =
345                                         v3d->zsa->base.depth_func;
346                         } else {
347                                 config.depth_test_function = PIPE_FUNC_ALWAYS;
348                         }
349 
350                         config.stencil_enable =
351                                 v3d->zsa->base.stencil[0].enabled;
352 
353                         /* Use nicer line caps when line smoothing is
354                          * enabled
355                          */
356                         config.line_rasterization =
357                                 v3d_line_smoothing_enabled(v3d) ?
358                                 V3D_LINE_RASTERIZATION_PERP_END_CAPS :
359                                 V3D_LINE_RASTERIZATION_DIAMOND_EXIT;
360 
361                         if (config.enable_forward_facing_primitive &&
362                             config.enable_reverse_facing_primitive &&
363                             v3d->rasterizer->base.fill_front != v3d->rasterizer->base.fill_back) {
364                                 mesa_logw_once("Setting a different polygon mode for "
365                                                "front and back faces is not supported");
366                         }
367 
368                         if (config.enable_forward_facing_primitive) {
369                                 if (v3d->rasterizer->base.fill_front != PIPE_POLYGON_MODE_FILL) {
370                                         config.direct3d_wireframe_triangles_mode = true;
371                                         config.direct3d_point_fill_mode =
372                                                 v3d->rasterizer->base.fill_front == PIPE_POLYGON_MODE_POINT;
373                                 }
374                         } else {
375                                 if (v3d->rasterizer->base.fill_back != PIPE_POLYGON_MODE_FILL) {
376                                         config.direct3d_wireframe_triangles_mode = true;
377                                         config.direct3d_point_fill_mode =
378                                                 v3d->rasterizer->base.fill_back == PIPE_POLYGON_MODE_POINT;
379                                 }
380                         }
381 
382 #if V3D_VERSION >= 71
383                         config.z_clipping_mode = v3d->rasterizer->base.depth_clip_near ||
384                            v3d->rasterizer->base.depth_clip_far ?
385                            V3D_Z_CLIP_MODE_MIN_ONE_TO_ONE : V3D_Z_CLIP_MODE_NONE;
386 
387                         config.z_clamp_mode = v3d->rasterizer->base.depth_clamp;
388 #endif
389                 }
390         }
391 
392         if (v3d->dirty & V3D_DIRTY_RASTERIZER &&
393             v3d->rasterizer->base.offset_tri) {
394                 if (v3d->screen->devinfo.ver == 42 &&
395                     job->zsbuf &&
396                     job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
397                         cl_emit_prepacked_sized(&job->bcl,
398                                                 v3d->rasterizer->depth_offset_z16,
399                                                 cl_packet_length(DEPTH_OFFSET));
400                 } else {
401                         cl_emit_prepacked_sized(&job->bcl,
402                                                 v3d->rasterizer->depth_offset,
403                                                 cl_packet_length(DEPTH_OFFSET));
404                 }
405         }
406 
407         if (v3d->dirty & V3D_DIRTY_RASTERIZER) {
408                 cl_emit(&job->bcl, POINT_SIZE, point_size) {
409                         point_size.point_size = v3d->rasterizer->point_size;
410                 }
411 
412                 cl_emit(&job->bcl, LINE_WIDTH, line_width) {
413                         line_width.line_width = v3d_get_real_line_width(v3d);
414                 }
415         }
416 
417         if (v3d->dirty & V3D_DIRTY_VIEWPORT) {
418 #if V3D_VERSION == 42
419                 cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
420                         clip.viewport_half_width_in_1_256th_of_pixel =
421                                 v3d->viewport.scale[0] * 256.0f;
422                         clip.viewport_half_height_in_1_256th_of_pixel =
423                                 v3d->viewport.scale[1] * 256.0f;
424                 }
425 #endif
426 #if V3D_VERSION >= 71
427                 cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
428                         clip.viewport_half_width_in_1_64th_of_pixel =
429                                 v3d->viewport.scale[0] * 64.0f;
430                         clip.viewport_half_height_in_1_64th_of_pixel =
431                                 v3d->viewport.scale[1] * 64.0f;
432                 }
433 #endif
434 
435 
436                 cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
437                         clip.viewport_z_offset_zc_to_zs =
438                                 v3d->viewport.translate[2];
439                         clip.viewport_z_scale_zc_to_zs =
440                                 v3d->viewport.scale[2];
441                 }
442                 cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
443                         float z1 = (v3d->viewport.translate[2] -
444                                     v3d->viewport.scale[2]);
445                         float z2 = (v3d->viewport.translate[2] +
446                                     v3d->viewport.scale[2]);
447                         clip.minimum_zw = MIN2(z1, z2);
448                         clip.maximum_zw = MAX2(z1, z2);
449                 }
450 
451                 cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
452                         float vp_fine_x = v3d->viewport.translate[0];
453                         float vp_fine_y = v3d->viewport.translate[1];
454                         int32_t vp_coarse_x = 0;
455                         int32_t vp_coarse_y = 0;
456 
457                         /* The fine coordinates must be unsigned, but coarse
458                          * can be signed.
459                          */
460                         if (unlikely(vp_fine_x < 0)) {
461                                 int32_t blocks_64 =
462                                         DIV_ROUND_UP(fabsf(vp_fine_x), 64);
463                                 vp_fine_x += 64.0f * blocks_64;
464                                 vp_coarse_x -= blocks_64;
465                         }
466 
467                         if (unlikely(vp_fine_y < 0)) {
468                                 int32_t blocks_64 =
469                                         DIV_ROUND_UP(fabsf(vp_fine_y), 64);
470                                 vp_fine_y += 64.0f * blocks_64;
471                                 vp_coarse_y -= blocks_64;
472                         }
473 
474                         vp.fine_x = vp_fine_x;
475                         vp.fine_y = vp_fine_y;
476                         vp.coarse_x = vp_coarse_x;
477                         vp.coarse_y = vp_coarse_y;
478                 }
479         }
480 
481         if (v3d->dirty & V3D_DIRTY_BLEND) {
482                 struct v3d_blend_state *blend = v3d->blend;
483 
484                 if (blend->blend_enables) {
485                         cl_emit(&job->bcl, BLEND_ENABLES, enables) {
486                                 enables.mask = blend->blend_enables;
487                         }
488 
489                         const uint32_t max_rts =
490                                 V3D_MAX_RENDER_TARGETS(v3d->screen->devinfo.ver);
491                         if (blend->base.independent_blend_enable) {
492                                 for (int i = 0; i < max_rts; i++)
493                                         emit_rt_blend(v3d, job, &blend->base, i,
494                                                       (1 << i),
495                                                       v3d->blend_dst_alpha_one & (1 << i));
496                         } else if (v3d->blend_dst_alpha_one &&
497                                    util_bitcount(v3d->blend_dst_alpha_one) < job->nr_cbufs) {
498                                 /* Even if we don't have independent per-RT
499                                  * blending, we may have a combination of RT
500                                  * formats were some RTs have an alpha channel
501                                  * and others don't. Since this affects how
502                                  * blending is performed, we also need to emit
503                                  * independent blend configurations in this
504                                  * case: one for RTs with alpha and one for
505                                  * RTs without.
506                                  */
507                                 emit_rt_blend(v3d, job, &blend->base, 0,
508                                               ((1 << max_rts) - 1) &
509                                                    v3d->blend_dst_alpha_one,
510                                               true);
511                                 emit_rt_blend(v3d, job, &blend->base, 0,
512                                               ((1 << max_rts) - 1) &
513                                                    ~v3d->blend_dst_alpha_one,
514                                               false);
515                         } else {
516                                 emit_rt_blend(v3d, job, &blend->base, 0,
517                                               (1 << max_rts) - 1,
518                                               v3d->blend_dst_alpha_one);
519                         }
520                 }
521         }
522 
523         if (v3d->dirty & V3D_DIRTY_BLEND) {
524                 struct pipe_blend_state *blend = &v3d->blend->base;
525 
526                 const uint32_t max_rts =
527                         V3D_MAX_RENDER_TARGETS(v3d->screen->devinfo.ver);
528                 cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) {
529                         for (int i = 0; i < max_rts; i++) {
530                                 int rt = blend->independent_blend_enable ? i : 0;
531                                 int rt_mask = blend->rt[rt].colormask;
532 
533                                 mask.mask |= translate_colormask(v3d, rt_mask,
534                                                                  i) << (4 * i);
535                         }
536                 }
537         }
538 
539         /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant
540          * color.
541          */
542         if (v3d->dirty & V3D_DIRTY_BLEND_COLOR) {
543                 cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) {
544                         color.red_f16 = (v3d->swap_color_rb ?
545                                           v3d->blend_color.hf[2] :
546                                           v3d->blend_color.hf[0]);
547                         color.green_f16 = v3d->blend_color.hf[1];
548                         color.blue_f16 = (v3d->swap_color_rb ?
549                                            v3d->blend_color.hf[0] :
550                                            v3d->blend_color.hf[2]);
551                         color.alpha_f16 = v3d->blend_color.hf[3];
552                 }
553         }
554 
555         if (v3d->dirty & (V3D_DIRTY_ZSA | V3D_DIRTY_STENCIL_REF)) {
556                 struct pipe_stencil_state *front = &v3d->zsa->base.stencil[0];
557                 struct pipe_stencil_state *back = &v3d->zsa->base.stencil[1];
558 
559                 if (front->enabled) {
560                         cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
561                                                v3d->zsa->stencil_front, config) {
562                                 config.stencil_ref_value =
563                                         v3d->stencil_ref.ref_value[0];
564                         }
565                 }
566 
567                 if (back->enabled) {
568                         cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
569                                                v3d->zsa->stencil_back, config) {
570                                 config.stencil_ref_value =
571                                         v3d->stencil_ref.ref_value[1];
572                         }
573                 }
574         }
575 
576         if (v3d->dirty & V3D_DIRTY_FLAT_SHADE_FLAGS) {
577                 if (!emit_varying_flags(job,
578                                         v3d->prog.fs->prog_data.fs->flat_shade_flags,
579                                         emit_flat_shade_flags)) {
580                         cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
581                 }
582         }
583 
584         if (v3d->dirty & V3D_DIRTY_NOPERSPECTIVE_FLAGS) {
585                 if (!emit_varying_flags(job,
586                                         v3d->prog.fs->prog_data.fs->noperspective_flags,
587                                         emit_noperspective_flags)) {
588                         cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags);
589                 }
590         }
591 
592         if (v3d->dirty & V3D_DIRTY_CENTROID_FLAGS) {
593                 if (!emit_varying_flags(job,
594                                         v3d->prog.fs->prog_data.fs->centroid_flags,
595                                         emit_centroid_flags)) {
596                         cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
597                 }
598         }
599 
600         /* Set up the transform feedback data specs (which VPM entries to
601          * output to which buffers).
602          */
603         if (v3d->dirty & (V3D_DIRTY_STREAMOUT |
604                           V3D_DIRTY_RASTERIZER |
605                           V3D_DIRTY_PRIM_MODE)) {
606                 struct v3d_streamout_stateobj *so = &v3d->streamout;
607                 if (so->num_targets) {
608                         bool psiz_per_vertex = (v3d->prim_mode == MESA_PRIM_POINTS &&
609                                                 v3d->rasterizer->base.point_size_per_vertex);
610                         struct v3d_uncompiled_shader *tf_shader =
611                                 get_tf_shader(v3d);
612                         uint16_t *tf_specs = (psiz_per_vertex ?
613                                               tf_shader->tf_specs_psiz :
614                                               tf_shader->tf_specs);
615 
616                         bool tf_enabled = v3d_transform_feedback_enabled(v3d);
617                         job->tf_enabled |= tf_enabled;
618 
619                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
620                                 tfe.number_of_16_bit_output_data_specs_following =
621                                         tf_shader->num_tf_specs;
622                                 tfe.enable = tf_enabled;
623                         };
624                         for (int i = 0; i < tf_shader->num_tf_specs; i++) {
625                                 cl_emit_prepacked(&job->bcl, &tf_specs[i]);
626                         }
627                 } else {
628                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
629                                 tfe.enable = false;
630                         };
631                 }
632         }
633 
634         /* Set up the transform feedback buffers. */
635         if (v3d->dirty & V3D_DIRTY_STREAMOUT) {
636                 struct v3d_uncompiled_shader *tf_shader = get_tf_shader(v3d);
637                 struct v3d_streamout_stateobj *so = &v3d->streamout;
638                 for (int i = 0; i < so->num_targets; i++) {
639                         struct pipe_stream_output_target *target =
640                                 so->targets[i];
641                         struct v3d_resource *rsc = target ?
642                                 v3d_resource(target->buffer) : NULL;
643                         struct pipe_shader_state *ss = &tf_shader->base;
644                         struct pipe_stream_output_info *info = &ss->stream_output;
645                         uint32_t offset = target ?
646                                 v3d_stream_output_target(target)->offset * info->stride[i] * 4 : 0;
647 
648                         if (!target)
649                                 continue;
650 
651                         cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) {
652                                 output.buffer_address =
653                                         cl_address(rsc->bo,
654                                                    target->buffer_offset +
655                                                    offset);
656                                 output.buffer_size_in_32_bit_words =
657                                         (target->buffer_size - offset) >> 2;
658                                 output.buffer_number = i;
659                         }
660                         if (target) {
661                                 v3d_job_add_tf_write_resource(v3d->job,
662                                                               target->buffer);
663                         }
664                         /* XXX: buffer_size? */
665                 }
666         }
667 
668         if (v3d->dirty & V3D_DIRTY_OQ) {
669                 cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
670                         if (v3d->active_queries && v3d->current_oq) {
671                                 counter.address = cl_address(v3d->current_oq, 0);
672                         }
673                 }
674         }
675 
676         if (v3d->dirty & V3D_DIRTY_SAMPLE_STATE) {
677                 cl_emit(&job->bcl, SAMPLE_STATE, state) {
678                         /* Note: SampleCoverage was handled at the
679                          * frontend level by converting to sample_mask.
680                          */
681                         state.coverage = 1.0;
682                         state.mask = job->msaa ? v3d->sample_mask : 0xf;
683                 }
684         }
685 }
686