/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/format/u_format.h"
#include "util/half_float.h"
#include "v3d_context.h"
#include "broadcom/common/v3d_macros.h"
#include "broadcom/cle/v3dx_pack.h"
#include "broadcom/common/v3d_util.h"
#include "broadcom/compiler/v3d_compiler.h"

static uint8_t
v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one)
{
        /* We may get a bad blendfactor when blending is disabled. */
        if (factor == 0)
                return V3D_BLEND_FACTOR_ZERO;

        switch (factor) {
        case PIPE_BLENDFACTOR_ZERO:
                return V3D_BLEND_FACTOR_ZERO;
        case PIPE_BLENDFACTOR_ONE:
                return V3D_BLEND_FACTOR_ONE;
        case PIPE_BLENDFACTOR_SRC_COLOR:
                return V3D_BLEND_FACTOR_SRC_COLOR;
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
                return V3D_BLEND_FACTOR_INV_SRC_COLOR;
        case PIPE_BLENDFACTOR_DST_COLOR:
                return V3D_BLEND_FACTOR_DST_COLOR;
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
                return V3D_BLEND_FACTOR_INV_DST_COLOR;
        case PIPE_BLENDFACTOR_SRC_ALPHA:
                return V3D_BLEND_FACTOR_SRC_ALPHA;
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
                return V3D_BLEND_FACTOR_INV_SRC_ALPHA;
        case PIPE_BLENDFACTOR_DST_ALPHA:
                return (dst_alpha_one ?
                        V3D_BLEND_FACTOR_ONE :
                        V3D_BLEND_FACTOR_DST_ALPHA);
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
                return (dst_alpha_one ?
                        V3D_BLEND_FACTOR_ZERO :
                        V3D_BLEND_FACTOR_INV_DST_ALPHA);
        case PIPE_BLENDFACTOR_CONST_COLOR:
                return V3D_BLEND_FACTOR_CONST_COLOR;
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
                return V3D_BLEND_FACTOR_INV_CONST_COLOR;
        case PIPE_BLENDFACTOR_CONST_ALPHA:
                return V3D_BLEND_FACTOR_CONST_ALPHA;
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                return V3D_BLEND_FACTOR_INV_CONST_ALPHA;
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
                return (dst_alpha_one ?
                        V3D_BLEND_FACTOR_ZERO :
                        V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE);
        default:
                unreachable("Bad blend factor");
        }
}

static uint32_t
translate_colormask(struct v3d_context *v3d, uint32_t colormask, int rt)
{
        if (v3d->swap_color_rb & (1 << rt)) {
                colormask = ((colormask & (2 | 8)) |
                             ((colormask & 1) << 2) |
                             ((colormask & 4) >> 2));
        }

        return (~colormask) & 0xf;
}

static void
emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job,
              struct pipe_blend_state *blend, int rt, uint8_t rt_mask,
              bool blend_dst_alpha_one)
{
        struct pipe_rt_blend_state *rtblend = &blend->rt[rt];

        /* We don't need to emit blend state for disabled RTs. */
        if (!rtblend->blend_enable)
                return;

        cl_emit(&job->bcl, BLEND_CFG, config) {
                config.render_target_mask = rt_mask;

                config.color_blend_mode = rtblend->rgb_func;
                config.color_blend_dst_factor =
                        v3d_factor(rtblend->rgb_dst_factor,
                                   blend_dst_alpha_one);
                config.color_blend_src_factor =
                        v3d_factor(rtblend->rgb_src_factor,
                                   blend_dst_alpha_one);

                config.alpha_blend_mode = rtblend->alpha_func;
                config.alpha_blend_dst_factor =
                        v3d_factor(rtblend->alpha_dst_factor,
                                   blend_dst_alpha_one);
                config.alpha_blend_src_factor =
                        v3d_factor(rtblend->alpha_src_factor,
                                   blend_dst_alpha_one);
        }
}

static void
emit_flat_shade_flags(struct v3d_job *job,
                      int varying_offset,
                      uint32_t varyings,
                      enum V3DX(Varying_Flags_Action) lower,
                      enum V3DX(Varying_Flags_Action) higher)
{
        cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
                flags.varying_offset_v0 = varying_offset;
                flags.flat_shade_flags_for_varyings_v024 = varyings;
                flags.action_for_flat_shade_flags_of_lower_numbered_varyings =
                        lower;
                flags.action_for_flat_shade_flags_of_higher_numbered_varyings =
                        higher;
        }
}

static void
emit_noperspective_flags(struct v3d_job *job,
                         int varying_offset,
                         uint32_t varyings,
                         enum V3DX(Varying_Flags_Action) lower,
                         enum V3DX(Varying_Flags_Action) higher)
{
        cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) {
                flags.varying_offset_v0 = varying_offset;
                flags.non_perspective_flags_for_varyings_v024 = varyings;
                flags.action_for_non_perspective_flags_of_lower_numbered_varyings =
                        lower;
                flags.action_for_non_perspective_flags_of_higher_numbered_varyings =
                        higher;
        }
}

static void
emit_centroid_flags(struct v3d_job *job,
                    int varying_offset,
                    uint32_t varyings,
                    enum V3DX(Varying_Flags_Action) lower,
                    enum V3DX(Varying_Flags_Action) higher)
{
        cl_emit(&job->bcl, CENTROID_FLAGS, flags) {
                flags.varying_offset_v0 = varying_offset;
                flags.centroid_flags_for_varyings_v024 = varyings;
                flags.action_for_centroid_flags_of_lower_numbered_varyings =
                        lower;
                flags.action_for_centroid_flags_of_higher_numbered_varyings =
                        higher;
        }
}

static bool
emit_varying_flags(struct v3d_job *job, uint32_t *flags,
                   void (*flag_emit_callback)(struct v3d_job *job,
                                              int varying_offset,
                                              uint32_t flags,
                                              enum V3DX(Varying_Flags_Action) lower,
                                              enum V3DX(Varying_Flags_Action) higher))
{
        struct v3d_context *v3d = job->v3d;
        bool emitted_any = false;

        for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->flat_shade_flags); i++) {
                if (!flags[i])
                        continue;

                if (emitted_any) {
                        flag_emit_callback(job, i, flags[i],
                                           V3D_VARYING_FLAGS_ACTION_UNCHANGED,
                                           V3D_VARYING_FLAGS_ACTION_UNCHANGED);
                } else if (i == 0) {
                        flag_emit_callback(job, i, flags[i],
                                           V3D_VARYING_FLAGS_ACTION_UNCHANGED,
                                           V3D_VARYING_FLAGS_ACTION_ZEROED);
                } else {
                        flag_emit_callback(job, i, flags[i],
                                           V3D_VARYING_FLAGS_ACTION_ZEROED,
                                           V3D_VARYING_FLAGS_ACTION_ZEROED);
                }
                emitted_any = true;
        }

        return emitted_any;
}

static inline struct v3d_uncompiled_shader *
get_tf_shader(struct v3d_context *v3d)
{
        if (v3d->prog.bind_gs)
                return v3d->prog.bind_gs;
        else
                return v3d->prog.bind_vs;
}

void
v3dX(emit_state)(struct pipe_context *pctx)
{
        struct v3d_context *v3d = v3d_context(pctx);
        struct v3d_job *job = v3d->job;
        bool rasterizer_discard = v3d->rasterizer->base.rasterizer_discard;

        if (v3d->dirty & (V3D_DIRTY_SCISSOR | V3D_DIRTY_VIEWPORT |
                          V3D_DIRTY_RASTERIZER)) {
                float *vpscale = v3d->viewport.scale;
                float *vptranslate = v3d->viewport.translate;
                float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
                float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
                float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
                float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];

                /* Clip to the scissor if it's enabled, but still clip to the
                 * drawable regardless since that controls where the binner
                 * tries to put things.
                 *
                 * Additionally, always clip the rendering to the viewport,
                 * since the hardware does guardband clipping, meaning
                 * primitives would rasterize outside of the view volume.
                 */
                uint32_t minx, miny, maxx, maxy;
                if (!v3d->rasterizer->base.scissor) {
                        minx = MAX2(vp_minx, 0);
                        miny = MAX2(vp_miny, 0);
                        maxx = MIN2(vp_maxx, job->draw_width);
                        maxy = MIN2(vp_maxy, job->draw_height);
                } else {
                        minx = MAX2(vp_minx, v3d->scissor.minx);
                        miny = MAX2(vp_miny, v3d->scissor.miny);
                        maxx = MIN2(vp_maxx, v3d->scissor.maxx);
                        maxy = MIN2(vp_maxy, v3d->scissor.maxy);
                }

                cl_emit(&job->bcl, CLIP_WINDOW, clip) {
                        clip.clip_window_left_pixel_coordinate = minx;
                        clip.clip_window_bottom_pixel_coordinate = miny;
                        if (maxx > minx && maxy > miny) {
                                clip.clip_window_width_in_pixels = maxx - minx;
                                clip.clip_window_height_in_pixels = maxy - miny;
                        }
                }

                job->draw_min_x = MIN2(job->draw_min_x, minx);
                job->draw_min_y = MIN2(job->draw_min_y, miny);
                job->draw_max_x = MAX2(job->draw_max_x, maxx);
                job->draw_max_y = MAX2(job->draw_max_y, maxy);

                if (!v3d->rasterizer->base.scissor) {
                    job->scissor.disabled = true;
                } else if (!job->scissor.disabled &&
                           (v3d->dirty & V3D_DIRTY_SCISSOR)) {
                        if (job->scissor.count < MAX_JOB_SCISSORS) {
                                job->scissor.rects[job->scissor.count].min_x =
                                        v3d->scissor.minx;
                                job->scissor.rects[job->scissor.count].min_y =
                                        v3d->scissor.miny;
                                job->scissor.rects[job->scissor.count].max_x =
                                        v3d->scissor.maxx - 1;
                                job->scissor.rects[job->scissor.count].max_y =
                                        v3d->scissor.maxy - 1;
                                job->scissor.count++;
                        } else {
                                job->scissor.disabled = true;
                                perf_debug("Too many scissor rects.");
                        }
                }
        }

        if (v3d->dirty & (V3D_DIRTY_RASTERIZER |
                          V3D_DIRTY_ZSA |
                          V3D_DIRTY_BLEND |
                          V3D_DIRTY_COMPILED_FS)) {
                cl_emit(&job->bcl, CFG_BITS, config) {
                        config.enable_forward_facing_primitive =
                                !rasterizer_discard &&
                                !(v3d->rasterizer->base.cull_face &
                                  PIPE_FACE_FRONT);
                        config.enable_reverse_facing_primitive =
                                !rasterizer_discard &&
                                !(v3d->rasterizer->base.cull_face &
                                  PIPE_FACE_BACK);
                        /* This seems backwards, but it's what gets the
                         * clipflat test to pass.
                         */
                        config.clockwise_primitives =
                                v3d->rasterizer->base.front_ccw;

                        config.enable_depth_offset =
                                v3d->rasterizer->base.offset_tri;

                        /* V3D follows GL behavior where the sample mask only
                         * applies when MSAA is enabled.  Gallium has sample
                         * mask apply anyway, and the MSAA blit shaders will
                         * set sample mask without explicitly setting
                         * rasterizer oversample.  Just force it on here,
                         * since the blit shaders are the only way to have
                         * !multisample && samplemask != 0xf.
                         */
                        config.rasterizer_oversample_mode =
                                v3d->rasterizer->base.multisample ||
                                v3d->sample_mask != 0xf;

                        config.direct3d_provoking_vertex =
                                v3d->rasterizer->base.flatshade_first;

                        config.blend_enable = v3d->blend->blend_enables;

                        /* Note: EZ state may update based on the compiled FS,
                         * along with ZSA
                         */
#if V3D_VERSION == 42
                        config.early_z_updates_enable =
                                (job->ez_state != V3D_EZ_DISABLED);
#endif
                        if (v3d->zsa->base.depth_enabled) {
                                config.z_updates_enable =
                                        v3d->zsa->base.depth_writemask;
#if V3D_VERSION == 42
                                config.early_z_enable =
                                        config.early_z_updates_enable;
#endif
                                config.depth_test_function =
                                        v3d->zsa->base.depth_func;
                        } else {
                                config.depth_test_function = PIPE_FUNC_ALWAYS;
                        }

                        config.stencil_enable =
                                v3d->zsa->base.stencil[0].enabled;

                        /* Use nicer line caps when line smoothing is
                         * enabled
                         */
                        config.line_rasterization =
                                v3d_line_smoothing_enabled(v3d) ?
                                V3D_LINE_RASTERIZATION_PERP_END_CAPS :
                                V3D_LINE_RASTERIZATION_DIAMOND_EXIT;

                        if (config.enable_forward_facing_primitive &&
                            config.enable_reverse_facing_primitive &&
                            v3d->rasterizer->base.fill_front != v3d->rasterizer->base.fill_back) {
                                mesa_logw_once("Setting a different polygon mode for "
                                               "front and back faces is not supported");
                        }

                        if (config.enable_forward_facing_primitive) {
                                if (v3d->rasterizer->base.fill_front != PIPE_POLYGON_MODE_FILL) {
                                        config.direct3d_wireframe_triangles_mode = true;
                                        config.direct3d_point_fill_mode =
                                                v3d->rasterizer->base.fill_front == PIPE_POLYGON_MODE_POINT;
                                }
                        } else {
                                if (v3d->rasterizer->base.fill_back != PIPE_POLYGON_MODE_FILL) {
                                        config.direct3d_wireframe_triangles_mode = true;
                                        config.direct3d_point_fill_mode =
                                                v3d->rasterizer->base.fill_back == PIPE_POLYGON_MODE_POINT;
                                }
                        }

#if V3D_VERSION >= 71
                        config.z_clipping_mode = v3d->rasterizer->base.depth_clip_near ||
                           v3d->rasterizer->base.depth_clip_far ?
                           V3D_Z_CLIP_MODE_MIN_ONE_TO_ONE : V3D_Z_CLIP_MODE_NONE;

                        config.z_clamp_mode = v3d->rasterizer->base.depth_clamp;
#endif
                }
        }

        if (v3d->dirty & V3D_DIRTY_RASTERIZER &&
            v3d->rasterizer->base.offset_tri) {
                if (v3d->screen->devinfo.ver == 42 &&
                    job->zsbuf &&
                    job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
                        cl_emit_prepacked_sized(&job->bcl,
                                                v3d->rasterizer->depth_offset_z16,
                                                cl_packet_length(DEPTH_OFFSET));
                } else {
                        cl_emit_prepacked_sized(&job->bcl,
                                                v3d->rasterizer->depth_offset,
                                                cl_packet_length(DEPTH_OFFSET));
                }
        }

        if (v3d->dirty & V3D_DIRTY_RASTERIZER) {
                cl_emit(&job->bcl, POINT_SIZE, point_size) {
                        point_size.point_size = v3d->rasterizer->point_size;
                }

                cl_emit(&job->bcl, LINE_WIDTH, line_width) {
                        line_width.line_width = v3d_get_real_line_width(v3d);
                }
        }

        if (v3d->dirty & V3D_DIRTY_VIEWPORT) {
#if V3D_VERSION == 42
                cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
                        clip.viewport_half_width_in_1_256th_of_pixel =
                                v3d->viewport.scale[0] * 256.0f;
                        clip.viewport_half_height_in_1_256th_of_pixel =
                                v3d->viewport.scale[1] * 256.0f;
                }
#endif
#if V3D_VERSION >= 71
                cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
                        clip.viewport_half_width_in_1_64th_of_pixel =
                                v3d->viewport.scale[0] * 64.0f;
                        clip.viewport_half_height_in_1_64th_of_pixel =
                                v3d->viewport.scale[1] * 64.0f;
                }
#endif


                cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
                        clip.viewport_z_offset_zc_to_zs =
                                v3d->viewport.translate[2];
                        clip.viewport_z_scale_zc_to_zs =
                                v3d->viewport.scale[2];
                }
                cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
                        float z1 = (v3d->viewport.translate[2] -
                                    v3d->viewport.scale[2]);
                        float z2 = (v3d->viewport.translate[2] +
                                    v3d->viewport.scale[2]);
                        clip.minimum_zw = MIN2(z1, z2);
                        clip.maximum_zw = MAX2(z1, z2);
                }

                cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
                        float vp_fine_x = v3d->viewport.translate[0];
                        float vp_fine_y = v3d->viewport.translate[1];
                        int32_t vp_coarse_x = 0;
                        int32_t vp_coarse_y = 0;

                        /* The fine coordinates must be unsigned, but coarse
                         * can be signed.
                         */
                        if (unlikely(vp_fine_x < 0)) {
                                int32_t blocks_64 =
                                        DIV_ROUND_UP(fabsf(vp_fine_x), 64);
                                vp_fine_x += 64.0f * blocks_64;
                                vp_coarse_x -= blocks_64;
                        }

                        if (unlikely(vp_fine_y < 0)) {
                                int32_t blocks_64 =
                                        DIV_ROUND_UP(fabsf(vp_fine_y), 64);
                                vp_fine_y += 64.0f * blocks_64;
                                vp_coarse_y -= blocks_64;
                        }

                        vp.fine_x = vp_fine_x;
                        vp.fine_y = vp_fine_y;
                        vp.coarse_x = vp_coarse_x;
                        vp.coarse_y = vp_coarse_y;
                }
        }

        if (v3d->dirty & V3D_DIRTY_BLEND) {
                struct v3d_blend_state *blend = v3d->blend;

                if (blend->blend_enables) {
                        cl_emit(&job->bcl, BLEND_ENABLES, enables) {
                                enables.mask = blend->blend_enables;
                        }

                        const uint32_t max_rts =
                                V3D_MAX_RENDER_TARGETS(v3d->screen->devinfo.ver);
                        if (blend->base.independent_blend_enable) {
                                for (int i = 0; i < max_rts; i++)
                                        emit_rt_blend(v3d, job, &blend->base, i,
                                                      (1 << i),
                                                      v3d->blend_dst_alpha_one & (1 << i));
                        } else if (v3d->blend_dst_alpha_one &&
                                   util_bitcount(v3d->blend_dst_alpha_one) < job->nr_cbufs) {
                                /* Even if we don't have independent per-RT
                                 * blending, we may have a combination of RT
                                 * formats were some RTs have an alpha channel
                                 * and others don't. Since this affects how
                                 * blending is performed, we also need to emit
                                 * independent blend configurations in this
                                 * case: one for RTs with alpha and one for
                                 * RTs without.
                                 */
                                emit_rt_blend(v3d, job, &blend->base, 0,
                                              ((1 << max_rts) - 1) &
                                                   v3d->blend_dst_alpha_one,
                                              true);
                                emit_rt_blend(v3d, job, &blend->base, 0,
                                              ((1 << max_rts) - 1) &
                                                   ~v3d->blend_dst_alpha_one,
                                              false);
                        } else {
                                emit_rt_blend(v3d, job, &blend->base, 0,
                                              (1 << max_rts) - 1,
                                              v3d->blend_dst_alpha_one);
                        }
                }
        }

        if (v3d->dirty & V3D_DIRTY_BLEND) {
                struct pipe_blend_state *blend = &v3d->blend->base;

                const uint32_t max_rts =
                        V3D_MAX_RENDER_TARGETS(v3d->screen->devinfo.ver);
                cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) {
                        for (int i = 0; i < max_rts; i++) {
                                int rt = blend->independent_blend_enable ? i : 0;
                                int rt_mask = blend->rt[rt].colormask;

                                mask.mask |= translate_colormask(v3d, rt_mask,
                                                                 i) << (4 * i);
                        }
                }
        }

        /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant
         * color.
         */
        if (v3d->dirty & V3D_DIRTY_BLEND_COLOR) {
                cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) {
                        color.red_f16 = (v3d->swap_color_rb ?
                                          v3d->blend_color.hf[2] :
                                          v3d->blend_color.hf[0]);
                        color.green_f16 = v3d->blend_color.hf[1];
                        color.blue_f16 = (v3d->swap_color_rb ?
                                           v3d->blend_color.hf[0] :
                                           v3d->blend_color.hf[2]);
                        color.alpha_f16 = v3d->blend_color.hf[3];
                }
        }

        if (v3d->dirty & (V3D_DIRTY_ZSA | V3D_DIRTY_STENCIL_REF)) {
                struct pipe_stencil_state *front = &v3d->zsa->base.stencil[0];
                struct pipe_stencil_state *back = &v3d->zsa->base.stencil[1];

                if (front->enabled) {
                        cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
                                               v3d->zsa->stencil_front, config) {
                                config.stencil_ref_value =
                                        v3d->stencil_ref.ref_value[0];
                        }
                }

                if (back->enabled) {
                        cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
                                               v3d->zsa->stencil_back, config) {
                                config.stencil_ref_value =
                                        v3d->stencil_ref.ref_value[1];
                        }
                }
        }

        if (v3d->dirty & V3D_DIRTY_FLAT_SHADE_FLAGS) {
                if (!emit_varying_flags(job,
                                        v3d->prog.fs->prog_data.fs->flat_shade_flags,
                                        emit_flat_shade_flags)) {
                        cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags);
                }
        }

        if (v3d->dirty & V3D_DIRTY_NOPERSPECTIVE_FLAGS) {
                if (!emit_varying_flags(job,
                                        v3d->prog.fs->prog_data.fs->noperspective_flags,
                                        emit_noperspective_flags)) {
                        cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags);
                }
        }

        if (v3d->dirty & V3D_DIRTY_CENTROID_FLAGS) {
                if (!emit_varying_flags(job,
                                        v3d->prog.fs->prog_data.fs->centroid_flags,
                                        emit_centroid_flags)) {
                        cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags);
                }
        }

        /* Set up the transform feedback data specs (which VPM entries to
         * output to which buffers).
         */
        if (v3d->dirty & (V3D_DIRTY_STREAMOUT |
                          V3D_DIRTY_RASTERIZER |
                          V3D_DIRTY_PRIM_MODE)) {
                struct v3d_streamout_stateobj *so = &v3d->streamout;
                if (so->num_targets) {
                        bool psiz_per_vertex = (v3d->prim_mode == MESA_PRIM_POINTS &&
                                                v3d->rasterizer->base.point_size_per_vertex);
                        struct v3d_uncompiled_shader *tf_shader =
                                get_tf_shader(v3d);
                        uint16_t *tf_specs = (psiz_per_vertex ?
                                              tf_shader->tf_specs_psiz :
                                              tf_shader->tf_specs);

                        bool tf_enabled = v3d_transform_feedback_enabled(v3d);
                        job->tf_enabled |= tf_enabled;

                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
                                tfe.number_of_16_bit_output_data_specs_following =
                                        tf_shader->num_tf_specs;
                                tfe.enable = tf_enabled;
                        };
                        for (int i = 0; i < tf_shader->num_tf_specs; i++) {
                                cl_emit_prepacked(&job->bcl, &tf_specs[i]);
                        }
                } else {
                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) {
                                tfe.enable = false;
                        };
                }
        }

        /* Set up the transform feedback buffers. */
        if (v3d->dirty & V3D_DIRTY_STREAMOUT) {
                struct v3d_uncompiled_shader *tf_shader = get_tf_shader(v3d);
                struct v3d_streamout_stateobj *so = &v3d->streamout;
                for (int i = 0; i < so->num_targets; i++) {
                        struct pipe_stream_output_target *target =
                                so->targets[i];
                        struct v3d_resource *rsc = target ?
                                v3d_resource(target->buffer) : NULL;
                        struct pipe_shader_state *ss = &tf_shader->base;
                        struct pipe_stream_output_info *info = &ss->stream_output;
                        uint32_t offset = target ?
                                v3d_stream_output_target(target)->offset * info->stride[i] * 4 : 0;

                        if (!target)
                                continue;

                        cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) {
                                output.buffer_address =
                                        cl_address(rsc->bo,
                                                   target->buffer_offset +
                                                   offset);
                                output.buffer_size_in_32_bit_words =
                                        (target->buffer_size - offset) >> 2;
                                output.buffer_number = i;
                        }
                        if (target) {
                                v3d_job_add_tf_write_resource(v3d->job,
                                                              target->buffer);
                        }
                        /* XXX: buffer_size? */
                }
        }

        if (v3d->dirty & V3D_DIRTY_OQ) {
                cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) {
                        if (v3d->active_queries && v3d->current_oq) {
                                counter.address = cl_address(v3d->current_oq, 0);
                        }
                }
        }

        if (v3d->dirty & V3D_DIRTY_SAMPLE_STATE) {
                cl_emit(&job->bcl, SAMPLE_STATE, state) {
                        /* Note: SampleCoverage was handled at the
                         * frontend level by converting to sample_mask.
                         */
                        state.coverage = 1.0;
                        state.mask = job->msaa ? v3d->sample_mask : 0xf;
                }
        }
}