#include "zink_batch.h" #include "zink_compiler.h" #include "zink_context.h" #include "zink_descriptors.h" #include "zink_program.h" #include "zink_program_state.hpp" #include "zink_query.h" #include "zink_resource.h" #include "zink_screen.h" #include "zink_state.h" #include "zink_surface.h" #include "zink_inlines.h" #include "util/hash_table.h" #include "util/u_cpu_detect.h" #include "util/u_debug.h" #include "util/u_helpers.h" #include "util/u_inlines.h" #include "util/u_prim.h" #include "util/u_prim_restart.h" static void zink_emit_xfb_counter_barrier(struct zink_context *ctx) { for (unsigned i = 0; i < ctx->num_so_targets; i++) { struct zink_so_target *t = zink_so_target(ctx->so_targets[i]); if (!t) continue; struct zink_resource *res = zink_resource(t->counter_buffer); VkAccessFlags access = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; VkPipelineStageFlags stage = VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; if (t->counter_buffer_valid) { /* Between the pause and resume there needs to be a memory barrier for the counter buffers * with a source access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT * at pipeline stage VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT * to a destination access of VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT * at pipeline stage VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT. * * - from VK_EXT_transform_feedback spec */ access |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT; stage |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT; } zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, access, stage); if (!ctx->unordered_blitting) res->obj->unordered_read = false; } } static void zink_emit_stream_output_targets(struct pipe_context *pctx) { struct zink_context *ctx = zink_context(pctx); VkBuffer buffers[PIPE_MAX_SO_BUFFERS] = {0}; VkDeviceSize buffer_offsets[PIPE_MAX_SO_BUFFERS] = {0}; VkDeviceSize buffer_sizes[PIPE_MAX_SO_BUFFERS] = {0}; for (unsigned i = 0; i < ctx->num_so_targets; i++) { struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i]; if (!t) { /* no need to reference this or anything */ buffers[i] = zink_resource(ctx->dummy_xfb_buffer)->obj->buffer; buffer_offsets[i] = 0; buffer_sizes[i] = sizeof(uint8_t); continue; } struct zink_resource *res = zink_resource(t->base.buffer); if (!res->so_valid) /* resource has been rebound */ t->counter_buffer_valid = false; buffers[i] = res->obj->buffer; zink_batch_reference_resource_rw(ctx, res, true); buffer_offsets[i] = t->base.buffer_offset; buffer_sizes[i] = t->base.buffer_size; res->so_valid = true; if (!ctx->unordered_blitting) { res->obj->unordered_read = res->obj->unordered_write = false; res->obj->access = VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; res->obj->access_stage = VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT; } util_range_add(t->base.buffer, &res->valid_buffer_range, t->base.buffer_offset, t->base.buffer_offset + t->base.buffer_size); } VKCTX(CmdBindTransformFeedbackBuffersEXT)(ctx->bs->cmdbuf, 0, ctx->num_so_targets, buffers, buffer_offsets, buffer_sizes); ctx->dirty_so_targets = false; } ALWAYS_INLINE static void check_buffer_barrier(struct zink_context *ctx, struct pipe_resource *pres, VkAccessFlags flags, VkPipelineStageFlags pipeline) { struct zink_resource *res = zink_resource(pres); zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, flags, pipeline); if (!ctx->unordered_blitting) res->obj->unordered_read = false; } ALWAYS_INLINE static void barrier_draw_buffers(struct zink_context *ctx, const struct pipe_draw_info *dinfo, const struct 
ALWAYS_INLINE static void
barrier_draw_buffers(struct zink_context *ctx, const struct pipe_draw_info *dinfo,
                     const struct pipe_draw_indirect_info *dindirect, struct pipe_resource *index_buffer)
{
   if (index_buffer)
      check_buffer_barrier(ctx, index_buffer, VK_ACCESS_INDEX_READ_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
   if (dindirect && dindirect->buffer) {
      check_buffer_barrier(ctx, dindirect->buffer,
                           VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
      if (dindirect->indirect_draw_count)
         check_buffer_barrier(ctx, dindirect->indirect_draw_count,
                              VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
   }
}

template <zink_dynamic_state DYNAMIC_STATE>
static void
zink_bind_vertex_buffers(struct zink_context *ctx)
{
   VkBuffer buffers[PIPE_MAX_ATTRIBS];
   VkDeviceSize buffer_offsets[PIPE_MAX_ATTRIBS];
   struct zink_vertex_elements_state *elems = ctx->element_state;
   struct zink_screen *screen = zink_screen(ctx->base.screen);

   for (unsigned i = 0; i < elems->hw_state.num_bindings; i++) {
      struct pipe_vertex_buffer *vb = ctx->vertex_buffers + elems->hw_state.binding_map[i];
      assert(vb);
      if (vb->buffer.resource) {
         struct zink_resource *res = zink_resource(vb->buffer.resource);
         assert(res->obj->buffer);
         buffers[i] = res->obj->buffer;
         buffer_offsets[i] = vb->buffer_offset;
      } else {
         buffers[i] = zink_resource(ctx->dummy_vertex_buffer)->obj->buffer;
         buffer_offsets[i] = 0;
      }
   }

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE &&
       DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT2 &&
       DYNAMIC_STATE != ZINK_DYNAMIC_VERTEX_INPUT) {
      if (elems->hw_state.num_bindings)
         VKCTX(CmdBindVertexBuffers2)(ctx->bs->cmdbuf, 0,
                                      elems->hw_state.num_bindings,
                                      buffers, buffer_offsets, NULL, elems->hw_state.b.strides);
   } else if (elems->hw_state.num_bindings)
      VKSCR(CmdBindVertexBuffers)(ctx->bs->cmdbuf, 0,
                                  elems->hw_state.num_bindings,
                                  buffers, buffer_offsets);

   if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT2 || DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT)
      VKCTX(CmdSetVertexInputEXT)(ctx->bs->cmdbuf,
                                  elems->hw_state.num_bindings, elems->hw_state.dynbindings,
                                  elems->hw_state.num_attribs, elems->hw_state.dynattribs);

   ctx->vertex_buffers_dirty = false;
}

ALWAYS_INLINE static void
update_drawid(struct zink_context *ctx, unsigned draw_id)
{
   VKCTX(CmdPushConstants)(ctx->bs->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS,
                           offsetof(struct zink_gfx_push_constant, draw_id), sizeof(unsigned),
                           &draw_id);
}

ALWAYS_INLINE static void
draw_indexed_need_index_buffer_unref(struct zink_context *ctx,
                                     const struct pipe_draw_info *dinfo,
                                     const struct pipe_draw_start_count_bias *draws,
                                     unsigned num_draws,
                                     unsigned draw_id,
                                     bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->bs->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDrawIndexed)(cmdbuf,
                               draws[i].count, dinfo->instance_count,
                               0, draws[i].index_bias, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      for (unsigned i = 0; i < num_draws; i++)
         VKCTX(CmdDrawIndexed)(cmdbuf,
                               draws[i].count, dinfo->instance_count,
                               0, draws[i].index_bias, dinfo->start_instance);
   }
}

template <zink_multidraw HAS_MULTIDRAW>
ALWAYS_INLINE static void
draw_indexed(struct zink_context *ctx,
             const struct pipe_draw_info *dinfo,
             const struct pipe_draw_start_count_bias *draws,
             unsigned num_draws,
             unsigned draw_id,
             bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->bs->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDrawIndexed)(cmdbuf,
                               draws[i].count, dinfo->instance_count,
                               draws[i].start, draws[i].index_bias, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      if (HAS_MULTIDRAW) {
         VKCTX(CmdDrawMultiIndexedEXT)(cmdbuf, num_draws, (const VkMultiDrawIndexedInfoEXT*)draws,
                                       dinfo->instance_count,
                                       dinfo->start_instance, sizeof(struct pipe_draw_start_count_bias),
                                       dinfo->index_bias_varies ? NULL : &draws[0].index_bias);
      } else {
         for (unsigned i = 0; i < num_draws; i++)
            VKCTX(CmdDrawIndexed)(cmdbuf,
                                  draws[i].count, dinfo->instance_count,
                                  draws[i].start, draws[i].index_bias, dinfo->start_instance);
      }
   }
}

template <zink_multidraw HAS_MULTIDRAW>
ALWAYS_INLINE static void
draw(struct zink_context *ctx,
     const struct pipe_draw_info *dinfo,
     const struct pipe_draw_start_count_bias *draws,
     unsigned num_draws,
     unsigned draw_id,
     bool needs_drawid)
{
   VkCommandBuffer cmdbuf = ctx->bs->cmdbuf;
   if (dinfo->increment_draw_id && needs_drawid) {
      for (unsigned i = 0; i < num_draws; i++) {
         update_drawid(ctx, draw_id);
         VKCTX(CmdDraw)(cmdbuf, draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance);
         draw_id++;
      }
   } else {
      if (needs_drawid)
         update_drawid(ctx, draw_id);
      if (HAS_MULTIDRAW)
         VKCTX(CmdDrawMultiEXT)(cmdbuf, num_draws, (const VkMultiDrawInfoEXT*)draws,
                                dinfo->instance_count, dinfo->start_instance,
                                sizeof(struct pipe_draw_start_count_bias));
      else {
         for (unsigned i = 0; i < num_draws; i++)
            VKCTX(CmdDraw)(cmdbuf, draws[i].count, dinfo->instance_count, draws[i].start, dinfo->start_instance);
      }
   }
}

template <bool BATCH_CHANGED>
static bool
update_gfx_pipeline(struct zink_context *ctx, struct zink_batch_state *bs, enum mesa_prim mode)
{
   VkPipeline prev_pipeline = ctx->gfx_pipeline_state.pipeline;
   const struct zink_screen *screen = zink_screen(ctx->base.screen);
   bool shaders_changed = ctx->gfx_dirty || ctx->dirty_gfx_stages;
   if (screen->optimal_keys && !ctx->is_generated_gs_bound)
      zink_gfx_program_update_optimal(ctx);
   else
      zink_gfx_program_update(ctx);
   bool pipeline_changed = false;
   VkPipeline pipeline = VK_NULL_HANDLE;
   if (!ctx->curr_program->base.uses_shobj) {
      if (screen->info.have_EXT_graphics_pipeline_library)
         pipeline = zink_get_gfx_pipeline<true>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
      else
         pipeline = zink_get_gfx_pipeline<false>(ctx, ctx->curr_program, &ctx->gfx_pipeline_state, mode);
   }
   if (pipeline) {
      pipeline_changed = prev_pipeline != pipeline;
      if (BATCH_CHANGED || pipeline_changed || ctx->shobj_draw)
         VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
      ctx->shobj_draw = false;
   } else {
      if (BATCH_CHANGED || shaders_changed || !ctx->shobj_draw) {
         VkShaderStageFlagBits stages[] = {
            VK_SHADER_STAGE_VERTEX_BIT,
            VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
            VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
            VK_SHADER_STAGE_GEOMETRY_BIT,
            VK_SHADER_STAGE_FRAGMENT_BIT,
         };
         /* always rebind all stages */
         VKCTX(CmdBindShadersEXT)(bs->cmdbuf, ZINK_GFX_SHADER_COUNT, stages, ctx->curr_program->objects);
         VKCTX(CmdSetDepthBiasEnable)(bs->cmdbuf, VK_TRUE);
         VKCTX(CmdSetTessellationDomainOriginEXT)(bs->cmdbuf, VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT);
         VKCTX(CmdSetSampleLocationsEnableEXT)(bs->cmdbuf, ctx->gfx_pipeline_state.sample_locations_enabled);
         VKCTX(CmdSetRasterizationStreamEXT)(bs->cmdbuf, 0);
      }
      ctx->shobj_draw = true;
   }
   return pipeline_changed;
}

static enum mesa_prim
zink_prim_type(const struct zink_context *ctx,
               const struct pipe_draw_info *dinfo)
{
   if (ctx->gfx_pipeline_state.shader_rast_prim != MESA_PRIM_COUNT)
      return ctx->gfx_pipeline_state.shader_rast_prim;
   return u_reduced_prim((enum mesa_prim)dinfo->mode);
}
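/* Determine the primitive type that actually reaches the rasterizer:
 * a polygon fill mode of point or line demotes triangles to points or lines
 * even though the API-level draw mode is unchanged.
 */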
static enum mesa_prim
zink_rast_prim(const struct zink_context *ctx,
               const struct pipe_draw_info *dinfo)
{
   enum mesa_prim prim_type = zink_prim_type(ctx, dinfo);
   assert(prim_type != MESA_PRIM_COUNT);

   if (prim_type == MESA_PRIM_TRIANGLES &&
       ctx->rast_state->base.fill_front != PIPE_POLYGON_MODE_FILL) {
      switch (ctx->rast_state->base.fill_front) {
      case PIPE_POLYGON_MODE_POINT:
         return MESA_PRIM_POINTS;
      case PIPE_POLYGON_MODE_LINE:
         return MESA_PRIM_LINES;
      default:
         unreachable("unexpected polygon mode");
      }
   }

   return prim_type;
}

template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED, bool DRAW_STATE>
void
zink_draw(struct pipe_context *pctx,
          const struct pipe_draw_info *dinfo,
          unsigned drawid_offset,
          const struct pipe_draw_indirect_info *dindirect,
          const struct pipe_draw_start_count_bias *draws,
          unsigned num_draws,
          struct pipe_vertex_state *vstate,
          uint32_t partial_velem_mask)
{
   if (!dindirect && (!draws[0].count || !dinfo->instance_count))
      return;

   struct zink_context *ctx = zink_context(pctx);
   struct zink_screen *screen = zink_screen(pctx->screen);
   struct zink_rasterizer_state *rast_state = ctx->rast_state;
   struct zink_depth_stencil_alpha_state *dsa_state = ctx->dsa_state;
   struct zink_batch_state *bs = ctx->bs;
   struct zink_so_target *so_target =
      dindirect && dindirect->count_from_stream_output ?
         zink_so_target(dindirect->count_from_stream_output) : NULL;
   VkBuffer counter_buffers[PIPE_MAX_SO_BUFFERS];
   VkDeviceSize counter_buffer_offsets[PIPE_MAX_SO_BUFFERS];
   bool need_index_buffer_unref = false;
   bool mode_changed = ctx->gfx_pipeline_state.gfx_prim_mode != dinfo->mode;
   bool reads_drawid = ctx->shader_reads_drawid;
   bool reads_basevertex = ctx->shader_reads_basevertex;
   unsigned work_count = ctx->work_count;
   enum mesa_prim mode = (enum mesa_prim)dinfo->mode;

   if (ctx->memory_barrier && !ctx->blitting)
      zink_flush_memory_barrier(ctx, false);

   if (unlikely(ctx->buffer_rebind_counter < screen->buffer_rebind_counter && !ctx->blitting)) {
      ctx->buffer_rebind_counter = screen->buffer_rebind_counter;
      zink_rebind_all_buffers(ctx);
   }

   if (unlikely(ctx->image_rebind_counter < screen->image_rebind_counter && !ctx->blitting)) {
      ctx->image_rebind_counter = screen->image_rebind_counter;
      zink_rebind_all_images(ctx);
   }

   unsigned index_offset = 0;
   unsigned index_size = dinfo->index_size;
   struct pipe_resource *index_buffer = NULL;
   if (index_size > 0) {
      if (dinfo->has_user_indices) {
         if (!util_upload_index_buffer(pctx, dinfo, &draws[0], &index_buffer, &index_offset, 4)) {
            debug_printf("util_upload_index_buffer() failed\n");
            return;
         }
         /* this will have extra refs from tc */
         if (screen->threaded)
            zink_batch_reference_resource_move(ctx, zink_resource(index_buffer));
         else
            zink_batch_reference_resource(ctx, zink_resource(index_buffer));
      } else {
         index_buffer = dinfo->index.resource;
         zink_batch_reference_resource_rw(ctx, zink_resource(index_buffer), false);
      }
      assert(index_size <= 4 && index_size != 3);
      assert(index_size != 1 || screen->info.have_EXT_index_type_uint8);
   }

   ctx->was_line_loop = dinfo->was_line_loop;

   bool have_streamout = !!ctx->num_so_targets;
   if (have_streamout) {
      zink_emit_xfb_counter_barrier(ctx);
      if (ctx->dirty_so_targets) {
         /* have to loop here and below because barriers must be emitted out of renderpass,
          * but xfb buffers can't be bound before the renderpass is active to avoid
          * breaking from recursion
          */
         for (unsigned i = 0; i < ctx->num_so_targets; i++) {
            struct zink_so_target *t = (struct zink_so_target *)ctx->so_targets[i];
            if (t) {
               struct zink_resource *res = zink_resource(t->base.buffer);
               zink_screen(ctx->base.screen)->buffer_barrier(ctx, res,
                                                             VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT,
                                                             VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT);
               if (!ctx->unordered_blitting)
                  res->obj->unordered_read = res->obj->unordered_write = false;
            }
         }
      }
   }
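   /* emit barriers for the draw's own input buffers (index/indirect) now, while the
    * renderpass has not yet been (re)started by zink_batch_rp() below
    */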
   barrier_draw_buffers(ctx, dinfo, dindirect, index_buffer);
   /* this may re-emit draw buffer barriers, but such synchronization is harmless */
   if (!ctx->blitting)
      zink_update_barriers(ctx, false, index_buffer, dindirect ? dindirect->buffer : NULL,
                           dindirect ? dindirect->indirect_draw_count : NULL);

   /* ensure synchronization between doing streamout with counter buffer
    * and using counter buffer for indirect draw
    */
   if (so_target && so_target->counter_buffer_valid) {
      struct zink_resource *res = zink_resource(so_target->counter_buffer);
      zink_screen(ctx->base.screen)->buffer_barrier(ctx, res,
                                                    VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT,
                                                    VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
      if (!ctx->unordered_blitting)
         res->obj->unordered_read = false;
   }

   zink_query_update_gs_states(ctx);

   if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) {
      zink_batch_no_rp(ctx);
      VkMemoryBarrier mb;
      mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
      mb.pNext = NULL;
      mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
      mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
      VKSCR(CmdPipelineBarrier)(ctx->bs->cmdbuf,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                0, 1, &mb, 0, NULL, 0, NULL);
   }

   zink_batch_rp(ctx);
   /* check dead swapchain */
   if (unlikely(!ctx->in_rp))
      return;

   if (BATCH_CHANGED)
      zink_update_descriptor_refs(ctx, false);

   /* these must be after renderpass start to avoid issues with recursion */
   bool drawid_broken = false;
   if (reads_drawid && (!dindirect || !dindirect->buffer))
      drawid_broken = (drawid_offset != 0 ||
                      (!HAS_MULTIDRAW && num_draws > 1) ||
                      (HAS_MULTIDRAW && num_draws > 1 && !dinfo->increment_draw_id));
   if (drawid_broken != zink_get_last_vertex_key(ctx)->push_drawid)
      zink_set_last_vertex_key(ctx)->push_drawid = drawid_broken;

   bool rast_prim_changed = false;
   bool prim_changed = false;
   bool rast_state_changed = ctx->rast_state_changed;
   if (mode_changed || ctx->gfx_pipeline_state.modules_changed ||
       rast_state_changed) {
      enum mesa_prim rast_prim = zink_rast_prim(ctx, dinfo);
      if (rast_prim != ctx->gfx_pipeline_state.rast_prim) {
         bool points_changed =
            (ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS) !=
            (rast_prim == MESA_PRIM_POINTS);

         prim_changed = ctx->gfx_pipeline_state.rast_prim != rast_prim;

         static bool rect_warned = false;
         if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE3 && rast_prim == MESA_PRIM_LINES && !rect_warned &&
             (VkLineRasterizationModeEXT)rast_state->hw_state.line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT) {
            if (screen->info.line_rast_feats.rectangularLines)
               rect_warned = true;
            else
               warn_missing_feature(rect_warned, "rectangularLines");
         }

         ctx->gfx_pipeline_state.rast_prim = rast_prim;
         rast_prim_changed = true;

         if (points_changed && ctx->rast_state->base.point_quad_rasterization)
            zink_set_fs_point_coord_key(ctx);
      }
   }
   ctx->gfx_pipeline_state.gfx_prim_mode = mode;

   if ((mode_changed || prim_changed || rast_state_changed || ctx->gfx_pipeline_state.modules_changed)) {
      zink_set_primitive_emulation_keys(ctx);
   }

   if (index_size) {
      const VkIndexType index_type[3] = {
         VK_INDEX_TYPE_UINT8_EXT,
         VK_INDEX_TYPE_UINT16,
         VK_INDEX_TYPE_UINT32,
      };
      struct zink_resource *res = zink_resource(index_buffer);
      VKCTX(CmdBindIndexBuffer)(bs->cmdbuf, res->obj->buffer, index_offset, index_type[index_size >> 1]);
   }
   if (DYNAMIC_STATE < ZINK_DYNAMIC_STATE2) {
      if (ctx->gfx_pipeline_state.dyn_state2.primitive_restart != dinfo->primitive_restart)
         ctx->gfx_pipeline_state.dirty = true;
      ctx->gfx_pipeline_state.dyn_state2.primitive_restart = dinfo->primitive_restart;
   }
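   /* the renderpass is active from here on; this is the second half of the split
    * xfb sequence described above: barriers went out pre-renderpass, the actual
    * buffer binding happens now
    */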
&& ctx->dirty_so_targets) zink_emit_stream_output_targets(pctx); bool pipeline_changed = update_gfx_pipeline(ctx, bs, mode); if (BATCH_CHANGED || ctx->vp_state_changed || (DYNAMIC_STATE == ZINK_NO_DYNAMIC_STATE && pipeline_changed)) { VkViewport viewports[PIPE_MAX_VIEWPORTS]; for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) { VkViewport viewport = { ctx->vp_state.viewport_states[i].translate[0] - ctx->vp_state.viewport_states[i].scale[0], ctx->vp_state.viewport_states[i].translate[1] - ctx->vp_state.viewport_states[i].scale[1], MAX2(ctx->vp_state.viewport_states[i].scale[0] * 2, 1), ctx->vp_state.viewport_states[i].scale[1] * 2, CLAMP(ctx->rast_state->base.clip_halfz ? ctx->vp_state.viewport_states[i].translate[2] : ctx->vp_state.viewport_states[i].translate[2] - ctx->vp_state.viewport_states[i].scale[2], 0, 1), CLAMP(ctx->vp_state.viewport_states[i].translate[2] + ctx->vp_state.viewport_states[i].scale[2], 0, 1) }; if (!ctx->rast_state->base.half_pixel_center) { /* magic constant value from dxvk */ float cf = 0.5f - (1.0f / 128.0f); viewport.x += cf; if (viewport.height < 0) viewport.y += cf; else viewport.y -= cf; } viewports[i] = viewport; } if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) VKCTX(CmdSetViewportWithCount)(bs->cmdbuf, ctx->vp_state.num_viewports, viewports); else VKCTX(CmdSetViewport)(bs->cmdbuf, 0, ctx->vp_state.num_viewports, viewports); } if (BATCH_CHANGED || ctx->scissor_changed || ctx->vp_state_changed || (DYNAMIC_STATE == ZINK_NO_DYNAMIC_STATE && pipeline_changed)) { VkRect2D scissors[PIPE_MAX_VIEWPORTS]; if (ctx->rast_state->base.scissor) { for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) { scissors[i].offset.x = ctx->vp_state.scissor_states[i].minx; scissors[i].offset.y = ctx->vp_state.scissor_states[i].miny; scissors[i].extent.width = ctx->vp_state.scissor_states[i].maxx - ctx->vp_state.scissor_states[i].minx; scissors[i].extent.height = ctx->vp_state.scissor_states[i].maxy - ctx->vp_state.scissor_states[i].miny; } } else { for (unsigned i = 0; i < ctx->vp_state.num_viewports; i++) { scissors[i].offset.x = 0; scissors[i].offset.y = 0; scissors[i].extent.width = ctx->fb_state.width; scissors[i].extent.height = ctx->fb_state.height; } } if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) VKCTX(CmdSetScissorWithCount)(bs->cmdbuf, ctx->vp_state.num_viewports, scissors); else VKCTX(CmdSetScissor)(bs->cmdbuf, 0, ctx->vp_state.num_viewports, scissors); } ctx->vp_state_changed = false; ctx->scissor_changed = false; if (BATCH_CHANGED || ctx->stencil_ref_changed) { VKCTX(CmdSetStencilReference)(bs->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, ctx->stencil_ref.ref_value[0]); VKCTX(CmdSetStencilReference)(bs->cmdbuf, VK_STENCIL_FACE_BACK_BIT, ctx->stencil_ref.ref_value[1]); ctx->stencil_ref_changed = false; } if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || ctx->dsa_state_changed)) { VKCTX(CmdSetDepthBoundsTestEnable)(bs->cmdbuf, dsa_state->hw_state.depth_bounds_test); if (dsa_state->hw_state.depth_bounds_test) VKCTX(CmdSetDepthBounds)(bs->cmdbuf, dsa_state->hw_state.min_depth_bounds, dsa_state->hw_state.max_depth_bounds); VKCTX(CmdSetDepthTestEnable)(bs->cmdbuf, dsa_state->hw_state.depth_test); VKCTX(CmdSetDepthCompareOp)(bs->cmdbuf, dsa_state->hw_state.depth_compare_op); VKCTX(CmdSetDepthWriteEnable)(bs->cmdbuf, dsa_state->hw_state.depth_write); VKCTX(CmdSetStencilTestEnable)(bs->cmdbuf, dsa_state->hw_state.stencil_test); if (dsa_state->hw_state.stencil_test) { VKCTX(CmdSetStencilOp)(bs->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, 
      if (dsa_state->hw_state.stencil_test) {
         VKCTX(CmdSetStencilOp)(bs->cmdbuf, VK_STENCIL_FACE_FRONT_BIT,
                                dsa_state->hw_state.stencil_front.failOp,
                                dsa_state->hw_state.stencil_front.passOp,
                                dsa_state->hw_state.stencil_front.depthFailOp,
                                dsa_state->hw_state.stencil_front.compareOp);
         VKCTX(CmdSetStencilOp)(bs->cmdbuf, VK_STENCIL_FACE_BACK_BIT,
                                dsa_state->hw_state.stencil_back.failOp,
                                dsa_state->hw_state.stencil_back.passOp,
                                dsa_state->hw_state.stencil_back.depthFailOp,
                                dsa_state->hw_state.stencil_back.compareOp);
         if (dsa_state->base.stencil[1].enabled) {
            VKCTX(CmdSetStencilWriteMask)(bs->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, dsa_state->hw_state.stencil_front.writeMask);
            VKCTX(CmdSetStencilWriteMask)(bs->cmdbuf, VK_STENCIL_FACE_BACK_BIT, dsa_state->hw_state.stencil_back.writeMask);
            VKCTX(CmdSetStencilCompareMask)(bs->cmdbuf, VK_STENCIL_FACE_FRONT_BIT, dsa_state->hw_state.stencil_front.compareMask);
            VKCTX(CmdSetStencilCompareMask)(bs->cmdbuf, VK_STENCIL_FACE_BACK_BIT, dsa_state->hw_state.stencil_back.compareMask);
         } else {
            VKCTX(CmdSetStencilWriteMask)(bs->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.writeMask);
            VKCTX(CmdSetStencilCompareMask)(bs->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.compareMask);
         }
      } else {
         VKCTX(CmdSetStencilWriteMask)(bs->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.writeMask);
         VKCTX(CmdSetStencilCompareMask)(bs->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK, dsa_state->hw_state.stencil_front.compareMask);
         VKCTX(CmdSetStencilOp)(bs->cmdbuf, VK_STENCIL_FACE_FRONT_AND_BACK,
                                VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP,
                                VK_STENCIL_OP_KEEP, VK_COMPARE_OP_ALWAYS);
      }
   }
   ctx->dsa_state_changed = false;

   if (BATCH_CHANGED || rast_state_changed) {
      if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) {
         VKCTX(CmdSetFrontFace)(bs->cmdbuf, (VkFrontFace)ctx->gfx_pipeline_state.dyn_state1.front_face);
         VKCTX(CmdSetCullMode)(bs->cmdbuf, ctx->gfx_pipeline_state.dyn_state1.cull_mode);
      }

      if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE3) {
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_STIPPLE))
            VKCTX(CmdSetLineStippleEXT)(bs->cmdbuf,
                                        rast_state->base.line_stipple_factor,
                                        rast_state->base.line_stipple_pattern);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_CLIP))
            VKCTX(CmdSetDepthClipEnableEXT)(bs->cmdbuf, rast_state->hw_state.depth_clip);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_CLAMP))
            VKCTX(CmdSetDepthClampEnableEXT)(bs->cmdbuf, rast_state->hw_state.depth_clamp);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_POLYGON))
            VKCTX(CmdSetPolygonModeEXT)(bs->cmdbuf, (VkPolygonMode)rast_state->hw_state.polygon_mode);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_HALFZ))
            VKCTX(CmdSetDepthClipNegativeOneToOneEXT)(bs->cmdbuf, !rast_state->hw_state.clip_halfz);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_PV))
            VKCTX(CmdSetProvokingVertexModeEXT)(bs->cmdbuf,
                                                rast_state->hw_state.pv_last ?
                                                VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT :
                                                VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_LINE))
            VKCTX(CmdSetLineRasterizationModeEXT)(bs->cmdbuf, rast_state->dynamic_line_mode);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_RAST_STIPPLE_ON))
            VKCTX(CmdSetLineStippleEnableEXT)(bs->cmdbuf, rast_state->hw_state.line_stipple_enable);
      }
   }
   if ((BATCH_CHANGED || ctx->sample_mask_changed) && screen->have_full_ds3) {
      VKCTX(CmdSetRasterizationSamplesEXT)(bs->cmdbuf, (VkSampleCountFlagBits)(ctx->gfx_pipeline_state.rast_samples + 1));
      VKCTX(CmdSetSampleMaskEXT)(bs->cmdbuf, (VkSampleCountFlagBits)(ctx->gfx_pipeline_state.rast_samples + 1), &ctx->gfx_pipeline_state.sample_mask);
      ctx->sample_mask_changed = false;
   }
   if ((BATCH_CHANGED || ctx->blend_state_changed)) {
      if (ctx->gfx_pipeline_state.blend_state) {
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_A2C))
            VKCTX(CmdSetAlphaToCoverageEnableEXT)(bs->cmdbuf,
                                                  ctx->gfx_pipeline_state.blend_state->alpha_to_coverage &&
                                                  ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0));
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_A21))
            VKCTX(CmdSetAlphaToOneEnableEXT)(bs->cmdbuf, ctx->gfx_pipeline_state.blend_state->alpha_to_one);
         if (ctx->fb_state.nr_cbufs) {
            if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_ON))
               VKCTX(CmdSetColorBlendEnableEXT)(bs->cmdbuf, 0, ctx->fb_state.nr_cbufs, ctx->gfx_pipeline_state.blend_state->ds3.enables);
            if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_WRITE))
               VKCTX(CmdSetColorWriteMaskEXT)(bs->cmdbuf, 0, ctx->fb_state.nr_cbufs, ctx->gfx_pipeline_state.blend_state->ds3.wrmask);
            if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_EQ))
               VKCTX(CmdSetColorBlendEquationEXT)(bs->cmdbuf, 0, ctx->fb_state.nr_cbufs, ctx->gfx_pipeline_state.blend_state->ds3.eq);
         }
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_LOGIC_ON))
            VKCTX(CmdSetLogicOpEnableEXT)(bs->cmdbuf, ctx->gfx_pipeline_state.blend_state->logicop_enable);
         if (ctx->ds3_states & BITFIELD_BIT(ZINK_DS3_BLEND_LOGIC))
            VKCTX(CmdSetLogicOpEXT)(bs->cmdbuf, ctx->gfx_pipeline_state.blend_state->logicop_func);
      }
   }
   ctx->ds3_states = 0;

   if (BATCH_CHANGED ||
       /* only re-emit on non-batch change when actually drawing lines */
       ((ctx->line_width_changed || rast_prim_changed) && ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES)) {
      VKCTX(CmdSetLineWidth)(bs->cmdbuf, rast_state->line_width);
      ctx->line_width_changed = false;
   }

   if (BATCH_CHANGED || mode_changed ||
       ctx->gfx_pipeline_state.modules_changed ||
       rast_state_changed) {
      bool depth_bias =
         zink_prim_type(ctx, dinfo) == MESA_PRIM_TRIANGLES &&
         rast_state->offset_fill;

      if (depth_bias) {
         if (rast_state->base.offset_units_unscaled) {
            VKCTX(CmdSetDepthBias)(bs->cmdbuf, rast_state->offset_units * ctx->depth_bias_scale_factor, rast_state->offset_clamp, rast_state->offset_scale);
         } else {
            VKCTX(CmdSetDepthBias)(bs->cmdbuf, rast_state->offset_units, rast_state->offset_clamp, rast_state->offset_scale);
         }
      } else {
         VKCTX(CmdSetDepthBias)(bs->cmdbuf, 0.0f, 0.0f, 0.0f);
      }
   }
   ctx->rast_state_changed = false;

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE) {
      if (ctx->sample_locations_changed) {
         VkSampleLocationsInfoEXT loc;
         zink_init_vk_sample_locations(ctx, &loc);
         VKCTX(CmdSetSampleLocationsEXT)(bs->cmdbuf, &loc);
      }
      ctx->sample_locations_changed = false;
   }

   if (BATCH_CHANGED || ctx->blend_color_changed) {
      VKCTX(CmdSetBlendConstants)(bs->cmdbuf, ctx->blend_constants);
   }
   ctx->blend_state_changed = false;
   ctx->blend_color_changed = false;
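   /* vertex-state draws (DRAW_STATE) bind their vertex buffer via zink_bind_vertex_state(),
    * so only regular draws need to rebind the context's vertex buffers here
    */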
   if (!DRAW_STATE) {
      if (BATCH_CHANGED || ctx->vertex_buffers_dirty) {
         if (DYNAMIC_STATE == ZINK_DYNAMIC_VERTEX_INPUT || ctx->gfx_pipeline_state.uses_dynamic_stride)
            zink_bind_vertex_buffers<DYNAMIC_STATE>(ctx);
         else
            zink_bind_vertex_buffers<ZINK_NO_DYNAMIC_STATE>(ctx);
      }
   }

   if (BATCH_CHANGED) {
      ctx->pipeline_changed[0] = false;
      zink_select_draw_vbo(ctx);
   }

   if (DYNAMIC_STATE != ZINK_NO_DYNAMIC_STATE && (BATCH_CHANGED || mode_changed))
      VKCTX(CmdSetPrimitiveTopology)(bs->cmdbuf, zink_primitive_topology(mode));

   if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->primitive_restart != dinfo->primitive_restart)) {
      VKCTX(CmdSetPrimitiveRestartEnable)(bs->cmdbuf, dinfo->primitive_restart);
      ctx->primitive_restart = dinfo->primitive_restart;
   }

   if (DYNAMIC_STATE >= ZINK_DYNAMIC_STATE2 && (BATCH_CHANGED || ctx->rasterizer_discard_changed)) {
      VKCTX(CmdSetRasterizerDiscardEnable)(bs->cmdbuf, ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard);
      ctx->rasterizer_discard_changed = false;
   }

   if (zink_program_has_descriptors(&ctx->curr_program->base))
      zink_descriptors_update(ctx, false);

   if (ctx->di.any_bindless_dirty &&
       /* some apps (d3dretrace) call MakeTextureHandleResidentARB randomly */
       zink_program_has_descriptors(&ctx->curr_program->base) &&
       ctx->curr_program->base.dd.bindless)
      zink_descriptors_update_bindless(ctx);

   if (reads_basevertex) {
      unsigned draw_mode_is_indexed = index_size > 0;
      VKCTX(CmdPushConstants)(bs->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS,
                              offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed), sizeof(unsigned),
                              &draw_mode_is_indexed);
   }
   if (ctx->curr_program->shaders[MESA_SHADER_TESS_CTRL] &&
       ctx->curr_program->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated) {
      VKCTX(CmdPushConstants)(bs->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS,
                              offsetof(struct zink_gfx_push_constant, default_inner_level), sizeof(float) * 6,
                              &ctx->tess_levels[0]);
   }

   if (!screen->optimal_keys) {
      if (zink_get_fs_key(ctx)->lower_line_stipple ||
          zink_get_gs_key(ctx)->lower_gl_point ||
          zink_get_fs_key(ctx)->lower_line_smooth) {

         assert(zink_get_gs_key(ctx)->lower_line_stipple ==
                zink_get_fs_key(ctx)->lower_line_stipple);

         assert(zink_get_gs_key(ctx)->lower_line_smooth ==
                zink_get_fs_key(ctx)->lower_line_smooth);

         float viewport_scale[2] = {
            ctx->vp_state.viewport_states[0].scale[0],
            ctx->vp_state.viewport_states[0].scale[1]
         };
         VKCTX(CmdPushConstants)(bs->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS,
                                 offsetof(struct zink_gfx_push_constant, viewport_scale), sizeof(float) * 2,
                                 &viewport_scale);

         uint32_t stipple = ctx->rast_state->base.line_stipple_pattern;
         stipple |= ctx->rast_state->base.line_stipple_factor << 16;
         VKCTX(CmdPushConstants)(bs->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS,
                                 offsetof(struct zink_gfx_push_constant, line_stipple_pattern), sizeof(uint32_t),
                                 &stipple);

         if (ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_FRAGMENT].key.fs.lower_line_smooth) {
            float line_width = ctx->rast_state->base.line_width;
            VKCTX(CmdPushConstants)(bs->cmdbuf, ctx->curr_program->base.layout, VK_SHADER_STAGE_ALL_GRAPHICS,
                                    offsetof(struct zink_gfx_push_constant, line_width), sizeof(uint32_t),
                                    &line_width);
         }
      }
   }

   if (have_streamout) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         counter_buffers[i] = VK_NULL_HANDLE;
         if (t) {
            struct zink_resource *res = zink_resource(t->counter_buffer);
            t->stride = ctx->last_vertex_stage->sinfo.stride[i];
            zink_batch_reference_resource_rw(ctx, res, true);
            if (!ctx->unordered_blitting)
               res->obj->unordered_read = res->obj->unordered_write = false;
            if (t->counter_buffer_valid) {
               counter_buffers[i] = res->obj->buffer;
               counter_buffer_offsets[i] = t->counter_buffer_offset;
            }
         }
      }
      VKCTX(CmdBeginTransformFeedbackEXT)(bs->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }

   bool marker = false;
   if (unlikely(zink_tracing)) {
      VkViewport viewport = {
         ctx->vp_state.viewport_states[0].translate[0] - ctx->vp_state.viewport_states[0].scale[0],
         ctx->vp_state.viewport_states[0].translate[1] - ctx->vp_state.viewport_states[0].scale[1],
         MAX2(ctx->vp_state.viewport_states[0].scale[0] * 2, 1),
         ctx->vp_state.viewport_states[0].scale[1] * 2,
         CLAMP(ctx->rast_state->base.clip_halfz ?
               ctx->vp_state.viewport_states[0].translate[2] :
               ctx->vp_state.viewport_states[0].translate[2] - ctx->vp_state.viewport_states[0].scale[2],
               0, 1),
         CLAMP(ctx->vp_state.viewport_states[0].translate[2] + ctx->vp_state.viewport_states[0].scale[2],
               0, 1)
      };
      if (ctx->blitting) {
         bool is_zs = util_format_is_depth_or_stencil(ctx->sampler_views[MESA_SHADER_FRAGMENT][0]->format);
         marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "u_blitter(%s->%s, %dx%d)",
                                              util_format_short_name(ctx->sampler_views[MESA_SHADER_FRAGMENT][0]->format),
                                              util_format_short_name((is_zs ? ctx->fb_state.zsbuf : ctx->fb_state.cbufs[0])->format),
                                              lround(viewport.width), lround(viewport.height));
      } else {
         marker = zink_cmd_debug_marker_begin(ctx, VK_NULL_HANDLE, "draw(%u cbufs|%s, %dx%d)",
                                              ctx->fb_state.nr_cbufs, ctx->fb_state.zsbuf ? "zsbuf" : "",
                                              lround(viewport.width), lround(viewport.height));
      }
   }

   bool needs_drawid = reads_drawid && zink_get_last_vertex_key(ctx)->push_drawid;
   work_count += num_draws;
   if (index_size > 0) {
      if (dindirect && dindirect->buffer) {
         assert(num_draws == 1);
         if (needs_drawid)
            update_drawid(ctx, drawid_offset);
         struct zink_resource *indirect = zink_resource(dindirect->buffer);
         zink_batch_reference_resource_rw(ctx, indirect, false);
         if (dindirect->indirect_draw_count) {
            struct zink_resource *indirect_draw_count = zink_resource(dindirect->indirect_draw_count);
            zink_batch_reference_resource_rw(ctx, indirect_draw_count, false);
            VKCTX(CmdDrawIndexedIndirectCount)(bs->cmdbuf, indirect->obj->buffer, dindirect->offset,
                                               indirect_draw_count->obj->buffer, dindirect->indirect_draw_count_offset,
                                               dindirect->draw_count, dindirect->stride);
         } else
            VKCTX(CmdDrawIndexedIndirect)(bs->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride);
      } else {
         if (need_index_buffer_unref)
            draw_indexed_need_index_buffer_unref(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
         else
            draw_indexed<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
      }
   } else {
      if (so_target && screen->info.tf_props.transformFeedbackDraw) {
         /* GTF-GL46.gtf40.GL3Tests.transform_feedback2.transform_feedback2_api attempts a bogus xfb
          * draw using a streamout target that has no data;
          * to avoid hanging the gpu, reject any such draws
          */
         if (so_target->counter_buffer_valid) {
            if (needs_drawid)
               update_drawid(ctx, drawid_offset);
            zink_batch_reference_resource_rw(ctx, zink_resource(so_target->base.buffer), false);
            zink_batch_reference_resource_rw(ctx, zink_resource(so_target->counter_buffer), true);
            VKCTX(CmdDrawIndirectByteCountEXT)(bs->cmdbuf, dinfo->instance_count, dinfo->start_instance,
                                               zink_resource(so_target->counter_buffer)->obj->buffer, so_target->counter_buffer_offset, 0,
                                               MIN2(so_target->stride, screen->info.tf_props.maxTransformFeedbackBufferDataStride));
         }
      } else if (dindirect && dindirect->buffer) {
         assert(num_draws == 1);
         if (needs_drawid)
            update_drawid(ctx, drawid_offset);
         struct zink_resource *indirect = zink_resource(dindirect->buffer);
         zink_batch_reference_resource_rw(ctx, indirect, false);
         if (dindirect->indirect_draw_count) {
            struct zink_resource *indirect_draw_count = zink_resource(dindirect->indirect_draw_count);
            zink_batch_reference_resource_rw(ctx, indirect_draw_count, false);
            VKCTX(CmdDrawIndirectCount)(bs->cmdbuf, indirect->obj->buffer, dindirect->offset,
                                        indirect_draw_count->obj->buffer, dindirect->indirect_draw_count_offset,
                                        dindirect->draw_count, dindirect->stride);
         } else
            VKCTX(CmdDrawIndirect)(bs->cmdbuf, indirect->obj->buffer, dindirect->offset, dindirect->draw_count, dindirect->stride);
      } else {
         draw<HAS_MULTIDRAW>(ctx, dinfo, draws, num_draws, drawid_offset, needs_drawid);
      }
   }

   if (unlikely(zink_tracing))
      zink_cmd_debug_marker_end(ctx, bs->cmdbuf, marker);

   if (have_streamout) {
      for (unsigned i = 0; i < ctx->num_so_targets; i++) {
         struct zink_so_target *t = zink_so_target(ctx->so_targets[i]);
         if (t) {
            counter_buffers[i] = zink_resource(t->counter_buffer)->obj->buffer;
            counter_buffer_offsets[i] = t->counter_buffer_offset;
            t->counter_buffer_valid = true;
         }
      }
      VKCTX(CmdEndTransformFeedbackEXT)(bs->cmdbuf, 0, ctx->num_so_targets, counter_buffers, counter_buffer_offsets);
   }

   ctx->last_work_was_compute = false;
   ctx->work_count = work_count;
   /* flush if there's >30k draws */
   if (!ctx->unordered_blitting &&
       (unlikely(work_count >= 30000) || ctx->oom_flush))
      pctx->flush(pctx, NULL, 0);
}

template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
static void
zink_draw_vbo(struct pipe_context *pctx,
              const struct pipe_draw_info *info,
              unsigned drawid_offset,
              const struct pipe_draw_indirect_info *indirect,
              const struct pipe_draw_start_count_bias *draws,
              unsigned num_draws)
{
   zink_draw<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED, false>(pctx, info, drawid_offset, indirect, draws, num_draws, NULL, 0);
}

template <util_popcnt HAS_POPCNT>
static void
zink_vertex_state_mask(struct zink_context *ctx, struct pipe_vertex_state *vstate, uint32_t partial_velem_mask)
{
   struct zink_vertex_state *zstate = (struct zink_vertex_state *)vstate;
   VkCommandBuffer cmdbuf = ctx->bs->cmdbuf;

   if (partial_velem_mask == vstate->input.full_velem_mask) {
      VKCTX(CmdSetVertexInputEXT)(cmdbuf,
                                  zstate->velems.hw_state.num_bindings, zstate->velems.hw_state.dynbindings,
                                  zstate->velems.hw_state.num_attribs, zstate->velems.hw_state.dynattribs);
      return;
   }

   VkVertexInputAttributeDescription2EXT dynattribs[PIPE_MAX_ATTRIBS];
   unsigned num_attribs = 0;
   u_foreach_bit(elem, vstate->input.full_velem_mask & partial_velem_mask) {
      unsigned idx = util_bitcount_fast<HAS_POPCNT>(vstate->input.full_velem_mask & BITFIELD_MASK(elem));
      dynattribs[num_attribs] = zstate->velems.hw_state.dynattribs[idx];
      dynattribs[num_attribs].location = num_attribs;
      num_attribs++;
   }

   VKCTX(CmdSetVertexInputEXT)(cmdbuf,
                               zstate->velems.hw_state.num_bindings, zstate->velems.hw_state.dynbindings,
                               num_attribs, dynattribs);
}

template <util_popcnt HAS_POPCNT>
static void
zink_bind_vertex_state(struct zink_context *ctx, struct pipe_vertex_state *vstate, uint32_t partial_velem_mask)
{
   struct zink_vertex_state *zstate = (struct zink_vertex_state *)vstate;
   VkCommandBuffer cmdbuf = ctx->bs->cmdbuf;
   if (!vstate->input.vbuffer.buffer.resource)
      return;

   zink_vertex_state_mask<HAS_POPCNT>(ctx, vstate, partial_velem_mask);

   struct zink_resource *res = zink_resource(vstate->input.vbuffer.buffer.resource);
   zink_batch_resource_usage_set(ctx->bs, res, false, true);
   VkDeviceSize offset = vstate->input.vbuffer.buffer_offset;
   VKCTX(CmdBindVertexBuffers)(cmdbuf, 0,
                               zstate->velems.hw_state.num_bindings,
                               &res->obj->buffer, &offset);
}
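/* pipe_context::draw_vertex_state entrypoint: a fast path for pre-baked
 * vertex/index buffer pairs which synthesizes an always-indexed, single-instance
 * pipe_draw_info (32-bit indices) and then funnels into zink_draw()
 */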
template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, util_popcnt HAS_POPCNT, bool BATCH_CHANGED>
static void
zink_draw_vertex_state(struct pipe_context *pctx,
                       struct pipe_vertex_state *vstate,
                       uint32_t partial_velem_mask,
                       struct pipe_draw_vertex_state_info info,
                       const struct pipe_draw_start_count_bias *draws,
                       unsigned num_draws)
{
   struct pipe_draw_info dinfo = {};

   dinfo.mode = info.mode;
   dinfo.index_size = 4;
   dinfo.instance_count = 1;
   dinfo.index.resource = vstate->input.indexbuf;
   struct zink_context *ctx = zink_context(pctx);
   struct zink_resource *res = zink_resource(vstate->input.vbuffer.buffer.resource);
   zink_screen(ctx->base.screen)->buffer_barrier(ctx, res, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,
                                                 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT);
   if (!ctx->unordered_blitting)
      res->obj->unordered_read = false;
   zink_bind_vertex_state<HAS_POPCNT>(ctx, vstate, partial_velem_mask);

   zink_draw<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED, true>(pctx, &dinfo, 0, NULL, draws, num_draws, vstate, partial_velem_mask);
   /* ensure ctx->vertex_buffers gets rebound on next non-vstate draw */
   ctx->vertex_buffers_dirty = true;

   if (info.take_vertex_state_ownership)
      pipe_vertex_state_reference(&vstate, NULL);
}

template <bool BATCH_CHANGED>
static void
zink_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
{
   struct zink_context *ctx = zink_context(pctx);
   struct zink_batch_state *bs = ctx->bs;
   struct zink_screen *screen = zink_screen(pctx->screen);

   if (ctx->render_condition_active)
      zink_start_conditional_render(ctx);

   if (info->indirect) {
      /* VK_ACCESS_INDIRECT_COMMAND_READ_BIT specifies read access to indirect command data read as
       * part of an indirect build, trace, drawing or dispatching command. Such access occurs in the
       * VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT pipeline stage.
       *
       * - Chapter 7. Synchronization and Cache Control
       */
      check_buffer_barrier(ctx, info->indirect, VK_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT);
   }

   zink_update_barriers(ctx, true, NULL, info->indirect, NULL);
   if (ctx->memory_barrier)
      zink_flush_memory_barrier(ctx, true);

   if (unlikely(zink_debug & ZINK_DEBUG_SYNC)) {
      zink_batch_no_rp(ctx);
      VkMemoryBarrier mb;
      mb.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
      mb.pNext = NULL;
      mb.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
      mb.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
      VKSCR(CmdPipelineBarrier)(ctx->bs->cmdbuf,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                0, 1, &mb, 0, NULL, 0, NULL);
   }

   zink_program_update_compute_pipeline_state(ctx, ctx->curr_compute, info);
   VkPipeline prev_pipeline = ctx->compute_pipeline_state.pipeline;

   if (BATCH_CHANGED) {
      zink_update_descriptor_refs(ctx, true);
   }
   if (ctx->compute_dirty) {
      /* update inlinable constants */
      zink_update_compute_program(ctx);
      ctx->compute_dirty = false;
   }

   VkPipeline pipeline = zink_get_compute_pipeline(screen, ctx->curr_compute,
                                                   &ctx->compute_pipeline_state);
   if (prev_pipeline != pipeline || BATCH_CHANGED)
      VKCTX(CmdBindPipeline)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
   if (BATCH_CHANGED) {
      ctx->pipeline_changed[1] = false;
      zink_select_launch_grid(ctx);
   }

   if (zink_program_has_descriptors(&ctx->curr_compute->base))
      zink_descriptors_update(ctx, true);
   if (ctx->di.any_bindless_dirty && ctx->curr_compute->base.dd.bindless)
      zink_descriptors_update_bindless(ctx);

   ctx->work_count++;
   zink_batch_no_rp(ctx);
   if (!ctx->queries_disabled)
      zink_resume_cs_query(ctx);
   if (info->indirect) {
      VKCTX(CmdDispatchIndirect)(bs->cmdbuf, zink_resource(info->indirect)->obj->buffer, info->indirect_offset);
      zink_batch_reference_resource_rw(ctx, zink_resource(info->indirect), false);
   } else
      VKCTX(CmdDispatch)(bs->cmdbuf, info->grid[0], info->grid[1], info->grid[2]);
   bs->has_work = true;
   ctx->last_work_was_compute = true;
   /* flush if there's >30k computes */
   if (!ctx->unordered_blitting &&
       (unlikely(ctx->work_count >= 30000) || ctx->oom_flush))
      pctx->flush(pctx, NULL, 0);
}

template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE, bool BATCH_CHANGED>
static void
init_batch_changed_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2])
{
   draw_vbo_array[HAS_MULTIDRAW][DYNAMIC_STATE][BATCH_CHANGED] = zink_draw_vbo<HAS_MULTIDRAW, DYNAMIC_STATE, BATCH_CHANGED>;
   draw_state_array[HAS_MULTIDRAW][DYNAMIC_STATE][0][BATCH_CHANGED] = zink_draw_vertex_state<HAS_MULTIDRAW, DYNAMIC_STATE, POPCNT_NO, BATCH_CHANGED>;
   draw_state_array[HAS_MULTIDRAW][DYNAMIC_STATE][1][BATCH_CHANGED] = zink_draw_vertex_state<HAS_MULTIDRAW, DYNAMIC_STATE, POPCNT_YES, BATCH_CHANGED>;
}

template <zink_multidraw HAS_MULTIDRAW, zink_dynamic_state DYNAMIC_STATE>
static void
init_dynamic_state_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2])
{
   init_batch_changed_functions<HAS_MULTIDRAW, DYNAMIC_STATE, false>(ctx, draw_vbo_array, draw_state_array);
   init_batch_changed_functions<HAS_MULTIDRAW, DYNAMIC_STATE, true>(ctx, draw_vbo_array, draw_state_array);
}

template <zink_multidraw HAS_MULTIDRAW>
static void
init_multidraw_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2])
{
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_NO_DYNAMIC_STATE>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE2>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_STATE3>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_VERTEX_INPUT2>(ctx, draw_vbo_array, draw_state_array);
   init_dynamic_state_functions<HAS_MULTIDRAW, ZINK_DYNAMIC_VERTEX_INPUT>(ctx, draw_vbo_array, draw_state_array);
}

static void
init_all_draw_functions(struct zink_context *ctx, pipe_draw_func draw_vbo_array[2][6][2], pipe_draw_vertex_state_func draw_state_array[2][6][2][2])
{
   init_multidraw_functions<ZINK_NO_MULTIDRAW>(ctx, draw_vbo_array, draw_state_array);
   init_multidraw_functions<ZINK_MULTIDRAW>(ctx, draw_vbo_array, draw_state_array);
}

template <bool BATCH_CHANGED>
static void
init_grid_batch_changed_functions(struct zink_context *ctx)
{
   ctx->launch_grid[BATCH_CHANGED] = zink_launch_grid<BATCH_CHANGED>;
}

static void
init_all_grid_functions(struct zink_context *ctx)
{
   init_grid_batch_changed_functions<false>(ctx);
   init_grid_batch_changed_functions<true>(ctx);
}

static void
zink_invalid_draw_vbo(struct pipe_context *pipe,
                      const struct pipe_draw_info *dinfo,
                      unsigned drawid_offset,
                      const struct pipe_draw_indirect_info *dindirect,
                      const struct pipe_draw_start_count_bias *draws,
                      unsigned num_draws)
{
   unreachable("vertex shader not bound");
}

static void
zink_invalid_draw_vertex_state(struct pipe_context *pipe,
                               struct pipe_vertex_state *vstate,
                               uint32_t partial_velem_mask,
                               struct pipe_draw_vertex_state_info info,
                               const struct pipe_draw_start_count_bias *draws,
                               unsigned num_draws)
{
   unreachable("vertex shader not bound");
}

static void
zink_invalid_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
{
   unreachable("compute shader not bound");
}

#define STAGE_BASE 0
#define STAGE_BASE_GS (BITFIELD_BIT(MESA_SHADER_GEOMETRY) >> 1)
#define STAGE_BASE_TES (BITFIELD_BIT(MESA_SHADER_TESS_EVAL) >> 1)
#define STAGE_BASE_TES_GS ((BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)) >> 1)
#define STAGE_BASE_TCS_TES ((BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL)) >> 1)
#define STAGE_BASE_TCS_TES_GS ((BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY)) >> 1)
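/* The STAGE_MASK values above shift each stage bit down by one to drop
 * MESA_SHADER_VERTEX (bit 0, always present), leaving a 3-bit table index:
 * TCS = bit 0, TES = bit 1, GS = bit 2. For example,
 * STAGE_BASE_TES_GS = ((1 << 2) | (1 << 3)) >> 1 = 6, so VS+TES+GS+FS programs
 * hash into slot 6 of the 8 caches (program_cache[0..7]) initialized below.
 */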
template <unsigned STAGE_MASK>
static uint32_t
hash_gfx_program(const void *key)
{
   const struct zink_shader **shaders = (const struct zink_shader**)key;
   uint32_t base_hash = shaders[MESA_SHADER_VERTEX]->hash ^ shaders[MESA_SHADER_FRAGMENT]->hash;
   if (STAGE_MASK == STAGE_BASE) //VS+FS
      return base_hash;
   if (STAGE_MASK == STAGE_BASE_GS) //VS+GS+FS
      return base_hash ^ shaders[MESA_SHADER_GEOMETRY]->hash;
   /*VS+TCS+FS isn't a thing */
   /*VS+TCS+GS+FS isn't a thing */
   if (STAGE_MASK == STAGE_BASE_TES) //VS+TES+FS
      return base_hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash;
   if (STAGE_MASK == STAGE_BASE_TES_GS) //VS+TES+GS+FS
      return base_hash ^ shaders[MESA_SHADER_GEOMETRY]->hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash;
   if (STAGE_MASK == STAGE_BASE_TCS_TES) //VS+TCS+TES+FS
      return base_hash ^ shaders[MESA_SHADER_TESS_CTRL]->hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash;

   /* all stages */
   return base_hash ^ shaders[MESA_SHADER_GEOMETRY]->hash ^ shaders[MESA_SHADER_TESS_CTRL]->hash ^ shaders[MESA_SHADER_TESS_EVAL]->hash;
}

template <unsigned STAGE_MASK>
static bool
equals_gfx_program(const void *a, const void *b)
{
   const void **sa = (const void**)a;
   const void **sb = (const void**)b;
   STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
   STATIC_ASSERT(MESA_SHADER_TESS_CTRL == 1);
   STATIC_ASSERT(MESA_SHADER_TESS_EVAL == 2);
   STATIC_ASSERT(MESA_SHADER_GEOMETRY == 3);
   STATIC_ASSERT(MESA_SHADER_FRAGMENT == 4);
   if (STAGE_MASK == STAGE_BASE) //VS+FS
      return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] &&
             sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT];
   if (STAGE_MASK == STAGE_BASE_GS) //VS+GS+FS
      return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] &&
             !memcmp(&sa[MESA_SHADER_GEOMETRY], &sb[MESA_SHADER_GEOMETRY], sizeof(void*) * 2);
   /*VS+TCS+FS isn't a thing */
   /*VS+TCS+GS+FS isn't a thing */
   if (STAGE_MASK == STAGE_BASE_TES) //VS+TES+FS
      return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] &&
             sa[MESA_SHADER_TESS_EVAL] == sb[MESA_SHADER_TESS_EVAL] &&
             sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT];
   if (STAGE_MASK == STAGE_BASE_TES_GS) //VS+TES+GS+FS
      return sa[MESA_SHADER_VERTEX] == sb[MESA_SHADER_VERTEX] &&
             !memcmp(&sa[MESA_SHADER_TESS_EVAL], &sb[MESA_SHADER_TESS_EVAL], sizeof(void*) * 3);
   if (STAGE_MASK == STAGE_BASE_TCS_TES) //VS+TCS+TES+FS
      return !memcmp(sa, sb, sizeof(void*) * 3) &&
             sa[MESA_SHADER_FRAGMENT] == sb[MESA_SHADER_FRAGMENT];
   /* all stages */
   return !memcmp(a, b, sizeof(void*) * ZINK_GFX_SHADER_COUNT);
}
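/* Draw entrypoints are specialized at compile time and selected once at context
 * creation: the tables below are indexed as [has multidraw][dynamic-state level]
 * [batch changed] (vertex-state draws add a [has popcnt] axis), so per-draw
 * dispatch is a single indirect call with all feature checks baked into the
 * chosen template instantiation.
 */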
extern "C"
void
zink_init_draw_functions(struct zink_context *ctx, struct zink_screen *screen)
{
   pipe_draw_func draw_vbo_array[2][6] //multidraw, zink_dynamic_state
                                [2];   //batch changed
   pipe_draw_vertex_state_func draw_state_array[2][6]  //multidraw, zink_dynamic_state
                                               [2][2]; //has_popcnt, batch changed
   zink_dynamic_state dynamic;
   if (screen->info.have_EXT_extended_dynamic_state) {
      if (screen->info.have_EXT_extended_dynamic_state2) {
         if (screen->info.have_EXT_extended_dynamic_state3) {
            if (screen->info.have_EXT_vertex_input_dynamic_state)
               dynamic = ZINK_DYNAMIC_VERTEX_INPUT;
            else
               dynamic = ZINK_DYNAMIC_STATE3;
         } else {
            if (screen->info.have_EXT_vertex_input_dynamic_state)
               dynamic = ZINK_DYNAMIC_VERTEX_INPUT2;
            else
               dynamic = ZINK_DYNAMIC_STATE2;
         }
      } else {
         dynamic = ZINK_DYNAMIC_STATE;
      }
   } else {
      dynamic = ZINK_NO_DYNAMIC_STATE;
   }
   init_all_draw_functions(ctx, draw_vbo_array, draw_state_array);
   memcpy(ctx->draw_vbo, &draw_vbo_array[screen->info.have_EXT_multi_draw][dynamic],
          sizeof(ctx->draw_vbo));
   memcpy(ctx->draw_state, &draw_state_array[screen->info.have_EXT_multi_draw][dynamic][util_get_cpu_caps()->has_popcnt],
          sizeof(ctx->draw_state));

   /* Bind a fake draw_vbo, so that draw_vbo isn't NULL, which would skip
    * initialization of callbacks in upper layers (such as u_threaded_context).
    */
   ctx->base.draw_vbo = zink_invalid_draw_vbo;
   ctx->base.draw_vertex_state = zink_invalid_draw_vertex_state;

   _mesa_hash_table_init(&ctx->program_cache[0], ctx, hash_gfx_program<0>, equals_gfx_program<0>);
   _mesa_hash_table_init(&ctx->program_cache[1], ctx, hash_gfx_program<1>, equals_gfx_program<1>);
   _mesa_hash_table_init(&ctx->program_cache[2], ctx, hash_gfx_program<2>, equals_gfx_program<2>);
   _mesa_hash_table_init(&ctx->program_cache[3], ctx, hash_gfx_program<3>, equals_gfx_program<3>);
   _mesa_hash_table_init(&ctx->program_cache[4], ctx, hash_gfx_program<4>, equals_gfx_program<4>);
   _mesa_hash_table_init(&ctx->program_cache[5], ctx, hash_gfx_program<5>, equals_gfx_program<5>);
   _mesa_hash_table_init(&ctx->program_cache[6], ctx, hash_gfx_program<6>, equals_gfx_program<6>);
   _mesa_hash_table_init(&ctx->program_cache[7], ctx, hash_gfx_program<7>, equals_gfx_program<7>);
   for (unsigned i = 0; i < ARRAY_SIZE(ctx->program_lock); i++)
      simple_mtx_init(&ctx->program_lock[i], mtx_plain);
}

void
zink_init_grid_functions(struct zink_context *ctx)
{
   init_all_grid_functions(ctx);
   /* Bind a fake launch_grid, so that launch_grid isn't NULL, which would skip
    * initialization of callbacks in upper layers (such as u_threaded_context).
    */
   ctx->base.launch_grid = zink_invalid_launch_grid;
}

void
zink_init_screen_pipeline_libs(struct zink_screen *screen)
{
   _mesa_set_init(&screen->pipeline_libs[0], screen, hash_gfx_program<0>, equals_gfx_program<0>);
   _mesa_set_init(&screen->pipeline_libs[1], screen, hash_gfx_program<1>, equals_gfx_program<1>);
   _mesa_set_init(&screen->pipeline_libs[2], screen, hash_gfx_program<2>, equals_gfx_program<2>);
   _mesa_set_init(&screen->pipeline_libs[3], screen, hash_gfx_program<3>, equals_gfx_program<3>);
   _mesa_set_init(&screen->pipeline_libs[4], screen, hash_gfx_program<4>, equals_gfx_program<4>);
   _mesa_set_init(&screen->pipeline_libs[5], screen, hash_gfx_program<5>, equals_gfx_program<5>);
   _mesa_set_init(&screen->pipeline_libs[6], screen, hash_gfx_program<6>, equals_gfx_program<6>);
   _mesa_set_init(&screen->pipeline_libs[7], screen, hash_gfx_program<7>, equals_gfx_program<7>);
   for (unsigned i = 0; i < ARRAY_SIZE(screen->pipeline_libs_lock); i++)
      simple_mtx_init(&screen->pipeline_libs_lock[i], mtx_plain);
}