/* * Copyright 2018 Collabora Ltd. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * on the rights to use, copy, modify, merge, publish, distribute, sub * license, and/or sell copies of the Software, and to permit persons to whom * the Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "zink_program.h" #include "zink_compiler.h" #include "zink_context.h" #include "zink_descriptors.h" #include "zink_helpers.h" #include "zink_pipeline.h" #include "zink_render_pass.h" #include "zink_resource.h" #include "zink_screen.h" #include "zink_state.h" #include "zink_inlines.h" #include "util/memstream.h" #include "util/u_debug.h" #include "util/u_memory.h" #include "util/u_prim.h" #include "nir_serialize.h" #include "nir/nir_draw_helpers.h" /* for pipeline cache */ #define XXH_INLINE_ALL #include "util/xxhash.h" static void gfx_program_precompile_job(void *data, void *gdata, int thread_index); struct zink_gfx_program * create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch); void debug_describe_zink_gfx_program(char *buf, const struct zink_gfx_program *ptr) { sprintf(buf, "zink_gfx_program"); } void debug_describe_zink_compute_program(char *buf, const struct zink_compute_program *ptr) { sprintf(buf, "zink_compute_program"); } ALWAYS_INLINE static bool shader_key_matches_tcs_nongenerated(const struct zink_shader_module *zm, const struct zink_shader_key *key, unsigned num_uniforms) { if (zm->num_uniforms != num_uniforms || zm->has_nonseamless != !!key->base.nonseamless_cube_mask || zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle) return false; const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0; return (!nonseamless_size || !memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size)) && (!num_uniforms || !memcmp(zm->key + zm->key_size + nonseamless_size, key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t))); } ALWAYS_INLINE static bool shader_key_matches(const struct zink_shader_module *zm, const struct zink_shader_key *key, unsigned num_uniforms, bool has_inline, bool has_nonseamless) { const uint32_t nonseamless_size = !has_nonseamless && zm->has_nonseamless ? 
sizeof(uint32_t) : 0; if (has_inline) { if (zm->num_uniforms != num_uniforms || (num_uniforms && memcmp(zm->key + zm->key_size + nonseamless_size, key->base.inlined_uniform_values, zm->num_uniforms * sizeof(uint32_t)))) return false; } if (!has_nonseamless) { if (zm->has_nonseamless != !!key->base.nonseamless_cube_mask || (nonseamless_size && memcmp(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size))) return false; } if (zm->needs_zs_shader_swizzle != key->base.needs_zs_shader_swizzle) return false; return !memcmp(zm->key, key, zm->key_size); } static uint32_t shader_module_hash(const struct zink_shader_module *zm) { const uint32_t nonseamless_size = zm->has_nonseamless ? sizeof(uint32_t) : 0; unsigned key_size = zm->key_size + nonseamless_size + zm->num_uniforms * sizeof(uint32_t); return _mesa_hash_data(zm->key, key_size); } ALWAYS_INLINE static void gather_shader_module_info(struct zink_context *ctx, struct zink_screen *screen, struct zink_shader *zs, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state, bool has_inline, //is inlining enabled? bool has_nonseamless, //is nonseamless ext present? unsigned *inline_size, unsigned *nonseamless_size) { gl_shader_stage stage = zs->info.stage; struct zink_shader_key *key = &state->shader_keys.key[stage]; if (has_inline && ctx && zs->info.num_inlinable_uniforms && ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(stage)) { if (zs->can_inline && (screen->is_cpu || prog->inlined_variant_count[stage] < ZINK_MAX_INLINED_VARIANTS)) *inline_size = zs->info.num_inlinable_uniforms; else key->inline_uniforms = false; } if (!has_nonseamless && key->base.nonseamless_cube_mask) *nonseamless_size = sizeof(uint32_t); } ALWAYS_INLINE static struct zink_shader_module * create_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen, struct zink_shader *zs, struct zink_gfx_program *prog, gl_shader_stage stage, struct zink_gfx_pipeline_state *state, unsigned inline_size, unsigned nonseamless_size, bool has_inline, //is inlining enabled? bool has_nonseamless) //is nonseamless ext present? { struct zink_shader_module *zm; const struct zink_shader_key *key = &state->shader_keys.key[stage]; /* non-generated tcs won't use the shader key */ const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated; const bool shadow_needs_shader_swizzle = key->base.needs_zs_shader_swizzle || (stage == MESA_SHADER_FRAGMENT && key->key.fs.base.shadow_needs_shader_swizzle); zm = malloc(sizeof(struct zink_shader_module) + key->size + (!has_nonseamless ? nonseamless_size : 0) + inline_size * sizeof(uint32_t) + (shadow_needs_shader_swizzle ? 
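/* trailing zm->key layout: base shader key, then the optional nonseamless cube mask,
 * then any inlined uniform values, then the optional zs swizzle data
 */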
sizeof(struct zink_zs_swizzle_key) : 0)); if (!zm) { return NULL; } unsigned patch_vertices = state->shader_keys.key[MESA_SHADER_TESS_CTRL].key.tcs.patch_vertices; if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) { assert(ctx); //TODO async zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base); } else { zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), key, &ctx->di.zs_swizzle[stage], &prog->base); } if (!zm->obj.mod) { FREE(zm); return NULL; } zm->shobj = prog->base.uses_shobj; zm->num_uniforms = inline_size; if (!is_nongenerated_tcs) { zm->key_size = key->size; memcpy(zm->key, key, key->size); } else { zm->key_size = 0; memset(zm->key, 0, key->size); } if (!has_nonseamless && nonseamless_size) { /* nonseamless mask gets added to base key if it exists */ memcpy(zm->key + key->size, &key->base.nonseamless_cube_mask, nonseamless_size); } zm->needs_zs_shader_swizzle = shadow_needs_shader_swizzle; zm->has_nonseamless = has_nonseamless ? 0 : !!nonseamless_size; if (inline_size) memcpy(zm->key + key->size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t)); if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) zm->hash = patch_vertices; else zm->hash = shader_module_hash(zm); if (unlikely(shadow_needs_shader_swizzle)) { memcpy(zm->key + key->size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)); zm->hash ^= _mesa_hash_data(&ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)); } zm->default_variant = !shadow_needs_shader_swizzle && !inline_size && !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*); if (inline_size) prog->inlined_variant_count[stage]++; util_dynarray_append(&prog->shader_cache[stage][has_nonseamless ? 0 : !!nonseamless_size][!!inline_size], void*, zm); return zm; } ALWAYS_INLINE static struct zink_shader_module * get_shader_module_for_stage(struct zink_context *ctx, struct zink_screen *screen, struct zink_shader *zs, struct zink_gfx_program *prog, gl_shader_stage stage, struct zink_gfx_pipeline_state *state, unsigned inline_size, unsigned nonseamless_size, bool has_inline, //is inlining enabled? bool has_nonseamless) //is nonseamless ext present? { const struct zink_shader_key *key = &state->shader_keys.key[stage]; /* non-generated tcs won't use the shader key */ const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated; const bool shadow_needs_shader_swizzle = unlikely(key->base.needs_zs_shader_swizzle) || (stage == MESA_SHADER_FRAGMENT && unlikely(key->key.fs.base.shadow_needs_shader_swizzle)); struct util_dynarray *shader_cache = &prog->shader_cache[stage][!has_nonseamless ? !!nonseamless_size : 0][has_inline ? !!inline_size : 0]; unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *); struct zink_shader_module **pzm = shader_cache->data; for (unsigned i = 0; i < count; i++) { struct zink_shader_module *iter = pzm[i]; if (is_nongenerated_tcs) { if (!shader_key_matches_tcs_nongenerated(iter, key, has_inline ? 
!!inline_size : 0)) continue; } else { if (stage == MESA_SHADER_VERTEX && iter->key_size != key->size) continue; if (!shader_key_matches(iter, key, inline_size, has_inline, has_nonseamless)) continue; if (unlikely(shadow_needs_shader_swizzle)) { /* shadow swizzle data needs a manual compare since it's so fat */ if (memcmp(iter->key + iter->key_size + nonseamless_size + iter->num_uniforms * sizeof(uint32_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key))) continue; } } if (i > 0) { struct zink_shader_module *zero = pzm[0]; pzm[0] = iter; pzm[i] = zero; } return iter; } return NULL; } ALWAYS_INLINE static struct zink_shader_module * create_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_shader *zs, struct zink_gfx_program *prog, gl_shader_stage stage, struct zink_gfx_pipeline_state *state) { struct zink_shader_module *zm; uint16_t *key; unsigned mask = stage == MESA_SHADER_FRAGMENT ? BITFIELD_MASK(16) : BITFIELD_MASK(8); bool shadow_needs_shader_swizzle = false; if (zs == prog->last_vertex_stage) { key = (uint16_t*)&state->shader_keys_optimal.key.vs_base; } else if (stage == MESA_SHADER_FRAGMENT) { key = (uint16_t*)&state->shader_keys_optimal.key.fs; shadow_needs_shader_swizzle = ctx ? ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle : false; } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) { key = (uint16_t*)&state->shader_keys_optimal.key.tcs; } else { key = NULL; } size_t key_size = sizeof(uint16_t); zm = calloc(1, sizeof(struct zink_shader_module) + (key ? key_size : 0) + (unlikely(shadow_needs_shader_swizzle) ? sizeof(struct zink_zs_swizzle_key) : 0)); if (!zm) { return NULL; } if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated && zs->spirv) { assert(ctx || screen->info.dynamic_state2_feats.extendedDynamicState2PatchControlPoints); unsigned patch_vertices = 3; if (ctx) { struct zink_tcs_key *tcs = (struct zink_tcs_key*)key; patch_vertices = tcs->patch_vertices; } zm->obj = zink_shader_tcs_compile(screen, zs, patch_vertices, prog->base.uses_shobj, &prog->base); } else { zm->obj = zink_shader_compile(screen, prog->base.uses_shobj, zs, zink_shader_blob_deserialize(screen, &prog->blobs[stage]), (struct zink_shader_key*)key, shadow_needs_shader_swizzle ? &ctx->di.zs_swizzle[stage] : NULL, &prog->base); } if (!zm->obj.mod) { FREE(zm); return NULL; } zm->shobj = prog->base.uses_shobj; /* non-generated tcs won't use the shader key */ const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated; if (key && !is_nongenerated_tcs) { zm->key_size = key_size; uint16_t *data = (uint16_t*)zm->key; /* sanitize actual key bits */ *data = (*key) & mask; if (unlikely(shadow_needs_shader_swizzle)) memcpy(&data[1], &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key)); } zm->default_variant = !util_dynarray_contains(&prog->shader_cache[stage][0][0], void*); util_dynarray_append(&prog->shader_cache[stage][0][0], void*, zm); return zm; } ALWAYS_INLINE static struct zink_shader_module * get_shader_module_for_stage_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_shader *zs, struct zink_gfx_program *prog, gl_shader_stage stage, struct zink_gfx_pipeline_state *state) { /* non-generated tcs won't use the shader key */ const bool is_nongenerated_tcs = stage == MESA_SHADER_TESS_CTRL && !zs->non_fs.is_generated; bool shadow_needs_shader_swizzle = false; uint16_t *key; unsigned mask = stage == MESA_SHADER_FRAGMENT ? 
BITFIELD_MASK(16) : BITFIELD_MASK(8); if (zs == prog->last_vertex_stage) { key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.vs_base; } else if (stage == MESA_SHADER_FRAGMENT) { key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.fs; shadow_needs_shader_swizzle = ctx->gfx_pipeline_state.shader_keys_optimal.key.fs.shadow_needs_shader_swizzle; } else if (stage == MESA_SHADER_TESS_CTRL && zs->non_fs.is_generated) { key = (uint16_t*)&ctx->gfx_pipeline_state.shader_keys_optimal.key.tcs; } else { key = NULL; } struct util_dynarray *shader_cache = &prog->shader_cache[stage][0][0]; unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *); struct zink_shader_module **pzm = shader_cache->data; for (unsigned i = 0; i < count; i++) { struct zink_shader_module *iter = pzm[i]; if (is_nongenerated_tcs) { /* always match */ } else if (key) { uint16_t val = (*key) & mask; /* no key is bigger than uint16_t */ if (memcmp(iter->key, &val, sizeof(uint16_t))) continue; if (unlikely(shadow_needs_shader_swizzle)) { /* shadow swizzle data needs a manual compare since it's so fat */ if (memcmp(iter->key + sizeof(uint16_t), &ctx->di.zs_swizzle[stage], sizeof(struct zink_zs_swizzle_key))) continue; } } if (i > 0) { struct zink_shader_module *zero = pzm[0]; pzm[0] = iter; pzm[i] = zero; } return iter; } return NULL; } static void zink_destroy_shader_module(struct zink_screen *screen, struct zink_shader_module *zm) { if (zm->shobj) VKSCR(DestroyShaderEXT)(screen->dev, zm->obj.obj, NULL); else VKSCR(DestroyShaderModule)(screen->dev, zm->obj.mod, NULL); ralloc_free(zm->obj.spirv); free(zm); } static void destroy_shader_cache(struct zink_screen *screen, struct util_dynarray *sc) { while (util_dynarray_contains(sc, void*)) { struct zink_shader_module *zm = util_dynarray_pop(sc, struct zink_shader_module*); zink_destroy_shader_module(screen, zm); } } ALWAYS_INLINE static void update_gfx_shader_modules(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, uint32_t mask, struct zink_gfx_pipeline_state *state, bool has_inline, //is inlining enabled? bool has_nonseamless) //is nonseamless ext present? 
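/* for each stage in 'mask', find or create a shader module variant matching the current
 * shader key, then fold the per-stage module hashes into the program's variant hash
 */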
{ bool hash_changed = false; bool default_variants = true; assert(prog->objs[MESA_SHADER_VERTEX].mod); uint32_t variant_hash = prog->last_variant_hash; prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags; for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { if (!(mask & BITFIELD_BIT(i))) continue; assert(prog->shaders[i]); unsigned inline_size = 0, nonseamless_size = 0; gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state, has_inline, has_nonseamless, &inline_size, &nonseamless_size); struct zink_shader_module *zm = get_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state, inline_size, nonseamless_size, has_inline, has_nonseamless); if (!zm) zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state, inline_size, nonseamless_size, has_inline, has_nonseamless); state->modules[i] = zm->obj.mod; if (prog->objs[i].mod == zm->obj.mod) continue; prog->optimal_keys &= !prog->shaders[i]->non_fs.is_generated; variant_hash ^= prog->module_hash[i]; hash_changed = true; default_variants &= zm->default_variant; prog->objs[i] = zm->obj; prog->objects[i] = zm->obj.obj; prog->module_hash[i] = zm->hash; if (has_inline) { if (zm->num_uniforms) prog->inline_variants |= BITFIELD_BIT(i); else prog->inline_variants &= ~BITFIELD_BIT(i); } variant_hash ^= prog->module_hash[i]; } if (hash_changed && state) { if (default_variants) prog->last_variant_hash = prog->default_variant_hash; else prog->last_variant_hash = variant_hash; state->modules_changed = true; } } static void generate_gfx_program_modules(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state) { assert(!prog->objs[MESA_SHADER_VERTEX].mod); uint32_t variant_hash = 0; bool default_variants = true; for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { if (!(prog->stages_present & BITFIELD_BIT(i))) continue; assert(prog->shaders[i]); unsigned inline_size = 0, nonseamless_size = 0; gather_shader_module_info(ctx, screen, prog->shaders[i], prog, state, screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map, &inline_size, &nonseamless_size); struct zink_shader_module *zm = create_shader_module_for_stage(ctx, screen, prog->shaders[i], prog, i, state, inline_size, nonseamless_size, screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map); state->modules[i] = zm->obj.mod; prog->objs[i] = zm->obj; prog->objects[i] = zm->obj.obj; prog->module_hash[i] = zm->hash; if (zm->num_uniforms) prog->inline_variants |= BITFIELD_BIT(i); default_variants &= zm->default_variant; variant_hash ^= prog->module_hash[i]; } state->modules_changed = true; prog->last_variant_hash = variant_hash; if (default_variants) prog->default_variant_hash = prog->last_variant_hash; } static void generate_gfx_program_modules_optimal(struct zink_context *ctx, struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state) { assert(!prog->objs[MESA_SHADER_VERTEX].mod); for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { if (!(prog->stages_present & BITFIELD_BIT(i))) continue; assert(prog->shaders[i]); struct zink_shader_module *zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[i], prog, i, state); prog->objs[i] = zm->obj; prog->objects[i] = zm->obj.obj; } state->modules_changed = true; prog->last_variant_hash = state->optimal_key; } static uint32_t hash_pipeline_lib_generated_tcs(const void *key) { const struct zink_gfx_library_key *gkey = key; return 
gkey->optimal_key; } static bool equals_pipeline_lib_generated_tcs(const void *a, const void *b) { return !memcmp(a, b, sizeof(uint32_t)); } static uint32_t hash_pipeline_lib(const void *key) { const struct zink_gfx_library_key *gkey = key; /* remove generated tcs bits */ return zink_shader_key_optimal_no_tcs(gkey->optimal_key); } static bool equals_pipeline_lib(const void *a, const void *b) { const struct zink_gfx_library_key *ak = a; const struct zink_gfx_library_key *bk = b; /* remove generated tcs bits */ uint32_t val_a = zink_shader_key_optimal_no_tcs(ak->optimal_key); uint32_t val_b = zink_shader_key_optimal_no_tcs(bk->optimal_key); return val_a == val_b; } uint32_t hash_gfx_input_dynamic(const void *key) { const struct zink_gfx_input_key *ikey = key; return ikey->idx; } static bool equals_gfx_input_dynamic(const void *a, const void *b) { const struct zink_gfx_input_key *ikey_a = a; const struct zink_gfx_input_key *ikey_b = b; return ikey_a->idx == ikey_b->idx; } uint32_t hash_gfx_input(const void *key) { const struct zink_gfx_input_key *ikey = key; if (ikey->uses_dynamic_stride) return ikey->input; return _mesa_hash_data(key, offsetof(struct zink_gfx_input_key, pipeline)); } static bool equals_gfx_input(const void *a, const void *b) { const struct zink_gfx_input_key *ikey_a = a; const struct zink_gfx_input_key *ikey_b = b; if (ikey_a->uses_dynamic_stride) return ikey_a->element_state == ikey_b->element_state && !memcmp(a, b, offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask)); return !memcmp(a, b, offsetof(struct zink_gfx_input_key, pipeline)); } uint32_t hash_gfx_output_ds3(const void *key) { const uint8_t *data = key; return _mesa_hash_data(data, sizeof(uint32_t)); } static bool equals_gfx_output_ds3(const void *a, const void *b) { const uint8_t *da = a; const uint8_t *db = b; return !memcmp(da, db, sizeof(uint32_t)); } uint32_t hash_gfx_output(const void *key) { const uint8_t *data = key; return _mesa_hash_data(data, offsetof(struct zink_gfx_output_key, pipeline)); } static bool equals_gfx_output(const void *a, const void *b) { const uint8_t *da = a; const uint8_t *db = b; return !memcmp(da, db, offsetof(struct zink_gfx_output_key, pipeline)); } ALWAYS_INLINE static void update_gfx_program_nonseamless(struct zink_context *ctx, struct zink_gfx_program *prog, bool has_nonseamless) { struct zink_screen *screen = zink_screen(ctx->base.screen); if (screen->driconf.inline_uniforms || prog->needs_inlining) update_gfx_shader_modules(ctx, screen, prog, ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state, true, has_nonseamless); else update_gfx_shader_modules(ctx, screen, prog, ctx->dirty_gfx_stages & prog->stages_present, &ctx->gfx_pipeline_state, false, has_nonseamless); } static void update_gfx_program(struct zink_context *ctx, struct zink_gfx_program *prog) { struct zink_screen *screen = zink_screen(ctx->base.screen); if (screen->info.have_EXT_non_seamless_cube_map) update_gfx_program_nonseamless(ctx, prog, true); else update_gfx_program_nonseamless(ctx, prog, false); } void zink_gfx_program_update(struct zink_context *ctx) { if (ctx->last_vertex_stage_dirty) { gl_shader_stage pstage = ctx->last_vertex_stage->info.stage; ctx->dirty_gfx_stages |= BITFIELD_BIT(pstage); memcpy(&ctx->gfx_pipeline_state.shader_keys.key[pstage].key.vs_base, &ctx->gfx_pipeline_state.shader_keys.last_vertex.key.vs_base, sizeof(struct zink_vs_key_base)); ctx->last_vertex_stage_dirty = false; } if (ctx->gfx_dirty) { struct zink_gfx_program *prog = NULL; 
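/* look up the program for the currently-bound stages in the per-stage-mask cache
 * (keyed by ctx->gfx_hash); on a miss, create a new program and insert it
 */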
simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)]; const uint32_t hash = ctx->gfx_hash; struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages); /* this must be done before prog is updated */ if (ctx->curr_program) ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; if (entry) { prog = (struct zink_gfx_program*)entry->data; for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { if (prog->stages_present & ~ctx->dirty_gfx_stages & BITFIELD_BIT(i)) ctx->gfx_pipeline_state.modules[i] = prog->objs[i].mod; } /* ensure variants are always updated if keys have changed since last use */ ctx->dirty_gfx_stages |= prog->stages_present; update_gfx_program(ctx, prog); } else { ctx->dirty_gfx_stages |= ctx->shader_stages; prog = zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, hash); zink_screen_get_pipeline_cache(zink_screen(ctx->base.screen), &prog->base, false); _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog); prog->base.removed = false; generate_gfx_program_modules(ctx, zink_screen(ctx->base.screen), prog, &ctx->gfx_pipeline_state); } simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); if (prog && prog != ctx->curr_program) zink_batch_reference_program(ctx, &prog->base); ctx->curr_program = prog; ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; ctx->gfx_dirty = false; } else if (ctx->dirty_gfx_stages) { /* remove old hash */ ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; update_gfx_program(ctx, ctx->curr_program); /* apply new hash */ ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; } ctx->dirty_gfx_stages = 0; } ALWAYS_INLINE static bool update_gfx_shader_module_optimal(struct zink_context *ctx, struct zink_gfx_program *prog, gl_shader_stage pstage) { struct zink_screen *screen = zink_screen(ctx->base.screen); if (screen->info.have_EXT_graphics_pipeline_library) util_queue_fence_wait(&prog->base.cache_fence); struct zink_shader_module *zm = get_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state); if (!zm) { zm = create_shader_module_for_stage_optimal(ctx, screen, prog->shaders[pstage], prog, pstage, &ctx->gfx_pipeline_state); perf_debug(ctx, "zink[gfx_compile]: %s shader variant required\n", _mesa_shader_stage_to_string(pstage)); } bool changed = prog->objs[pstage].mod != zm->obj.mod; prog->objs[pstage] = zm->obj; prog->objects[pstage] = zm->obj.obj; return changed; } static void update_gfx_program_optimal(struct zink_context *ctx, struct zink_gfx_program *prog) { const union zink_shader_key_optimal *key = (union zink_shader_key_optimal*)&ctx->gfx_pipeline_state.optimal_key; const union zink_shader_key_optimal *last_prog_key = (union zink_shader_key_optimal*)&prog->last_variant_hash; if (key->vs_bits != last_prog_key->vs_bits) { assert(!prog->is_separable); bool changed = update_gfx_shader_module_optimal(ctx, prog, ctx->last_vertex_stage->info.stage); ctx->gfx_pipeline_state.modules_changed |= changed; } const bool shadow_needs_shader_swizzle = last_prog_key->fs.shadow_needs_shader_swizzle && (ctx->dirty_gfx_stages & BITFIELD_BIT(MESA_SHADER_FRAGMENT)); if (key->fs_bits != last_prog_key->fs_bits || /* always recheck shadow swizzles since they aren't directly part of the key */ 
unlikely(shadow_needs_shader_swizzle)) { assert(!prog->is_separable); bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_FRAGMENT); ctx->gfx_pipeline_state.modules_changed |= changed; if (unlikely(shadow_needs_shader_swizzle)) { struct zink_shader_module **pzm = prog->shader_cache[MESA_SHADER_FRAGMENT][0][0].data; ctx->gfx_pipeline_state.shadow = (struct zink_zs_swizzle_key*)pzm[0]->key + sizeof(uint16_t); } } if (prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated && key->tcs_bits != last_prog_key->tcs_bits) { assert(!prog->is_separable); bool changed = update_gfx_shader_module_optimal(ctx, prog, MESA_SHADER_TESS_CTRL); ctx->gfx_pipeline_state.modules_changed |= changed; } prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key; } static struct zink_gfx_program * replace_separable_prog(struct zink_context *ctx, struct hash_entry *entry, struct zink_gfx_program *prog) { struct zink_screen *screen = zink_screen(ctx->base.screen); struct zink_gfx_program *real = prog->full_prog ? prog->full_prog : /* this will be NULL with ZINK_DEBUG_NOOPT */ zink_create_gfx_program(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch, ctx->gfx_hash); entry->data = real; entry->key = real->shaders; real->base.removed = false; zink_gfx_program_reference(screen, &prog->full_prog, NULL); prog->base.removed = true; return real; } void zink_gfx_program_update_optimal(struct zink_context *ctx) { struct zink_screen *screen = zink_screen(ctx->base.screen); if (ctx->gfx_dirty) { struct zink_gfx_program *prog = NULL; ctx->gfx_pipeline_state.optimal_key = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val); struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)]; const uint32_t hash = ctx->gfx_hash; simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages); if (ctx->curr_program) ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; if (entry) { prog = (struct zink_gfx_program*)entry->data; bool must_replace = prog->base.uses_shobj ? !zink_can_use_shader_objects(ctx) : (prog->is_separable && !zink_can_use_pipeline_libs(ctx)); if (prog->is_separable) { /* shader variants can't be handled by separable programs: sync and compile */ if (!ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || must_replace) util_queue_fence_wait(&prog->base.cache_fence); /* If the optimized linked pipeline is done compiling, swap it into place. 
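 * With ZINK_DEBUG=noopt the linked program is only created on demand, so the swap is
 * skipped unless a non-default shader key or a forced replacement requires it.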
*/ if (util_queue_fence_is_signalled(&prog->base.cache_fence) && /* but only if needed for ZINK_DEBUG=noopt */ (!(zink_debug & ZINK_DEBUG_NOOPT) || !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || must_replace)) { prog = replace_separable_prog(ctx, entry, prog); } } update_gfx_program_optimal(ctx, prog); } else { ctx->dirty_gfx_stages |= ctx->shader_stages; prog = create_gfx_program_separable(ctx, ctx->gfx_stages, ctx->gfx_pipeline_state.dyn_state2.vertices_per_patch); prog->base.removed = false; _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog); if (!prog->is_separable) { zink_screen_get_pipeline_cache(screen, &prog->base, false); perf_debug(ctx, "zink[gfx_compile]: new program created (probably legacy GL features in use)\n"); generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state); } } simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); if (prog && prog != ctx->curr_program) zink_batch_reference_program(ctx, &prog->base); ctx->curr_program = prog; ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; } else if (ctx->dirty_gfx_stages) { /* remove old hash */ ctx->gfx_pipeline_state.optimal_key = zink_sanitize_optimal_key(ctx->gfx_stages, ctx->gfx_pipeline_state.shader_keys_optimal.key.val); ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; bool must_replace = ctx->curr_program->base.uses_shobj ? !zink_can_use_shader_objects(ctx) : (ctx->curr_program->is_separable && !zink_can_use_pipeline_libs(ctx)); if (must_replace || (ctx->curr_program->is_separable && !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key))) { struct zink_gfx_program *prog = ctx->curr_program; util_queue_fence_wait(&prog->base.cache_fence); /* shader variants can't be handled by separable programs: sync and compile */ perf_debug(ctx, "zink[gfx_compile]: non-default shader variant required with separate shader object program\n"); struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(ctx->shader_stages)]; const uint32_t hash = ctx->gfx_hash; simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, ctx->gfx_stages); ctx->curr_program = replace_separable_prog(ctx, entry, prog); simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(ctx->shader_stages)]); } update_gfx_program_optimal(ctx, ctx->curr_program); /* apply new hash */ ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; } ctx->dirty_gfx_stages = 0; ctx->gfx_dirty = false; ctx->last_vertex_stage_dirty = false; } static void optimized_compile_job(void *data, void *gdata, int thread_index) { struct zink_gfx_pipeline_cache_entry *pc_entry = data; struct zink_screen *screen = gdata; VkPipeline pipeline; if (pc_entry->gpl.gkey) pipeline = zink_create_gfx_pipeline_combined(screen, pc_entry->prog, pc_entry->gpl.ikey->pipeline, &pc_entry->gpl.gkey->pipeline, 1, pc_entry->gpl.okey->pipeline, true, false); else pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, pc_entry->prog->objs, &pc_entry->state, pc_entry->state.element_state->binding_map, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true); if (pipeline) { pc_entry->gpl.unoptimized_pipeline = pc_entry->pipeline; pc_entry->pipeline = pipeline; } } static void optimized_shobj_compile_job(void *data, void *gdata, int thread_index) { struct zink_gfx_pipeline_cache_entry *pc_entry = data; struct 
zink_screen *screen = gdata; struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT]; for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { objs[i].mod = VK_NULL_HANDLE; objs[i].spirv = pc_entry->shobjs[i].spirv; } pc_entry->pipeline = zink_create_gfx_pipeline(screen, pc_entry->prog, objs, &pc_entry->state, NULL, zink_primitive_topology(pc_entry->state.gfx_prim_mode), true); /* no unoptimized_pipeline dance */ } void zink_gfx_program_compile_queue(struct zink_context *ctx, struct zink_gfx_pipeline_cache_entry *pc_entry) { struct zink_screen *screen = zink_screen(ctx->base.screen); if (screen->driver_workarounds.disable_optimized_compile) return; if (zink_debug & ZINK_DEBUG_NOBGC) { if (pc_entry->prog->base.uses_shobj) optimized_shobj_compile_job(pc_entry, screen, 0); else optimized_compile_job(pc_entry, screen, 0); } else { util_queue_add_job(&screen->cache_get_thread, pc_entry, &pc_entry->fence, pc_entry->prog->base.uses_shobj ? optimized_shobj_compile_job : optimized_compile_job, NULL, 0); } } void zink_program_finish(struct zink_context *ctx, struct zink_program *pg) { util_queue_fence_wait(&pg->cache_fence); if (pg->is_compute) return; struct zink_gfx_program *prog = (struct zink_gfx_program*)pg; for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) { for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) { hash_table_foreach(&prog->pipelines[r][i], entry) { struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data; util_queue_fence_wait(&pc_entry->fence); } } } } static void update_cs_shader_module(struct zink_context *ctx, struct zink_compute_program *comp) { struct zink_screen *screen = zink_screen(ctx->base.screen); struct zink_shader *zs = comp->shader; struct zink_shader_module *zm = NULL; unsigned inline_size = 0, nonseamless_size = 0, zs_swizzle_size = 0; struct zink_shader_key *key = &ctx->compute_pipeline_state.key; ASSERTED bool check_robustness = screen->driver_compiler_workarounds.lower_robustImageAccess2 && (ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS); assert(zink_cs_key(key)->robust_access == check_robustness); if (ctx && zs->info.num_inlinable_uniforms && ctx->inlinable_uniforms_valid_mask & BITFIELD64_BIT(MESA_SHADER_COMPUTE)) { if (screen->is_cpu || comp->inlined_variant_count < ZINK_MAX_INLINED_VARIANTS) inline_size = zs->info.num_inlinable_uniforms; else key->inline_uniforms = false; } if (key->base.nonseamless_cube_mask) nonseamless_size = sizeof(uint32_t); if (key->base.needs_zs_shader_swizzle) zs_swizzle_size = sizeof(struct zink_zs_swizzle_key); if (inline_size || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size) { struct util_dynarray *shader_cache = &comp->shader_cache[!!nonseamless_size]; unsigned count = util_dynarray_num_elements(shader_cache, struct zink_shader_module *); struct zink_shader_module **pzm = shader_cache->data; for (unsigned i = 0; i < count; i++) { struct zink_shader_module *iter = pzm[i]; if (!shader_key_matches(iter, key, inline_size, screen->driconf.inline_uniforms, screen->info.have_EXT_non_seamless_cube_map)) continue; if (unlikely(zs_swizzle_size)) { /* zs swizzle data needs a manual compare since it's so fat */ if (memcmp(iter->key + iter->key_size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size)) continue; } if (i > 0) { struct zink_shader_module *zero = pzm[0]; pzm[0] = iter; pzm[i] = zero; } zm = iter; } } else { zm = comp->module; } if (!zm) { zm = malloc(sizeof(struct zink_shader_module) + nonseamless_size + inline_size * sizeof(uint32_t) 
+ zs_swizzle_size); if (!zm) { return; } zm->shobj = false; zm->obj = zink_shader_compile(screen, false, zs, zink_shader_blob_deserialize(screen, &comp->shader->blob), key, zs_swizzle_size ? &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE] : NULL, &comp->base); if (!zm->obj.spirv) { FREE(zm); return; } zm->num_uniforms = inline_size; zm->key_size = key->size; memcpy(zm->key, key, key->size); zm->has_nonseamless = !!nonseamless_size; zm->needs_zs_shader_swizzle = !!zs_swizzle_size; assert(nonseamless_size || inline_size || zink_cs_key(key)->robust_access || zs_swizzle_size); if (nonseamless_size) memcpy(zm->key + zm->key_size, &key->base.nonseamless_cube_mask, nonseamless_size); if (inline_size) memcpy(zm->key + zm->key_size + nonseamless_size, key->base.inlined_uniform_values, inline_size * sizeof(uint32_t)); if (zs_swizzle_size) memcpy(zm->key + zm->key_size + nonseamless_size + inline_size * sizeof(uint32_t), &ctx->di.zs_swizzle[MESA_SHADER_COMPUTE], zs_swizzle_size); zm->hash = shader_module_hash(zm); zm->default_variant = false; if (inline_size) comp->inlined_variant_count++; /* this is otherwise the default variant, which is stored as comp->module */ if (zm->num_uniforms || nonseamless_size || zink_cs_key(key)->robust_access || zs_swizzle_size) util_dynarray_append(&comp->shader_cache[!!nonseamless_size], void*, zm); } if (comp->curr == zm) return; ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash; comp->curr = zm; ctx->compute_pipeline_state.module_hash = zm->hash; ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash; ctx->compute_pipeline_state.module_changed = true; } void zink_update_compute_program(struct zink_context *ctx) { util_queue_fence_wait(&ctx->curr_compute->base.cache_fence); update_cs_shader_module(ctx, ctx->curr_compute); } VkPipelineLayout zink_pipeline_layout_create(struct zink_screen *screen, VkDescriptorSetLayout *dsl, unsigned num_dsl, bool is_compute, VkPipelineLayoutCreateFlags flags) { VkPipelineLayoutCreateInfo plci = {0}; plci.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; plci.flags = flags; plci.pSetLayouts = dsl; plci.setLayoutCount = num_dsl; VkPushConstantRange pcr; if (!is_compute) { pcr.stageFlags = VK_SHADER_STAGE_ALL_GRAPHICS; pcr.offset = 0; pcr.size = sizeof(struct zink_gfx_push_constant); plci.pushConstantRangeCount = 1; plci.pPushConstantRanges = &pcr; } VkPipelineLayout layout; VkResult result = VKSCR(CreatePipelineLayout)(screen->dev, &plci, NULL, &layout); if (result != VK_SUCCESS) { mesa_loge("vkCreatePipelineLayout failed (%s)", vk_Result_to_str(result)); return VK_NULL_HANDLE; } return layout; } static void * create_program(struct zink_context *ctx, bool is_compute) { struct zink_program *pg = rzalloc_size(NULL, is_compute ? 
sizeof(struct zink_compute_program) : sizeof(struct zink_gfx_program)); if (!pg) return NULL; pipe_reference_init(&pg->reference, 1); u_rwlock_init(&pg->pipeline_cache_lock); util_queue_fence_init(&pg->cache_fence); pg->is_compute = is_compute; pg->ctx = ctx; return (void*)pg; } static void assign_io(struct zink_screen *screen, nir_shader *shaders[ZINK_GFX_SHADER_COUNT]) { for (unsigned i = 0; i < MESA_SHADER_FRAGMENT;) { nir_shader *producer = shaders[i]; for (unsigned j = i + 1; j < ZINK_GFX_SHADER_COUNT; i++, j++) { nir_shader *consumer = shaders[j]; if (!consumer) continue; zink_compiler_assign_io(screen, producer, consumer); i = j; break; } } } void zink_gfx_lib_cache_unref(struct zink_screen *screen, struct zink_gfx_lib_cache *libs) { if (!p_atomic_dec_zero(&libs->refcount)) return; simple_mtx_destroy(&libs->lock); set_foreach_remove(&libs->libs, he) { struct zink_gfx_library_key *gkey = (void*)he->key; VKSCR(DestroyPipeline)(screen->dev, gkey->pipeline, NULL); FREE(gkey); } ralloc_free(libs->libs.table); FREE(libs); } static struct zink_gfx_lib_cache * create_lib_cache(struct zink_gfx_program *prog, bool generated_tcs) { struct zink_gfx_lib_cache *libs = CALLOC_STRUCT(zink_gfx_lib_cache); libs->stages_present = prog->stages_present; if (generated_tcs) libs->stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL); simple_mtx_init(&libs->lock, mtx_plain); if (generated_tcs) _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib_generated_tcs, equals_pipeline_lib_generated_tcs); else _mesa_set_init(&libs->libs, NULL, hash_pipeline_lib, equals_pipeline_lib); return libs; } static struct zink_gfx_lib_cache * find_or_create_lib_cache(struct zink_screen *screen, struct zink_gfx_program *prog) { unsigned stages_present = prog->stages_present; bool generated_tcs = prog->shaders[MESA_SHADER_TESS_CTRL] && prog->shaders[MESA_SHADER_TESS_CTRL]->non_fs.is_generated; if (generated_tcs) stages_present &= ~BITFIELD_BIT(MESA_SHADER_TESS_CTRL); unsigned idx = zink_program_cache_stages(stages_present); struct set *ht = &screen->pipeline_libs[idx]; const uint32_t hash = prog->gfx_hash; simple_mtx_lock(&screen->pipeline_libs_lock[idx]); bool found = false; struct set_entry *entry = _mesa_set_search_or_add_pre_hashed(ht, hash, prog->shaders, &found); struct zink_gfx_lib_cache *libs; if (found) { libs = (void*)entry->key; } else { libs = create_lib_cache(prog, generated_tcs); memcpy(libs->shaders, prog->shaders, sizeof(prog->shaders)); entry->key = libs; unsigned refs = 0; for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { if (prog->shaders[i] && (!generated_tcs || i != MESA_SHADER_TESS_CTRL)) { simple_mtx_lock(&prog->shaders[i]->lock); util_dynarray_append(&prog->shaders[i]->pipeline_libs, struct zink_gfx_lib_cache*, libs); simple_mtx_unlock(&prog->shaders[i]->lock); refs++; } } p_atomic_set(&libs->refcount, refs); } simple_mtx_unlock(&screen->pipeline_libs_lock[idx]); return libs; } static struct zink_gfx_program * gfx_program_create(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch, uint32_t gfx_hash) { struct zink_screen *screen = zink_screen(ctx->base.screen); struct zink_gfx_program *prog = create_program(ctx, false); if (!prog) goto fail; prog->gfx_hash = gfx_hash; prog->base.removed = true; prog->optimal_keys = screen->optimal_keys; prog->has_edgeflags = prog->shaders[MESA_SHADER_VERTEX] && prog->shaders[MESA_SHADER_VERTEX]->has_edgeflags; for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { util_dynarray_init(&prog->shader_cache[i][0][0], prog); 
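/* shader_cache is indexed as [stage][has nonseamless-cube variant][has inlined-uniform variant] */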
util_dynarray_init(&prog->shader_cache[i][0][1], prog); util_dynarray_init(&prog->shader_cache[i][1][0], prog); util_dynarray_init(&prog->shader_cache[i][1][1], prog); if (stages[i]) { prog->shaders[i] = stages[i]; prog->stages_present |= BITFIELD_BIT(i); if (i != MESA_SHADER_FRAGMENT) prog->optimal_keys &= !prog->shaders[i]->non_fs.is_generated; prog->needs_inlining |= prog->shaders[i]->needs_inlining; } } if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) { util_queue_fence_wait(&stages[MESA_SHADER_TESS_EVAL]->precompile.fence); prog->shaders[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs = prog->shaders[MESA_SHADER_TESS_CTRL] = zink_shader_tcs_create(screen, vertices_per_patch); prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL); } prog->stages_remaining = prog->stages_present; for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { if (prog->shaders[i]) { simple_mtx_lock(&prog->shaders[i]->lock); _mesa_set_add(prog->shaders[i]->programs, prog); simple_mtx_unlock(&prog->shaders[i]->lock); zink_gfx_program_reference(screen, NULL, prog); } } p_atomic_dec(&prog->base.reference.count); if (stages[MESA_SHADER_GEOMETRY]) prog->last_vertex_stage = stages[MESA_SHADER_GEOMETRY]; else if (stages[MESA_SHADER_TESS_EVAL]) prog->last_vertex_stage = stages[MESA_SHADER_TESS_EVAL]; else prog->last_vertex_stage = stages[MESA_SHADER_VERTEX]; for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) { for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) { _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog)); /* only need first 3/4 for point/line/tri/patch */ if (screen->info.have_EXT_extended_dynamic_state && i == (prog->last_vertex_stage->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3)) break; } } return prog; fail: if (prog) zink_destroy_gfx_program(screen, prog); return NULL; } /* NO THREAD-UNSAFE ctx USAGE! 
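 * Program setup can be driven from background compile jobs (see create_linked_separable_job),
 * so only thread-safe ctx access is permitted here.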
*/ static struct zink_gfx_program * gfx_program_init(struct zink_context *ctx, struct zink_gfx_program *prog) { struct zink_screen *screen = zink_screen(ctx->base.screen); nir_shader *nir[ZINK_GFX_SHADER_COUNT]; /* iterate in reverse order to create TES before generated TCS */ for (int i = MESA_SHADER_FRAGMENT; i >= MESA_SHADER_VERTEX; i--) { if (prog->shaders[i]) { util_queue_fence_wait(&prog->shaders[i]->precompile.fence); /* this may have already been precompiled for separate shader */ if (i == MESA_SHADER_TESS_CTRL && prog->shaders[i]->non_fs.is_generated && prog->shaders[MESA_SHADER_TESS_CTRL]->nir) zink_shader_tcs_init(screen, prog->shaders[MESA_SHADER_TESS_CTRL], nir[MESA_SHADER_TESS_EVAL], &nir[i]); else nir[i] = zink_shader_deserialize(screen, prog->shaders[i]); } else { nir[i] = NULL; } } assign_io(screen, nir); for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { if (nir[i]) zink_shader_serialize_blob(nir[i], &prog->blobs[i]); ralloc_free(nir[i]); } if (screen->optimal_keys) prog->libs = find_or_create_lib_cache(screen, prog); if (prog->libs) p_atomic_inc(&prog->libs->refcount); struct mesa_blake3 sctx; _mesa_blake3_init(&sctx); for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { if (prog->shaders[i]) _mesa_blake3_update(&sctx, prog->shaders[i]->base.sha1, sizeof(prog->shaders[i]->base.sha1)); } _mesa_blake3_final(&sctx, prog->base.blake3); if (!zink_descriptor_program_init(ctx, &prog->base)) goto fail; return prog; fail: if (prog) zink_destroy_gfx_program(screen, prog); return NULL; } struct zink_gfx_program * zink_create_gfx_program(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch, uint32_t gfx_hash) { struct zink_gfx_program *prog = gfx_program_create(ctx, stages, vertices_per_patch, gfx_hash); if (prog) prog = gfx_program_init(ctx, prog); return prog; } /* Creates a replacement, optimized zink_gfx_program for this set of separate shaders, which will * be swapped in in place of the fast-linked separable program once it's done compiling. */ static void create_linked_separable_job(void *data, void *gdata, int thread_index) { struct zink_gfx_program *prog = data; /* this is a dead program */ if (prog->base.removed) return; prog->full_prog = gfx_program_create(prog->base.ctx, prog->shaders, 0, prog->gfx_hash); /* block gfx_shader_prune in the main thread */ util_queue_fence_reset(&prog->full_prog->base.cache_fence); /* add an ownership ref */ zink_gfx_program_reference(zink_screen(prog->base.ctx->base.screen), NULL, prog->full_prog); /* this is otherwise a dead program */ if (prog->full_prog->stages_present == prog->full_prog->stages_remaining) gfx_program_precompile_job(prog->full_prog, gdata, thread_index); util_queue_fence_signal(&prog->full_prog->base.cache_fence); } struct zink_gfx_program * create_gfx_program_separable(struct zink_context *ctx, struct zink_shader **stages, unsigned vertices_per_patch) { struct zink_screen *screen = zink_screen(ctx->base.screen); bool is_separate = true; for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) is_separate &= !stages[i] || stages[i]->info.separate_shader; /* filter cases that need real pipelines */ if (!is_separate || /* TODO: maybe try variants? 
grimace */ !ZINK_SHADER_KEY_OPTIMAL_IS_DEFAULT(ctx->gfx_pipeline_state.optimal_key) || !zink_can_use_pipeline_libs(ctx)) return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash); for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { /* ensure async shader creation is done */ if (stages[i]) { util_queue_fence_wait(&stages[i]->precompile.fence); if (!stages[i]->precompile.obj.mod) return zink_create_gfx_program(ctx, stages, vertices_per_patch, ctx->gfx_hash); } } struct zink_gfx_program *prog = create_program(ctx, false); if (!prog) goto fail; prog->is_separable = true; prog->gfx_hash = ctx->gfx_hash; prog->base.uses_shobj = screen->info.have_EXT_shader_object; prog->stages_remaining = prog->stages_present = ctx->shader_stages; memcpy(prog->shaders, stages, sizeof(prog->shaders)); prog->last_vertex_stage = ctx->last_vertex_stage; if (stages[MESA_SHADER_TESS_EVAL] && !stages[MESA_SHADER_TESS_CTRL]) { prog->shaders[MESA_SHADER_TESS_CTRL] = stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs; prog->stages_present |= BITFIELD_BIT(MESA_SHADER_TESS_CTRL); } if (!screen->info.have_EXT_shader_object) { prog->libs = create_lib_cache(prog, false); /* this libs cache is owned by the program */ p_atomic_set(&prog->libs->refcount, 1); } unsigned refs = 0; for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { if (prog->shaders[i]) { simple_mtx_lock(&prog->shaders[i]->lock); _mesa_set_add(prog->shaders[i]->programs, prog); simple_mtx_unlock(&prog->shaders[i]->lock); if (screen->info.have_EXT_shader_object) { if (!prog->objects[i]) prog->objects[i] = prog->shaders[i]->precompile.obj.obj; } refs++; } } /* We can do this add after the _mesa_set_adds above because we know the prog->shaders[] are * referenced by the draw state and zink_gfx_shader_free() can't be called on them while we're in here. */ p_atomic_add(&prog->base.reference.count, refs - 1); for (int r = 0; r < ARRAY_SIZE(prog->pipelines); ++r) { for (int i = 0; i < ARRAY_SIZE(prog->pipelines[0]); ++i) { _mesa_hash_table_init(&prog->pipelines[r][i], prog, NULL, zink_get_gfx_pipeline_eq_func(screen, prog)); /* only need first 3/4 for point/line/tri/patch */ if (screen->info.have_EXT_extended_dynamic_state && i == (prog->last_vertex_stage->info.stage == MESA_SHADER_TESS_EVAL ? 4 : 3)) break; } } for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { if (!prog->shaders[i] || !prog->shaders[i]->precompile.dsl) continue; int idx = !i ? 0 : screen->info.have_EXT_shader_object ? i : 1; prog->base.dd.binding_usage |= BITFIELD_BIT(idx); prog->base.dsl[idx] = prog->shaders[i]->precompile.dsl; /* guarantee a null dsl if previous stages don't have descriptors */ if (prog->shaders[i]->precompile.dsl) prog->base.num_dsl = idx + 1; prog->base.dd.bindless |= prog->shaders[i]->bindless; } if (prog->base.dd.bindless) { prog->base.num_dsl = screen->compact_descriptors ? 
ZINK_DESCRIPTOR_ALL_TYPES - ZINK_DESCRIPTOR_COMPACT : ZINK_DESCRIPTOR_ALL_TYPES; prog->base.dsl[screen->desc_set_id[ZINK_DESCRIPTOR_BINDLESS]] = screen->bindless_layout; } prog->base.layout = zink_pipeline_layout_create(screen, prog->base.dsl, prog->base.num_dsl, false, VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT); prog->last_variant_hash = ctx->gfx_pipeline_state.optimal_key; if (!screen->info.have_EXT_shader_object) { VkPipeline libs[] = {stages[MESA_SHADER_VERTEX]->precompile.gpl, stages[MESA_SHADER_FRAGMENT]->precompile.gpl}; struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key); if (!gkey) { mesa_loge("ZINK: failed to allocate gkey!"); goto fail; } gkey->optimal_key = prog->last_variant_hash; assert(gkey->optimal_key); gkey->pipeline = zink_create_gfx_pipeline_combined(screen, prog, VK_NULL_HANDLE, libs, 2, VK_NULL_HANDLE, false, false); _mesa_set_add(&prog->libs->libs, gkey); } if (!(zink_debug & ZINK_DEBUG_NOOPT)) util_queue_add_job(&screen->cache_get_thread, prog, &prog->base.cache_fence, create_linked_separable_job, NULL, 0); return prog; fail: if (prog) zink_destroy_gfx_program(screen, prog); return NULL; } static void print_pipeline_stats(struct zink_screen *screen, VkPipeline pipeline, struct util_debug_callback *debug) { VkPipelineInfoKHR pinfo = { VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR, NULL, pipeline }; unsigned exe_count = 0; VkPipelineExecutablePropertiesKHR props[10] = {0}; for (unsigned i = 0; i < ARRAY_SIZE(props); i++) { props[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_PROPERTIES_KHR; props[i].pNext = NULL; } VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, NULL); VKSCR(GetPipelineExecutablePropertiesKHR)(screen->dev, &pinfo, &exe_count, props); for (unsigned e = 0; e < exe_count; e++) { VkPipelineExecutableInfoKHR info = { VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR, NULL, pipeline, e }; unsigned count = 0; struct u_memstream stream; char *print_buf; size_t print_buf_sz; if (!u_memstream_open(&stream, &print_buf, &print_buf_sz)) { mesa_loge("ZINK: failed to open memstream!"); return; } FILE *f = u_memstream_get(&stream); fprintf(f, "%s shader: ", props[e].name); VkPipelineExecutableStatisticKHR *stats = NULL; VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, NULL); stats = calloc(count, sizeof(VkPipelineExecutableStatisticKHR)); if (!stats) { mesa_loge("ZINK: failed to allocate stats!"); return; } for (unsigned i = 0; i < count; i++) stats[i].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR; VKSCR(GetPipelineExecutableStatisticsKHR)(screen->dev, &info, &count, stats); for (unsigned i = 0; i < count; i++) { if (i) fprintf(f, ", "); switch (stats[i].format) { case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR: fprintf(f, "%u %s", stats[i].value.b32, stats[i].name); break; case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR: fprintf(f, "%" PRIi64 " %s", stats[i].value.i64, stats[i].name); break; case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR: fprintf(f, "%" PRIu64 " %s", stats[i].value.u64, stats[i].name); break; case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR: fprintf(f, "%g %s", stats[i].value.f64, stats[i].name); break; default: unreachable("unknown statistic"); } } /* print_buf is only valid after flushing. 
*/ fflush(f); util_debug_message(debug, SHADER_INFO, "%s", print_buf); u_memstream_close(&stream); free(print_buf); } } static uint32_t hash_compute_pipeline_state_local_size(const void *key) { const struct zink_compute_pipeline_state *state = key; uint32_t hash = _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash)); hash = XXH32(&state->local_size[0], sizeof(state->local_size), hash); return hash; } static uint32_t hash_compute_pipeline_state(const void *key) { const struct zink_compute_pipeline_state *state = key; return _mesa_hash_data(state, offsetof(struct zink_compute_pipeline_state, hash)); } void zink_program_update_compute_pipeline_state(struct zink_context *ctx, struct zink_compute_program *comp, const struct pipe_grid_info *info) { if (comp->use_local_size) { for (int i = 0; i < ARRAY_SIZE(ctx->compute_pipeline_state.local_size); i++) { if (ctx->compute_pipeline_state.local_size[i] != info->block[i]) ctx->compute_pipeline_state.dirty = true; ctx->compute_pipeline_state.local_size[i] = info->block[i]; } } if (ctx->compute_pipeline_state.variable_shared_mem != info->variable_shared_mem) { ctx->compute_pipeline_state.dirty = true; ctx->compute_pipeline_state.variable_shared_mem = info->variable_shared_mem; } } static bool equals_compute_pipeline_state(const void *a, const void *b) { const struct zink_compute_pipeline_state *sa = a; const struct zink_compute_pipeline_state *sb = b; return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) && sa->module == sb->module; } static bool equals_compute_pipeline_state_local_size(const void *a, const void *b) { const struct zink_compute_pipeline_state *sa = a; const struct zink_compute_pipeline_state *sb = b; return !memcmp(a, b, offsetof(struct zink_compute_pipeline_state, hash)) && !memcmp(sa->local_size, sb->local_size, sizeof(sa->local_size)) && sa->module == sb->module; } static void precompile_compute_job(void *data, void *gdata, int thread_index) { struct zink_compute_program *comp = data; struct zink_screen *screen = gdata; comp->shader = zink_shader_create(screen, comp->nir); zink_shader_init(screen, comp->shader); comp->curr = comp->module = CALLOC_STRUCT(zink_shader_module); assert(comp->module); comp->module->shobj = false; comp->module->obj = zink_shader_compile(screen, false, comp->shader, comp->nir, NULL, NULL, &comp->base); /* comp->nir will be freed by zink_shader_compile */ comp->nir = NULL; assert(comp->module->obj.spirv); util_dynarray_init(&comp->shader_cache[0], comp); util_dynarray_init(&comp->shader_cache[1], comp); struct mesa_blake3 blake3_ctx; _mesa_blake3_init(&blake3_ctx); _mesa_blake3_update(&blake3_ctx, comp->shader->blob.data, comp->shader->blob.size); _mesa_blake3_final(&blake3_ctx, comp->base.blake3); zink_descriptor_program_init(comp->base.ctx, &comp->base); zink_screen_get_pipeline_cache(screen, &comp->base, true); if (comp->base.can_precompile) comp->base_pipeline = zink_create_compute_pipeline(screen, comp, NULL); if (comp->base_pipeline) zink_screen_update_pipeline_cache(screen, &comp->base, true); } static struct zink_compute_program * create_compute_program(struct zink_context *ctx, nir_shader *nir) { struct zink_screen *screen = zink_screen(ctx->base.screen); struct zink_compute_program *comp = create_program(ctx, true); if (!comp) return NULL; simple_mtx_init(&comp->cache_lock, mtx_plain); comp->scratch_size = nir->scratch_size; comp->nir = nir; comp->num_inlinable_uniforms = nir->info.num_inlinable_uniforms; comp->use_local_size = !(nir->info.workgroup_size[0] 
|| nir->info.workgroup_size[1] || nir->info.workgroup_size[2]); comp->has_variable_shared_mem = nir->info.cs.has_variable_shared_mem; comp->base.can_precompile = !comp->use_local_size && (screen->info.have_EXT_non_seamless_cube_map || !zink_shader_has_cubes(nir)) && (screen->info.rb2_feats.robustImageAccess2 || !(ctx->flags & PIPE_CONTEXT_ROBUST_BUFFER_ACCESS)); _mesa_hash_table_init(&comp->pipelines, comp, NULL, comp->use_local_size ? equals_compute_pipeline_state_local_size : equals_compute_pipeline_state); if (zink_debug & (ZINK_DEBUG_NOBGC|ZINK_DEBUG_SHADERDB)) precompile_compute_job(comp, screen, 0); else util_queue_add_job(&screen->cache_get_thread, comp, &comp->base.cache_fence, precompile_compute_job, NULL, 0); if (zink_debug & ZINK_DEBUG_SHADERDB) { print_pipeline_stats(screen, comp->base_pipeline, &ctx->dbg); } return comp; } bool zink_program_descriptor_is_buffer(struct zink_context *ctx, gl_shader_stage stage, enum zink_descriptor_type type, unsigned i) { struct zink_shader *zs = NULL; switch (stage) { case MESA_SHADER_VERTEX: case MESA_SHADER_TESS_CTRL: case MESA_SHADER_TESS_EVAL: case MESA_SHADER_GEOMETRY: case MESA_SHADER_FRAGMENT: zs = ctx->gfx_stages[stage]; break; case MESA_SHADER_COMPUTE: { zs = ctx->curr_compute->shader; break; } default: unreachable("unknown shader type"); } if (!zs) return false; return zink_shader_descriptor_is_buffer(zs, type, i); } static unsigned get_num_bindings(struct zink_shader *zs, enum zink_descriptor_type type) { switch (type) { case ZINK_DESCRIPTOR_TYPE_UNIFORMS: return !!zs->has_uniforms; case ZINK_DESCRIPTOR_TYPE_UBO: case ZINK_DESCRIPTOR_TYPE_SSBO: return zs->num_bindings[type]; default: break; } unsigned num_bindings = 0; for (int i = 0; i < zs->num_bindings[type]; i++) num_bindings += zs->bindings[type][i].size; return num_bindings; } unsigned zink_program_num_bindings_typed(const struct zink_program *pg, enum zink_descriptor_type type) { unsigned num_bindings = 0; if (pg->is_compute) { struct zink_compute_program *comp = (void*)pg; return get_num_bindings(comp->shader, type); } struct zink_gfx_program *prog = (void*)pg; for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { if (prog->shaders[i]) num_bindings += get_num_bindings(prog->shaders[i], type); } return num_bindings; } unsigned zink_program_num_bindings(const struct zink_program *pg) { unsigned num_bindings = 0; for (unsigned i = 0; i < ZINK_DESCRIPTOR_BASE_TYPES; i++) num_bindings += zink_program_num_bindings_typed(pg, i); return num_bindings; } static void deinit_program(struct zink_screen *screen, struct zink_program *pg) { util_queue_fence_wait(&pg->cache_fence); if (pg->layout) VKSCR(DestroyPipelineLayout)(screen->dev, pg->layout, NULL); if (pg->pipeline_cache) VKSCR(DestroyPipelineCache)(screen->dev, pg->pipeline_cache, NULL); u_rwlock_destroy(&pg->pipeline_cache_lock); zink_descriptor_program_deinit(screen, pg); } void zink_destroy_gfx_program(struct zink_screen *screen, struct zink_gfx_program *prog) { unsigned max_idx = ARRAY_SIZE(prog->pipelines[0]); if (screen->info.have_EXT_extended_dynamic_state) { /* only need first 3/4 for point/line/tri/patch */ if ((prog->stages_present & (BITFIELD_BIT(MESA_SHADER_TESS_EVAL) | BITFIELD_BIT(MESA_SHADER_GEOMETRY))) == BITFIELD_BIT(MESA_SHADER_TESS_EVAL)) max_idx = 4; else max_idx = 3; max_idx++; } if (prog->is_separable) zink_gfx_program_reference(screen, &prog->full_prog, NULL); for (unsigned r = 0; r < ARRAY_SIZE(prog->pipelines); r++) { for (int i = 0; i < max_idx; ++i) { hash_table_foreach(&prog->pipelines[r][i], entry) { 
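/* wait for any queued background compile of this entry before destroying its pipelines */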
struct zink_gfx_pipeline_cache_entry *pc_entry = entry->data; util_queue_fence_wait(&pc_entry->fence); VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL); VKSCR(DestroyPipeline)(screen->dev, pc_entry->gpl.unoptimized_pipeline, NULL); free(pc_entry); } } } deinit_program(screen, &prog->base); for (int i = 0; i < ZINK_GFX_SHADER_COUNT; ++i) { if (prog->shaders[i]) { _mesa_set_remove_key(prog->shaders[i]->programs, prog); prog->shaders[i] = NULL; } if (!prog->is_separable) { destroy_shader_cache(screen, &prog->shader_cache[i][0][0]); destroy_shader_cache(screen, &prog->shader_cache[i][0][1]); destroy_shader_cache(screen, &prog->shader_cache[i][1][0]); destroy_shader_cache(screen, &prog->shader_cache[i][1][1]); blob_finish(&prog->blobs[i]); } } if (prog->libs) zink_gfx_lib_cache_unref(screen, prog->libs); ralloc_free(prog); } void zink_destroy_compute_program(struct zink_screen *screen, struct zink_compute_program *comp) { deinit_program(screen, &comp->base); assert(comp->shader); assert(!comp->shader->spirv); zink_shader_free(screen, comp->shader); destroy_shader_cache(screen, &comp->shader_cache[0]); destroy_shader_cache(screen, &comp->shader_cache[1]); hash_table_foreach(&comp->pipelines, entry) { struct compute_pipeline_cache_entry *pc_entry = entry->data; VKSCR(DestroyPipeline)(screen->dev, pc_entry->pipeline, NULL); free(pc_entry); } VKSCR(DestroyPipeline)(screen->dev, comp->base_pipeline, NULL); zink_destroy_shader_module(screen, comp->module); ralloc_free(comp); } ALWAYS_INLINE static bool compute_can_shortcut(const struct zink_compute_program *comp) { return !comp->use_local_size && !comp->curr->num_uniforms && !comp->curr->has_nonseamless; } VkPipeline zink_get_compute_pipeline(struct zink_screen *screen, struct zink_compute_program *comp, struct zink_compute_pipeline_state *state) { struct hash_entry *entry = NULL; struct compute_pipeline_cache_entry *cache_entry; if (!state->dirty && !state->module_changed) return state->pipeline; if (state->dirty) { if (state->pipeline) //avoid on first hash state->final_hash ^= state->hash; if (comp->use_local_size) state->hash = hash_compute_pipeline_state_local_size(state); else state->hash = hash_compute_pipeline_state(state); state->dirty = false; state->final_hash ^= state->hash; } util_queue_fence_wait(&comp->base.cache_fence); if (comp->base_pipeline && compute_can_shortcut(comp)) { state->pipeline = comp->base_pipeline; return state->pipeline; } entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state); if (!entry) { simple_mtx_lock(&comp->cache_lock); entry = _mesa_hash_table_search_pre_hashed(&comp->pipelines, state->final_hash, state); if (entry) { simple_mtx_unlock(&comp->cache_lock); goto out; } VkPipeline pipeline = zink_create_compute_pipeline(screen, comp, state); if (pipeline == VK_NULL_HANDLE) { simple_mtx_unlock(&comp->cache_lock); return VK_NULL_HANDLE; } zink_screen_update_pipeline_cache(screen, &comp->base, false); if (compute_can_shortcut(comp)) { simple_mtx_unlock(&comp->cache_lock); /* don't add base pipeline to cache */ state->pipeline = comp->base_pipeline = pipeline; return state->pipeline; } struct compute_pipeline_cache_entry *pc_entry = CALLOC_STRUCT(compute_pipeline_cache_entry); if (!pc_entry) { simple_mtx_unlock(&comp->cache_lock); return VK_NULL_HANDLE; } memcpy(&pc_entry->state, state, sizeof(*state)); pc_entry->pipeline = pipeline; entry = _mesa_hash_table_insert_pre_hashed(&comp->pipelines, state->final_hash, pc_entry, pc_entry); assert(entry); 
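/* the new entry is inserted pre-hashed with final_hash, so the lockless lookup
 * above (and any later lookup) can reuse the precomputed hash; only insertion
 * takes comp->cache_lock
 */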
simple_mtx_unlock(&comp->cache_lock); } out: cache_entry = entry->data; state->pipeline = cache_entry->pipeline; return state->pipeline; } static void bind_gfx_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *shader) { if (shader && shader->info.num_inlinable_uniforms) ctx->shader_has_inlinable_uniforms_mask |= 1 << stage; else ctx->shader_has_inlinable_uniforms_mask &= ~(1 << stage); if (ctx->gfx_stages[stage]) ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash; if (stage == MESA_SHADER_GEOMETRY && ctx->is_generated_gs_bound && (!shader || !shader->non_fs.parent)) { ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY); ctx->is_generated_gs_bound = false; } ctx->gfx_stages[stage] = shader; ctx->gfx_dirty = ctx->gfx_stages[MESA_SHADER_FRAGMENT] && ctx->gfx_stages[MESA_SHADER_VERTEX]; ctx->gfx_pipeline_state.modules_changed = true; if (shader) { ctx->shader_stages |= BITFIELD_BIT(stage); ctx->gfx_hash ^= ctx->gfx_stages[stage]->hash; } else { ctx->gfx_pipeline_state.modules[stage] = VK_NULL_HANDLE; if (ctx->curr_program) ctx->gfx_pipeline_state.final_hash ^= ctx->curr_program->last_variant_hash; ctx->curr_program = NULL; ctx->shader_stages &= ~BITFIELD_BIT(stage); } } static enum mesa_prim gs_output_to_reduced_prim_type(struct shader_info *info) { switch (info->gs.output_primitive) { case MESA_PRIM_POINTS: return MESA_PRIM_POINTS; case MESA_PRIM_LINES: case MESA_PRIM_LINE_LOOP: case MESA_PRIM_LINE_STRIP: case MESA_PRIM_LINES_ADJACENCY: case MESA_PRIM_LINE_STRIP_ADJACENCY: return MESA_PRIM_LINES; case MESA_PRIM_TRIANGLES: case MESA_PRIM_TRIANGLE_STRIP: case MESA_PRIM_TRIANGLE_FAN: case MESA_PRIM_TRIANGLES_ADJACENCY: case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY: return MESA_PRIM_TRIANGLES; default: unreachable("unexpected output primitive type"); } } static enum mesa_prim update_rast_prim(struct zink_shader *shader) { struct shader_info *info = &shader->info; if (info->stage == MESA_SHADER_GEOMETRY) return gs_output_to_reduced_prim_type(info); else if (info->stage == MESA_SHADER_TESS_EVAL) { if (info->tess.point_mode) return MESA_PRIM_POINTS; else { switch (info->tess._primitive_mode) { case TESS_PRIMITIVE_ISOLINES: return MESA_PRIM_LINES; case TESS_PRIMITIVE_TRIANGLES: case TESS_PRIMITIVE_QUADS: return MESA_PRIM_TRIANGLES; default: return MESA_PRIM_COUNT; } } } return MESA_PRIM_COUNT; } static void unbind_generated_gs(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *prev_shader) { if (prev_shader->non_fs.is_generated) ctx->inlinable_uniforms_valid_mask &= ~BITFIELD64_BIT(MESA_SHADER_GEOMETRY); if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] && ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.parent == prev_shader) { bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, NULL); } } static void bind_last_vertex_stage(struct zink_context *ctx, gl_shader_stage stage, struct zink_shader *prev_shader) { if (prev_shader && stage < MESA_SHADER_GEOMETRY) unbind_generated_gs(ctx, stage, prev_shader); gl_shader_stage old = ctx->last_vertex_stage ? ctx->last_vertex_stage->info.stage : MESA_SHADER_STAGES; if (ctx->gfx_stages[MESA_SHADER_GEOMETRY]) ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_GEOMETRY]; else if (ctx->gfx_stages[MESA_SHADER_TESS_EVAL]) ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_TESS_EVAL]; else ctx->last_vertex_stage = ctx->gfx_stages[MESA_SHADER_VERTEX]; gl_shader_stage current = ctx->last_vertex_stage ? 
ctx->last_vertex_stage->info.stage : MESA_SHADER_VERTEX; /* update rast_prim */ ctx->gfx_pipeline_state.shader_rast_prim = ctx->last_vertex_stage ? update_rast_prim(ctx->last_vertex_stage) : MESA_PRIM_COUNT; if (old != current) { if (!zink_screen(ctx->base.screen)->optimal_keys) { if (old != MESA_SHADER_STAGES) { memset(&ctx->gfx_pipeline_state.shader_keys.key[old].key.vs_base, 0, sizeof(struct zink_vs_key_base)); ctx->dirty_gfx_stages |= BITFIELD_BIT(old); } else { /* always unset vertex shader values when changing to a non-vs last stage */ memset(&ctx->gfx_pipeline_state.shader_keys.key[MESA_SHADER_VERTEX].key.vs_base, 0, sizeof(struct zink_vs_key_base)); } } unsigned num_viewports = ctx->vp_state.num_viewports; struct zink_screen *screen = zink_screen(ctx->base.screen); /* number of enabled viewports is based on whether last vertex stage writes viewport index */ if (ctx->last_vertex_stage) { if (ctx->last_vertex_stage->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK)) ctx->vp_state.num_viewports = MIN2(screen->info.props.limits.maxViewports, PIPE_MAX_VIEWPORTS); else ctx->vp_state.num_viewports = 1; } else { ctx->vp_state.num_viewports = 1; } ctx->vp_state_changed |= num_viewports != ctx->vp_state.num_viewports; if (!screen->info.have_EXT_extended_dynamic_state) { if (ctx->gfx_pipeline_state.dyn_state1.num_viewports != ctx->vp_state.num_viewports) ctx->gfx_pipeline_state.dirty = true; ctx->gfx_pipeline_state.dyn_state1.num_viewports = ctx->vp_state.num_viewports; } ctx->last_vertex_stage_dirty = true; } } static void zink_bind_vs_state(struct pipe_context *pctx, void *cso) { struct zink_context *ctx = zink_context(pctx); if (!cso && !ctx->gfx_stages[MESA_SHADER_VERTEX]) return; struct zink_shader *prev_shader = ctx->gfx_stages[MESA_SHADER_VERTEX]; bind_gfx_stage(ctx, MESA_SHADER_VERTEX, cso); bind_last_vertex_stage(ctx, MESA_SHADER_VERTEX, prev_shader); if (cso) { struct zink_shader *zs = cso; ctx->shader_reads_drawid = BITSET_TEST(zs->info.system_values_read, SYSTEM_VALUE_DRAW_ID); ctx->shader_reads_basevertex = BITSET_TEST(zs->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX); } else { ctx->shader_reads_drawid = false; ctx->shader_reads_basevertex = false; } } /* if gl_SampleMask[] is written to, we have to ensure that we get a shader with the same sample count: * in GL, samples==1 means ignore gl_SampleMask[] * in VK, gl_SampleMask[] is never ignored */ void zink_update_fs_key_samples(struct zink_context *ctx) { if (!ctx->gfx_stages[MESA_SHADER_FRAGMENT]) return; if (zink_shader_uses_samples(ctx->gfx_stages[MESA_SHADER_FRAGMENT])) { bool samples = zink_get_fs_base_key(ctx)->samples; if (samples != (ctx->fb_state.samples > 1)) zink_set_fs_base_key(ctx)->samples = ctx->fb_state.samples > 1; } } void zink_update_gs_key_rectangular_line(struct zink_context *ctx) { bool line_rectangular = zink_get_gs_key(ctx)->line_rectangular; if (line_rectangular != ctx->rast_state->base.line_rectangular) zink_set_gs_key(ctx)->line_rectangular = ctx->rast_state->base.line_rectangular; } static void zink_bind_fs_state(struct pipe_context *pctx, void *cso) { struct zink_context *ctx = zink_context(pctx); if (!cso && !ctx->gfx_stages[MESA_SHADER_FRAGMENT]) return; if (ctx->disable_fs && !ctx->disable_color_writes && cso != ctx->null_fs) { ctx->saved_fs = cso; zink_set_null_fs(ctx); return; } bool writes_cbuf0 = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? 
(ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info.outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0)) > 0 : true; unsigned shadow_mask = ctx->gfx_stages[MESA_SHADER_FRAGMENT] ? ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask : 0; bind_gfx_stage(ctx, MESA_SHADER_FRAGMENT, cso); ctx->fbfetch_outputs = 0; if (cso) { shader_info *info = &ctx->gfx_stages[MESA_SHADER_FRAGMENT]->info; bool new_writes_cbuf0 = (info->outputs_written & BITFIELD_BIT(FRAG_RESULT_DATA0)) > 0; if (ctx->gfx_pipeline_state.blend_state && ctx->gfx_pipeline_state.blend_state->alpha_to_coverage && writes_cbuf0 != new_writes_cbuf0 && zink_screen(pctx->screen)->info.have_EXT_extended_dynamic_state3) { ctx->blend_state_changed = true; ctx->ds3_states |= BITFIELD_BIT(ZINK_DS3_BLEND_A2C); } if (info->fs.uses_fbfetch_output) { if (info->outputs_read & (BITFIELD_BIT(FRAG_RESULT_DEPTH) | BITFIELD_BIT(FRAG_RESULT_STENCIL))) ctx->fbfetch_outputs |= BITFIELD_BIT(PIPE_MAX_COLOR_BUFS); ctx->fbfetch_outputs |= info->outputs_read >> FRAG_RESULT_DATA0; } zink_update_fs_key_samples(ctx); if (zink_screen(pctx->screen)->info.have_EXT_rasterization_order_attachment_access) { if (ctx->gfx_pipeline_state.rast_attachment_order != info->fs.uses_fbfetch_output) ctx->gfx_pipeline_state.dirty = true; ctx->gfx_pipeline_state.rast_attachment_order = info->fs.uses_fbfetch_output; } zink_set_zs_needs_shader_swizzle_key(ctx, MESA_SHADER_FRAGMENT, false); if (shadow_mask != ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask && !zink_screen(pctx->screen)->driver_compiler_workarounds.needs_zs_shader_swizzle) zink_update_shadow_samplerviews(ctx, shadow_mask | ctx->gfx_stages[MESA_SHADER_FRAGMENT]->fs.legacy_shadow_mask); if (!ctx->track_renderpasses && !ctx->blitting) ctx->rp_tc_info_updated = true; } zink_update_fbfetch(ctx); } static void zink_bind_gs_state(struct pipe_context *pctx, void *cso) { struct zink_context *ctx = zink_context(pctx); if (!cso && !ctx->gfx_stages[MESA_SHADER_GEOMETRY]) return; bind_gfx_stage(ctx, MESA_SHADER_GEOMETRY, cso); bind_last_vertex_stage(ctx, MESA_SHADER_GEOMETRY, NULL); } static void zink_bind_tcs_state(struct pipe_context *pctx, void *cso) { bind_gfx_stage(zink_context(pctx), MESA_SHADER_TESS_CTRL, cso); } static void zink_bind_tes_state(struct pipe_context *pctx, void *cso) { struct zink_context *ctx = zink_context(pctx); if (!cso && !ctx->gfx_stages[MESA_SHADER_TESS_EVAL]) return; if (!!ctx->gfx_stages[MESA_SHADER_TESS_EVAL] != !!cso) { if (!cso) { /* if unsetting a TESS that uses a generated TCS, ensure the TCS is unset */ if (ctx->gfx_stages[MESA_SHADER_TESS_CTRL] == ctx->gfx_stages[MESA_SHADER_TESS_EVAL]->non_fs.generated_tcs) ctx->gfx_stages[MESA_SHADER_TESS_CTRL] = NULL; } } struct zink_shader *prev_shader = ctx->gfx_stages[MESA_SHADER_TESS_EVAL]; bind_gfx_stage(ctx, MESA_SHADER_TESS_EVAL, cso); bind_last_vertex_stage(ctx, MESA_SHADER_TESS_EVAL, prev_shader); } static void * zink_create_cs_state(struct pipe_context *pctx, const struct pipe_compute_state *shader) { struct nir_shader *nir; if (shader->ir_type != PIPE_SHADER_IR_NIR) nir = zink_tgsi_to_nir(pctx->screen, shader->prog); else nir = (struct nir_shader *)shader->prog; if (nir->info.uses_bindless) zink_descriptors_init_bindless(zink_context(pctx)); return create_compute_program(zink_context(pctx), nir); } static void zink_bind_cs_state(struct pipe_context *pctx, void *cso) { struct zink_context *ctx = zink_context(pctx); struct zink_compute_program *comp = cso; if (comp && comp->num_inlinable_uniforms) ctx->shader_has_inlinable_uniforms_mask 
|= 1 << MESA_SHADER_COMPUTE; else ctx->shader_has_inlinable_uniforms_mask &= ~(1 << MESA_SHADER_COMPUTE); if (ctx->curr_compute) { zink_batch_reference_program(ctx, &ctx->curr_compute->base); ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash; ctx->compute_pipeline_state.module = VK_NULL_HANDLE; ctx->compute_pipeline_state.module_hash = 0; } ctx->compute_pipeline_state.dirty = true; ctx->curr_compute = comp; if (comp && comp != ctx->curr_compute) { ctx->compute_pipeline_state.module_hash = ctx->curr_compute->curr->hash; if (util_queue_fence_is_signalled(&comp->base.cache_fence)) ctx->compute_pipeline_state.module = ctx->curr_compute->curr->obj.mod; ctx->compute_pipeline_state.final_hash ^= ctx->compute_pipeline_state.module_hash; if (ctx->compute_pipeline_state.key.base.nonseamless_cube_mask) ctx->compute_dirty = true; } zink_select_launch_grid(ctx); } static void zink_get_compute_state_info(struct pipe_context *pctx, void *cso, struct pipe_compute_state_object_info *info) { struct zink_compute_program *comp = cso; struct zink_screen *screen = zink_screen(pctx->screen); info->max_threads = screen->info.props.limits.maxComputeWorkGroupInvocations; info->private_memory = comp->scratch_size; if (screen->info.props11.subgroupSize) { info->preferred_simd_size = screen->info.props11.subgroupSize; info->simd_sizes = info->preferred_simd_size; } else { // just guess it info->preferred_simd_size = 64; // only used for actual subgroup support info->simd_sizes = 0; } } static void zink_delete_cs_shader_state(struct pipe_context *pctx, void *cso) { struct zink_compute_program *comp = cso; zink_compute_program_reference(zink_screen(pctx->screen), &comp, NULL); } /* caller must lock prog->libs->lock */ struct zink_gfx_library_key * zink_create_pipeline_lib(struct zink_screen *screen, struct zink_gfx_program *prog, struct zink_gfx_pipeline_state *state) { struct zink_gfx_library_key *gkey = CALLOC_STRUCT(zink_gfx_library_key); if (!gkey) { mesa_loge("ZINK: failed to allocate gkey!"); return NULL; } gkey->optimal_key = state->optimal_key; assert(gkey->optimal_key); for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) gkey->modules[i] = prog->objs[i].mod; gkey->pipeline = zink_create_gfx_pipeline_library(screen, prog); _mesa_set_add(&prog->libs->libs, gkey); return gkey; } static const char * print_exe_stages(VkShaderStageFlags stages) { if (stages == VK_SHADER_STAGE_VERTEX_BIT) return "VS"; if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT)) return "VS+GS"; if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) return "VS+TCS+TES"; if (stages == (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT)) return "VS+TCS+TES+GS"; if (stages == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) return "TCS"; if (stages == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) return "TES"; if (stages == VK_SHADER_STAGE_GEOMETRY_BIT) return "GS"; if (stages == VK_SHADER_STAGE_FRAGMENT_BIT) return "FS"; if (stages == VK_SHADER_STAGE_COMPUTE_BIT) return "CS"; unreachable("unhandled combination of stages!"); } static void gfx_program_precompile_job(void *data, void *gdata, int thread_index) { struct zink_screen *screen = gdata; struct zink_gfx_program *prog = data; /* this is threadsafe */ gfx_program_init(prog->base.ctx, prog); struct zink_gfx_pipeline_state state = {0}; 
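/* precompile against a best-guess optimal key: the real draw-time key is not
 * known yet, so treat the vertex stage as the last pre-raster stage and pick a
 * default patch size
 */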
state.shader_keys_optimal.key.vs_base.last_vertex_stage = true; state.shader_keys_optimal.key.tcs.patch_vertices = 3; //random guess, generated tcs precompile is hard state.optimal_key = state.shader_keys_optimal.key.val; generate_gfx_program_modules_optimal(NULL, screen, prog, &state); zink_screen_get_pipeline_cache(screen, &prog->base, true); if (!screen->info.have_EXT_shader_object) { simple_mtx_lock(&prog->libs->lock); zink_create_pipeline_lib(screen, prog, &state); simple_mtx_unlock(&prog->libs->lock); } zink_screen_update_pipeline_cache(screen, &prog->base, true); } static void zink_link_gfx_shader(struct pipe_context *pctx, void **shaders) { struct zink_context *ctx = zink_context(pctx); struct zink_shader **zshaders = (struct zink_shader **)shaders; if (shaders[MESA_SHADER_COMPUTE]) return; /* explicitly block sample shading: this needs full pipelines always */ if (zshaders[MESA_SHADER_FRAGMENT] && zshaders[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading) return; /* can't precompile fixedfunc */ if (!shaders[MESA_SHADER_VERTEX] || !shaders[MESA_SHADER_FRAGMENT]) { /* handled directly from shader create */ return; } unsigned hash = 0; unsigned shader_stages = 0; for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) { if (zshaders[i]) { hash ^= zshaders[i]->hash; shader_stages |= BITFIELD_BIT(i); } } unsigned tess_stages = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL); unsigned tess = shader_stages & tess_stages; /* can't do fixedfunc tes either */ if (tess && !shaders[MESA_SHADER_TESS_EVAL]) return; struct hash_table *ht = &ctx->program_cache[zink_program_cache_stages(shader_stages)]; simple_mtx_lock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]); /* link can be called repeatedly with the same shaders: ignore */ if (_mesa_hash_table_search_pre_hashed(ht, hash, shaders)) { simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]); return; } struct zink_gfx_program *prog = gfx_program_create(ctx, zshaders, 3, hash); u_foreach_bit(i, shader_stages) assert(prog->shaders[i]); _mesa_hash_table_insert_pre_hashed(ht, hash, prog->shaders, prog); prog->base.removed = false; simple_mtx_unlock(&ctx->program_lock[zink_program_cache_stages(shader_stages)]); if (zink_debug & ZINK_DEBUG_SHADERDB) { struct zink_screen *screen = zink_screen(pctx->screen); gfx_program_init(ctx, prog); if (screen->optimal_keys) generate_gfx_program_modules_optimal(ctx, screen, prog, &ctx->gfx_pipeline_state); else generate_gfx_program_modules(ctx, screen, prog, &ctx->gfx_pipeline_state); VkPipeline pipeline = zink_create_gfx_pipeline(screen, prog, prog->objs, &ctx->gfx_pipeline_state, ctx->gfx_pipeline_state.element_state->binding_map, shaders[MESA_SHADER_TESS_EVAL] ? 
VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, true); print_pipeline_stats(screen, pipeline, &ctx->dbg); VKSCR(DestroyPipeline)(screen->dev, pipeline, NULL); } else { if (zink_screen(pctx->screen)->info.have_EXT_shader_object) prog->base.uses_shobj = !BITSET_TEST(zshaders[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN); if (zink_debug & ZINK_DEBUG_NOBGC) gfx_program_precompile_job(prog, pctx->screen, 0); else util_queue_add_job(&zink_screen(pctx->screen)->cache_get_thread, prog, &prog->base.cache_fence, gfx_program_precompile_job, NULL, 0); } } void zink_delete_shader_state(struct pipe_context *pctx, void *cso) { zink_gfx_shader_free(zink_screen(pctx->screen), cso); } static void precompile_separate_shader(struct zink_shader *zs, struct zink_screen *screen) { zs->precompile.obj = zink_shader_compile_separate(screen, zs); if (!screen->info.have_EXT_shader_object) { struct zink_shader_object objs[ZINK_GFX_SHADER_COUNT] = {0}; objs[zs->info.stage].mod = zs->precompile.obj.mod; zs->precompile.gpl = zink_create_gfx_pipeline_separate(screen, objs, zs->precompile.layout, zs->info.stage); } } static void gfx_shader_init_job(void *data, void *gdata, int thread_index) { struct zink_screen *screen = gdata; struct zink_shader *zs = data; zink_shader_init(screen, zs); if (zink_debug & ZINK_DEBUG_NOPC) { ralloc_free(zs->nir); zs->nir = NULL; return; } if (zs->info.separate_shader && zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB && (screen->info.have_EXT_shader_object || (screen->info.have_EXT_graphics_pipeline_library && (zs->info.stage == MESA_SHADER_FRAGMENT || zs->info.stage == MESA_SHADER_VERTEX)))) { /* sample shading can't precompile */ if (zs->info.stage != MESA_SHADER_FRAGMENT || !zs->info.fs.uses_sample_shading) precompile_separate_shader(zs, screen); } ralloc_free(zs->nir); zs->nir = NULL; } void * zink_create_gfx_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader) { struct zink_screen *screen = zink_screen(pctx->screen); nir_shader *nir; if (shader->type != PIPE_SHADER_IR_NIR) nir = zink_tgsi_to_nir(pctx->screen, shader->tokens); else nir = (struct nir_shader *)shader->ir.nir; if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_fbfetch_output) zink_descriptor_util_init_fbfetch(zink_context(pctx)); if (nir->info.uses_bindless) zink_descriptors_init_bindless(zink_context(pctx)); struct zink_shader *zs = zink_shader_create(zink_screen(pctx->screen), nir); if (zink_debug & ZINK_DEBUG_NOBGC) gfx_shader_init_job(zs, screen, 0); else util_queue_add_job(&screen->cache_get_thread, zs, &zs->precompile.fence, gfx_shader_init_job, NULL, 0); return zs; } static void zink_delete_cached_shader_state(struct pipe_context *pctx, void *cso) { struct zink_screen *screen = zink_screen(pctx->screen); util_shader_reference(pctx, &screen->shaders, &cso, NULL); } static void * zink_create_cached_shader_state(struct pipe_context *pctx, const struct pipe_shader_state *shader) { bool cache_hit; struct zink_screen *screen = zink_screen(pctx->screen); return util_live_shader_cache_get(pctx, &screen->shaders, shader, &cache_hit); } void zink_program_init(struct zink_context *ctx) { ctx->base.create_vs_state = zink_create_cached_shader_state; ctx->base.bind_vs_state = zink_bind_vs_state; ctx->base.delete_vs_state = zink_delete_cached_shader_state; ctx->base.create_fs_state = zink_create_cached_shader_state; ctx->base.bind_fs_state = zink_bind_fs_state; ctx->base.delete_fs_state = zink_delete_cached_shader_state; 
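/* gs/tcs/tes reuse the same live-shader-cache hooks as vs/fs; compute gets its
 * own create path since zink_create_cs_state builds a zink_compute_program
 * directly
 */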
ctx->base.create_gs_state = zink_create_cached_shader_state; ctx->base.bind_gs_state = zink_bind_gs_state; ctx->base.delete_gs_state = zink_delete_cached_shader_state; ctx->base.create_tcs_state = zink_create_cached_shader_state; ctx->base.bind_tcs_state = zink_bind_tcs_state; ctx->base.delete_tcs_state = zink_delete_cached_shader_state; ctx->base.create_tes_state = zink_create_cached_shader_state; ctx->base.bind_tes_state = zink_bind_tes_state; ctx->base.delete_tes_state = zink_delete_cached_shader_state; ctx->base.create_compute_state = zink_create_cs_state; ctx->base.bind_compute_state = zink_bind_cs_state; ctx->base.get_compute_state_info = zink_get_compute_state_info; ctx->base.delete_compute_state = zink_delete_cs_shader_state; if (zink_screen(ctx->base.screen)->info.have_EXT_vertex_input_dynamic_state) _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input_dynamic, equals_gfx_input_dynamic); else _mesa_set_init(&ctx->gfx_inputs, ctx, hash_gfx_input, equals_gfx_input); if (zink_screen(ctx->base.screen)->have_full_ds3) _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output_ds3, equals_gfx_output_ds3); else _mesa_set_init(&ctx->gfx_outputs, ctx, hash_gfx_output, equals_gfx_output); /* validate struct packing */ STATIC_ASSERT(offsetof(struct zink_gfx_output_key, sample_mask) == sizeof(uint32_t)); STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_pipeline_state, input) == offsetof(struct zink_gfx_input_key, vertex_buffers_enabled_mask) - offsetof(struct zink_gfx_input_key, input)); STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, vertex_strides) - offsetof(struct zink_gfx_pipeline_state, input) == offsetof(struct zink_gfx_input_key, vertex_strides) - offsetof(struct zink_gfx_input_key, input)); STATIC_ASSERT(offsetof(struct zink_gfx_pipeline_state, element_state) - offsetof(struct zink_gfx_pipeline_state, input) == offsetof(struct zink_gfx_input_key, element_state) - offsetof(struct zink_gfx_input_key, input)); STATIC_ASSERT(sizeof(union zink_shader_key_optimal) == sizeof(uint32_t)); /* no precompile at all */ if (zink_debug & ZINK_DEBUG_NOPC) return; struct zink_screen *screen = zink_screen(ctx->base.screen); if (screen->info.have_EXT_graphics_pipeline_library || screen->info.have_EXT_shader_object || zink_debug & ZINK_DEBUG_SHADERDB) ctx->base.link_shader = zink_link_gfx_shader; } bool zink_set_rasterizer_discard(struct zink_context *ctx, bool disable) { bool value = disable ? false : (ctx->rast_state ? 
ctx->rast_state->base.rasterizer_discard : false); bool changed = ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard != value; ctx->gfx_pipeline_state.dyn_state2.rasterizer_discard = value; if (!changed) return false; if (!zink_screen(ctx->base.screen)->info.have_EXT_extended_dynamic_state2) ctx->gfx_pipeline_state.dirty |= true; ctx->rasterizer_discard_changed = true; return true; } void zink_driver_thread_add_job(struct pipe_screen *pscreen, void *data, struct util_queue_fence *fence, pipe_driver_thread_func execute, pipe_driver_thread_func cleanup, const size_t job_size) { struct zink_screen *screen = zink_screen(pscreen); util_queue_add_job(&screen->cache_get_thread, data, fence, execute, cleanup, job_size); } static bool has_edge_flags(struct zink_context *ctx) { switch(ctx->gfx_pipeline_state.gfx_prim_mode) { case MESA_PRIM_POINTS: case MESA_PRIM_LINE_STRIP: case MESA_PRIM_LINE_STRIP_ADJACENCY: case MESA_PRIM_LINES: case MESA_PRIM_LINE_LOOP: case MESA_PRIM_LINES_ADJACENCY: case MESA_PRIM_TRIANGLE_STRIP: case MESA_PRIM_TRIANGLE_FAN: case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY: case MESA_PRIM_QUAD_STRIP: case MESA_PRIM_PATCHES: return false; case MESA_PRIM_TRIANGLES: case MESA_PRIM_TRIANGLES_ADJACENCY: case MESA_PRIM_QUADS: case MESA_PRIM_POLYGON: case MESA_PRIM_COUNT: default: break; } return (ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES || ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS) && ctx->gfx_stages[MESA_SHADER_VERTEX]->has_edgeflags; } static enum zink_rast_prim zink_rast_prim_for_pipe(enum mesa_prim prim) { switch (prim) { case MESA_PRIM_POINTS: return ZINK_PRIM_POINTS; case MESA_PRIM_LINES: return ZINK_PRIM_LINES; case MESA_PRIM_TRIANGLES: default: return ZINK_PRIM_TRIANGLES; } } static enum mesa_prim zink_tess_prim_type(struct zink_shader *tess) { if (tess->info.tess.point_mode) return MESA_PRIM_POINTS; else { switch (tess->info.tess._primitive_mode) { case TESS_PRIMITIVE_ISOLINES: return MESA_PRIM_LINES; case TESS_PRIMITIVE_TRIANGLES: case TESS_PRIMITIVE_QUADS: return MESA_PRIM_TRIANGLES; default: return MESA_PRIM_COUNT; } } } static inline void zink_add_inline_uniform(nir_shader *shader, int offset) { shader->info.inlinable_uniform_dw_offsets[shader->info.num_inlinable_uniforms] = offset; ++shader->info.num_inlinable_uniforms; } static unsigned encode_lower_pv_mode(enum mesa_prim prim_type) { switch (prim_type) { case MESA_PRIM_TRIANGLE_STRIP: case MESA_PRIM_QUAD_STRIP: return ZINK_PVE_PRIMITIVE_TRISTRIP; case MESA_PRIM_TRIANGLE_FAN: return ZINK_PVE_PRIMITIVE_FAN; default: return ZINK_PVE_PRIMITIVE_SIMPLE; } } void zink_set_primitive_emulation_keys(struct zink_context *ctx) { struct zink_screen *screen = zink_screen(ctx->base.screen); bool lower_line_stipple = false, lower_line_smooth = false; unsigned lower_pv_mode = 0; if (!screen->optimal_keys) { lower_line_stipple = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES && screen->driver_workarounds.no_linestipple && ctx->rast_state->base.line_stipple_enable && !ctx->num_so_targets; bool lower_point_smooth = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_POINTS && screen->driconf.emulate_point_smooth && ctx->rast_state->base.point_smooth; if (zink_get_fs_key(ctx)->lower_line_stipple != lower_line_stipple) { assert(zink_get_gs_key(ctx)->lower_line_stipple == zink_get_fs_key(ctx)->lower_line_stipple); zink_set_fs_key(ctx)->lower_line_stipple = lower_line_stipple; zink_set_gs_key(ctx)->lower_line_stipple = lower_line_stipple; } lower_line_smooth = ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_LINES && 
screen->driver_workarounds.no_linesmooth && ctx->rast_state->base.line_smooth && !ctx->num_so_targets; if (zink_get_fs_key(ctx)->lower_line_smooth != lower_line_smooth) { assert(zink_get_gs_key(ctx)->lower_line_smooth == zink_get_fs_key(ctx)->lower_line_smooth); zink_set_fs_key(ctx)->lower_line_smooth = lower_line_smooth; zink_set_gs_key(ctx)->lower_line_smooth = lower_line_smooth; } if (zink_get_fs_key(ctx)->lower_point_smooth != lower_point_smooth) { zink_set_fs_key(ctx)->lower_point_smooth = lower_point_smooth; } lower_pv_mode = ctx->gfx_pipeline_state.dyn_state3.pv_last && !screen->info.have_EXT_provoking_vertex; if (lower_pv_mode) lower_pv_mode = encode_lower_pv_mode(ctx->gfx_pipeline_state.gfx_prim_mode); if (zink_get_gs_key(ctx)->lower_pv_mode != lower_pv_mode) zink_set_gs_key(ctx)->lower_pv_mode = lower_pv_mode; } bool lower_edge_flags = has_edge_flags(ctx); bool lower_quad_prim = ctx->gfx_pipeline_state.gfx_prim_mode == MESA_PRIM_QUADS; bool lower_filled_quad = lower_quad_prim && ctx->gfx_pipeline_state.rast_prim == MESA_PRIM_TRIANGLES; if (lower_line_stipple || lower_line_smooth || lower_edge_flags || lower_quad_prim || lower_pv_mode || zink_get_gs_key(ctx)->lower_gl_point) { enum pipe_shader_type prev_vertex_stage = ctx->gfx_stages[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; enum zink_rast_prim zink_prim_type = zink_rast_prim_for_pipe(ctx->gfx_pipeline_state.rast_prim); //when using transform feedback primitives must be tessellated lower_filled_quad |= lower_quad_prim && ctx->gfx_stages[prev_vertex_stage]->info.has_transform_feedback_varyings; if (!ctx->gfx_stages[MESA_SHADER_GEOMETRY] || (ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated && ctx->gfx_stages[MESA_SHADER_GEOMETRY]->info.gs.input_primitive != ctx->gfx_pipeline_state.gfx_prim_mode)) { if (!ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]) { util_queue_fence_wait(&ctx->gfx_stages[prev_vertex_stage]->precompile.fence); nir_shader *prev_stage = zink_shader_deserialize(screen, ctx->gfx_stages[prev_vertex_stage]); nir_shader *nir; if (lower_filled_quad) { nir = zink_create_quads_emulation_gs( &screen->nir_options, prev_stage); } else { enum mesa_prim prim = ctx->gfx_pipeline_state.gfx_prim_mode; if (prev_vertex_stage == MESA_SHADER_TESS_EVAL) prim = zink_tess_prim_type(ctx->gfx_stages[MESA_SHADER_TESS_EVAL]); nir = nir_create_passthrough_gs( &screen->nir_options, prev_stage, prim, ctx->gfx_pipeline_state.rast_prim, lower_edge_flags, lower_line_stipple || lower_quad_prim); } zink_lower_system_values_to_inlined_uniforms(nir); zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK); zink_add_inline_uniform(nir, ZINK_INLINE_VAL_FLAT_MASK+1); zink_add_inline_uniform(nir, ZINK_INLINE_VAL_PV_LAST_VERT); ralloc_free(prev_stage); struct zink_shader *shader = zink_shader_create(screen, nir); zink_shader_init(screen, shader); shader->needs_inlining = true; ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type] = shader; shader->non_fs.is_generated = true; shader->non_fs.parent = ctx->gfx_stages[prev_vertex_stage]; shader->can_inline = true; memcpy(shader->sinfo.stride, ctx->gfx_stages[prev_vertex_stage]->sinfo.stride, sizeof(shader->sinfo.stride)); } ctx->base.bind_gs_state(&ctx->base, ctx->gfx_stages[prev_vertex_stage]->non_fs.generated_gs[ctx->gfx_pipeline_state.gfx_prim_mode][zink_prim_type]); ctx->is_generated_gs_bound = true; } ctx->base.set_inlinable_constants(&ctx->base, 
MESA_SHADER_GEOMETRY, 3, (uint32_t []){ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags, ctx->gfx_stages[MESA_SHADER_FRAGMENT]->flat_flags >> 32, ctx->gfx_pipeline_state.dyn_state3.pv_last}); } else if (ctx->gfx_stages[MESA_SHADER_GEOMETRY] && ctx->gfx_stages[MESA_SHADER_GEOMETRY]->non_fs.is_generated) ctx->base.bind_gs_state(&ctx->base, NULL); }
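/* the generated passthrough GS above consumes three inlined uniform dwords,
 * matching the zink_add_inline_uniform() calls: the low and high halves of the
 * fragment shader's flat_flags and the provoking-vertex-last state
 */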