/* * Copyright © 2019 Raspberry Pi Ltd * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include "vk_util.h" #include "v3dv_debug.h" #include "v3dv_private.h" #include "common/v3d_debug.h" #include "qpu/qpu_disasm.h" #include "compiler/nir/nir_builder.h" #include "nir/nir_serialize.h" #include "util/u_atomic.h" #include "util/os_time.h" #include "vk_format.h" #include "vk_nir_convert_ycbcr.h" #include "vk_pipeline.h" static VkResult compute_vpm_config(struct v3dv_pipeline *pipeline); void v3dv_print_v3d_key(struct v3d_key *key, uint32_t v3d_key_size) { struct mesa_sha1 ctx; unsigned char sha1[20]; char sha1buf[41]; _mesa_sha1_init(&ctx); _mesa_sha1_update(&ctx, key, v3d_key_size); _mesa_sha1_final(&ctx, sha1); _mesa_sha1_format(sha1buf, sha1); fprintf(stderr, "key %p: %s\n", key, sha1buf); } static void pipeline_compute_sha1_from_nir(struct v3dv_pipeline_stage *p_stage) { VkPipelineShaderStageCreateInfo info = { .module = vk_shader_module_handle_from_nir(p_stage->nir), .pName = p_stage->entrypoint, .stage = mesa_to_vk_shader_stage(p_stage->nir->info.stage), }; vk_pipeline_hash_shader_stage(0, &info, NULL, p_stage->shader_sha1); } void v3dv_shader_variant_destroy(struct v3dv_device *device, struct v3dv_shader_variant *variant) { /* The assembly BO is shared by all variants in the pipeline, so it can't * be freed here and should be freed with the pipeline */ if (variant->qpu_insts) { free(variant->qpu_insts); variant->qpu_insts = NULL; } ralloc_free(variant->prog_data.base); vk_free(&device->vk.alloc, variant); } static void destroy_pipeline_stage(struct v3dv_device *device, struct v3dv_pipeline_stage *p_stage, const VkAllocationCallbacks *pAllocator) { if (!p_stage) return; ralloc_free(p_stage->nir); vk_free2(&device->vk.alloc, pAllocator, p_stage); } static void pipeline_free_stages(struct v3dv_device *device, struct v3dv_pipeline *pipeline, const VkAllocationCallbacks *pAllocator) { assert(pipeline); for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { destroy_pipeline_stage(device, pipeline->stages[stage], pAllocator); pipeline->stages[stage] = NULL; } } static void v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline, struct v3dv_device *device, const VkAllocationCallbacks *pAllocator) { if (!pipeline) return; pipeline_free_stages(device, pipeline, pAllocator); if (pipeline->shared_data) { v3dv_pipeline_shared_data_unref(device, pipeline->shared_data); pipeline->shared_data = NULL; } if (pipeline->spill.bo) { assert(pipeline->spill.size_per_thread > 
0); v3dv_bo_free(device, pipeline->spill.bo); } if (pipeline->default_attribute_values) { v3dv_bo_free(device, pipeline->default_attribute_values); pipeline->default_attribute_values = NULL; } if (pipeline->executables.mem_ctx) ralloc_free(pipeline->executables.mem_ctx); if (pipeline->layout) v3dv_pipeline_layout_unref(device, pipeline->layout, pAllocator); vk_object_free(&device->vk, pAllocator, pipeline); } VKAPI_ATTR void VKAPI_CALL v3dv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator) { V3DV_FROM_HANDLE(v3dv_device, device, _device); V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline); if (!pipeline) return; v3dv_destroy_pipeline(pipeline, device, pAllocator); } static const struct spirv_to_nir_options default_spirv_options = { .ubo_addr_format = nir_address_format_32bit_index_offset, .ssbo_addr_format = nir_address_format_32bit_index_offset, .phys_ssbo_addr_format = nir_address_format_2x32bit_global, .push_const_addr_format = nir_address_format_logical, .shared_addr_format = nir_address_format_32bit_offset, }; const nir_shader_compiler_options * v3dv_pipeline_get_nir_options(const struct v3d_device_info *devinfo) { static bool initialized = false; static nir_shader_compiler_options options = { .lower_uadd_sat = true, .lower_usub_sat = true, .lower_iadd_sat = true, .lower_all_io_to_temps = true, .lower_extract_byte = true, .lower_extract_word = true, .lower_insert_byte = true, .lower_insert_word = true, .lower_bitfield_insert = true, .lower_bitfield_extract = true, .lower_bitfield_reverse = true, .lower_bit_count = true, .lower_cs_local_id_to_index = true, .lower_ffract = true, .lower_fmod = true, .lower_pack_unorm_2x16 = true, .lower_pack_snorm_2x16 = true, .lower_unpack_unorm_2x16 = true, .lower_unpack_snorm_2x16 = true, .lower_pack_unorm_4x8 = true, .lower_pack_snorm_4x8 = true, .lower_unpack_unorm_4x8 = true, .lower_unpack_snorm_4x8 = true, .lower_pack_half_2x16 = true, .lower_unpack_half_2x16 = true, .lower_pack_32_2x16 = true, .lower_pack_32_2x16_split = true, .lower_unpack_32_2x16_split = true, .lower_mul_2x32_64 = true, .lower_fdiv = true, .lower_find_lsb = true, .lower_ffma16 = true, .lower_ffma32 = true, .lower_ffma64 = true, .lower_flrp32 = true, .lower_fpow = true, .lower_fsqrt = true, .lower_ifind_msb = true, .lower_isign = true, .lower_ldexp = true, .lower_mul_high = true, .lower_wpos_pntc = false, .lower_to_scalar = true, .lower_device_index_to_zero = true, .lower_fquantize2f16 = true, .lower_ufind_msb = true, .has_fsub = true, .has_isub = true, .has_uclz = true, .vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic * needs to be supported */ .lower_interpolate_at = true, .max_unroll_iterations = 16, .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp), .divergence_analysis_options = nir_divergence_multiple_workgroup_per_compute_subgroup, .discard_is_demote = true, .has_ddx_intrinsics = true, .scalarize_ddx = true, }; if (!initialized) { options.lower_fsat = devinfo->ver < 71; initialized = true; } return &options; } static const struct vk_ycbcr_conversion_state * lookup_ycbcr_conversion(const void *_pipeline_layout, uint32_t set, uint32_t binding, uint32_t array_index) { struct v3dv_pipeline_layout *pipeline_layout = (struct v3dv_pipeline_layout *) _pipeline_layout; assert(set < pipeline_layout->num_sets); struct v3dv_descriptor_set_layout *set_layout = pipeline_layout->set[set].layout; assert(binding < set_layout->binding_count); struct v3dv_descriptor_set_binding_layout 
*bind_layout = &set_layout->binding[binding]; if (bind_layout->immutable_samplers_offset) { const struct v3dv_sampler *immutable_samplers = v3dv_immutable_samplers(set_layout, bind_layout); const struct v3dv_sampler *sampler = &immutable_samplers[array_index]; return sampler->conversion ? &sampler->conversion->state : NULL; } else { return NULL; } } static void preprocess_nir(nir_shader *nir) { const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = { .frag_coord = true, .point_coord = true, }; NIR_PASS(_, nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings); /* Vulkan uses the separate-shader linking model */ nir->info.separate_shader = true; /* Make sure we lower variable initializers on output variables so that * nir_remove_dead_variables below sees the corresponding stores */ NIR_PASS(_, nir, nir_lower_variable_initializers, nir_var_shader_out); if (nir->info.stage == MESA_SHADER_FRAGMENT) NIR_PASS(_, nir, nir_lower_io_to_vector, nir_var_shader_out); if (nir->info.stage == MESA_SHADER_FRAGMENT) { NIR_PASS(_, nir, nir_lower_input_attachments, &(nir_input_attachment_options) { .use_fragcoord_sysval = false, }); } NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, false); NIR_PASS(_, nir, nir_lower_system_values); NIR_PASS(_, nir, nir_lower_alu_to_scalar, NULL, NULL); NIR_PASS(_, nir, nir_normalize_cubemap_coords); NIR_PASS(_, nir, nir_lower_global_vars_to_local); NIR_PASS(_, nir, nir_split_var_copies); NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp); v3d_optimize_nir(NULL, nir); NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_push_const, nir_address_format_32bit_offset); NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_ubo | nir_var_mem_ssbo, nir_address_format_32bit_index_offset); NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_global, nir_address_format_2x32bit_global); NIR_PASS(_, nir, nir_lower_load_const_to_scalar); /* Lower a bunch of stuff */ NIR_PASS(_, nir, nir_lower_var_copies); NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX); NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_function_temp, 2); NIR_PASS(_, nir, nir_lower_array_deref_of_vec, nir_var_mem_ubo | nir_var_mem_ssbo, NULL, nir_lower_direct_array_deref_of_vec_load); NIR_PASS(_, nir, nir_lower_frexp); /* Get rid of split copies */ v3d_optimize_nir(NULL, nir); } static nir_shader * shader_module_compile_to_nir(struct v3dv_device *device, struct v3dv_pipeline_stage *stage) { assert(stage->module || stage->module_info); nir_shader *nir; const nir_shader_compiler_options *nir_options = v3dv_pipeline_get_nir_options(&device->devinfo); gl_shader_stage gl_stage = broadcom_shader_stage_to_gl(stage->stage); const VkPipelineShaderStageCreateInfo stage_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = !stage->module ? stage->module_info : NULL, .stage = mesa_to_vk_shader_stage(gl_stage), .module = vk_shader_module_to_handle((struct vk_shader_module *)stage->module), .pName = stage->entrypoint, .pSpecializationInfo = stage->spec_info, }; /* vk_pipeline_shader_stage_to_nir also handles internal shaders when * module->nir != NULL. It also calls nir_validate_shader on both cases * so we don't have to call it here. 
*/ VkResult result = vk_pipeline_shader_stage_to_nir(&device->vk, stage->pipeline->flags, &stage_info, &default_spirv_options, nir_options, NULL, &nir); if (result != VK_SUCCESS) return NULL; assert(nir->info.stage == gl_stage); if (V3D_DBG(SHADERDB) && (!stage->module || stage->module->nir == NULL)) { char sha1buf[41]; _mesa_sha1_format(sha1buf, stage->pipeline->sha1); nir->info.name = ralloc_strdup(nir, sha1buf); } if (V3D_DBG(NIR) || v3d_debug_flag_for_shader_stage(gl_stage)) { fprintf(stderr, "NIR after vk_pipeline_shader_stage_to_nir: %s prog %d NIR:\n", broadcom_shader_stage_name(stage->stage), stage->program_id); nir_print_shader(nir, stderr); fprintf(stderr, "\n"); } preprocess_nir(nir); return nir; } static int type_size_vec4(const struct glsl_type *type, bool bindless) { return glsl_count_attribute_slots(type, false); } /* FIXME: the number of parameters for this method is somewhat big. Perhaps * rethink. */ static unsigned descriptor_map_add(struct v3dv_descriptor_map *map, int set, int binding, int array_index, int array_size, int start_index, uint8_t return_size, uint8_t plane) { assert(array_index < array_size); assert(return_size == 16 || return_size == 32); unsigned index = start_index; for (; index < map->num_desc; index++) { if (map->used[index] && set == map->set[index] && binding == map->binding[index] && array_index == map->array_index[index] && plane == map->plane[index]) { assert(array_size == map->array_size[index]); if (return_size != map->return_size[index]) { /* It the return_size is different it means that the same sampler * was used for operations with different precision * requirement. In this case we need to ensure that we use the * larger one. */ map->return_size[index] = 32; } return index; } else if (!map->used[index]) { break; } } assert(index < DESCRIPTOR_MAP_SIZE); assert(!map->used[index]); map->used[index] = true; map->set[index] = set; map->binding[index] = binding; map->array_index[index] = array_index; map->array_size[index] = array_size; map->return_size[index] = return_size; map->plane[index] = plane; map->num_desc = MAX2(map->num_desc, index + 1); return index; } struct lower_pipeline_layout_state { struct v3dv_pipeline *pipeline; const struct v3dv_pipeline_layout *layout; bool needs_default_sampler_state; }; static void lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr, struct lower_pipeline_layout_state *state) { assert(instr->intrinsic == nir_intrinsic_load_push_constant); instr->intrinsic = nir_intrinsic_load_uniform; } static struct v3dv_descriptor_map* pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline, VkDescriptorType desc_type, gl_shader_stage gl_stage, bool is_sampler) { enum broadcom_shader_stage broadcom_stage = gl_shader_stage_to_broadcom(gl_stage); assert(pipeline->shared_data && pipeline->shared_data->maps[broadcom_stage]); switch(desc_type) { case VK_DESCRIPTOR_TYPE_SAMPLER: return &pipeline->shared_data->maps[broadcom_stage]->sampler_map; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: return &pipeline->shared_data->maps[broadcom_stage]->texture_map; case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: return is_sampler ? 
&pipeline->shared_data->maps[broadcom_stage]->sampler_map : &pipeline->shared_data->maps[broadcom_stage]->texture_map; case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: return &pipeline->shared_data->maps[broadcom_stage]->ubo_map; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map; default: unreachable("Descriptor type unknown or not having a descriptor map"); } } /* Gathers info from the intrinsic (set and binding) and then lowers it so it * could be used by the v3d_compiler */ static void lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr, struct lower_pipeline_layout_state *state) { assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index); nir_const_value *const_val = nir_src_as_const_value(instr->src[0]); unsigned set = nir_intrinsic_desc_set(instr); unsigned binding = nir_intrinsic_binding(instr); struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout; struct v3dv_descriptor_set_binding_layout *binding_layout = &set_layout->binding[binding]; unsigned index = 0; switch (binding_layout->type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK: { struct v3dv_descriptor_map *descriptor_map = pipeline_get_descriptor_map(state->pipeline, binding_layout->type, b->shader->info.stage, false); if (!const_val) unreachable("non-constant vulkan_resource_index array index"); /* At compile-time we will need to know if we are processing a UBO load * for an inline or a regular UBO so we can handle inline loads like * push constants. At the level of NIR level however, the inline * information is gone, so we rely on the index to make this distinction. * Particularly, we reserve indices 1..MAX_INLINE_UNIFORM_BUFFERS for * inline buffers. This means that at the descriptor map level * we store inline buffers at slots 0..MAX_INLINE_UNIFORM_BUFFERS - 1, * and regular UBOs at indices starting from MAX_INLINE_UNIFORM_BUFFERS. */ uint32_t start_index = 0; if (binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || binding_layout->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) { start_index += MAX_INLINE_UNIFORM_BUFFERS; } index = descriptor_map_add(descriptor_map, set, binding, const_val->u32, binding_layout->array_size, start_index, 32 /* return_size: doesn't really apply for this case */, 0); break; } default: unreachable("unsupported descriptor type for vulkan_resource_index"); break; } /* Since we use the deref pass, both vulkan_resource_index and * vulkan_load_descriptor return a vec2 providing an index and * offset. Our backend compiler only cares about the index part. 
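    * That is why the rewrite below hard-codes the offset component to 0 and
    * only passes through the descriptor map index computed above.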
*/ nir_def_replace(&instr->def, nir_imm_ivec2(b, index, 0)); } static uint8_t tex_instr_get_and_remove_plane_src(nir_tex_instr *tex) { int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane); if (plane_src_idx < 0) return 0; uint8_t plane = nir_src_as_uint(tex->src[plane_src_idx].src); nir_tex_instr_remove_src(tex, plane_src_idx); return plane; } /* Returns return_size, so it could be used for the case of not having a * sampler object */ static uint8_t lower_tex_src(nir_builder *b, nir_tex_instr *instr, unsigned src_idx, struct lower_pipeline_layout_state *state) { nir_def *index = NULL; unsigned base_index = 0; unsigned array_elements = 1; nir_tex_src *src = &instr->src[src_idx]; bool is_sampler = src->src_type == nir_tex_src_sampler_deref; uint8_t plane = tex_instr_get_and_remove_plane_src(instr); /* We compute first the offsets */ nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr); while (deref->deref_type != nir_deref_type_var) { nir_deref_instr *parent = nir_instr_as_deref(deref->parent.ssa->parent_instr); assert(deref->deref_type == nir_deref_type_array); if (nir_src_is_const(deref->arr.index) && index == NULL) { /* We're still building a direct index */ base_index += nir_src_as_uint(deref->arr.index) * array_elements; } else { if (index == NULL) { /* We used to be direct but not anymore */ index = nir_imm_int(b, base_index); base_index = 0; } index = nir_iadd(b, index, nir_imul_imm(b, deref->arr.index.ssa, array_elements)); } array_elements *= glsl_get_length(parent->type); deref = parent; } if (index) index = nir_umin(b, index, nir_imm_int(b, array_elements - 1)); /* We have the offsets, we apply them, rewriting the source or removing * instr if needed */ if (index) { nir_src_rewrite(&src->src, index); src->src_type = is_sampler ? nir_tex_src_sampler_offset : nir_tex_src_texture_offset; } else { nir_tex_instr_remove_src(instr, src_idx); } uint32_t set = deref->var->data.descriptor_set; uint32_t binding = deref->var->data.binding; /* FIXME: this is a really simplified check for the precision to be used * for the sampling. Right now we are only checking for the variables used * on the operation itself, but there are other cases that we could use to * infer the precision requirement. */ bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM || deref->var->data.precision == GLSL_PRECISION_LOW; struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout; struct v3dv_descriptor_set_binding_layout *binding_layout = &set_layout->binding[binding]; /* For input attachments, the shader includes the attachment_idx. As we are * treating them as a texture, we only want the base_index */ uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ? deref->var->data.index + base_index : base_index; uint8_t return_size; if (V3D_DBG(TMU_16BIT)) return_size = 16; else if (V3D_DBG(TMU_32BIT)) return_size = 32; else return_size = relaxed_precision ? 
16 : 32; struct v3dv_descriptor_map *map = pipeline_get_descriptor_map(state->pipeline, binding_layout->type, b->shader->info.stage, is_sampler); int desc_index = descriptor_map_add(map, deref->var->data.descriptor_set, deref->var->data.binding, array_index, binding_layout->array_size, 0, return_size, plane); if (is_sampler) instr->sampler_index = desc_index; else instr->texture_index = desc_index; return return_size; } static bool lower_sampler(nir_builder *b, nir_tex_instr *instr, struct lower_pipeline_layout_state *state) { uint8_t return_size = 0; int texture_idx = nir_tex_instr_src_index(instr, nir_tex_src_texture_deref); if (texture_idx >= 0) return_size = lower_tex_src(b, instr, texture_idx, state); int sampler_idx = nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref); if (sampler_idx >= 0) { assert(nir_tex_instr_need_sampler(instr)); lower_tex_src(b, instr, sampler_idx, state); } if (texture_idx < 0 && sampler_idx < 0) return false; /* If the instruction doesn't have a sampler (i.e. txf) we use backend_flags * to bind a default sampler state to configure precission. */ if (sampler_idx < 0) { state->needs_default_sampler_state = true; instr->backend_flags = return_size == 16 ? V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX; } return true; } /* FIXME: really similar to lower_tex_src, perhaps refactor? */ static void lower_image_deref(nir_builder *b, nir_intrinsic_instr *instr, struct lower_pipeline_layout_state *state) { nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); nir_def *index = NULL; unsigned array_elements = 1; unsigned base_index = 0; while (deref->deref_type != nir_deref_type_var) { nir_deref_instr *parent = nir_instr_as_deref(deref->parent.ssa->parent_instr); assert(deref->deref_type == nir_deref_type_array); if (nir_src_is_const(deref->arr.index) && index == NULL) { /* We're still building a direct index */ base_index += nir_src_as_uint(deref->arr.index) * array_elements; } else { if (index == NULL) { /* We used to be direct but not anymore */ index = nir_imm_int(b, base_index); base_index = 0; } index = nir_iadd(b, index, nir_imul_imm(b, deref->arr.index.ssa, array_elements)); } array_elements *= glsl_get_length(parent->type); deref = parent; } if (index) index = nir_umin(b, index, nir_imm_int(b, array_elements - 1)); uint32_t set = deref->var->data.descriptor_set; uint32_t binding = deref->var->data.binding; struct v3dv_descriptor_set_layout *set_layout = state->layout->set[set].layout; struct v3dv_descriptor_set_binding_layout *binding_layout = &set_layout->binding[binding]; uint32_t array_index = deref->var->data.index + base_index; assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE || binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); struct v3dv_descriptor_map *map = pipeline_get_descriptor_map(state->pipeline, binding_layout->type, b->shader->info.stage, false); int desc_index = descriptor_map_add(map, deref->var->data.descriptor_set, deref->var->data.binding, array_index, binding_layout->array_size, 0, 32 /* return_size: doesn't apply for textures */, 0); /* Note: we don't need to do anything here in relation to the precision and * the output size because for images we can infer that info from the image * intrinsic, that includes the image format (see * NIR_INTRINSIC_FORMAT). That is done by the v3d compiler. 
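    * That is also why the descriptor_map_add() call above passes a fixed
    * 32-bit return_size for these descriptors.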
*/ index = nir_imm_int(b, desc_index); nir_rewrite_image_intrinsic(instr, index, false); } static bool lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, struct lower_pipeline_layout_state *state) { switch (instr->intrinsic) { case nir_intrinsic_load_push_constant: lower_load_push_constant(b, instr, state); return true; case nir_intrinsic_vulkan_resource_index: lower_vulkan_resource_index(b, instr, state); return true; case nir_intrinsic_load_vulkan_descriptor: { /* Loading the descriptor happens as part of load/store instructions, * so for us this is a no-op. */ nir_def_replace(&instr->def, instr->src[0].ssa); return true; } case nir_intrinsic_image_deref_load: case nir_intrinsic_image_deref_store: case nir_intrinsic_image_deref_atomic: case nir_intrinsic_image_deref_atomic_swap: case nir_intrinsic_image_deref_size: case nir_intrinsic_image_deref_samples: lower_image_deref(b, instr, state); return true; default: return false; } } static bool lower_pipeline_layout_cb(nir_builder *b, nir_instr *instr, void *_state) { bool progress = false; struct lower_pipeline_layout_state *state = _state; b->cursor = nir_before_instr(instr); switch (instr->type) { case nir_instr_type_tex: progress |= lower_sampler(b, nir_instr_as_tex(instr), state); break; case nir_instr_type_intrinsic: progress |= lower_intrinsic(b, nir_instr_as_intrinsic(instr), state); break; default: break; } return progress; } static bool lower_pipeline_layout_info(nir_shader *shader, struct v3dv_pipeline *pipeline, const struct v3dv_pipeline_layout *layout, bool *needs_default_sampler_state) { bool progress = false; struct lower_pipeline_layout_state state = { .pipeline = pipeline, .layout = layout, .needs_default_sampler_state = false, }; progress = nir_shader_instructions_pass(shader, lower_pipeline_layout_cb, nir_metadata_control_flow, &state); *needs_default_sampler_state = state.needs_default_sampler_state; return progress; } /* This flips gl_PointCoord.y to match Vulkan requirements */ static bool lower_point_coord_cb(nir_builder *b, nir_intrinsic_instr *intr, void *_state) { if (intr->intrinsic != nir_intrinsic_load_input) return false; if (nir_intrinsic_io_semantics(intr).location != VARYING_SLOT_PNTC) return false; b->cursor = nir_after_instr(&intr->instr); nir_def *result = &intr->def; result = nir_vector_insert_imm(b, result, nir_fsub_imm(b, 1.0, nir_channel(b, result, 1)), 1); nir_def_rewrite_uses_after(&intr->def, result, result->parent_instr); return true; } static bool v3d_nir_lower_point_coord(nir_shader *s) { assert(s->info.stage == MESA_SHADER_FRAGMENT); return nir_shader_intrinsics_pass(s, lower_point_coord_cb, nir_metadata_control_flow, NULL); } static void lower_fs_io(nir_shader *nir) { /* Our backend doesn't handle array fragment shader outputs */ NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_shader_out, NULL); nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, MESA_SHADER_FRAGMENT); nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, MESA_SHADER_FRAGMENT); NIR_PASS(_, nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, 0); } static void lower_gs_io(struct nir_shader *nir) { NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, MESA_SHADER_GEOMETRY); nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, MESA_SHADER_GEOMETRY); } static void lower_vs_io(struct 
nir_shader *nir) { NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false); nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, MESA_SHADER_VERTEX); nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, MESA_SHADER_VERTEX); /* FIXME: if we call nir_lower_io, we get a crash later. Likely because it * overlaps with v3d_nir_lower_io. Need further research though. */ } static void shader_debug_output(const char *message, void *data) { /* FIXME: We probably don't want to debug anything extra here, and in fact * the compiler is not using this callback too much, only as an alternative * way to debug out the shaderdb stats, that you can already get using * V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d * compiler to remove that callback. */ } static void pipeline_populate_v3d_key(struct v3d_key *key, const struct v3dv_pipeline_stage *p_stage, uint32_t ucp_enables) { assert(p_stage->pipeline->shared_data && p_stage->pipeline->shared_data->maps[p_stage->stage]); /* The following values are default values used at pipeline create. We use * there 32 bit as default return size. */ struct v3dv_descriptor_map *sampler_map = &p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map; struct v3dv_descriptor_map *texture_map = &p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map; key->num_tex_used = texture_map->num_desc; assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS); for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) { key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X; key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y; key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z; key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W; } key->num_samplers_used = sampler_map->num_desc; assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS); for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc; sampler_idx++) { key->sampler[sampler_idx].return_size = sampler_map->return_size[sampler_idx]; key->sampler[sampler_idx].return_channels = key->sampler[sampler_idx].return_size == 32 ? 4 : 2; } switch (p_stage->stage) { case BROADCOM_SHADER_VERTEX: case BROADCOM_SHADER_VERTEX_BIN: key->is_last_geometry_stage = p_stage->pipeline->stages[BROADCOM_SHADER_GEOMETRY] == NULL; break; case BROADCOM_SHADER_GEOMETRY: case BROADCOM_SHADER_GEOMETRY_BIN: /* FIXME: while we don't implement tessellation shaders */ key->is_last_geometry_stage = true; break; case BROADCOM_SHADER_FRAGMENT: case BROADCOM_SHADER_COMPUTE: key->is_last_geometry_stage = false; break; default: unreachable("unsupported shader stage"); } /* Vulkan doesn't have fixed function state for user clip planes. Instead, * shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler * takes care of adding a single compact array variable at * VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering. * * The only lowering we are interested is specific to the fragment shader, * where we want to emit discards to honor writes to gl_ClipDistance[] in * previous stages. This is done via nir_lower_clip_fs() so we only set up * the ucp enable mask for that stage. 
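    * (the mask itself is computed by get_ucp_enable_mask() from the size of
    * the gl_ClipDistance[] array written by the vertex shader).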
*/ key->ucp_enables = ucp_enables; const VkPipelineRobustnessBufferBehaviorEXT robust_buffer_enabled = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT; const VkPipelineRobustnessImageBehaviorEXT robust_image_enabled = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_EXT; key->robust_uniform_access = p_stage->robustness.uniform_buffers == robust_buffer_enabled; key->robust_storage_access = p_stage->robustness.storage_buffers == robust_buffer_enabled; key->robust_image_access = p_stage->robustness.images == robust_image_enabled; } /* FIXME: anv maps to hw primitive type. Perhaps eventually we would do the * same. For not using prim_mode that is the one already used on v3d */ static const enum mesa_prim vk_to_mesa_prim[] = { [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = MESA_PRIM_POINTS, [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = MESA_PRIM_LINES, [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = MESA_PRIM_LINE_STRIP, [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = MESA_PRIM_TRIANGLES, [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = MESA_PRIM_TRIANGLE_STRIP, [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = MESA_PRIM_TRIANGLE_FAN, [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = MESA_PRIM_LINES_ADJACENCY, [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = MESA_PRIM_LINE_STRIP_ADJACENCY, [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = MESA_PRIM_TRIANGLES_ADJACENCY, [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = MESA_PRIM_TRIANGLE_STRIP_ADJACENCY, }; uint32_t v3dv_pipeline_primitive(VkPrimitiveTopology vk_prim) { return v3d_hw_prim_type(vk_to_mesa_prim[vk_prim]); } static const enum pipe_logicop vk_to_pipe_logicop[] = { [VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR, [VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND, [VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE, [VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY, [VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED, [VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP, [VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR, [VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR, [VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR, [VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV, [VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT, [VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE, [VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED, [VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED, [VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND, [VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET, }; static bool enable_line_smooth(struct v3dv_pipeline *pipeline, const VkPipelineRasterizationStateCreateInfo *rs_info) { if (!pipeline->rasterization_enabled) return false; const VkPipelineRasterizationLineStateCreateInfoKHR *ls_info = vk_find_struct_const(rs_info->pNext, PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_KHR); if (!ls_info) return false; /* Although topology is dynamic now, the topology class can't change * because we don't support dynamicPrimitiveTopologyUnrestricted, so we can * use the static topology from the pipeline for this. 
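    * For example, a pipeline created with a line-list topology can only be
    * switched dynamically to other line topologies, never to point or
    * triangle ones.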
*/ switch(pipeline->topology) { case MESA_PRIM_LINES: case MESA_PRIM_LINE_LOOP: case MESA_PRIM_LINE_STRIP: case MESA_PRIM_LINES_ADJACENCY: case MESA_PRIM_LINE_STRIP_ADJACENCY: return ls_info->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR; default: return false; } } static void v3d_fs_key_set_color_attachment(struct v3d_fs_key *key, const struct v3dv_pipeline_stage *p_stage, uint32_t index, VkFormat fb_format) { key->cbufs |= 1 << index; enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format); /* If logic operations are enabled then we might emit color reads and we * need to know the color buffer format and swizzle for that */ if (key->logicop_func != PIPE_LOGICOP_COPY) { /* Framebuffer formats should be single plane */ assert(vk_format_get_plane_count(fb_format) == 1); key->color_fmt[index].format = fb_pipe_format; memcpy(key->color_fmt[index].swizzle, v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format, 0), sizeof(key->color_fmt[index].swizzle)); } const struct util_format_description *desc = vk_format_description(fb_format); if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && desc->channel[0].size == 32) { key->f32_color_rb |= 1 << index; } if (p_stage->nir->info.fs.untyped_color_outputs) { if (util_format_is_pure_uint(fb_pipe_format)) key->uint_color_rb |= 1 << index; else if (util_format_is_pure_sint(fb_pipe_format)) key->int_color_rb |= 1 << index; } } static void pipeline_populate_v3d_fs_key(struct v3d_fs_key *key, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct vk_render_pass_state *rendering_info, const struct v3dv_pipeline_stage *p_stage, bool has_geometry_shader, uint32_t ucp_enables) { assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT); memset(key, 0, sizeof(*key)); struct v3dv_device *device = p_stage->pipeline->device; assert(device); pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables); const VkPipelineInputAssemblyStateCreateInfo *ia_info = pCreateInfo->pInputAssemblyState; uint8_t topology = vk_to_mesa_prim[ia_info->topology]; key->is_points = (topology == MESA_PRIM_POINTS); key->is_lines = (topology >= MESA_PRIM_LINES && topology <= MESA_PRIM_LINE_STRIP); if (key->is_points) { /* This mask represents state for GL_ARB_point_sprite which is not * relevant to Vulkan. */ key->point_sprite_mask = 0; /* Vulkan mandates upper left. */ key->point_coord_upper_left = true; } key->has_gs = has_geometry_shader; const VkPipelineColorBlendStateCreateInfo *cb_info = p_stage->pipeline->rasterization_enabled ? pCreateInfo->pColorBlendState : NULL; key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ? vk_to_pipe_logicop[cb_info->logicOp] : PIPE_LOGICOP_COPY; /* Multisample rasterization state must be ignored if rasterization * is disabled. */ const VkPipelineMultisampleStateCreateInfo *ms_info = p_stage->pipeline->rasterization_enabled ? pCreateInfo->pMultisampleState : NULL; if (ms_info) { assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT || ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT); key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT; if (key->msaa) key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable; key->sample_alpha_to_one = ms_info->alphaToOneEnable; } key->line_smoothing = enable_line_smooth(p_stage->pipeline, pCreateInfo->pRasterizationState); /* This is intended for V3D versions before 4.1, otherwise we just use the * tile buffer load/store swap R/B bit. 
*/ key->swap_color_rb = 0; for (uint32_t i = 0; i < rendering_info->color_attachment_count; i++) { if (rendering_info->color_attachment_formats[i] == VK_FORMAT_UNDEFINED) continue; v3d_fs_key_set_color_attachment(key, p_stage, i, rendering_info->color_attachment_formats[i]); } } static void setup_stage_outputs_from_next_stage_inputs( uint8_t next_stage_num_inputs, struct v3d_varying_slot *next_stage_input_slots, uint8_t *num_used_outputs, struct v3d_varying_slot *used_output_slots, uint32_t size_of_used_output_slots) { *num_used_outputs = next_stage_num_inputs; memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots); } static void pipeline_populate_v3d_gs_key(struct v3d_gs_key *key, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct v3dv_pipeline_stage *p_stage) { assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY || p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN); struct v3dv_device *device = p_stage->pipeline->device; assert(device); memset(key, 0, sizeof(*key)); pipeline_populate_v3d_key(&key->base, p_stage, 0); struct v3dv_pipeline *pipeline = p_stage->pipeline; key->per_vertex_point_size = p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ); key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage); assert(key->base.is_last_geometry_stage); if (key->is_coord) { /* Output varyings in the last binning shader are only used for transform * feedback. Set to 0 as VK_EXT_transform_feedback is not supported. */ key->num_used_outputs = 0; } else { struct v3dv_shader_variant *fs_variant = pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; STATIC_ASSERT(sizeof(key->used_outputs) == sizeof(fs_variant->prog_data.fs->input_slots)); setup_stage_outputs_from_next_stage_inputs( fs_variant->prog_data.fs->num_inputs, fs_variant->prog_data.fs->input_slots, &key->num_used_outputs, key->used_outputs, sizeof(key->used_outputs)); } } static void pipeline_populate_v3d_vs_key(struct v3d_vs_key *key, const VkGraphicsPipelineCreateInfo *pCreateInfo, const struct v3dv_pipeline_stage *p_stage) { assert(p_stage->stage == BROADCOM_SHADER_VERTEX || p_stage->stage == BROADCOM_SHADER_VERTEX_BIN); struct v3dv_device *device = p_stage->pipeline->device; assert(device); memset(key, 0, sizeof(*key)); pipeline_populate_v3d_key(&key->base, p_stage, 0); struct v3dv_pipeline *pipeline = p_stage->pipeline; key->per_vertex_point_size = p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ); key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage); if (key->is_coord) { /* Binning VS*/ if (key->base.is_last_geometry_stage) { /* Output varyings in the last binning shader are only used for * transform feedback. Set to 0 as VK_EXT_transform_feedback is not * supported. 
*/ key->num_used_outputs = 0; } else { /* Linking against GS binning program */ assert(pipeline->stages[BROADCOM_SHADER_GEOMETRY]); struct v3dv_shader_variant *gs_bin_variant = pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]; STATIC_ASSERT(sizeof(key->used_outputs) == sizeof(gs_bin_variant->prog_data.gs->input_slots)); setup_stage_outputs_from_next_stage_inputs( gs_bin_variant->prog_data.gs->num_inputs, gs_bin_variant->prog_data.gs->input_slots, &key->num_used_outputs, key->used_outputs, sizeof(key->used_outputs)); } } else { /* Render VS */ if (pipeline->stages[BROADCOM_SHADER_GEOMETRY]) { /* Linking against GS render program */ struct v3dv_shader_variant *gs_variant = pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]; STATIC_ASSERT(sizeof(key->used_outputs) == sizeof(gs_variant->prog_data.gs->input_slots)); setup_stage_outputs_from_next_stage_inputs( gs_variant->prog_data.gs->num_inputs, gs_variant->prog_data.gs->input_slots, &key->num_used_outputs, key->used_outputs, sizeof(key->used_outputs)); } else { /* Linking against FS program */ struct v3dv_shader_variant *fs_variant = pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; STATIC_ASSERT(sizeof(key->used_outputs) == sizeof(fs_variant->prog_data.fs->input_slots)); setup_stage_outputs_from_next_stage_inputs( fs_variant->prog_data.fs->num_inputs, fs_variant->prog_data.fs->input_slots, &key->num_used_outputs, key->used_outputs, sizeof(key->used_outputs)); } } const VkPipelineVertexInputStateCreateInfo *vi_info = pCreateInfo->pVertexInputState; for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { const VkVertexInputAttributeDescription *desc = &vi_info->pVertexAttributeDescriptions[i]; assert(desc->location < MAX_VERTEX_ATTRIBS); if (desc->format == VK_FORMAT_B8G8R8A8_UNORM || desc->format == VK_FORMAT_A2R10G10B10_UNORM_PACK32) { key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location); } } } /** * Creates the initial form of the pipeline stage for a binning shader by * cloning the render shader and flagging it as a coordinate shader. * * Returns NULL if it was not able to allocate the object, so it should be * handled as a VK_ERROR_OUT_OF_HOST_MEMORY error. */ static struct v3dv_pipeline_stage * pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src, const VkAllocationCallbacks *pAllocator) { struct v3dv_device *device = src->pipeline->device; struct v3dv_pipeline_stage *p_stage = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (p_stage == NULL) return NULL; assert(src->stage == BROADCOM_SHADER_VERTEX || src->stage == BROADCOM_SHADER_GEOMETRY); enum broadcom_shader_stage bin_stage = src->stage == BROADCOM_SHADER_VERTEX ? BROADCOM_SHADER_VERTEX_BIN : BROADCOM_SHADER_GEOMETRY_BIN; p_stage->pipeline = src->pipeline; p_stage->stage = bin_stage; p_stage->entrypoint = src->entrypoint; p_stage->module = src->module; p_stage->module_info = src->module_info; /* For binning shaders we will clone the NIR code from the corresponding * render shader later, when we call pipeline_compile_xxx_shader. 
This way
    * we only have to run the relevant NIR lowerings once for render shaders.
    */
   p_stage->nir = NULL;

   p_stage->program_id = src->program_id;
   p_stage->spec_info = src->spec_info;
   p_stage->feedback = (VkPipelineCreationFeedback) { 0 };
   p_stage->robustness = src->robustness;
   memcpy(p_stage->shader_sha1, src->shader_sha1, 20);

   return p_stage;
}

/*
 * Based on some creation flags we assume that the QPU instructions will be
 * needed later to gather further info. In that case we just keep the
 * qpu_insts around, instead of mapping/unmapping the BO later.
 */
static bool
pipeline_keep_qpu(struct v3dv_pipeline *pipeline)
{
   return pipeline->flags &
      (VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR |
       VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR);
}

/**
 * Returns false if it was not able to allocate or map the assembly BO memory.
 */
static bool
upload_assembly(struct v3dv_pipeline *pipeline)
{
   uint32_t total_size = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      struct v3dv_shader_variant *variant =
         pipeline->shared_data->variants[stage];
      if (variant != NULL)
         total_size += variant->qpu_insts_size;
   }

   struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,
                                      "pipeline shader assembly", true);
   if (!bo) {
      fprintf(stderr, "failed to allocate memory for shader\n");
      return false;
   }

   bool ok = v3dv_bo_map(pipeline->device, bo, total_size);
   if (!ok) {
      fprintf(stderr, "failed to map source shader buffer\n");
      return false;
   }

   uint32_t offset = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      struct v3dv_shader_variant *variant =
         pipeline->shared_data->variants[stage];

      if (variant != NULL) {
         variant->assembly_offset = offset;
         memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);
         offset += variant->qpu_insts_size;

         if (!pipeline_keep_qpu(pipeline)) {
            free(variant->qpu_insts);
            variant->qpu_insts = NULL;
         }
      }
   }
   assert(total_size == offset);

   pipeline->shared_data->assembly_bo = bo;

   return true;
}

static void
pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,
                       struct v3dv_pipeline_key *key,
                       unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (pipeline->layout) {
      _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
                        sizeof(pipeline->layout->sha1));
   }

   /* We need to include all shader stages in the sha1 key as linking may
    * modify the shader code in any stage. An alternative would be to use the
    * serialized NIR, but that seems like overkill.
    */
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      if (broadcom_shader_stage_is_binning(stage))
         continue;
      struct v3dv_pipeline_stage *p_stage = pipeline->stages[stage];
      if (p_stage == NULL)
         continue;
      assert(stage != BROADCOM_SHADER_COMPUTE);
      _mesa_sha1_update(&ctx, p_stage->shader_sha1,
                        sizeof(p_stage->shader_sha1));
   }

   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
pipeline_hash_compute(const struct v3dv_pipeline *pipeline,
                      struct v3dv_pipeline_key *key,
                      unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (pipeline->layout) {
      _mesa_sha1_update(&ctx, &pipeline->layout->sha1,
                        sizeof(pipeline->layout->sha1));
   }

   struct v3dv_pipeline_stage *p_stage =
      pipeline->stages[BROADCOM_SHADER_COMPUTE];

   _mesa_sha1_update(&ctx, p_stage->shader_sha1,
                     sizeof(p_stage->shader_sha1));

   _mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));

   _mesa_sha1_final(&ctx, sha1_out);
}

/* Checks that the pipeline has enough spill size to use for any of its
 * variants.
 */
static void
pipeline_check_spill_size(struct v3dv_pipeline *pipeline)
{
   uint32_t max_spill_size = 0;
   for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {
      struct v3dv_shader_variant *variant =
         pipeline->shared_data->variants[stage];
      if (variant != NULL) {
         max_spill_size = MAX2(variant->prog_data.base->spill_size,
                               max_spill_size);
      }
   }

   if (max_spill_size > 0) {
      struct v3dv_device *device = pipeline->device;

      /* The TIDX register we use for choosing the area to access
       * for scratch space is: (core << 6) | (qpu << 2) | thread.
       * Even at minimum threadcount in a particular shader, that
       * means we still multiply the number of QPUs by 4.
       */
      const uint32_t total_spill_size =
         4 * device->devinfo.qpu_count * max_spill_size;
      if (pipeline->spill.bo) {
         assert(pipeline->spill.size_per_thread > 0);
         v3dv_bo_free(device, pipeline->spill.bo);
      }
      pipeline->spill.bo =
         v3dv_bo_alloc(device, total_spill_size, "spill", true);
      pipeline->spill.size_per_thread = max_spill_size;
   }
}

/**
 * Creates a new shader variant. Note that prog_data is not const, so it is
 * assumed that the caller will provide a pointer that the shader_variant
 * will own.
 *
 * Creation doesn't include allocating a BO to store the contents of
 * qpu_insts, as we will try to share the same BO for several shader
 * variants. Also note that qpu_insts being NULL is valid, for example if we
 * are creating the shader_variants from the cache, so we can just upload the
 * assembly of all the shader stages at once.
 */
struct v3dv_shader_variant *
v3dv_shader_variant_create(struct v3dv_device *device,
                           enum broadcom_shader_stage stage,
                           struct v3d_prog_data *prog_data,
                           uint32_t prog_data_size,
                           uint32_t assembly_offset,
                           uint64_t *qpu_insts,
                           uint32_t qpu_insts_size,
                           VkResult *out_vk_result)
{
   struct v3dv_shader_variant *variant =
      vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (variant == NULL) {
      *out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;
      return NULL;
   }

   variant->stage = stage;
   variant->prog_data_size = prog_data_size;
   variant->prog_data.base = prog_data;
   variant->assembly_offset = assembly_offset;
   variant->qpu_insts_size = qpu_insts_size;
   variant->qpu_insts = qpu_insts;

   *out_vk_result = VK_SUCCESS;

   return variant;
}

/* For a given key, it returns the compiled version of the shader. Returns a
 * new reference to the shader_variant to the caller, or NULL.
* * If the method returns NULL it means that something wrong happened: * * Not enough memory: this is one of the possible outcomes defined by * vkCreateXXXPipelines. out_vk_result will return the proper oom error. * * Compilation error: hypothetically this shouldn't happen, as the spec * states that vkShaderModule needs to be created with a valid SPIR-V, so * any compilation failure is a driver bug. In the practice, something as * common as failing to register allocate can lead to a compilation * failure. In that case the only option (for any driver) is * VK_ERROR_UNKNOWN, even if we know that the problem was a compiler * error. */ static struct v3dv_shader_variant * pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage, struct v3d_key *key, size_t key_size, const VkAllocationCallbacks *pAllocator, VkResult *out_vk_result) { int64_t stage_start = os_time_get_nano(); struct v3dv_pipeline *pipeline = p_stage->pipeline; struct v3dv_physical_device *physical_device = pipeline->device->pdevice; const struct v3d_compiler *compiler = physical_device->compiler; gl_shader_stage gl_stage = broadcom_shader_stage_to_gl(p_stage->stage); if (V3D_DBG(NIR) || v3d_debug_flag_for_shader_stage(gl_stage)) { fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n", broadcom_shader_stage_name(p_stage->stage), p_stage->program_id); nir_print_shader(p_stage->nir, stderr); fprintf(stderr, "\n"); } uint64_t *qpu_insts; uint32_t qpu_insts_size; struct v3d_prog_data *prog_data; uint32_t prog_data_size = v3d_prog_data_size(gl_stage); qpu_insts = v3d_compile(compiler, key, &prog_data, p_stage->nir, shader_debug_output, NULL, p_stage->program_id, 0, &qpu_insts_size); struct v3dv_shader_variant *variant = NULL; if (!qpu_insts) { fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n", broadcom_shader_stage_name(p_stage->stage), p_stage->program_id); *out_vk_result = VK_ERROR_UNKNOWN; } else { variant = v3dv_shader_variant_create(pipeline->device, p_stage->stage, prog_data, prog_data_size, 0, /* assembly_offset, no final value yet */ qpu_insts, qpu_insts_size, out_vk_result); } /* At this point we don't need anymore the nir shader, but we are freeing * all the temporary p_stage structs used during the pipeline creation when * we finish it, so let's not worry about freeing the nir here. */ p_stage->feedback.duration += os_time_get_nano() - stage_start; return variant; } static void link_shaders(nir_shader *producer, nir_shader *consumer) { assert(producer); assert(consumer); if (producer->options->lower_to_scalar) { NIR_PASS(_, producer, nir_lower_io_to_scalar_early, nir_var_shader_out); NIR_PASS(_, consumer, nir_lower_io_to_scalar_early, nir_var_shader_in); } nir_lower_io_arrays_to_elements(producer, consumer); v3d_optimize_nir(NULL, producer); v3d_optimize_nir(NULL, consumer); if (nir_link_opt_varyings(producer, consumer)) v3d_optimize_nir(NULL, consumer); NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL); NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); if (nir_remove_unused_varyings(producer, consumer)) { NIR_PASS(_, producer, nir_lower_global_vars_to_local); NIR_PASS(_, consumer, nir_lower_global_vars_to_local); v3d_optimize_nir(NULL, producer); v3d_optimize_nir(NULL, consumer); /* Optimizations can cause varyings to become unused. * nir_compact_varyings() depends on all dead varyings being removed so * we need to call nir_remove_dead_variables() again here. 
*/ NIR_PASS(_, producer, nir_remove_dead_variables, nir_var_shader_out, NULL); NIR_PASS(_, consumer, nir_remove_dead_variables, nir_var_shader_in, NULL); } } static void pipeline_lower_nir(struct v3dv_pipeline *pipeline, struct v3dv_pipeline_stage *p_stage, struct v3dv_pipeline_layout *layout) { int64_t stage_start = os_time_get_nano(); assert(pipeline->shared_data && pipeline->shared_data->maps[p_stage->stage]); NIR_PASS_V(p_stage->nir, nir_vk_lower_ycbcr_tex, lookup_ycbcr_conversion, layout); nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir)); /* We add this because we need a valid sampler for nir_lower_tex to do * unpacking of the texture operation result, even for the case where there * is no sampler state. * * We add two of those, one for the case we need a 16bit return_size, and * another for the case we need a 32bit return size. */ struct v3dv_descriptor_maps *maps = pipeline->shared_data->maps[p_stage->stage]; UNUSED unsigned index; index = descriptor_map_add(&maps->sampler_map, -1, -1, -1, 0, 0, 16, 0); assert(index == V3DV_NO_SAMPLER_16BIT_IDX); index = descriptor_map_add(&maps->sampler_map, -2, -2, -2, 0, 0, 32, 0); assert(index == V3DV_NO_SAMPLER_32BIT_IDX); /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ bool needs_default_sampler_state = false; NIR_PASS(_, p_stage->nir, lower_pipeline_layout_info, pipeline, layout, &needs_default_sampler_state); /* If in the end we didn't need to use the default sampler states and the * shader doesn't need any other samplers, get rid of them so we can * recognize that this program doesn't use any samplers at all. */ if (!needs_default_sampler_state && maps->sampler_map.num_desc == 2) maps->sampler_map.num_desc = 0; p_stage->feedback.duration += os_time_get_nano() - stage_start; } /** * The SPIR-V compiler will insert a sized compact array for * VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[], * where the size of the array determines the number of active clip planes. 
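 * The mask returned below is simply (1 << array_length) - 1, so e.g. a
 * shader declaring gl_ClipDistance[2] yields a ucp enable mask of 0x3.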
*/ static uint32_t get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage) { assert(p_stage->stage == BROADCOM_SHADER_VERTEX); const nir_shader *shader = p_stage->nir; assert(shader); nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) { if (var->data.location == VARYING_SLOT_CLIP_DIST0) { assert(var->data.compact); return (1 << glsl_get_length(var->type)) - 1; } } return 0; } static nir_shader * pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage, struct v3dv_pipeline *pipeline, struct v3dv_pipeline_cache *cache) { int64_t stage_start = os_time_get_nano(); nir_shader *nir = NULL; const nir_shader_compiler_options *nir_options = v3dv_pipeline_get_nir_options(&pipeline->device->devinfo); nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache, nir_options, p_stage->shader_sha1); if (nir) { assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage)); /* A NIR cache hit doesn't avoid the large majority of pipeline stage * creation so the cache hit is not recorded in the pipeline feedback * flags */ p_stage->feedback.duration += os_time_get_nano() - stage_start; return nir; } nir = shader_module_compile_to_nir(pipeline->device, p_stage); if (nir) { struct v3dv_pipeline_cache *default_cache = &pipeline->device->default_pipeline_cache; v3dv_pipeline_cache_upload_nir(pipeline, cache, nir, p_stage->shader_sha1); /* Ensure that the variant is on the default cache, as cmd_buffer could * need to change the current variant */ if (default_cache != cache) { v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir, p_stage->shader_sha1); } p_stage->feedback.duration += os_time_get_nano() - stage_start; return nir; } /* FIXME: this shouldn't happen, raise error? */ return NULL; } static VkResult pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline, const VkAllocationCallbacks *pAllocator, const VkGraphicsPipelineCreateInfo *pCreateInfo) { struct v3dv_pipeline_stage *p_stage_vs = pipeline->stages[BROADCOM_SHADER_VERTEX]; struct v3dv_pipeline_stage *p_stage_vs_bin = pipeline->stages[BROADCOM_SHADER_VERTEX_BIN]; assert(p_stage_vs_bin != NULL); if (p_stage_vs_bin->nir == NULL) { assert(p_stage_vs->nir); p_stage_vs_bin->nir = nir_shader_clone(NULL, p_stage_vs->nir); } VkResult vk_result; struct v3d_vs_key key; pipeline_populate_v3d_vs_key(&key, pCreateInfo, p_stage_vs); pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] = pipeline_compile_shader_variant(p_stage_vs, &key.base, sizeof(key), pAllocator, &vk_result); if (vk_result != VK_SUCCESS) return vk_result; pipeline_populate_v3d_vs_key(&key, pCreateInfo, p_stage_vs_bin); pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] = pipeline_compile_shader_variant(p_stage_vs_bin, &key.base, sizeof(key), pAllocator, &vk_result); return vk_result; } static VkResult pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline, const VkAllocationCallbacks *pAllocator, const VkGraphicsPipelineCreateInfo *pCreateInfo) { struct v3dv_pipeline_stage *p_stage_gs = pipeline->stages[BROADCOM_SHADER_GEOMETRY]; struct v3dv_pipeline_stage *p_stage_gs_bin = pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN]; assert(p_stage_gs); assert(p_stage_gs_bin != NULL); if (p_stage_gs_bin->nir == NULL) { assert(p_stage_gs->nir); p_stage_gs_bin->nir = nir_shader_clone(NULL, p_stage_gs->nir); } VkResult vk_result; struct v3d_gs_key key; pipeline_populate_v3d_gs_key(&key, pCreateInfo, p_stage_gs); pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] = pipeline_compile_shader_variant(p_stage_gs, &key.base, sizeof(key), pAllocator, 
&vk_result); if (vk_result != VK_SUCCESS) return vk_result; pipeline_populate_v3d_gs_key(&key, pCreateInfo, p_stage_gs_bin); pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] = pipeline_compile_shader_variant(p_stage_gs_bin, &key.base, sizeof(key), pAllocator, &vk_result); return vk_result; } static VkResult pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline, const VkAllocationCallbacks *pAllocator, const VkGraphicsPipelineCreateInfo *pCreateInfo) { struct v3dv_pipeline_stage *p_stage_vs = pipeline->stages[BROADCOM_SHADER_VERTEX]; struct v3dv_pipeline_stage *p_stage_fs = pipeline->stages[BROADCOM_SHADER_FRAGMENT]; struct v3dv_pipeline_stage *p_stage_gs = pipeline->stages[BROADCOM_SHADER_GEOMETRY]; struct v3d_fs_key key; pipeline_populate_v3d_fs_key(&key, pCreateInfo, &pipeline->rendering_info, p_stage_fs, p_stage_gs != NULL, get_ucp_enable_mask(p_stage_vs)); if (key.is_points) { assert(key.point_coord_upper_left); NIR_PASS(_, p_stage_fs->nir, v3d_nir_lower_point_coord); } VkResult vk_result; pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] = pipeline_compile_shader_variant(p_stage_fs, &key.base, sizeof(key), pAllocator, &vk_result); return vk_result; } static void pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline, struct v3dv_pipeline_key *key, const VkGraphicsPipelineCreateInfo *pCreateInfo) { struct v3dv_device *device = pipeline->device; assert(device); memset(key, 0, sizeof(*key)); key->line_smooth = pipeline->line_smooth; const VkPipelineInputAssemblyStateCreateInfo *ia_info = pCreateInfo->pInputAssemblyState; key->topology = vk_to_mesa_prim[ia_info->topology]; const VkPipelineColorBlendStateCreateInfo *cb_info = pipeline->rasterization_enabled ? pCreateInfo->pColorBlendState : NULL; key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ? vk_to_pipe_logicop[cb_info->logicOp] : PIPE_LOGICOP_COPY; /* Multisample rasterization state must be ignored if rasterization * is disabled. */ const VkPipelineMultisampleStateCreateInfo *ms_info = pipeline->rasterization_enabled ? 
pCreateInfo->pMultisampleState : NULL; if (ms_info) { assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT || ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT); key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT; if (key->msaa) key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable; key->sample_alpha_to_one = ms_info->alphaToOneEnable; } struct vk_render_pass_state *ri = &pipeline->rendering_info; for (uint32_t i = 0; i < ri->color_attachment_count; i++) { if (ri->color_attachment_formats[i] == VK_FORMAT_UNDEFINED) continue; key->cbufs |= 1 << i; VkFormat fb_format = ri->color_attachment_formats[i]; enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format); /* If logic operations are enabled then we might emit color reads and we * need to know the color buffer format and swizzle for that */ if (key->logicop_func != PIPE_LOGICOP_COPY) { /* Framebuffer formats should be single plane */ assert(vk_format_get_plane_count(fb_format) == 1); key->color_fmt[i].format = fb_pipe_format; memcpy(key->color_fmt[i].swizzle, v3dv_get_format_swizzle(pipeline->device, fb_format, 0), sizeof(key->color_fmt[i].swizzle)); } const struct util_format_description *desc = vk_format_description(fb_format); if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && desc->channel[0].size == 32) { key->f32_color_rb |= 1 << i; } } const VkPipelineVertexInputStateCreateInfo *vi_info = pCreateInfo->pVertexInputState; for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) { const VkVertexInputAttributeDescription *desc = &vi_info->pVertexAttributeDescriptions[i]; assert(desc->location < MAX_VERTEX_ATTRIBS); if (desc->format == VK_FORMAT_B8G8R8A8_UNORM || desc->format == VK_FORMAT_A2R10G10B10_UNORM_PACK32) { key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location); } } key->has_multiview = ri->view_mask != 0; } static void pipeline_populate_compute_key(struct v3dv_pipeline *pipeline, struct v3dv_pipeline_key *key, const VkComputePipelineCreateInfo *pCreateInfo) { struct v3dv_device *device = pipeline->device; assert(device); /* We use the same pipeline key for graphics and compute, but we don't need * to add a field to flag compute keys because this key is not used alone * to search in the cache, we also use the SPIR-V or the serialized NIR for * example, which already flags compute shaders. */ memset(key, 0, sizeof(*key)); } static struct v3dv_pipeline_shared_data * v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20], struct v3dv_pipeline *pipeline, bool is_graphics_pipeline) { /* We create new_entry using the device alloc. Right now shared_data is ref * and unref by both the pipeline and the pipeline cache, so we can't * ensure that the cache or pipeline alloc will be available on the last * unref. */ struct v3dv_pipeline_shared_data *new_entry = vk_zalloc2(&pipeline->device->vk.alloc, NULL, sizeof(struct v3dv_pipeline_shared_data), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (new_entry == NULL) return NULL; for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { /* We don't need specific descriptor maps for binning stages we use the * map for the render stage. 
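       * (The binning entries are aliased to their render counterparts further
       * down, e.g. maps[BROADCOM_SHADER_VERTEX_BIN] is simply set to
       * maps[BROADCOM_SHADER_VERTEX].)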
*/ if (broadcom_shader_stage_is_binning(stage)) continue; if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) || (!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) { continue; } if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->stages[BROADCOM_SHADER_GEOMETRY]) { /* We always inject a custom GS if we have multiview */ if (!pipeline->rendering_info.view_mask) continue; } struct v3dv_descriptor_maps *new_maps = vk_zalloc2(&pipeline->device->vk.alloc, NULL, sizeof(struct v3dv_descriptor_maps), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (new_maps == NULL) goto fail; new_entry->maps[stage] = new_maps; } new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] = new_entry->maps[BROADCOM_SHADER_VERTEX]; new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] = new_entry->maps[BROADCOM_SHADER_GEOMETRY]; new_entry->ref_cnt = 1; memcpy(new_entry->sha1_key, sha1_key, 20); return new_entry; fail: if (new_entry != NULL) { for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) { if (new_entry->maps[stage] != NULL) vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]); } } vk_free(&pipeline->device->vk.alloc, new_entry); return NULL; } static void write_creation_feedback(struct v3dv_pipeline *pipeline, const void *next, const VkPipelineCreationFeedback *pipeline_feedback, uint32_t stage_count, const VkPipelineShaderStageCreateInfo *stages) { const VkPipelineCreationFeedbackCreateInfo *create_feedback = vk_find_struct_const(next, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); if (create_feedback) { typed_memcpy(create_feedback->pPipelineCreationFeedback, pipeline_feedback, 1); const uint32_t feedback_stage_count = create_feedback->pipelineStageCreationFeedbackCount; assert(feedback_stage_count <= stage_count); for (uint32_t i = 0; i < feedback_stage_count; i++) { gl_shader_stage s = vk_to_mesa_shader_stage(stages[i].stage); enum broadcom_shader_stage bs = gl_shader_stage_to_broadcom(s); create_feedback->pPipelineStageCreationFeedbacks[i] = pipeline->stages[bs]->feedback; if (broadcom_shader_stage_is_render_with_binning(bs)) { enum broadcom_shader_stage bs_bin = broadcom_binning_shader_stage_for_render_stage(bs); create_feedback->pPipelineStageCreationFeedbacks[i].duration += pipeline->stages[bs_bin]->feedback.duration; } } } } /* Note that although PrimitiveTopology is now dynamic, it is still safe to * compute the gs_input/output_primitive from the topology saved at the * pipeline, as the topology class will not change, because we don't support * dynamicPrimitiveTopologyUnrestricted */ static enum mesa_prim multiview_gs_input_primitive_from_pipeline(struct v3dv_pipeline *pipeline) { switch (pipeline->topology) { case MESA_PRIM_POINTS: return MESA_PRIM_POINTS; case MESA_PRIM_LINES: case MESA_PRIM_LINE_STRIP: return MESA_PRIM_LINES; case MESA_PRIM_TRIANGLES: case MESA_PRIM_TRIANGLE_STRIP: case MESA_PRIM_TRIANGLE_FAN: return MESA_PRIM_TRIANGLES; default: /* Since we don't allow GS with multiview, we can only see non-adjacency * primitives. */ unreachable("Unexpected pipeline primitive type"); } } static enum mesa_prim multiview_gs_output_primitive_from_pipeline(struct v3dv_pipeline *pipeline) { switch (pipeline->topology) { case MESA_PRIM_POINTS: return MESA_PRIM_POINTS; case MESA_PRIM_LINES: case MESA_PRIM_LINE_STRIP: return MESA_PRIM_LINE_STRIP; case MESA_PRIM_TRIANGLES: case MESA_PRIM_TRIANGLE_STRIP: case MESA_PRIM_TRIANGLE_FAN: return MESA_PRIM_TRIANGLE_STRIP; default: /* Since we don't allow GS with multiview, we can only see non-adjacency * primitives. 
*/ unreachable("Unexpected pipeline primitive type"); } } static bool pipeline_add_multiview_gs(struct v3dv_pipeline *pipeline, struct v3dv_pipeline_cache *cache, const VkAllocationCallbacks *pAllocator) { /* Create the passthrough GS from the VS output interface */ struct v3dv_pipeline_stage *p_stage_vs = pipeline->stages[BROADCOM_SHADER_VERTEX]; p_stage_vs->nir = pipeline_stage_get_nir(p_stage_vs, pipeline, cache); nir_shader *vs_nir = p_stage_vs->nir; const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options(&pipeline->device->devinfo); nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options, "multiview broadcast gs"); nir_shader *nir = b.shader; nir->info.inputs_read = vs_nir->info.outputs_written; nir->info.outputs_written = vs_nir->info.outputs_written | (1ull << VARYING_SLOT_LAYER); uint32_t vertex_count = mesa_vertices_per_prim(pipeline->topology); nir->info.gs.input_primitive = multiview_gs_input_primitive_from_pipeline(pipeline); nir->info.gs.output_primitive = multiview_gs_output_primitive_from_pipeline(pipeline); nir->info.gs.vertices_in = vertex_count; nir->info.gs.vertices_out = nir->info.gs.vertices_in; nir->info.gs.invocations = 1; nir->info.gs.active_stream_mask = 0x1; /* Make a list of GS input/output variables from the VS outputs */ nir_variable *in_vars[100]; nir_variable *out_vars[100]; uint32_t var_count = 0; nir_foreach_shader_out_variable(out_vs_var, vs_nir) { char name[8]; snprintf(name, ARRAY_SIZE(name), "in_%d", var_count); in_vars[var_count] = nir_variable_create(nir, nir_var_shader_in, glsl_array_type(out_vs_var->type, vertex_count, 0), name); in_vars[var_count]->data.location = out_vs_var->data.location; in_vars[var_count]->data.location_frac = out_vs_var->data.location_frac; in_vars[var_count]->data.interpolation = out_vs_var->data.interpolation; snprintf(name, ARRAY_SIZE(name), "out_%d", var_count); out_vars[var_count] = nir_variable_create(nir, nir_var_shader_out, out_vs_var->type, name); out_vars[var_count]->data.location = out_vs_var->data.location; out_vars[var_count]->data.interpolation = out_vs_var->data.interpolation; var_count++; } /* Add the gl_Layer output variable */ nir_variable *out_layer = nir_variable_create(nir, nir_var_shader_out, glsl_int_type(), "out_Layer"); out_layer->data.location = VARYING_SLOT_LAYER; /* Get the view index value that we will write to gl_Layer */ nir_def *layer = nir_load_system_value(&b, nir_intrinsic_load_view_index, 0, 1, 32); /* Emit all output vertices */ for (uint32_t vi = 0; vi < vertex_count; vi++) { /* Emit all output varyings */ for (uint32_t i = 0; i < var_count; i++) { nir_deref_instr *in_value = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, in_vars[i]), vi); nir_copy_deref(&b, nir_build_deref_var(&b, out_vars[i]), in_value); } /* Emit gl_Layer write */ nir_store_var(&b, out_layer, layer, 0x1); nir_emit_vertex(&b, 0); } nir_end_primitive(&b, 0); /* Make sure we run our pre-process NIR passes so we produce NIR compatible * with what we expect from SPIR-V modules. 
 */
   preprocess_nir(nir);

   /* Attach the geometry shader to the pipeline */
   struct v3dv_device *device = pipeline->device;
   struct v3dv_physical_device *physical_device = device->pdevice;

   struct v3dv_pipeline_stage *p_stage =
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (p_stage == NULL) {
      ralloc_free(nir);
      return false;
   }

   p_stage->pipeline = pipeline;
   p_stage->stage = BROADCOM_SHADER_GEOMETRY;
   p_stage->entrypoint = "main";
   p_stage->module = NULL;
   p_stage->module_info = NULL;
   p_stage->nir = nir;
   pipeline_compute_sha1_from_nir(p_stage);
   p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);
   p_stage->robustness = pipeline->stages[BROADCOM_SHADER_VERTEX]->robustness;

   pipeline->has_gs = true;
   pipeline->stages[BROADCOM_SHADER_GEOMETRY] = p_stage;
   pipeline->active_stages |= MESA_SHADER_GEOMETRY;

   pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN] =
      pipeline_stage_create_binning(p_stage, pAllocator);
   if (pipeline->stages[BROADCOM_SHADER_GEOMETRY_BIN] == NULL)
      return false;

   return true;
}

static void
pipeline_check_buffer_device_address(struct v3dv_pipeline *pipeline)
{
   for (int i = BROADCOM_SHADER_VERTEX; i < BROADCOM_SHADER_STAGES; i++) {
      struct v3dv_shader_variant *variant = pipeline->shared_data->variants[i];
      if (variant && variant->prog_data.base->has_global_address) {
         pipeline->uses_buffer_device_address = true;
         return;
      }
   }

   pipeline->uses_buffer_device_address = false;
}

/*
 * Compiles a pipeline. Note that it also allocates internal objects; if some
 * allocations succeed but others fail, this method does not free the
 * successful ones.
 *
 * This is done to simplify the code: on failure we just call the pipeline
 * destroy method, which takes care of freeing the internal objects that were
 * allocated. We only need to be careful to set to NULL the objects that were
 * not allocated.
*/ static VkResult pipeline_compile_graphics(struct v3dv_pipeline *pipeline, struct v3dv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator) { VkPipelineCreationFeedback pipeline_feedback = { .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, }; int64_t pipeline_start = os_time_get_nano(); struct v3dv_device *device = pipeline->device; struct v3dv_physical_device *physical_device = device->pdevice; /* First pass to get some common info from the shader, and create the * individual pipeline_stage objects */ for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i]; gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage); struct v3dv_pipeline_stage *p_stage = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (p_stage == NULL) return VK_ERROR_OUT_OF_HOST_MEMORY; p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id); enum broadcom_shader_stage broadcom_stage = gl_shader_stage_to_broadcom(stage); p_stage->pipeline = pipeline; p_stage->stage = broadcom_stage; p_stage->entrypoint = sinfo->pName; p_stage->module = vk_shader_module_from_handle(sinfo->module); p_stage->spec_info = sinfo->pSpecializationInfo; if (!p_stage->module) { p_stage->module_info = vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO); } vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness, pCreateInfo->pNext, sinfo->pNext); vk_pipeline_hash_shader_stage(pipeline->flags, &pCreateInfo->pStages[i], &p_stage->robustness, p_stage->shader_sha1); pipeline->active_stages |= sinfo->stage; /* We will try to get directly the compiled shader variant, so let's not * worry about getting the nir shader for now. */ p_stage->nir = NULL; pipeline->stages[broadcom_stage] = p_stage; if (broadcom_stage == BROADCOM_SHADER_GEOMETRY) pipeline->has_gs = true; if (broadcom_shader_stage_is_render_with_binning(broadcom_stage)) { enum broadcom_shader_stage broadcom_stage_bin = broadcom_binning_shader_stage_for_render_stage(broadcom_stage); pipeline->stages[broadcom_stage_bin] = pipeline_stage_create_binning(p_stage, pAllocator); if (pipeline->stages[broadcom_stage_bin] == NULL) return VK_ERROR_OUT_OF_HOST_MEMORY; } } /* Add a no-op fragment shader if needed */ if (!pipeline->stages[BROADCOM_SHADER_FRAGMENT]) { const nir_shader_compiler_options *compiler_options = v3dv_pipeline_get_nir_options(&pipeline->device->devinfo); nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, compiler_options, "noop_fs"); struct v3dv_pipeline_stage *p_stage = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (p_stage == NULL) return VK_ERROR_OUT_OF_HOST_MEMORY; p_stage->pipeline = pipeline; p_stage->stage = BROADCOM_SHADER_FRAGMENT; p_stage->entrypoint = "main"; p_stage->module = NULL; p_stage->module_info = NULL; p_stage->nir = b.shader; vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness, NULL, NULL); pipeline_compute_sha1_from_nir(p_stage); p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id); pipeline->stages[BROADCOM_SHADER_FRAGMENT] = p_stage; pipeline->active_stages |= MESA_SHADER_FRAGMENT; } /* If multiview is enabled, we inject a custom passthrough geometry shader * to broadcast draw calls to the appropriate views. 
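    * The injected GS (see pipeline_add_multiview_gs above) re-emits each input
    * primitive and writes gl_Layer from the view index, so every primitive is
    * routed to the layer matching its view.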
 */
   const uint32_t view_mask = pipeline->rendering_info.view_mask;
   assert(!view_mask ||
          (!pipeline->has_gs && !pipeline->stages[BROADCOM_SHADER_GEOMETRY]));

   if (view_mask) {
      if (!pipeline_add_multiview_gs(pipeline, cache, pAllocator))
         return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   /* First we try to get the variants from the pipeline cache (unless we are
    * required to capture internal representations, since in that case we need
    * to compile).
    */
   bool needs_executable_info =
      pipeline->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
   if (!needs_executable_info) {
      struct v3dv_pipeline_key pipeline_key;
      pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);
      pipeline_hash_graphics(pipeline, &pipeline_key, pipeline->sha1);

      bool cache_hit = false;
      pipeline->shared_data =
         v3dv_pipeline_cache_search_for_pipeline(cache, pipeline->sha1,
                                                 &cache_hit);

      if (pipeline->shared_data != NULL) {
         /* A correct pipeline must have at least a VS and FS */
         assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);
         assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);
         assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);
         assert(!pipeline->stages[BROADCOM_SHADER_GEOMETRY] ||
                pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);
         assert(!pipeline->stages[BROADCOM_SHADER_GEOMETRY] ||
                pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);

         if (cache_hit && cache != &pipeline->device->default_pipeline_cache)
            pipeline_feedback.flags |=
               VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT;

         goto success;
      }
   }

   if (pipeline->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT)
      return VK_PIPELINE_COMPILE_REQUIRED;

   /* Otherwise we try to get the NIR shaders (either from the original SPIR-V
    * shader or the pipeline cache) and compile.
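    * That means linking the stages, running the per-stage pipeline lowerings,
    * and then compiling each stage to VIR: fragment first, then geometry if
    * present, then vertex, each render stage together with its binning
    * counterpart.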
*/ pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline->sha1, pipeline, true); if (!pipeline->shared_data) return VK_ERROR_OUT_OF_HOST_MEMORY; struct v3dv_pipeline_stage *p_stage_vs = pipeline->stages[BROADCOM_SHADER_VERTEX]; struct v3dv_pipeline_stage *p_stage_fs = pipeline->stages[BROADCOM_SHADER_FRAGMENT]; struct v3dv_pipeline_stage *p_stage_gs = pipeline->stages[BROADCOM_SHADER_GEOMETRY]; p_stage_vs->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; if (p_stage_gs) p_stage_gs->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; p_stage_fs->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; if (!p_stage_vs->nir) p_stage_vs->nir = pipeline_stage_get_nir(p_stage_vs, pipeline, cache); if (p_stage_gs && !p_stage_gs->nir) p_stage_gs->nir = pipeline_stage_get_nir(p_stage_gs, pipeline, cache); if (!p_stage_fs->nir) p_stage_fs->nir = pipeline_stage_get_nir(p_stage_fs, pipeline, cache); /* Linking + pipeline lowerings */ if (p_stage_gs) { link_shaders(p_stage_gs->nir, p_stage_fs->nir); link_shaders(p_stage_vs->nir, p_stage_gs->nir); } else { link_shaders(p_stage_vs->nir, p_stage_fs->nir); } pipeline_lower_nir(pipeline, p_stage_fs, pipeline->layout); lower_fs_io(p_stage_fs->nir); if (p_stage_gs) { pipeline_lower_nir(pipeline, p_stage_gs, pipeline->layout); lower_gs_io(p_stage_gs->nir); } pipeline_lower_nir(pipeline, p_stage_vs, pipeline->layout); lower_vs_io(p_stage_vs->nir); /* Compiling to vir */ VkResult vk_result; /* We should have got all the variants or no variants from the cache */ assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]); vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo); if (vk_result != VK_SUCCESS) return vk_result; assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] && !pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]); if (p_stage_gs) { vk_result = pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo); if (vk_result != VK_SUCCESS) return vk_result; } assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] && !pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]); vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo); if (vk_result != VK_SUCCESS) return vk_result; if (!upload_assembly(pipeline)) return VK_ERROR_OUT_OF_DEVICE_MEMORY; v3dv_pipeline_cache_upload_pipeline(pipeline, cache); success: pipeline_check_buffer_device_address(pipeline); pipeline_feedback.duration = os_time_get_nano() - pipeline_start; write_creation_feedback(pipeline, pCreateInfo->pNext, &pipeline_feedback, pCreateInfo->stageCount, pCreateInfo->pStages); /* Since we have the variants in the pipeline shared data we can now free * the pipeline stages. 
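    * (Unless executable info capture was requested: in that case we keep the
    * stages around, since their NIR is needed to implement the
    * VK_KHR_pipeline_executable_properties queries below.)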
 */
   if (!needs_executable_info)
      pipeline_free_stages(device, pipeline, pAllocator);

   pipeline_check_spill_size(pipeline);

   return compute_vpm_config(pipeline);
}

static VkResult
compute_vpm_config(struct v3dv_pipeline *pipeline)
{
   struct v3dv_shader_variant *vs_variant =
      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];
   struct v3dv_shader_variant *vs_bin_variant =
      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN];
   struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;
   struct v3d_vs_prog_data *vs_bin = vs_bin_variant->prog_data.vs;

   struct v3d_gs_prog_data *gs = NULL;
   struct v3d_gs_prog_data *gs_bin = NULL;
   if (pipeline->has_gs) {
      struct v3dv_shader_variant *gs_variant =
         pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];
      struct v3dv_shader_variant *gs_bin_variant =
         pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];
      gs = gs_variant->prog_data.gs;
      gs_bin = gs_bin_variant->prog_data.gs;
   }

   if (!v3d_compute_vpm_config(&pipeline->device->devinfo,
                               vs_bin, vs, gs_bin, gs,
                               &pipeline->vpm_cfg_bin,
                               &pipeline->vpm_cfg)) {
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;
   }

   return VK_SUCCESS;
}

static bool
stencil_op_is_no_op(struct vk_stencil_test_face_state *stencil)
{
   return stencil->op.depth_fail == VK_STENCIL_OP_KEEP &&
          stencil->op.compare == VK_COMPARE_OP_ALWAYS;
}

/* Computes the ez_state based on a given vk_dynamic_graphics_state. Note
 * that the parameter dyn doesn't need to be pipeline->dynamic_graphics_state,
 * as this method can be used by the cmd_buffer too.
 */
void
v3dv_compute_ez_state(struct vk_dynamic_graphics_state *dyn,
                      struct v3dv_pipeline *pipeline,
                      enum v3dv_ez_state *ez_state,
                      bool *incompatible_ez_test)
{
   if (!dyn->ds.depth.test_enable) {
      *ez_state = V3D_EZ_DISABLED;
      return;
   }

   switch (dyn->ds.depth.compare_op) {
   case VK_COMPARE_OP_LESS:
   case VK_COMPARE_OP_LESS_OR_EQUAL:
      *ez_state = V3D_EZ_LT_LE;
      break;
   case VK_COMPARE_OP_GREATER:
   case VK_COMPARE_OP_GREATER_OR_EQUAL:
      *ez_state = V3D_EZ_GT_GE;
      break;
   case VK_COMPARE_OP_NEVER:
   case VK_COMPARE_OP_EQUAL:
      *ez_state = V3D_EZ_UNDECIDED;
      break;
   default:
      *ez_state = V3D_EZ_DISABLED;
      *incompatible_ez_test = true;
      break;
   }

   /* If stencil is enabled and is not a no-op, we need to disable EZ */
   if (dyn->ds.stencil.test_enable &&
       (!stencil_op_is_no_op(&dyn->ds.stencil.front) ||
        !stencil_op_is_no_op(&dyn->ds.stencil.back))) {
      *ez_state = V3D_EZ_DISABLED;
   }

   /* If the FS writes Z, then it may update against the chosen EZ direction */
   struct v3dv_shader_variant *fs_variant =
      pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];
   if (fs_variant && fs_variant->prog_data.fs->writes_z &&
       !fs_variant->prog_data.fs->writes_z_from_fep) {
      *ez_state = V3D_EZ_DISABLED;
   }
}

static void
pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,
                         const VkPipelineMultisampleStateCreateInfo *ms_info)
{
   pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;

   /* Ignore pSampleMask if we are not enabling multisampling. The hardware
    * requires this to be 0xf or 0x0 if using a single sample.
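    * For example, with 4x MSAA (V3D_MAX_SAMPLES is 4) and an application
    * providing pSampleMask[0] = 0x3, the resulting pipeline->sample_mask is
    * 0xf & 0x3 = 0x3, so only samples 0 and 1 are ever written.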
*/ if (ms_info && ms_info->pSampleMask && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) { pipeline->sample_mask &= ms_info->pSampleMask[0]; } } static void pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline, const VkPipelineMultisampleStateCreateInfo *ms_info) { pipeline->sample_rate_shading = ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT && ms_info->sampleShadingEnable; } static void pipeline_setup_rendering_info(struct v3dv_device *device, struct v3dv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo, const VkAllocationCallbacks *alloc) { struct vk_render_pass_state *rp = &pipeline->rendering_info; if (pipeline->pass) { assert(pipeline->subpass); struct v3dv_render_pass *pass = pipeline->pass; struct v3dv_subpass *subpass = pipeline->subpass; const uint32_t attachment_idx = subpass->ds_attachment.attachment; rp->view_mask = subpass->view_mask; rp->depth_attachment_format = VK_FORMAT_UNDEFINED; rp->stencil_attachment_format = VK_FORMAT_UNDEFINED; rp->attachments = MESA_VK_RP_ATTACHMENT_NONE; if (attachment_idx != VK_ATTACHMENT_UNUSED) { VkFormat ds_format = pass->attachments[attachment_idx].desc.format; if (vk_format_has_depth(ds_format)) { rp->depth_attachment_format = ds_format; rp->attachments |= MESA_VK_RP_ATTACHMENT_DEPTH_BIT; } if (vk_format_has_stencil(ds_format)) { rp->stencil_attachment_format = ds_format; rp->attachments |= MESA_VK_RP_ATTACHMENT_STENCIL_BIT; } } rp->color_attachment_count = subpass->color_count; for (uint32_t i = 0; i < subpass->color_count; i++) { const uint32_t attachment_idx = subpass->color_attachments[i].attachment; if (attachment_idx == VK_ATTACHMENT_UNUSED) { rp->color_attachment_formats[i] = VK_FORMAT_UNDEFINED; continue; } rp->color_attachment_formats[i] = pass->attachments[attachment_idx].desc.format; rp->attachments |= MESA_VK_RP_ATTACHMENT_COLOR_BIT(i); } return; } const VkPipelineRenderingCreateInfo *ri = vk_find_struct_const(pCreateInfo->pNext, PIPELINE_RENDERING_CREATE_INFO); if (ri) { rp->view_mask = ri->viewMask; rp->color_attachment_count = ri->colorAttachmentCount; for (int i = 0; i < ri->colorAttachmentCount; i++) { rp->color_attachment_formats[i] = ri->pColorAttachmentFormats[i]; if (rp->color_attachment_formats[i] != VK_FORMAT_UNDEFINED) { rp->attachments |= MESA_VK_RP_ATTACHMENT_COLOR_BIT(i); } } rp->depth_attachment_format = ri->depthAttachmentFormat; if (ri->depthAttachmentFormat != VK_FORMAT_UNDEFINED) rp->attachments |= MESA_VK_RP_ATTACHMENT_DEPTH_BIT; rp->stencil_attachment_format = ri->stencilAttachmentFormat; if (ri->stencilAttachmentFormat != VK_FORMAT_UNDEFINED) rp->attachments |= MESA_VK_RP_ATTACHMENT_STENCIL_BIT; return; } /* From the Vulkan spec for VkPipelineRenderingCreateInfo: * * "if this structure is not specified, and the pipeline does not include * a VkRenderPass, viewMask and colorAttachmentCount are 0, and * depthAttachmentFormat and stencilAttachmentFormat are * VK_FORMAT_UNDEFINED. 
*/ pipeline->rendering_info = (struct vk_render_pass_state) { .view_mask = 0, .attachments = 0, .color_attachment_count = 0, .depth_attachment_format = VK_FORMAT_UNDEFINED, .stencil_attachment_format = VK_FORMAT_UNDEFINED, }; } static VkResult pipeline_init_dynamic_state(struct v3dv_device *device, struct v3dv_pipeline *pipeline, struct vk_graphics_pipeline_all_state *pipeline_all_state, struct vk_graphics_pipeline_state *pipeline_state, const VkGraphicsPipelineCreateInfo *pCreateInfo) { VkResult result = VK_SUCCESS; result = vk_graphics_pipeline_state_fill(&pipeline->device->vk, pipeline_state, pCreateInfo, &pipeline->rendering_info, 0, pipeline_all_state, NULL, 0, NULL); if (result != VK_SUCCESS) return result; vk_dynamic_graphics_state_fill(&pipeline->dynamic_graphics_state, pipeline_state); struct v3dv_dynamic_state *v3dv_dyn = &pipeline->dynamic; struct vk_dynamic_graphics_state *dyn = &pipeline->dynamic_graphics_state; if (BITSET_TEST(dyn->set, MESA_VK_DYNAMIC_VP_VIEWPORTS) || BITSET_TEST(dyn->set, MESA_VK_DYNAMIC_VP_SCISSORS)) { /* FIXME: right now we don't support multiViewport so viewporst[0] would * work now, but would need to change if we allow multiple viewports. */ v3dv_X(device, viewport_compute_xform)(&dyn->vp.viewports[0], v3dv_dyn->viewport.scale[0], v3dv_dyn->viewport.translate[0]); } v3dv_dyn->color_write_enable = (1ull << (4 * V3D_MAX_RENDER_TARGETS(device->devinfo.ver))) - 1; if (pipeline_state->cb) { const uint8_t color_writes = pipeline_state->cb->color_write_enables; v3dv_dyn->color_write_enable = 0; for (uint32_t i = 0; i < pipeline_state->cb->attachment_count; i++) { v3dv_dyn->color_write_enable |= (color_writes & BITFIELD_BIT(i)) ? (0xfu << (i * 4)) : 0; } } return result; } static VkResult pipeline_init(struct v3dv_pipeline *pipeline, struct v3dv_device *device, struct v3dv_pipeline_cache *cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator) { VkResult result = VK_SUCCESS; pipeline->device = device; V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout); pipeline->layout = layout; v3dv_pipeline_layout_ref(pipeline->layout); V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass); if (render_pass) { assert(pCreateInfo->subpass < render_pass->subpass_count); pipeline->pass = render_pass; pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass]; } pipeline_setup_rendering_info(device, pipeline, pCreateInfo, pAllocator); const VkPipelineInputAssemblyStateCreateInfo *ia_info = pCreateInfo->pInputAssemblyState; pipeline->topology = vk_to_mesa_prim[ia_info->topology]; struct vk_graphics_pipeline_all_state all; struct vk_graphics_pipeline_state pipeline_state = { }; result = pipeline_init_dynamic_state(device, pipeline, &all, &pipeline_state, pCreateInfo); if (result != VK_SUCCESS) { /* Caller would already destroy the pipeline, and we didn't allocate any * extra info. We don't need to do anything else. */ return result; } /* If rasterization is disabled, we just disable it through the CFG_BITS * packet, so for building the pipeline we always assume it is enabled */ const bool raster_enabled = (pipeline_state.rs && !pipeline_state.rs->rasterizer_discard_enable) || BITSET_TEST(pipeline_state.dynamic, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE); pipeline->rasterization_enabled = raster_enabled; const VkPipelineViewportStateCreateInfo *vp_info = raster_enabled ? pCreateInfo->pViewportState : NULL; const VkPipelineDepthStencilStateCreateInfo *ds_info = raster_enabled ? 
pCreateInfo->pDepthStencilState : NULL; const VkPipelineRasterizationStateCreateInfo *rs_info = raster_enabled ? pCreateInfo->pRasterizationState : NULL; const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info = raster_enabled ? vk_find_struct_const( rs_info->pNext, PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT) : NULL; const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info = raster_enabled ? vk_find_struct_const( rs_info->pNext, PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT) : NULL; const VkPipelineColorBlendStateCreateInfo *cb_info = raster_enabled ? pCreateInfo->pColorBlendState : NULL; const VkPipelineMultisampleStateCreateInfo *ms_info = raster_enabled ? pCreateInfo->pMultisampleState : NULL; const VkPipelineViewportDepthClipControlCreateInfoEXT *depth_clip_control = vp_info ? vk_find_struct_const(vp_info->pNext, PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT) : NULL; if (depth_clip_control) pipeline->negative_one_to_one = depth_clip_control->negativeOneToOne; v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info, rs_info, pv_info, ls_info, ms_info, &pipeline_state); pipeline_set_sample_mask(pipeline, ms_info); pipeline_set_sample_rate_shading(pipeline, ms_info); pipeline->line_smooth = enable_line_smooth(pipeline, rs_info); result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator); if (result != VK_SUCCESS) { /* Caller would already destroy the pipeline, and we didn't allocate any * extra info. We don't need to do anything else. */ return result; } const VkPipelineVertexInputStateCreateInfo *vi_info = pCreateInfo->pVertexInputState; const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info = vk_find_struct_const(vi_info->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT); v3dv_X(device, pipeline_pack_compile_state)(pipeline, vi_info, vd_info); if (v3dv_X(device, pipeline_needs_default_attribute_values)(pipeline)) { pipeline->default_attribute_values = v3dv_X(pipeline->device, create_default_attribute_values)(pipeline->device, pipeline); if (!pipeline->default_attribute_values) return VK_ERROR_OUT_OF_DEVICE_MEMORY; } else { pipeline->default_attribute_values = NULL; } /* This must be done after the pipeline has been compiled */ v3dv_compute_ez_state(&pipeline->dynamic_graphics_state, pipeline, &pipeline->ez_state, &pipeline->incompatible_ez_test); return result; } static VkPipelineCreateFlagBits2KHR pipeline_create_info_get_flags(VkPipelineCreateFlags flags, const void *pNext) { const VkPipelineCreateFlags2CreateInfoKHR *flags2 = vk_find_struct_const(pNext, PIPELINE_CREATE_FLAGS_2_CREATE_INFO_KHR); if (flags2) return flags2->flags; else return flags; } static VkResult graphics_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkGraphicsPipelineCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline, VkPipelineCreateFlagBits2KHR *flags) { V3DV_FROM_HANDLE(v3dv_device, device, _device); V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache); struct v3dv_pipeline *pipeline; VkResult result; *flags = pipeline_create_info_get_flags(pCreateInfo->flags, pCreateInfo->pNext); /* Use the default pipeline cache if none is specified */ if (cache == NULL && device->instance->default_pipeline_cache_enabled) cache = &device->default_pipeline_cache; pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline), VK_OBJECT_TYPE_PIPELINE); if (pipeline == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); pipeline->flags = *flags; result = 
pipeline_init(pipeline, device, cache, pCreateInfo, pAllocator); if (result != VK_SUCCESS) { v3dv_destroy_pipeline(pipeline, device, pAllocator); if (result == VK_PIPELINE_COMPILE_REQUIRED) *pPipeline = VK_NULL_HANDLE; return result; } *pPipeline = v3dv_pipeline_to_handle(pipeline); return VK_SUCCESS; } VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count, const VkGraphicsPipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) { V3DV_FROM_HANDLE(v3dv_device, device, _device); VkResult result = VK_SUCCESS; if (V3D_DBG(SHADERS)) mtx_lock(&device->pdevice->mutex); uint32_t i = 0; for (; i < count; i++) { VkResult local_result; VkPipelineCreateFlagBits2KHR flags; local_result = graphics_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, &pPipelines[i], &flags); if (local_result != VK_SUCCESS) { result = local_result; pPipelines[i] = VK_NULL_HANDLE; if (flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) break; } } for (; i < count; i++) pPipelines[i] = VK_NULL_HANDLE; if (V3D_DBG(SHADERS)) mtx_unlock(&device->pdevice->mutex); return result; } static void shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align) { assert(glsl_type_is_vector_or_scalar(type)); uint32_t comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8; unsigned length = glsl_get_vector_elements(type); *size = comp_size * length, *align = comp_size * (length == 3 ? 4 : length); } static void lower_compute(struct nir_shader *nir) { if (!nir->info.shared_memory_explicit_layout) { NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_type_info); } NIR_PASS(_, nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset); struct nir_lower_compute_system_values_options sysval_options = { .has_base_workgroup_id = true, }; NIR_PASS_V(nir, nir_lower_compute_system_values, &sysval_options); } static VkResult pipeline_compile_compute(struct v3dv_pipeline *pipeline, struct v3dv_pipeline_cache *cache, const VkComputePipelineCreateInfo *info, const VkAllocationCallbacks *alloc) { VkPipelineCreationFeedback pipeline_feedback = { .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, }; int64_t pipeline_start = os_time_get_nano(); struct v3dv_device *device = pipeline->device; struct v3dv_physical_device *physical_device = device->pdevice; const VkPipelineShaderStageCreateInfo *sinfo = &info->stage; gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage); struct v3dv_pipeline_stage *p_stage = vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!p_stage) return VK_ERROR_OUT_OF_HOST_MEMORY; p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id); p_stage->pipeline = pipeline; p_stage->stage = gl_shader_stage_to_broadcom(stage); p_stage->entrypoint = sinfo->pName; p_stage->module = vk_shader_module_from_handle(sinfo->module); p_stage->spec_info = sinfo->pSpecializationInfo; p_stage->feedback = (VkPipelineCreationFeedback) { 0 }; if (!p_stage->module) { p_stage->module_info = vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO); } vk_pipeline_robustness_state_fill(&device->vk, &p_stage->robustness, info->pNext, sinfo->pNext); vk_pipeline_hash_shader_stage(pipeline->flags, &info->stage, &p_stage->robustness, p_stage->shader_sha1); p_stage->nir = NULL; pipeline->stages[BROADCOM_SHADER_COMPUTE] = p_stage; pipeline->active_stages |= sinfo->stage; /* First 
we try to get the variants from the pipeline cache (unless we are * required to capture internal representations, since in that case we need * compile). */ bool needs_executable_info = pipeline->flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; if (!needs_executable_info) { struct v3dv_pipeline_key pipeline_key; pipeline_populate_compute_key(pipeline, &pipeline_key, info); pipeline_hash_compute(pipeline, &pipeline_key, pipeline->sha1); bool cache_hit = false; pipeline->shared_data = v3dv_pipeline_cache_search_for_pipeline(cache, pipeline->sha1, &cache_hit); if (pipeline->shared_data != NULL) { assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]); if (cache_hit && cache != &pipeline->device->default_pipeline_cache) pipeline_feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; goto success; } } if (pipeline->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) return VK_PIPELINE_COMPILE_REQUIRED; pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline->sha1, pipeline, false); if (!pipeline->shared_data) return VK_ERROR_OUT_OF_HOST_MEMORY; p_stage->feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; /* If not found on cache, compile it */ p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache); assert(p_stage->nir); v3d_optimize_nir(NULL, p_stage->nir); pipeline_lower_nir(pipeline, p_stage, pipeline->layout); lower_compute(p_stage->nir); VkResult result = VK_SUCCESS; struct v3d_key key; memset(&key, 0, sizeof(key)); pipeline_populate_v3d_key(&key, p_stage, 0); pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] = pipeline_compile_shader_variant(p_stage, &key, sizeof(key), alloc, &result); if (result != VK_SUCCESS) return result; if (!upload_assembly(pipeline)) return VK_ERROR_OUT_OF_DEVICE_MEMORY; v3dv_pipeline_cache_upload_pipeline(pipeline, cache); success: pipeline_check_buffer_device_address(pipeline); pipeline_feedback.duration = os_time_get_nano() - pipeline_start; write_creation_feedback(pipeline, info->pNext, &pipeline_feedback, 1, &info->stage); /* As we got the variants in pipeline->shared_data, after compiling we * don't need the pipeline_stages. 
*/ if (!needs_executable_info) pipeline_free_stages(device, pipeline, alloc); pipeline_check_spill_size(pipeline); return VK_SUCCESS; } static VkResult compute_pipeline_init(struct v3dv_pipeline *pipeline, struct v3dv_device *device, struct v3dv_pipeline_cache *cache, const VkComputePipelineCreateInfo *info, const VkAllocationCallbacks *alloc) { V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout); pipeline->device = device; pipeline->layout = layout; v3dv_pipeline_layout_ref(pipeline->layout); VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc); if (result != VK_SUCCESS) return result; return result; } static VkResult compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkComputePipelineCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline, VkPipelineCreateFlagBits2KHR *flags) { V3DV_FROM_HANDLE(v3dv_device, device, _device); V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache); struct v3dv_pipeline *pipeline; VkResult result; *flags = pipeline_create_info_get_flags(pCreateInfo->flags, pCreateInfo->pNext); /* Use the default pipeline cache if none is specified */ if (cache == NULL && device->instance->default_pipeline_cache_enabled) cache = &device->default_pipeline_cache; pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline), VK_OBJECT_TYPE_PIPELINE); if (pipeline == NULL) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); pipeline->flags = *flags; result = compute_pipeline_init(pipeline, device, cache, pCreateInfo, pAllocator); if (result != VK_SUCCESS) { v3dv_destroy_pipeline(pipeline, device, pAllocator); if (result == VK_PIPELINE_COMPILE_REQUIRED) *pPipeline = VK_NULL_HANDLE; return result; } *pPipeline = v3dv_pipeline_to_handle(pipeline); return VK_SUCCESS; } VKAPI_ATTR VkResult VKAPI_CALL v3dv_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t createInfoCount, const VkComputePipelineCreateInfo *pCreateInfos, const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) { V3DV_FROM_HANDLE(v3dv_device, device, _device); VkResult result = VK_SUCCESS; if (V3D_DBG(SHADERS)) mtx_lock(&device->pdevice->mutex); uint32_t i = 0; for (; i < createInfoCount; i++) { VkResult local_result; VkPipelineCreateFlagBits2KHR flags; local_result = compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, &pPipelines[i], &flags); if (local_result != VK_SUCCESS) { result = local_result; pPipelines[i] = VK_NULL_HANDLE; if (flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) break; } } for (; i < createInfoCount; i++) pPipelines[i] = VK_NULL_HANDLE; if (V3D_DBG(SHADERS)) mtx_unlock(&device->pdevice->mutex); return result; } static nir_shader * pipeline_get_nir(struct v3dv_pipeline *pipeline, enum broadcom_shader_stage stage) { assert(stage >= 0 && stage < BROADCOM_SHADER_STAGES); if (pipeline->stages[stage]) return pipeline->stages[stage]->nir; return NULL; } static struct v3d_prog_data * pipeline_get_prog_data(struct v3dv_pipeline *pipeline, enum broadcom_shader_stage stage) { if (pipeline->shared_data->variants[stage]) return pipeline->shared_data->variants[stage]->prog_data.base; return NULL; } static uint64_t * pipeline_get_qpu(struct v3dv_pipeline *pipeline, enum broadcom_shader_stage stage, uint32_t *qpu_size) { struct v3dv_shader_variant *variant = pipeline->shared_data->variants[stage]; if (!variant) { *qpu_size = 0; return NULL; } *qpu_size = variant->qpu_insts_size; return variant->qpu_insts; } /* FIXME: we use the same macro in various 
drivers, maybe move it to * the common vk_util.h? */ #define WRITE_STR(field, ...) ({ \ memset(field, 0, sizeof(field)); \ UNUSED int _i = snprintf(field, sizeof(field), __VA_ARGS__); \ assert(_i > 0 && _i < sizeof(field)); \ }) static bool write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir, const char *data) { ir->isText = VK_TRUE; size_t data_len = strlen(data) + 1; if (ir->pData == NULL) { ir->dataSize = data_len; return true; } strncpy(ir->pData, data, ir->dataSize); if (ir->dataSize < data_len) return false; ir->dataSize = data_len; return true; } static void append(char **str, size_t *offset, const char *fmt, ...) { va_list args; va_start(args, fmt); ralloc_vasprintf_rewrite_tail(str, offset, fmt, args); va_end(args); } static void pipeline_collect_executable_data(struct v3dv_pipeline *pipeline) { if (pipeline->executables.mem_ctx) return; pipeline->executables.mem_ctx = ralloc_context(NULL); util_dynarray_init(&pipeline->executables.data, pipeline->executables.mem_ctx); /* Don't crash for failed/bogus pipelines */ if (!pipeline->shared_data) return; for (int s = BROADCOM_SHADER_VERTEX; s <= BROADCOM_SHADER_COMPUTE; s++) { VkShaderStageFlags vk_stage = mesa_to_vk_shader_stage(broadcom_shader_stage_to_gl(s)); if (!(vk_stage & pipeline->active_stages)) continue; char *nir_str = NULL; char *qpu_str = NULL; if (pipeline_keep_qpu(pipeline)) { nir_shader *nir = pipeline_get_nir(pipeline, s); nir_str = nir ? nir_shader_as_str(nir, pipeline->executables.mem_ctx) : NULL; uint32_t qpu_size; uint64_t *qpu = pipeline_get_qpu(pipeline, s, &qpu_size); if (qpu) { uint32_t qpu_inst_count = qpu_size / sizeof(uint64_t); qpu_str = rzalloc_size(pipeline->executables.mem_ctx, qpu_inst_count * 96); size_t offset = 0; for (int i = 0; i < qpu_inst_count; i++) { const char *str = v3d_qpu_disasm(&pipeline->device->devinfo, qpu[i]); append(&qpu_str, &offset, "%s\n", str); ralloc_free((void *)str); } } } struct v3dv_pipeline_executable_data data = { .stage = s, .nir_str = nir_str, .qpu_str = qpu_str, }; util_dynarray_append(&pipeline->executables.data, struct v3dv_pipeline_executable_data, data); } } static const struct v3dv_pipeline_executable_data * pipeline_get_executable(struct v3dv_pipeline *pipeline, uint32_t index) { assert(index < util_dynarray_num_elements(&pipeline->executables.data, struct v3dv_pipeline_executable_data)); return util_dynarray_element(&pipeline->executables.data, struct v3dv_pipeline_executable_data, index); } VKAPI_ATTR VkResult VKAPI_CALL v3dv_GetPipelineExecutableInternalRepresentationsKHR( VkDevice device, const VkPipelineExecutableInfoKHR *pExecutableInfo, uint32_t *pInternalRepresentationCount, VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations) { V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline); pipeline_collect_executable_data(pipeline); VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out, pInternalRepresentations, pInternalRepresentationCount); bool incomplete = false; const struct v3dv_pipeline_executable_data *exe = pipeline_get_executable(pipeline, pExecutableInfo->executableIndex); if (exe->nir_str) { vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) { WRITE_STR(ir->name, "NIR (%s)", broadcom_shader_stage_name(exe->stage)); WRITE_STR(ir->description, "Final NIR form"); if (!write_ir_text(ir, exe->nir_str)) incomplete = true; } } if (exe->qpu_str) { vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) { WRITE_STR(ir->name, "QPU (%s)", 
                   broadcom_shader_stage_name(exe->stage));
         WRITE_STR(ir->description, "Final QPU assembly");
         if (!write_ir_text(ir, exe->qpu_str))
            incomplete = true;
      }
   }

   return incomplete ? VK_INCOMPLETE : vk_outarray_status(&out);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineExecutablePropertiesKHR(
   VkDevice device,
   const VkPipelineInfoKHR *pPipelineInfo,
   uint32_t *pExecutableCount,
   VkPipelineExecutablePropertiesKHR *pProperties)
{
   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pPipelineInfo->pipeline);

   pipeline_collect_executable_data(pipeline);

   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out,
                          pProperties, pExecutableCount);

   util_dynarray_foreach(&pipeline->executables.data,
                         struct v3dv_pipeline_executable_data, exe) {
      vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) {
         gl_shader_stage mesa_stage = broadcom_shader_stage_to_gl(exe->stage);
         props->stages = mesa_to_vk_shader_stage(mesa_stage);

         WRITE_STR(props->name, "%s (%s)",
                   _mesa_shader_stage_to_abbrev(mesa_stage),
                   broadcom_shader_stage_is_binning(exe->stage) ?
                      "Binning" : "Render");

         WRITE_STR(props->description, "%s",
                   _mesa_shader_stage_to_string(mesa_stage));

         props->subgroupSize = V3D_CHANNELS;
      }
   }

   return vk_outarray_status(&out);
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetPipelineExecutableStatisticsKHR(
   VkDevice device,
   const VkPipelineExecutableInfoKHR *pExecutableInfo,
   uint32_t *pStatisticCount,
   VkPipelineExecutableStatisticKHR *pStatistics)
{
   V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, pExecutableInfo->pipeline);

   pipeline_collect_executable_data(pipeline);

   const struct v3dv_pipeline_executable_data *exe =
      pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

   struct v3d_prog_data *prog_data =
      pipeline_get_prog_data(pipeline, exe->stage);

   struct v3dv_shader_variant *variant =
      pipeline->shared_data->variants[exe->stage];
   uint32_t qpu_inst_count = variant->qpu_insts_size / sizeof(uint64_t);

   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out,
                          pStatistics, pStatisticCount);

   if (qpu_inst_count > 0) {
      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "Compile Strategy");
         WRITE_STR(stat->description, "Chosen compile strategy index");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->compile_strategy_idx;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "Instruction Count");
         WRITE_STR(stat->description, "Number of QPU instructions");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = qpu_inst_count;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "Thread Count");
         WRITE_STR(stat->description, "Number of QPU threads dispatched");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->threads;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "Spill Size");
         WRITE_STR(stat->description, "Size of the spill buffer in bytes");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->spill_size;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "TMU Spills");
         WRITE_STR(stat->description, "Number of times a register was spilled "
                                      "to memory");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->tmu_spills;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "TMU Fills");
         WRITE_STR(stat->description, "Number of times a register was filled "
                                      "from memory");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->tmu_fills;
      }

      vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) {
         WRITE_STR(stat->name, "QPU Read Stalls");
         WRITE_STR(stat->description, "Number of cycles the QPU stalls for a "
                                      "register read dependency");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->qpu_read_stalls;
      }
   }

   return vk_outarray_status(&out);
}
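
/*
 * Illustrative sketch (not driver code): how an application might consume the
 * VK_KHR_pipeline_executable_properties data exposed by the entry points
 * above. The `device` and `pipeline` handles are assumed to come from the
 * application, the pipeline is assumed to have been created with
 * VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR, and the
 * pipelineExecutableInfo feature is assumed to be enabled.
 *
 *    VkPipelineInfoKHR pipe_info = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR,
 *       .pipeline = pipeline,
 *    };
 *    uint32_t exe_count = 0;
 *    vkGetPipelineExecutablePropertiesKHR(device, &pipe_info, &exe_count, NULL);
 *
 *    for (uint32_t i = 0; i < exe_count; i++) {
 *       VkPipelineExecutableInfoKHR exe_info = {
 *          .sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR,
 *          .pipeline = pipeline,
 *          .executableIndex = i,
 *       };
 *       uint32_t stat_count = 0;
 *       vkGetPipelineExecutableStatisticsKHR(device, &exe_info, &stat_count,
 *                                            NULL);
 *
 *       VkPipelineExecutableStatisticKHR *stats =
 *          calloc(stat_count, sizeof(*stats));
 *       for (uint32_t s = 0; s < stat_count; s++)
 *          stats[s].sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_STATISTIC_KHR;
 *       vkGetPipelineExecutableStatisticsKHR(device, &exe_info, &stat_count,
 *                                            stats);
 *
 *       // Each entry carries the name/description/value written above,
 *       // e.g. "Instruction Count" or "TMU Spills".
 *       free(stats);
 *    }
 */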