/*
 * Copyright © 2024 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "vk_shader.h"

#include "vk_alloc.h"
#include "vk_command_buffer.h"
#include "vk_common_entrypoints.h"
#include "vk_descriptor_set_layout.h"
#include "vk_device.h"
#include "vk_nir.h"
#include "vk_physical_device.h"
#include "vk_pipeline.h"

#include "util/mesa-sha1.h"

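/* Allocates and zeroes a vk_shader of the given size and initializes its
 * base object. A hypothetical driver's create path might use it like this
 * (a sketch only; drv_shader and drv_shader_ops are illustrative names,
 * not part of this runtime):
 *
 *    struct drv_shader *shader =
 *       vk_shader_zalloc(device, &drv_shader_ops, stage,
 *                        pAllocator, sizeof(*shader));
 *    if (shader == NULL)
 *       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
 */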
void *
vk_shader_zalloc(struct vk_device *device,
                 const struct vk_shader_ops *ops,
                 gl_shader_stage stage,
                 const VkAllocationCallbacks *alloc,
                 size_t size)
{
   /* For internal allocations, we need to allocate from the device scope
    * because they might be put in pipeline caches. Importantly, it is
    * impossible for the client to get at this pointer and we apply this
    * heuristic before we account for allocation fallbacks so this will only
    * ever happen for internal shader objects.
    */
   const VkSystemAllocationScope alloc_scope =
      alloc == &device->alloc ? VK_SYSTEM_ALLOCATION_SCOPE_DEVICE
                              : VK_SYSTEM_ALLOCATION_SCOPE_OBJECT;

   struct vk_shader *shader =
      vk_zalloc2(&device->alloc, alloc, size, 8, alloc_scope);
   if (shader == NULL)
      return NULL;

   vk_object_base_init(device, &shader->base, VK_OBJECT_TYPE_SHADER_EXT);
   shader->ops = ops;
   shader->stage = stage;

   return shader;
}

void
vk_shader_free(struct vk_device *device,
               const VkAllocationCallbacks *alloc,
               struct vk_shader *shader)
{
   vk_object_base_finish(&shader->base);
   vk_free2(&device->alloc, alloc, shader);
}

int
vk_shader_cmp_graphics_stages(gl_shader_stage a, gl_shader_stage b)
{
   static const int stage_order[MESA_SHADER_MESH + 1] = {
      [MESA_SHADER_VERTEX] = 1,
      [MESA_SHADER_TESS_CTRL] = 2,
      [MESA_SHADER_TESS_EVAL] = 3,
      [MESA_SHADER_GEOMETRY] = 4,
      [MESA_SHADER_TASK] = 5,
      [MESA_SHADER_MESH] = 6,
      [MESA_SHADER_FRAGMENT] = 7,
   };

   assert(a < ARRAY_SIZE(stage_order) && stage_order[a] > 0);
   assert(b < ARRAY_SIZE(stage_order) && stage_order[b] > 0);

   return stage_order[a] - stage_order[b];
}

struct stage_idx {
   gl_shader_stage stage;
   uint32_t idx;
};

static int
cmp_stage_idx(const void *_a, const void *_b)
{
   const struct stage_idx *a = _a, *b = _b;
   return vk_shader_cmp_graphics_stages(a->stage, b->stage);
}

static nir_shader *
vk_shader_to_nir(struct vk_device *device,
                 const VkShaderCreateInfoEXT *info,
                 const struct vk_pipeline_robustness_state *rs)
{
   const struct vk_device_shader_ops *ops = device->shader_ops;

   const gl_shader_stage stage = vk_to_mesa_shader_stage(info->stage);
   const nir_shader_compiler_options *nir_options =
      ops->get_nir_options(device->physical, stage, rs);
   struct spirv_to_nir_options spirv_options =
      ops->get_spirv_options(device->physical, stage, rs);

   enum gl_subgroup_size subgroup_size = vk_get_subgroup_size(
      vk_spirv_version(info->pCode, info->codeSize),
      stage, info->pNext,
      info->flags & VK_SHADER_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT,
      info->flags & VK_SHADER_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT);

   nir_shader *nir = vk_spirv_to_nir(device,
                                     info->pCode, info->codeSize,
                                     stage, info->pName,
                                     subgroup_size,
                                     info->pSpecializationInfo,
                                     &spirv_options, nir_options,
                                     false /* internal */, NULL);
   if (nir == NULL)
      return NULL;

   if (ops->preprocess_nir != NULL)
      ops->preprocess_nir(device->physical, nir);

   return nir;
}

struct set_layouts {
   struct vk_descriptor_set_layout *set_layouts[MESA_VK_MAX_DESCRIPTOR_SETS];
};

static void
vk_shader_compile_info_init(struct vk_shader_compile_info *info,
                            struct set_layouts *set_layouts,
                            const VkShaderCreateInfoEXT *vk_info,
                            const struct vk_pipeline_robustness_state *rs,
                            nir_shader *nir)
{
   for (uint32_t sl = 0; sl < vk_info->setLayoutCount; sl++) {
      set_layouts->set_layouts[sl] =
         vk_descriptor_set_layout_from_handle(vk_info->pSetLayouts[sl]);
   }

   *info = (struct vk_shader_compile_info) {
      .stage = nir->info.stage,
      .flags = vk_info->flags,
      .next_stage_mask = vk_info->nextStage,
      .nir = nir,
      .robustness = rs,
      .set_layout_count = vk_info->setLayoutCount,
      .set_layouts = set_layouts->set_layouts,
      .push_constant_range_count = vk_info->pushConstantRangeCount,
      .push_constant_ranges = vk_info->pPushConstantRanges,
   };
}

PRAGMA_DIAGNOSTIC_PUSH
PRAGMA_DIAGNOSTIC_ERROR(-Wpadded)
struct vk_shader_bin_header {
   char mesavkshaderbin[16];
   VkDriverId driver_id;
   uint8_t uuid[VK_UUID_SIZE];
   uint32_t version;
   uint64_t size;
   uint8_t sha1[SHA1_DIGEST_LENGTH];
   uint32_t _pad;
};
PRAGMA_DIAGNOSTIC_POP

static_assert(sizeof(struct vk_shader_bin_header) == 72,
              "This struct has no holes");

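/* Sketch of the resulting binary layout, with the sizes implied by the
 * struct above (VK_UUID_SIZE = 16, SHA1_DIGEST_LENGTH = 20):
 *
 *    bytes  0..15   "MesaVkShaderBin" magic (NUL-terminated)
 *    bytes 16..19   driver_id (VkDriverId)
 *    bytes 20..35   shaderBinaryUUID
 *    bytes 36..39   shaderBinaryVersion
 *    bytes 40..47   size of the entire blob, header included
 *    bytes 48..67   SHA1 of the entire blob, computed with this field zeroed
 *    bytes 68..71   explicit padding
 *    bytes 72..     driver-specific payload
 */
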
static void
vk_shader_bin_header_init(struct vk_shader_bin_header *header,
                          struct vk_physical_device *device)
{
   *header = (struct vk_shader_bin_header) {
      .mesavkshaderbin = "MesaVkShaderBin",
      .driver_id = device->properties.driverID,
   };
   memcpy(header->uuid, device->properties.shaderBinaryUUID, VK_UUID_SIZE);
   header->version = device->properties.shaderBinaryVersion;
}

static VkResult
vk_shader_serialize(struct vk_device *device,
                    struct vk_shader *shader,
                    struct blob *blob)
{
   struct vk_shader_bin_header header;
   vk_shader_bin_header_init(&header, device->physical);

   ASSERTED intptr_t header_offset = blob_reserve_bytes(blob, sizeof(header));
   assert(header_offset == 0);

   bool success = shader->ops->serialize(device, shader, blob);
   if (!success || blob->out_of_memory)
      return VK_INCOMPLETE;

   /* Finalize and write the header */
   header.size = blob->size;
   if (blob->data != NULL) {
      assert(sizeof(header) <= blob->size);

      struct mesa_sha1 sha1_ctx;
      _mesa_sha1_init(&sha1_ctx);

      /* Hash the header with a zero SHA1 */
      _mesa_sha1_update(&sha1_ctx, &header, sizeof(header));

      /* Hash the serialized data */
      _mesa_sha1_update(&sha1_ctx, blob->data + sizeof(header),
                        blob->size - sizeof(header));

      _mesa_sha1_final(&sha1_ctx, header.sha1);

      blob_overwrite_bytes(blob, header_offset, &header, sizeof(header));
   }

   return VK_SUCCESS;
}

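/* Counterpart to vk_shader_serialize(). The header checks below run in
 * order: magic, driverID, shaderBinaryUUID, shaderBinaryVersion, blob size,
 * and finally the SHA1 of the whole blob. Only after all of them pass do we
 * hand the remaining bytes to the driver's deserialize callback.
 */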
static VkResult
vk_shader_deserialize(struct vk_device *device,
                      size_t data_size, const void *data,
                      const VkAllocationCallbacks *pAllocator,
                      struct vk_shader **shader_out)
{
   const struct vk_device_shader_ops *ops = device->shader_ops;

   struct blob_reader blob;
   blob_reader_init(&blob, data, data_size);

   struct vk_shader_bin_header header, ref_header;
   blob_copy_bytes(&blob, &header, sizeof(header));
   if (blob.overrun)
      return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);

   vk_shader_bin_header_init(&ref_header, device->physical);

   if (memcmp(header.mesavkshaderbin, ref_header.mesavkshaderbin,
              sizeof(header.mesavkshaderbin)))
      return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);

   if (header.driver_id != ref_header.driver_id)
      return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);

   if (memcmp(header.uuid, ref_header.uuid, sizeof(header.uuid)))
      return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);

   /* From the Vulkan 1.3.276 spec:
    *
    *    "Guaranteed compatibility of shader binaries is expressed through a
    *    combination of the shaderBinaryUUID and shaderBinaryVersion members
    *    of the VkPhysicalDeviceShaderObjectPropertiesEXT structure queried
    *    from a physical device. Binary shaders retrieved from a physical
    *    device with a certain shaderBinaryUUID are guaranteed to be
    *    compatible with all other physical devices reporting the same
    *    shaderBinaryUUID and the same or higher shaderBinaryVersion."
    *
    * We handle the version check here on behalf of the driver and then pass
    * the version into the driver's deserialize callback.
    *
    * If a driver doesn't want to mess with versions, it can always make the
    * UUID a hash and always report version 0 and that will make this check
    * effectively a no-op.
    */
   if (header.version > ref_header.version)
      return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);

   /* Reject shader binaries that are the wrong size. */
   if (header.size != data_size)
      return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);

   assert(blob.current == (uint8_t *)data + sizeof(header));
   blob.end = (uint8_t *)data + data_size;

   struct mesa_sha1 sha1_ctx;
   _mesa_sha1_init(&sha1_ctx);

   /* Hash the header with a zero SHA1 */
   struct vk_shader_bin_header sha1_header = header;
   memset(sha1_header.sha1, 0, sizeof(sha1_header.sha1));
   _mesa_sha1_update(&sha1_ctx, &sha1_header, sizeof(sha1_header));

   /* Hash the serialized data */
   _mesa_sha1_update(&sha1_ctx, (uint8_t *)data + sizeof(header),
                     data_size - sizeof(header));

   _mesa_sha1_final(&sha1_ctx, ref_header.sha1);

   if (memcmp(header.sha1, ref_header.sha1, sizeof(header.sha1)))
      return vk_error(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT);

   /* We've now verified that the header matches and that the data has the
    * right SHA1 hash so it's safe to call into the driver.
    */
   return ops->deserialize(device, &blob, header.version,
                           pAllocator, shader_out);
}

VKAPI_ATTR VkResult VKAPI_CALL
vk_common_GetShaderBinaryDataEXT(VkDevice _device,
                                 VkShaderEXT _shader,
                                 size_t *pDataSize,
                                 void *pData)
{
   VK_FROM_HANDLE(vk_device, device, _device);
   VK_FROM_HANDLE(vk_shader, shader, _shader);
   VkResult result;

   /* From the Vulkan 1.3.275 spec:
    *
    *    "If pData is NULL, then the size of the binary shader code of the
    *    shader object, in bytes, is returned in pDataSize. Otherwise,
    *    pDataSize must point to a variable set by the user to the size of
    *    the buffer, in bytes, pointed to by pData, and on return the
    *    variable is overwritten with the amount of data actually written
    *    to pData. If pDataSize is less than the size of the binary shader
    *    code, nothing is written to pData, and VK_INCOMPLETE will be
    *    returned instead of VK_SUCCESS."
    *
    * This is annoying. Unlike basically every other Vulkan data return
    * method, we're not allowed to overwrite the client-provided memory
    * region on VK_INCOMPLETE. This means we either need to query the blob
    * size up-front by serializing twice or we need to serialize into
    * temporary memory and memcpy into the client-provided region. We choose
    * the first approach.
    *
    * In the common case, this means that vk_shader_ops::serialize will get
    * called 3 times: once for the client to get the size, once for us to
    * validate the client's size, and once to actually write the data. It's
    * a bit heavy-weight but this shouldn't be in a hot path and this is
    * better for memory efficiency. Also, vk_shader_ops::serialize should be
    * pretty fast on a null blob.
    */
   struct blob blob;
   blob_init_fixed(&blob, NULL, SIZE_MAX);
   result = vk_shader_serialize(device, shader, &blob);
   assert(result == VK_SUCCESS);

   if (result != VK_SUCCESS) {
      *pDataSize = 0;
      return result;
   } else if (pData == NULL) {
      *pDataSize = blob.size;
      return VK_SUCCESS;
   } else if (blob.size > *pDataSize) {
      /* No data written */
      *pDataSize = 0;
      return VK_INCOMPLETE;
   }

   blob_init_fixed(&blob, pData, *pDataSize);
   result = vk_shader_serialize(device, shader, &blob);
   assert(result == VK_SUCCESS);

   *pDataSize = blob.size;

   return result;
}

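/* From the client's side, retrieving a binary follows the usual two-call
 * idiom. A minimal sketch (illustrative application code, not part of this
 * runtime; error checking omitted):
 *
 *    size_t size = 0;
 *    vkGetShaderBinaryDataEXT(device, shader, &size, NULL);
 *    void *data = malloc(size);
 *    vkGetShaderBinaryDataEXT(device, shader, &size, data);
 */
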
/* The only place where we have "real" linking is graphics shaders and there
 * is a limit as to how many of them can be linked together at one time.
 */
#define VK_MAX_LINKED_SHADER_STAGES MESA_VK_MAX_GRAPHICS_PIPELINE_STAGES

VKAPI_ATTR VkResult VKAPI_CALL
vk_common_CreateShadersEXT(VkDevice _device,
                           uint32_t createInfoCount,
                           const VkShaderCreateInfoEXT *pCreateInfos,
                           const VkAllocationCallbacks *pAllocator,
                           VkShaderEXT *pShaders)
{
   VK_FROM_HANDLE(vk_device, device, _device);
   const struct vk_device_shader_ops *ops = device->shader_ops;
   VkResult first_fail_or_success = VK_SUCCESS;

   struct vk_pipeline_robustness_state rs = {
      .storage_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
      .uniform_buffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
      .vertex_inputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
      .images = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DISABLED_EXT,
      /* From the Vulkan 1.3.292 spec:
       *
       *    "This extension [VK_EXT_robustness2] also adds support for “null
       *    descriptors”, where VK_NULL_HANDLE can be used instead of a valid
       *    handle. Accesses to null descriptors have well-defined behavior,
       *    and do not rely on robustness."
       *
       * For now, default these to true.
       */
      .null_uniform_buffer_descriptor = true,
      .null_storage_buffer_descriptor = true,
   };

   /* From the Vulkan 1.3.274 spec:
    *
    *    "When this function returns, whether or not it succeeds, it is
    *    guaranteed that every element of pShaders will have been overwritten
    *    by either VK_NULL_HANDLE or a valid VkShaderEXT handle."
    *
    * Zeroing up-front makes the error path easier.
    */
   memset(pShaders, 0, createInfoCount * sizeof(*pShaders));

   bool has_linked_spirv = false;
   for (uint32_t i = 0; i < createInfoCount; i++) {
      if (pCreateInfos[i].codeType == VK_SHADER_CODE_TYPE_SPIRV_EXT &&
          (pCreateInfos[i].flags & VK_SHADER_CREATE_LINK_STAGE_BIT_EXT))
         has_linked_spirv = true;
   }

   uint32_t linked_count = 0;
   struct stage_idx linked[VK_MAX_LINKED_SHADER_STAGES];

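   /* Creation happens in two passes: the loop below immediately handles
    * binaries and any SPIR-V shader without
    * VK_SHADER_CREATE_LINK_STAGE_BIT_EXT, while linked SPIR-V stages are
    * stashed in linked[] and compiled together in a single driver compile
    * call afterwards.
    */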
   for (uint32_t i = 0; i < createInfoCount; i++) {
      const VkShaderCreateInfoEXT *vk_info = &pCreateInfos[i];
      VkResult result = VK_SUCCESS;

      switch (vk_info->codeType) {
      case VK_SHADER_CODE_TYPE_BINARY_EXT: {
         /* This isn't required by Vulkan but we're allowed to fail binary
          * import for basically any reason. This seems like a pretty good
          * reason.
          */
         if (has_linked_spirv &&
             (vk_info->flags & VK_SHADER_CREATE_LINK_STAGE_BIT_EXT)) {
            result = vk_errorf(device, VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT,
                               "Cannot mix linked binary and SPIR-V");
            break;
         }

         struct vk_shader *shader;
         result = vk_shader_deserialize(device, vk_info->codeSize,
                                        vk_info->pCode, pAllocator,
                                        &shader);
         if (result != VK_SUCCESS)
            break;

         pShaders[i] = vk_shader_to_handle(shader);
         break;
      }

      case VK_SHADER_CODE_TYPE_SPIRV_EXT: {
         if (vk_info->flags & VK_SHADER_CREATE_LINK_STAGE_BIT_EXT) {
            /* Stash it and compile later */
            assert(linked_count < ARRAY_SIZE(linked));
            linked[linked_count++] = (struct stage_idx) {
               .stage = vk_to_mesa_shader_stage(vk_info->stage),
               .idx = i,
            };
         } else {
            nir_shader *nir = vk_shader_to_nir(device, vk_info, &rs);
            if (nir == NULL) {
               result = vk_errorf(device, VK_ERROR_UNKNOWN,
                                  "Failed to compile shader to NIR");
               break;
            }

            struct vk_shader_compile_info info;
            struct set_layouts set_layouts;
            vk_shader_compile_info_init(&info, &set_layouts,
                                        vk_info, &rs, nir);

            struct vk_shader *shader;
            result = ops->compile(device, 1, &info, NULL /* state */,
                                  pAllocator, &shader);
            if (result != VK_SUCCESS)
               break;

            pShaders[i] = vk_shader_to_handle(shader);
         }
         break;
      }

      default:
         unreachable("Unknown shader code type");
      }

      if (first_fail_or_success == VK_SUCCESS)
         first_fail_or_success = result;
   }

   if (linked_count > 0) {
      struct set_layouts set_layouts[VK_MAX_LINKED_SHADER_STAGES];
      struct vk_shader_compile_info infos[VK_MAX_LINKED_SHADER_STAGES];
      VkResult result = VK_SUCCESS;

      /* Sort so we guarantee the driver always gets them in-order */
      qsort(linked, linked_count, sizeof(*linked), cmp_stage_idx);

      /* Memset for easy error handling */
      memset(infos, 0, sizeof(infos));

      for (uint32_t l = 0; l < linked_count; l++) {
         const VkShaderCreateInfoEXT *vk_info = &pCreateInfos[linked[l].idx];

         nir_shader *nir = vk_shader_to_nir(device, vk_info, &rs);
         if (nir == NULL) {
            result = vk_errorf(device, VK_ERROR_UNKNOWN,
                               "Failed to compile shader to NIR");
            break;
         }

         vk_shader_compile_info_init(&infos[l], &set_layouts[l],
                                     vk_info, &rs, nir);
      }

      if (result == VK_SUCCESS) {
         struct vk_shader *shaders[VK_MAX_LINKED_SHADER_STAGES];

         result = ops->compile(device, linked_count, infos, NULL /* state */,
                               pAllocator, shaders);
         if (result == VK_SUCCESS) {
            for (uint32_t l = 0; l < linked_count; l++)
               pShaders[linked[l].idx] = vk_shader_to_handle(shaders[l]);
         }
      } else {
         for (uint32_t l = 0; l < linked_count; l++) {
            if (infos[l].nir != NULL)
               ralloc_free(infos[l].nir);
         }
      }

      if (first_fail_or_success == VK_SUCCESS)
         first_fail_or_success = result;
   }

   return first_fail_or_success;
}

VKAPI_ATTR void VKAPI_CALL
vk_common_DestroyShaderEXT(VkDevice _device,
                           VkShaderEXT _shader,
                           const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(vk_device, device, _device);
   VK_FROM_HANDLE(vk_shader, shader, _shader);

   if (shader == NULL)
      return;

   vk_shader_destroy(device, shader, pAllocator);
}

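/* Note that pShaders may be NULL, which means "unbind every stage listed in
 * pStages": each entry is translated to a NULL vk_shader below and passed
 * through to the driver's cmd_bind_shaders hook.
 */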
VKAPI_ATTR void VKAPI_CALL
vk_common_CmdBindShadersEXT(VkCommandBuffer commandBuffer,
                            uint32_t stageCount,
                            const VkShaderStageFlagBits *pStages,
                            const VkShaderEXT *pShaders)
{
   VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
   struct vk_device *device = cmd_buffer->base.device;
   const struct vk_device_shader_ops *ops = device->shader_ops;

   STACK_ARRAY(gl_shader_stage, stages, stageCount);
   STACK_ARRAY(struct vk_shader *, shaders, stageCount);

   VkShaderStageFlags vk_stages = 0;
   for (uint32_t i = 0; i < stageCount; i++) {
      vk_stages |= pStages[i];
      stages[i] = vk_to_mesa_shader_stage(pStages[i]);
      shaders[i] = pShaders != NULL ? vk_shader_from_handle(pShaders[i])
                                    : NULL;
   }

   vk_cmd_unbind_pipelines_for_stages(cmd_buffer, vk_stages);
   if (vk_stages & ~VK_SHADER_STAGE_COMPUTE_BIT)
      vk_cmd_set_rp_attachments(cmd_buffer, ~0);

   ops->cmd_bind_shaders(cmd_buffer, stageCount, stages, shaders);
}