/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on radv driver which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef PVR_PRIVATE_H
#define PVR_PRIVATE_H

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <vulkan/vulkan.h>

#include "compiler/shader_enums.h"
#include "hwdef/rogue_hw_defs.h"
#include "pvr_border.h"
#include "pvr_clear.h"
#include "pvr_common.h"
#include "pvr_csb.h"
#include "pvr_device_info.h"
#include "pvr_entrypoints.h"
#include "pvr_hw_pass.h"
#include "pvr_job_render.h"
#include "pvr_limits.h"
#include "pvr_pds.h"
#include "pvr_shader_factory.h"
#include "pvr_spm.h"
#include "pvr_types.h"
#include "pvr_winsys.h"
#include "rogue/rogue.h"
#include "util/bitscan.h"
#include "util/format/u_format.h"
#include "util/log.h"
#include "util/macros.h"
#include "util/simple_mtx.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "vk_buffer.h"
#include "vk_command_buffer.h"
#include "vk_device.h"
#include "vk_enum_to_str.h"
#include "vk_graphics_state.h"
#include "vk_image.h"
#include "vk_instance.h"
#include "vk_log.h"
#include "vk_physical_device.h"
#include "vk_queue.h"
#include "vk_sync.h"
#include "wsi_common.h"

#ifdef HAVE_VALGRIND
#   include <valgrind/valgrind.h>
#   include <valgrind/memcheck.h>
#   define VG(x) x
#else
#   define VG(x) ((void)0)
#endif

struct pvr_bo;
struct pvr_bo_store;
struct pvr_compute_ctx;
struct pvr_compute_pipeline;
struct pvr_free_list;
struct pvr_graphics_pipeline;
struct pvr_instance;
struct pvr_render_ctx;
struct rogue_compiler;

struct pvr_physical_device {
   struct vk_physical_device vk;

   /* Back-pointer to instance */
   struct pvr_instance *instance;

   char *render_path;
   char *display_path;

   struct pvr_winsys *ws;
   struct pvr_device_info dev_info;
   struct pvr_device_runtime_info dev_runtime_info;

   VkPhysicalDeviceMemoryProperties memory;

   uint64_t heap_used;

   struct wsi_device wsi_device;

   struct rogue_compiler *compiler;
};

struct pvr_instance {
   struct vk_instance vk;

   uint32_t active_device_count;
};

struct pvr_queue {
   struct vk_queue vk;

   struct pvr_device *device;

   struct pvr_render_ctx *gfx_ctx;
   struct pvr_compute_ctx *compute_ctx;
   struct pvr_compute_ctx *query_ctx;
   struct pvr_transfer_ctx *transfer_ctx;

   struct vk_sync *last_job_signal_sync[PVR_JOB_TYPE_MAX];
   struct vk_sync *next_job_wait_sync[PVR_JOB_TYPE_MAX];
};

struct pvr_vertex_binding {
   struct pvr_buffer *buffer;
   VkDeviceSize offset;
};

struct pvr_pds_upload {
   struct pvr_suballoc_bo *pvr_bo;
   /* Offset from the pds heap base address. */
   uint32_t data_offset;
   /* Offset from the pds heap base address. */
   uint32_t code_offset;

   /* data_size + code_size = program_size. */
   uint32_t data_size;
   uint32_t code_size;
};
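/* Illustrative sketch (not part of the driver): both offsets above are
 * relative to the base address of the PDS heap, so resolving the device
 * address of an uploaded segment looks roughly like this, assuming a
 * hypothetical `pds_heap_base` device address:
 *
 *    pvr_dev_addr_t data_addr =
 *       PVR_DEV_ADDR_OFFSET(pds_heap_base, upload->data_offset);
 *    pvr_dev_addr_t code_addr =
 *       PVR_DEV_ADDR_OFFSET(pds_heap_base, upload->code_offset);
 *
 *    assert(upload->data_size + upload->code_size == program_size);
 */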
struct pvr_compute_query_shader {
   struct pvr_suballoc_bo *usc_bo;

   struct pvr_pds_upload pds_prim_code;
   uint32_t primary_data_size_dw;
   uint32_t primary_num_temps;

   struct pvr_pds_info info;
   struct pvr_pds_upload pds_sec_code;
};

struct pvr_device {
   struct vk_device vk;
   struct pvr_instance *instance;
   struct pvr_physical_device *pdevice;

   struct pvr_winsys *ws;
   struct pvr_winsys_heaps heaps;

   struct pvr_free_list *global_free_list;

   struct pvr_queue *queues;
   uint32_t queue_count;

   /* Running count of the number of job submissions across all queues. */
   uint32_t global_cmd_buffer_submit_count;

   /* Running count of the number of presentations across all queues. */
   uint32_t global_queue_present_count;

   uint32_t pixel_event_data_size_in_dwords;

   uint64_t input_attachment_sampler;

   struct pvr_pds_upload pds_compute_fence_program;
   struct pvr_pds_upload pds_compute_empty_program;

   /* Compute shaders for queries. */
   struct pvr_compute_query_shader availability_shader;
   struct pvr_compute_query_shader *copy_results_shaders;
   struct pvr_compute_query_shader *reset_queries_shaders;

   struct pvr_suballocator suballoc_general;
   struct pvr_suballocator suballoc_pds;
   struct pvr_suballocator suballoc_transfer;
   struct pvr_suballocator suballoc_usc;
   struct pvr_suballocator suballoc_vis_test;

   struct {
      struct pvr_pds_upload pds;
      struct pvr_suballoc_bo *usc;
   } nop_program;

   /* Issue Data Fence, Wait for Data Fence state. */
   struct {
      uint32_t usc_shareds;
      struct pvr_suballoc_bo *usc;

      /* Buffer in which the IDF/WDF program performs store ops. */
      struct pvr_bo *store_bo;
      /* Contains the initialization values for the shared registers. */
      struct pvr_bo *shareds_bo;

      struct pvr_pds_upload pds;
      struct pvr_pds_upload sw_compute_barrier_pds;
   } idfwdf_state;
   struct pvr_device_static_clear_state {
      struct pvr_suballoc_bo *usc_vertex_shader_bo;
      struct pvr_suballoc_bo *vertices_bo;
      struct pvr_pds_upload pds;

      /* Only valid if PVR_HAS_FEATURE(dev_info, gs_rta_support). */
      struct pvr_suballoc_bo *usc_multi_layer_vertex_shader_bo;

      struct pvr_static_clear_ppp_base ppp_base;
      /* Indexable using VkImageAspectFlags. */
      struct pvr_static_clear_ppp_template
         ppp_templates[PVR_STATIC_CLEAR_VARIANT_COUNT];

      const uint32_t *vdm_words;
      const uint32_t *large_clear_vdm_words;

      struct pvr_suballoc_bo *usc_clear_attachment_programs;
      struct pvr_suballoc_bo *pds_clear_attachment_programs;
      /* TODO: See if we can use PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT to save
       * some memory.
       */
      struct pvr_pds_clear_attachment_program_info {
         pvr_dev_addr_t texture_program_offset;
         pvr_dev_addr_t pixel_program_offset;

         uint32_t texture_program_pds_temps_count;
         /* Size in dwords. */
         uint32_t texture_program_data_size;
      } pds_clear_attachment_program_info
         [PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT_WITH_HOLES];
   } static_clear_state;

   struct {
      struct pvr_suballoc_bo *usc_programs;
      struct pvr_suballoc_bo *pds_programs;

      struct pvr_spm_per_load_program_state {
         pvr_dev_addr_t pds_pixel_program_offset;
         pvr_dev_addr_t pds_uniform_program_offset;

         uint32_t pds_texture_program_data_size;
         uint32_t pds_texture_program_temps_count;
      } load_program[PVR_SPM_LOAD_PROGRAM_COUNT];
   } spm_load_state;

   struct pvr_device_tile_buffer_state {
      simple_mtx_t mtx;

#define PVR_MAX_TILE_BUFFER_COUNT 7U
      struct pvr_bo *buffers[PVR_MAX_TILE_BUFFER_COUNT];
      uint32_t buffer_count;
   } tile_buffer_state;

   struct pvr_spm_scratch_buffer_store spm_scratch_buffer_store;

   struct pvr_bo_store *bo_store;

   struct pvr_bo *robustness_buffer;

   struct vk_sync *presignaled_sync;

   struct pvr_border_color_table border_color_table;
};

struct pvr_device_memory {
   struct vk_object_base base;
   struct pvr_winsys_bo *bo;
};

struct pvr_mip_level {
   /* Offset of the mip level in bytes */
   uint32_t offset;

   /* Aligned mip level size in bytes */
   uint32_t size;

   /* Aligned row length in bytes */
   uint32_t pitch;

   /* Aligned height in bytes */
   uint32_t height_pitch;
};

struct pvr_image {
   struct vk_image vk;

   /* vma this image is bound to */
   struct pvr_winsys_vma *vma;

   /* Device address the image is mapped to in device virtual address space */
   pvr_dev_addr_t dev_addr;

   /* Derived and other state */
   VkExtent3D physical_extent;
   enum pvr_memlayout memlayout;
   VkDeviceSize layer_size;
   VkDeviceSize size;

   VkDeviceSize alignment;

   struct pvr_mip_level mip_levels[14];
};

struct pvr_buffer {
   struct vk_buffer vk;

   /* Derived and other state */
   uint32_t alignment;

   /* vma this buffer is bound to */
   struct pvr_winsys_vma *vma;

   /* Device address the buffer is mapped to in device virtual address space */
   pvr_dev_addr_t dev_addr;
};

struct pvr_image_view {
   struct vk_image_view vk;

   /* Prepacked Texture Image dword 0 and 1. It will be copied to the
    * descriptor info during pvr_UpdateDescriptorSets().
    *
    * We create separate texture states for sampling, storage and input
    * attachment cases.
    */
   uint64_t texture_state[PVR_TEXTURE_STATE_MAX_ENUM][2];
};

struct pvr_buffer_view {
   struct vk_object_base base;

   uint64_t range;
   VkFormat format;

   /* Prepacked Texture dword 0 and 1. It will be copied to the descriptor
    * during pvr_UpdateDescriptorSets().
    */
   uint64_t texture_state[2];
};

#define PVR_TRANSFER_MAX_SOURCES 10U
#define PVR_TRANSFER_MAX_CUSTOM_MAPPINGS 6U

/** A surface describes a source or destination for a transfer operation. */
struct pvr_transfer_cmd_surface {
   pvr_dev_addr_t dev_addr;

   /* Memory address for extra U/V planes. */
   pvr_dev_addr_t uv_address[2];

   /* Surface width in texels. */
   uint32_t width;

   /* Surface height in texels. */
   uint32_t height;

   uint32_t depth;

   /* Z position in a 3D texture. 0.0f <= z_position <= depth. */
   float z_position;

   /* Stride in texels. */
   uint32_t stride;

   VkFormat vk_format;

   enum pvr_memlayout mem_layout;

   uint32_t sample_count;
};

struct pvr_rect_mapping {
   VkRect2D src_rect;
   VkRect2D dst_rect;
   bool flip_x;
   bool flip_y;
};
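/* Illustrative sketch (not part of the driver): going by the fields above, a
 * horizontally mirrored 64x64 blit could be described with a single mapping
 * along these lines:
 *
 *    struct pvr_rect_mapping mapping = {
 *       .src_rect = { .offset = { 0, 0 }, .extent = { 64, 64 } },
 *       .dst_rect = { .offset = { 0, 0 }, .extent = { 64, 64 } },
 *       .flip_x = true,
 *       .flip_y = false,
 *    };
 */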
struct pvr_transfer_cmd_source {
   struct pvr_transfer_cmd_surface surface;

   uint32_t mapping_count;
   struct pvr_rect_mapping mappings[PVR_TRANSFER_MAX_CUSTOM_MAPPINGS];

   /* In the case of a simple 1:1 copy, this setting does not affect the
    * output but will affect performance. Use clamp to edge when possible.
    */
   /* This is of type enum PVRX(TEXSTATE_ADDRMODE). */
   int addr_mode;

   /* Source filtering method. */
   enum pvr_filter filter;

   /* MSAA resolve operation. */
   enum pvr_resolve_op resolve_op;
};

struct pvr_transfer_cmd {
   /* Node to link this cmd into the transfer_cmds list in
    * pvr_sub_cmd::transfer structure.
    */
   struct list_head link;

   uint32_t flags;

   uint32_t source_count;

   struct pvr_transfer_cmd_source sources[PVR_TRANSFER_MAX_SOURCES];

   union fi clear_color[4];

   struct pvr_transfer_cmd_surface dst;

   VkRect2D scissor;

   /* Pointer to cmd buffer this transfer cmd belongs to. This is mainly used
    * to link buffer objects allocated during job submission into
    * cmd_buffer::bo_list head.
    */
   struct pvr_cmd_buffer *cmd_buffer;

   /* Deferred RTA clears are allocated from pvr_cmd_buffer->deferred_clears
    * and cannot be freed directly.
    */
   bool is_deferred_clear;
};

struct pvr_sub_cmd_gfx {
   const struct pvr_framebuffer *framebuffer;

   struct pvr_render_job job;

   struct pvr_suballoc_bo *depth_bias_bo;
   struct pvr_suballoc_bo *scissor_bo;

   /* Tracking how the loaded depth/stencil values are being used. */
   enum pvr_depth_stencil_usage depth_usage;
   enum pvr_depth_stencil_usage stencil_usage;

   /* Tracking whether the subcommand modifies depth/stencil. */
   bool modifies_depth;
   bool modifies_stencil;

   /* Store the render to a scratch buffer. */
   bool barrier_store;
   /* Load the render (stored with a `barrier_store`) as a background to the
    * current render.
    */
   bool barrier_load;

   const struct pvr_query_pool *query_pool;
   struct util_dynarray sec_query_indices;

   /* Control stream builder object */
   struct pvr_csb control_stream;

   /* Required iff pvr_sub_cmd_gfx_requires_split_submit() returns true. */
   struct pvr_bo *terminate_ctrl_stream;

   uint32_t hw_render_idx;

   uint32_t max_tiles_in_flight;

   bool empty_cmd;

   /* True if any fragment shader used in this sub command uses atomic
    * operations.
    */
   bool frag_uses_atomic_ops;

   bool disable_compute_overlap;

   /* True if any fragment shader used in this sub command has side
    * effects.
    */
   bool frag_has_side_effects;

   /* True if any vertex shader used in this sub command contains both
    * texture reads and texture writes.
    */
   bool vertex_uses_texture_rw;

   /* True if any fragment shader used in this sub command contains
    * both texture reads and texture writes.
    */
   bool frag_uses_texture_rw;

   bool has_occlusion_query;

   bool wait_on_previous_transfer;
};

struct pvr_sub_cmd_compute {
   /* Control stream builder object. */
   struct pvr_csb control_stream;

   uint32_t num_shared_regs;

   /* True if any shader used in this sub command uses atomic
    * operations.
    */
   bool uses_atomic_ops;

   bool uses_barrier;

   bool pds_sw_barrier_requires_clearing;
};

struct pvr_sub_cmd_transfer {
   bool serialize_with_frag;

   /* Pointer to the actual transfer command list, allowing primary and
    * secondary sub-commands to share the same list.
    */
   struct list_head *transfer_cmds;

   /* List of pvr_transfer_cmd type structures. Do not access the list
    * directly, but always use the transfer_cmds pointer above.
    */
   struct list_head transfer_cmds_priv;
};
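/* Illustrative sketch (not part of the driver): since secondary sub-commands
 * may alias the primary's list, iteration must always go through the
 * transfer_cmds pointer rather than transfer_cmds_priv, e.g.:
 *
 *    struct pvr_transfer_cmd *transfer_cmd;
 *
 *    list_for_each_entry (struct pvr_transfer_cmd,
 *                         transfer_cmd,
 *                         sub_cmd->transfer_cmds,
 *                         link) {
 *       ...
 *    }
 */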
struct pvr_sub_cmd_event {
   enum pvr_event_type type;

   union {
      struct pvr_sub_cmd_event_set_reset {
         struct pvr_event *event;
         /* Stages to wait for until the event is set or reset. */
         uint32_t wait_for_stage_mask;
      } set_reset;

      struct pvr_sub_cmd_event_wait {
         uint32_t count;
         /* Events to wait for before resuming. */
         struct pvr_event **events;
         /* Stages to wait at. */
         uint32_t *wait_at_stage_masks;
      } wait;

      struct pvr_sub_cmd_event_barrier {
         bool in_render_pass;

         /* Stages to wait for. */
         uint32_t wait_for_stage_mask;

         /* Stages to wait at. */
         uint32_t wait_at_stage_mask;
      } barrier;
   };
};

struct pvr_sub_cmd {
   /* This links the subcommand in pvr_cmd_buffer:sub_cmds list. */
   struct list_head link;

   enum pvr_sub_cmd_type type;

   /* True if the sub_cmd is owned by this command buffer. False if taken from
    * a secondary command buffer; in that case we are not supposed to free any
    * resources associated with the sub_cmd.
    */
   bool owned;

   union {
      struct pvr_sub_cmd_gfx gfx;
      struct pvr_sub_cmd_compute compute;
      struct pvr_sub_cmd_transfer transfer;
      struct pvr_sub_cmd_event event;
   };
};

struct pvr_render_pass_info {
   const struct pvr_render_pass *pass;
   struct pvr_framebuffer *framebuffer;

   struct pvr_image_view **attachments;

   uint32_t subpass_idx;
   uint32_t current_hw_subpass;

   VkRect2D render_area;

   uint32_t clear_value_count;
   VkClearValue *clear_values;

   VkPipelineBindPoint pipeline_bind_point;

   bool process_empty_tiles;
   bool enable_bg_tag;
   uint32_t isp_userpass;
};

struct pvr_ppp_state {
   uint32_t header;

   struct {
      /* TODO: Can we get rid of the "control" field? */
      struct PVRX(TA_STATE_ISPCTL) control_struct;
      uint32_t control;

      uint32_t front_a;
      uint32_t front_b;

      uint32_t back_a;
      uint32_t back_b;
   } isp;

   struct pvr_ppp_dbsc {
      uint16_t scissor_index;
      uint16_t depthbias_index;
   } depthbias_scissor_indices;

   struct {
      uint32_t pixel_shader_base;

      uint32_t texture_uniform_code_base;

      uint32_t size_info1;
      uint32_t size_info2;

      uint32_t varying_base;

      uint32_t texture_state_data_base;

      uint32_t uniform_state_data_base;
   } pds;

   struct {
      uint32_t word0;
      uint32_t word1;
   } region_clipping;

   struct {
      uint32_t a0;
      uint32_t m0;
      uint32_t a1;
      uint32_t m1;
      uint32_t a2;
      uint32_t m2;
   } viewports[PVR_MAX_VIEWPORTS];
   uint32_t viewport_count;

   uint32_t output_selects;

   uint32_t varying_word[2];

   uint32_t ppp_control;
};

/* Represents a control stream related command that is deferred for execution
 * in a secondary command buffer.
 */
struct pvr_deferred_cs_command {
   enum pvr_deferred_cs_command_type type;
   union {
      struct {
         struct pvr_ppp_dbsc state;

         uint32_t *vdm_state;
      } dbsc;

      struct {
         struct pvr_ppp_dbsc state;

         struct pvr_suballoc_bo *ppp_cs_bo;
         uint32_t patch_offset;
      } dbsc2;
   };
};

struct pvr_cmd_buffer_draw_state {
   uint32_t base_instance;
   uint32_t base_vertex;
   bool draw_indirect;
   bool draw_indexed;
};
struct pvr_cmd_buffer_state {
   /* Pipeline binding. */
   const struct pvr_graphics_pipeline *gfx_pipeline;

   const struct pvr_compute_pipeline *compute_pipeline;

   struct pvr_render_pass_info render_pass_info;

   struct pvr_sub_cmd *current_sub_cmd;

   struct pvr_ppp_state ppp_state;

   struct PVRX(TA_STATE_HEADER) emit_header;

   struct pvr_vertex_binding vertex_bindings[PVR_MAX_VERTEX_INPUT_BINDINGS];

   struct {
      struct pvr_buffer *buffer;
      VkDeviceSize offset;
      VkIndexType type;
   } index_buffer_binding;

   struct {
      uint8_t data[PVR_MAX_PUSH_CONSTANTS_SIZE];
      VkShaderStageFlags dirty_stages;
      /* Indicates if the whole push constants buffer was uploaded. This
       * avoids having to upload the same data twice when the push constant
       * range covers both gfx and compute.
       */
      bool uploaded;
      pvr_dev_addr_t dev_addr;
   } push_constants;

   /* Array size of barriers_needed is based on number of sync pipeline
    * stages.
    */
   uint32_t barriers_needed[PVR_NUM_SYNC_PIPELINE_STAGES];

   struct pvr_descriptor_state gfx_desc_state;
   struct pvr_descriptor_state compute_desc_state;

   VkFormat depth_format;

   struct {
      bool compute_pipeline_binding : 1;
      bool compute_desc_dirty : 1;

      bool gfx_pipeline_binding : 1;
      bool gfx_desc_dirty : 1;

      bool vertex_bindings : 1;
      bool index_buffer_binding : 1;
      bool vertex_descriptors : 1;
      bool fragment_descriptors : 1;

      bool isp_userpass : 1;

      /* Some draw state needs to be tracked for changes between draw calls,
       * e.g. a draw with baseInstance=0 followed by a call with
       * baseInstance=1 needs to cause us to select a different PDS attrib
       * program and update the BASE_INSTANCE PDS const. If only baseInstance
       * changes then we just have to update the data section.
       */
      bool draw_base_instance : 1;
      bool draw_variant : 1;

      bool vis_test;
   } dirty;

   struct pvr_cmd_buffer_draw_state draw_state;

   struct {
      uint32_t code_offset;
      const struct pvr_pds_info *info;
   } pds_shader;

   const struct pvr_query_pool *query_pool;
   bool vis_test_enabled;
   uint32_t vis_reg;

   struct util_dynarray query_indices;

   uint32_t max_shared_regs;

   /* Address of data segment for vertex attrib upload program. */
   uint32_t pds_vertex_attrib_offset;

   uint32_t pds_fragment_descriptor_data_offset;
   uint32_t pds_compute_descriptor_data_offset;
};

/* Do not change this. This is the format used for the depth_bias_array
 * elements uploaded to the device.
 */
struct pvr_depth_bias_state {
   /* Saved information from pCreateInfo. */
   float constant_factor;
   float slope_factor;
   float clamp;
};

/* Do not change this. This is the format used for the scissor_array
 * elements uploaded to the device.
 */
struct pvr_scissor_words {
   /* Contains a packed IPF_SCISSOR_WORD_0. */
   uint32_t w0;
   /* Contains a packed IPF_SCISSOR_WORD_1. */
   uint32_t w1;
};

struct pvr_cmd_buffer {
   struct vk_command_buffer vk;

   struct pvr_device *device;

   /* Buffer usage flags */
   VkCommandBufferUsageFlags usage_flags;

   /* Array of struct pvr_depth_bias_state. */
   struct util_dynarray depth_bias_array;

   /* Array of struct pvr_scissor_words. */
   struct util_dynarray scissor_array;
   struct pvr_scissor_words scissor_words;

   struct pvr_cmd_buffer_state state;

   /* List of struct pvr_deferred_cs_command control stream related commands
    * to execute in secondary command buffer.
    */
   struct util_dynarray deferred_csb_commands;
   /* List of struct pvr_transfer_cmd used to emulate RTA clears on non RTA
    * capable cores.
    */
   struct util_dynarray deferred_clears;

   /* List of pvr_bo structs associated with this cmd buffer. */
   struct list_head bo_list;

   struct list_head sub_cmds;
};

struct pvr_stage_allocation_descriptor_state {
   struct pvr_pds_upload pds_code;
   /* Since we upload the code segment separately from the data segment
    * pds_code->data_size might be 0 whilst
    * pds_info->data_size_in_dwords might be >0 in the case of this struct
    * referring to the code upload.
    */
   struct pvr_pds_info pds_info;

   /* Already setup compile time static consts. */
   struct pvr_suballoc_bo *static_consts;
};

struct pvr_pds_attrib_program {
   struct pvr_pds_info info;
   /* The uploaded PDS program stored here only contains the code segment,
    * meaning the data size will be 0, unlike the data size stored in the
    * 'info' member above.
    */
   struct pvr_pds_upload program;
};
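/* Illustrative sketch (not part of the driver): for the two code-only uploads
 * above, the authoritative data segment size lives in the pvr_pds_info, not
 * in the upload itself, so consumers would read it along these lines:
 *
 *    const struct pvr_pds_attrib_program *prog = ...;
 *
 *    assert(prog->program.data_size == 0);
 *    uint32_t data_size_dw = prog->info.data_size_in_dwords;
 */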
struct pvr_pipeline_stage_state {
   uint32_t const_shared_reg_count;
   uint32_t const_shared_reg_offset;

   uint32_t pds_temps_count;

   uint32_t coefficient_size;

   /* True if this shader uses any atomic operations. */
   bool uses_atomic_ops;

   /* True if this shader uses both texture reads and texture writes. */
   bool uses_texture_rw;

   /* Only used for compute stage. */
   bool uses_barrier;

   /* True if this shader has side effects */
   bool has_side_effects;

   /* True if this shader is simply a nop.end. */
   bool empty_program;
};

struct pvr_compute_shader_state {
   /* Pointer to a buffer object that contains the shader binary. */
   struct pvr_suballoc_bo *bo;

   bool uses_atomic_ops;
   bool uses_barrier;
   /* E.g. GLSL shader uses gl_NumWorkGroups. */
   bool uses_num_workgroups;

   uint32_t const_shared_reg_count;
   uint32_t input_register_count;
   uint32_t work_size;
   uint32_t coefficient_register_count;
};

struct pvr_vertex_shader_state {
   /* Pointer to a buffer object that contains the shader binary. */
   struct pvr_suballoc_bo *bo;
   uint32_t entry_offset;

   /* 2 since we only need STATE_VARYING{0,1} state words. */
   uint32_t varying[2];

   struct pvr_pds_attrib_program
      pds_attrib_programs[PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT];

   struct pvr_pipeline_stage_state stage_state;
   /* FIXME: Move this into stage_state? */
   struct pvr_stage_allocation_descriptor_state descriptor_state;
   uint32_t vertex_input_size;
   uint32_t vertex_output_size;
   uint32_t user_clip_planes_mask;
};

struct pvr_fragment_shader_state {
   /* Pointer to a buffer object that contains the shader binary. */
   struct pvr_suballoc_bo *bo;
   uint32_t entry_offset;

   struct pvr_pipeline_stage_state stage_state;
   /* FIXME: Move this into stage_state? */
   struct pvr_stage_allocation_descriptor_state descriptor_state;
   enum PVRX(TA_PASSTYPE) pass_type;

   struct pvr_pds_upload pds_coeff_program;
   struct pvr_pds_upload pds_fragment_program;
};

struct pvr_pipeline {
   struct vk_object_base base;

   enum pvr_pipeline_type type;

   /* Saved information from pCreateInfo. */
   struct pvr_pipeline_layout *layout;
};

struct pvr_compute_pipeline {
   struct pvr_pipeline base;

   struct pvr_compute_shader_state shader_state;

   struct {
      uint32_t base_workgroup : 1;
   } flags;

   struct pvr_stage_allocation_descriptor_state descriptor_state;

   struct pvr_pds_upload primary_program;
   struct pvr_pds_info primary_program_info;

   struct pvr_pds_base_workgroup_program {
      struct pvr_pds_upload code_upload;

      uint32_t *data_section;
      /* Offset within the PDS data section at which the base workgroup id
       * resides.
       */
      uint32_t base_workgroup_data_patching_offset;

      struct pvr_pds_info info;
   } primary_base_workgroup_variant_program;
};

struct pvr_graphics_pipeline {
   struct pvr_pipeline base;

   struct vk_dynamic_graphics_state dynamic_state;

   /* Derived and other state */
   size_t stage_indices[MESA_SHADER_FRAGMENT + 1];

   struct {
      struct pvr_vertex_shader_state vertex;
      struct pvr_fragment_shader_state fragment;
   } shader_state;
};

struct pvr_query_pool {
   struct vk_object_base base;

   /* Stride of result_buffer to get to the start of the results for the next
    * Phantom.
    */
   uint32_t result_stride;

   uint32_t query_count;

   struct pvr_suballoc_bo *result_buffer;
   struct pvr_suballoc_bo *availability_buffer;
};
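/* Illustrative sketch (not part of the driver): result_stride separates the
 * per-Phantom result blocks inside result_buffer, so the results written by
 * a Phantom with index `p` would start at a byte offset computed roughly as:
 *
 *    uint64_t phantom_results_offset = (uint64_t)p * pool->result_stride;
 */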
struct pvr_private_compute_pipeline {
   /* Used by pvr_compute_update_kernel_private(). */
   uint32_t pds_code_offset;
   uint32_t pds_data_offset;
   uint32_t pds_data_size_dw;
   uint32_t pds_temps_used;
   uint32_t coeff_regs_count;
   uint32_t unified_store_regs_count;
   VkExtent3D workgroup_size;

   /* Used by pvr_compute_update_shared_private(). */
   uint32_t pds_shared_update_code_offset;
   uint32_t pds_shared_update_data_offset;
   uint32_t pds_shared_update_data_size_dw;

   /* Used by both pvr_compute_update_{kernel,shared}_private(). */
   uint32_t const_shared_regs_count;

   pvr_dev_addr_t const_buffer_addr;
};

struct pvr_query_info {
   enum pvr_query_type type;

   union {
      struct {
         uint32_t num_query_indices;
         struct pvr_suballoc_bo *index_bo;
         uint32_t num_queries;
         struct pvr_suballoc_bo *availability_bo;
      } availability_write;

      struct {
         VkQueryPool query_pool;
         uint32_t first_query;
         uint32_t query_count;
      } reset_query_pool;

      struct {
         VkQueryPool query_pool;
         uint32_t first_query;
         uint32_t query_count;
         VkBuffer dst_buffer;
         VkDeviceSize dst_offset;
         VkDeviceSize stride;
         VkQueryResultFlags flags;
      } copy_query_results;
   };
};

struct pvr_render_target {
   struct pvr_rt_dataset *rt_dataset;

   pthread_mutex_t mutex;

   bool valid;
};

struct pvr_framebuffer {
   struct vk_object_base base;

   /* Saved information from pCreateInfo. */
   uint32_t width;
   uint32_t height;
   uint32_t layers;

   uint32_t attachment_count;
   struct pvr_image_view **attachments;

   /* Derived and other state. */
   struct pvr_suballoc_bo *ppp_state_bo;
   /* PPP state size in dwords. */
   size_t ppp_state_size;

   uint32_t render_targets_count;
   struct pvr_render_target *render_targets;

   struct pvr_spm_scratch_buffer *scratch_buffer;

   uint32_t render_count;
   struct pvr_spm_eot_state *spm_eot_state_per_render;
   struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
};

struct pvr_render_pass_attachment {
   /* Saved information from pCreateInfo. */
   VkAttachmentLoadOp load_op;

   VkAttachmentStoreOp store_op;

   VkAttachmentLoadOp stencil_load_op;

   VkAttachmentStoreOp stencil_store_op;

   VkFormat vk_format;
   uint32_t sample_count;
   VkImageLayout initial_layout;

   /* Derived and other state. */
   VkImageAspectFlags aspects;

   /* Can this surface be resolved by the PBE. */
   bool is_pbe_downscalable;

   uint32_t index;
};

struct pvr_render_subpass {
   /* Saved information from pCreateInfo. */
   /* The number of samples per color attachment (or depth attachment if
    * z-only).
    */
   /* FIXME: rename to 'samples' to match struct pvr_image */
   uint32_t sample_count;

   uint32_t color_count;
   uint32_t *color_attachments;
   uint32_t *resolve_attachments;

   uint32_t input_count;
   uint32_t *input_attachments;

   uint32_t depth_stencil_attachment;

   /* Derived and other state. */
   uint32_t dep_count;
   uint32_t *dep_list;

   /* Array with dep_count elements. flush_on_dep[x] is true if this subpass
    * and the subpass dep_list[x] can't be in the same hardware render.
    */
   bool *flush_on_dep;

   uint32_t index;

   uint32_t isp_userpass;

   VkPipelineBindPoint pipeline_bind_point;
};

struct pvr_render_pass {
   struct vk_object_base base;

   /* Saved information from pCreateInfo. */
   uint32_t attachment_count;

   struct pvr_render_pass_attachment *attachments;

   uint32_t subpass_count;

   struct pvr_render_subpass *subpasses;

   struct pvr_renderpass_hwsetup *hw_setup;

   /* Derived and other state. */
   /* FIXME: rename to 'max_samples' as we use 'samples' elsewhere */
   uint32_t max_sample_count;

   /* The maximum number of tile buffers to use in any subpass. */
   uint32_t max_tilebuffer_count;
};
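/* Illustrative sketch (not part of the driver): flush_on_dep marks which
 * subpass dependencies force a hardware render split, so a consumer checking
 * whether a subpass can share a HW render with its dependencies might do:
 *
 *    for (uint32_t i = 0; i < subpass->dep_count; i++) {
 *       if (subpass->flush_on_dep[i])
 *          hw_render_split(subpass->index, subpass->dep_list[i]);
 *    }
 *
 * where hw_render_split() is a hypothetical handler.
 */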
/* Max render targets for the clears/loads state in a load op.
 * To account for resolve attachments, double the color attachments.
 */
#define PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS (PVR_MAX_COLOR_ATTACHMENTS * 2)

struct pvr_load_op {
   bool is_hw_object;

   struct pvr_suballoc_bo *usc_frag_prog_bo;
   uint32_t const_shareds_count;
   uint32_t shareds_dest_offset;
   uint32_t shareds_count;

   struct pvr_pds_upload pds_frag_prog;

   struct pvr_pds_upload pds_tex_state_prog;
   uint32_t temps_count;

   union {
      const struct pvr_renderpass_hwsetup_render *hw_render;
      const struct pvr_render_subpass *subpass;
   };

   /* TODO: We might not need to keep all of this around. Some stuff might
    * just be for the compiler to ingest which we can then discard.
    */
   struct {
      uint16_t rt_clear_mask;
      uint16_t rt_load_mask;

      uint16_t unresolved_msaa_mask;

      /* The format to write to the output regs. */
      VkFormat dest_vk_format[PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS];

#define PVR_NO_DEPTH_CLEAR_TO_REG (-1)
      /* If >= 0, write a depth clear value to the specified pixel output. */
      int32_t depth_clear_to_reg;
   } clears_loads_state;
};

#define CHECK_MASK_SIZE(_struct_type, _field_name, _nr_bits)               \
   static_assert(sizeof(((struct _struct_type *)NULL)->_field_name) * 8 >= \
                    _nr_bits,                                              \
                 #_field_name " mask of struct " #_struct_type " too small")

CHECK_MASK_SIZE(pvr_load_op,
                clears_loads_state.rt_clear_mask,
                PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
CHECK_MASK_SIZE(pvr_load_op,
                clears_loads_state.rt_load_mask,
                PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
CHECK_MASK_SIZE(pvr_load_op,
                clears_loads_state.unresolved_msaa_mask,
                PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);

#undef CHECK_MASK_SIZE

uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
   const struct pvr_device_info *dev_info,
   const struct pvr_device_runtime_info *dev_runtime_info,
   uint32_t fs_common_size,
   uint32_t min_tiles_in_flight);

VkResult pvr_wsi_init(struct pvr_physical_device *pdevice);
void pvr_wsi_finish(struct pvr_physical_device *pdevice);

VkResult pvr_queues_create(struct pvr_device *device,
                           const VkDeviceCreateInfo *pCreateInfo);
void pvr_queues_destroy(struct pvr_device *device);

VkResult pvr_bind_memory(struct pvr_device *device,
                         struct pvr_device_memory *mem,
                         VkDeviceSize offset,
                         VkDeviceSize size,
                         VkDeviceSize alignment,
                         struct pvr_winsys_vma **const vma_out,
                         pvr_dev_addr_t *const dev_addr_out);
void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma);

VkResult pvr_gpu_upload(struct pvr_device *device,
                        struct pvr_winsys_heap *heap,
                        const void *data,
                        size_t size,
                        uint64_t alignment,
                        struct pvr_suballoc_bo **const pvr_bo_out);
VkResult pvr_gpu_upload_pds(struct pvr_device *device,
                            const uint32_t *data,
                            uint32_t data_size_dwords,
                            uint32_t data_alignment,
                            const uint32_t *code,
                            uint32_t code_size_dwords,
                            uint32_t code_alignment,
                            uint64_t min_alignment,
                            struct pvr_pds_upload *const pds_upload_out);
VkResult pvr_gpu_upload_usc(struct pvr_device *device,
                            const void *code,
                            size_t code_size,
                            uint64_t code_alignment,
                            struct pvr_suballoc_bo **const pvr_bo_out);

VkResult pvr_cmd_buffer_add_transfer_cmd(struct pvr_cmd_buffer *cmd_buffer,
                                         struct pvr_transfer_cmd *transfer_cmd);

VkResult pvr_cmd_buffer_alloc_mem(struct pvr_cmd_buffer *cmd_buffer,
                                  struct pvr_winsys_heap *heap,
                                  uint64_t size,
                                  struct pvr_suballoc_bo **const pvr_bo_out);

void pvr_calculate_vertex_cam_size(const struct pvr_device_info *dev_info,
                                   const uint32_t vs_output_size,
                                   const bool raster_enable,
                                   uint32_t *const cam_size_out,
                                   uint32_t *const vs_max_instances_out);

void pvr_get_image_subresource_layout(const struct pvr_image *image,
                                      const VkImageSubresource *subresource,
                                      VkSubresourceLayout *layout);
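/* Illustrative sketch (not part of the driver): a caller uploading a PDS
 * program via the pvr_gpu_upload_pds() declaration above, assuming
 * hypothetical `data_dw`/`code_dw` dword arrays, 4-byte segment alignments
 * and a 16-byte minimum allocation alignment:
 *
 *    struct pvr_pds_upload upload;
 *    VkResult result = pvr_gpu_upload_pds(device,
 *                                         data_dw, data_size_dw, 4,
 *                                         code_dw, code_size_dw, 4,
 *                                         16, &upload);
 *    if (result != VK_SUCCESS)
 *       return result;
 */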
static inline struct pvr_compute_pipeline *
to_pvr_compute_pipeline(struct pvr_pipeline *pipeline)
{
   assert(pipeline->type == PVR_PIPELINE_TYPE_COMPUTE);
   return container_of(pipeline, struct pvr_compute_pipeline, base);
}

static inline struct pvr_graphics_pipeline *
to_pvr_graphics_pipeline(struct pvr_pipeline *pipeline)
{
   assert(pipeline->type == PVR_PIPELINE_TYPE_GRAPHICS);
   return container_of(pipeline, struct pvr_graphics_pipeline, base);
}

static inline const struct pvr_image *
vk_to_pvr_image(const struct vk_image *image)
{
   return container_of(image, const struct pvr_image, vk);
}

static inline const struct pvr_image *
pvr_image_view_get_image(const struct pvr_image_view *const iview)
{
   return vk_to_pvr_image(iview->vk.image);
}

static enum pvr_pipeline_stage_bits
pvr_stage_mask(VkPipelineStageFlags2 stage_mask)
{
   enum pvr_pipeline_stage_bits stages = 0;

   if (stage_mask & VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)
      return PVR_PIPELINE_STAGE_ALL_BITS;

   if (stage_mask & (VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT))
      stages |= PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS;

   if (stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
                     VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
                     VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
                     VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
                     VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
                     VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) {
      stages |= PVR_PIPELINE_STAGE_GEOM_BIT;
   }

   if (stage_mask & (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
                     VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
                     VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
                     VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
      stages |= PVR_PIPELINE_STAGE_FRAG_BIT;
   }

   if (stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
                     VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT)) {
      stages |= PVR_PIPELINE_STAGE_COMPUTE_BIT;
   }

   if (stage_mask & (VK_PIPELINE_STAGE_TRANSFER_BIT))
      stages |= PVR_PIPELINE_STAGE_TRANSFER_BIT;

   return stages;
}

static inline enum pvr_pipeline_stage_bits
pvr_stage_mask_src(VkPipelineStageFlags2 stage_mask)
{
   /* If the source is bottom of pipe, all stages will need to be waited for.
    */
   if (stage_mask & VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
      return PVR_PIPELINE_STAGE_ALL_BITS;

   return pvr_stage_mask(stage_mask);
}

static inline enum pvr_pipeline_stage_bits
pvr_stage_mask_dst(VkPipelineStageFlags2 stage_mask)
{
   /* If the destination is top of pipe, all stages should be blocked by
    * prior commands.
    */
   if (stage_mask & VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)
      return PVR_PIPELINE_STAGE_ALL_BITS;

   return pvr_stage_mask(stage_mask);
}

static inline bool pvr_sub_cmd_gfx_requires_split_submit(
   const struct pvr_sub_cmd_gfx *const sub_cmd)
{
   return sub_cmd->job.run_frag && sub_cmd->framebuffer->layers > 1;
}
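/* Illustrative sketch (not part of the driver): a barrier implementation
 * would translate the two sides of a VkMemoryBarrier2 with the matching
 * helper for each direction:
 *
 *    const VkMemoryBarrier2 *barrier = ...;
 *    enum pvr_pipeline_stage_bits wait_for =
 *       pvr_stage_mask_src(barrier->srcStageMask);
 *    enum pvr_pipeline_stage_bits wait_at =
 *       pvr_stage_mask_dst(barrier->dstStageMask);
 */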
/* This function is intended to be used when the error being set has been
 * returned from a function call, i.e. the error happened further down the
 * stack. `vk_command_buffer_set_error()` should be used at the point an error
 * occurs, i.e. VK_ERROR_* is being passed in.
 *
 * This ensures we only ever get the error printed once.
 */
static inline VkResult
pvr_cmd_buffer_set_error_unwarned(struct pvr_cmd_buffer *cmd_buffer,
                                  VkResult error)
{
   assert(error != VK_SUCCESS);

   if (cmd_buffer->vk.record_result == VK_SUCCESS)
      cmd_buffer->vk.record_result = error;

   return error;
}
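/* Illustrative sketch (not part of the driver): the convention described
 * above in practice. A newly raised error goes through
 * vk_command_buffer_set_error(), while an error propagated from a callee is
 * recorded without warning a second time:
 *
 *    if (out_of_space)
 *       return vk_command_buffer_set_error(&cmd_buffer->vk,
 *                                          VK_ERROR_OUT_OF_DEVICE_MEMORY);
 *
 *    result = pvr_cmd_buffer_alloc_mem(cmd_buffer, heap, size, &bo);
 *    if (result != VK_SUCCESS)
 *       return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
 */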
VkResult pvr_pds_fragment_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   const struct pvr_suballoc_bo *fragment_shader_bo,
   uint32_t fragment_temp_count,
   enum rogue_msaa_mode msaa_mode,
   bool has_phase_rate_change,
   struct pvr_pds_upload *const pds_upload_out);

VkResult pvr_pds_unitex_state_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   uint32_t texture_kicks,
   uint32_t uniform_kicks,
   struct pvr_pds_upload *const pds_upload_out);

VkResult pvr_device_tile_buffer_ensure_cap(struct pvr_device *device,
                                           uint32_t capacity,
                                           uint32_t size_in_bytes);

VkResult pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer,
                                       const void *const data,
                                       const size_t size,
                                       struct pvr_suballoc_bo **const pvr_bo_out);
VkResult pvr_cmd_buffer_upload_pds(struct pvr_cmd_buffer *const cmd_buffer,
                                   const uint32_t *data,
                                   uint32_t data_size_dwords,
                                   uint32_t data_alignment,
                                   const uint32_t *code,
                                   uint32_t code_size_dwords,
                                   uint32_t code_alignment,
                                   uint64_t min_alignment,
                                   struct pvr_pds_upload *const pds_upload_out);

VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
                                      enum pvr_sub_cmd_type type);
VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer);

void pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer,
                                struct pvr_sub_cmd_compute *const sub_cmd,
                                bool deallocate_shareds);
void pvr_compute_update_shared_private(
   struct pvr_cmd_buffer *cmd_buffer,
   struct pvr_sub_cmd_compute *const sub_cmd,
   struct pvr_private_compute_pipeline *pipeline);
void pvr_compute_update_kernel_private(
   struct pvr_cmd_buffer *cmd_buffer,
   struct pvr_sub_cmd_compute *const sub_cmd,
   struct pvr_private_compute_pipeline *pipeline,
   const uint32_t global_workgroup_size[static const PVR_WORKGROUP_DIMENSIONS]);

size_t pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes(void);

VkResult pvr_pds_compute_shader_create_and_upload(
   struct pvr_device *device,
   struct pvr_pds_compute_shader_program *program,
   struct pvr_pds_upload *const pds_upload_out);

VkResult pvr_device_create_compute_query_programs(struct pvr_device *device);
void pvr_device_destroy_compute_query_programs(struct pvr_device *device);

VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
                               const struct pvr_query_info *query_info);

void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer,
                                    bool start_geom);

const struct pvr_renderpass_hwsetup_subpass *
pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass);

void pvr_descriptor_size_info_init(
   const struct pvr_device *device,
   VkDescriptorType type,
   struct pvr_descriptor_size_info *const size_info_out);

#define PVR_FROM_HANDLE(__pvr_type, __name, __handle) \
   VK_FROM_HANDLE(__pvr_type, __name, __handle)

VK_DEFINE_HANDLE_CASTS(pvr_cmd_buffer,
                       vk.base,
                       VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(pvr_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(pvr_instance,
                       vk.base,
                       VkInstance,
                       VK_OBJECT_TYPE_INSTANCE)
VK_DEFINE_HANDLE_CASTS(pvr_physical_device,
                       vk.base,
                       VkPhysicalDevice,
                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(pvr_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)

VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_device_memory,
                               base,
                               VkDeviceMemory,
                               VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_buffer,
                               vk.base,
                               VkBuffer,
                               VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_image_view,
                               vk.base,
                               VkImageView,
                               VK_OBJECT_TYPE_IMAGE_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_buffer_view,
                               base,
                               VkBufferView,
                               VK_OBJECT_TYPE_BUFFER_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_set_layout,
                               base,
                               VkDescriptorSetLayout,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_set,
                               base,
                               VkDescriptorSet,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_pool,
                               base,
                               VkDescriptorPool,
                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_sampler,
                               vk.base,
                               VkSampler,
                               VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline_layout,
                               base,
                               VkPipelineLayout,
                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline,
                               base,
                               VkPipeline,
                               VK_OBJECT_TYPE_PIPELINE)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_query_pool,
                               base,
                               VkQueryPool,
                               VK_OBJECT_TYPE_QUERY_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_framebuffer,
                               base,
                               VkFramebuffer,
                               VK_OBJECT_TYPE_FRAMEBUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_render_pass,
                               base,
                               VkRenderPass,
                               VK_OBJECT_TYPE_RENDER_PASS)

#define PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer)                  \
   do {                                                                      \
      struct pvr_cmd_buffer *const _cmd_buffer = (cmd_buffer);               \
      const VkResult _record_result =                                        \
         vk_command_buffer_get_record_result(&_cmd_buffer->vk);              \
                                                                             \
      if (_cmd_buffer->vk.state != MESA_VK_COMMAND_BUFFER_STATE_RECORDING) { \
         vk_errorf(_cmd_buffer,                                              \
                   VK_ERROR_OUT_OF_DEVICE_MEMORY,                            \
                   "Command buffer is not in recording state");              \
         return;                                                             \
      } else if (_record_result < VK_SUCCESS) {                              \
         vk_errorf(_cmd_buffer,                                              \
                   _record_result,                                           \
                   "Skipping function as command buffer has "                \
                   "previous build error");                                  \
         return;                                                             \
      }                                                                      \
   } while (0)

/**
 * Print a FINISHME message, including its source location.
 */
#define pvr_finishme(format, ...)              \
   do {                                        \
      static bool reported = false;            \
      if (!reported) {                         \
         mesa_logw("%s:%d: FINISHME: " format, \
                   __FILE__,                   \
                   __LINE__,                   \
                   ##__VA_ARGS__);             \
         reported = true;                      \
      }                                        \
   } while (false)

#define PVR_WRITE(_buffer, _value, _offset, _max)                \
   do {                                                          \
      __typeof__(_value) __value = _value;                       \
      uint64_t __offset = _offset;                               \
      uint32_t __nr_dwords = sizeof(__value) / sizeof(uint32_t); \
      static_assert(__same_type(*_buffer, __value),              \
                    "Buffer and value type mismatch");           \
      assert((__offset + __nr_dwords) <= (_max));                \
      assert((__offset % __nr_dwords) == 0U);                    \
      _buffer[__offset / __nr_dwords] = __value;                 \
   } while (0)

/* A non-fatal assert. Useful for debugging. */
#if MESA_DEBUG
#   define pvr_assert(x)                                           \
      ({                                                           \
         if (unlikely(!(x)))                                       \
            mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
      })
#else
#   define pvr_assert(x)
#endif

#endif /* PVR_PRIVATE_H */