/* * Copyright © 2022 Imagination Technologies Ltd. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include #include "pvr_csb.h" #include "pvr_debug.h" #include "pvr_job_common.h" #include "pvr_job_context.h" #include "pvr_job_compute.h" #include "pvr_private.h" #include "pvr_types.h" #include "pvr_winsys.h" #include "util/macros.h" static void pvr_submit_info_stream_init(struct pvr_compute_ctx *ctx, struct pvr_sub_cmd_compute *sub_cmd, struct pvr_winsys_compute_submit_info *submit_info) { const struct pvr_device *const device = ctx->device; const struct pvr_physical_device *const pdevice = device->pdevice; const struct pvr_device_runtime_info *const dev_runtime_info = &pdevice->dev_runtime_info; const struct pvr_device_info *const dev_info = &pdevice->dev_info; const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch; uint32_t *stream_ptr = (uint32_t *)submit_info->fw_stream; uint32_t *stream_len_ptr = stream_ptr; /* Leave space for stream header. */ stream_ptr += pvr_cmd_length(KMD_STREAM_HDR); pvr_csb_pack ((uint64_t *)stream_ptr, CR_TPU_BORDER_COLOUR_TABLE_CDM, value) { value.border_colour_table_address = device->border_color_table.table->vma->dev_addr; } stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_CDM); pvr_csb_pack ((uint64_t *)stream_ptr, CR_CDM_CTRL_STREAM_BASE, value) { value.addr = pvr_csb_get_start_address(&sub_cmd->control_stream); } stream_ptr += pvr_cmd_length(CR_CDM_CTRL_STREAM_BASE); pvr_csb_pack ((uint64_t *)stream_ptr, CR_CDM_CONTEXT_STATE_BASE, state) { state.addr = ctx_switch->compute_state_bo->vma->dev_addr; } stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_STATE_BASE); pvr_csb_pack (stream_ptr, CR_CDM_CONTEXT_PDS1, state) { const uint32_t load_program_data_size = PVR_DW_TO_BYTES(ctx_switch->sr[0].pds.load_program.data_size); state.pds_seq_dep = false; state.usc_seq_dep = false; state.target = false; state.unified_size = ctx_switch->sr[0].usc.unified_size; state.common_shared = true; state.common_size = DIV_ROUND_UP(sub_cmd->num_shared_regs << 2, PVRX(CR_CDM_CONTEXT_PDS1_COMMON_SIZE_UNIT_SIZE)); state.temp_size = 0; assert(load_program_data_size % PVRX(CR_CDM_CONTEXT_PDS1_DATA_SIZE_UNIT_SIZE) == 0); state.data_size = load_program_data_size / PVRX(CR_CDM_CONTEXT_PDS1_DATA_SIZE_UNIT_SIZE); state.fence = false; } stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_PDS1); if (PVR_HAS_FEATURE(dev_info, compute_morton_capable)) { pvr_csb_pack (stream_ptr, CR_CDM_ITEM, value) { value.mode = 0; } stream_ptr += pvr_cmd_length(CR_CDM_ITEM); } if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) { pvr_csb_pack (stream_ptr, CR_COMPUTE_CLUSTER, value) { if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) && dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) { /* Each phantom has its own MCU, so atomicity can only be * guaranteed when all work items are processed on the same * phantom. This means we need to disable all USCs other than * those of the first phantom, which has 4 clusters. */ value.mask = 0xFU; } else { value.mask = 0U; } } stream_ptr += pvr_cmd_length(CR_COMPUTE_CLUSTER); } if (PVR_HAS_FEATURE(dev_info, tpu_dm_global_registers)) { pvr_csb_pack (stream_ptr, CR_TPU_TAG_CDM_CTRL, value) { } stream_ptr += pvr_cmd_length(CR_TPU_TAG_CDM_CTRL); } if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) { pvr_finishme( "Emit execute_count when feature gpu_multicore_support is present"); *stream_ptr = 0; stream_ptr++; } submit_info->fw_stream_len = (uint8_t *)stream_ptr - (uint8_t *)submit_info->fw_stream; assert(submit_info->fw_stream_len <= ARRAY_SIZE(submit_info->fw_stream)); pvr_csb_pack ((uint64_t *)stream_len_ptr, KMD_STREAM_HDR, value) { value.length = submit_info->fw_stream_len; } } static void pvr_submit_info_ext_stream_init( struct pvr_compute_ctx *ctx, struct pvr_winsys_compute_submit_info *submit_info) { const struct pvr_device_info *const dev_info = &ctx->device->pdevice->dev_info; uint32_t *stream_ptr = (uint32_t *)submit_info->fw_stream; uint32_t main_stream_len = pvr_csb_unpack((uint64_t *)stream_ptr, KMD_STREAM_HDR).length; uint32_t *ext_stream_ptr = (uint32_t *)((uint8_t *)stream_ptr + main_stream_len); uint32_t *header0_ptr; header0_ptr = ext_stream_ptr; ext_stream_ptr += pvr_cmd_length(KMD_STREAM_EXTHDR_COMPUTE0); pvr_csb_pack (header0_ptr, KMD_STREAM_EXTHDR_COMPUTE0, header0) { if (PVR_HAS_QUIRK(dev_info, 49927)) { header0.has_brn49927 = true; pvr_csb_pack (ext_stream_ptr, CR_TPU, value) { value.tag_cem_4k_face_packing = true; } ext_stream_ptr += pvr_cmd_length(CR_TPU); } } if ((*header0_ptr & PVRX(KMD_STREAM_EXTHDR_DATA_MASK)) != 0) { submit_info->fw_stream_len = (uint8_t *)ext_stream_ptr - (uint8_t *)submit_info->fw_stream; assert(submit_info->fw_stream_len <= ARRAY_SIZE(submit_info->fw_stream)); } } static void pvr_submit_info_flags_init(const struct pvr_device_info *const dev_info, const struct pvr_sub_cmd_compute *const sub_cmd, struct pvr_winsys_compute_submit_flags *flags) { *flags = (struct pvr_winsys_compute_submit_flags){ .prevent_all_overlap = sub_cmd->uses_barrier, .use_single_core = PVR_HAS_FEATURE(dev_info, gpu_multicore_support) && sub_cmd->uses_atomic_ops, }; } static void pvr_compute_job_ws_submit_info_init( struct pvr_compute_ctx *ctx, struct pvr_sub_cmd_compute *sub_cmd, struct vk_sync *wait, struct pvr_winsys_compute_submit_info *submit_info) { const struct pvr_device *const device = ctx->device; const struct pvr_device_info *const dev_info = &device->pdevice->dev_info; memset(submit_info, 0, sizeof(*submit_info)); submit_info->frame_num = device->global_queue_present_count; submit_info->job_num = device->global_cmd_buffer_submit_count; submit_info->wait = wait; pvr_submit_info_stream_init(ctx, sub_cmd, submit_info); pvr_submit_info_ext_stream_init(ctx, submit_info); pvr_submit_info_flags_init(dev_info, sub_cmd, &submit_info->flags); } VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx, struct pvr_sub_cmd_compute *sub_cmd, struct vk_sync *wait, struct vk_sync *signal_sync) { struct pvr_winsys_compute_submit_info submit_info; struct pvr_device *device = ctx->device; pvr_compute_job_ws_submit_info_init(ctx, sub_cmd, wait, &submit_info); if (PVR_IS_DEBUG_SET(DUMP_CONTROL_STREAM)) { pvr_csb_dump(&sub_cmd->control_stream, submit_info.frame_num, submit_info.job_num); } return device->ws->ops->compute_submit(ctx->ws_ctx, &submit_info, &device->pdevice->dev_info, signal_sync); }