/* * Copyright 2020 Advanced Micro Devices, Inc. * Copyright 2020 Valve Corporation * * SPDX-License-Identifier: MIT */ #ifndef AC_SQTT_H #define AC_SQTT_H #include #include #include #include "ac_pm4.h" #include "ac_rgp.h" #include "amd_family.h" struct radeon_cmdbuf; struct radeon_info; /** * SQ Thread tracing is a tracing mechanism that allows taking a detailed look * at what the shader cores are doing. * * Among the things recorded are: * - draws/dispatches + state * - when each wave starts and stops. * - for one SIMD per SE all instructions executed on that SIMD. * * The hardware stores all these as events in a buffer, no manual barrier * around each command needed. The primary user of this is RGP. */ struct ac_sqtt { struct radeon_cmdbuf *start_cs[2]; struct radeon_cmdbuf *stop_cs[2]; /* struct radeon_winsys_bo or struct pb_buffer */ void *bo; uint64_t buffer_va; void *ptr; uint32_t buffer_size; int start_frame; char *trigger_file; bool instruction_timing_enabled; uint32_t cmdbuf_ids_per_queue[AMD_NUM_IP_TYPES]; struct rgp_code_object rgp_code_object; struct rgp_loader_events rgp_loader_events; struct rgp_pso_correlation rgp_pso_correlation; struct rgp_queue_info rgp_queue_info; struct rgp_queue_event rgp_queue_event; struct rgp_clock_calibration rgp_clock_calibration; struct hash_table_u64 *pipeline_bos; }; #define SQTT_BUFFER_ALIGN_SHIFT 12 struct ac_sqtt_data_info { uint32_t cur_offset; uint32_t trace_status; union { uint32_t gfx9_write_counter; uint32_t gfx10_dropped_cntr; }; }; struct ac_sqtt_data_se { struct ac_sqtt_data_info info; void *data_ptr; uint32_t shader_engine; uint32_t compute_unit; }; #define SQTT_MAX_TRACES 6 struct ac_sqtt_trace { const struct rgp_code_object *rgp_code_object; const struct rgp_loader_events *rgp_loader_events; const struct rgp_pso_correlation *rgp_pso_correlation; const struct rgp_queue_info *rgp_queue_info; const struct rgp_queue_event *rgp_queue_event; const struct rgp_clock_calibration *rgp_clock_calibration; uint32_t num_traces; struct ac_sqtt_data_se traces[SQTT_MAX_TRACES]; }; uint64_t ac_sqtt_get_info_offset(unsigned se); uint64_t ac_sqtt_get_data_offset(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt, unsigned se); void ac_sqtt_init(struct ac_sqtt *data); void ac_sqtt_finish(struct ac_sqtt *data); bool ac_is_sqtt_complete(const struct radeon_info *rad_info, const struct ac_sqtt *sqtt, const struct ac_sqtt_data_info *info); uint32_t ac_get_expected_buffer_size(struct radeon_info *rad_info, const struct ac_sqtt_data_info *info); /** * Identifiers for RGP SQ thread-tracing markers (Table 1) */ enum rgp_sqtt_marker_identifier { RGP_SQTT_MARKER_IDENTIFIER_EVENT = 0x0, RGP_SQTT_MARKER_IDENTIFIER_CB_START = 0x1, RGP_SQTT_MARKER_IDENTIFIER_CB_END = 0x2, RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START = 0x3, RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END = 0x4, RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT = 0x5, RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API = 0x6, RGP_SQTT_MARKER_IDENTIFIER_SYNC = 0x7, RGP_SQTT_MARKER_IDENTIFIER_PRESENT = 0x8, RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION = 0x9, RGP_SQTT_MARKER_IDENTIFIER_RENDER_PASS = 0xA, RGP_SQTT_MARKER_IDENTIFIER_RESERVED2 = 0xB, RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE = 0xC, RGP_SQTT_MARKER_IDENTIFIER_RESERVED4 = 0xD, RGP_SQTT_MARKER_IDENTIFIER_RESERVED5 = 0xE, RGP_SQTT_MARKER_IDENTIFIER_RESERVED6 = 0xF }; /** * Command buffer IDs used in RGP SQ thread-tracing markers (only 20 bits). */ union rgp_sqtt_marker_cb_id { struct { uint32_t per_frame : 1; /* Must be 1, frame-based command buffer ID. */ uint32_t frame_index : 7; uint32_t cb_index : 12; /* Command buffer index within the frame. */ uint32_t reserved : 12; } per_frame_cb_id; struct { uint32_t per_frame : 1; /* Must be 0, global command buffer ID. */ uint32_t cb_index : 19; /* Global command buffer index. */ uint32_t reserved : 12; } global_cb_id; uint32_t all; }; /** * RGP SQ thread-tracing marker for the start of a command buffer. (Table 2) */ struct rgp_sqtt_marker_cb_start { union { struct { uint32_t identifier : 4; uint32_t ext_dwords : 3; uint32_t cb_id : 20; uint32_t queue : 5; }; uint32_t dword01; }; union { uint32_t device_id_low; uint32_t dword02; }; union { uint32_t device_id_high; uint32_t dword03; }; union { uint32_t queue_flags; uint32_t dword04; }; }; static_assert(sizeof(struct rgp_sqtt_marker_cb_start) == 16, "rgp_sqtt_marker_cb_start doesn't match RGP spec"); /** * * RGP SQ thread-tracing marker for the end of a command buffer. (Table 3) */ struct rgp_sqtt_marker_cb_end { union { struct { uint32_t identifier : 4; uint32_t ext_dwords : 3; uint32_t cb_id : 20; uint32_t reserved : 5; }; uint32_t dword01; }; union { uint32_t device_id_low; uint32_t dword02; }; union { uint32_t device_id_high; uint32_t dword03; }; }; static_assert(sizeof(struct rgp_sqtt_marker_cb_end) == 12, "rgp_sqtt_marker_cb_end doesn't match RGP spec"); /** * API types used in RGP SQ thread-tracing markers for the "General API" * packet. */ enum rgp_sqtt_marker_general_api_type { ApiCmdBindPipeline = 0, ApiCmdBindDescriptorSets = 1, ApiCmdBindIndexBuffer = 2, ApiCmdBindVertexBuffers = 3, ApiCmdDraw = 4, ApiCmdDrawIndexed = 5, ApiCmdDrawIndirect = 6, ApiCmdDrawIndexedIndirect = 7, ApiCmdDrawIndirectCountAMD = 8, ApiCmdDrawIndexedIndirectCountAMD = 9, ApiCmdDispatch = 10, ApiCmdDispatchIndirect = 11, ApiCmdCopyBuffer = 12, ApiCmdCopyImage = 13, ApiCmdBlitImage = 14, ApiCmdCopyBufferToImage = 15, ApiCmdCopyImageToBuffer = 16, ApiCmdUpdateBuffer = 17, ApiCmdFillBuffer = 18, ApiCmdClearColorImage = 19, ApiCmdClearDepthStencilImage = 20, ApiCmdClearAttachments = 21, ApiCmdResolveImage = 22, ApiCmdWaitEvents = 23, ApiCmdPipelineBarrier = 24, ApiCmdBeginQuery = 25, ApiCmdEndQuery = 26, ApiCmdResetQueryPool = 27, ApiCmdWriteTimestamp = 28, ApiCmdCopyQueryPoolResults = 29, ApiCmdPushConstants = 30, ApiCmdBeginRenderPass = 31, ApiCmdNextSubpass = 32, ApiCmdEndRenderPass = 33, ApiCmdExecuteCommands = 34, ApiCmdSetViewport = 35, ApiCmdSetScissor = 36, ApiCmdSetLineWidth = 37, ApiCmdSetDepthBias = 38, ApiCmdSetBlendConstants = 39, ApiCmdSetDepthBounds = 40, ApiCmdSetStencilCompareMask = 41, ApiCmdSetStencilWriteMask = 42, ApiCmdSetStencilReference = 43, ApiCmdDrawIndirectCount = 44, ApiCmdDrawIndexedIndirectCount = 45, /* gap */ ApiCmdDrawMeshTasksEXT = 47, ApiCmdDrawMeshTasksIndirectCountEXT = 48, ApiCmdDrawMeshTasksIndirectEXT = 49, ApiRayTracingSeparateCompiled = 0x800000, ApiInvalid = 0xffffffff }; /** * RGP SQ thread-tracing marker for a "General API" instrumentation packet. */ struct rgp_sqtt_marker_general_api { union { struct { uint32_t identifier : 4; uint32_t ext_dwords : 3; uint32_t api_type : 20; uint32_t is_end : 1; uint32_t reserved : 4; }; uint32_t dword01; }; }; static_assert(sizeof(struct rgp_sqtt_marker_general_api) == 4, "rgp_sqtt_marker_general_api doesn't match RGP spec"); /** * API types used in RGP SQ thread-tracing markers (Table 16). */ enum rgp_sqtt_marker_event_type { EventCmdDraw = 0, EventCmdDrawIndexed = 1, EventCmdDrawIndirect = 2, EventCmdDrawIndexedIndirect = 3, EventCmdDrawIndirectCountAMD = 4, EventCmdDrawIndexedIndirectCountAMD = 5, EventCmdDispatch = 6, EventCmdDispatchIndirect = 7, EventCmdCopyBuffer = 8, EventCmdCopyImage = 9, EventCmdBlitImage = 10, EventCmdCopyBufferToImage = 11, EventCmdCopyImageToBuffer = 12, EventCmdUpdateBuffer = 13, EventCmdFillBuffer = 14, EventCmdClearColorImage = 15, EventCmdClearDepthStencilImage = 16, EventCmdClearAttachments = 17, EventCmdResolveImage = 18, EventCmdWaitEvents = 19, EventCmdPipelineBarrier = 20, EventCmdResetQueryPool = 21, EventCmdCopyQueryPoolResults = 22, EventRenderPassColorClear = 23, EventRenderPassDepthStencilClear = 24, EventRenderPassResolve = 25, EventInternalUnknown = 26, EventCmdDrawIndirectCount = 27, EventCmdDrawIndexedIndirectCount = 28, /* gap */ EventCmdTraceRaysKHR = 30, EventCmdTraceRaysIndirectKHR = 31, EventCmdBuildAccelerationStructuresKHR = 32, EventCmdBuildAccelerationStructuresIndirectKHR = 33, EventCmdCopyAccelerationStructureKHR = 34, EventCmdCopyAccelerationStructureToMemoryKHR = 35, EventCmdCopyMemoryToAccelerationStructureKHR = 36, /* gap */ EventCmdDrawMeshTasksEXT = 41, EventCmdDrawMeshTasksIndirectCountEXT = 42, EventCmdDrawMeshTasksIndirectEXT = 43, EventUnknown = 0x7fff, EventInvalid = 0xffffffff }; /** * "Event (Per-draw/dispatch)" RGP SQ thread-tracing marker. (Table 4) */ struct rgp_sqtt_marker_event { union { struct { uint32_t identifier : 4; uint32_t ext_dwords : 3; uint32_t api_type : 24; uint32_t has_thread_dims : 1; }; uint32_t dword01; }; union { struct { uint32_t cb_id : 20; uint32_t vertex_offset_reg_idx : 4; uint32_t instance_offset_reg_idx : 4; uint32_t draw_index_reg_idx : 4; }; uint32_t dword02; }; union { uint32_t cmd_id; uint32_t dword03; }; }; static_assert(sizeof(struct rgp_sqtt_marker_event) == 12, "rgp_sqtt_marker_event doesn't match RGP spec"); /** * Per-dispatch specific marker where workgroup dims are included. */ struct rgp_sqtt_marker_event_with_dims { struct rgp_sqtt_marker_event event; uint32_t thread_x; uint32_t thread_y; uint32_t thread_z; }; static_assert(sizeof(struct rgp_sqtt_marker_event_with_dims) == 24, "rgp_sqtt_marker_event_with_dims doesn't match RGP spec"); /** * "Barrier Start" RGP SQTT instrumentation marker (Table 5) */ struct rgp_sqtt_marker_barrier_start { union { struct { uint32_t identifier : 4; uint32_t ext_dwords : 3; uint32_t cb_id : 20; uint32_t reserved : 5; }; uint32_t dword01; }; union { struct { uint32_t driver_reason : 31; uint32_t internal : 1; }; uint32_t dword02; }; }; static_assert(sizeof(struct rgp_sqtt_marker_barrier_start) == 8, "rgp_sqtt_marker_barrier_start doesn't match RGP spec"); /** * "Barrier End" RGP SQTT instrumentation marker (Table 6) */ struct rgp_sqtt_marker_barrier_end { union { struct { uint32_t identifier : 4; uint32_t ext_dwords : 3; uint32_t cb_id : 20; uint32_t wait_on_eop_ts : 1; uint32_t vs_partial_flush : 1; uint32_t ps_partial_flush : 1; uint32_t cs_partial_flush : 1; uint32_t pfp_sync_me : 1; }; uint32_t dword01; }; union { struct { uint32_t sync_cp_dma : 1; uint32_t inval_tcp : 1; uint32_t inval_sqI : 1; uint32_t inval_sqK : 1; uint32_t flush_tcc : 1; uint32_t inval_tcc : 1; uint32_t flush_cb : 1; uint32_t inval_cb : 1; uint32_t flush_db : 1; uint32_t inval_db : 1; uint32_t num_layout_transitions : 16; uint32_t inval_gl1 : 1; uint32_t wait_on_ts : 1; uint32_t eop_ts_bottom_of_pipe : 1; uint32_t eos_ts_ps_done : 1; uint32_t eos_ts_cs_done : 1; uint32_t reserved : 1; }; uint32_t dword02; }; }; static_assert(sizeof(struct rgp_sqtt_marker_barrier_end) == 8, "rgp_sqtt_marker_barrier_end doesn't match RGP spec"); /** * "Layout Transition" RGP SQTT instrumentation marker (Table 7) */ struct rgp_sqtt_marker_layout_transition { union { struct { uint32_t identifier : 4; uint32_t ext_dwords : 3; uint32_t depth_stencil_expand : 1; uint32_t htile_hiz_range_expand : 1; uint32_t depth_stencil_resummarize : 1; uint32_t dcc_decompress : 1; uint32_t fmask_decompress : 1; uint32_t fast_clear_eliminate : 1; uint32_t fmask_color_expand : 1; uint32_t init_mask_ram : 1; uint32_t reserved1 : 17; }; uint32_t dword01; }; union { struct { uint32_t reserved2 : 32; }; uint32_t dword02; }; }; static_assert(sizeof(struct rgp_sqtt_marker_layout_transition) == 8, "rgp_sqtt_marker_layout_transition doesn't match RGP spec"); /** * "User Event" RGP SQTT instrumentation marker (Table 8) */ struct rgp_sqtt_marker_user_event { union { struct { uint32_t identifier : 4; uint32_t reserved0 : 8; uint32_t data_type : 8; uint32_t reserved1 : 12; }; uint32_t dword01; }; }; struct rgp_sqtt_marker_user_event_with_length { struct rgp_sqtt_marker_user_event user_event; uint32_t length; }; static_assert(sizeof(struct rgp_sqtt_marker_user_event) == 4, "rgp_sqtt_marker_user_event doesn't match RGP spec"); enum rgp_sqtt_marker_user_event_type { UserEventTrigger = 0, UserEventPop, UserEventPush, UserEventObjectName, }; /** * "Pipeline bind" RGP SQTT instrumentation marker (Table 12) */ struct rgp_sqtt_marker_pipeline_bind { union { struct { uint32_t identifier : 4; uint32_t ext_dwords : 3; uint32_t bind_point : 1; uint32_t cb_id : 20; uint32_t reserved : 4; }; uint32_t dword01; }; union { uint32_t api_pso_hash[2]; struct { uint32_t dword02; uint32_t dword03; }; }; }; static_assert(sizeof(struct rgp_sqtt_marker_pipeline_bind) == 12, "rgp_sqtt_marker_pipeline_bind doesn't match RGP spec"); bool ac_sqtt_add_pso_correlation(struct ac_sqtt *sqtt, uint64_t pipeline_hash, uint64_t api_hash); bool ac_sqtt_add_code_object_loader_event(struct ac_sqtt *sqtt, uint64_t pipeline_hash, uint64_t base_address); bool ac_sqtt_add_clock_calibration(struct ac_sqtt *sqtt, uint64_t cpu_timestamp, uint64_t gpu_timestamp); bool ac_check_profile_state(const struct radeon_info *info); union rgp_sqtt_marker_cb_id ac_sqtt_get_next_cmdbuf_id(struct ac_sqtt *sqtt, enum amd_ip_type ip_type); bool ac_sqtt_get_trace(struct ac_sqtt *sqtt, const struct radeon_info *info, struct ac_sqtt_trace *sqtt_trace); uint32_t ac_sqtt_get_ctrl(const struct radeon_info *info, bool enable); uint32_t ac_sqtt_get_shader_mask(const struct radeon_info *info); void ac_sqtt_emit_start(const struct radeon_info *info, struct ac_pm4_state *pm4, const struct ac_sqtt *sqtt, bool is_compute_queue); void ac_sqtt_emit_stop(const struct radeon_info *info, struct ac_pm4_state *pm4, bool is_compute_queue); void ac_sqtt_emit_wait(const struct radeon_info *info, struct ac_pm4_state *pm4, const struct ac_sqtt *sqtt, bool is_compute_queue); #endif