/* * Copyright © 2021 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #include "iris_batch.h" #include "iris_context.h" #include "iris_utrace.h" #include "util/u_trace_gallium.h" #include "ds/intel_driver_ds.h" #ifdef MAJOR_IN_MKDEV #include #endif #ifdef MAJOR_IN_SYSMACROS #include #endif #include #include #include /** Timestamp structure format */ union iris_utrace_timestamp { /* Timestamp writtem by either 2 * MI_STORE_REGISTER_MEM or * PIPE_CONTROL. */ uint64_t timestamp; /* Timestamp written by COMPUTE_WALKER::PostSync * * Layout is described in PRMs. * ATSM PRMs, Volume 2d: Command Reference: Structures, POSTSYNC_DATA: * * "The timestamp layout : * [0] = 32b Context Timestamp Start * [1] = 32b Global Timestamp Start * [2] = 32b Context Timestamp End * [3] = 32b Global Timestamp End" */ uint32_t gfx125_postsync_data[4]; /* Timestamp written by COMPUTE_WALKER::PostSync * * BSpec 56591: * * "The timestamp layout : * [0] = 64b Context Timestamp Start * [1] = 64b Global Timestamp Start * [2] = 64b Context Timestamp End * [3] = 64b Global Timestamp End" */ uint64_t gfx20_postsync_data[4]; }; static void * iris_utrace_create_buffer(struct u_trace_context *utctx, uint64_t size_B) { struct iris_context *ice = container_of(utctx, struct iris_context, ds.trace_context); struct pipe_context *ctx = &ice->ctx; struct iris_screen *screen = (struct iris_screen *)ctx->screen; struct iris_bo *bo = iris_bo_alloc(screen->bufmgr, "utrace timestamps", size_B, 16 /* alignment */, IRIS_MEMZONE_OTHER, BO_ALLOC_COHERENT | BO_ALLOC_SMEM); void *ptr = iris_bo_map(NULL, bo, MAP_READ | MAP_WRITE); memset(ptr, 0, size_B); return bo; } static void iris_utrace_delete_buffer(struct u_trace_context *utctx, void *timestamps) { struct iris_bo *bo = timestamps; iris_bo_unreference(bo); } static void iris_utrace_record_ts(struct u_trace *trace, void *cs, void *timestamps, uint64_t offset_B, uint32_t flags) { struct iris_batch *batch = container_of(trace, struct iris_batch, trace); struct iris_context *ice = batch->ice; struct iris_bo *bo = timestamps; iris_use_pinned_bo(batch, bo, true, IRIS_DOMAIN_NONE); const bool is_end_compute = cs == NULL && (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE_CS); if (is_end_compute) { assert(ice->utrace.last_compute_walker != NULL); batch->screen->vtbl.rewrite_compute_walker_pc( batch, ice->utrace.last_compute_walker, bo, offset_B); ice->utrace.last_compute_walker = NULL; } else if (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE) { iris_emit_pipe_control_write(batch, "query: pipelined snapshot write", PIPE_CONTROL_WRITE_TIMESTAMP, bo, offset_B, 0ull); } else { batch->screen->vtbl.store_register_mem64(batch, 0x2358, bo, offset_B, false); } } static uint64_t iris_utrace_read_ts(struct u_trace_context *utctx, void *timestamps, uint64_t offset_B, void *flush_data) { struct iris_context *ice = container_of(utctx, struct iris_context, ds.trace_context); struct pipe_context *ctx = &ice->ctx; struct iris_screen *screen = (struct iris_screen *)ctx->screen; struct iris_bo *bo = timestamps; if (offset_B == 0) iris_bo_wait_rendering(bo); union iris_utrace_timestamp *ts = iris_bo_map(NULL, bo, MAP_READ) + offset_B; /* Don't translate the no-timestamp marker: */ if (ts->timestamp == U_TRACE_NO_TIMESTAMP) return U_TRACE_NO_TIMESTAMP; /* Detect a 16/32 bytes timestamp write */ if (ts->gfx20_postsync_data[1] != 0 || ts->gfx20_postsync_data[2] != 0 || ts->gfx20_postsync_data[3] != 0) { if (screen->devinfo->ver >= 20) { return intel_device_info_timebase_scale(screen->devinfo, ts->gfx20_postsync_data[3]); } /* The timestamp written by COMPUTE_WALKER::PostSync only as 32bits. We * need to rebuild the full 64bits using the previous timestamp. We * assume that utrace is reading the timestamp in order. Anyway * timestamp rollover on 32bits in a few minutes so in most cases that * should be correct. */ uint64_t timestamp = (ice->utrace.last_full_timestamp & 0xffffffff00000000) | (uint64_t) ts->gfx125_postsync_data[3]; return intel_device_info_timebase_scale(screen->devinfo, timestamp); } ice->utrace.last_full_timestamp = ts->timestamp; return intel_device_info_timebase_scale(screen->devinfo, ts->timestamp); } static void iris_utrace_delete_flush_data(struct u_trace_context *utctx, void *flush_data) { free(flush_data); } void iris_utrace_flush(struct iris_batch *batch, uint64_t submission_id) { struct intel_ds_flush_data *flush_data = malloc(sizeof(*flush_data)); intel_ds_flush_data_init(flush_data, &batch->ds, submission_id); intel_ds_queue_flush_data(&batch->ds, &batch->trace, flush_data, U_TRACE_FRAME_UNKNOWN, false); } void iris_utrace_init(struct iris_context *ice) { struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen; struct stat st; uint32_t minor; if (fstat(screen->fd, &st) == 0) minor = minor(st.st_rdev); else minor = 0; intel_ds_device_init(&ice->ds, screen->devinfo, screen->fd, minor, INTEL_DS_API_OPENGL); u_trace_context_init(&ice->ds.trace_context, &ice->ctx, sizeof(union iris_utrace_timestamp), 0, iris_utrace_create_buffer, iris_utrace_delete_buffer, iris_utrace_record_ts, iris_utrace_read_ts, NULL, NULL, iris_utrace_delete_flush_data); for (int i = 0; i < IRIS_BATCH_COUNT; i++) { intel_ds_device_init_queue(&ice->ds, &ice->batches[i].ds, "%s", iris_batch_name_to_string(i)); } } void iris_utrace_fini(struct iris_context *ice) { intel_ds_device_fini(&ice->ds); } enum intel_ds_stall_flag iris_utrace_pipe_flush_bit_to_ds_stall_flag(uint32_t flags) { static const struct { uint32_t iris; enum intel_ds_stall_flag ds; } iris_to_ds_flags[] = { { .iris = PIPE_CONTROL_DEPTH_CACHE_FLUSH, .ds = INTEL_DS_DEPTH_CACHE_FLUSH_BIT, }, { .iris = PIPE_CONTROL_DATA_CACHE_FLUSH, .ds = INTEL_DS_DATA_CACHE_FLUSH_BIT, }, { .iris = PIPE_CONTROL_TILE_CACHE_FLUSH, .ds = INTEL_DS_TILE_CACHE_FLUSH_BIT, }, { .iris = PIPE_CONTROL_RENDER_TARGET_FLUSH, .ds = INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT, }, { .iris = PIPE_CONTROL_STATE_CACHE_INVALIDATE, .ds = INTEL_DS_STATE_CACHE_INVALIDATE_BIT, }, { .iris = PIPE_CONTROL_CONST_CACHE_INVALIDATE, .ds = INTEL_DS_CONST_CACHE_INVALIDATE_BIT, }, { .iris = PIPE_CONTROL_VF_CACHE_INVALIDATE, .ds = INTEL_DS_VF_CACHE_INVALIDATE_BIT, }, { .iris = PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE, .ds = INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT, }, { .iris = PIPE_CONTROL_INSTRUCTION_INVALIDATE, .ds = INTEL_DS_INST_CACHE_INVALIDATE_BIT, }, { .iris = PIPE_CONTROL_DEPTH_STALL, .ds = INTEL_DS_DEPTH_STALL_BIT, }, { .iris = PIPE_CONTROL_CS_STALL, .ds = INTEL_DS_CS_STALL_BIT, }, { .iris = PIPE_CONTROL_FLUSH_HDC, .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, }, { .iris = PIPE_CONTROL_STALL_AT_SCOREBOARD, .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, }, { .iris = PIPE_CONTROL_UNTYPED_DATAPORT_CACHE_FLUSH, .ds = INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, }, { .iris = PIPE_CONTROL_CCS_CACHE_FLUSH, .ds = INTEL_DS_CCS_CACHE_FLUSH_BIT, }, }; enum intel_ds_stall_flag ret = 0; for (uint32_t i = 0; i < ARRAY_SIZE(iris_to_ds_flags); i++) { if (iris_to_ds_flags[i].iris & flags) ret |= iris_to_ds_flags[i].ds; } assert(ret != 0); return ret; }