/* * Copyright © 2022 Igalia S.L. * SPDX-License-Identifier: MIT */ #include #include #include #include #include #include #include #include #include "redump.h" #include "util/u_math.h" #include "adreno_common.xml.h" #include "adreno_pm4.xml.h" #include "freedreno_pm4.h" #include "a6xx.xml.h" #include "ir3/ir3_assembler.h" #include "ir3/ir3_compiler.h" #include "ir3/ir3_shader.h" #include "util/list.h" #include "util/vma.h" struct cmdstream { struct list_head link; uint32_t *mem; uint32_t total_size; uint32_t cur; uint64_t iova; }; static uint64_t cs_get_cur_iova(struct cmdstream *cs) { return cs->iova + cs->cur * sizeof(uint32_t); } struct wrbuf { struct list_head link; uint64_t iova; uint64_t size; uint64_t clear; const char *name; }; struct replay_context { void *mem_ctx; struct util_vma_heap vma; struct cmdstream *submit_cs; struct cmdstream *state_cs; struct cmdstream *shader_cs; struct cmdstream *shader_log; struct cmdstream *cp_log; struct list_head cs_list; struct list_head wrbuf_list; struct ir3_compiler *compiler; struct hash_table_u64 *compiled_shaders; const char *output_name; }; static void pkt(struct cmdstream *cs, uint32_t payload) { assert(cs->cur <= cs->total_size); cs->mem[cs->cur++] = payload; } static void pkt_qw(struct cmdstream *cs, uint64_t payload) { pkt(cs, payload); pkt(cs, payload >> 32); } static uint64_t pkt_blob(struct cmdstream *cs, void *payload, uint32_t size, uint32_t alignment) { cs->cur = align(cs->cur, alignment / sizeof(uint32_t)); uint64_t start_iova = cs_get_cur_iova(cs); memcpy(cs->mem + cs->cur, payload, size); cs->cur += size; return start_iova; } static void pkt4(struct cmdstream *cs, uint16_t regindx, uint16_t cnt, uint32_t payload) { pkt(cs, pm4_pkt4_hdr(regindx, cnt)); pkt(cs, payload); } static void pkt7(struct cmdstream *cs, uint8_t opcode, uint16_t cnt) { pkt(cs, pm4_pkt7_hdr(opcode, cnt)); } struct rd_section { uint32_t type; uint32_t size; }; static struct cmdstream * cs_alloc(struct replay_context *ctx, uint32_t size) { struct cmdstream *cs = (struct cmdstream *) calloc(1, sizeof(struct cmdstream)); cs->mem = (uint32_t *)calloc(1, size); cs->total_size = size / sizeof(uint32_t); cs->cur = 0; cs->iova = util_vma_heap_alloc(&ctx->vma, size, 4096); assert(cs->iova != 0); list_addtail(&cs->link, &ctx->cs_list); return cs; } static void rd_write_gpu_addr_section(FILE *out, struct cmdstream *cs, enum rd_sect_type section) { const uint32_t packet[] = {(uint32_t)cs->iova, (uint32_t)(cs->cur * sizeof(uint32_t)), (uint32_t)(cs->iova >> 32)}; struct rd_section section_address = {.type = section, .size = sizeof(packet)}; fwrite(§ion_address, sizeof(section_address), 1, out); fwrite(packet, sizeof(packet), 1, out); } static void rd_write_cs_buffer(FILE *out, struct cmdstream *cs) { if (cs->cur == 0) return; rd_write_gpu_addr_section(out, cs, RD_GPUADDR); struct rd_section section_contents = {.type = RD_BUFFER_CONTENTS, .size = uint32_t(cs->cur * sizeof(uint32_t))}; fwrite(§ion_contents, sizeof(section_contents), 1, out); fwrite(cs->mem, sizeof(uint32_t), cs->cur, out); } static void rd_write_cs_submit(FILE *out, struct cmdstream *cs) { const uint32_t packet[] = {(uint32_t)cs->iova, cs->cur, (uint32_t)(cs->iova >> 32)}; struct rd_section section_cmdstream = {.type = RD_CMDSTREAM_ADDR, .size = sizeof(packet)}; fwrite(§ion_cmdstream, sizeof(section_cmdstream), 1, out); fwrite(packet, sizeof(packet), 1, out); } static void rd_write_wrbuffer(FILE *out, struct wrbuf *wrbuf) { uint32_t name_len = strlen(wrbuf->name) + 1; struct rd_section section = {.type = RD_WRBUFFER, .size = (uint32_t)(sizeof(uint64_t) * 3) + name_len}; fwrite(§ion, sizeof(section), 1, out); fwrite(&wrbuf->iova, sizeof(uint64_t), 1, out); fwrite(&wrbuf->size, sizeof(uint64_t), 1, out); fwrite(&wrbuf->clear, sizeof(uint64_t), 1, out); fwrite(wrbuf->name, sizeof(char), name_len, out); } static void print_usage(const char *name) { /* clang-format off */ fprintf(stderr, "Usage:\n\n" "\t%s [OPTIONS]... FILE...\n\n" "Options:\n" "\t --vastart=offset\n" "\t --vasize=size\n" "\t-h, --help - show this message\n" , name); /* clang-format on */ exit(2); } #define OPT_VA_START 1000 #define OPT_VA_SIZE 1001 /* clang-format off */ static const struct option opts[] = { { "vastart", required_argument, 0, OPT_VA_START }, { "vasize", required_argument, 0, OPT_VA_SIZE }, { "help", no_argument, 0, 'h' }, }; /* clang-format on */ static void replay_context_init(struct replay_context *ctx, struct fd_dev_id *dev_id, int argc, char **argv) { uint64_t va_start = 0; uint64_t va_size = 0; int c; while ((c = getopt_long(argc, argv, "h", opts, NULL)) != -1) { switch (c) { case OPT_VA_START: va_start = strtoull(optarg, NULL, 0); break; case OPT_VA_SIZE: va_size = strtoull(optarg, NULL, 0); break; case 'h': default: print_usage(argv[0]); } } if (optind < argc) { ctx->output_name = argv[optind]; } else { } if (!va_start || !va_size || !ctx->output_name) { print_usage(argv[0]); exit(1); } ctx->mem_ctx = ralloc_context(NULL); list_inithead(&ctx->cs_list); list_inithead(&ctx->wrbuf_list); util_vma_heap_init(&ctx->vma, va_start, ROUND_DOWN_TO(va_size, 4096)); ctx->submit_cs = cs_alloc(ctx, 1024 * 1024); ctx->state_cs = cs_alloc(ctx, 2 * 1024 * 1024); ctx->shader_cs = cs_alloc(ctx, 8 * 1024 * 1024); ctx->shader_log = cs_alloc(ctx, 1024 * 1024); ctx->shader_log->mem[0] = (ctx->shader_log->iova & 0xffffffff) + sizeof(uint64_t); ctx->shader_log->mem[1] = ctx->shader_log->iova >> 32; ctx->shader_log->cur = ctx->shader_log->total_size; ctx->cp_log = cs_alloc(ctx, 8 * 1024 * 1024); ((uint64_t *)ctx->cp_log->mem)[0] = ctx->cp_log->iova + 2 * sizeof(uint64_t); ((uint64_t *)ctx->cp_log->mem)[1] = sizeof(uint64_t); ctx->cp_log->cur = ctx->cp_log->total_size; struct ir3_compiler_options options{ .disable_cache = true, }; ctx->compiler = ir3_compiler_create(NULL, dev_id, fd_dev_info_raw(dev_id), &options); ctx->compiled_shaders = _mesa_hash_table_u64_create(ctx->mem_ctx); } static void replay_context_finish(struct replay_context *ctx) { FILE *out = fopen(ctx->output_name, "w"); if (!out) { errx(1, "Cannot open '%s' for writing\n", ctx->output_name); } static const uint32_t gpu_id = 660; struct rd_section section_gpu_id = {.type = RD_GPU_ID, .size = 1 * sizeof(uint32_t)}; fwrite(§ion_gpu_id, sizeof(section_gpu_id), 1, out); fwrite(&gpu_id, sizeof(uint32_t), 1, out); rd_write_gpu_addr_section(out, ctx->shader_log, RD_SHADER_LOG_BUFFER); rd_write_gpu_addr_section(out, ctx->cp_log, RD_CP_LOG_BUFFER); list_for_each_entry (struct cmdstream, cs, &ctx->cs_list, link) { rd_write_cs_buffer(out, cs); } rd_write_cs_submit(out, ctx->submit_cs); list_for_each_entry (struct wrbuf, wrbuf, &ctx->wrbuf_list, link) { rd_write_wrbuffer(out, wrbuf); } fclose(out); } static void upload_shader(struct replay_context *ctx, uint64_t id, const char *source) { FILE *in = fmemopen((void *)source, strlen(source), "r"); struct ir3_kernel_info info = { .shader_print_buffer_iova = ctx->shader_log->iova, }; struct ir3_shader *shader = ir3_parse_asm(ctx->compiler, &info, in); assert(shader); fclose(in); uint64_t *shader_iova = ralloc(ctx->mem_ctx, uint64_t); *shader_iova = pkt_blob(ctx->shader_cs, shader->variants->bin, shader->variants->info.size, 128); ralloc_free(shader); _mesa_hash_table_u64_insert(ctx->compiled_shaders, id, shader_iova); } static void emit_shader_iova(struct replay_context *ctx, struct cmdstream *cs, uint64_t id) { uint64_t *shader_iova = (uint64_t *) _mesa_hash_table_u64_search(ctx->compiled_shaders, id); if (shader_iova) { pkt_qw(cs, *shader_iova); } else { fprintf(stderr, "Not override for shader at 0x%" PRIx64 ", using original\n", id); pkt_qw(cs, id); } } #define begin_draw_state() \ uint64_t subcs_iova_start = cs_get_cur_iova(ctx.state_cs); \ struct cmdstream *prev_cs = cs; \ struct cmdstream *cs = ctx.state_cs; #define end_draw_state(params) \ uint64_t subcs_iova_end = cs_get_cur_iova(ctx.state_cs); \ uint32_t subcs_size = \ (subcs_iova_end - subcs_iova_start) / sizeof(uint32_t); \ pkt7(prev_cs, CP_SET_DRAW_STATE, 3); \ pkt(prev_cs, (params) | subcs_size); \ pkt_qw(prev_cs, subcs_iova_start); #define begin_ib() \ struct cmdstream *prev_cs = cs; \ struct cmdstream *cs = cs_alloc(&ctx, 1024 * 1024); #define end_ib() \ uint64_t ibcs_size = cs->cur; \ pkt7(prev_cs, CP_INDIRECT_BUFFER, 3); \ pkt_qw(prev_cs, cs->iova); \ pkt(prev_cs, ibcs_size); static void gpu_print(struct replay_context *ctx, struct cmdstream *_cs, uint64_t iova, uint32_t dwords) { uint64_t header_iova, body_iova; struct cmdstream *prev_cs = _cs; struct cmdstream *cs = cs_alloc(ctx, 4096); /* Commands that are being modified should be in a separate cmdstream, * otherwise they would be prefetched and writes would not be visible. */ { /* Write size into entry's header */ pkt7(cs, CP_MEM_WRITE, 4); header_iova = cs_get_cur_iova(cs); pkt_qw(cs, 0xdeadbeef); uint64_t size_iova = cs_get_cur_iova(cs); pkt(cs, dwords * 4); pkt(cs, 0); /* Copy the data into entry's body */ pkt7(cs, CP_MEMCPY, 5); pkt(cs, dwords); pkt_qw(cs, iova); body_iova = cs_get_cur_iova(cs); pkt_qw(cs, 0xdeadbeef); /* iova = iova + body_size + header_size */ pkt7(cs, CP_MEM_TO_MEM, 9); pkt(cs, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES); pkt_qw(cs, ctx->cp_log->iova); pkt_qw(cs, ctx->cp_log->iova); pkt_qw(cs, size_iova); pkt_qw(cs, ctx->cp_log->iova + sizeof(uint64_t)); } { struct cmdstream *cs = prev_cs; pkt7(cs, CP_MEM_TO_MEM, 5); pkt(cs, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES); pkt_qw(cs, header_iova); pkt_qw(cs, ctx->cp_log->iova); pkt7(cs, CP_MEM_TO_MEM, 7); pkt(cs, CP_MEM_TO_MEM_0_DOUBLE); pkt_qw(cs, body_iova); pkt_qw(cs, ctx->cp_log->iova); pkt_qw(cs, ctx->cp_log->iova + sizeof(uint64_t)); pkt7(cs, CP_WAIT_MEM_WRITES, 0); pkt7(cs, CP_WAIT_FOR_ME, 0); } end_ib(); } /* This function is used to read a buffer from the GPU into a file. * The buffer can optionally be cleared to 0xdeadbeef at the start * of the cmdstream by setting the clear parameter to true. * * Note: Unlike gpu_print, this function isn't sequenced, it will * read the state of the buffer at the end of the cmdstream, not * at the point of the call. */ static void gpu_read_into_file(struct replay_context *ctx, struct cmdstream *_cs, uint64_t iova, uint64_t size, bool clear, const char *name) { struct wrbuf *wrbuf = (struct wrbuf *) calloc(1, sizeof(struct wrbuf)); wrbuf->iova = iova; wrbuf->size = size; wrbuf->clear = clear; wrbuf->name = strdup(name); assert(wrbuf->iova != 0); list_addtail(&wrbuf->link, &ctx->wrbuf_list); }