/*
 * Copyright © 2022 Friedrich Vock
 *
 * SPDX-License-Identifier: MIT
 */

#ifndef _WIN32
#include <dirent.h>
#include <unistd.h>
#endif
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include "ac_gpu_info.h"
#include "radv_buffer.h"
#include "radv_descriptor_set.h"
#include "radv_device_memory.h"
#include "radv_event.h"
#include "radv_image.h"
#include "radv_pipeline_graphics.h"
#include "radv_pipeline_rt.h"
#include "radv_query.h"
#include "radv_rmv.h"

#define RADV_FTRACE_INSTANCE_PATH "/sys/kernel/tracing/instances/amd_rmv"

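/* Builds the tracefs path for an amdgpu trace event and opens the requested control file;
 * e.g. ("amdgpu_vm_update_ptes", "enable", "w") opens
 * RADV_FTRACE_INSTANCE_PATH "/events/amdgpu/amdgpu_vm_update_ptes/enable" for writing. */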
static FILE *
open_event_file(const char *event_name, const char *event_filename, const char *mode)
{
   char filename[2048];
   snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/events/amdgpu/%s/%s", event_name, event_filename);
   return fopen(filename, mode);
}

static bool
set_event_tracing_enabled(const char *event_name, bool enabled)
{
   FILE *file = open_event_file(event_name, "enable", "w");
   if (!file)
      return false;

   /* Write "1" or "0" so that disabling the event actually turns tracing off. */
   size_t written_bytes = fwrite(enabled ? "1" : "0", 1, 1, file);
   fclose(file);
   return written_bytes == 1;
}

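/* Reads the numeric ID from the event's "id" file. This is the value that later shows up in
 * trace_event_common::type for every logged event, which is how events are matched during
 * parsing. Returns 0xFFFF on failure. */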
static uint16_t
trace_event_id(const char *event_name)
{
   /* id is 16-bit, so <= 65535 */
   char data[8];

   FILE *file = open_event_file(event_name, "id", "r");
   if (!file)
      return (uint16_t)~0;

   size_t read_bytes = fread(data, 1, sizeof(data) - 1, file);
   fclose(file);

   if (!read_bytes)
      return (uint16_t)~0;

   /* Terminate the buffer so strtoul doesn't read past the digits. */
   data[read_bytes] = '\0';
   return (uint16_t)strtoul(data, NULL, 10);
}

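/* Opens the per-CPU trace_pipe_raw file, which hands out the raw ftrace ring buffer
 * page by page (hence the page-wise parsing in append_trace_events below). */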
static void
open_trace_pipe(uint32_t cpu_index, int *dst_fd)
{
#ifdef _WIN32
   *dst_fd = -1;
#else
   char filename[2048];
   snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/per_cpu/cpu%d/trace_pipe_raw", cpu_index);
   /* I/O to the pipe needs to be non-blocking, otherwise reading all available
    * data would block indefinitely by waiting for more data to be written to the pipe */
   *dst_fd = open(filename, O_RDONLY | O_NONBLOCK);
#endif
}

/*
 * Kernel trace buffer parsing
 */

struct trace_page_header {
   uint64_t timestamp;
   int32_t commit;
};

enum trace_event_type { TRACE_EVENT_TYPE_PADDING = 29, TRACE_EVENT_TYPE_EXTENDED_DELTA, TRACE_EVENT_TYPE_TIMESTAMP };

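/* Per the kernel ring-buffer format: type_len values 1-28 encode a payload of
 * type_len * 4 bytes (with excess_length already being part of that payload), 0 means the
 * real length is stored in excess_length, and 29-31 are the special types above. */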
struct trace_event_header {
   uint32_t type_len : 5;
   uint32_t time_delta : 27;
   /* Only present if length is too big for type_len */
   uint32_t excess_length;
};

struct trace_event_common {
   unsigned short type;
   unsigned char flags;
   unsigned char preempt_count;
   int pid;
};

struct trace_event_amdgpu_vm_update_ptes {
   struct trace_event_common common;
   uint64_t start;
   uint64_t end;
   uint64_t flags;
   unsigned int num_ptes;
   uint64_t incr;
   int pid;
   uint64_t vm_ctx;
};

/* Represents a dynamic array of addresses in the ftrace buffer. */
struct trace_event_address_array {
   uint16_t data_size;
   uint16_t reserved;
   char data[];
};
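/* For amdgpu_vm_update_ptes events, data[] holds the physical page addresses, one uint64_t
 * per updated PTE; evaluate_trace_event() casts it accordingly. */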

/* Possible flags for PTEs, taken from amdgpu_vm.h */
#define AMDGPU_PTE_VALID  (1ULL << 0)
#define AMDGPU_PTE_SYSTEM (1ULL << 1)
#define AMDGPU_PTE_PRT    (1ULL << 51)

/* The minimum size of a GPU page */
#define MIN_GPU_PAGE_SIZE 4096

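/* Note on units, as implied by the math below: event->start/event->end appear to be in
 * MIN_GPU_PAGE_SIZE units while event->incr is in bytes, so e.g. with incr == 4096 each
 * PTE covers exactly one 4 KiB page of virtual address space. */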
static void
emit_page_table_update_event(struct vk_memory_trace_data *data, bool is_apu, uint64_t timestamp,
                             struct trace_event_amdgpu_vm_update_ptes *event, uint64_t *addrs, unsigned int pte_index)
{
   struct vk_rmv_token token;

   uint64_t end_addr;
   /* There may be more updated PTEs than the ones reported in the ftrace buffer.
    * We choose the reported end virtual address here to report the correct total committed memory. */
   if (pte_index == event->num_ptes - 1)
      end_addr = event->end;
   else
      end_addr = event->start + (pte_index + 1) * (event->incr / MIN_GPU_PAGE_SIZE);
   uint64_t start_addr = event->start + pte_index * (event->incr / MIN_GPU_PAGE_SIZE);

   token.type = VK_RMV_TOKEN_TYPE_PAGE_TABLE_UPDATE;
   token.timestamp = timestamp;
   token.data.page_table_update.type = VK_RMV_PAGE_TABLE_UPDATE_TYPE_UPDATE;
   token.data.page_table_update.page_size = event->incr;
   token.data.page_table_update.page_count = (end_addr - start_addr) * MIN_GPU_PAGE_SIZE / event->incr;
   token.data.page_table_update.pid = event->common.pid;
   token.data.page_table_update.virtual_address = event->start * MIN_GPU_PAGE_SIZE + pte_index * event->incr;
   /* RMV expects mappings to system memory to have a physical address of 0.
    * Even with traces generated by AMDGPU-PRO, on APUs without dedicated VRAM everything seems to
    * be marked as "committed to system memory". */
   token.data.page_table_update.physical_address = event->flags & AMDGPU_PTE_SYSTEM || is_apu ? 0 : addrs[pte_index];

   token.data.page_table_update.is_unmap = !(event->flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT));
   util_dynarray_append(&data->tokens, struct vk_rmv_token, token);
}

static void
evaluate_trace_event(struct radv_device *device, uint64_t timestamp, struct util_dynarray *tokens,
                     struct trace_event_amdgpu_vm_update_ptes *event)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (event->common.pid != getpid() && event->pid != getpid()) {
      return;
   }

   struct trace_event_address_array *array = (struct trace_event_address_array *)(event + 1);

   for (uint32_t i = 0; i < event->num_ptes; ++i)
      emit_page_table_update_event(&device->vk.memory_trace_data, !pdev->info.has_dedicated_vram, timestamp, event,
                                   (uint64_t *)array->data, i);
}

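/* Drains one per-CPU trace pipe. Each read() is expected to return at most one ring-buffer
 * page laid out as [trace_page_header][event header + payload]..., where page_header->commit
 * is the number of valid data bytes in the page. Parsing stops once the pipe runs dry. */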
static void
append_trace_events(struct radv_device *device, int pipe_fd)
{
   /* Assuming 4KB if os_get_page_size fails. */
   uint64_t page_size = 4096;
   os_get_page_size(&page_size);

   uint64_t timestamp;

   /*
    * Parse the trace ring buffer page by page.
    */
   char *page = (char *)malloc(page_size);
   if (!page) {
      return;
   }
   int64_t read_bytes;
   do {
      read_bytes = (int64_t)read(pipe_fd, page, page_size);
      if (read_bytes < (int64_t)sizeof(struct trace_page_header))
         break;

      struct trace_page_header *page_header = (struct trace_page_header *)page;
      timestamp = page_header->timestamp;

      size_t data_size = MIN2((size_t)read_bytes, (size_t)page_header->commit);

      char *read_ptr = page + sizeof(struct trace_page_header);
      while (read_ptr - page < data_size) {
         struct trace_event_header *event_header = (struct trace_event_header *)read_ptr;
         read_ptr += sizeof(struct trace_event_header);

         /* Handle special event type, see include/linux/ring_buffer.h in the
          * kernel source */
         switch (event_header->type_len) {
         case TRACE_EVENT_TYPE_PADDING:
            if (event_header->time_delta) {
               /* Specified size, skip past padding */
               read_ptr += event_header->excess_length;
               timestamp += event_header->time_delta;
               continue;
            } else {
               /* Padding is until end of page, skip until next page */
               read_ptr = page + data_size;
               continue;
            }
         case TRACE_EVENT_TYPE_EXTENDED_DELTA:
            timestamp += event_header->time_delta;
            timestamp += (uint64_t)event_header->excess_length << 27ULL;
            continue;
         case TRACE_EVENT_TYPE_TIMESTAMP:
            timestamp = event_header->time_delta;
            timestamp |= (uint64_t)event_header->excess_length << 27ULL;
            continue;
         default:
            break;
         }

         timestamp += event_header->time_delta;

         /* If type_len is not one of the special types and not zero, it is
          * the data length / 4. */
         size_t length;
         struct trace_event_common *event;
         if (event_header->type_len) {
            length = event_header->type_len * 4 + 4;
            /* In this case the excess_length field is already part of the event data. */
            event = (struct trace_event_common *)&event_header->excess_length;
         } else {
            length = event_header->excess_length + 4;
            event = (struct trace_event_common *)read_ptr;
         }

         if (event->type == device->memory_trace.ftrace_update_ptes_id)
            evaluate_trace_event(device, timestamp, &device->vk.memory_trace_data.tokens,
                                 (struct trace_event_amdgpu_vm_update_ptes *)event);

         read_ptr += length - sizeof(struct trace_event_header);
      }
   } while (true);

   free(page);
}

static void
close_pipe_fds(struct radv_device *device)
{
   for (uint32_t i = 0; i < device->memory_trace.num_cpus; ++i) {
      close(device->memory_trace.pipe_fds[i]);
   }
}

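/* Sets up RMV memory tracing on top of the "amd_rmv" ftrace instance. This requires access
 * to /sys/kernel/tracing (typically root, or suitably relaxed tracefs permissions); the
 * instance itself is assumed to exist already and, if it doesn't, it can usually be created
 * with e.g. "mkdir /sys/kernel/tracing/instances/amd_rmv". Every failure below only prints a
 * warning and leaves memory tracing disabled. */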
void
radv_memory_trace_init(struct radv_device *device)
{
#ifndef _WIN32
   DIR *dir = opendir(RADV_FTRACE_INSTANCE_PATH);
   if (!dir) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't access the tracing instance directory (%s)\n",
              strerror(errno));
      goto error;
   }
   closedir(dir);

   device->memory_trace.num_cpus = 0;

   char cpuinfo_line[1024];
   uint32_t num_physical_cores = 0;
   FILE *cpuinfo_file = fopen("/proc/cpuinfo", "r");
   if (cpuinfo_file) {
      while (fgets(cpuinfo_line, sizeof(cpuinfo_line), cpuinfo_file)) {
         char *logical_core_string = strstr(cpuinfo_line, "siblings");
         if (logical_core_string)
            sscanf(logical_core_string, "siblings : %d", &device->memory_trace.num_cpus);
         char *physical_core_string = strstr(cpuinfo_line, "cpu cores");
         if (physical_core_string)
            sscanf(physical_core_string, "cpu cores : %d", &num_physical_cores);
      }
      fclose(cpuinfo_file);
   }
   if (!device->memory_trace.num_cpus)
      device->memory_trace.num_cpus = num_physical_cores;

   FILE *clock_file = fopen(RADV_FTRACE_INSTANCE_PATH "/trace_clock", "w");
   if (!clock_file) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't access the tracing control files (%s).\n",
              strerror(errno));
      goto error;
   }

   fprintf(clock_file, "mono");
   fclose(clock_file);

   device->memory_trace.pipe_fds = malloc(device->memory_trace.num_cpus * sizeof(int));

   if (!device->memory_trace.pipe_fds) {
      device->memory_trace.num_cpus = 0;
   }
   for (uint32_t i = 0; i < device->memory_trace.num_cpus; ++i) {
      open_trace_pipe(i, device->memory_trace.pipe_fds + i);

      if (device->memory_trace.pipe_fds[i] == -1) {
         fprintf(stderr,
                 "radv: Couldn't initialize memory tracing: "
                 "Can't access the trace buffer pipes (%s).\n",
                 strerror(errno));
         /* Close the pipes opened so far; i is unsigned, so decrementing it past 0 wraps
          * around and terminates the loop. */
         for (i -= 1; i < device->memory_trace.num_cpus; --i) {
            close(device->memory_trace.pipe_fds[i]);
         }
         goto error;
      }
   }

   device->memory_trace.ftrace_update_ptes_id = trace_event_id("amdgpu_vm_update_ptes");
   if (device->memory_trace.ftrace_update_ptes_id == (uint16_t)~0U) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't access the trace event ID file (%s).\n",
              strerror(errno));
      goto error_pipes;
   }

   if (!set_event_tracing_enabled("amdgpu_vm_update_ptes", true)) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't enable trace events (%s).\n",
              strerror(errno));
      goto error_pipes;
   }

   fprintf(stderr, "radv: Enabled Memory Trace.\n");
   return;

error_pipes:
   close_pipe_fds(device);
error:
   vk_memory_trace_finish(&device->vk);
#endif
}

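/* Describes the RMV memory locations in terms of the GPU's memory configuration. As a
 * hypothetical example, a dGPU with 8 GiB of VRAM and a 256 MiB CPU-visible window gets
 * DEVICE = the 256 MiB visible window, DEVICE_INVISIBLE starting at the 256 MiB mark,
 * and HOST = min(GART size, system RAM). */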
static void
fill_memory_info(const struct radeon_info *gpu_info, struct vk_rmv_memory_info *out_info, int32_t index)
{
   switch (index) {
   case VK_RMV_MEMORY_LOCATION_DEVICE:
      out_info->physical_base_address = 0;
      out_info->size = gpu_info->all_vram_visible ? (uint64_t)gpu_info->vram_size_kb * 1024ULL
                                                  : (uint64_t)gpu_info->vram_vis_size_kb * 1024ULL;
      break;
   case VK_RMV_MEMORY_LOCATION_DEVICE_INVISIBLE:
      out_info->physical_base_address = (uint64_t)gpu_info->vram_vis_size_kb * 1024ULL;
      out_info->size = gpu_info->all_vram_visible ? 0 : (uint64_t)gpu_info->vram_size_kb * 1024ULL;
      break;
   case VK_RMV_MEMORY_LOCATION_HOST: {
      /* Leave the size uncapped if the query fails. */
      uint64_t ram_size = UINT64_MAX;
      os_get_total_physical_memory(&ram_size);
      out_info->physical_base_address = 0;
      out_info->size = MIN2((uint64_t)gpu_info->gart_size_kb * 1024ULL, ram_size);
   } break;
   default:
      unreachable("invalid memory index");
   }
}

static enum vk_rmv_memory_type
memory_type_from_vram_type(uint32_t vram_type)
{
   switch (vram_type) {
   case AMD_VRAM_TYPE_UNKNOWN:
      return VK_RMV_MEMORY_TYPE_UNKNOWN;
   case AMD_VRAM_TYPE_DDR2:
      return VK_RMV_MEMORY_TYPE_DDR2;
   case AMD_VRAM_TYPE_DDR3:
      return VK_RMV_MEMORY_TYPE_DDR3;
   case AMD_VRAM_TYPE_DDR4:
      return VK_RMV_MEMORY_TYPE_DDR4;
   case AMD_VRAM_TYPE_GDDR5:
      return VK_RMV_MEMORY_TYPE_GDDR5;
   case AMD_VRAM_TYPE_HBM:
      return VK_RMV_MEMORY_TYPE_HBM;
   case AMD_VRAM_TYPE_GDDR6:
      return VK_RMV_MEMORY_TYPE_GDDR6;
   case AMD_VRAM_TYPE_DDR5:
      return VK_RMV_MEMORY_TYPE_DDR5;
   case AMD_VRAM_TYPE_LPDDR4:
      return VK_RMV_MEMORY_TYPE_LPDDR4;
   case AMD_VRAM_TYPE_LPDDR5:
      return VK_RMV_MEMORY_TYPE_LPDDR5;
   default:
      unreachable("Invalid vram type");
   }
}

void
radv_rmv_fill_device_info(const struct radv_physical_device *pdev, struct vk_rmv_device_info *info)
{
   const struct radeon_info *gpu_info = &pdev->info;

   for (int32_t i = 0; i < VK_RMV_MEMORY_LOCATION_COUNT; ++i) {
      fill_memory_info(gpu_info, &info->memory_infos[i], i);
   }

   if (gpu_info->marketing_name)
      strncpy(info->device_name, gpu_info->marketing_name, sizeof(info->device_name) - 1);
   info->pcie_family_id = gpu_info->family_id;
   info->pcie_revision_id = gpu_info->pci_rev_id;
   info->pcie_device_id = gpu_info->pci.dev;
   info->minimum_shader_clock = 0;
   info->maximum_shader_clock = gpu_info->max_gpu_freq_mhz;
   info->vram_type = memory_type_from_vram_type(gpu_info->vram_type);
   info->vram_bus_width = gpu_info->memory_bus_width;
   info->vram_operations_per_clock = ac_memory_ops_per_clock(gpu_info->vram_type);
   info->minimum_memory_clock = 0;
   info->maximum_memory_clock = gpu_info->memory_freq_mhz;
   info->vram_bandwidth = gpu_info->memory_bandwidth_gbps;
}

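/* Drains all per-CPU trace pipes and turns new amdgpu_vm_update_ptes events into RMV
 * page-table-update tokens. Callers in this file invoke it with token_mtx already held,
 * since the parsed events are appended to the shared token stream. */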
void
radv_rmv_collect_trace_events(struct radv_device *device)
{
   for (uint32_t i = 0; i < device->memory_trace.num_cpus; ++i) {
      append_trace_events(device, device->memory_trace.pipe_fds[i]);
   }
}

void
radv_memory_trace_finish(struct radv_device *device)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   set_event_tracing_enabled("amdgpu_vm_update_ptes", false);
   close_pipe_fds(device);
}

/* The token lock must be held when entering _locked functions */
static void
log_resource_bind_locked(struct radv_device *device, uint64_t resource, struct radeon_winsys_bo *bo, uint64_t offset,
                         uint64_t size)
{
   struct vk_rmv_resource_bind_token token = {0};
   token.address = bo->va + offset;
   token.is_system_memory = bo->initial_domain & RADEON_DOMAIN_GTT;
   token.size = size;
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, resource);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &token);
}

void
radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool is_internal,
                         VkMemoryAllocateFlags alloc_flags)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_device_memory, memory, heap);

   /* Do not log zero-sized device memory objects. */
   if (!memory->alloc_size)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);

   struct vk_rmv_resource_create_token token = {0};
   token.is_driver_internal = is_internal;
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)heap);
   token.type = VK_RMV_RESOURCE_TYPE_HEAP;
   token.heap.alignment = pdev->info.max_alignment;
   token.heap.size = memory->alloc_size;
   token.heap.heap_index = memory->heap_index;
   token.heap.alloc_flags = alloc_flags;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token);
   log_resource_bind_locked(device, (uint64_t)heap, memory->bo, 0, memory->alloc_size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, bool is_internal)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (!device->vk.memory_trace_data.is_enabled)
      return;

   /* RMV doesn't seem to support GDS/OA domains. */
   if (!(bo->initial_domain & RADEON_DOMAIN_VRAM_GTT))
      return;

   struct vk_rmv_virtual_allocate_token token = {0};
   token.address = bo->va;
   /* If all VRAM is visible, no bo will be in invisible memory. */
   token.is_in_invisible_vram = bo->vram_no_cpu_access && !pdev->info.all_vram_visible;
   token.preferred_domains = (enum vk_rmv_kernel_memory_domain)bo->initial_domain;
   token.is_driver_internal = is_internal;
   token.page_count = DIV_ROUND_UP(bo->size, 4096);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_VIRTUAL_ALLOCATE, &token);
   radv_rmv_collect_trace_events(device);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   /* RMV doesn't seem to support GDS/OA domains. */
   if (!(bo->initial_domain & RADEON_DOMAIN_VRAM_GTT))
      return;

   struct vk_rmv_virtual_free_token token = {0};
   token.address = bo->va;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_VIRTUAL_FREE, &token);
   radv_rmv_collect_trace_events(device);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_buffer_bind(struct radv_device *device, VkBuffer _buffer)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_buffer, buffer, _buffer);
   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   log_resource_bind_locked(device, (uint64_t)_buffer, buffer->bo, buffer->offset, buffer->vk.size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_image_create(struct radv_device *device, const VkImageCreateInfo *create_info, bool is_internal,
                          VkImage _image)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_image, image, _image);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token token = {0};
   token.is_driver_internal = is_internal;
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_image);
   token.type = VK_RMV_RESOURCE_TYPE_IMAGE;
   token.image.create_flags = create_info->flags;
   token.image.usage_flags = create_info->usage;
   token.image.type = create_info->imageType;
   token.image.extent = create_info->extent;
   token.image.format = create_info->format;
   token.image.num_mips = create_info->mipLevels;
   token.image.num_slices = create_info->arrayLayers;
   token.image.tiling = create_info->tiling;
   token.image.alignment_log2 = util_logbase2(image->alignment);
   token.image.log2_samples = util_logbase2(image->vk.samples);
   token.image.log2_storage_samples = util_logbase2(image->vk.samples);
   token.image.metadata_alignment_log2 = image->planes[0].surface.meta_alignment_log2;
   token.image.image_alignment_log2 = image->planes[0].surface.alignment_log2;
   token.image.size = image->size;
   token.image.metadata_size = image->planes[0].surface.meta_size;
   token.image.metadata_header_size = 0;
   token.image.metadata_offset = image->planes[0].surface.meta_offset;
   token.image.metadata_header_offset = image->planes[0].surface.meta_offset;
   token.image.presentable = image->planes[0].surface.is_displayable;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_image_bind(struct radv_device *device, uint32_t bind_idx, VkImage _image)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_image, image, _image);
   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   log_resource_bind_locked(device, (uint64_t)_image, image->bindings[bind_idx].bo, image->bindings[bind_idx].offset,
                            image->bindings[bind_idx].range);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_query_pool_create(struct radv_device *device, VkQueryPool _pool)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_query_pool, pool, _pool);

   if (pool->vk.query_type != VK_QUERY_TYPE_OCCLUSION && pool->vk.query_type != VK_QUERY_TYPE_PIPELINE_STATISTICS &&
       pool->vk.query_type != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool);
   create_token.type = VK_RMV_RESOURCE_TYPE_QUERY_HEAP;
   create_token.query_pool.type = pool->vk.query_type;
   create_token.query_pool.has_cpu_access = true;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   log_resource_bind_locked(device, (uint64_t)_pool, pool->bo, 0, pool->size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

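/* Command buffer upload BOs are not Vulkan objects, so the BO pointer itself serves as the
 * RMV resource identifier here and in the matching _destroy function below. */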
void
radv_rmv_log_command_buffer_bo_create(struct radv_device *device, struct radeon_winsys_bo *bo, uint32_t executable_size,
                                      uint32_t data_size, uint32_t scratch_size)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   uint64_t upload_resource_identifier = (uint64_t)(uintptr_t)bo;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = true;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, upload_resource_identifier);
   create_token.type = VK_RMV_RESOURCE_TYPE_COMMAND_ALLOCATOR;
   create_token.command_buffer.preferred_domain = (enum vk_rmv_kernel_memory_domain)device->ws->cs_domain(device->ws);
   create_token.command_buffer.executable_size = executable_size;
   create_token.command_buffer.app_available_executable_size = executable_size;
   create_token.command_buffer.embedded_data_size = data_size;
   create_token.command_buffer.app_available_embedded_data_size = data_size;
   create_token.command_buffer.scratch_size = scratch_size;
   create_token.command_buffer.app_available_scratch_size = scratch_size;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   log_resource_bind_locked(device, upload_resource_identifier, bo, 0, bo->size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_log_cpu_map(&device->vk, bo->va, false);
}

void
radv_rmv_log_command_buffer_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_destroy_token destroy_token = {0};
   destroy_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, &destroy_token);
   vk_rmv_destroy_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_log_cpu_map(&device->vk, bo->va, true);
}

void
radv_rmv_log_border_color_palette_create(struct radv_device *device, struct radeon_winsys_bo *bo)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   uint32_t resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo);

   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = true;
   create_token.resource_id = resource_id;
   create_token.type = VK_RMV_RESOURCE_TYPE_BORDER_COLOR_PALETTE;
   /*
    * We have 4096 entries, but the corresponding RMV token only has 8 bits.
    */
   create_token.border_color_palette.num_entries = 255; /* = RADV_BORDER_COLOR_COUNT; */

   struct vk_rmv_resource_bind_token bind_token;
   bind_token.address = bo->va;
   bind_token.is_system_memory = false;
   bind_token.resource_id = resource_id;
   bind_token.size = RADV_BORDER_COLOR_BUFFER_SIZE;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &bind_token);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_log_cpu_map(&device->vk, bo->va, false);
}

void
radv_rmv_log_border_color_palette_destroy(struct radv_device *device, struct radeon_winsys_bo *bo)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_destroy_token token = {0};
   /* same resource id as the create token */
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, &token);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_log_cpu_map(&device->vk, bo->va, true);
}

void
radv_rmv_log_sparse_add_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   struct vk_rmv_resource_reference_token token = {0};
   token.virtual_address = src_bo->va + offset;
   token.residency_removed = false;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_REFERENCE, &token);
   radv_rmv_collect_trace_events(device);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_sparse_remove_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   struct vk_rmv_resource_reference_token token = {0};
   token.virtual_address = src_bo->va + offset;
   token.residency_removed = true;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_REFERENCE, &token);
   radv_rmv_collect_trace_events(device);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_descriptor_pool_create(struct radv_device *device, const VkDescriptorPoolCreateInfo *create_info,
                                    VkDescriptorPool _pool)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_descriptor_pool, pool, _pool);

   if (pool->bo)
      vk_rmv_log_cpu_map(&device->vk, pool->bo->va, false);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = false;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool);
   create_token.type = VK_RMV_RESOURCE_TYPE_DESCRIPTOR_POOL;
   create_token.descriptor_pool.max_sets = create_info->maxSets;
   create_token.descriptor_pool.pool_size_count = create_info->poolSizeCount;
   /* Using vk_rmv_token_pool_alloc frees the allocation automatically when the trace is done. */
   create_token.descriptor_pool.pool_sizes = malloc(create_info->poolSizeCount * sizeof(VkDescriptorPoolSize));
   if (!create_token.descriptor_pool.pool_sizes) {
      /* Don't leave the token mutex locked on the early-out path. */
      simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
      return;
   }

   memcpy(create_token.descriptor_pool.pool_sizes, create_info->pPoolSizes,
          create_info->poolSizeCount * sizeof(VkDescriptorPoolSize));

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);

   if (pool->bo) {
      simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
      struct vk_rmv_resource_bind_token bind_token;
      bind_token.address = pool->bo->va;
      bind_token.is_system_memory = false;
      bind_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool);
      bind_token.size = pool->size;

      vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &bind_token);
      simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   }
}

void
radv_rmv_log_graphics_pipeline_create(struct radv_device *device, struct radv_pipeline *pipeline, bool is_internal)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VkPipeline _pipeline = radv_pipeline_to_handle(pipeline);
   struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = is_internal;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pipeline);
   create_token.type = VK_RMV_RESOURCE_TYPE_PIPELINE;
   create_token.pipeline.is_internal = is_internal;
   create_token.pipeline.hash_lo = pipeline->pipeline_hash;
   create_token.pipeline.is_ngg = graphics_pipeline->is_ngg;
   create_token.pipeline.shader_stages = graphics_pipeline->active_stages;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
      struct radv_shader *shader = pipeline->shaders[s];

      if (!shader)
         continue;

      log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size);
   }
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_compute_pipeline_create(struct radv_device *device, struct radv_pipeline *pipeline, bool is_internal)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VkPipeline _pipeline = radv_pipeline_to_handle(pipeline);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = is_internal;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pipeline);
   create_token.type = VK_RMV_RESOURCE_TYPE_PIPELINE;
   create_token.pipeline.is_internal = is_internal;
   create_token.pipeline.hash_lo = pipeline->pipeline_hash;
   create_token.pipeline.is_ngg = false;
   create_token.pipeline.shader_stages = VK_SHADER_STAGE_COMPUTE_BIT;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   struct radv_shader *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
   log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_rt_pipeline_create(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VkPipeline _pipeline = radv_pipeline_to_handle(&pipeline->base.base);

   struct radv_shader *prolog = pipeline->prolog;
   struct radv_shader *traversal = pipeline->base.base.shaders[MESA_SHADER_INTERSECTION];

   VkShaderStageFlagBits active_stages = traversal ? VK_SHADER_STAGE_INTERSECTION_BIT_KHR : 0;
   if (prolog)
      active_stages |= VK_SHADER_STAGE_COMPUTE_BIT;

   for (uint32_t i = 0; i < pipeline->stage_count; i++) {
      if (pipeline->stages[i].shader)
         active_stages |= mesa_to_vk_shader_stage(pipeline->stages[i].stage);
   }

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);

   struct vk_rmv_resource_create_token create_token = {0};
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pipeline);
   create_token.type = VK_RMV_RESOURCE_TYPE_PIPELINE;
   create_token.pipeline.hash_lo = pipeline->base.base.pipeline_hash;
   create_token.pipeline.shader_stages = active_stages;
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);

   if (prolog)
      log_resource_bind_locked(device, (uint64_t)_pipeline, prolog->bo, prolog->alloc->offset, prolog->alloc->size);

   if (traversal)
      log_resource_bind_locked(device, (uint64_t)_pipeline, traversal->bo, traversal->alloc->offset,
                               traversal->alloc->size);

   for (uint32_t i = 0; i < pipeline->non_imported_stage_count; i++) {
      struct radv_shader *shader = pipeline->stages[i].shader;
      if (shader)
         log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size);
   }

   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_event_create(struct radv_device *device, VkEvent _event, VkEventCreateFlags flags, bool is_internal)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_event, event, _event);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = is_internal;
   create_token.type = VK_RMV_RESOURCE_TYPE_GPU_EVENT;
   create_token.event.flags = flags;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_event);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   log_resource_bind_locked(device, (uint64_t)_event, event->bo, 0, 8);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);

   if (event->map)
      vk_rmv_log_cpu_map(&device->vk, event->bo->va, false);
}

void
radv_rmv_log_submit(struct radv_device *device, enum amd_ip_type type)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   switch (type) {
   case AMD_IP_GFX:
      vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_SUBMIT_GRAPHICS);
      break;
   case AMD_IP_COMPUTE:
      vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_SUBMIT_COMPUTE);
      break;
   case AMD_IP_SDMA:
      vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_SUBMIT_COPY);
      break;
   default:
      unreachable("invalid ip type");
   }
}

void
radv_rmv_log_resource_destroy(struct radv_device *device, uint64_t handle)
{
   if (!device->vk.memory_trace_data.is_enabled || handle == 0)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_destroy_token token = {0};
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, handle);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, &token);
   vk_rmv_destroy_resource_id_locked(&device->vk, handle);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}
958