/*
 * Copyright © 2022 Friedrich Vock
 *
 * SPDX-License-Identifier: MIT
 */

#ifndef _WIN32
#include <dirent.h>
#include <unistd.h>
#endif
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include "ac_gpu_info.h"
#include "radv_buffer.h"
#include "radv_descriptor_set.h"
#include "radv_device_memory.h"
#include "radv_event.h"
#include "radv_image.h"
#include "radv_pipeline_graphics.h"
#include "radv_pipeline_rt.h"
#include "radv_query.h"
#include "radv_rmv.h"

#define RADV_FTRACE_INSTANCE_PATH "/sys/kernel/tracing/instances/amd_rmv"

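/* Opens one of an amdgpu event's control files (e.g. "enable" or "id") inside the amd_rmv ftrace
 * instance. Returns NULL if the file cannot be opened. */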
static FILE *
open_event_file(const char *event_name, const char *event_filename, const char *mode)
{
   char filename[2048];
   snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/events/amdgpu/%s/%s", event_name, event_filename);
   return fopen(filename, mode);
}

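/* Enables or disables recording of the given amdgpu trace event by writing to its "enable" file. */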
static bool
set_event_tracing_enabled(const char *event_name, bool enabled)
{
   FILE *file = open_event_file(event_name, "enable", "w");
   if (!file)
      return false;

   /* Writing "1" enables the event, "0" disables it. */
   size_t written_bytes = fwrite(enabled ? "1" : "0", 1, 1, file);
   fclose(file);
   return written_bytes == 1;
}

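/* Reads the numeric ID the kernel assigned to the given trace event. Parsed trace records are
 * matched against this ID. Returns 0xffff on failure. */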
static uint16_t
trace_event_id(const char *event_name)
{
   /* id is 16-bit, so <= 65535 (5 digits plus newline) */
   char data[8] = {0};

   FILE *file = open_event_file(event_name, "id", "r");
   if (!file)
      return (uint16_t)~0;

   size_t read_bytes = fread(data, 1, sizeof(data) - 1, file);
   fclose(file);

   if (!read_bytes)
      return (uint16_t)~0;

   return (uint16_t)strtoul(data, NULL, 10);
}

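/* Opens the raw per-CPU trace pipe for the given CPU. On failure (and on Windows), *dst_fd is
 * set to -1. */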
static void
open_trace_pipe(uint32_t cpu_index, int *dst_fd)
{
#ifdef _WIN32
   *dst_fd = -1;
#else
   char filename[2048];
   snprintf(filename, sizeof(filename), RADV_FTRACE_INSTANCE_PATH "/per_cpu/cpu%d/trace_pipe_raw", cpu_index);
   /* I/O to the pipe needs to be non-blocking, otherwise reading all available
    * data would block indefinitely by waiting for more data to be written to the pipe. */
   *dst_fd = open(filename, O_RDONLY | O_NONBLOCK);
#endif
}

/*
 * Kernel trace buffer parsing
 */

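/* Mirrors the layout of a raw ring buffer page header as read from trace_pipe_raw: the page's
 * base timestamp followed by the number of committed data bytes. */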
struct trace_page_header {
   uint64_t timestamp;
   int32_t commit;
};

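/* Reserved type_len values marking special ring buffer records (padding, extended time deltas
 * and absolute timestamps) rather than trace events; see include/linux/ring_buffer.h. */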
enum trace_event_type { TRACE_EVENT_TYPE_PADDING = 29, TRACE_EVENT_TYPE_EXTENDED_DELTA, TRACE_EVENT_TYPE_TIMESTAMP };

struct trace_event_header {
   uint32_t type_len : 5;
   uint32_t time_delta : 27;
   /* Only present if length is too big for type_len */
   uint32_t excess_length;
};

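/* Common fields at the start of every trace event payload. */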
struct trace_event_common {
   unsigned short type;
   unsigned char flags;
   unsigned char preempt_count;
   int pid;
};

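/* Payload of the amdgpu_vm_update_ptes event. The field order has to match the event format
 * exposed by the kernel; the updated page table entries follow as a trace_event_address_array. */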
struct trace_event_amdgpu_vm_update_ptes {
   struct trace_event_common common;
   uint64_t start;
   uint64_t end;
   uint64_t flags;
   unsigned int num_ptes;
   uint64_t incr;
   int pid;
   uint64_t vm_ctx;
};

/* Represents a dynamic array of addresses in the ftrace buffer. */
struct trace_event_address_array {
   uint16_t data_size;
   uint16_t reserved;
   char data[];
};

/* Possible flags for PTEs, taken from amdgpu_vm.h */
#define AMDGPU_PTE_VALID  (1ULL << 0)
#define AMDGPU_PTE_SYSTEM (1ULL << 1)
#define AMDGPU_PTE_PRT    (1ULL << 51)

/* The minimum size of a GPU page */
#define MIN_GPU_PAGE_SIZE 4096

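/* Emits one RMV page table update token for the PTE at pte_index of an amdgpu_vm_update_ptes
 * event. On APUs, all mappings are reported as committed to system memory. */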
static void
emit_page_table_update_event(struct vk_memory_trace_data *data, bool is_apu, uint64_t timestamp,
                             struct trace_event_amdgpu_vm_update_ptes *event, uint64_t *addrs, unsigned int pte_index)
{
   struct vk_rmv_token token;

   uint64_t end_addr;
   /* There may be more updated PTEs than the ones reported in the ftrace buffer.
    * We choose the reported end virtual address here to report the correct total committed memory. */
   if (pte_index == event->num_ptes - 1)
      end_addr = event->end;
   else
      end_addr = event->start + (pte_index + 1) * (event->incr / MIN_GPU_PAGE_SIZE);
   uint64_t start_addr = event->start + pte_index * (event->incr / MIN_GPU_PAGE_SIZE);

   token.type = VK_RMV_TOKEN_TYPE_PAGE_TABLE_UPDATE;
   token.timestamp = timestamp;
   token.data.page_table_update.type = VK_RMV_PAGE_TABLE_UPDATE_TYPE_UPDATE;
   token.data.page_table_update.page_size = event->incr;
   token.data.page_table_update.page_count = (end_addr - start_addr) * MIN_GPU_PAGE_SIZE / event->incr;
   token.data.page_table_update.pid = event->common.pid;
   token.data.page_table_update.virtual_address = event->start * MIN_GPU_PAGE_SIZE + pte_index * event->incr;
   /* RMV expects mappings to system memory to have a physical address of 0.
    * Even with traces generated by AMDGPU-PRO, on APUs without dedicated VRAM everything seems to
    * be marked as "committed to system memory". */
   token.data.page_table_update.physical_address = event->flags & AMDGPU_PTE_SYSTEM || is_apu ? 0 : addrs[pte_index];

   token.data.page_table_update.is_unmap = !(event->flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT));
   util_dynarray_append(&data->tokens, struct vk_rmv_token, token);
}

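/* Converts an amdgpu_vm_update_ptes event into RMV tokens, skipping events that belong to other
 * processes. */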
static void
evaluate_trace_event(struct radv_device *device, uint64_t timestamp, struct util_dynarray *tokens,
                     struct trace_event_amdgpu_vm_update_ptes *event)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (event->common.pid != getpid() && event->pid != getpid()) {
      return;
   }

   struct trace_event_address_array *array = (struct trace_event_address_array *)(event + 1);

   for (uint32_t i = 0; i < event->num_ptes; ++i)
      emit_page_table_update_event(&device->vk.memory_trace_data, !pdev->info.has_dedicated_vram, timestamp, event,
                                   (uint64_t *)array->data, i);
}

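/* Drains one per-CPU trace pipe and appends RMV tokens for all relevant events found in it. */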
static void
append_trace_events(struct radv_device *device, int pipe_fd)
{
   /* Assuming 4KB if os_get_page_size fails. */
   uint64_t page_size = 4096;
   os_get_page_size(&page_size);

   uint64_t timestamp;

   /*
    * Parse the trace ring buffer page by page.
    */
   char *page = (char *)malloc(page_size);
   if (!page) {
      return;
   }
   int64_t read_bytes;
   do {
      read_bytes = (int64_t)read(pipe_fd, page, page_size);
      if (read_bytes < (int64_t)sizeof(struct trace_page_header))
         break;

      struct trace_page_header *page_header = (struct trace_page_header *)page;
      timestamp = page_header->timestamp;

      size_t data_size = MIN2((size_t)read_bytes, (size_t)page_header->commit);

      char *read_ptr = page + sizeof(struct trace_page_header);
      while (read_ptr - page < data_size) {
         struct trace_event_header *event_header = (struct trace_event_header *)read_ptr;
         read_ptr += sizeof(struct trace_event_header);

         /* Handle special event types, see include/linux/ring_buffer.h in the
          * kernel source */
         switch (event_header->type_len) {
         case TRACE_EVENT_TYPE_PADDING:
            if (event_header->time_delta) {
               /* Specified size, skip past padding */
               read_ptr += event_header->excess_length;
               timestamp += event_header->time_delta;
               continue;
            } else {
               /* Padding is until end of page, skip until next page */
               read_ptr = page + data_size;
               continue;
            }
         case TRACE_EVENT_TYPE_EXTENDED_DELTA:
            timestamp += event_header->time_delta;
            timestamp += (uint64_t)event_header->excess_length << 27ULL;
            continue;
         case TRACE_EVENT_TYPE_TIMESTAMP:
            timestamp = event_header->time_delta;
            timestamp |= (uint64_t)event_header->excess_length << 27ULL;
            continue;
         default:
            break;
         }

         timestamp += event_header->time_delta;

         /* If type_len is not one of the special types and not zero, it is
          * the data length / 4. */
         size_t length;
         struct trace_event_common *event;
         if (event_header->type_len) {
            length = event_header->type_len * 4 + 4;
            /* The excess_length field already contains event data in this case. */
            event = (struct trace_event_common *)&event_header->excess_length;
         } else {
            length = event_header->excess_length + 4;
            event = (struct trace_event_common *)read_ptr;
         }

         if (event->type == device->memory_trace.ftrace_update_ptes_id)
            evaluate_trace_event(device, timestamp, &device->vk.memory_trace_data.tokens,
                                 (struct trace_event_amdgpu_vm_update_ptes *)event);

         read_ptr += length - sizeof(struct trace_event_header);
      }
   } while (true);

   free(page);
}

static void
close_pipe_fds(struct radv_device *device)
{
   for (uint32_t i = 0; i < device->memory_trace.num_cpus; ++i) {
      close(device->memory_trace.pipe_fds[i]);
   }
}

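/* Sets up kernel-side tracing: determines the CPU count, selects the monotonic trace clock,
 * opens the per-CPU trace pipes and enables the amdgpu_vm_update_ptes event in the amd_rmv
 * ftrace instance. Requires that instance to exist and to be accessible. */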
void
radv_memory_trace_init(struct radv_device *device)
{
#ifndef _WIN32
   DIR *dir = opendir(RADV_FTRACE_INSTANCE_PATH);
   if (!dir) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't access the tracing instance directory (%s)\n",
              strerror(errno));
      goto error;
   }
   closedir(dir);

   device->memory_trace.num_cpus = 0;

   char cpuinfo_line[1024];
   uint32_t num_physical_cores = 0;
   FILE *cpuinfo_file = fopen("/proc/cpuinfo", "r");
   if (!cpuinfo_file) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't read /proc/cpuinfo (%s).\n",
              strerror(errno));
      goto error;
   }
   while (fgets(cpuinfo_line, sizeof(cpuinfo_line), cpuinfo_file)) {
      char *logical_core_string = strstr(cpuinfo_line, "siblings");
      if (logical_core_string)
         sscanf(logical_core_string, "siblings : %d", &device->memory_trace.num_cpus);
      char *physical_core_string = strstr(cpuinfo_line, "cpu cores");
      if (physical_core_string)
         sscanf(physical_core_string, "cpu cores : %d", &num_physical_cores);
   }
   if (!device->memory_trace.num_cpus)
      device->memory_trace.num_cpus = num_physical_cores;
   fclose(cpuinfo_file);

   FILE *clock_file = fopen(RADV_FTRACE_INSTANCE_PATH "/trace_clock", "w");
   if (!clock_file) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't access the tracing control files (%s).\n",
              strerror(errno));
      goto error;
   }

   fprintf(clock_file, "mono");
   fclose(clock_file);

   device->memory_trace.pipe_fds = malloc(device->memory_trace.num_cpus * sizeof(int));

   if (!device->memory_trace.pipe_fds) {
      device->memory_trace.num_cpus = 0;
   }
   for (uint32_t i = 0; i < device->memory_trace.num_cpus; ++i) {
      open_trace_pipe(i, device->memory_trace.pipe_fds + i);

      if (device->memory_trace.pipe_fds[i] == -1) {
         fprintf(stderr,
                 "radv: Couldn't initialize memory tracing: "
                 "Can't access the trace buffer pipes (%s).\n",
                 strerror(errno));
         /* Close the pipes opened so far. The unsigned index wraps around after 0,
          * which terminates the loop. */
         for (i -= 1; i < device->memory_trace.num_cpus; --i) {
            close(device->memory_trace.pipe_fds[i]);
         }
         goto error;
      }
   }

   device->memory_trace.ftrace_update_ptes_id = trace_event_id("amdgpu_vm_update_ptes");
   if (device->memory_trace.ftrace_update_ptes_id == (uint16_t)~0U) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't access the trace event ID file (%s).\n",
              strerror(errno));
      goto error_pipes;
   }

   if (!set_event_tracing_enabled("amdgpu_vm_update_ptes", true)) {
      fprintf(stderr,
              "radv: Couldn't initialize memory tracing: "
              "Can't enable trace events (%s).\n",
              strerror(errno));
      goto error_pipes;
   }

   fprintf(stderr, "radv: Enabled Memory Trace.\n");
   return;

error_pipes:
   close_pipe_fds(device);
error:
   vk_memory_trace_finish(&device->vk);
#endif
}

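/* Describes one RMV memory location (visible VRAM, invisible VRAM or host memory) in terms of
 * the GPU's memory sizes. */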
static void
fill_memory_info(const struct radeon_info *gpu_info, struct vk_rmv_memory_info *out_info, int32_t index)
{
   switch (index) {
   case VK_RMV_MEMORY_LOCATION_DEVICE:
      out_info->physical_base_address = 0;
      out_info->size = gpu_info->all_vram_visible ? (uint64_t)gpu_info->vram_size_kb * 1024ULL
                                                  : (uint64_t)gpu_info->vram_vis_size_kb * 1024ULL;
      break;
   case VK_RMV_MEMORY_LOCATION_DEVICE_INVISIBLE:
      out_info->physical_base_address = (uint64_t)gpu_info->vram_vis_size_kb * 1024ULL;
      out_info->size = gpu_info->all_vram_visible ? 0 : (uint64_t)gpu_info->vram_size_kb * 1024ULL;
      break;
   case VK_RMV_MEMORY_LOCATION_HOST: {
      uint64_t ram_size = -1U;
      os_get_total_physical_memory(&ram_size);
      out_info->physical_base_address = 0;
      out_info->size = MIN2((uint64_t)gpu_info->gart_size_kb * 1024ULL, ram_size);
   } break;
   default:
      unreachable("invalid memory index");
   }
}

static enum vk_rmv_memory_type
memory_type_from_vram_type(uint32_t vram_type)
{
   switch (vram_type) {
   case AMD_VRAM_TYPE_UNKNOWN:
      return VK_RMV_MEMORY_TYPE_UNKNOWN;
   case AMD_VRAM_TYPE_DDR2:
      return VK_RMV_MEMORY_TYPE_DDR2;
   case AMD_VRAM_TYPE_DDR3:
      return VK_RMV_MEMORY_TYPE_DDR3;
   case AMD_VRAM_TYPE_DDR4:
      return VK_RMV_MEMORY_TYPE_DDR4;
   case AMD_VRAM_TYPE_GDDR5:
      return VK_RMV_MEMORY_TYPE_GDDR5;
   case AMD_VRAM_TYPE_HBM:
      return VK_RMV_MEMORY_TYPE_HBM;
   case AMD_VRAM_TYPE_GDDR6:
      return VK_RMV_MEMORY_TYPE_GDDR6;
   case AMD_VRAM_TYPE_DDR5:
      return VK_RMV_MEMORY_TYPE_DDR5;
   case AMD_VRAM_TYPE_LPDDR4:
      return VK_RMV_MEMORY_TYPE_LPDDR4;
   case AMD_VRAM_TYPE_LPDDR5:
      return VK_RMV_MEMORY_TYPE_LPDDR5;
   default:
      unreachable("Invalid vram type");
   }
}

void
radv_rmv_fill_device_info(const struct radv_physical_device *pdev, struct vk_rmv_device_info *info)
{
   const struct radeon_info *gpu_info = &pdev->info;

   for (int32_t i = 0; i < VK_RMV_MEMORY_LOCATION_COUNT; ++i) {
      fill_memory_info(gpu_info, &info->memory_infos[i], i);
   }

   if (gpu_info->marketing_name)
      strncpy(info->device_name, gpu_info->marketing_name, sizeof(info->device_name) - 1);
   info->pcie_family_id = gpu_info->family_id;
   info->pcie_revision_id = gpu_info->pci_rev_id;
   info->pcie_device_id = gpu_info->pci.dev;
   info->minimum_shader_clock = 0;
   info->maximum_shader_clock = gpu_info->max_gpu_freq_mhz;
   info->vram_type = memory_type_from_vram_type(gpu_info->vram_type);
   info->vram_bus_width = gpu_info->memory_bus_width;
   info->vram_operations_per_clock = ac_memory_ops_per_clock(gpu_info->vram_type);
   info->minimum_memory_clock = 0;
   info->maximum_memory_clock = gpu_info->memory_freq_mhz;
   info->vram_bandwidth = gpu_info->memory_bandwidth_gbps;
}

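/* Reads all per-CPU trace pipes and converts pending page table update events into RMV tokens.
 * The RMV logging functions below call this with the token mutex held. */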
void
radv_rmv_collect_trace_events(struct radv_device *device)
{
   for (uint32_t i = 0; i < device->memory_trace.num_cpus; ++i) {
      append_trace_events(device, device->memory_trace.pipe_fds[i]);
   }
}

void
radv_memory_trace_finish(struct radv_device *device)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   set_event_tracing_enabled("amdgpu_vm_update_ptes", false);
   close_pipe_fds(device);
}

/* The token lock must be held when entering _locked functions */
static void
log_resource_bind_locked(struct radv_device *device, uint64_t resource, struct radeon_winsys_bo *bo, uint64_t offset,
                         uint64_t size)
{
   struct vk_rmv_resource_bind_token token = {0};
   token.address = bo->va + offset;
   token.is_system_memory = bo->initial_domain & RADEON_DOMAIN_GTT;
   token.size = size;
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, resource);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &token);
}

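/* VkDeviceMemory allocations are logged as RMV heap resources bound to their backing BO. */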
void
radv_rmv_log_heap_create(struct radv_device *device, VkDeviceMemory heap, bool is_internal,
                         VkMemoryAllocateFlags alloc_flags)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_device_memory, memory, heap);

   /* Do not log zero-sized device memory objects. */
   if (!memory->alloc_size)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);

   struct vk_rmv_resource_create_token token = {0};
   token.is_driver_internal = is_internal;
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)heap);
   token.type = VK_RMV_RESOURCE_TYPE_HEAP;
   token.heap.alignment = pdev->info.max_alignment;
   token.heap.size = memory->alloc_size;
   token.heap.heap_index = memory->heap_index;
   token.heap.alloc_flags = alloc_flags;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token);
   log_resource_bind_locked(device, (uint64_t)heap, memory->bo, 0, memory->alloc_size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_bo_allocate(struct radv_device *device, struct radeon_winsys_bo *bo, bool is_internal)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (!device->vk.memory_trace_data.is_enabled)
      return;

   /* RMV doesn't seem to support GDS/OA domains. */
   if (!(bo->initial_domain & RADEON_DOMAIN_VRAM_GTT))
      return;

   struct vk_rmv_virtual_allocate_token token = {0};
   token.address = bo->va;
   /* If all VRAM is visible, no bo will be in invisible memory. */
   token.is_in_invisible_vram = bo->vram_no_cpu_access && !pdev->info.all_vram_visible;
   token.preferred_domains = (enum vk_rmv_kernel_memory_domain)bo->initial_domain;
   token.is_driver_internal = is_internal;
   token.page_count = DIV_ROUND_UP(bo->size, 4096);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_VIRTUAL_ALLOCATE, &token);
   radv_rmv_collect_trace_events(device);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   /* RMV doesn't seem to support GDS/OA domains. */
   if (!(bo->initial_domain & RADEON_DOMAIN_VRAM_GTT))
      return;

   struct vk_rmv_virtual_free_token token = {0};
   token.address = bo->va;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_VIRTUAL_FREE, &token);
   radv_rmv_collect_trace_events(device);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_buffer_bind(struct radv_device *device, VkBuffer _buffer)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_buffer, buffer, _buffer);
   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   log_resource_bind_locked(device, (uint64_t)_buffer, buffer->bo, buffer->offset, buffer->vk.size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_image_create(struct radv_device *device, const VkImageCreateInfo *create_info, bool is_internal,
                          VkImage _image)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_image, image, _image);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token token = {0};
   token.is_driver_internal = is_internal;
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_image);
   token.type = VK_RMV_RESOURCE_TYPE_IMAGE;
   token.image.create_flags = create_info->flags;
   token.image.usage_flags = create_info->usage;
   token.image.type = create_info->imageType;
   token.image.extent = create_info->extent;
   token.image.format = create_info->format;
   token.image.num_mips = create_info->mipLevels;
   token.image.num_slices = create_info->arrayLayers;
   token.image.tiling = create_info->tiling;
   token.image.alignment_log2 = util_logbase2(image->alignment);
   token.image.log2_samples = util_logbase2(image->vk.samples);
   token.image.log2_storage_samples = util_logbase2(image->vk.samples);
   token.image.metadata_alignment_log2 = image->planes[0].surface.meta_alignment_log2;
   token.image.image_alignment_log2 = image->planes[0].surface.alignment_log2;
   token.image.size = image->size;
   token.image.metadata_size = image->planes[0].surface.meta_size;
   token.image.metadata_header_size = 0;
   token.image.metadata_offset = image->planes[0].surface.meta_offset;
   token.image.metadata_header_offset = image->planes[0].surface.meta_offset;
   token.image.presentable = image->planes[0].surface.is_displayable;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &token);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_image_bind(struct radv_device *device, uint32_t bind_idx, VkImage _image)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_image, image, _image);
   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   log_resource_bind_locked(device, (uint64_t)_image, image->bindings[bind_idx].bo, image->bindings[bind_idx].offset,
                            image->bindings[bind_idx].range);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_query_pool_create(struct radv_device *device, VkQueryPool _pool)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_query_pool, pool, _pool);

   if (pool->vk.query_type != VK_QUERY_TYPE_OCCLUSION && pool->vk.query_type != VK_QUERY_TYPE_PIPELINE_STATISTICS &&
       pool->vk.query_type != VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool);
   create_token.type = VK_RMV_RESOURCE_TYPE_QUERY_HEAP;
   create_token.query_pool.type = pool->vk.query_type;
   create_token.query_pool.has_cpu_access = true;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   log_resource_bind_locked(device, (uint64_t)_pool, pool->bo, 0, pool->size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_command_buffer_bo_create(struct radv_device *device, struct radeon_winsys_bo *bo, uint32_t executable_size,
                                      uint32_t data_size, uint32_t scratch_size)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   uint64_t upload_resource_identifier = (uint64_t)(uintptr_t)bo;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = true;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, upload_resource_identifier);
   create_token.type = VK_RMV_RESOURCE_TYPE_COMMAND_ALLOCATOR;
   create_token.command_buffer.preferred_domain = (enum vk_rmv_kernel_memory_domain)device->ws->cs_domain(device->ws);
   create_token.command_buffer.executable_size = executable_size;
   create_token.command_buffer.app_available_executable_size = executable_size;
   create_token.command_buffer.embedded_data_size = data_size;
   create_token.command_buffer.app_available_embedded_data_size = data_size;
   create_token.command_buffer.scratch_size = scratch_size;
   create_token.command_buffer.app_available_scratch_size = scratch_size;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   log_resource_bind_locked(device, upload_resource_identifier, bo, 0, bo->size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_log_cpu_map(&device->vk, bo->va, false);
}

void
radv_rmv_log_command_buffer_bo_destroy(struct radv_device *device, struct radeon_winsys_bo *bo)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_destroy_token destroy_token = {0};
   destroy_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, &destroy_token);
   vk_rmv_destroy_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_log_cpu_map(&device->vk, bo->va, true);
}

void
radv_rmv_log_border_color_palette_create(struct radv_device *device, struct radeon_winsys_bo *bo)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   uint32_t resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo);

   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = true;
   create_token.resource_id = resource_id;
   create_token.type = VK_RMV_RESOURCE_TYPE_BORDER_COLOR_PALETTE;
   /*
    * We have 4096 entries, but the corresponding RMV token only has 8 bits.
    */
   create_token.border_color_palette.num_entries = 255; /* = RADV_BORDER_COLOR_COUNT; */

   struct vk_rmv_resource_bind_token bind_token;
   bind_token.address = bo->va;
   bind_token.is_system_memory = false;
   bind_token.resource_id = resource_id;
   bind_token.size = RADV_BORDER_COLOR_BUFFER_SIZE;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &bind_token);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_log_cpu_map(&device->vk, bo->va, false);
}

void
radv_rmv_log_border_color_palette_destroy(struct radv_device *device, struct radeon_winsys_bo *bo)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_destroy_token token = {0};
   /* same resource id as the create token */
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)(uintptr_t)bo);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, &token);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_log_cpu_map(&device->vk, bo->va, true);
}

void
radv_rmv_log_sparse_add_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   struct vk_rmv_resource_reference_token token = {0};
   token.virtual_address = src_bo->va + offset;
   token.residency_removed = false;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_REFERENCE, &token);
   radv_rmv_collect_trace_events(device);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_sparse_remove_residency(struct radv_device *device, struct radeon_winsys_bo *src_bo, uint64_t offset)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   struct vk_rmv_resource_reference_token token = {0};
   token.virtual_address = src_bo->va + offset;
   token.residency_removed = true;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_REFERENCE, &token);
   radv_rmv_collect_trace_events(device);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_descriptor_pool_create(struct radv_device *device, const VkDescriptorPoolCreateInfo *create_info,
                                    VkDescriptorPool _pool)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_descriptor_pool, pool, _pool);

   if (pool->bo)
      vk_rmv_log_cpu_map(&device->vk, pool->bo->va, false);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = false;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool);
   create_token.type = VK_RMV_RESOURCE_TYPE_DESCRIPTOR_POOL;
   create_token.descriptor_pool.max_sets = create_info->maxSets;
   create_token.descriptor_pool.pool_size_count = create_info->poolSizeCount;
   /* Using vk_rmv_token_pool_alloc frees the allocation automatically when the trace is done. */
   create_token.descriptor_pool.pool_sizes = malloc(create_info->poolSizeCount * sizeof(VkDescriptorPoolSize));
   if (!create_token.descriptor_pool.pool_sizes) {
      /* Don't leave the token mutex locked on the out-of-memory path. */
      simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
      return;
   }

   memcpy(create_token.descriptor_pool.pool_sizes, create_info->pPoolSizes,
          create_info->poolSizeCount * sizeof(VkDescriptorPoolSize));

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);

   if (pool->bo) {
      simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
      struct vk_rmv_resource_bind_token bind_token;
      bind_token.address = pool->bo->va;
      bind_token.is_system_memory = false;
      bind_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pool);
      bind_token.size = pool->size;

      vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_BIND, &bind_token);
      simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
   }
}

void
radv_rmv_log_graphics_pipeline_create(struct radv_device *device, struct radv_pipeline *pipeline, bool is_internal)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VkPipeline _pipeline = radv_pipeline_to_handle(pipeline);
   struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = is_internal;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pipeline);
   create_token.type = VK_RMV_RESOURCE_TYPE_PIPELINE;
   create_token.pipeline.is_internal = is_internal;
   create_token.pipeline.hash_lo = pipeline->pipeline_hash;
   create_token.pipeline.is_ngg = graphics_pipeline->is_ngg;
   create_token.pipeline.shader_stages = graphics_pipeline->active_stages;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
      struct radv_shader *shader = pipeline->shaders[s];

      if (!shader)
         continue;

      log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size);
   }
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_compute_pipeline_create(struct radv_device *device, struct radv_pipeline *pipeline, bool is_internal)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VkPipeline _pipeline = radv_pipeline_to_handle(pipeline);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = is_internal;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pipeline);
   create_token.type = VK_RMV_RESOURCE_TYPE_PIPELINE;
   create_token.pipeline.is_internal = is_internal;
   create_token.pipeline.hash_lo = pipeline->pipeline_hash;
   create_token.pipeline.is_ngg = false;
   create_token.pipeline.shader_stages = VK_SHADER_STAGE_COMPUTE_BIT;

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   struct radv_shader *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
   log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_rt_pipeline_create(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VkPipeline _pipeline = radv_pipeline_to_handle(&pipeline->base.base);

   struct radv_shader *prolog = pipeline->prolog;
   struct radv_shader *traversal = pipeline->base.base.shaders[MESA_SHADER_INTERSECTION];

   VkShaderStageFlagBits active_stages = traversal ? VK_SHADER_STAGE_INTERSECTION_BIT_KHR : 0;
   if (prolog)
      active_stages |= VK_SHADER_STAGE_COMPUTE_BIT;

   for (uint32_t i = 0; i < pipeline->stage_count; i++) {
      if (pipeline->stages[i].shader)
         active_stages |= mesa_to_vk_shader_stage(pipeline->stages[i].stage);
   }

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);

   struct vk_rmv_resource_create_token create_token = {0};
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_pipeline);
   create_token.type = VK_RMV_RESOURCE_TYPE_PIPELINE;
   create_token.pipeline.hash_lo = pipeline->base.base.pipeline_hash;
   create_token.pipeline.shader_stages = active_stages;
   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);

   if (prolog)
      log_resource_bind_locked(device, (uint64_t)_pipeline, prolog->bo, prolog->alloc->offset, prolog->alloc->size);

   if (traversal)
      log_resource_bind_locked(device, (uint64_t)_pipeline, traversal->bo, traversal->alloc->offset,
                               traversal->alloc->size);

   for (uint32_t i = 0; i < pipeline->non_imported_stage_count; i++) {
      struct radv_shader *shader = pipeline->stages[i].shader;
      if (shader)
         log_resource_bind_locked(device, (uint64_t)_pipeline, shader->bo, shader->alloc->offset, shader->alloc->size);
   }

   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}

void
radv_rmv_log_event_create(struct radv_device *device, VkEvent _event, VkEventCreateFlags flags, bool is_internal)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   VK_FROM_HANDLE(radv_event, event, _event);

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_create_token create_token = {0};
   create_token.is_driver_internal = is_internal;
   create_token.type = VK_RMV_RESOURCE_TYPE_GPU_EVENT;
   create_token.event.flags = flags;
   create_token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, (uint64_t)_event);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_CREATE, &create_token);
   log_resource_bind_locked(device, (uint64_t)_event, event->bo, 0, 8);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);

   if (event->map)
      vk_rmv_log_cpu_map(&device->vk, event->bo->va, false);
}

void
radv_rmv_log_submit(struct radv_device *device, enum amd_ip_type type)
{
   if (!device->vk.memory_trace_data.is_enabled)
      return;

   switch (type) {
   case AMD_IP_GFX:
      vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_SUBMIT_GRAPHICS);
      break;
   case AMD_IP_COMPUTE:
      vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_SUBMIT_COMPUTE);
      break;
   case AMD_IP_SDMA:
      vk_rmv_log_misc_token(&device->vk, VK_RMV_MISC_EVENT_TYPE_SUBMIT_COPY);
      break;
   default:
      unreachable("invalid ip type");
   }
}

void
radv_rmv_log_resource_destroy(struct radv_device *device, uint64_t handle)
{
   if (!device->vk.memory_trace_data.is_enabled || handle == 0)
      return;

   simple_mtx_lock(&device->vk.memory_trace_data.token_mtx);
   struct vk_rmv_resource_destroy_token token = {0};
   token.resource_id = vk_rmv_get_resource_id_locked(&device->vk, handle);

   vk_rmv_emit_token(&device->vk.memory_trace_data, VK_RMV_TOKEN_TYPE_RESOURCE_DESTROY, &token);
   vk_rmv_destroy_resource_id_locked(&device->vk, handle);
   simple_mtx_unlock(&device->vk.memory_trace_data.token_mtx);
}