xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan_hasvk/anv_utrace.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "anv_private.h"
25 
26 #include "perf/intel_perf.h"
27 
28 static uint32_t
command_buffers_count_utraces(struct anv_device * device,uint32_t cmd_buffer_count,struct anv_cmd_buffer ** cmd_buffers,uint32_t * utrace_copies)29 command_buffers_count_utraces(struct anv_device *device,
30                               uint32_t cmd_buffer_count,
31                               struct anv_cmd_buffer **cmd_buffers,
32                               uint32_t *utrace_copies)
33 {
34    if (!u_trace_should_process(&device->ds.trace_context))
35       return 0;
36 
37    uint32_t utraces = 0;
38    for (uint32_t i = 0; i < cmd_buffer_count; i++) {
39       if (u_trace_has_points(&cmd_buffers[i]->trace)) {
40          utraces++;
41          if (!(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
42             *utrace_copies += list_length(&cmd_buffers[i]->trace.trace_chunks);
43       }
44    }
45 
46    return utraces;
47 }
48 
49 static void
anv_utrace_delete_flush_data(struct u_trace_context * utctx,void * flush_data)50 anv_utrace_delete_flush_data(struct u_trace_context *utctx,
51                              void *flush_data)
52 {
53    struct anv_device *device =
54       container_of(utctx, struct anv_device, ds.trace_context);
55    struct anv_utrace_flush_copy *flush = flush_data;
56 
57    intel_ds_flush_data_fini(&flush->ds);
58 
59    if (flush->trace_bo) {
60       assert(flush->batch_bo);
61       anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
62       anv_device_release_bo(device, flush->batch_bo);
63       anv_device_release_bo(device, flush->trace_bo);
64    }
65 
66    vk_sync_destroy(&device->vk, flush->sync);
67 
68    vk_free(&device->vk.alloc, flush);
69 }
70 
71 static void
anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context * utctx,void * cmdstream,void * ts_from,uint64_t from_offset_B,void * ts_to,uint64_t to_offset_B,uint64_t size_B)72 anv_device_utrace_emit_copy_ts_buffer(struct u_trace_context *utctx,
73                                       void *cmdstream,
74                                       void *ts_from, uint64_t from_offset_B,
75                                       void *ts_to, uint64_t to_offset_B,
76                                       uint64_t size_B)
77 {
78    struct anv_device *device =
79       container_of(utctx, struct anv_device, ds.trace_context);
80    struct anv_utrace_flush_copy *flush = cmdstream;
81    struct anv_address from_addr = (struct anv_address) {
82       .bo = ts_from, .offset = from_offset_B };
83    struct anv_address to_addr = (struct anv_address) {
84       .bo = ts_to, .offset = to_offset_B };
85 
86    anv_genX(device->info, emit_so_memcpy)(&flush->memcpy_state,
87                                            to_addr, from_addr, size_B);
88 }
89 
/* Prepare the utrace bookkeeping for a submission of @cmd_buffer_count
 * command buffers on @queue.
 *
 * When any command buffer carries trace points, allocates a
 * anv_utrace_flush_copy (returned in @out_flush_data) holding a sync object
 * and — for re-submittable (non ONE_TIME_SUBMIT) command buffers — a batch
 * that copies their timestamps out to a dedicated BO so the originals can be
 * reused.  Returns *out_flush_data = NULL with VK_SUCCESS when nothing needs
 * tracing.  On error, unwinds all partially acquired resources.
 */
VkResult
anv_device_utrace_flush_cmd_buffers(struct anv_queue *queue,
                                    uint32_t cmd_buffer_count,
                                    struct anv_cmd_buffer **cmd_buffers,
                                    struct anv_utrace_flush_copy **out_flush_data)
{
   struct anv_device *device = queue->device;
   uint32_t utrace_copies = 0;
   uint32_t utraces = command_buffers_count_utraces(device,
                                                    cmd_buffer_count,
                                                    cmd_buffers,
                                                    &utrace_copies);
   /* Nothing traced: no flush data required for this submission. */
   if (!utraces) {
      *out_flush_data = NULL;
      return VK_SUCCESS;
   }

   VkResult result;
   struct anv_utrace_flush_copy *flush =
      vk_zalloc(&device->vk.alloc, sizeof(struct anv_utrace_flush_copy),
                8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!flush)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   intel_ds_flush_data_init(&flush->ds, &queue->ds, queue->ds.submission_id);

   /* Signaled by the submission; waited on in anv_utrace_read_ts before
    * reading timestamps back.
    */
   result = vk_sync_create(&device->vk, &device->physical->sync_syncobj_type,
                           0, 0, &flush->sync);
   if (result != VK_SUCCESS)
      goto error_sync;

   if (utrace_copies > 0) {
      /* Destination BO for the copied-out timestamps. */
      result = anv_bo_pool_alloc(&device->utrace_bo_pool,
                                 utrace_copies * 4096,
                                 &flush->trace_bo);
      if (result != VK_SUCCESS)
         goto error_trace_buf;

      result = anv_bo_pool_alloc(&device->utrace_bo_pool,
                                 /* 128 dwords of setup + 64 dwords per copy */
                                 align(512 + 64 * utrace_copies, 4096),
                                 &flush->batch_bo);
      if (result != VK_SUCCESS)
         goto error_batch_buf;

      result = anv_reloc_list_init(&flush->relocs, &device->vk.alloc);
      if (result != VK_SUCCESS)
         goto error_reloc_list;

      flush->batch.alloc = &device->vk.alloc;
      flush->batch.relocs = &flush->relocs;
      anv_batch_set_storage(&flush->batch,
                            (struct anv_address) { .bo = flush->batch_bo, },
                            flush->batch_bo->map, flush->batch_bo->size);

      /* Emit the copies */
      anv_genX(device->info, emit_so_memcpy_init)(&flush->memcpy_state,
                                                   device,
                                                   &flush->batch);
      for (uint32_t i = 0; i < cmd_buffer_count; i++) {
         if (cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
           /* One-time-submit: the command buffer's own timestamps can be
            * consumed directly, no copy needed.
            */
           intel_ds_queue_flush_data(&queue->ds, &cmd_buffers[i]->trace,
                                     &flush->ds, device->vk.current_frame, false);
         } else {
            /* Re-submittable: clone the trace and emit timestamp copies via
             * anv_device_utrace_emit_copy_ts_buffer.
             */
            u_trace_clone_append(u_trace_begin_iterator(&cmd_buffers[i]->trace),
                                 u_trace_end_iterator(&cmd_buffers[i]->trace),
                                 &flush->ds.trace,
                                 flush,
                                 anv_device_utrace_emit_copy_ts_buffer);
         }
      }
      anv_genX(device->info, emit_so_memcpy_fini)(&flush->memcpy_state);

      /* Last flush for this submission (eop = true). */
      intel_ds_queue_flush_data(&queue->ds, &flush->ds.trace, &flush->ds,
                                device->vk.current_frame, true);

      if (flush->batch.status != VK_SUCCESS) {
         result = flush->batch.status;
         goto error_batch;
      }
   } else {
      /* No copies means every traced command buffer was one-time-submit. */
      for (uint32_t i = 0; i < cmd_buffer_count; i++) {
         assert(cmd_buffers[i]->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT);
         intel_ds_queue_flush_data(&queue->ds, &cmd_buffers[i]->trace,
                                   &flush->ds, device->vk.current_frame,
                                   i == (cmd_buffer_count - 1));
      }
   }

   flush->queue = queue;

   *out_flush_data = flush;

   return VK_SUCCESS;

   /* Unwind in strict reverse order of acquisition. */
 error_batch:
   anv_reloc_list_finish(&flush->relocs, &device->vk.alloc);
 error_reloc_list:
   anv_bo_pool_free(&device->utrace_bo_pool, flush->batch_bo);
 error_batch_buf:
   anv_bo_pool_free(&device->utrace_bo_pool, flush->trace_bo);
 error_trace_buf:
   vk_sync_destroy(&device->vk, flush->sync);
 error_sync:
   vk_free(&device->vk.alloc, flush);
   return result;
}
197 
198 static void *
anv_utrace_create_buffer(struct u_trace_context * utctx,uint64_t size_B)199 anv_utrace_create_buffer(struct u_trace_context *utctx, uint64_t size_B)
200 {
201    struct anv_device *device =
202       container_of(utctx, struct anv_device, ds.trace_context);
203 
204    struct anv_bo *bo = NULL;
205    UNUSED VkResult result =
206       anv_bo_pool_alloc(&device->utrace_bo_pool,
207                         align(size_B, 4096),
208                         &bo);
209    assert(result == VK_SUCCESS);
210 
211    return bo;
212 }
213 
214 static void
anv_utrace_destroy_buffer(struct u_trace_context * utctx,void * timestamps)215 anv_utrace_destroy_buffer(struct u_trace_context *utctx, void *timestamps)
216 {
217    struct anv_device *device =
218       container_of(utctx, struct anv_device, ds.trace_context);
219    struct anv_bo *bo = timestamps;
220 
221    anv_bo_pool_free(&device->utrace_bo_pool, bo);
222 }
223 
224 static void
anv_utrace_record_ts(struct u_trace * ut,void * cs,void * timestamps,uint64_t offset_B,uint32_t flags)225 anv_utrace_record_ts(struct u_trace *ut, void *cs,
226                      void *timestamps, uint64_t offset_B,
227                      uint32_t flags)
228 {
229    struct anv_cmd_buffer *cmd_buffer =
230       container_of(ut, struct anv_cmd_buffer, trace);
231    struct anv_device *device = cmd_buffer->device;
232    struct anv_bo *bo = timestamps;
233 
234    enum anv_timestamp_capture_type capture_type =
235       (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE) ?
236       ANV_TIMESTAMP_CAPTURE_END_OF_PIPE :
237       ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE;
238    device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device,
239                                         (struct anv_address) {
240                                            .bo = bo,
241                                            .offset = offset_B, },
242                                         capture_type);
243 }
244 
245 static uint64_t
anv_utrace_read_ts(struct u_trace_context * utctx,void * timestamps,uint64_t offset_B,void * flush_data)246 anv_utrace_read_ts(struct u_trace_context *utctx,
247                    void *timestamps, uint64_t offset_B,
248                    void *flush_data)
249 {
250    struct anv_device *device =
251       container_of(utctx, struct anv_device, ds.trace_context);
252    struct anv_bo *bo = timestamps;
253    struct anv_utrace_flush_copy *flush = flush_data;
254 
255    /* Only need to stall on results for the first entry: */
256    if (offset_B == 0) {
257       UNUSED VkResult result =
258          vk_sync_wait(&device->vk,
259                       flush->sync,
260                       0,
261                       VK_SYNC_WAIT_COMPLETE,
262                       os_time_get_absolute_timeout(OS_TIMEOUT_INFINITE));
263       assert(result == VK_SUCCESS);
264    }
265 
266    uint64_t *ts = bo->map + offset_B;
267 
268    /* Don't translate the no-timestamp marker: */
269    if (*ts == U_TRACE_NO_TIMESTAMP)
270       return U_TRACE_NO_TIMESTAMP;
271 
272    return intel_device_info_timebase_scale(device->info, *ts);
273 }
274 
275 static void
anv_utrace_capture_data(struct u_trace * ut,void * cs,void * dst_buffer,uint64_t dst_offset_B,void * src_buffer,uint64_t src_offset_B,uint32_t size_B)276 anv_utrace_capture_data(struct u_trace *ut,
277                         void *cs,
278                         void *dst_buffer,
279                         uint64_t dst_offset_B,
280                         void *src_buffer,
281                         uint64_t src_offset_B,
282                         uint32_t size_B)
283 {
284    struct anv_device *device =
285       container_of(ut->utctx, struct anv_device, ds.trace_context);
286    struct anv_cmd_buffer *cmd_buffer =
287       container_of(ut, struct anv_cmd_buffer, trace);
288    /* cmd_buffer is only valid if cs == NULL */
289    struct anv_batch *batch = cs != NULL ? cs : &cmd_buffer->batch;
290    struct anv_address dst_addr = {
291       .bo = dst_buffer,
292       .offset = dst_offset_B,
293    };
294    struct anv_address src_addr = {
295       .bo = src_buffer,
296       .offset = src_offset_B,
297    };
298 
299    device->physical->cmd_capture_data(batch, device, dst_addr, src_addr, size_B);
300 }
301 
302 static const void *
anv_utrace_get_data(struct u_trace_context * utctx,void * buffer,uint64_t offset_B,uint32_t size_B)303 anv_utrace_get_data(struct u_trace_context *utctx, void *buffer,
304                     uint64_t offset_B, uint32_t size_B)
305 {
306    struct anv_bo *bo = buffer;
307 
308    return bo->map + offset_B;
309 }
310 
311 void
anv_device_utrace_init(struct anv_device * device)312 anv_device_utrace_init(struct anv_device *device)
313 {
314    anv_bo_pool_init(&device->utrace_bo_pool, device, "utrace");
315    intel_ds_device_init(&device->ds, device->info, device->fd,
316                         device->physical->local_minor,
317                         INTEL_DS_API_VULKAN);
318    u_trace_context_init(&device->ds.trace_context,
319                         &device->ds,
320                         sizeof(uint64_t),
321                         0,
322                         anv_utrace_create_buffer,
323                         anv_utrace_destroy_buffer,
324                         anv_utrace_record_ts,
325                         anv_utrace_read_ts,
326                         anv_utrace_capture_data,
327                         anv_utrace_get_data,
328                         anv_utrace_delete_flush_data);
329 
330    for (uint32_t q = 0; q < device->queue_count; q++) {
331       struct anv_queue *queue = &device->queues[q];
332 
333       intel_ds_device_init_queue(&device->ds, &queue->ds, "%s%u",
334                                  intel_engines_class_to_string(queue->family->engine_class),
335                                  queue->vk.index_in_family);
336    }
337 }
338 
/* Tear down utrace support; must run before the device is destroyed.
 * Order matters: drain pending trace data, then destroy the datasource,
 * then the BO pool the traces were allocated from.
 */
void
anv_device_utrace_finish(struct anv_device *device)
{
   /* Process any remaining trace data synchronously (eof = true). */
   intel_ds_device_process(&device->ds, true);
   intel_ds_device_fini(&device->ds);
   anv_bo_pool_finish(&device->utrace_bo_pool);
}
346 
347 enum intel_ds_stall_flag
anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits)348 anv_pipe_flush_bit_to_ds_stall_flag(enum anv_pipe_bits bits)
349 {
350    static const struct {
351       enum anv_pipe_bits anv;
352       enum intel_ds_stall_flag ds;
353    } anv_to_ds_flags[] = {
354       { .anv = ANV_PIPE_DEPTH_CACHE_FLUSH_BIT,            .ds = INTEL_DS_DEPTH_CACHE_FLUSH_BIT, },
355       { .anv = ANV_PIPE_DATA_CACHE_FLUSH_BIT,             .ds = INTEL_DS_DATA_CACHE_FLUSH_BIT, },
356       { .anv = ANV_PIPE_TILE_CACHE_FLUSH_BIT,             .ds = INTEL_DS_TILE_CACHE_FLUSH_BIT, },
357       { .anv = ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,    .ds = INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT, },
358       { .anv = ANV_PIPE_STATE_CACHE_INVALIDATE_BIT,       .ds = INTEL_DS_STATE_CACHE_INVALIDATE_BIT, },
359       { .anv = ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT,    .ds = INTEL_DS_CONST_CACHE_INVALIDATE_BIT, },
360       { .anv = ANV_PIPE_VF_CACHE_INVALIDATE_BIT,          .ds = INTEL_DS_VF_CACHE_INVALIDATE_BIT, },
361       { .anv = ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,     .ds = INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT, },
362       { .anv = ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT, .ds = INTEL_DS_INST_CACHE_INVALIDATE_BIT, },
363       { .anv = ANV_PIPE_DEPTH_STALL_BIT,                  .ds = INTEL_DS_DEPTH_STALL_BIT, },
364       { .anv = ANV_PIPE_CS_STALL_BIT,                     .ds = INTEL_DS_CS_STALL_BIT, },
365       { .anv = ANV_PIPE_HDC_PIPELINE_FLUSH_BIT,           .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, },
366       { .anv = ANV_PIPE_STALL_AT_SCOREBOARD_BIT,          .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, },
367       { .anv = ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, .ds = INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, },
368    };
369 
370    enum intel_ds_stall_flag ret = 0;
371    for (uint32_t i = 0; i < ARRAY_SIZE(anv_to_ds_flags); i++) {
372       if (anv_to_ds_flags[i].anv & bits)
373          ret |= anv_to_ds_flags[i].ds;
374    }
375 
376    return ret;
377 }
378