xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/iris/iris_utrace.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "iris_batch.h"
25 #include "iris_context.h"
26 #include "iris_utrace.h"
27 
28 #include "util/u_trace_gallium.h"
29 
30 #include "ds/intel_driver_ds.h"
31 
32 #ifdef MAJOR_IN_MKDEV
33 #include <sys/mkdev.h>
34 #endif
35 #ifdef MAJOR_IN_SYSMACROS
36 #include <sys/sysmacros.h>
37 #endif
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <unistd.h>
41 
/**
 * Storage for a single utrace timestamp slot.
 *
 * All members overlay the same bytes; which one holds meaningful data
 * depends on the command that wrote the slot.
 */
union iris_utrace_timestamp {
   /* Timestamp written by either 2 * MI_STORE_REGISTER_MEM or
    * PIPE_CONTROL.
    */
   uint64_t timestamp;

   /* Timestamp written by COMPUTE_WALKER::PostSync
    *
    * Layout is described in PRMs.
    * ATSM PRMs, Volume 2d: Command Reference: Structures, POSTSYNC_DATA:
    *
    *    "The timestamp layout :
    *        [0] = 32b Context Timestamp Start
    *        [1] = 32b Global Timestamp Start
    *        [2] = 32b Context Timestamp End
    *        [3] = 32b Global Timestamp End"
    */
   uint32_t gfx125_postsync_data[4];

   /* Timestamp written by COMPUTE_WALKER::PostSync
    *
    * BSpec 56591:
    *
    *    "The timestamp layout :
    *       [0] = 64b Context Timestamp Start
    *       [1] = 64b Global Timestamp Start
    *       [2] = 64b Context Timestamp End
    *       [3] = 64b Global Timestamp End"
    */
   uint64_t gfx20_postsync_data[4];
};
74 
75 static void *
iris_utrace_create_buffer(struct u_trace_context * utctx,uint64_t size_B)76 iris_utrace_create_buffer(struct u_trace_context *utctx, uint64_t size_B)
77 {
78    struct iris_context *ice =
79       container_of(utctx, struct iris_context, ds.trace_context);
80    struct pipe_context *ctx = &ice->ctx;
81    struct iris_screen *screen = (struct iris_screen *)ctx->screen;
82 
83    struct iris_bo *bo =
84       iris_bo_alloc(screen->bufmgr, "utrace timestamps",
85                     size_B, 16 /* alignment */,
86                     IRIS_MEMZONE_OTHER,
87                     BO_ALLOC_COHERENT | BO_ALLOC_SMEM);
88 
89    void *ptr = iris_bo_map(NULL, bo, MAP_READ | MAP_WRITE);
90    memset(ptr, 0, size_B);
91 
92    return bo;
93 }
94 
/**
 * u_trace callback: drop the reference held on a timestamp buffer.
 *
 * \param utctx       unused
 * \param timestamps  the iris_bo handed out by iris_utrace_create_buffer()
 */
static void
iris_utrace_delete_buffer(struct u_trace_context *utctx, void *timestamps)
{
   iris_bo_unreference((struct iris_bo *)timestamps);
}
100 }
101 
102 static void
iris_utrace_record_ts(struct u_trace * trace,void * cs,void * timestamps,uint64_t offset_B,uint32_t flags)103 iris_utrace_record_ts(struct u_trace *trace, void *cs,
104                       void *timestamps, uint64_t offset_B,
105                       uint32_t flags)
106 {
107    struct iris_batch *batch = container_of(trace, struct iris_batch, trace);
108    struct iris_context *ice = batch->ice;
109    struct iris_bo *bo = timestamps;
110 
111    iris_use_pinned_bo(batch, bo, true, IRIS_DOMAIN_NONE);
112 
113    const bool is_end_compute =
114       cs == NULL &&
115       (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE_CS);
116    if (is_end_compute) {
117       assert(ice->utrace.last_compute_walker != NULL);
118       batch->screen->vtbl.rewrite_compute_walker_pc(
119          batch, ice->utrace.last_compute_walker, bo, offset_B);
120       ice->utrace.last_compute_walker = NULL;
121    } else if (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE) {
122       iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
123                                    PIPE_CONTROL_WRITE_TIMESTAMP,
124                                    bo, offset_B, 0ull);
125    } else {
126       batch->screen->vtbl.store_register_mem64(batch, 0x2358,
127                                                bo, offset_B,
128                                                false);
129    }
130 }
131 
132 static uint64_t
iris_utrace_read_ts(struct u_trace_context * utctx,void * timestamps,uint64_t offset_B,void * flush_data)133 iris_utrace_read_ts(struct u_trace_context *utctx,
134                     void *timestamps, uint64_t offset_B, void *flush_data)
135 {
136    struct iris_context *ice =
137       container_of(utctx, struct iris_context, ds.trace_context);
138    struct pipe_context *ctx = &ice->ctx;
139    struct iris_screen *screen = (struct iris_screen *)ctx->screen;
140    struct iris_bo *bo = timestamps;
141 
142    if (offset_B == 0)
143       iris_bo_wait_rendering(bo);
144 
145    union iris_utrace_timestamp *ts = iris_bo_map(NULL, bo, MAP_READ) + offset_B;
146 
147    /* Don't translate the no-timestamp marker: */
148    if (ts->timestamp == U_TRACE_NO_TIMESTAMP)
149       return U_TRACE_NO_TIMESTAMP;
150 
151    /* Detect a 16/32 bytes timestamp write */
152    if (ts->gfx20_postsync_data[1] != 0 ||
153        ts->gfx20_postsync_data[2] != 0 ||
154        ts->gfx20_postsync_data[3] != 0) {
155       if (screen->devinfo->ver >= 20) {
156          return intel_device_info_timebase_scale(screen->devinfo,
157                                                  ts->gfx20_postsync_data[3]);
158       }
159 
160       /* The timestamp written by COMPUTE_WALKER::PostSync only as 32bits. We
161        * need to rebuild the full 64bits using the previous timestamp. We
162        * assume that utrace is reading the timestamp in order. Anyway
163        * timestamp rollover on 32bits in a few minutes so in most cases that
164        * should be correct.
165        */
166       uint64_t timestamp =
167          (ice->utrace.last_full_timestamp & 0xffffffff00000000) |
168          (uint64_t) ts->gfx125_postsync_data[3];
169 
170       return intel_device_info_timebase_scale(screen->devinfo, timestamp);
171    }
172 
173    ice->utrace.last_full_timestamp = ts->timestamp;
174 
175    return intel_device_info_timebase_scale(screen->devinfo, ts->timestamp);
176 }
177 
178 static void
iris_utrace_delete_flush_data(struct u_trace_context * utctx,void * flush_data)179 iris_utrace_delete_flush_data(struct u_trace_context *utctx,
180                               void *flush_data)
181 {
182    free(flush_data);
183 }
184 
iris_utrace_flush(struct iris_batch * batch,uint64_t submission_id)185 void iris_utrace_flush(struct iris_batch *batch, uint64_t submission_id)
186 {
187    struct intel_ds_flush_data *flush_data = malloc(sizeof(*flush_data));
188    intel_ds_flush_data_init(flush_data, &batch->ds, submission_id);
189    intel_ds_queue_flush_data(&batch->ds, &batch->trace, flush_data,
190                              U_TRACE_FRAME_UNKNOWN, false);
191 }
192 
iris_utrace_init(struct iris_context * ice)193 void iris_utrace_init(struct iris_context *ice)
194 {
195    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
196 
197    struct stat st;
198    uint32_t minor;
199 
200    if (fstat(screen->fd, &st) == 0)
201       minor = minor(st.st_rdev);
202    else
203       minor = 0;
204 
205    intel_ds_device_init(&ice->ds, screen->devinfo, screen->fd, minor,
206                         INTEL_DS_API_OPENGL);
207 
208    u_trace_context_init(&ice->ds.trace_context, &ice->ctx,
209                         sizeof(union iris_utrace_timestamp),
210                         0,
211                         iris_utrace_create_buffer,
212                         iris_utrace_delete_buffer,
213                         iris_utrace_record_ts,
214                         iris_utrace_read_ts,
215                         NULL,
216                         NULL,
217                         iris_utrace_delete_flush_data);
218 
219    for (int i = 0; i < IRIS_BATCH_COUNT; i++) {
220       intel_ds_device_init_queue(&ice->ds, &ice->batches[i].ds, "%s",
221                                  iris_batch_name_to_string(i));
222    }
223 }
224 
iris_utrace_fini(struct iris_context * ice)225 void iris_utrace_fini(struct iris_context *ice)
226 {
227    intel_ds_device_fini(&ice->ds);
228 }
229 
230 enum intel_ds_stall_flag
iris_utrace_pipe_flush_bit_to_ds_stall_flag(uint32_t flags)231 iris_utrace_pipe_flush_bit_to_ds_stall_flag(uint32_t flags)
232 {
233    static const struct {
234       uint32_t iris;
235       enum intel_ds_stall_flag ds;
236    } iris_to_ds_flags[] = {
237       { .iris = PIPE_CONTROL_DEPTH_CACHE_FLUSH,            .ds = INTEL_DS_DEPTH_CACHE_FLUSH_BIT, },
238       { .iris = PIPE_CONTROL_DATA_CACHE_FLUSH,             .ds = INTEL_DS_DATA_CACHE_FLUSH_BIT, },
239       { .iris = PIPE_CONTROL_TILE_CACHE_FLUSH,             .ds = INTEL_DS_TILE_CACHE_FLUSH_BIT, },
240       { .iris = PIPE_CONTROL_RENDER_TARGET_FLUSH,          .ds = INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT, },
241       { .iris = PIPE_CONTROL_STATE_CACHE_INVALIDATE,       .ds = INTEL_DS_STATE_CACHE_INVALIDATE_BIT, },
242       { .iris = PIPE_CONTROL_CONST_CACHE_INVALIDATE,       .ds = INTEL_DS_CONST_CACHE_INVALIDATE_BIT, },
243       { .iris = PIPE_CONTROL_VF_CACHE_INVALIDATE,          .ds = INTEL_DS_VF_CACHE_INVALIDATE_BIT, },
244       { .iris = PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE,     .ds = INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT, },
245       { .iris = PIPE_CONTROL_INSTRUCTION_INVALIDATE,       .ds = INTEL_DS_INST_CACHE_INVALIDATE_BIT, },
246       { .iris = PIPE_CONTROL_DEPTH_STALL,                  .ds = INTEL_DS_DEPTH_STALL_BIT, },
247       { .iris = PIPE_CONTROL_CS_STALL,                     .ds = INTEL_DS_CS_STALL_BIT, },
248       { .iris = PIPE_CONTROL_FLUSH_HDC,                    .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, },
249       { .iris = PIPE_CONTROL_STALL_AT_SCOREBOARD,          .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, },
250       { .iris = PIPE_CONTROL_UNTYPED_DATAPORT_CACHE_FLUSH, .ds = INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, },
251       { .iris = PIPE_CONTROL_CCS_CACHE_FLUSH,              .ds = INTEL_DS_CCS_CACHE_FLUSH_BIT, },
252    };
253 
254    enum intel_ds_stall_flag ret = 0;
255    for (uint32_t i = 0; i < ARRAY_SIZE(iris_to_ds_flags); i++) {
256       if (iris_to_ds_flags[i].iris & flags)
257          ret |= iris_to_ds_flags[i].ds;
258    }
259 
260    assert(ret != 0);
261 
262    return ret;
263 }
264