1 /*
2 * Copyright © 2021 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "iris_batch.h"
25 #include "iris_context.h"
26 #include "iris_utrace.h"
27
28 #include "util/u_trace_gallium.h"
29
30 #include "ds/intel_driver_ds.h"
31
32 #ifdef MAJOR_IN_MKDEV
33 #include <sys/mkdev.h>
34 #endif
35 #ifdef MAJOR_IN_SYSMACROS
36 #include <sys/sysmacros.h>
37 #endif
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <unistd.h>
41
/**
 * Layout of a single timestamp slot inside a utrace timestamp buffer.
 *
 * Every member starts at the same address; which one carries meaningful data
 * depends on the command that wrote the slot.
 */
union iris_utrace_timestamp {
   /* Plain 64-bit timestamp, written by either 2 * MI_STORE_REGISTER_MEM or
    * PIPE_CONTROL.
    */
   uint64_t timestamp;

   /* Data written by COMPUTE_WALKER::PostSync, four 32-bit values.
    *
    * The layout comes from the PRMs.
    * ATSM PRMs, Volume 2d: Command Reference: Structures, POSTSYNC_DATA:
    *
    *    "The timestamp layout :
    *        [0] = 32b Context Timestamp Start
    *        [1] = 32b Global Timestamp Start
    *        [2] = 32b Context Timestamp End
    *        [3] = 32b Global Timestamp End"
    */
   uint32_t gfx125_postsync_data[4];

   /* Data written by COMPUTE_WALKER::PostSync, four 64-bit values.
    *
    * BSpec 56591:
    *
    *    "The timestamp layout :
    *        [0] = 64b Context Timestamp Start
    *        [1] = 64b Global Timestamp Start
    *        [2] = 64b Context Timestamp End
    *        [3] = 64b Global Timestamp End"
    */
   uint64_t gfx20_postsync_data[4];
};
74
75 static void *
iris_utrace_create_buffer(struct u_trace_context * utctx,uint64_t size_B)76 iris_utrace_create_buffer(struct u_trace_context *utctx, uint64_t size_B)
77 {
78 struct iris_context *ice =
79 container_of(utctx, struct iris_context, ds.trace_context);
80 struct pipe_context *ctx = &ice->ctx;
81 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
82
83 struct iris_bo *bo =
84 iris_bo_alloc(screen->bufmgr, "utrace timestamps",
85 size_B, 16 /* alignment */,
86 IRIS_MEMZONE_OTHER,
87 BO_ALLOC_COHERENT | BO_ALLOC_SMEM);
88
89 void *ptr = iris_bo_map(NULL, bo, MAP_READ | MAP_WRITE);
90 memset(ptr, 0, size_B);
91
92 return bo;
93 }
94
/**
 * u_trace callback: release a timestamp buffer.
 *
 * \param utctx       trace context (not used here)
 * \param timestamps  opaque buffer pointer; it is treated as a struct iris_bo
 *                    and one reference on it is dropped
 */
static void
iris_utrace_delete_buffer(struct u_trace_context *utctx, void *timestamps)
{
   iris_bo_unreference((struct iris_bo *)timestamps);
}
101
102 static void
iris_utrace_record_ts(struct u_trace * trace,void * cs,void * timestamps,uint64_t offset_B,uint32_t flags)103 iris_utrace_record_ts(struct u_trace *trace, void *cs,
104 void *timestamps, uint64_t offset_B,
105 uint32_t flags)
106 {
107 struct iris_batch *batch = container_of(trace, struct iris_batch, trace);
108 struct iris_context *ice = batch->ice;
109 struct iris_bo *bo = timestamps;
110
111 iris_use_pinned_bo(batch, bo, true, IRIS_DOMAIN_NONE);
112
113 const bool is_end_compute =
114 cs == NULL &&
115 (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE_CS);
116 if (is_end_compute) {
117 assert(ice->utrace.last_compute_walker != NULL);
118 batch->screen->vtbl.rewrite_compute_walker_pc(
119 batch, ice->utrace.last_compute_walker, bo, offset_B);
120 ice->utrace.last_compute_walker = NULL;
121 } else if (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE) {
122 iris_emit_pipe_control_write(batch, "query: pipelined snapshot write",
123 PIPE_CONTROL_WRITE_TIMESTAMP,
124 bo, offset_B, 0ull);
125 } else {
126 batch->screen->vtbl.store_register_mem64(batch, 0x2358,
127 bo, offset_B,
128 false);
129 }
130 }
131
132 static uint64_t
iris_utrace_read_ts(struct u_trace_context * utctx,void * timestamps,uint64_t offset_B,void * flush_data)133 iris_utrace_read_ts(struct u_trace_context *utctx,
134 void *timestamps, uint64_t offset_B, void *flush_data)
135 {
136 struct iris_context *ice =
137 container_of(utctx, struct iris_context, ds.trace_context);
138 struct pipe_context *ctx = &ice->ctx;
139 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
140 struct iris_bo *bo = timestamps;
141
142 if (offset_B == 0)
143 iris_bo_wait_rendering(bo);
144
145 union iris_utrace_timestamp *ts = iris_bo_map(NULL, bo, MAP_READ) + offset_B;
146
147 /* Don't translate the no-timestamp marker: */
148 if (ts->timestamp == U_TRACE_NO_TIMESTAMP)
149 return U_TRACE_NO_TIMESTAMP;
150
151 /* Detect a 16/32 bytes timestamp write */
152 if (ts->gfx20_postsync_data[1] != 0 ||
153 ts->gfx20_postsync_data[2] != 0 ||
154 ts->gfx20_postsync_data[3] != 0) {
155 if (screen->devinfo->ver >= 20) {
156 return intel_device_info_timebase_scale(screen->devinfo,
157 ts->gfx20_postsync_data[3]);
158 }
159
160 /* The timestamp written by COMPUTE_WALKER::PostSync only as 32bits. We
161 * need to rebuild the full 64bits using the previous timestamp. We
162 * assume that utrace is reading the timestamp in order. Anyway
163 * timestamp rollover on 32bits in a few minutes so in most cases that
164 * should be correct.
165 */
166 uint64_t timestamp =
167 (ice->utrace.last_full_timestamp & 0xffffffff00000000) |
168 (uint64_t) ts->gfx125_postsync_data[3];
169
170 return intel_device_info_timebase_scale(screen->devinfo, timestamp);
171 }
172
173 ice->utrace.last_full_timestamp = ts->timestamp;
174
175 return intel_device_info_timebase_scale(screen->devinfo, ts->timestamp);
176 }
177
/**
 * u_trace callback: release the flush data attached to a flushed trace.
 *
 * \param utctx       trace context (not used here)
 * \param flush_data  heap block to release with free(); NULL is accepted
 */
static void
iris_utrace_delete_flush_data(struct u_trace_context *utctx,
                              void *flush_data)
{
   (void)utctx;

   free(flush_data);
}
184
iris_utrace_flush(struct iris_batch * batch,uint64_t submission_id)185 void iris_utrace_flush(struct iris_batch *batch, uint64_t submission_id)
186 {
187 struct intel_ds_flush_data *flush_data = malloc(sizeof(*flush_data));
188 intel_ds_flush_data_init(flush_data, &batch->ds, submission_id);
189 intel_ds_queue_flush_data(&batch->ds, &batch->trace, flush_data,
190 U_TRACE_FRAME_UNKNOWN, false);
191 }
192
iris_utrace_init(struct iris_context * ice)193 void iris_utrace_init(struct iris_context *ice)
194 {
195 struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
196
197 struct stat st;
198 uint32_t minor;
199
200 if (fstat(screen->fd, &st) == 0)
201 minor = minor(st.st_rdev);
202 else
203 minor = 0;
204
205 intel_ds_device_init(&ice->ds, screen->devinfo, screen->fd, minor,
206 INTEL_DS_API_OPENGL);
207
208 u_trace_context_init(&ice->ds.trace_context, &ice->ctx,
209 sizeof(union iris_utrace_timestamp),
210 0,
211 iris_utrace_create_buffer,
212 iris_utrace_delete_buffer,
213 iris_utrace_record_ts,
214 iris_utrace_read_ts,
215 NULL,
216 NULL,
217 iris_utrace_delete_flush_data);
218
219 for (int i = 0; i < IRIS_BATCH_COUNT; i++) {
220 intel_ds_device_init_queue(&ice->ds, &ice->batches[i].ds, "%s",
221 iris_batch_name_to_string(i));
222 }
223 }
224
iris_utrace_fini(struct iris_context * ice)225 void iris_utrace_fini(struct iris_context *ice)
226 {
227 intel_ds_device_fini(&ice->ds);
228 }
229
230 enum intel_ds_stall_flag
iris_utrace_pipe_flush_bit_to_ds_stall_flag(uint32_t flags)231 iris_utrace_pipe_flush_bit_to_ds_stall_flag(uint32_t flags)
232 {
233 static const struct {
234 uint32_t iris;
235 enum intel_ds_stall_flag ds;
236 } iris_to_ds_flags[] = {
237 { .iris = PIPE_CONTROL_DEPTH_CACHE_FLUSH, .ds = INTEL_DS_DEPTH_CACHE_FLUSH_BIT, },
238 { .iris = PIPE_CONTROL_DATA_CACHE_FLUSH, .ds = INTEL_DS_DATA_CACHE_FLUSH_BIT, },
239 { .iris = PIPE_CONTROL_TILE_CACHE_FLUSH, .ds = INTEL_DS_TILE_CACHE_FLUSH_BIT, },
240 { .iris = PIPE_CONTROL_RENDER_TARGET_FLUSH, .ds = INTEL_DS_RENDER_TARGET_CACHE_FLUSH_BIT, },
241 { .iris = PIPE_CONTROL_STATE_CACHE_INVALIDATE, .ds = INTEL_DS_STATE_CACHE_INVALIDATE_BIT, },
242 { .iris = PIPE_CONTROL_CONST_CACHE_INVALIDATE, .ds = INTEL_DS_CONST_CACHE_INVALIDATE_BIT, },
243 { .iris = PIPE_CONTROL_VF_CACHE_INVALIDATE, .ds = INTEL_DS_VF_CACHE_INVALIDATE_BIT, },
244 { .iris = PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE, .ds = INTEL_DS_TEXTURE_CACHE_INVALIDATE_BIT, },
245 { .iris = PIPE_CONTROL_INSTRUCTION_INVALIDATE, .ds = INTEL_DS_INST_CACHE_INVALIDATE_BIT, },
246 { .iris = PIPE_CONTROL_DEPTH_STALL, .ds = INTEL_DS_DEPTH_STALL_BIT, },
247 { .iris = PIPE_CONTROL_CS_STALL, .ds = INTEL_DS_CS_STALL_BIT, },
248 { .iris = PIPE_CONTROL_FLUSH_HDC, .ds = INTEL_DS_HDC_PIPELINE_FLUSH_BIT, },
249 { .iris = PIPE_CONTROL_STALL_AT_SCOREBOARD, .ds = INTEL_DS_STALL_AT_SCOREBOARD_BIT, },
250 { .iris = PIPE_CONTROL_UNTYPED_DATAPORT_CACHE_FLUSH, .ds = INTEL_DS_UNTYPED_DATAPORT_CACHE_FLUSH_BIT, },
251 { .iris = PIPE_CONTROL_CCS_CACHE_FLUSH, .ds = INTEL_DS_CCS_CACHE_FLUSH_BIT, },
252 };
253
254 enum intel_ds_stall_flag ret = 0;
255 for (uint32_t i = 0; i < ARRAY_SIZE(iris_to_ds_flags); i++) {
256 if (iris_to_ds_flags[i].iris & flags)
257 ret |= iris_to_ds_flags[i].ds;
258 }
259
260 assert(ret != 0);
261
262 return ret;
263 }
264