/* Source: /aosp_15_r20/external/igt-gpu-tools/benchmarks/gem_exec_trace.c (revision d83cc019efdc2edc6c4b16e9034a3ceb8d35d77c) */
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Chris Wilson <[email protected]>
25  *
26  */
27 
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <time.h>
#include <assert.h>
41 
42 #include "drm.h"
43 #include "ioctl_wrappers.h"
44 #include "drmtest.h"
45 #include "intel_io.h"
46 #include "igt_stats.h"
47 
/*
 * One-byte opcodes that introduce each record of a trace file; replay()
 * switches on them to decide how to decode the bytes that follow.
 */
enum {
	ADD_BO  = 0,	/* struct trace_add_bo: create a buffer object */
	DEL_BO  = 1,	/* struct trace_del_bo: close a buffer object */
	ADD_CTX = 2,	/* struct trace_add_ctx: create a context */
	DEL_CTX = 3,	/* struct trace_del_ctx: destroy a context */
	EXEC    = 4,	/* struct trace_exec: submit an execbuffer */
	WAIT    = 5,	/* struct trace_wait: wait on a buffer object */
};
56 
/*
 * Payload of an ADD_BO record. Packed so that it can be overlaid directly
 * on the bytes of the trace file.
 */
struct trace_add_bo {
	/* Trace-side handle; replay() uses it as the index into its bo[] table. */
	uint32_t handle;
	/* Size handed to gem_create() for the new object. */
	uint64_t size;
} __attribute__((packed));
61 
/* Payload of a DEL_BO record (packed, overlaid on the trace bytes). */
struct trace_del_bo {
	/* Trace-side handle of the buffer object to close. */
	uint32_t handle;
} __attribute__((packed));
65 
/* Payload of an ADD_CTX record (packed, overlaid on the trace bytes). */
struct trace_add_ctx {
	/* Trace-side context id; replay() uses it as the index into ctx[]. */
	uint32_t handle;
} __attribute__((packed));
69 
/* Payload of a DEL_CTX record (packed, overlaid on the trace bytes). */
struct trace_del_ctx {
	/* Trace-side id of the context to destroy. */
	uint32_t handle;
} __attribute__((packed));
73 
/*
 * Fixed header of an EXEC record. In the trace it is followed by
 * object_count struct trace_exec_object entries, each trailed by its own
 * relocation entries.
 */
struct trace_exec {
	/* Number of trace_exec_object entries that follow. */
	uint32_t object_count;
	/* Copied into drm_i915_gem_execbuffer2.flags. */
	uint64_t flags;
	/* Trace-side context id, translated through ctx[] into eb.rsvd1. */
	uint32_t context;
} __attribute__((packed));
79 
/*
 * Per-object entry inside an EXEC record. replay() copies the fields into
 * a struct drm_i915_gem_exec_object2; relocation_count relocation entries
 * (struct drm_i915_gem_relocation_entry) follow immediately in the trace.
 */
struct trace_exec_object {
	/* Trace-side handle, translated through bo[]. */
	uint32_t handle;
	/* Number of relocation entries trailing this object. */
	uint32_t relocation_count;
	uint64_t alignment;
	uint64_t offset;
	uint64_t flags;
	uint64_t rsvd1;
	uint64_t rsvd2;
} __attribute__((packed));
89 
/* Payload of a WAIT record (packed, overlaid on the trace bytes). */
struct trace_wait {
	/* Trace-side handle of the buffer object to wait on. */
	uint32_t handle;
} __attribute__((packed));
93 
/*
 * Tiny deterministic pseudo-random generator: every call mixes the previous
 * 32-bit state with two rotated copies of itself, adds a constant, stores
 * the result as the new state and returns it. The state is a function-local
 * static starting at a fixed seed, so the sequence is identical on each run.
 */
static uint32_t hars_petruska_f54_1_random(void)
{
	static uint32_t state = 0x12345678;
	const uint32_t rot5 = (state << 5) | (state >> 27);
	const uint32_t rot24 = (state << 24) | (state >> 8);

	state = (state ^ rot5 ^ rot24) + 0x37798849;
	return state;
}
102 
/*
 * Time from @start to @end expressed in milliseconds: whole seconds are
 * scaled up by 1e3 and the nanosecond difference is scaled down by 1e-6.
 */
static double elapsed(const struct timespec *start, const struct timespec *end)
{
	const double ms_from_sec = 1e3*(end->tv_sec - start->tv_sec);
	const double ms_from_nsec = 1e-6*(end->tv_nsec - start->tv_nsec);

	return ms_from_sec + ms_from_nsec;
}
107 
__gem_context_create_local(int fd)108 static uint32_t __gem_context_create_local(int fd)
109 {
110 	struct drm_i915_gem_context_create arg = {};
111 	drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &arg);
112 	return arg.ctx_id;
113 }
114 
replay(const char * filename,long nop,long range)115 static double replay(const char *filename, long nop, long range)
116 {
117 	struct timespec t_start, t_end;
118 	struct drm_i915_gem_execbuffer2 eb = {};
119 	const struct trace_version {
120 		uint32_t magic;
121 		uint32_t version;
122 	} *tv;
123 	const uint32_t bbe = 0xa << 23;
124 	struct drm_i915_gem_exec_object2 *exec_objects = NULL;
125 	uint32_t *bo, *ctx;
126 	int num_bo, num_ctx;
127 	int max_objects = 0;
128 	struct stat st;
129 	uint8_t *ptr, *end;
130 	int fd;
131 
132 	fd = open(filename, O_RDONLY);
133 	if (fd < 0)
134 		return -1;
135 
136 	if (fstat(fd, &st) < 0) {
137 		close(fd);
138 		return -1;
139 	}
140 
141 	ptr = mmap(0, st.st_size, PROT_WRITE, MAP_PRIVATE, fd, 0);
142 	close(fd);
143 
144 	if (ptr == MAP_FAILED)
145 		return -1;
146 
147 	madvise(ptr, st.st_size, MADV_SEQUENTIAL);
148 	end = ptr + st.st_size;
149 
150 	tv = (struct trace_version *)ptr;
151 	if (tv->magic != 0xdeadbeef) {
152 		fprintf(stderr, "%s: invalid magic\n", filename);
153 		return -1;
154 	}
155 	if (tv->version != 1) {
156 		fprintf(stderr, "%s: unhandled version %d\n",
157 			filename, tv->version);
158 		return -1;
159 	}
160 	ptr = (void *)(tv + 1);
161 
162 	ctx = calloc(1024, sizeof(*ctx));
163 	num_ctx = 1024;
164 
165 	bo = calloc(4096, sizeof(*bo));
166 	num_bo = 4096;
167 
168 	fd = drm_open_driver(DRIVER_INTEL);
169 	if (nop > 0) {
170 		bo[0] = gem_create(fd, nop + range);
171 		gem_write(fd, bo[0], nop + range - sizeof(bbe),
172 			  &bbe, sizeof(bbe));
173 		range *= 2;
174 		range -= 64;
175 	} else {
176 		bo[0] = gem_create(fd, 4096);
177 		gem_write(fd, bo[0], 0, &bbe, sizeof(bbe));
178 	}
179 
180 	clock_gettime(CLOCK_MONOTONIC, &t_start);
181 	do switch (*ptr++) {
182 	case ADD_BO:
183 		{
184 			struct trace_add_bo *t = (void *)ptr;
185 			ptr = (void *)(t + 1);
186 
187 			if (t->handle >= num_bo) {
188 				int new_bo = ALIGN(t->handle, 4096);
189 				bo = realloc(bo, sizeof(*bo)*new_bo);
190 				memset(bo + num_bo, 0, sizeof(*bo)*(new_bo - num_bo));
191 				num_bo = new_bo;
192 			}
193 
194 			bo[t->handle] = gem_create(fd, t->size);
195 			break;
196 		}
197 	case DEL_BO:
198 		{
199 			struct trace_del_bo *t = (void *)ptr;
200 			ptr = (void *)(t + 1);
201 
202 			assert(t->handle && t->handle < num_bo && bo[t->handle]);
203 			gem_close(fd, bo[t->handle]);
204 			bo[t->handle] = 0;
205 			break;
206 		}
207 	case ADD_CTX:
208 		{
209 			struct trace_add_ctx *t = (void *)ptr;
210 			ptr = (void *)(t + 1);
211 
212 			if (t->handle >= num_ctx) {
213 				int new_ctx = ALIGN(t->handle, 1024);
214 				ctx = realloc(ctx, sizeof(*ctx)*new_ctx);
215 				memset(ctx + num_ctx, 0, sizeof(*ctx)*(new_ctx - num_ctx));
216 				num_ctx = new_ctx;
217 			}
218 
219 			ctx[t->handle] = __gem_context_create_local(fd);
220 			break;
221 		}
222 	case DEL_CTX:
223 		{
224 			struct trace_del_ctx *t = (void *)ptr;
225 			ptr = (void *)(t + 1);
226 
227 			assert(t->handle < num_ctx && ctx[t->handle]);
228 			gem_context_destroy(fd, ctx[t->handle]);
229 			ctx[t->handle] = 0;
230 			break;
231 		}
232 	case EXEC:
233 		{
234 			struct trace_exec *t = (void *)ptr;
235 			ptr = (void *)(t + 1);
236 
237 			eb.buffer_count = t->object_count;
238 			eb.flags = t->flags;
239 			eb.rsvd1 = ctx[t->context];
240 
241 			if (eb.buffer_count >= max_objects) {
242 				free(exec_objects);
243 
244 				max_objects = ALIGN(eb.buffer_count + 1, 4096);
245 
246 				exec_objects = malloc(max_objects*sizeof(*exec_objects));
247 				eb.buffers_ptr = (uintptr_t)exec_objects;
248 			}
249 
250 			for (uint32_t i = 0; i < eb.buffer_count; i++) {
251 				struct trace_exec_object *to = (void *)ptr;
252 				ptr = (void *)(to + 1);
253 
254 				exec_objects[i].handle = bo[to->handle];
255 				exec_objects[i].alignment = to->alignment;
256 				exec_objects[i].offset = to->offset;
257 				exec_objects[i].flags = to->flags;
258 				exec_objects[i].rsvd1 = to->rsvd1;
259 				exec_objects[i].rsvd2 = to->rsvd2;
260 
261 				exec_objects[i].relocation_count = to->relocation_count;
262 				exec_objects[i].relocs_ptr = (uintptr_t)ptr;
263 
264 				if (!(eb.flags & I915_EXEC_HANDLE_LUT)) {
265 					struct drm_i915_gem_relocation_entry *relocs =
266 						(struct drm_i915_gem_relocation_entry *)ptr;
267 					for (uint32_t j = 0; j < to->relocation_count; j++)
268 						relocs[j].target_handle = bo[relocs[j].target_handle];
269 				}
270 
271 				ptr += sizeof(struct drm_i915_gem_relocation_entry) * to->relocation_count;
272 			}
273 
274 			((struct drm_i915_gem_exec_object2 *)
275 			 memset(&exec_objects[eb.buffer_count++], 0,
276 				sizeof(*exec_objects)))->handle = bo[0];
277 
278 			if (nop > 0) {
279 				eb.batch_start_offset = hars_petruska_f54_1_random();
280 				eb.batch_start_offset =
281 					((uint64_t)eb.batch_start_offset * range) >> 32;
282 				eb.batch_start_offset = ALIGN(eb.batch_start_offset, 64);
283 			}
284 			gem_execbuf(fd, &eb);
285 			break;
286 		}
287 
288 	case WAIT:
289 		{
290 			struct trace_wait *t = (void *)ptr;
291 			ptr = (void *)(t + 1);
292 
293 			assert(t->handle && t->handle < num_bo && bo[t->handle]);
294 			gem_wait(fd, bo[t->handle], NULL);
295 			break;
296 		}
297 
298 	default:
299 		fprintf(stderr, "Unknown cmd: %x\n", *ptr);
300 		return -1;
301 	} while (ptr < end);
302 	clock_gettime(CLOCK_MONOTONIC, &t_end);
303 
304 	return elapsed(&t_start, &t_end);
305 }
306 
calibrate_nop(int usecs)307 static long calibrate_nop(int usecs)
308 {
309 	const uint32_t bbe = 0xa << 23;
310 	int fd = drm_open_driver(DRIVER_INTEL);
311 	struct drm_i915_gem_exec_object2 obj = {};
312 	struct drm_i915_gem_execbuffer2 eb = { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
313 	unsigned long size, last_size;
314 
315 	size = 256*1024;
316 	do {
317 		struct timespec t_start, t_end;
318 
319 		obj.handle = gem_create(fd, size);
320 		gem_write(fd, obj.handle, size - sizeof(bbe), &bbe, sizeof(bbe));
321 		gem_execbuf(fd, &eb);
322 		gem_sync(fd, obj.handle);
323 
324 		clock_gettime(CLOCK_MONOTONIC, &t_start);
325 		for (int loop = 0; loop < 9; loop++)
326 			gem_execbuf(fd, &eb);
327 		gem_sync(fd, obj.handle);
328 		clock_gettime(CLOCK_MONOTONIC, &t_end);
329 
330 		gem_close(fd, obj.handle);
331 
332 		last_size = size;
333 		size = 9e-3*usecs / elapsed(&t_start, &t_end) * size;
334 		size = ALIGN(size, 4096);
335 	} while (size != last_size);
336 
337 	close(fd);
338 	return size;
339 }
340 
measure_nop(long nop)341 static int measure_nop(long nop)
342 {
343 	const uint32_t bbe = 0xa << 23;
344 	int fd = drm_open_driver(DRIVER_INTEL);
345 	struct drm_i915_gem_exec_object2 obj = {};
346 	struct drm_i915_gem_execbuffer2 eb = { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
347 	struct timespec t_start, t_end;
348 
349 	obj.handle = gem_create(fd, nop);
350 	gem_write(fd, obj.handle, nop - sizeof(bbe), &bbe, sizeof(bbe));
351 	gem_execbuf(fd, &eb);
352 	gem_sync(fd, obj.handle);
353 
354 	clock_gettime(CLOCK_MONOTONIC, &t_start);
355 	for (int loop = 0; loop < 9; loop++)
356 		gem_execbuf(fd, &eb);
357 	gem_sync(fd, obj.handle);
358 	clock_gettime(CLOCK_MONOTONIC, &t_end);
359 
360 	gem_close(fd, obj.handle);
361 
362 	close(fd);
363 	return 1e3*elapsed(&t_start, &t_end) / 9;
364 }
365 
main(int argc,char ** argv)366 int main(int argc, char **argv)
367 {
368 	int delay = 1000;
369 	double *results;
370 	long nop = 0;
371 	long range = 0;
372 	int i, c;
373 
374 	results = mmap(NULL, ALIGN(argc*sizeof(double), 4096),
375 		       PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
376 
377 	while ((c = getopt(argc, argv, "d:n:r:")) != -1) {
378 		switch (c) {
379 		case 'd':
380 			delay = atoi(optarg);
381 			break;
382 		case 'n':
383 			nop = strtol(optarg, NULL, 0);
384 			if (nop > 0)
385 				nop = ALIGN(nop, 4096);
386 			break;
387 		case 'r':
388 			range = strtol(optarg, NULL, 0);
389 			if (range > 0)
390 				range = ALIGN(range, 4096);
391 			break;
392 		default:
393 			break;
394 		}
395 	}
396 
397 	if (!nop)
398 		nop = calibrate_nop(delay);
399 	if (!range)
400 		range = nop / 2;
401 	if (nop > 0) {
402 		delay = measure_nop(nop);
403 		printf("Using %lu nop batch for ~%dus delay, range %lu [%dus]\n",
404 		       nop, delay,
405 		       range, (int)(delay * range / nop));
406 	}
407 
408 	igt_fork(child, argc-optind)
409 		results[child] = replay(argv[child + optind], nop, range);
410 	igt_waitchildren();
411 
412 	for (i = 0; i < argc - optind; i++) {
413 		double t = results[i];
414 		if (t < 0)
415 			printf("%s: failed\n", argv[optind + i]);
416 		else
417 			printf("%s: %.3f\n", argv[optind + i], t);
418 	}
419 
420 	return 0;
421 }
422