1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Chris Wilson <[email protected]>
25 *
26 */
27
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <time.h>
#include <assert.h>

#include "drm.h"
#include "ioctl_wrappers.h"
#include "drmtest.h"
#include "intel_io.h"
#include "igt_stats.h"
47
/*
 * One-byte command tags found in a trace file. replay() reads a tag, then
 * interprets the bytes that follow as the matching trace_* record.
 */
enum {
	ADD_BO  = 0,	/* followed by struct trace_add_bo */
	DEL_BO  = 1,	/* followed by struct trace_del_bo */
	ADD_CTX = 2,	/* followed by struct trace_add_ctx */
	DEL_CTX = 3,	/* followed by struct trace_del_ctx */
	EXEC    = 4,	/* followed by struct trace_exec and its objects */
	WAIT    = 5,	/* followed by struct trace_wait */
};
56
/*
 * ADD_BO record: replay() creates a buffer object of @size bytes and stores
 * the resulting handle in its bo[] table at index @handle.
 */
struct trace_add_bo {
	uint32_t handle;	/* index into the replay bo[] table */
	uint64_t size;		/* size passed to gem_create() */
} __attribute__((packed));
61
/*
 * DEL_BO record: replay() closes the buffer object stored at bo[@handle]
 * and clears that table slot.
 */
struct trace_del_bo {
	uint32_t handle;	/* index into the replay bo[] table */
} __attribute__((packed));
65
/*
 * ADD_CTX record: replay() creates a context and stores its id in the
 * ctx[] table at index @handle.
 */
struct trace_add_ctx {
	uint32_t handle;	/* index into the replay ctx[] table */
} __attribute__((packed));
69
/*
 * DEL_CTX record: replay() destroys the context stored at ctx[@handle]
 * and clears that table slot.
 */
struct trace_del_ctx {
	uint32_t handle;	/* index into the replay ctx[] table */
} __attribute__((packed));
73
/*
 * EXEC record header. In the trace it is immediately followed by
 * @object_count struct trace_exec_object entries (each with its own
 * relocation entries), which replay() turns into one execbuf call.
 */
struct trace_exec {
	uint32_t object_count;	/* number of trace_exec_object entries */
	uint64_t flags;		/* copied to the execbuffer2 flags */
	uint32_t context;	/* index into the replay ctx[] table */
} __attribute__((packed));
79
/*
 * One object of an EXEC record. In the trace it is immediately followed by
 * @relocation_count struct drm_i915_gem_relocation_entry entries, which
 * replay() hands to the kernel in place. The remaining fields are copied
 * to the same-named fields of struct drm_i915_gem_exec_object2.
 */
struct trace_exec_object {
	uint32_t handle;		/* index into the replay bo[] table */
	uint32_t relocation_count;	/* relocation entries that follow */
	uint64_t alignment;
	uint64_t offset;
	uint64_t flags;
	uint64_t rsvd1;
	uint64_t rsvd2;
} __attribute__((packed));
89
/*
 * WAIT record: replay() waits on the buffer object stored at bo[@handle].
 */
struct trace_wait {
	uint32_t handle;	/* index into the replay bo[] table */
} __attribute__((packed));
93
/*
 * Small deterministic pseudo-random generator.
 *
 * Keeps a 32-bit state (seeded with 0x12345678), mixes it with itself
 * rotated left by 5 and by 24 bits, adds a constant, and returns the new
 * state. Not reentrant: the state lives in a function-local static.
 */
static uint32_t hars_petruska_f54_1_random(void)
{
	static uint32_t state = 0x12345678;
	const uint32_t rot5 = (state << 5) | (state >> (32 - 5));
	const uint32_t rot24 = (state << 24) | (state >> (32 - 24));

	state = (state ^ rot5 ^ rot24) + 0x37798849;
	return state;
}
102
/*
 * Time between @start and @end, in milliseconds.
 *
 * Whole seconds are scaled by 1e3 and nanoseconds by 1e-6 before being
 * summed, so the result is a fractional millisecond count.
 */
static double elapsed(const struct timespec *start, const struct timespec *end)
{
	const time_t secs = end->tv_sec - start->tv_sec;
	const long nsecs = end->tv_nsec - start->tv_nsec;

	return 1e3*secs + 1e-6*nsecs;
}
107
__gem_context_create_local(int fd)108 static uint32_t __gem_context_create_local(int fd)
109 {
110 struct drm_i915_gem_context_create arg = {};
111 drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &arg);
112 return arg.ctx_id;
113 }
114
replay(const char * filename,long nop,long range)115 static double replay(const char *filename, long nop, long range)
116 {
117 struct timespec t_start, t_end;
118 struct drm_i915_gem_execbuffer2 eb = {};
119 const struct trace_version {
120 uint32_t magic;
121 uint32_t version;
122 } *tv;
123 const uint32_t bbe = 0xa << 23;
124 struct drm_i915_gem_exec_object2 *exec_objects = NULL;
125 uint32_t *bo, *ctx;
126 int num_bo, num_ctx;
127 int max_objects = 0;
128 struct stat st;
129 uint8_t *ptr, *end;
130 int fd;
131
132 fd = open(filename, O_RDONLY);
133 if (fd < 0)
134 return -1;
135
136 if (fstat(fd, &st) < 0) {
137 close(fd);
138 return -1;
139 }
140
141 ptr = mmap(0, st.st_size, PROT_WRITE, MAP_PRIVATE, fd, 0);
142 close(fd);
143
144 if (ptr == MAP_FAILED)
145 return -1;
146
147 madvise(ptr, st.st_size, MADV_SEQUENTIAL);
148 end = ptr + st.st_size;
149
150 tv = (struct trace_version *)ptr;
151 if (tv->magic != 0xdeadbeef) {
152 fprintf(stderr, "%s: invalid magic\n", filename);
153 return -1;
154 }
155 if (tv->version != 1) {
156 fprintf(stderr, "%s: unhandled version %d\n",
157 filename, tv->version);
158 return -1;
159 }
160 ptr = (void *)(tv + 1);
161
162 ctx = calloc(1024, sizeof(*ctx));
163 num_ctx = 1024;
164
165 bo = calloc(4096, sizeof(*bo));
166 num_bo = 4096;
167
168 fd = drm_open_driver(DRIVER_INTEL);
169 if (nop > 0) {
170 bo[0] = gem_create(fd, nop + range);
171 gem_write(fd, bo[0], nop + range - sizeof(bbe),
172 &bbe, sizeof(bbe));
173 range *= 2;
174 range -= 64;
175 } else {
176 bo[0] = gem_create(fd, 4096);
177 gem_write(fd, bo[0], 0, &bbe, sizeof(bbe));
178 }
179
180 clock_gettime(CLOCK_MONOTONIC, &t_start);
181 do switch (*ptr++) {
182 case ADD_BO:
183 {
184 struct trace_add_bo *t = (void *)ptr;
185 ptr = (void *)(t + 1);
186
187 if (t->handle >= num_bo) {
188 int new_bo = ALIGN(t->handle, 4096);
189 bo = realloc(bo, sizeof(*bo)*new_bo);
190 memset(bo + num_bo, 0, sizeof(*bo)*(new_bo - num_bo));
191 num_bo = new_bo;
192 }
193
194 bo[t->handle] = gem_create(fd, t->size);
195 break;
196 }
197 case DEL_BO:
198 {
199 struct trace_del_bo *t = (void *)ptr;
200 ptr = (void *)(t + 1);
201
202 assert(t->handle && t->handle < num_bo && bo[t->handle]);
203 gem_close(fd, bo[t->handle]);
204 bo[t->handle] = 0;
205 break;
206 }
207 case ADD_CTX:
208 {
209 struct trace_add_ctx *t = (void *)ptr;
210 ptr = (void *)(t + 1);
211
212 if (t->handle >= num_ctx) {
213 int new_ctx = ALIGN(t->handle, 1024);
214 ctx = realloc(ctx, sizeof(*ctx)*new_ctx);
215 memset(ctx + num_ctx, 0, sizeof(*ctx)*(new_ctx - num_ctx));
216 num_ctx = new_ctx;
217 }
218
219 ctx[t->handle] = __gem_context_create_local(fd);
220 break;
221 }
222 case DEL_CTX:
223 {
224 struct trace_del_ctx *t = (void *)ptr;
225 ptr = (void *)(t + 1);
226
227 assert(t->handle < num_ctx && ctx[t->handle]);
228 gem_context_destroy(fd, ctx[t->handle]);
229 ctx[t->handle] = 0;
230 break;
231 }
232 case EXEC:
233 {
234 struct trace_exec *t = (void *)ptr;
235 ptr = (void *)(t + 1);
236
237 eb.buffer_count = t->object_count;
238 eb.flags = t->flags;
239 eb.rsvd1 = ctx[t->context];
240
241 if (eb.buffer_count >= max_objects) {
242 free(exec_objects);
243
244 max_objects = ALIGN(eb.buffer_count + 1, 4096);
245
246 exec_objects = malloc(max_objects*sizeof(*exec_objects));
247 eb.buffers_ptr = (uintptr_t)exec_objects;
248 }
249
250 for (uint32_t i = 0; i < eb.buffer_count; i++) {
251 struct trace_exec_object *to = (void *)ptr;
252 ptr = (void *)(to + 1);
253
254 exec_objects[i].handle = bo[to->handle];
255 exec_objects[i].alignment = to->alignment;
256 exec_objects[i].offset = to->offset;
257 exec_objects[i].flags = to->flags;
258 exec_objects[i].rsvd1 = to->rsvd1;
259 exec_objects[i].rsvd2 = to->rsvd2;
260
261 exec_objects[i].relocation_count = to->relocation_count;
262 exec_objects[i].relocs_ptr = (uintptr_t)ptr;
263
264 if (!(eb.flags & I915_EXEC_HANDLE_LUT)) {
265 struct drm_i915_gem_relocation_entry *relocs =
266 (struct drm_i915_gem_relocation_entry *)ptr;
267 for (uint32_t j = 0; j < to->relocation_count; j++)
268 relocs[j].target_handle = bo[relocs[j].target_handle];
269 }
270
271 ptr += sizeof(struct drm_i915_gem_relocation_entry) * to->relocation_count;
272 }
273
274 ((struct drm_i915_gem_exec_object2 *)
275 memset(&exec_objects[eb.buffer_count++], 0,
276 sizeof(*exec_objects)))->handle = bo[0];
277
278 if (nop > 0) {
279 eb.batch_start_offset = hars_petruska_f54_1_random();
280 eb.batch_start_offset =
281 ((uint64_t)eb.batch_start_offset * range) >> 32;
282 eb.batch_start_offset = ALIGN(eb.batch_start_offset, 64);
283 }
284 gem_execbuf(fd, &eb);
285 break;
286 }
287
288 case WAIT:
289 {
290 struct trace_wait *t = (void *)ptr;
291 ptr = (void *)(t + 1);
292
293 assert(t->handle && t->handle < num_bo && bo[t->handle]);
294 gem_wait(fd, bo[t->handle], NULL);
295 break;
296 }
297
298 default:
299 fprintf(stderr, "Unknown cmd: %x\n", *ptr);
300 return -1;
301 } while (ptr < end);
302 clock_gettime(CLOCK_MONOTONIC, &t_end);
303
304 return elapsed(&t_start, &t_end);
305 }
306
calibrate_nop(int usecs)307 static long calibrate_nop(int usecs)
308 {
309 const uint32_t bbe = 0xa << 23;
310 int fd = drm_open_driver(DRIVER_INTEL);
311 struct drm_i915_gem_exec_object2 obj = {};
312 struct drm_i915_gem_execbuffer2 eb = { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
313 unsigned long size, last_size;
314
315 size = 256*1024;
316 do {
317 struct timespec t_start, t_end;
318
319 obj.handle = gem_create(fd, size);
320 gem_write(fd, obj.handle, size - sizeof(bbe), &bbe, sizeof(bbe));
321 gem_execbuf(fd, &eb);
322 gem_sync(fd, obj.handle);
323
324 clock_gettime(CLOCK_MONOTONIC, &t_start);
325 for (int loop = 0; loop < 9; loop++)
326 gem_execbuf(fd, &eb);
327 gem_sync(fd, obj.handle);
328 clock_gettime(CLOCK_MONOTONIC, &t_end);
329
330 gem_close(fd, obj.handle);
331
332 last_size = size;
333 size = 9e-3*usecs / elapsed(&t_start, &t_end) * size;
334 size = ALIGN(size, 4096);
335 } while (size != last_size);
336
337 close(fd);
338 return size;
339 }
340
measure_nop(long nop)341 static int measure_nop(long nop)
342 {
343 const uint32_t bbe = 0xa << 23;
344 int fd = drm_open_driver(DRIVER_INTEL);
345 struct drm_i915_gem_exec_object2 obj = {};
346 struct drm_i915_gem_execbuffer2 eb = { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
347 struct timespec t_start, t_end;
348
349 obj.handle = gem_create(fd, nop);
350 gem_write(fd, obj.handle, nop - sizeof(bbe), &bbe, sizeof(bbe));
351 gem_execbuf(fd, &eb);
352 gem_sync(fd, obj.handle);
353
354 clock_gettime(CLOCK_MONOTONIC, &t_start);
355 for (int loop = 0; loop < 9; loop++)
356 gem_execbuf(fd, &eb);
357 gem_sync(fd, obj.handle);
358 clock_gettime(CLOCK_MONOTONIC, &t_end);
359
360 gem_close(fd, obj.handle);
361
362 close(fd);
363 return 1e3*elapsed(&t_start, &t_end) / 9;
364 }
365
main(int argc,char ** argv)366 int main(int argc, char **argv)
367 {
368 int delay = 1000;
369 double *results;
370 long nop = 0;
371 long range = 0;
372 int i, c;
373
374 results = mmap(NULL, ALIGN(argc*sizeof(double), 4096),
375 PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
376
377 while ((c = getopt(argc, argv, "d:n:r:")) != -1) {
378 switch (c) {
379 case 'd':
380 delay = atoi(optarg);
381 break;
382 case 'n':
383 nop = strtol(optarg, NULL, 0);
384 if (nop > 0)
385 nop = ALIGN(nop, 4096);
386 break;
387 case 'r':
388 range = strtol(optarg, NULL, 0);
389 if (range > 0)
390 range = ALIGN(range, 4096);
391 break;
392 default:
393 break;
394 }
395 }
396
397 if (!nop)
398 nop = calibrate_nop(delay);
399 if (!range)
400 range = nop / 2;
401 if (nop > 0) {
402 delay = measure_nop(nop);
403 printf("Using %lu nop batch for ~%dus delay, range %lu [%dus]\n",
404 nop, delay,
405 range, (int)(delay * range / nop));
406 }
407
408 igt_fork(child, argc-optind)
409 results[child] = replay(argv[child + optind], nop, range);
410 igt_waitchildren();
411
412 for (i = 0; i < argc - optind; i++) {
413 double t = results[i];
414 if (t < 0)
415 printf("%s: failed\n", argv[optind + i]);
416 else
417 printf("%s: %.3f\n", argv[optind + i], t);
418 }
419
420 return 0;
421 }
422