1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "config.h"
25
26 #include <sys/poll.h>
27 #include <sys/ioctl.h>
28 #include <sched.h>
29 #include <signal.h>
30
31 #include "igt.h"
32 #include "igt_gpu_power.h"
33 #include "igt_rand.h"
34 #include "igt_sysfs.h"
35 #include "igt_vgem.h"
36 #include "i915/gem_ring.h"
37
38 #define LO 0
39 #define HI 1
40 #define NOISE 2
41
42 #define MAX_PRIO LOCAL_I915_CONTEXT_MAX_USER_PRIORITY
43 #define MIN_PRIO LOCAL_I915_CONTEXT_MIN_USER_PRIORITY
44
45 #define MAX_ELSP_QLEN 16
46
47 #define MAX_ENGINES 16
48
49 #define MAX_CONTEXTS 1024
50
51 #define LOCAL_I915_EXEC_BSD_SHIFT (13)
52 #define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT)
53 #define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
54
55 #define MI_SEMAPHORE_WAIT (0x1c << 23)
56 #define MI_SEMAPHORE_POLL (1 << 15)
57 #define MI_SEMAPHORE_SAD_GT_SDD (0 << 12)
58 #define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
59 #define MI_SEMAPHORE_SAD_LT_SDD (2 << 12)
60 #define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12)
61 #define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12)
62 #define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12)
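/*
 * These flags combine into a 4-dword MI_SEMAPHORE_WAIT command (gen8+).
 * Roughly, as assembled in semaphore_resolve() below:
 *
 *   dw0: MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL |
 *        MI_SEMAPHORE_SAD_EQ_SDD | (4 - 2)      [length field: dwords - 2]
 *   dw1: semaphore data operand to compare against
 *   dw2: semaphore address, low 32 bits
 *   dw3: semaphore address, high 32 bits
 *
 * i.e. the engine polls the dword at that address until the selected
 * comparison (EQ, GT, ...) against dw1 succeeds.
 */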
63
64 IGT_TEST_DESCRIPTION("Check that we can control the order of execution");
65
66 static inline
uint32_t __sync_read_u32(int fd, uint32_t handle, uint64_t offset)
68 {
69 uint32_t value;
70
71 gem_set_domain(fd, handle, /* No write hazard lies! */
72 I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
73 gem_read(fd, handle, offset, &value, sizeof(value));
74
75 return value;
76 }
77
78 static inline
void __sync_read_u32_count(int fd, uint32_t handle, uint32_t *dst, uint64_t size)
80 {
81 gem_set_domain(fd, handle, /* No write hazard lies! */
82 I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
83 gem_read(fd, handle, 0, dst, size);
84 }
85
static uint32_t __store_dword(int fd, uint32_t ctx, unsigned ring,
87 uint32_t target, uint32_t offset, uint32_t value,
88 uint32_t cork, unsigned write_domain)
89 {
90 const int gen = intel_gen(intel_get_drm_devid(fd));
91 struct drm_i915_gem_exec_object2 obj[3];
92 struct drm_i915_gem_relocation_entry reloc;
93 struct drm_i915_gem_execbuffer2 execbuf;
94 uint32_t batch[16];
95 int i;
96
97 memset(&execbuf, 0, sizeof(execbuf));
98 execbuf.buffers_ptr = to_user_pointer(obj + !cork);
99 execbuf.buffer_count = 2 + !!cork;
100 execbuf.flags = ring;
101 if (gen < 6)
102 execbuf.flags |= I915_EXEC_SECURE;
103 execbuf.rsvd1 = ctx;
104
105 memset(obj, 0, sizeof(obj));
106 obj[0].handle = cork;
107 obj[1].handle = target;
108 obj[2].handle = gem_create(fd, 4096);
109
110 memset(&reloc, 0, sizeof(reloc));
111 reloc.target_handle = obj[1].handle;
112 reloc.presumed_offset = 0;
113 reloc.offset = sizeof(uint32_t);
114 reloc.delta = offset;
115 reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
116 reloc.write_domain = write_domain;
117 obj[2].relocs_ptr = to_user_pointer(&reloc);
118 obj[2].relocation_count = 1;
119
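	/*
	 * Assemble a per-gen MI_STORE_DWORD_IMM writing `value` into `target`
	 * at `offset`: gen8+ takes the 64-bit address directly after the
	 * opcode, gen4..7 insert an MBZ dword first (so the relocation moves
	 * down one dword), and gen2/3 use the shorter command form (one fewer
	 * dword), hence the opcode length decrement below.
	 */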
120 i = 0;
121 batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
122 if (gen >= 8) {
123 batch[++i] = offset;
124 batch[++i] = 0;
125 } else if (gen >= 4) {
126 batch[++i] = 0;
127 batch[++i] = offset;
128 reloc.offset += sizeof(uint32_t);
129 } else {
130 batch[i]--;
131 batch[++i] = offset;
132 }
133 batch[++i] = value;
134 batch[++i] = MI_BATCH_BUFFER_END;
135 gem_write(fd, obj[2].handle, 0, batch, sizeof(batch));
136 gem_execbuf(fd, &execbuf);
137
138 return obj[2].handle;
139 }
140
static void store_dword(int fd, uint32_t ctx, unsigned ring,
142 uint32_t target, uint32_t offset, uint32_t value,
143 uint32_t cork, unsigned write_domain)
144 {
145 gem_close(fd, __store_dword(fd, ctx, ring,
146 target, offset, value,
147 cork, write_domain));
148 }
149
static uint32_t create_highest_priority(int fd)
151 {
152 uint32_t ctx = gem_context_create(fd);
153
	/*
	 * If there is no priority support, all contexts will have equal
	 * priority (and therefore the max user priority), so no context
	 * can overtake us and we can still form an effective plug.
	 */
159 __gem_context_set_priority(fd, ctx, MAX_PRIO);
160
161 return ctx;
162 }
163
static void unplug_show_queue(int fd, struct igt_cork *c, unsigned int engine)
165 {
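	/*
	 * Stuff the engine with batches from max-priority contexts before
	 * popping the cork, so that when the plugged requests are released
	 * they remain visibly queued behind already-running work in the
	 * i915_engine_info dump below.
	 */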
166 igt_spin_t *spin[MAX_ELSP_QLEN];
167 int max = MAX_ELSP_QLEN;
168
169 /* If no scheduler, all batches are emitted in submission order */
170 if (!gem_scheduler_enabled(fd))
171 max = 1;
172
173 for (int n = 0; n < max; n++) {
174 const struct igt_spin_factory opts = {
175 .ctx = create_highest_priority(fd),
176 .engine = engine,
177 };
178 spin[n] = __igt_spin_factory(fd, &opts);
179 gem_context_destroy(fd, opts.ctx);
180 }
181
182 igt_cork_unplug(c); /* batches will now be queued on the engine */
183 igt_debugfs_dump(fd, "i915_engine_info");
184
185 for (int n = 0; n < max; n++)
186 igt_spin_free(fd, spin[n]);
187
188 }
189
static void fifo(int fd, unsigned ring)
191 {
192 IGT_CORK_HANDLE(cork);
193 uint32_t scratch, plug;
194 uint32_t result;
195
196 scratch = gem_create(fd, 4096);
197
198 plug = igt_cork_plug(&cork, fd);
199
200 /* Same priority, same timeline, final result will be the second eb */
201 store_dword(fd, 0, ring, scratch, 0, 1, plug, 0);
202 store_dword(fd, 0, ring, scratch, 0, 2, plug, 0);
203
204 unplug_show_queue(fd, &cork, ring);
205 gem_close(fd, plug);
206
207 result = __sync_read_u32(fd, scratch, 0);
208 gem_close(fd, scratch);
209
210 igt_assert_eq_u32(result, 2);
211 }
212
static void independent(int fd, unsigned int engine)
214 {
215 IGT_CORK_HANDLE(cork);
216 uint32_t scratch, plug, batch;
217 igt_spin_t *spin = NULL;
218 unsigned int other;
219 uint32_t *ptr;
220
221 igt_require(engine != 0);
222
223 scratch = gem_create(fd, 4096);
224 ptr = gem_mmap__gtt(fd, scratch, 4096, PROT_READ);
225 igt_assert_eq(ptr[0], 0);
226
227 plug = igt_cork_plug(&cork, fd);
228
229 /* Check that we can submit to engine while all others are blocked */
230 for_each_physical_engine(fd, other) {
231 if (other == engine)
232 continue;
233
234 if (!gem_can_store_dword(fd, other))
235 continue;
236
237 if (spin == NULL) {
238 spin = __igt_spin_new(fd, .engine = other);
239 } else {
240 struct drm_i915_gem_execbuffer2 eb = {
241 .buffer_count = 1,
242 .buffers_ptr = to_user_pointer(&spin->obj[IGT_SPIN_BATCH]),
243 .flags = other,
244 };
245 gem_execbuf(fd, &eb);
246 }
247
248 store_dword(fd, 0, other, scratch, 0, other, plug, 0);
249 }
250 igt_require(spin);
251
252 /* Same priority, but different timeline (as different engine) */
253 batch = __store_dword(fd, 0, engine, scratch, 0, engine, plug, 0);
254
255 unplug_show_queue(fd, &cork, engine);
256 gem_close(fd, plug);
257
258 gem_sync(fd, batch);
259 igt_assert(!gem_bo_busy(fd, batch));
260 igt_assert(gem_bo_busy(fd, spin->handle));
261 gem_close(fd, batch);
262
263 /* Only the local engine should be free to complete. */
264 igt_assert(gem_bo_busy(fd, scratch));
265 igt_assert_eq(ptr[0], engine);
266
267 igt_spin_free(fd, spin);
268 gem_quiescent_gpu(fd);
269
270 /* And we expect the others to have overwritten us, order unspecified */
271 igt_assert(!gem_bo_busy(fd, scratch));
272 igt_assert_neq(ptr[0], engine);
273
274 munmap(ptr, 4096);
275 gem_close(fd, scratch);
276 }
277
static void smoketest(int fd, unsigned ring, unsigned timeout)
279 {
280 const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
281 unsigned engines[MAX_ENGINES];
282 unsigned nengine;
283 unsigned engine;
284 uint32_t scratch;
285 uint32_t result[2 * ncpus];
286
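	/*
	 * Layout of the shared scratch page (one pair of dwords per child):
	 * byte 8*child + 0 holds the canary ~child, and byte 8*child + 4 is
	 * repeatedly overwritten with an incrementing cycle count, reported
	 * loosely at the end.
	 */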
287 nengine = 0;
288 if (ring == ALL_ENGINES) {
289 for_each_physical_engine(fd, engine)
290 if (gem_can_store_dword(fd, engine))
291 engines[nengine++] = engine;
292 } else {
293 if (gem_can_store_dword(fd, ring))
294 engines[nengine++] = ring;
295 }
296 igt_require(nengine);
297
298 scratch = gem_create(fd, 4096);
299 igt_fork(child, ncpus) {
300 unsigned long count = 0;
301 uint32_t ctx;
302
303 hars_petruska_f54_1_random_perturb(child);
304
305 ctx = gem_context_create(fd);
306 igt_until_timeout(timeout) {
307 int prio;
308
309 prio = hars_petruska_f54_1_random_unsafe_max(MAX_PRIO - MIN_PRIO) + MIN_PRIO;
310 gem_context_set_priority(fd, ctx, prio);
311
312 engine = engines[hars_petruska_f54_1_random_unsafe_max(nengine)];
313 store_dword(fd, ctx, engine, scratch,
314 8*child + 0, ~child,
315 0, 0);
316 for (unsigned int step = 0; step < 8; step++)
317 store_dword(fd, ctx, engine, scratch,
318 8*child + 4, count++,
319 0, 0);
320 }
321 gem_context_destroy(fd, ctx);
322 }
323 igt_waitchildren();
324
325 __sync_read_u32_count(fd, scratch, result, sizeof(result));
326 gem_close(fd, scratch);
327
328 for (unsigned n = 0; n < ncpus; n++) {
329 igt_assert_eq_u32(result[2 * n], ~n);
330 /*
331 * Note this count is approximate due to unconstrained
332 * ordering of the dword writes between engines.
333 *
334 * Take the result with a pinch of salt.
335 */
336 igt_info("Child[%d] completed %u cycles\n", n, result[(2 * n) + 1]);
337 }
338 }
339
static uint32_t __batch_create(int i915, uint32_t offset)
341 {
342 const uint32_t bbe = MI_BATCH_BUFFER_END;
343 uint32_t handle;
344
345 handle = gem_create(i915, ALIGN(offset + 4, 4096));
346 gem_write(i915, handle, offset, &bbe, sizeof(bbe));
347
348 return handle;
349 }
350
static uint32_t batch_create(int i915)
352 {
353 return __batch_create(i915, 0);
354 }
355
static void semaphore_userlock(int i915)
357 {
358 struct drm_i915_gem_exec_object2 obj = {
359 .handle = batch_create(i915),
360 };
361 igt_spin_t *spin = NULL;
362 unsigned int engine;
363 uint32_t scratch;
364
365 igt_require(gem_scheduler_has_semaphores(i915));
366
367 /*
368 * Given the use of semaphores to govern parallel submission
369 * of nearly-ready work to HW, we still want to run actually
370 * ready work immediately. Without semaphores, the dependent
371 * work wouldn't be submitted so our ready work will run.
372 */
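	/*
	 * Below we reuse one spinning batch on every engine: the first
	 * submission creates the spinner (with a dependency on scratch), and
	 * each further engine simply re-executes the same execbuf with its
	 * engine selector swapped in, so all engines share the one payload.
	 */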
373
374 scratch = gem_create(i915, 4096);
375 for_each_physical_engine(i915, engine) {
376 if (!spin) {
377 spin = igt_spin_new(i915,
378 .dependency = scratch,
379 .engine = engine);
380 } else {
381 uint64_t saved = spin->execbuf.flags;
382
383 spin->execbuf.flags &= ~ENGINE_MASK;
384 spin->execbuf.flags |= engine;
385
386 gem_execbuf(i915, &spin->execbuf);
387
388 spin->execbuf.flags = saved;
389 }
390 }
391 igt_require(spin);
392 gem_close(i915, scratch);
393
394 /*
395 * On all dependent engines, the request may be executing (busywaiting
396 * on a HW semaphore) but it should not prevent any real work from
397 * taking precedence.
398 */
399 scratch = gem_context_create(i915);
400 for_each_physical_engine(i915, engine) {
401 struct drm_i915_gem_execbuffer2 execbuf = {
402 .buffers_ptr = to_user_pointer(&obj),
403 .buffer_count = 1,
404 .flags = engine,
405 .rsvd1 = scratch,
406 };
407
408 if (engine == (spin->execbuf.flags & ENGINE_MASK))
409 continue;
410
411 gem_execbuf(i915, &execbuf);
412 }
413 gem_context_destroy(i915, scratch);
414 gem_sync(i915, obj.handle); /* to hang unless we can preempt */
415 gem_close(i915, obj.handle);
416
417 igt_spin_free(i915, spin);
418 }
419
static void semaphore_codependency(int i915)
421 {
422 struct {
423 igt_spin_t *xcs, *rcs;
424 } task[2];
425 unsigned int engine;
426 int i;
427
428 /*
429 * Consider two tasks, task A runs on (xcs0, rcs0) and task B
	 * on (xcs1, rcs0). That is, they must both run a dependent
431 * batch on rcs0, after first running in parallel on separate
432 * engines. To maximise throughput, we want the shorter xcs task
433 * to start on rcs first. However, if we insert semaphores we may
434 * pick wrongly and end up running the requests in the least
435 * optimal order.
436 */
437
438 i = 0;
439 for_each_physical_engine(i915, engine) {
440 uint32_t ctx;
441
442 if (engine == I915_EXEC_RENDER)
443 continue;
444
445 if (!gem_can_store_dword(i915, engine))
446 continue;
447
448 ctx = gem_context_create(i915);
449
450 task[i].xcs =
451 __igt_spin_new(i915,
452 .ctx = ctx,
453 .engine = engine,
454 .flags = IGT_SPIN_POLL_RUN);
455 igt_spin_busywait_until_started(task[i].xcs);
456
457 /* Common rcs tasks will be queued in FIFO */
458 task[i].rcs =
459 __igt_spin_new(i915,
460 .ctx = ctx,
461 .engine = I915_EXEC_RENDER,
462 .dependency = task[i].xcs->handle);
463
464 gem_context_destroy(i915, ctx);
465
466 if (++i == ARRAY_SIZE(task))
467 break;
468 }
469 igt_require(i == ARRAY_SIZE(task));
470
471 /* Since task[0] was queued first, it will be first in queue for rcs */
472 igt_spin_end(task[1].xcs);
473 igt_spin_end(task[1].rcs);
474 gem_sync(i915, task[1].rcs->handle); /* to hang if task[0] hogs rcs */
475
476 for (i = 0; i < ARRAY_SIZE(task); i++) {
477 igt_spin_free(i915, task[i].xcs);
478 igt_spin_free(i915, task[i].rcs);
479 }
480 }
481
static unsigned int offset_in_page(void *addr)
483 {
484 return (uintptr_t)addr & 4095;
485 }
486
static void semaphore_resolve(int i915)
488 {
489 const uint32_t SEMAPHORE_ADDR = 64 << 10;
490 uint32_t semaphore, outer, inner, *sema;
491 unsigned int engine;
492
493 /*
494 * Userspace may submit batches that wait upon unresolved
495 * semaphores. Ideally, we want to put those blocking batches
496 * to the back of the execution queue if we have something else
497 * that is ready to run right away. This test exploits a failure
498 * to reorder batches around a blocking semaphore by submitting
499 * the release of that semaphore from a later context.
500 */
501
502 igt_require(gem_scheduler_has_preemption(i915));
	igt_require(intel_gen(intel_get_drm_devid(i915)) >= 8); /* for MI_SEMAPHORE_WAIT */
504
505 outer = gem_context_create(i915);
506 inner = gem_context_create(i915);
507
508 semaphore = gem_create(i915, 4096);
509 sema = gem_mmap__wc(i915, semaphore, 0, 4096, PROT_WRITE);
510
511 for_each_physical_engine(i915, engine) {
512 struct drm_i915_gem_exec_object2 obj[3];
513 struct drm_i915_gem_execbuffer2 eb;
514 uint32_t handle, cancel;
515 uint32_t *cs, *map;
516 igt_spin_t *spin;
517 int64_t poke = 1;
518
519 if (!gem_can_store_dword(i915, engine))
520 continue;
521
522 spin = __igt_spin_new(i915, .engine = engine);
523 igt_spin_end(spin); /* we just want its address for later */
524 gem_sync(i915, spin->handle);
525 igt_spin_reset(spin);
526
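		/*
		 * The batch built below (run in the "outer" context) sets the
		 * semaphore to 1, busywaits for it to return to 0, then cancels
		 * the spinner that is queued behind it as a would-be GPU hang.
		 * Only the "inner" context, submitted last, writes that 0, so
		 * unless the scheduler lets the inner batch run past the
		 * busywaiting pair, gem_sync(handle) below will hang.
		 */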
527 handle = gem_create(i915, 4096);
528 cs = map = gem_mmap__cpu(i915, handle, 0, 4096, PROT_WRITE);
529
530 /* Set semaphore initially to 1 for polling and signaling */
531 *cs++ = MI_STORE_DWORD_IMM;
532 *cs++ = SEMAPHORE_ADDR;
533 *cs++ = 0;
534 *cs++ = 1;
535
536 /* Wait until another batch writes to our semaphore */
537 *cs++ = MI_SEMAPHORE_WAIT |
538 MI_SEMAPHORE_POLL |
539 MI_SEMAPHORE_SAD_EQ_SDD |
540 (4 - 2);
541 *cs++ = 0;
542 *cs++ = SEMAPHORE_ADDR;
543 *cs++ = 0;
544
545 /* Then cancel the spinner */
546 *cs++ = MI_STORE_DWORD_IMM;
547 *cs++ = spin->obj[IGT_SPIN_BATCH].offset +
548 offset_in_page(spin->condition);
549 *cs++ = 0;
550 *cs++ = MI_BATCH_BUFFER_END;
551
552 *cs++ = MI_BATCH_BUFFER_END;
553 munmap(map, 4096);
554
555 memset(&eb, 0, sizeof(eb));
556
557 /* First up is our spinning semaphore */
558 memset(obj, 0, sizeof(obj));
559 obj[0] = spin->obj[IGT_SPIN_BATCH];
560 obj[1].handle = semaphore;
561 obj[1].offset = SEMAPHORE_ADDR;
562 obj[1].flags = EXEC_OBJECT_PINNED;
563 obj[2].handle = handle;
564 eb.buffer_count = 3;
565 eb.buffers_ptr = to_user_pointer(obj);
566 eb.rsvd1 = outer;
567 gem_execbuf(i915, &eb);
568
		/* Then add the GPU hang intermediary */
570 memset(obj, 0, sizeof(obj));
571 obj[0].handle = handle;
572 obj[0].flags = EXEC_OBJECT_WRITE; /* always after semaphore */
573 obj[1] = spin->obj[IGT_SPIN_BATCH];
574 eb.buffer_count = 2;
575 eb.rsvd1 = 0;
576 gem_execbuf(i915, &eb);
577
578 while (READ_ONCE(*sema) == 0)
579 ;
580
581 /* Now the semaphore is spinning, cancel it */
582 cancel = gem_create(i915, 4096);
583 cs = map = gem_mmap__cpu(i915, cancel, 0, 4096, PROT_WRITE);
584 *cs++ = MI_STORE_DWORD_IMM;
585 *cs++ = SEMAPHORE_ADDR;
586 *cs++ = 0;
587 *cs++ = 0;
588 *cs++ = MI_BATCH_BUFFER_END;
589 munmap(map, 4096);
590
591 memset(obj, 0, sizeof(obj));
592 obj[0].handle = semaphore;
593 obj[0].offset = SEMAPHORE_ADDR;
594 obj[0].flags = EXEC_OBJECT_PINNED;
595 obj[1].handle = cancel;
596 eb.buffer_count = 2;
597 eb.rsvd1 = inner;
598 gem_execbuf(i915, &eb);
599 gem_wait(i915, cancel, &poke); /* match sync's WAIT_PRIORITY */
600 gem_close(i915, cancel);
601
602 gem_sync(i915, handle); /* To hang unless cancel runs! */
603 gem_close(i915, handle);
604 igt_spin_free(i915, spin);
605
606 igt_assert_eq(*sema, 0);
607 }
608
609 munmap(sema, 4096);
610 gem_close(i915, semaphore);
611
612 gem_context_destroy(i915, inner);
613 gem_context_destroy(i915, outer);
614 }
615
static void semaphore_noskip(int i915)
617 {
618 const int gen = intel_gen(intel_get_drm_devid(i915));
619 unsigned int engine, other;
620 uint32_t ctx;
621
622 igt_require(gen >= 6); /* MI_STORE_DWORD_IMM convenience */
623
624 ctx = gem_context_create(i915);
625
626 for_each_physical_engine(i915, engine) {
627 for_each_physical_engine(i915, other) {
628 struct drm_i915_gem_exec_object2 obj[3];
629 struct drm_i915_gem_execbuffer2 eb;
630 uint32_t handle, *cs, *map;
631 igt_spin_t *chain, *spin;
632
633 if (other == engine || !gem_can_store_dword(i915, other))
634 continue;
635
636 chain = __igt_spin_new(i915, .engine = engine);
637
638 spin = __igt_spin_new(i915, .engine = other);
639 igt_spin_end(spin); /* we just want its address for later */
640 gem_sync(i915, spin->handle);
641 igt_spin_reset(spin);
642
643 handle = gem_create(i915, 4096);
644 cs = map = gem_mmap__cpu(i915, handle, 0, 4096, PROT_WRITE);
645
646 /* Cancel the following spinner */
647 *cs++ = MI_STORE_DWORD_IMM;
648 if (gen >= 8) {
649 *cs++ = spin->obj[IGT_SPIN_BATCH].offset +
650 offset_in_page(spin->condition);
651 *cs++ = 0;
652 } else {
653 *cs++ = 0;
654 *cs++ = spin->obj[IGT_SPIN_BATCH].offset +
655 offset_in_page(spin->condition);
656 }
657 *cs++ = MI_BATCH_BUFFER_END;
658
659 *cs++ = MI_BATCH_BUFFER_END;
660 munmap(map, 4096);
661
662 /* port0: implicit semaphore from engine */
663 memset(obj, 0, sizeof(obj));
664 obj[0] = chain->obj[IGT_SPIN_BATCH];
665 obj[0].flags |= EXEC_OBJECT_WRITE;
666 obj[1] = spin->obj[IGT_SPIN_BATCH];
667 obj[2].handle = handle;
668 memset(&eb, 0, sizeof(eb));
669 eb.buffer_count = 3;
670 eb.buffers_ptr = to_user_pointer(obj);
671 eb.rsvd1 = ctx;
672 eb.flags = other;
673 gem_execbuf(i915, &eb);
674
675 /* port1: dependency chain from port0 */
676 memset(obj, 0, sizeof(obj));
677 obj[0].handle = handle;
678 obj[0].flags = EXEC_OBJECT_WRITE;
679 obj[1] = spin->obj[IGT_SPIN_BATCH];
680 memset(&eb, 0, sizeof(eb));
681 eb.buffer_count = 2;
682 eb.buffers_ptr = to_user_pointer(obj);
683 eb.flags = other;
684 gem_execbuf(i915, &eb);
685
686 igt_spin_set_timeout(chain, NSEC_PER_SEC / 100);
687 gem_sync(i915, spin->handle); /* To hang unless cancel runs! */
688
689 gem_close(i915, handle);
690 igt_spin_free(i915, spin);
691 igt_spin_free(i915, chain);
692 }
693 }
694
695 gem_context_destroy(i915, ctx);
696 }
697
static void reorder(int fd, unsigned ring, unsigned flags)
699 #define EQUAL 1
700 {
701 IGT_CORK_HANDLE(cork);
702 uint32_t scratch, plug;
703 uint32_t result;
704 uint32_t ctx[2];
705
706 ctx[LO] = gem_context_create(fd);
707 gem_context_set_priority(fd, ctx[LO], MIN_PRIO);
708
709 ctx[HI] = gem_context_create(fd);
710 gem_context_set_priority(fd, ctx[HI], flags & EQUAL ? MIN_PRIO : 0);
711
712 scratch = gem_create(fd, 4096);
713 plug = igt_cork_plug(&cork, fd);
714
	/* We expect the high priority context to be executed first, and
	 * so the final result will be the value from the low priority context.
	 */
718 store_dword(fd, ctx[LO], ring, scratch, 0, ctx[LO], plug, 0);
719 store_dword(fd, ctx[HI], ring, scratch, 0, ctx[HI], plug, 0);
720
721 unplug_show_queue(fd, &cork, ring);
722 gem_close(fd, plug);
723
724 gem_context_destroy(fd, ctx[LO]);
725 gem_context_destroy(fd, ctx[HI]);
726
727 result = __sync_read_u32(fd, scratch, 0);
728 gem_close(fd, scratch);
729
730 if (flags & EQUAL) /* equal priority, result will be fifo */
731 igt_assert_eq_u32(result, ctx[HI]);
732 else
733 igt_assert_eq_u32(result, ctx[LO]);
734 }
735
static void promotion(int fd, unsigned ring)
737 {
738 IGT_CORK_HANDLE(cork);
739 uint32_t result, dep;
740 uint32_t result_read, dep_read;
741 uint32_t ctx[3];
742 uint32_t plug;
743
744 ctx[LO] = gem_context_create(fd);
745 gem_context_set_priority(fd, ctx[LO], MIN_PRIO);
746
747 ctx[HI] = gem_context_create(fd);
748 gem_context_set_priority(fd, ctx[HI], 0);
749
750 ctx[NOISE] = gem_context_create(fd);
751 gem_context_set_priority(fd, ctx[NOISE], MIN_PRIO/2);
752
753 result = gem_create(fd, 4096);
754 dep = gem_create(fd, 4096);
755
756 plug = igt_cork_plug(&cork, fd);
757
758 /* Expect that HI promotes LO, so the order will be LO, HI, NOISE.
759 *
760 * fifo would be NOISE, LO, HI.
761 * strict priority would be HI, NOISE, LO
762 */
763 store_dword(fd, ctx[NOISE], ring, result, 0, ctx[NOISE], plug, 0);
764 store_dword(fd, ctx[LO], ring, result, 0, ctx[LO], plug, 0);
765
766 /* link LO <-> HI via a dependency on another buffer */
767 store_dword(fd, ctx[LO], ring, dep, 0, ctx[LO], 0, I915_GEM_DOMAIN_INSTRUCTION);
768 store_dword(fd, ctx[HI], ring, dep, 0, ctx[HI], 0, 0);
769
770 store_dword(fd, ctx[HI], ring, result, 0, ctx[HI], 0, 0);
771
772 unplug_show_queue(fd, &cork, ring);
773 gem_close(fd, plug);
774
775 gem_context_destroy(fd, ctx[NOISE]);
776 gem_context_destroy(fd, ctx[LO]);
777 gem_context_destroy(fd, ctx[HI]);
778
779 dep_read = __sync_read_u32(fd, dep, 0);
780 gem_close(fd, dep);
781
782 result_read = __sync_read_u32(fd, result, 0);
783 gem_close(fd, result);
784
785 igt_assert_eq_u32(dep_read, ctx[HI]);
786 igt_assert_eq_u32(result_read, ctx[NOISE]);
787 }
788
789 #define NEW_CTX (0x1 << 0)
790 #define HANG_LP (0x1 << 1)
static void preempt(int fd, unsigned ring, unsigned flags)
792 {
793 uint32_t result = gem_create(fd, 4096);
794 uint32_t result_read;
795 igt_spin_t *spin[MAX_ELSP_QLEN];
796 uint32_t ctx[2];
797 igt_hang_t hang;
798
799 ctx[LO] = gem_context_create(fd);
800 gem_context_set_priority(fd, ctx[LO], MIN_PRIO);
801
802 ctx[HI] = gem_context_create(fd);
803 gem_context_set_priority(fd, ctx[HI], MAX_PRIO);
804
805 if (flags & HANG_LP)
806 hang = igt_hang_ctx(fd, ctx[LO], ring, 0);
807
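	/*
	 * Repeatedly stuff the engine with MIN_PRIO spinners, then check that
	 * a single MAX_PRIO store overtakes them: each readback must return
	 * the fresh value while the very first spinner is still busy.
	 */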
808 for (int n = 0; n < ARRAY_SIZE(spin); n++) {
809 if (flags & NEW_CTX) {
810 gem_context_destroy(fd, ctx[LO]);
811 ctx[LO] = gem_context_create(fd);
812 gem_context_set_priority(fd, ctx[LO], MIN_PRIO);
813 }
814 spin[n] = __igt_spin_new(fd,
815 .ctx = ctx[LO],
816 .engine = ring);
817 igt_debug("spin[%d].handle=%d\n", n, spin[n]->handle);
818
819 store_dword(fd, ctx[HI], ring, result, 0, n + 1, 0, I915_GEM_DOMAIN_RENDER);
820
821 result_read = __sync_read_u32(fd, result, 0);
822 igt_assert_eq_u32(result_read, n + 1);
823 igt_assert(gem_bo_busy(fd, spin[0]->handle));
824 }
825
826 for (int n = 0; n < ARRAY_SIZE(spin); n++)
827 igt_spin_free(fd, spin[n]);
828
829 if (flags & HANG_LP)
830 igt_post_hang_ring(fd, hang);
831
832 gem_context_destroy(fd, ctx[LO]);
833 gem_context_destroy(fd, ctx[HI]);
834
835 gem_close(fd, result);
836 }
837
838 #define CHAIN 0x1
839 #define CONTEXTS 0x2
840
static igt_spin_t *__noise(int fd, uint32_t ctx, int prio, igt_spin_t *spin)
842 {
843 unsigned other;
844
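	/*
	 * Keep one spinning batch alive on every engine at the given priority:
	 * the first submission creates the spinner, and the remaining engines
	 * simply re-execute the same batch with a different engine selector.
	 */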
845 gem_context_set_priority(fd, ctx, prio);
846
847 for_each_physical_engine(fd, other) {
848 if (spin == NULL) {
849 spin = __igt_spin_new(fd,
850 .ctx = ctx,
851 .engine = other);
852 } else {
853 struct drm_i915_gem_execbuffer2 eb = {
854 .buffer_count = 1,
855 .buffers_ptr = to_user_pointer(&spin->obj[IGT_SPIN_BATCH]),
856 .rsvd1 = ctx,
857 .flags = other,
858 };
859 gem_execbuf(fd, &eb);
860 }
861 }
862
863 return spin;
864 }
865
static void __preempt_other(int fd,
867 uint32_t *ctx,
868 unsigned int target, unsigned int primary,
869 unsigned flags)
870 {
871 uint32_t result = gem_create(fd, 4096);
872 uint32_t result_read[4096 / sizeof(uint32_t)];
873 unsigned int n, i, other;
874
875 n = 0;
876 store_dword(fd, ctx[LO], primary,
877 result, (n + 1)*sizeof(uint32_t), n + 1,
878 0, I915_GEM_DOMAIN_RENDER);
879 n++;
880
881 if (flags & CHAIN) {
882 for_each_physical_engine(fd, other) {
883 store_dword(fd, ctx[LO], other,
884 result, (n + 1)*sizeof(uint32_t), n + 1,
885 0, I915_GEM_DOMAIN_RENDER);
886 n++;
887 }
888 }
889
890 store_dword(fd, ctx[HI], target,
891 result, (n + 1)*sizeof(uint32_t), n + 1,
892 0, I915_GEM_DOMAIN_RENDER);
893
894 igt_debugfs_dump(fd, "i915_engine_info");
895 gem_set_domain(fd, result, I915_GEM_DOMAIN_GTT, 0);
896
897 n++;
898
899 __sync_read_u32_count(fd, result, result_read, sizeof(result_read));
900 for (i = 0; i <= n; i++)
901 igt_assert_eq_u32(result_read[i], i);
902
903 gem_close(fd, result);
904 }
905
static void preempt_other(int fd, unsigned ring, unsigned int flags)
907 {
908 unsigned int primary;
909 igt_spin_t *spin = NULL;
910 uint32_t ctx[3];
911
	/* On each engine, insert
	 * [NOISE] spinner,
	 * [LOW] write
	 *
	 * Then on our target engine do a [HIGH] write which should then
	 * promote its dependent LOW writes in front of the spinner on
	 * each engine. The purpose of this test is to check that preemption
	 * can cross engines.
	 */
921
922 ctx[LO] = gem_context_create(fd);
923 gem_context_set_priority(fd, ctx[LO], MIN_PRIO);
924
925 ctx[NOISE] = gem_context_create(fd);
926 spin = __noise(fd, ctx[NOISE], 0, NULL);
927
928 ctx[HI] = gem_context_create(fd);
929 gem_context_set_priority(fd, ctx[HI], MAX_PRIO);
930
931 for_each_physical_engine(fd, primary) {
932 igt_debug("Primary engine: %s\n", e__->name);
933 __preempt_other(fd, ctx, ring, primary, flags);
934
935 }
936
937 igt_assert(gem_bo_busy(fd, spin->handle));
938 igt_spin_free(fd, spin);
939
940 gem_context_destroy(fd, ctx[LO]);
941 gem_context_destroy(fd, ctx[NOISE]);
942 gem_context_destroy(fd, ctx[HI]);
943 }
944
static void __preempt_queue(int fd,
946 unsigned target, unsigned primary,
947 unsigned depth, unsigned flags)
948 {
949 uint32_t result = gem_create(fd, 4096);
950 uint32_t result_read[4096 / sizeof(uint32_t)];
951 igt_spin_t *above = NULL, *below = NULL;
952 unsigned int other, n, i;
953 int prio = MAX_PRIO;
954 uint32_t ctx[3] = {
955 gem_context_create(fd),
956 gem_context_create(fd),
957 gem_context_create(fd),
958 };
959
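	/*
	 * Build a ladder of noise spinners: `depth` of them at priorities
	 * above ctx[HI] and the remainder between ctx[HI] and ctx[LO]. The
	 * HI store below should jump the lower priority noise but still
	 * queue behind the higher priority noise, which is what the busy
	 * checks on `above` and `below` verify.
	 */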
960 for (n = 0; n < depth; n++) {
961 if (flags & CONTEXTS) {
962 gem_context_destroy(fd, ctx[NOISE]);
963 ctx[NOISE] = gem_context_create(fd);
964 }
965 above = __noise(fd, ctx[NOISE], prio--, above);
966 }
967
968 gem_context_set_priority(fd, ctx[HI], prio--);
969
970 for (; n < MAX_ELSP_QLEN; n++) {
971 if (flags & CONTEXTS) {
972 gem_context_destroy(fd, ctx[NOISE]);
973 ctx[NOISE] = gem_context_create(fd);
974 }
975 below = __noise(fd, ctx[NOISE], prio--, below);
976 }
977
978 gem_context_set_priority(fd, ctx[LO], prio--);
979
980 n = 0;
981 store_dword(fd, ctx[LO], primary,
982 result, (n + 1)*sizeof(uint32_t), n + 1,
983 0, I915_GEM_DOMAIN_RENDER);
984 n++;
985
986 if (flags & CHAIN) {
987 for_each_physical_engine(fd, other) {
988 store_dword(fd, ctx[LO], other,
989 result, (n + 1)*sizeof(uint32_t), n + 1,
990 0, I915_GEM_DOMAIN_RENDER);
991 n++;
992 }
993 }
994
995 store_dword(fd, ctx[HI], target,
996 result, (n + 1)*sizeof(uint32_t), n + 1,
997 0, I915_GEM_DOMAIN_RENDER);
998
999 igt_debugfs_dump(fd, "i915_engine_info");
1000
1001 if (above) {
1002 igt_assert(gem_bo_busy(fd, above->handle));
1003 igt_spin_free(fd, above);
1004 }
1005
1006 gem_set_domain(fd, result, I915_GEM_DOMAIN_GTT, 0);
1007
1008 __sync_read_u32_count(fd, result, result_read, sizeof(result_read));
1009
1010 n++;
1011 for (i = 0; i <= n; i++)
1012 igt_assert_eq_u32(result_read[i], i);
1013
1014 if (below) {
1015 igt_assert(gem_bo_busy(fd, below->handle));
1016 igt_spin_free(fd, below);
1017 }
1018
1019 gem_context_destroy(fd, ctx[LO]);
1020 gem_context_destroy(fd, ctx[NOISE]);
1021 gem_context_destroy(fd, ctx[HI]);
1022
1023 gem_close(fd, result);
1024 }
1025
static void preempt_queue(int fd, unsigned ring, unsigned int flags)
1027 {
1028 unsigned other;
1029
1030 for_each_physical_engine(fd, other) {
1031 for (unsigned depth = 0; depth <= MAX_ELSP_QLEN; depth++)
1032 __preempt_queue(fd, ring, other, depth, flags);
1033 }
1034 }
1035
static void preempt_self(int fd, unsigned ring)
1037 {
1038 uint32_t result = gem_create(fd, 4096);
1039 uint32_t result_read[4096 / sizeof(uint32_t)];
1040 igt_spin_t *spin[MAX_ELSP_QLEN];
1041 unsigned int other;
1042 unsigned int n, i;
1043 uint32_t ctx[3];
1044
1045 /* On each engine, insert
1046 * [NOISE] spinner,
1047 * [self/LOW] write
1048 *
1049 * Then on our target engine do a [self/HIGH] write which should then
1050 * preempt its own lower priority task on any engine.
1051 */
1052
1053 ctx[NOISE] = gem_context_create(fd);
1054
1055 ctx[HI] = gem_context_create(fd);
1056
1057 n = 0;
1058 gem_context_set_priority(fd, ctx[HI], MIN_PRIO);
1059 for_each_physical_engine(fd, other) {
1060 spin[n] = __igt_spin_new(fd,
1061 .ctx = ctx[NOISE],
1062 .engine = other);
1063 store_dword(fd, ctx[HI], other,
1064 result, (n + 1)*sizeof(uint32_t), n + 1,
1065 0, I915_GEM_DOMAIN_RENDER);
1066 n++;
1067 }
1068 gem_context_set_priority(fd, ctx[HI], MAX_PRIO);
1069 store_dword(fd, ctx[HI], ring,
1070 result, (n + 1)*sizeof(uint32_t), n + 1,
1071 0, I915_GEM_DOMAIN_RENDER);
1072
1073 gem_set_domain(fd, result, I915_GEM_DOMAIN_GTT, 0);
1074
1075 for (i = 0; i < n; i++) {
1076 igt_assert(gem_bo_busy(fd, spin[i]->handle));
1077 igt_spin_free(fd, spin[i]);
1078 }
1079
1080 __sync_read_u32_count(fd, result, result_read, sizeof(result_read));
1081
1082 n++;
1083 for (i = 0; i <= n; i++)
1084 igt_assert_eq_u32(result_read[i], i);
1085
1086 gem_context_destroy(fd, ctx[NOISE]);
1087 gem_context_destroy(fd, ctx[HI]);
1088
1089 gem_close(fd, result);
1090 }
1091
static void preemptive_hang(int fd, unsigned ring)
1093 {
1094 igt_spin_t *spin[MAX_ELSP_QLEN];
1095 igt_hang_t hang;
1096 uint32_t ctx[2];
1097
1098 ctx[HI] = gem_context_create(fd);
1099 gem_context_set_priority(fd, ctx[HI], MAX_PRIO);
1100
1101 for (int n = 0; n < ARRAY_SIZE(spin); n++) {
1102 ctx[LO] = gem_context_create(fd);
1103 gem_context_set_priority(fd, ctx[LO], MIN_PRIO);
1104
1105 spin[n] = __igt_spin_new(fd,
1106 .ctx = ctx[LO],
1107 .engine = ring);
1108
1109 gem_context_destroy(fd, ctx[LO]);
1110 }
1111
1112 hang = igt_hang_ctx(fd, ctx[HI], ring, 0);
1113 igt_post_hang_ring(fd, hang);
1114
1115 for (int n = 0; n < ARRAY_SIZE(spin); n++) {
		/* Current behavior is to execute requests in order of submission.
		 * This is subject to change as the scheduler evolves. The test
		 * should be updated to reflect such changes.
		 */
1120 igt_assert(gem_bo_busy(fd, spin[n]->handle));
1121 igt_spin_free(fd, spin[n]);
1122 }
1123
1124 gem_context_destroy(fd, ctx[HI]);
1125 }
1126
static void deep(int fd, unsigned ring)
1128 {
1129 #define XS 8
1130 const unsigned int max_req = MAX_PRIO - MIN_PRIO;
1131 const unsigned size = ALIGN(4*max_req, 4096);
1132 struct timespec tv = {};
1133 IGT_CORK_HANDLE(cork);
1134 unsigned int nreq;
1135 uint32_t plug;
1136 uint32_t result, dep[XS];
1137 uint32_t read_buf[size / sizeof(uint32_t)];
1138 uint32_t expected = 0;
1139 uint32_t *ctx;
1140 int dep_nreq;
1141 int n;
1142
1143 ctx = malloc(sizeof(*ctx) * MAX_CONTEXTS);
1144 for (n = 0; n < MAX_CONTEXTS; n++) {
1145 ctx[n] = gem_context_create(fd);
1146 }
1147
1148 nreq = gem_measure_ring_inflight(fd, ring, 0) / (4 * XS) * MAX_CONTEXTS;
1149 if (nreq > max_req)
1150 nreq = max_req;
1151 igt_info("Using %d requests (prio range %d)\n", nreq, max_req);
1152
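	/*
	 * Plan for the rest of this test: bind everything up front, then
	 * build two dependency trees while corked, using contexts of
	 * ascending priority. The first tree writes each context's id into
	 * its own dword of every dep[] buffer; the second writes ids into
	 * result[] with the dep[] buffers as shared dependencies. Priority
	 * inheritance across those shared dependencies must prevent
	 * reordering, so the checked result dwords end up holding the id of
	 * the last (highest priority) context submitted.
	 */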
1153 result = gem_create(fd, size);
1154 for (int m = 0; m < XS; m ++)
1155 dep[m] = gem_create(fd, size);
1156
1157 /* Bind all surfaces and contexts before starting the timeout. */
1158 {
1159 struct drm_i915_gem_exec_object2 obj[XS + 2];
1160 struct drm_i915_gem_execbuffer2 execbuf;
1161 const uint32_t bbe = MI_BATCH_BUFFER_END;
1162
1163 memset(obj, 0, sizeof(obj));
1164 for (n = 0; n < XS; n++)
1165 obj[n].handle = dep[n];
1166 obj[XS].handle = result;
1167 obj[XS+1].handle = gem_create(fd, 4096);
1168 gem_write(fd, obj[XS+1].handle, 0, &bbe, sizeof(bbe));
1169
1170 memset(&execbuf, 0, sizeof(execbuf));
1171 execbuf.buffers_ptr = to_user_pointer(obj);
1172 execbuf.buffer_count = XS + 2;
1173 execbuf.flags = ring;
1174 for (n = 0; n < MAX_CONTEXTS; n++) {
1175 execbuf.rsvd1 = ctx[n];
1176 gem_execbuf(fd, &execbuf);
1177 }
1178 gem_close(fd, obj[XS+1].handle);
1179 gem_sync(fd, result);
1180 }
1181
1182 plug = igt_cork_plug(&cork, fd);
1183
1184 /* Create a deep dependency chain, with a few branches */
1185 for (n = 0; n < nreq && igt_seconds_elapsed(&tv) < 2; n++) {
1186 const int gen = intel_gen(intel_get_drm_devid(fd));
1187 struct drm_i915_gem_exec_object2 obj[3];
1188 struct drm_i915_gem_relocation_entry reloc;
1189 struct drm_i915_gem_execbuffer2 eb = {
1190 .buffers_ptr = to_user_pointer(obj),
1191 .buffer_count = 3,
1192 .flags = ring | (gen < 6 ? I915_EXEC_SECURE : 0),
1193 .rsvd1 = ctx[n % MAX_CONTEXTS],
1194 };
1195 uint32_t batch[16];
1196 int i;
1197
1198 memset(obj, 0, sizeof(obj));
1199 obj[0].handle = plug;
1200
1201 memset(&reloc, 0, sizeof(reloc));
1202 reloc.presumed_offset = 0;
1203 reloc.offset = sizeof(uint32_t);
1204 reloc.delta = sizeof(uint32_t) * n;
1205 reloc.read_domains = I915_GEM_DOMAIN_RENDER;
1206 reloc.write_domain = I915_GEM_DOMAIN_RENDER;
1207 obj[2].handle = gem_create(fd, 4096);
1208 obj[2].relocs_ptr = to_user_pointer(&reloc);
1209 obj[2].relocation_count = 1;
1210
1211 i = 0;
1212 batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
1213 if (gen >= 8) {
1214 batch[++i] = reloc.delta;
1215 batch[++i] = 0;
1216 } else if (gen >= 4) {
1217 batch[++i] = 0;
1218 batch[++i] = reloc.delta;
1219 reloc.offset += sizeof(uint32_t);
1220 } else {
1221 batch[i]--;
1222 batch[++i] = reloc.delta;
1223 }
1224 batch[++i] = eb.rsvd1;
1225 batch[++i] = MI_BATCH_BUFFER_END;
1226 gem_write(fd, obj[2].handle, 0, batch, sizeof(batch));
1227
1228 gem_context_set_priority(fd, eb.rsvd1, MAX_PRIO - nreq + n);
1229 for (int m = 0; m < XS; m++) {
1230 obj[1].handle = dep[m];
1231 reloc.target_handle = obj[1].handle;
1232 gem_execbuf(fd, &eb);
1233 }
1234 gem_close(fd, obj[2].handle);
1235 }
1236 igt_info("First deptree: %d requests [%.3fs]\n",
1237 n * XS, 1e-9*igt_nsec_elapsed(&tv));
1238 dep_nreq = n;
1239
1240 for (n = 0; n < nreq && igt_seconds_elapsed(&tv) < 4; n++) {
1241 uint32_t context = ctx[n % MAX_CONTEXTS];
1242 gem_context_set_priority(fd, context, MAX_PRIO - nreq + n);
1243
1244 for (int m = 0; m < XS; m++) {
1245 store_dword(fd, context, ring, result, 4*n, context, dep[m], 0);
1246 store_dword(fd, context, ring, result, 4*m, context, 0, I915_GEM_DOMAIN_INSTRUCTION);
1247 }
1248 expected = context;
1249 }
1250 igt_info("Second deptree: %d requests [%.3fs]\n",
1251 n * XS, 1e-9*igt_nsec_elapsed(&tv));
1252
1253 unplug_show_queue(fd, &cork, ring);
1254 gem_close(fd, plug);
1255 igt_require(expected); /* too slow */
1256
1257 for (n = 0; n < MAX_CONTEXTS; n++)
1258 gem_context_destroy(fd, ctx[n]);
1259
1260 for (int m = 0; m < XS; m++) {
1261 __sync_read_u32_count(fd, dep[m], read_buf, sizeof(read_buf));
1262 gem_close(fd, dep[m]);
1263
1264 for (n = 0; n < dep_nreq; n++)
1265 igt_assert_eq_u32(read_buf[n], ctx[n % MAX_CONTEXTS]);
1266 }
1267
1268 __sync_read_u32_count(fd, result, read_buf, sizeof(read_buf));
1269 gem_close(fd, result);
1270
1271 /* No reordering due to PI on all contexts because of the common dep */
1272 for (int m = 0; m < XS; m++)
1273 igt_assert_eq_u32(read_buf[m], expected);
1274
1275 free(ctx);
1276 #undef XS
1277 }
1278
static void alarm_handler(int sig)
1280 {
1281 }
1282
static int __execbuf(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
1284 {
1285 int err = 0;
1286 if (ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf))
1287 err = -errno;
1288 return err;
1289 }
1290
static void wide(int fd, unsigned ring)
1292 {
1293 struct timespec tv = {};
1294 unsigned int ring_size = gem_measure_ring_inflight(fd, ring, MEASURE_RING_NEW_CTX);
1295
1296 IGT_CORK_HANDLE(cork);
1297 uint32_t plug;
1298 uint32_t result;
1299 uint32_t result_read[MAX_CONTEXTS];
1300 uint32_t *ctx;
1301 unsigned int count;
1302
1303 ctx = malloc(sizeof(*ctx)*MAX_CONTEXTS);
1304 for (int n = 0; n < MAX_CONTEXTS; n++)
1305 ctx[n] = gem_context_create(fd);
1306
1307 result = gem_create(fd, 4*MAX_CONTEXTS);
1308
1309 plug = igt_cork_plug(&cork, fd);
1310
1311 /* Lots of in-order requests, plugged and submitted simultaneously */
1312 for (count = 0;
1313 igt_seconds_elapsed(&tv) < 5 && count < ring_size;
1314 count++) {
1315 for (int n = 0; n < MAX_CONTEXTS; n++) {
1316 store_dword(fd, ctx[n], ring, result, 4*n, ctx[n], plug, I915_GEM_DOMAIN_INSTRUCTION);
1317 }
1318 }
1319 igt_info("Submitted %d requests over %d contexts in %.1fms\n",
1320 count, MAX_CONTEXTS, igt_nsec_elapsed(&tv) * 1e-6);
1321
1322 unplug_show_queue(fd, &cork, ring);
1323 gem_close(fd, plug);
1324
1325 for (int n = 0; n < MAX_CONTEXTS; n++)
1326 gem_context_destroy(fd, ctx[n]);
1327
1328 __sync_read_u32_count(fd, result, result_read, sizeof(result_read));
1329 for (int n = 0; n < MAX_CONTEXTS; n++)
1330 igt_assert_eq_u32(result_read[n], ctx[n]);
1331
1332 gem_close(fd, result);
1333 free(ctx);
1334 }
1335
static void reorder_wide(int fd, unsigned ring)
1337 {
1338 const int gen = intel_gen(intel_get_drm_devid(fd));
1339 struct drm_i915_gem_relocation_entry reloc;
1340 struct drm_i915_gem_exec_object2 obj[3];
1341 struct drm_i915_gem_execbuffer2 execbuf;
1342 struct timespec tv = {};
1343 unsigned int ring_size = gem_measure_ring_inflight(fd, ring, MEASURE_RING_NEW_CTX);
1344 IGT_CORK_HANDLE(cork);
1345 uint32_t result, target, plug;
1346 uint32_t result_read[1024];
1347 uint32_t *expected;
1348
1349 result = gem_create(fd, 4096);
1350 target = gem_create(fd, 4096);
1351 plug = igt_cork_plug(&cork, fd);
1352
1353 expected = gem_mmap__cpu(fd, target, 0, 4096, PROT_WRITE);
1354 gem_set_domain(fd, target, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
1355
1356 memset(obj, 0, sizeof(obj));
1357 obj[0].handle = plug;
1358 obj[1].handle = result;
1359 obj[2].relocs_ptr = to_user_pointer(&reloc);
1360 obj[2].relocation_count = 1;
1361
1362 memset(&reloc, 0, sizeof(reloc));
1363 reloc.target_handle = result;
1364 reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
1365 reloc.write_domain = 0; /* lies */
1366
1367 memset(&execbuf, 0, sizeof(execbuf));
1368 execbuf.buffers_ptr = to_user_pointer(obj);
1369 execbuf.buffer_count = 3;
1370 execbuf.flags = ring;
1371 if (gen < 6)
1372 execbuf.flags |= I915_EXEC_SECURE;
1373
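	/*
	 * Each pass uses a fresh context of the next higher priority
	 * (MIN_PRIO upwards) to scatter its ordinal x into random dwords of
	 * result. Everything queues up behind the plug; once released, the
	 * scheduler should execute high-priority work first, so the lowest
	 * priority writer of each dword runs last and its value (the first
	 * one recorded in expected[]) is what must be read back.
	 */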
1374 for (int n = MIN_PRIO, x = 1;
1375 igt_seconds_elapsed(&tv) < 5 && n <= MAX_PRIO;
1376 n++, x++) {
1377 unsigned int sz = ALIGN(ring_size * 64, 4096);
1378 uint32_t *batch;
1379
1380 execbuf.rsvd1 = gem_context_create(fd);
1381 gem_context_set_priority(fd, execbuf.rsvd1, n);
1382
1383 obj[2].handle = gem_create(fd, sz);
1384 batch = gem_mmap__gtt(fd, obj[2].handle, sz, PROT_WRITE);
1385 gem_set_domain(fd, obj[2].handle, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
1386
1387 for (int m = 0; m < ring_size; m++) {
1388 uint64_t addr;
1389 int idx = hars_petruska_f54_1_random_unsafe_max(1024);
1390 int i;
1391
1392 execbuf.batch_start_offset = m * 64;
1393 reloc.offset = execbuf.batch_start_offset + sizeof(uint32_t);
1394 reloc.delta = idx * sizeof(uint32_t);
1395 addr = reloc.presumed_offset + reloc.delta;
1396
1397 i = execbuf.batch_start_offset / sizeof(uint32_t);
1398 batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
1399 if (gen >= 8) {
1400 batch[++i] = addr;
1401 batch[++i] = addr >> 32;
1402 } else if (gen >= 4) {
1403 batch[++i] = 0;
1404 batch[++i] = addr;
1405 reloc.offset += sizeof(uint32_t);
1406 } else {
1407 batch[i]--;
1408 batch[++i] = addr;
1409 }
1410 batch[++i] = x;
1411 batch[++i] = MI_BATCH_BUFFER_END;
1412
1413 if (!expected[idx])
1414 expected[idx] = x;
1415
1416 gem_execbuf(fd, &execbuf);
1417 }
1418
1419 munmap(batch, sz);
1420 gem_close(fd, obj[2].handle);
1421 gem_context_destroy(fd, execbuf.rsvd1);
1422 }
1423
1424 unplug_show_queue(fd, &cork, ring);
1425 gem_close(fd, plug);
1426
1427 __sync_read_u32_count(fd, result, result_read, sizeof(result_read));
1428 for (int n = 0; n < 1024; n++)
1429 igt_assert_eq_u32(result_read[n], expected[n]);
1430
1431 munmap(expected, 4096);
1432
1433 gem_close(fd, result);
1434 gem_close(fd, target);
1435 }
1436
static void bind_to_cpu(int cpu)
1438 {
1439 const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
1440 struct sched_param rt = {.sched_priority = 99 };
1441 cpu_set_t allowed;
1442
1443 igt_assert(sched_setscheduler(getpid(), SCHED_RR | SCHED_RESET_ON_FORK, &rt) == 0);
1444
1445 CPU_ZERO(&allowed);
1446 CPU_SET(cpu % ncpus, &allowed);
1447 igt_assert(sched_setaffinity(getpid(), sizeof(cpu_set_t), &allowed) == 0);
1448 }
1449
static void test_pi_ringfull(int fd, unsigned int engine)
1451 {
1452 const uint32_t bbe = MI_BATCH_BUFFER_END;
1453 struct sigaction sa = { .sa_handler = alarm_handler };
1454 struct drm_i915_gem_execbuffer2 execbuf;
1455 struct drm_i915_gem_exec_object2 obj[2];
1456 unsigned int last, count;
1457 struct itimerval itv;
1458 IGT_CORK_HANDLE(c);
1459 uint32_t vip;
1460 bool *result;
1461
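	/*
	 * Outline: fill the MIN_PRIO context's ring until execbuf would block,
	 * leave the RT parent stuck waiting for ring space, and have an
	 * ordinary child submit on the MAX_PRIO context in the meantime. The
	 * full low-priority ring must not starve the high-priority submission;
	 * the shared result[] page is how the child reports progress.
	 */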
1462 result = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
1463 igt_assert(result != MAP_FAILED);
1464
1465 memset(&execbuf, 0, sizeof(execbuf));
1466 memset(&obj, 0, sizeof(obj));
1467
1468 obj[1].handle = gem_create(fd, 4096);
1469 gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
1470
1471 execbuf.buffers_ptr = to_user_pointer(&obj[1]);
1472 execbuf.buffer_count = 1;
1473 execbuf.flags = engine;
1474
1475 /* Warm up both (hi/lo) contexts */
1476 execbuf.rsvd1 = gem_context_create(fd);
1477 gem_context_set_priority(fd, execbuf.rsvd1, MAX_PRIO);
1478 gem_execbuf(fd, &execbuf);
1479 gem_sync(fd, obj[1].handle);
1480 vip = execbuf.rsvd1;
1481
1482 execbuf.rsvd1 = gem_context_create(fd);
1483 gem_context_set_priority(fd, execbuf.rsvd1, MIN_PRIO);
1484 gem_execbuf(fd, &execbuf);
1485 gem_sync(fd, obj[1].handle);
1486
1487 /* Fill the low-priority ring */
1488 obj[0].handle = igt_cork_plug(&c, fd);
1489
1490 execbuf.buffers_ptr = to_user_pointer(obj);
1491 execbuf.buffer_count = 2;
1492
1493 sigaction(SIGALRM, &sa, NULL);
1494 itv.it_interval.tv_sec = 0;
1495 itv.it_interval.tv_usec = 1000;
1496 itv.it_value.tv_sec = 0;
1497 itv.it_value.tv_usec = 10000;
1498 setitimer(ITIMER_REAL, &itv, NULL);
1499
1500 last = -1;
1501 count = 0;
1502 do {
1503 if (__execbuf(fd, &execbuf) == 0) {
1504 count++;
1505 continue;
1506 }
1507
1508 if (last == count)
1509 break;
1510
1511 last = count;
1512 } while (1);
1513 igt_debug("Filled low-priority ring with %d batches\n", count);
1514
1515 memset(&itv, 0, sizeof(itv));
1516 setitimer(ITIMER_REAL, &itv, NULL);
1517
1518 execbuf.buffers_ptr = to_user_pointer(&obj[1]);
1519 execbuf.buffer_count = 1;
1520
1521 /* both parent + child on the same cpu, only parent is RT */
1522 bind_to_cpu(0);
1523
1524 igt_fork(child, 1) {
1525 int err;
1526
1527 result[0] = vip != execbuf.rsvd1;
1528
1529 igt_debug("Waking parent\n");
1530 kill(getppid(), SIGALRM);
1531 sched_yield();
1532 result[1] = true;
1533
1534 sigaction(SIGALRM, &sa, NULL);
1535 itv.it_value.tv_sec = 0;
1536 itv.it_value.tv_usec = 10000;
1537 setitimer(ITIMER_REAL, &itv, NULL);
1538
1539 /* Since we are the high priority task, we expect to be
1540 * able to add ourselves to *our* ring without interruption.
1541 */
1542 igt_debug("HP child executing\n");
1543 execbuf.rsvd1 = vip;
1544 err = __execbuf(fd, &execbuf);
1545 igt_debug("HP execbuf returned %d\n", err);
1546
1547 memset(&itv, 0, sizeof(itv));
1548 setitimer(ITIMER_REAL, &itv, NULL);
1549
1550 result[2] = err == 0;
1551 }
1552
1553 /* Relinquish CPU just to allow child to create a context */
1554 sleep(1);
1555 igt_assert_f(result[0], "HP context (child) not created\n");
1556 igt_assert_f(!result[1], "Child released too early!\n");
1557
1558 /* Parent sleeps waiting for ringspace, releasing child */
1559 itv.it_value.tv_sec = 0;
1560 itv.it_value.tv_usec = 50000;
1561 setitimer(ITIMER_REAL, &itv, NULL);
1562 igt_debug("LP parent executing\n");
1563 igt_assert_eq(__execbuf(fd, &execbuf), -EINTR);
1564 igt_assert_f(result[1], "Child was not released!\n");
1565 igt_assert_f(result[2],
1566 "High priority child unable to submit within 10ms\n");
1567
1568 igt_cork_unplug(&c);
1569 igt_waitchildren();
1570
1571 gem_context_destroy(fd, execbuf.rsvd1);
1572 gem_context_destroy(fd, vip);
1573 gem_close(fd, obj[1].handle);
1574 gem_close(fd, obj[0].handle);
1575 munmap(result, 4096);
1576 }
1577
static void measure_semaphore_power(int i915)
1579 {
1580 struct gpu_power power;
1581 unsigned int engine, signaler;
1582
1583 igt_require(gpu_power_open(&power) == 0);
1584
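	/*
	 * For each signalling engine: sample power with only a spinner
	 * running, then again after every other engine has been handed a
	 * (freed, but still in-flight) waiter that busywaits on a semaphore
	 * for that spinner, and report the extra draw the semaphore polls add.
	 */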
1585 for_each_physical_engine(i915, signaler) {
1586 struct gpu_power_sample s_spin[2];
1587 struct gpu_power_sample s_sema[2];
1588 double baseline, total;
1589 int64_t jiffie = 1;
1590 igt_spin_t *spin;
1591
1592 spin = __igt_spin_new(i915,
1593 .engine = signaler,
1594 .flags = IGT_SPIN_POLL_RUN);
1595 gem_wait(i915, spin->handle, &jiffie); /* waitboost */
1596 igt_spin_busywait_until_started(spin);
1597
1598 gpu_power_read(&power, &s_spin[0]);
1599 usleep(100*1000);
1600 gpu_power_read(&power, &s_spin[1]);
1601
1602 /* Add a waiter to each engine */
1603 for_each_physical_engine(i915, engine) {
1604 igt_spin_t *sema;
1605
1606 if (engine == signaler)
1607 continue;
1608
1609 sema = __igt_spin_new(i915,
1610 .engine = engine,
1611 .dependency = spin->handle);
1612
1613 igt_spin_free(i915, sema);
1614 }
1615 usleep(10); /* just give the tasklets a chance to run */
1616
1617 gpu_power_read(&power, &s_sema[0]);
1618 usleep(100*1000);
1619 gpu_power_read(&power, &s_sema[1]);
1620
1621 igt_spin_free(i915, spin);
1622
1623 baseline = gpu_power_W(&power, &s_spin[0], &s_spin[1]);
1624 total = gpu_power_W(&power, &s_sema[0], &s_sema[1]);
1625
		igt_info("%s: %.1fmW + %.1fmW (total %.1fmW)\n",
1627 e__->name,
1628 1e3 * baseline,
1629 1e3 * (total - baseline),
1630 1e3 * total);
1631 }
1632
1633 gpu_power_close(&power);
1634 }
1635
1636 igt_main
1637 {
1638 const struct intel_execution_engine *e;
1639 int fd = -1;
1640
1641 igt_skip_on_simulation();
1642
1643 igt_fixture {
1644 fd = drm_open_driver_master(DRIVER_INTEL);
1645 gem_submission_print_method(fd);
1646 gem_scheduler_print_capability(fd);
1647
1648 igt_require_gem(fd);
1649 gem_require_mmap_wc(fd);
1650 gem_require_contexts(fd);
1651
1652 igt_fork_hang_detector(fd);
1653 }
1654
1655 igt_subtest_group {
1656 for (e = intel_execution_engines; e->name; e++) {
1657 /* default exec-id is purely symbolic */
1658 if (e->exec_id == 0)
1659 continue;
1660
1661 igt_subtest_f("fifo-%s", e->name) {
1662 igt_require(gem_ring_has_physical_engine(fd, e->exec_id | e->flags));
1663 igt_require(gem_can_store_dword(fd, e->exec_id | e->flags));
1664 fifo(fd, e->exec_id | e->flags);
1665 }
1666
1667 igt_subtest_f("independent-%s", e->name) {
1668 igt_require(gem_ring_has_physical_engine(fd, e->exec_id | e->flags));
1669 igt_require(gem_can_store_dword(fd, e->exec_id | e->flags));
1670 independent(fd, e->exec_id | e->flags);
1671 }
1672 }
1673 }
1674
1675 igt_subtest_group {
1676 igt_fixture {
1677 igt_require(gem_scheduler_enabled(fd));
1678 igt_require(gem_scheduler_has_ctx_priority(fd));
1679 }
1680
1681 igt_subtest("semaphore-user")
1682 semaphore_userlock(fd);
1683 igt_subtest("semaphore-codependency")
1684 semaphore_codependency(fd);
1685 igt_subtest("semaphore-resolve")
1686 semaphore_resolve(fd);
1687 igt_subtest("semaphore-noskip")
1688 semaphore_noskip(fd);
1689
1690 igt_subtest("smoketest-all")
1691 smoketest(fd, ALL_ENGINES, 30);
1692
1693 for (e = intel_execution_engines; e->name; e++) {
1694 if (e->exec_id == 0)
1695 continue;
1696
1697 igt_subtest_group {
1698 igt_fixture {
1699 igt_require(gem_ring_has_physical_engine(fd, e->exec_id | e->flags));
1700 igt_require(gem_can_store_dword(fd, e->exec_id | e->flags));
1701 }
1702
1703 igt_subtest_f("in-order-%s", e->name)
1704 reorder(fd, e->exec_id | e->flags, EQUAL);
1705
1706 igt_subtest_f("out-order-%s", e->name)
1707 reorder(fd, e->exec_id | e->flags, 0);
1708
1709 igt_subtest_f("promotion-%s", e->name)
1710 promotion(fd, e->exec_id | e->flags);
1711
1712 igt_subtest_group {
1713 igt_fixture {
1714 igt_require(gem_scheduler_has_preemption(fd));
1715 }
1716
1717 igt_subtest_f("preempt-%s", e->name)
1718 preempt(fd, e->exec_id | e->flags, 0);
1719
1720 igt_subtest_f("preempt-contexts-%s", e->name)
1721 preempt(fd, e->exec_id | e->flags, NEW_CTX);
1722
1723 igt_subtest_f("preempt-self-%s", e->name)
1724 preempt_self(fd, e->exec_id | e->flags);
1725
1726 igt_subtest_f("preempt-other-%s", e->name)
1727 preempt_other(fd, e->exec_id | e->flags, 0);
1728
1729 igt_subtest_f("preempt-other-chain-%s", e->name)
1730 preempt_other(fd, e->exec_id | e->flags, CHAIN);
1731
1732 igt_subtest_f("preempt-queue-%s", e->name)
1733 preempt_queue(fd, e->exec_id | e->flags, 0);
1734
1735 igt_subtest_f("preempt-queue-chain-%s", e->name)
1736 preempt_queue(fd, e->exec_id | e->flags, CHAIN);
1737 igt_subtest_f("preempt-queue-contexts-%s", e->name)
1738 preempt_queue(fd, e->exec_id | e->flags, CONTEXTS);
1739
1740 igt_subtest_f("preempt-queue-contexts-chain-%s", e->name)
1741 preempt_queue(fd, e->exec_id | e->flags, CONTEXTS | CHAIN);
1742
1743 igt_subtest_group {
1744 igt_hang_t hang;
1745
1746 igt_fixture {
1747 igt_stop_hang_detector();
1748 hang = igt_allow_hang(fd, 0, 0);
1749 }
1750
1751 igt_subtest_f("preempt-hang-%s", e->name) {
1752 preempt(fd, e->exec_id | e->flags, NEW_CTX | HANG_LP);
1753 }
1754
1755 igt_subtest_f("preemptive-hang-%s", e->name)
1756 preemptive_hang(fd, e->exec_id | e->flags);
1757
1758 igt_fixture {
1759 igt_disallow_hang(fd, hang);
1760 igt_fork_hang_detector(fd);
1761 }
1762 }
1763 }
1764
1765 igt_subtest_f("deep-%s", e->name)
1766 deep(fd, e->exec_id | e->flags);
1767
1768 igt_subtest_f("wide-%s", e->name)
1769 wide(fd, e->exec_id | e->flags);
1770
1771 igt_subtest_f("reorder-wide-%s", e->name)
1772 reorder_wide(fd, e->exec_id | e->flags);
1773
1774 igt_subtest_f("smoketest-%s", e->name)
1775 smoketest(fd, e->exec_id | e->flags, 5);
1776 }
1777 }
1778 }
1779
1780 igt_subtest_group {
1781 igt_fixture {
1782 igt_require(gem_scheduler_enabled(fd));
1783 igt_require(gem_scheduler_has_ctx_priority(fd));
1784 }
1785
1786 for (e = intel_execution_engines; e->name; e++) {
1787 if (e->exec_id == 0)
1788 continue;
1789
1790 igt_subtest_group {
1791 igt_fixture {
1792 igt_require(gem_ring_has_physical_engine(fd, e->exec_id | e->flags));
1793 igt_require(gem_scheduler_has_preemption(fd));
1794 }
1795
1796 igt_subtest_f("pi-ringfull-%s", e->name)
1797 test_pi_ringfull(fd, e->exec_id | e->flags);
1798 }
1799 }
1800 }
1801
1802 igt_subtest_group {
1803 igt_fixture {
1804 igt_require(gem_scheduler_enabled(fd));
1805 igt_require(gem_scheduler_has_semaphores(fd));
1806 }
1807
1808 igt_subtest("semaphore-power")
1809 measure_semaphore_power(fd);
1810 }
1811
1812 igt_fixture {
1813 igt_stop_hang_detector();
1814 close(fd);
1815 }
1816 }
1817