/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "igt.h"
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <sys/signal.h>
#include <time.h>
#include <sched.h>

#include "drm.h"

#include "igt_sysfs.h"
#include "igt_vgem.h"
#include "igt_dummyload.h"
#include "igt_stats.h"

#include "i915/gem_ring.h"

#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)

#define LOCAL_I915_EXEC_BSD_SHIFT      (13)
#define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)

#define ENGINE_FLAGS  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)

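/*
 * Test-mode flags: LIVE runs the measurement while a spinner keeps the
 * engine busy, CORK queues the batches behind a plugged request before
 * releasing them all at once, and PREEMPT submits from a high-priority
 * context on top of a low-priority spinner.
 */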
#define LIVE 0x1
#define CORK 0x2
#define PREEMPT 0x4

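/*
 * ring_size is the measured number of batches that fit in the ring
 * (set in the main fixture); rcs_clock ends up as nanoseconds per RCS
 * timestamp tick after calibration.
 */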
static unsigned int ring_size;
static double rcs_clock;

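/*
 * Ping-pong between two pollable spinners for ~2s, ending one and
 * submitting the other each iteration, to measure the mean time from
 * execbuf to the batch actually executing on the engine.
 */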
static void
poll_ring(int fd, unsigned ring, const char *name)
{
	const struct igt_spin_factory opts = {
		.engine = ring,
		.flags = IGT_SPIN_POLL_RUN | IGT_SPIN_FAST,
	};
	struct timespec tv = {};
	unsigned long cycles;
	igt_spin_t *spin[2];
	uint64_t elapsed;

	gem_require_ring(fd, ring);
	igt_require(gem_can_store_dword(fd, ring));

	spin[0] = __igt_spin_factory(fd, &opts);
	igt_assert(igt_spin_has_poll(spin[0]));

	spin[1] = __igt_spin_factory(fd, &opts);
	igt_assert(igt_spin_has_poll(spin[1]));

	igt_spin_end(spin[0]);
	igt_spin_busywait_until_started(spin[1]);

	igt_assert(!gem_bo_busy(fd, spin[0]->handle));

	cycles = 0;
	while ((elapsed = igt_nsec_elapsed(&tv)) < 2ull << 30) {
		const unsigned int idx = cycles++ & 1;

		igt_spin_reset(spin[idx]);

		gem_execbuf(fd, &spin[idx]->execbuf);

		igt_spin_end(spin[!idx]);
		igt_spin_busywait_until_started(spin[idx]);
	}

	igt_info("%s completed %ld cycles: %.3f us\n",
		 name, cycles, elapsed*1e-3/cycles);

	igt_spin_free(fd, spin[1]);
	igt_spin_free(fd, spin[0]);
}

#define RCS_TIMESTAMP (0x2000 + 0x358)
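/*
 * 0x2358 is the render engine's TIMESTAMP register (engine mmio base
 * 0x2000 + offset 0x358), read both from the CPU via the global mmio
 * mapping and from the GPU via MI_STORE_REGISTER_MEM. Dispatch latency
 * is the CPU-visible timestamp delta across the submission loop;
 * execution latency is the delta between the timestamps the batches
 * themselves wrote.
 */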
static void latency_on_ring(int fd,
			    unsigned ring, const char *name,
			    unsigned flags)
{
	const int gen = intel_gen(intel_get_drm_devid(fd));
	const int has_64bit_reloc = gen >= 8;
	struct drm_i915_gem_exec_object2 obj[3];
	struct drm_i915_gem_relocation_entry reloc;
	struct drm_i915_gem_execbuffer2 execbuf;
	igt_spin_t *spin = NULL;
	IGT_CORK_HANDLE(c);
	volatile uint32_t *reg;
	unsigned repeats = ring_size;
	uint32_t start, end, *map, *results;
	uint64_t offset;
	double gpu_latency;
	int i, j;

	reg = (volatile uint32_t *)((volatile char *)igt_global_mmio + RCS_TIMESTAMP);

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj[1]);
	execbuf.buffer_count = 2;
	execbuf.flags = ring;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT;

	memset(obj, 0, sizeof(obj));
	obj[1].handle = gem_create(fd, 4096);
	obj[1].flags = EXEC_OBJECT_WRITE;
	results = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_READ);

	obj[2].handle = gem_create(fd, 64*1024);
	map = gem_mmap__wc(fd, obj[2].handle, 0, 64*1024, PROT_WRITE);
	gem_set_domain(fd, obj[2].handle,
		       I915_GEM_DOMAIN_GTT,
		       I915_GEM_DOMAIN_GTT);
	map[0] = MI_BATCH_BUFFER_END;
	gem_execbuf(fd, &execbuf);

	memset(&reloc, 0, sizeof(reloc));
	obj[2].relocation_count = 1;
	obj[2].relocs_ptr = to_user_pointer(&reloc);

	gem_set_domain(fd, obj[2].handle,
		       I915_GEM_DOMAIN_GTT,
		       I915_GEM_DOMAIN_GTT);

	reloc.target_handle = flags & CORK ? 1 : 0;
	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
	reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
	reloc.presumed_offset = obj[1].offset;

	for (j = 0; j < repeats; j++) {
		execbuf.batch_start_offset = 64 * j;
		reloc.offset =
			execbuf.batch_start_offset + sizeof(uint32_t);
		reloc.delta = sizeof(uint32_t) * j;

		offset = reloc.presumed_offset;
		offset += reloc.delta;

		i = 16 * j;
		/* MI_STORE_REG_MEM */
		map[i++] = 0x24 << 23 | 1;
		if (has_64bit_reloc)
			map[i-1]++;
		map[i++] = RCS_TIMESTAMP; /* ring local! */
		map[i++] = offset;
		if (has_64bit_reloc)
			map[i++] = offset >> 32;
		map[i++] = MI_BATCH_BUFFER_END;
	}

	if (flags & CORK) {
		obj[0].handle = igt_cork_plug(&c, fd);
		execbuf.buffers_ptr = to_user_pointer(&obj[0]);
		execbuf.buffer_count = 3;
	}

	if (flags & LIVE)
		spin = igt_spin_new(fd, .engine = ring);

	start = *reg;
	for (j = 0; j < repeats; j++) {
		uint64_t presumed_offset = reloc.presumed_offset;

		execbuf.batch_start_offset = 64 * j;
		reloc.offset =
			execbuf.batch_start_offset + sizeof(uint32_t);
		reloc.delta = sizeof(uint32_t) * j;

		gem_execbuf(fd, &execbuf);
		igt_assert(reloc.presumed_offset == presumed_offset);
	}
	end = *reg;
	igt_assert(reloc.presumed_offset == obj[1].offset);

	igt_spin_free(fd, spin);
	if (flags & CORK)
		igt_cork_unplug(&c);

	gem_set_domain(fd, obj[1].handle, I915_GEM_DOMAIN_GTT, 0);
	gpu_latency = (results[repeats-1] - results[0]) / (double)(repeats-1);

	gem_set_domain(fd, obj[2].handle,
		       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);

	execbuf.batch_start_offset = 0;
	for (j = 0; j < repeats - 1; j++) {
		offset = obj[2].offset;
		offset += 64 * (j + 1);

		i = 16 * j + (has_64bit_reloc ? 4 : 3);
		map[i] = MI_BATCH_BUFFER_START;
		if (gen >= 8) {
			map[i] |= 1 << 8 | 1;
			map[i + 1] = offset;
			map[i + 2] = offset >> 32;
		} else if (gen >= 6) {
			map[i] |= 1 << 8;
			map[i + 1] = offset;
		} else {
			map[i] |= 2 << 6;
			map[i + 1] = offset;
			if (gen < 4)
				map[i] |= 1;
		}
	}
	offset = obj[2].offset;
	gem_execbuf(fd, &execbuf);
	igt_assert(offset == obj[2].offset);

	gem_set_domain(fd, obj[1].handle, I915_GEM_DOMAIN_GTT, 0);
	igt_info("%s: dispatch latency: %.1fns, execution latency: %.1fns (target %.1fns)\n",
		 name,
		 (end - start) / (double)repeats * rcs_clock,
		 gpu_latency * rcs_clock,
		 (results[repeats - 1] - results[0]) / (double)(repeats - 1) * rcs_clock);

	munmap(map, 64*1024);
	munmap(results, 4096);
	if (flags & CORK)
		gem_close(fd, obj[0].handle);
	gem_close(fd, obj[1].handle);
	gem_close(fd, obj[2].handle);
}

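/*
 * Measure inter-engine synchronisation: for every other physical engine,
 * alternate timestamp-writing batches between @ring and that engine and
 * report the average delay from the first timestamp write to the last.
 */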
static void latency_from_ring(int fd,
			      unsigned ring, const char *name,
			      unsigned flags)
{
	const int gen = intel_gen(intel_get_drm_devid(fd));
	const int has_64bit_reloc = gen >= 8;
	struct drm_i915_gem_exec_object2 obj[3];
	struct drm_i915_gem_relocation_entry reloc;
	struct drm_i915_gem_execbuffer2 execbuf;
	const unsigned int repeats = ring_size / 2;
	unsigned int other;
	uint32_t *map, *results;
	uint32_t ctx[2] = {};
	int i, j;

	if (flags & PREEMPT) {
		ctx[0] = gem_context_create(fd);
		gem_context_set_priority(fd, ctx[0], -1023);

		ctx[1] = gem_context_create(fd);
		gem_context_set_priority(fd, ctx[1], 1023);
	}

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj[1]);
	execbuf.buffer_count = 2;
	execbuf.flags = ring;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT;
	execbuf.rsvd1 = ctx[1];

	memset(obj, 0, sizeof(obj));
	obj[1].handle = gem_create(fd, 4096);
	obj[1].flags = EXEC_OBJECT_WRITE;
	results = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_READ);

	obj[2].handle = gem_create(fd, 64*1024);
	map = gem_mmap__wc(fd, obj[2].handle, 0, 64*1024, PROT_WRITE);
	gem_set_domain(fd, obj[2].handle,
		       I915_GEM_DOMAIN_GTT,
		       I915_GEM_DOMAIN_GTT);
	map[0] = MI_BATCH_BUFFER_END;
	gem_execbuf(fd, &execbuf);

	memset(&reloc, 0, sizeof(reloc));
	obj[2].relocation_count = 1;
	obj[2].relocs_ptr = to_user_pointer(&reloc);

	gem_set_domain(fd, obj[2].handle,
		       I915_GEM_DOMAIN_GTT,
		       I915_GEM_DOMAIN_GTT);

	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
	reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
	reloc.presumed_offset = obj[1].offset;
	reloc.target_handle = flags & CORK ? 1 : 0;

	for_each_physical_engine(fd, other) {
		igt_spin_t *spin = NULL;
		IGT_CORK_HANDLE(c);

		gem_set_domain(fd, obj[2].handle,
			       I915_GEM_DOMAIN_GTT,
			       I915_GEM_DOMAIN_GTT);

		if (flags & PREEMPT)
			spin = __igt_spin_new(fd,
					      .ctx = ctx[0],
					      .engine = ring);

		if (flags & CORK) {
			obj[0].handle = igt_cork_plug(&c, fd);
			execbuf.buffers_ptr = to_user_pointer(&obj[0]);
			execbuf.buffer_count = 3;
		}

		for (j = 0; j < repeats; j++) {
			uint64_t offset;

			execbuf.flags &= ~ENGINE_FLAGS;
			execbuf.flags |= ring;

			execbuf.batch_start_offset = 64 * j;
			reloc.offset =
				execbuf.batch_start_offset + sizeof(uint32_t);
			reloc.delta = sizeof(uint32_t) * j;

			reloc.presumed_offset = obj[1].offset;
			offset = reloc.presumed_offset;
			offset += reloc.delta;

			i = 16 * j;
			/* MI_STORE_REG_MEM */
			map[i++] = 0x24 << 23 | 1;
			if (has_64bit_reloc)
				map[i-1]++;
			map[i++] = RCS_TIMESTAMP; /* ring local! */
			map[i++] = offset;
			if (has_64bit_reloc)
				map[i++] = offset >> 32;
			map[i++] = MI_BATCH_BUFFER_END;

			gem_execbuf(fd, &execbuf);

			execbuf.flags &= ~ENGINE_FLAGS;
			execbuf.flags |= other;

			execbuf.batch_start_offset = 64 * (j + repeats);
			reloc.offset =
				execbuf.batch_start_offset + sizeof(uint32_t);
			reloc.delta = sizeof(uint32_t) * (j + repeats);

			reloc.presumed_offset = obj[1].offset;
			offset = reloc.presumed_offset;
			offset += reloc.delta;

			i = 16 * (j + repeats);
			/* MI_STORE_REG_MEM */
			map[i++] = 0x24 << 23 | 1;
			if (has_64bit_reloc)
				map[i-1]++;
			map[i++] = RCS_TIMESTAMP; /* ring local! */
			map[i++] = offset;
			if (has_64bit_reloc)
				map[i++] = offset >> 32;
			map[i++] = MI_BATCH_BUFFER_END;

			gem_execbuf(fd, &execbuf);
		}

		if (flags & CORK)
			igt_cork_unplug(&c);
		gem_set_domain(fd, obj[1].handle,
			       I915_GEM_DOMAIN_GTT,
			       I915_GEM_DOMAIN_GTT);
		igt_spin_free(fd, spin);

		igt_info("%s-%s delay: %.2fns\n",
			 name, e__->name,
			 (results[2*repeats-1] - results[0]) / (double)repeats * rcs_clock);
	}

	munmap(map, 64*1024);
	munmap(results, 4096);

	if (flags & CORK)
		gem_close(fd, obj[0].handle);
	gem_close(fd, obj[1].handle);
	gem_close(fd, obj[2].handle);

	if (flags & PREEMPT) {
		gem_context_destroy(fd, ctx[1]);
		gem_context_destroy(fd, ctx[0]);
	}
}

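/*
 * Resubmit an existing spinner's batch, rewriting only the engine
 * selector bits so the same payload is queued on the chosen engine.
 */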
static void
__submit_spin(int fd, igt_spin_t *spin, unsigned int flags)
{
	struct drm_i915_gem_execbuffer2 eb = spin->execbuf;

	eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK);
	eb.flags |= flags | I915_EXEC_NO_RELOC;

	gem_execbuf(fd, &eb);
}

struct rt_pkt {
	struct igt_mean mean;
	double min, max;
};

static bool __spin_wait(int fd, igt_spin_t *spin)
{
	while (!igt_spin_has_started(spin)) {
		if (!gem_bo_busy(fd, spin->handle))
			return false;
	}

	return true;
}

/*
 * Test whether an RT thread that hogs the CPU can still submit work with
 * reasonable latency.
 */
static void
rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned int flags)
#define RTIDLE 0x1
{
	const char *passname[] = {
		"warmup",
		"normal",
		"rt[0]",
		"rt[1]",
		"rt[2]",
		"rt[3]",
		"rt[4]",
		"rt[5]",
		"rt[6]",
	};
#define NPASS ARRAY_SIZE(passname)
#define MMAP_SZ (64 << 10)
	const struct igt_spin_factory opts = {
		.engine = engine,
		.flags = IGT_SPIN_POLL_RUN | IGT_SPIN_FAST,
	};
	struct rt_pkt *results;
	unsigned int engines[16];
	const char *names[16];
	unsigned int nengine;
	int ret;

	igt_assert(ARRAY_SIZE(engines) * NPASS * sizeof(*results) <= MMAP_SZ);
	results = mmap(NULL, MMAP_SZ, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
	igt_assert(results != MAP_FAILED);

	nengine = 0;
	if (engine == ALL_ENGINES) {
		for_each_physical_engine(fd, engine) {
			if (!gem_can_store_dword(fd, engine))
				continue;

			engines[nengine] = engine;
			names[nengine] = e__->name;
			nengine++;
		}
		igt_require(nengine > 1);
	} else {
		igt_require(gem_can_store_dword(fd, engine));
		engines[nengine] = engine;
		names[nengine] = name;
		nengine++;
	}

	gem_quiescent_gpu(fd);

	igt_fork(child, nengine) {
		unsigned int pass = 0; /* Three phases: warmup, normal, rt. */

		engine = engines[child];
		do {
			struct igt_mean mean;
			double min = HUGE_VAL;
			double max = -HUGE_VAL;
			igt_spin_t *spin;

			igt_mean_init(&mean);

			if (pass == 2) {
				struct sched_param rt =
				{ .sched_priority = 99 };

				ret = sched_setscheduler(0,
							 SCHED_FIFO | SCHED_RESET_ON_FORK,
							 &rt);
				if (ret) {
					igt_warn("Failed to set scheduling policy!\n");
					break;
				}
			}

			usleep(250);

			spin = __igt_spin_factory(fd, &opts);
			if (!spin) {
				igt_warn("Failed to create spinner! (%s)\n",
					 passname[pass]);
				break;
			}
			igt_spin_busywait_until_started(spin);

			igt_until_timeout(pass > 0 ? 5 : 2) {
				struct timespec ts = { };
				double t;

				igt_spin_end(spin);
				gem_sync(fd, spin->handle);
				if (flags & RTIDLE)
					igt_drop_caches_set(fd, DROP_IDLE);

				/*
				 * If we are oversubscribed (more RT hogs than
				 * cpus) give the others a chance to run;
				 * otherwise, they will interrupt us in the
				 * middle of the measurement.
				 */
				if (nengine > 1)
					usleep(10*nengine);

				igt_spin_reset(spin);

				igt_nsec_elapsed(&ts);
				__submit_spin(fd, spin, engine);
				if (!__spin_wait(fd, spin)) {
					igt_warn("Wait timeout! (%s)\n",
						 passname[pass]);
					break;
				}

				t = igt_nsec_elapsed(&ts) * 1e-9;
				if (t > max)
					max = t;
				if (t < min)
					min = t;

				igt_mean_add(&mean, t);
			}

			igt_spin_free(fd, spin);

			igt_info("%8s %10s: mean=%.2fus stddev=%.3fus [%.2fus, %.2fus] (n=%lu)\n",
				 names[child],
				 passname[pass],
				 igt_mean_get(&mean) * 1e6,
				 sqrt(igt_mean_get_variance(&mean)) * 1e6,
				 min * 1e6, max * 1e6,
				 mean.count);

			results[NPASS * child + pass].mean = mean;
			results[NPASS * child + pass].min = min;
			results[NPASS * child + pass].max = max;
		} while (++pass < NPASS);
	}

	igt_waitchildren();

	for (unsigned int child = 0; child < nengine; child++) {
		struct rt_pkt normal = results[NPASS * child + 1];
		igt_stats_t stats;
		double variance;

		igt_stats_init_with_size(&stats, NPASS);

		variance = 0;
		for (unsigned int pass = 2; pass < NPASS; pass++) {
			struct rt_pkt *rt = &results[NPASS * child + pass];

			igt_assert(rt->max);

			igt_stats_push_float(&stats, igt_mean_get(&rt->mean));
			variance += igt_mean_get_variance(&rt->mean);
		}
		variance /= NPASS - 2;

		igt_info("%8s: normal latency=%.2f±%.3fus, rt latency=%.2f±%.3fus\n",
			 names[child],
			 igt_mean_get(&normal.mean) * 1e6,
			 sqrt(igt_mean_get_variance(&normal.mean)) * 1e6,
			 igt_stats_get_median(&stats) * 1e6,
			 sqrt(variance) * 1e6);

		igt_assert(igt_stats_get_median(&stats) <
			   igt_mean_get(&normal.mean) * 2);

		/* The system is noisy; be conservative when declaring fail. */
		igt_assert(variance < igt_mean_get_variance(&normal.mean) * 10);
	}

	munmap(results, MMAP_SZ);
}

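/*
 * Determine the timestamp frequency: ask the kernel for
 * I915_PARAM_CS_TIMESTAMP_FREQUENCY if available, otherwise estimate it
 * by sampling the register over roughly a millisecond of wall time.
 */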
static double clockrate(int i915, int reg)
{
	volatile uint32_t *mmio;
	uint32_t r_start, r_end;
	struct timespec tv;
	uint64_t t_start, t_end;
	uint64_t elapsed;
	int cs_timestamp_freq;
	drm_i915_getparam_t gp = {
		.value = &cs_timestamp_freq,
		.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY,
	};

	if (igt_ioctl(i915, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
		return cs_timestamp_freq;

	mmio = (volatile uint32_t *)((volatile char *)igt_global_mmio + reg);

	t_start = igt_nsec_elapsed(&tv);
	r_start = *mmio;
	elapsed = igt_nsec_elapsed(&tv) - t_start;

	usleep(1000);

	t_end = igt_nsec_elapsed(&tv);
	r_end = *mmio;
	elapsed += igt_nsec_elapsed(&tv) - t_end;

	elapsed = (t_end - t_start) + elapsed / 2;
	return (r_end - r_start) * 1e9 / elapsed;
}

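/*
 * The fixture measures the ring size (leaving some spare and capping it
 * at 1024 batches) and converts the RCS timestamp frequency into
 * nanoseconds per tick before running the per-engine subtests.
 */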
igt_main
{
	const struct intel_execution_engine *e;
	int device = -1;

	igt_fixture {
		device = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(device);
		gem_require_mmap_wc(device);

		gem_submission_print_method(device);

		ring_size = gem_measure_ring_inflight(device, ALL_ENGINES, 0);
		igt_info("Ring size: %d batches\n", ring_size);
		igt_require(ring_size > 8);
		ring_size -= 8; /* leave some spare */
		if (ring_size > 1024)
			ring_size = 1024;

		intel_register_access_init(intel_get_pci_device(), false, device);
		rcs_clock = clockrate(device, RCS_TIMESTAMP);
		igt_info("RCS timestamp clock: %.0fkHz, %.1fns\n",
			 rcs_clock / 1e3, 1e9 / rcs_clock);
		rcs_clock = 1e9 / rcs_clock;
	}

	igt_subtest("all-rtidle-submit")
		rthog_latency_on_ring(device, ALL_ENGINES, "all", RTIDLE);

	igt_subtest("all-rthog-submit")
		rthog_latency_on_ring(device, ALL_ENGINES, "all", 0);

	igt_subtest_group {
		igt_fixture
			igt_require(intel_gen(intel_get_drm_devid(device)) >= 7);

		for (e = intel_execution_engines; e->name; e++) {
			if (e->exec_id == 0)
				continue;

			igt_subtest_group {
				igt_fixture {
					igt_require(gem_ring_has_physical_engine(device, e->exec_id | e->flags));
				}

				igt_subtest_f("%s-dispatch", e->name)
					latency_on_ring(device,
							e->exec_id | e->flags,
							e->name, 0);

				igt_subtest_f("%s-live-dispatch", e->name)
					latency_on_ring(device,
							e->exec_id | e->flags,
							e->name, LIVE);

				igt_subtest_f("%s-poll", e->name)
					poll_ring(device,
						  e->exec_id | e->flags,
						  e->name);

				igt_subtest_f("%s-rtidle-submit", e->name)
					rthog_latency_on_ring(device,
							      e->exec_id |
							      e->flags,
							      e->name,
							      RTIDLE);

				igt_subtest_f("%s-rthog-submit", e->name)
					rthog_latency_on_ring(device,
							      e->exec_id |
							      e->flags,
							      e->name,
							      0);

				igt_subtest_f("%s-live-dispatch-queued", e->name)
					latency_on_ring(device,
							e->exec_id | e->flags,
							e->name, LIVE | CORK);
				igt_subtest_f("%s-dispatch-queued", e->name)
					latency_on_ring(device,
							e->exec_id | e->flags,
							e->name, CORK);

				igt_subtest_f("%s-synchronisation", e->name)
					latency_from_ring(device,
							  e->exec_id | e->flags,
							  e->name, 0);

				igt_subtest_f("%s-synchronisation-queued", e->name)
					latency_from_ring(device,
							  e->exec_id | e->flags,
							  e->name, CORK);

				igt_subtest_group {
					igt_fixture {
						gem_require_contexts(device);
						igt_require(gem_scheduler_has_preemption(device));
					}

					igt_subtest_f("%s-preemption", e->name)
						latency_from_ring(device,
								  e->exec_id | e->flags,
								  e->name, PREEMPT);
				}
			}
		}
	}

	igt_fixture {
		close(device);
	}
}
763