/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <sched.h>
#include <signal.h>
#include <sys/ioctl.h>

#include "igt.h"
#include "igt_rand.h"
#include "igt_vgem.h"
#include "i915/gem_ring.h"

#define LOCAL_EXEC_NO_RELOC (1<<11)
#define PAGE_ALIGN(x) ALIGN(x, 4096)

/* Exercise the busy-ioctl, ensuring the ABI is never broken */
IGT_TEST_DESCRIPTION("Basic check of busy-ioctl ABI.");

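/*
 * Roles of the three handles used by the semaphore tests (summarising how the
 * code below uses them): TEST is the object whose busy state is inspected,
 * BUSY is the dependency object kept busy by a long-running spinner, and
 * BATCH is a trivial batch containing just MI_BATCH_BUFFER_END.
 */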
enum { TEST = 0, BUSY, BATCH };

static bool gem_busy(int fd, uint32_t handle)
{
	struct drm_i915_gem_busy busy;

	memset(&busy, 0, sizeof(busy));
	busy.handle = handle;

	do_ioctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy);

	return busy.busy != 0;
}

static void __gem_busy(int fd,
		       uint32_t handle,
		       uint32_t *read,
		       uint32_t *write)
{
	struct drm_i915_gem_busy busy;

	memset(&busy, 0, sizeof(busy));
	busy.handle = handle;

	do_ioctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy);

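	/*
	 * Decode the busy report as the assertions in this test exercise it:
	 * the low 16 bits name the engine class currently writing to the
	 * object (reported as class + 1, 0 meaning no writer), while the
	 * high 16 bits are a bitmask of engine classes currently reading
	 * from it.
	 */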
	*write = busy.busy & 0xffff;
	*read = busy.busy >> 16;
}

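/*
 * Queue an empty batch that references handles[BUSY] and handles[TEST] (the
 * latter optionally for write), so the TEST object picks up a read or write
 * dependency on the chosen engine without doing any real work.
 */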
static bool exec_noop(int fd,
		      uint32_t *handles,
		      unsigned flags,
		      bool write)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 exec[3];

	memset(exec, 0, sizeof(exec));
	exec[0].handle = handles[BUSY];
	exec[1].handle = handles[TEST];
	if (write)
		exec[1].flags |= EXEC_OBJECT_WRITE;
	exec[2].handle = handles[BATCH];

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(exec);
	execbuf.buffer_count = 3;
	execbuf.flags = flags;
	igt_debug("Queuing handle for %s on engine %d\n",
		  write ? "writing" : "reading", flags);
	return __gem_execbuf(fd, &execbuf) == 0;
}

static bool still_busy(int fd, uint32_t handle)
{
	uint32_t read, write;
	__gem_busy(fd, handle, &read, &write);
	return write;
}

static void semaphore(int fd, const struct intel_execution_engine2 *e)
{
	struct intel_execution_engine2 *__e;
	uint32_t bbe = MI_BATCH_BUFFER_END;
	const unsigned uabi = e->class;
	igt_spin_t *spin;
	uint32_t handle[3];
	uint32_t read, write;
	uint32_t active;
	unsigned i;

	handle[TEST] = gem_create(fd, 4096);
	handle[BATCH] = gem_create(fd, 4096);
	gem_write(fd, handle[BATCH], 0, &bbe, sizeof(bbe));

	/* Create a long running batch which we can use to hog the GPU */
	handle[BUSY] = gem_create(fd, 4096);
	spin = igt_spin_new(fd,
			    .engine = e->flags,
			    .dependency = handle[BUSY]);

	/* Queue a batch after the busy, it should block and remain "busy" */
	igt_assert(exec_noop(fd, handle, e->flags, false));
	igt_assert(still_busy(fd, handle[BUSY]));
	__gem_busy(fd, handle[TEST], &read, &write);
	igt_assert_eq(read, 1 << uabi);
	igt_assert_eq(write, 0);

	/* Requeue with a write */
	igt_assert(exec_noop(fd, handle, e->flags, true));
	igt_assert(still_busy(fd, handle[BUSY]));
	__gem_busy(fd, handle[TEST], &read, &write);
	igt_assert_eq(read, 1 << uabi);
	igt_assert_eq(write, 1 + uabi);

	/* Now queue it for a read across all available rings */
	active = 0;
	__for_each_physical_engine(fd, __e) {
		if (exec_noop(fd, handle, __e->flags, false))
			active |= 1 << __e->class;
	}
	igt_assert(still_busy(fd, handle[BUSY]));
	__gem_busy(fd, handle[TEST], &read, &write);
	igt_assert_eq(read, active);
	igt_assert_eq(write, 1 + uabi); /* from the earlier write */

	/* Check that our long batch was long enough */
	igt_assert(still_busy(fd, handle[BUSY]));
	igt_spin_free(fd, spin);

	/* And make sure it becomes idle again */
	gem_sync(fd, handle[TEST]);
	__gem_busy(fd, handle[TEST], &read, &write);
	igt_assert_eq(read, 0);
	igt_assert_eq(write, 0);

	for (i = TEST; i <= BATCH; i++)
		gem_close(fd, handle[i]);
}

#define PARALLEL 1
#define HANG 2
static void one(int fd, const struct intel_execution_engine2 *e, unsigned test_flags)
{
	const int gen = intel_gen(intel_get_drm_devid(fd));
	struct drm_i915_gem_exec_object2 obj[2];
#define SCRATCH 0
#define BATCH 1
	struct drm_i915_gem_relocation_entry store[1024+1];
	struct drm_i915_gem_execbuffer2 execbuf;
	unsigned size = ALIGN(ARRAY_SIZE(store)*16 + 4, 4096);
	const unsigned uabi = e->class;
	uint32_t read[2], write[2];
	struct timespec tv;
	uint32_t *batch, *bbe;
	int i, count, timeout;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(obj);
	execbuf.buffer_count = 2;
	execbuf.flags = e->flags;
	if (gen < 6)
		execbuf.flags |= I915_EXEC_SECURE;

	memset(obj, 0, sizeof(obj));
	obj[SCRATCH].handle = gem_create(fd, 4096);

	obj[BATCH].handle = gem_create(fd, size);
	obj[BATCH].relocs_ptr = to_user_pointer(store);
	obj[BATCH].relocation_count = ARRAY_SIZE(store);
	memset(store, 0, sizeof(store));

	batch = gem_mmap__wc(fd, obj[BATCH].handle, 0, size, PROT_WRITE);
	gem_set_domain(fd, obj[BATCH].handle,
			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);

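	/*
	 * Fill the batch with 1024 MI_STORE_DWORD_IMM commands, each writing
	 * its loop index into successive dwords of the scratch object. The
	 * packet length and address layout differ between generations, hence
	 * the per-gen branches below.
	 */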
	i = 0;
	for (count = 0; count < 1024; count++) {
		store[count].target_handle = obj[SCRATCH].handle;
		store[count].presumed_offset = -1;
		store[count].offset = sizeof(uint32_t) * (i + 1);
		store[count].delta = sizeof(uint32_t) * count;
		store[count].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
		store[count].write_domain = I915_GEM_DOMAIN_INSTRUCTION;
		batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
		if (gen >= 8) {
			batch[++i] = 0;
			batch[++i] = 0;
		} else if (gen >= 4) {
			batch[++i] = 0;
			batch[++i] = 0;
			store[count].offset += sizeof(uint32_t);
		} else {
			batch[i]--;
			batch[++i] = 0;
		}
		batch[++i] = count;
		i++;
	}

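	/*
	 * Terminate the batch with a MI_BATCH_BUFFER_START that, via the
	 * final relocation, points back at the start of this very batch, so
	 * it keeps looping (and stays busy) until *bbe is later overwritten
	 * with MI_BATCH_BUFFER_END.
	 */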
	bbe = &batch[i];
	store[count].target_handle = obj[BATCH].handle; /* recurse */
	store[count].presumed_offset = 0;
	store[count].offset = sizeof(uint32_t) * (i + 1);
	store[count].delta = 0;
	store[count].read_domains = I915_GEM_DOMAIN_COMMAND;
	store[count].write_domain = 0;
	batch[i] = MI_BATCH_BUFFER_START;
	if (gen >= 8) {
		batch[i] |= 1 << 8 | 1;
		batch[++i] = 0;
		batch[++i] = 0;
	} else if (gen >= 6) {
		batch[i] |= 1 << 8;
		batch[++i] = 0;
	} else {
		batch[i] |= 2 << 6;
		batch[++i] = 0;
		if (gen < 4) {
			batch[i] |= 1;
			store[count].delta = 1;
		}
	}
	i++;

	igt_assert(i < size/sizeof(*batch));
	igt_require(__gem_execbuf(fd, &execbuf) == 0);

	__gem_busy(fd, obj[SCRATCH].handle, &read[SCRATCH], &write[SCRATCH]);
	__gem_busy(fd, obj[BATCH].handle, &read[BATCH], &write[BATCH]);

	if (test_flags & PARALLEL) {
		struct intel_execution_engine2 *e2;

		__for_each_physical_engine(fd, e2) {
			if (e2->class == e->class &&
			    e2->instance == e->instance)
				continue;

			if (!gem_class_can_store_dword(fd, e2->class))
				continue;

			igt_debug("Testing %s in parallel\n", e2->name);
			one(fd, e2, 0);
		}
	}

	timeout = 120;
	if ((test_flags & HANG) == 0) {
		*bbe = MI_BATCH_BUFFER_END;
		__sync_synchronize();
		timeout = 1;
	}

	igt_assert_eq(write[SCRATCH], 1 + uabi);
	igt_assert_eq_u32(read[SCRATCH], 1 << uabi);

	igt_assert_eq(write[BATCH], 0);
	igt_assert_eq_u32(read[BATCH], 1 << uabi);

	/* Calling busy in a loop should be enough to flush the rendering */
	memset(&tv, 0, sizeof(tv));
	while (gem_busy(fd, obj[BATCH].handle))
		igt_assert(igt_seconds_elapsed(&tv) < timeout);
	igt_assert(!gem_busy(fd, obj[SCRATCH].handle));

	munmap(batch, size);
	batch = gem_mmap__wc(fd, obj[SCRATCH].handle, 0, 4096, PROT_READ);
	for (i = 0; i < 1024; i++)
		igt_assert_eq_u32(batch[i], i);
	munmap(batch, 4096);

	gem_close(fd, obj[BATCH].handle);
	gem_close(fd, obj[SCRATCH].handle);
}

static void xchg_u32(void *array, unsigned i, unsigned j)
{
	uint32_t *u32 = array;
	uint32_t tmp = u32[i];
	u32[i] = u32[j];
	u32[j] = tmp;
}

static void close_race(int fd)
{
	const unsigned int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	const unsigned int nhandles = gem_measure_ring_inflight(fd, ALL_ENGINES, 0) / 2;
	unsigned int engines[16], nengine;
	unsigned long *control;
	uint32_t *handles;
	int i;

	igt_require(ncpus > 1);
	intel_require_memory(nhandles, 4096, CHECK_RAM);

	/*
	 * One thread spawning work and randomly closing handles.
	 * One background thread per cpu checking busyness.
	 */

	nengine = 0;
	for_each_engine(fd, i)
		engines[nengine++] = i;
	igt_require(nengine);

	control = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
	igt_assert(control != MAP_FAILED);

	handles = mmap(NULL, PAGE_ALIGN(nhandles*sizeof(*handles)),
		   PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
	igt_assert(handles != MAP_FAILED);

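	/*
	 * Layout of the shared pages, as used below: control[0] counts the
	 * writer's spinner submissions and doubles as the readers' stop flag,
	 * control[1..ncpus-1] hold each reader's busy-ioctl loop count
	 * (summed into control[ncpus] for the report), and handles[]
	 * publishes the spinner handles the readers poke while the writer
	 * recycles them.
	 */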
	igt_fork(child, ncpus - 1) {
		struct drm_i915_gem_busy busy;
		uint32_t indirection[nhandles];
		unsigned long count = 0;

		for (i = 0; i < nhandles; i++)
			indirection[i] = i;

		hars_petruska_f54_1_random_perturb(child);

		memset(&busy, 0, sizeof(busy));
		do {
			igt_permute_array(indirection, nhandles, xchg_u32);
			__sync_synchronize();
			for (i = 0; i < nhandles; i++) {
				busy.handle = handles[indirection[i]];
				/* Check that the busy computation doesn't
				 * explode in the face of random gem_close().
				 */
				drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
			}
			count++;
		} while(*(volatile long *)control == 0);

		igt_debug("child[%d]: count = %lu\n", child, count);
		control[child + 1] = count;
	}

	igt_fork(child, 1) {
		struct sched_param rt = {.sched_priority = 99 };
		igt_spin_t *spin[nhandles];
		unsigned long count = 0;

		igt_assert(sched_setscheduler(getpid(), SCHED_RR, &rt) == 0);

		for (i = 0; i < nhandles; i++) {
			spin[i] = __igt_spin_new(fd,
						 .engine = engines[rand() % nengine]);
			handles[i] = spin[i]->handle;
		}

		igt_until_timeout(20) {
			for (i = 0; i < nhandles; i++) {
				igt_spin_free(fd, spin[i]);
				spin[i] = __igt_spin_new(fd,
							 .engine = engines[rand() % nengine]);
				handles[i] = spin[i]->handle;
				__sync_synchronize();
			}
			count += nhandles;
		}
		control[0] = count;
		__sync_synchronize();

		for (i = 0; i < nhandles; i++)
			igt_spin_free(fd, spin[i]);
	}
	igt_waitchildren();

	for (i = 0; i < ncpus - 1; i++)
		control[ncpus] += control[i + 1];
	igt_info("Total execs %lu, busy-ioctls %lu\n",
		 control[0], control[ncpus] * nhandles);

	munmap(handles, PAGE_ALIGN(nhandles * sizeof(*handles)));
	munmap(control, 4096);

	gem_quiescent_gpu(fd);
}

static bool has_semaphores(int fd)
{
	struct drm_i915_getparam gp;
	int val = -1;

	memset(&gp, 0, sizeof(gp));
	gp.param = I915_PARAM_HAS_SEMAPHORES;
	gp.value = &val;

	drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
	errno = 0;

	return val > 0;
}

static bool has_extended_busy_ioctl(int fd)
{
	igt_spin_t *spin = igt_spin_new(fd, .engine = I915_EXEC_DEFAULT);
	uint32_t read, write;

	__gem_busy(fd, spin->handle, &read, &write);
	igt_spin_free(fd, spin);

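	/*
	 * With a spinner still queued, a non-zero read mask tells us the
	 * kernel fills in the extended per-engine report rather than just a
	 * boolean busy flag (the assumption this probe relies on).
	 */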
	return read != 0;
}

static void basic(int fd, const struct intel_execution_engine2 *e, unsigned flags)
{
	igt_spin_t *spin =
		igt_spin_new(fd,
			     .engine = e->flags,
			     .flags = IGT_SPIN_NO_PREEMPTION);
	struct timespec tv;
	int timeout;
	bool busy;

	busy = gem_bo_busy(fd, spin->handle);

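	/*
	 * For the normal case we end the spinner ourselves and expect the
	 * object to go idle within a second; for the HANG case the spinner
	 * is left running and we allow up to 120s for hang detection and
	 * reset to clean it up (an assumption about the recovery path,
	 * matching the assert message below).
	 */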
	timeout = 120;
	if ((flags & HANG) == 0) {
		igt_spin_end(spin);
		timeout = 1;
	}

	igt_assert(busy);
	memset(&tv, 0, sizeof(tv));
	while (gem_bo_busy(fd, spin->handle)) {
		if (igt_seconds_elapsed(&tv) > timeout) {
			igt_debugfs_dump(fd, "i915_engine_info");
			igt_debugfs_dump(fd, "i915_hangcheck_info");
			igt_assert_f(igt_seconds_elapsed(&tv) < timeout,
				     "%s batch did not complete within %ds\n",
				     flags & HANG ? "Hanging" : "Normal",
				     timeout);
		}
	}

	igt_spin_free(fd, spin);
}

static void all(int i915)
{
	const struct intel_execution_engine2 *e;

	__for_each_physical_engine(i915, e)
		basic(i915, e, 0);
}

igt_main
{
	const struct intel_execution_engine2 *e;
	int fd = -1;

	igt_fixture {
		fd = drm_open_driver_master(DRIVER_INTEL);
		igt_require_gem(fd);
		igt_require(gem_class_can_store_dword(fd,
						     I915_ENGINE_CLASS_RENDER));
	}

	igt_subtest_group {
		igt_fixture {
			igt_fork_hang_detector(fd);
		}

		igt_subtest("busy-all") {
			gem_quiescent_gpu(fd);
			all(fd);
		}

		__for_each_physical_engine(fd, e) {
			igt_subtest_group {
				igt_subtest_f("busy-%s", e->name) {
					gem_quiescent_gpu(fd);
					basic(fd, e, 0);
				}
			}
		}

		igt_subtest_group {
			igt_fixture {
				igt_require(has_extended_busy_ioctl(fd));
				gem_require_mmap_wc(fd);
			}

			__for_each_physical_engine(fd, e) {
				igt_subtest_f("extended-%s", e->name) {
					igt_require(gem_class_can_store_dword(fd,
						     e->class));
					gem_quiescent_gpu(fd);
					one(fd, e, 0);
					gem_quiescent_gpu(fd);
				}
			}

			__for_each_physical_engine(fd, e) {
				igt_subtest_f("extended-parallel-%s", e->name) {
					igt_require(gem_class_can_store_dword(fd, e->class));

					gem_quiescent_gpu(fd);
					one(fd, e, PARALLEL);
					gem_quiescent_gpu(fd);
				}
			}
		}

		igt_subtest_group {
			igt_fixture {
				igt_require(has_extended_busy_ioctl(fd));
				igt_require(has_semaphores(fd));
			}

			__for_each_physical_engine(fd, e) {
				igt_subtest_f("extended-semaphore-%s", e->name)
					semaphore(fd, e);
			}
		}

		igt_subtest("close-race")
			close_race(fd);

		igt_fixture {
			igt_stop_hang_detector();
		}
	}

	igt_subtest_group {
		igt_hang_t hang;

		igt_fixture {
			hang = igt_allow_hang(fd, 0, 0);
		}

		__for_each_physical_engine(fd, e) {
			igt_subtest_f("%shang-%s",
				      e->class == I915_ENGINE_CLASS_RENDER
				      ? "basic-" : "", e->name) {
				igt_skip_on_simulation();
				gem_quiescent_gpu(fd);
				basic(fd, e, HANG);
			}
		}

		igt_subtest_group {
			igt_fixture {
				igt_require(has_extended_busy_ioctl(fd));
				gem_require_mmap_wc(fd);
			}

			__for_each_physical_engine(fd, e) {
				igt_subtest_f("extended-hang-%s", e->name) {
					igt_skip_on_simulation();
					igt_require(gem_class_can_store_dword(fd, e->class));

					gem_quiescent_gpu(fd);
					one(fd, e, HANG);
					gem_quiescent_gpu(fd);
				}
			}
		}

		igt_fixture {
			igt_disallow_hang(fd, hang);
		}
	}

	igt_fixture {
		close(fd);
	}
}