/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <[email protected]>
 *
 */

#include "igt.h"
#include <limits.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/time.h>
#include <time.h>
#include "drm.h"

#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)

#define INTERRUPTIBLE 0x1
#define QUEUE 0x2

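/* Time difference between two timespecs, in seconds. */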
static double elapsed(const struct timespec *start, const struct timespec *end)
{
	return ((end->tv_sec - start->tv_sec) +
		(end->tv_nsec - start->tv_nsec)*1e-9);
}

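/*
 * Estimate how many batches can be queued in one timing loop without
 * overshooting the subtest timeout: time 64 submissions on each engine,
 * scale the worst-case rate down by 8, and return the smallest estimate.
 */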
static int measure_qlen(int fd,
			struct drm_i915_gem_execbuffer2 *execbuf,
			const struct intel_engine_data *engines,
			int timeout)
{
	const struct drm_i915_gem_exec_object2 * const obj =
		(struct drm_i915_gem_exec_object2 *)(uintptr_t)execbuf->buffers_ptr;
	uint32_t ctx[64];
	int min = INT_MAX, max = 0;

	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
		ctx[i] = gem_context_create(fd);
		gem_context_set_all_engines(fd, ctx[i]);
	}

	for (unsigned int n = 0; n < engines->nengines; n++) {
		uint64_t saved = execbuf->flags;
		struct timespec tv = {};
		int q;

		execbuf->flags |= engines->engines[n].flags;

		for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
			execbuf->rsvd1 = ctx[i];
			gem_execbuf(fd, execbuf);
		}
		gem_sync(fd, obj->handle);

		igt_nsec_elapsed(&tv);
		for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
			execbuf->rsvd1 = ctx[i];
			gem_execbuf(fd, execbuf);
		}
		gem_sync(fd, obj->handle);

		/*
		 * Be conservative and aim not to overshoot timeout, so scale
		 * down by 8 for hopefully a max of 12.5% error.
		 */
		q = ARRAY_SIZE(ctx) * timeout * 1e9 / igt_nsec_elapsed(&tv) /
		    8 + 1;
		if (q < min)
			min = q;
		if (q > max)
			max = q;

		execbuf->flags = saved;
	}

	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
		gem_context_destroy(fd, ctx[i]);

	igt_debug("Estimated qlen: {min:%d, max:%d}\n", min, max);
	return min;
}

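/*
 * Submit the same batch from 64 different contexts on one engine, forcing a
 * context switch between consecutive batches, and report the mean cost per
 * cycle. Optionally forked across ncpus processes and/or run under the
 * interruptible-ioctl harness.
 */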
static void single(int fd, uint32_t handle,
		   const struct intel_execution_engine2 *e2,
		   unsigned flags,
		   const int ncpus,
		   int timeout)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	struct drm_i915_gem_relocation_entry reloc;
	uint32_t contexts[64];
	struct {
		double elapsed;
		unsigned long count;
	} *shared;
	int n;
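
	/* Shared page so forked children can report results to the parent. */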
	shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
	igt_assert(shared != MAP_FAILED);

	for (n = 0; n < 64; n++) {
		if (flags & QUEUE)
			contexts[n] = gem_queue_create(fd);
		else
			contexts[n] = gem_context_create(fd);

		if (gem_context_has_engine_map(fd, 0))
			gem_context_set_all_engines(fd, contexts[n]);
	}

	memset(&obj, 0, sizeof(obj));
	obj.handle = handle;

	if (flags & INTERRUPTIBLE) {
		/* Be tricksy and force a relocation every batch so that
		 * we don't emit the batch but just do MI_SET_CONTEXT
		 */
		memset(&reloc, 0, sizeof(reloc));
		reloc.offset = 1024;
		reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
		obj.relocs_ptr = to_user_pointer(&reloc);
		obj.relocation_count = 1;
	}

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;
	execbuf.rsvd1 = contexts[0];
	execbuf.flags = e2->flags;
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
	igt_require(__gem_execbuf(fd, &execbuf) == 0);
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags = e2->flags;
		reloc.target_handle = obj.handle;
		gem_execbuf(fd, &execbuf);
	}
	gem_sync(fd, handle);

	igt_fork(child, ncpus) {
		struct timespec start, now;
		unsigned int count = 0;

		/* Warmup to bind all objects into each ctx before we begin */
		for (int i = 0; i < ARRAY_SIZE(contexts); i++) {
			execbuf.rsvd1 = contexts[i];
			gem_execbuf(fd, &execbuf);
		}
		gem_sync(fd, handle);

		clock_gettime(CLOCK_MONOTONIC, &start);
		do {
			igt_while_interruptible(flags & INTERRUPTIBLE) {
				for (int loop = 0; loop < 64; loop++) {
					execbuf.rsvd1 = contexts[loop % 64];
					reloc.presumed_offset = -1;
					gem_execbuf(fd, &execbuf);
				}
				count += 64;
			}
			clock_gettime(CLOCK_MONOTONIC, &now);
		} while (elapsed(&start, &now) < timeout);
		gem_sync(fd, handle);
		clock_gettime(CLOCK_MONOTONIC, &now);

		igt_info("[%d] %s: %'u cycles: %.3fus%s\n",
			 child, e2->name, count,
			 elapsed(&start, &now) * 1e6 / count,
			 flags & INTERRUPTIBLE ? " (interruptible)" : "");

		shared[child].elapsed = elapsed(&start, &now);
		shared[child].count = count;
	}
	igt_waitchildren();

	if (ncpus > 1) {
		unsigned long total = 0;
		double max = 0;

		for (n = 0; n < ncpus; n++) {
			total += shared[n].count;
			if (shared[n].elapsed > max)
				max = shared[n].elapsed;
		}

		igt_info("Total %s: %'lu cycles: %.3fus%s\n",
			 e2->name, total, max*1e6 / total,
			 flags & INTERRUPTIBLE ? " (interruptible)" : "");
	}

	for (n = 0; n < 64; n++)
		gem_context_destroy(fd, contexts[n]);

	munmap(shared, 4096);
}

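/*
 * As single(), but with one child per engine running concurrently, sweeping
 * the number of contexts cycled through so that switch throughput can be
 * compared around each power-of-two working set.
 */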
static void all(int fd, uint32_t handle, unsigned flags, int timeout)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj[2];
	struct intel_engine_data engines = { };
	uint32_t contexts[65];
	int n, qlen;

	engines = intel_init_engine_list(fd, 0);
	igt_require(engines.nengines);

	for (n = 0; n < ARRAY_SIZE(contexts); n++) {
		if (flags & QUEUE)
			contexts[n] = gem_queue_create(fd);
		else
			contexts[n] = gem_context_create(fd);

		gem_context_set_all_engines(fd, contexts[n]);
	}

	memset(obj, 0, sizeof(obj));
	obj[1].handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(obj + 1);
	execbuf.buffer_count = 1;
	execbuf.rsvd1 = contexts[0];
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
	igt_require(__gem_execbuf(fd, &execbuf) == 0);
	gem_sync(fd, handle);

	qlen = measure_qlen(fd, &execbuf, &engines, timeout);
	igt_info("Using timing depth of %d batches\n", qlen);

	execbuf.buffers_ptr = to_user_pointer(obj);
	execbuf.buffer_count = 2;

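	/* Sweep nctx around each power of two: pot-1, pot, pot+1. */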
	for (int pot = 2; pot <= 64; pot *= 2) {
		for (int nctx = pot - 1; nctx <= pot + 1; nctx++) {
			igt_fork(child, engines.nengines) {
				struct timespec start, now;
				unsigned int count = 0;

				obj[0].handle = gem_create(fd, 4096);
				execbuf.flags |= engines.engines[child].flags;
				for (int loop = 0;
				     loop < ARRAY_SIZE(contexts);
				     loop++) {
					execbuf.rsvd1 = contexts[loop];
					gem_execbuf(fd, &execbuf);
				}
				gem_sync(fd, obj[0].handle);

				clock_gettime(CLOCK_MONOTONIC, &start);
				do {
					for (int loop = 0; loop < qlen; loop++) {
						execbuf.rsvd1 =
							contexts[loop % nctx];
						gem_execbuf(fd, &execbuf);
					}
					count += qlen;
					gem_sync(fd, obj[0].handle);
					clock_gettime(CLOCK_MONOTONIC, &now);
				} while (elapsed(&start, &now) < timeout);
				gem_sync(fd, obj[0].handle);
				clock_gettime(CLOCK_MONOTONIC, &now);
				gem_close(fd, obj[0].handle);

				igt_info("[%d:%d] %s: %'u cycles: %.3fus%s (elapsed: %.3fs)\n",
					 nctx, child,
					 engines.engines[child].name, count,
					 elapsed(&start, &now) * 1e6 / count,
					 flags & INTERRUPTIBLE ?
					 " (interruptible)" : "",
					 elapsed(&start, &now));
			}
			igt_waitchildren();
		}
	}

	for (n = 0; n < ARRAY_SIZE(contexts); n++)
		gem_context_destroy(fd, contexts[n]);
}

igt_main
{
	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	const struct intel_execution_engine2 *e2;
	const struct intel_execution_engine *e;
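
	/*
	 * Every subtest is repeated per phase below; the queue variants
	 * create contexts with gem_queue_create() instead of
	 * gem_context_create().
	 */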
	static const struct {
		const char *name;
		unsigned int flags;
		bool (*require)(int fd);
	} phases[] = {
		{ "", 0, NULL },
		{ "-interruptible", INTERRUPTIBLE, NULL },
		{ "-queue", QUEUE, gem_has_queues },
		{ "-queue-interruptible", QUEUE | INTERRUPTIBLE, gem_has_queues },
		{ }
	};
	uint32_t light = 0, heavy;
	int fd = -1;

	igt_fixture {
		const uint32_t bbe = MI_BATCH_BUFFER_END;

		fd = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(fd);

		gem_require_contexts(fd);

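		/*
		 * light is a single MI_BATCH_BUFFER_END; heavy pads the batch
		 * to 4MiB of zeroed dwords (which decode as MI_NOOP) before
		 * the terminator, so each submission carries real GPU time.
		 */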
		light = gem_create(fd, 4096);
		gem_write(fd, light, 0, &bbe, sizeof(bbe));

		heavy = gem_create(fd, 4096*1024);
		gem_write(fd, heavy, 4096*1024-sizeof(bbe), &bbe, sizeof(bbe));

		igt_fork_hang_detector(fd);
	}

	/* Legacy testing must be first. */
	for (e = intel_execution_engines; e->name; e++) {
		struct intel_execution_engine2 e2__;

		e2__ = gem_eb_flags_to_engine(e->exec_id | e->flags);
		if (e2__.flags == -1)
			continue; /* I915_EXEC_BSD with no ring selectors */

		e2 = &e2__;

		for (typeof(*phases) *p = phases; p->name; p++) {
			igt_subtest_group {
				igt_fixture {
					gem_require_ring(fd, e2->flags);
					if (p->require)
						igt_require(p->require(fd));
				}

				igt_subtest_f("legacy-%s%s", e->name, p->name)
					single(fd, light, e2, p->flags, 1, 5);

				igt_skip_on_simulation();

				igt_subtest_f("legacy-%s-heavy%s",
					      e->name, p->name)
					single(fd, heavy, e2, p->flags, 1, 5);
				igt_subtest_f("legacy-%s-forked%s",
					      e->name, p->name)
					single(fd, light, e2, p->flags, ncpus,
					       150);
				igt_subtest_f("legacy-%s-forked-heavy%s",
					      e->name, p->name)
					single(fd, heavy, e2, p->flags, ncpus,
					       150);
			}
		}
	}

	/* Must come after legacy subtests. */
	__for_each_physical_engine(fd, e2) {
		for (typeof(*phases) *p = phases; p->name; p++) {
			igt_subtest_group {
				igt_fixture {
					if (p->require)
						igt_require(p->require(fd));
				}

				igt_subtest_f("%s%s", e2->name, p->name)
					single(fd, light, e2, p->flags, 1, 5);

				igt_skip_on_simulation();

				igt_subtest_f("%s-heavy%s", e2->name, p->name)
					single(fd, heavy, e2, p->flags, 1, 5);
				igt_subtest_f("%s-forked%s", e2->name, p->name)
					single(fd, light, e2, p->flags, ncpus,
					       150);
				igt_subtest_f("%s-forked-heavy%s",
					      e2->name, p->name)
					single(fd, heavy, e2, p->flags, ncpus,
					       150);
			}
		}
	}

	igt_subtest("all-light")
		all(fd, light, 0, 5);
	igt_subtest("all-heavy")
		all(fd, heavy, 0, 5);

	igt_subtest_group {
		igt_fixture {
			igt_require(gem_has_queues(fd));
		}
		igt_subtest("queue-light")
			all(fd, light, QUEUE, 5);
		igt_subtest("queue-heavy")
			all(fd, heavy, QUEUE, 5);
	}

	igt_fixture {
		igt_stop_hang_detector();
		gem_close(fd, heavy);
		gem_close(fd, light);
		close(fd);
	}
}