xref: /aosp_15_r20/external/igt-gpu-tools/tests/i915/gem_ctx_thrash.c (revision d83cc019efdc2edc6c4b16e9034a3ceb8d35d77c)
1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include "igt.h"
26 #include "igt_sysfs.h"
27 #include <stdio.h>
28 #include <string.h>
29 #include <errno.h>
30 #include <pthread.h>
31 #include <fcntl.h>
32 #include <sys/stat.h>
33 #include <sys/resource.h>
34 
35 IGT_TEST_DESCRIPTION("Fill the Gobal GTT with context objects and VMs\n");
36 
37 #define NUM_THREADS (2*sysconf(_SC_NPROCESSORS_ONLN))
38 
/* Element-exchange callback for igt_permute_array(): swap ints at i and j. */
static void xchg_int(void *array, unsigned i, unsigned j)
{
	int *elems = array;
	int tmp = elems[i];

	elems[i] = elems[j];
	elems[j] = tmp;
}
44 
/*
 * Estimate the size of a logical context image (in bytes) for the
 * device's hardware generation.
 */
static unsigned context_size(int fd)
{
	const int gen = intel_gen(intel_get_drm_devid(fd));

	if (gen >= 0 && gen <= 7)
		return 18 << 12;
	if (gen == 8)
		return 20 << 12;
	if (gen == 9)
		return 22 << 12;

	/* unknown or future generations: assume the largest footprint */
	return 32 << 12;
}
63 
get_num_contexts(int fd,int num_engines)64 static unsigned get_num_contexts(int fd, int num_engines)
65 {
66 	uint64_t ggtt_size;
67 	unsigned size;
68 	unsigned count;
69 
70 	/* Compute the number of contexts we can allocate to fill the GGTT */
71 	ggtt_size = gem_global_aperture_size(fd);
72 
73 	size = context_size(fd);
74 	if (gem_has_execlists(fd)) {
75 		size += 4 << 12; /* ringbuffer as well */
76 		if (num_engines) /* one per engine with execlists */
77 			size *= num_engines;
78 	}
79 
80 	count = 3 * (ggtt_size / size) / 2;
81 	igt_info("Creating %lld contexts (assuming of size %lld%s)\n",
82 		 (long long)count, (long long)size,
83 		 gem_has_execlists(fd) ? " with execlists" : "");
84 
85 	intel_require_memory(count, size, CHECK_RAM | CHECK_SWAP);
86 	return count;
87 }
88 
/*
 * Thrash the GGTT from a single fd: create ~1.5x more contexts than fit
 * in the Global GTT, then repeatedly submit a store-dword batch in every
 * context (in a random order, across one or all engines) and verify each
 * context's write landed in its slot of a shared scratch object.
 */
static void single(const char *name, bool all_engines)
{
	struct drm_i915_gem_exec_object2 *obj;
	struct drm_i915_gem_relocation_entry *reloc;
	unsigned int engines[16], num_engines, num_ctx;
	uint32_t *ctx, *map, scratch, size;
	int fd, gen;
#define MAX_LOOP 16
	/* NOTE(review): 'name' is currently unused by this function. */

	fd = drm_open_driver(DRIVER_INTEL);
	igt_require_gem(fd);
	gem_require_contexts(fd);

	gen = intel_gen(intel_get_drm_devid(fd));

	/*
	 * Gather the engines to submit on: every physical engine that can
	 * execute a store-dword batch, or just the default engine.
	 */
	num_engines = 0;
	if (all_engines) {
		unsigned engine;

		for_each_physical_engine(fd, engine) {
			if (!gem_can_store_dword(fd, engine))
				continue;

			engines[num_engines++] = engine;
			if (num_engines == ARRAY_SIZE(engines))
				break;
		}
	} else {
		igt_require(gem_can_store_dword(fd, 0));
		engines[num_engines++] = 0;
	}
	igt_require(num_engines);

	num_ctx = get_num_contexts(fd, num_engines);

	/* One dword of CPU-cached scratch per context for the GPU writes. */
	size = ALIGN(num_ctx * sizeof(uint32_t), 4096);
	scratch = gem_create(fd, size);
	gem_set_caching(fd, scratch, I915_CACHING_CACHED);
	/* 3 objects (padding, scratch, batch) and 2 relocs per context */
	obj = calloc(num_ctx, 3 * sizeof(*obj));
	reloc = calloc(num_ctx, 2 * sizeof(*reloc));

	ctx = malloc(num_ctx * sizeof(uint32_t));
	igt_assert(ctx);
	for (unsigned n = 0; n < num_ctx; n++) {
		ctx[n] = gem_context_create(fd);

		/*
		 * reloc 0 patches a dword at offset 4000 in the batch --
		 * past the commands written below -- so its only effect is
		 * to keep the per-context padding object resident.
		 */
		obj[3*n + 0].handle = gem_create(fd, 4096);
		reloc[2*n + 0].target_handle = obj[3*n + 0].handle;
		reloc[2*n + 0].presumed_offset = 0;
		reloc[2*n + 0].offset = 4000;
		reloc[2*n + 0].delta = 0;
		reloc[2*n + 0].read_domains = I915_GEM_DOMAIN_RENDER;
		reloc[2*n + 0].write_domain = I915_GEM_DOMAIN_RENDER;

		/*
		 * reloc 1 patches the store-dword address operand in the
		 * batch with the location of this context's scratch slot.
		 * write_domain is left 0 so the kernel does not serialise
		 * the submissions on the shared scratch (see the explicit
		 * set-domain before the readback below).
		 */
		obj[3*n + 1].handle = scratch;
		reloc[2*n + 1].target_handle = scratch;
		reloc[2*n + 1].presumed_offset = 0;
		reloc[2*n + 1].offset = sizeof(uint32_t);
		reloc[2*n + 1].delta = n * sizeof(uint32_t);
		reloc[2*n + 1].read_domains = I915_GEM_DOMAIN_RENDER;
		reloc[2*n + 1].write_domain = 0; /* lies! */
		/* gen4..7 insert an extra dword before the address (below) */
		if (gen >= 4 && gen < 8)
			reloc[2*n + 1].offset += sizeof(uint32_t);

		/* obj[3n+2] is the batch, recreated every submission */
		obj[3*n + 2].relocs_ptr = to_user_pointer(&reloc[2*n]);
		obj[3*n + 2].relocation_count = 2;
	}

	map = gem_mmap__cpu(fd, scratch, 0, size, PROT_WRITE);
	/* Submit 1x, 2x, ... MAX_LOOPx passes over all contexts. */
	for (unsigned int loop = 1; loop <= MAX_LOOP; loop <<= 1) {
		const unsigned int count = loop * num_ctx;
		uint32_t *all;

		/* Randomised schedule visiting each context 'loop' times. */
		all = malloc(count * sizeof(uint32_t));
		for (unsigned int n = 0; n < count; n++)
			all[n] = ctx[n % num_ctx];
		igt_permute_array(all, count, xchg_int);

		for (unsigned int n = 0; n < count; n++) {
			const unsigned int r = n % num_ctx;
			struct drm_i915_gem_execbuffer2 execbuf = {
				.buffers_ptr = to_user_pointer(&obj[3*r]),
				.buffer_count = 3,
				.flags = engines[n % num_engines],
				.rsvd1 = all[n],
			};
			uint64_t offset =
				reloc[2*r + 1].presumed_offset +
				reloc[2*r + 1].delta;
			uint32_t handle = gem_create(fd, 4096);
			uint32_t buf[16];
			int i;

			/*
			 * Assemble MI_STORE_DWORD_IMM writing the context id
			 * into slot r of scratch; the address encoding varies
			 * by generation.
			 */
			buf[i = 0] = MI_STORE_DWORD_IMM;
			if (gen >= 8) {
				/* 64-bit address, low dword first */
				buf[++i] = offset;
				buf[++i] = offset >> 32;
			} else if (gen >= 4) {
				if (gen < 6)
					buf[i] |= 1 << 22;
				buf[++i] = 0;
				buf[++i] = offset;
			} else {
				/* gen2/3: one dword shorter command */
				buf[i]--;
				buf[++i] = offset;
			}
			buf[++i] = all[n];
			buf[++i] = MI_BATCH_BUFFER_END;
			gem_write(fd, handle, 0, buf, sizeof(buf));
			obj[3*r + 2].handle = handle;

			gem_execbuf(fd, &execbuf);
			gem_close(fd, handle);
		}

		/*
		 * Note we lied about the write-domain when writing from the
		 * GPU (in order to avoid inter-ring synchronisation), so now
		 * we have to force the synchronisation here.
		 */
		gem_set_domain(fd, scratch,
			       I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
		/*
		 * The last num_ctx entries of the schedule touch each slot
		 * exactly once, so they determine the final scratch values.
		 */
		for (unsigned int n = count - num_ctx; n < count; n++)
			igt_assert_eq(map[n % num_ctx], all[n]);
		free(all);
	}
	munmap(map, size);

	/* contexts, scratch, obj/reloc arrays are reclaimed with the fd */
	free(ctx);
	close(fd);
}
220 
processes(void)221 static void processes(void)
222 {
223 	unsigned engines[16], engine;
224 	int num_engines;
225 	struct rlimit rlim;
226 	unsigned num_ctx;
227 	uint32_t name;
228 	int fd, *fds;
229 
230 	fd = drm_open_driver(DRIVER_INTEL);
231 
232 	num_engines = 0;
233 	for_each_physical_engine(fd, engine) {
234 		engines[num_engines++] = engine;
235 		if (num_engines == ARRAY_SIZE(engines))
236 			break;
237 	}
238 
239 	num_ctx = get_num_contexts(fd, num_engines);
240 
241 	/* tweak rlimits to allow us to create this many files */
242 	igt_assert(getrlimit(RLIMIT_NOFILE, &rlim) == 0);
243 	if (rlim.rlim_cur < ALIGN(num_ctx + 1024, 1024)) {
244 		rlim.rlim_cur = ALIGN(num_ctx + 1024, 1024);
245 		if (rlim.rlim_cur > rlim.rlim_max)
246 			rlim.rlim_max = rlim.rlim_cur;
247 		igt_require(setrlimit(RLIMIT_NOFILE, &rlim) == 0);
248 	}
249 
250 	fds = malloc(num_ctx * sizeof(int));
251 	igt_assert(fds);
252 	for (unsigned n = 0; n < num_ctx; n++) {
253 		fds[n] = drm_open_driver(DRIVER_INTEL);
254 		if (fds[n] == -1) {
255 			int err = errno;
256 			for (unsigned i = n; i--; )
257 				close(fds[i]);
258 			free(fds);
259 			errno = err;
260 			igt_assert_f(0, "failed to create context %lld/%lld\n", (long long)n, (long long)num_ctx);
261 		}
262 	}
263 
264 	if (1) {
265 		uint32_t bbe = MI_BATCH_BUFFER_END;
266 		name = gem_create(fd, 4096);
267 		gem_write(fd, name, 0, &bbe, sizeof(bbe));
268 		name = gem_flink(fd, name);
269 	}
270 
271 	igt_fork(child, NUM_THREADS) {
272 		struct drm_i915_gem_execbuffer2 execbuf;
273 		struct drm_i915_gem_exec_object2 obj;
274 
275 		memset(&obj, 0, sizeof(obj));
276 		memset(&execbuf, 0, sizeof(execbuf));
277 		execbuf.buffers_ptr = to_user_pointer(&obj);
278 		execbuf.buffer_count = 1;
279 
280 		igt_permute_array(fds, num_ctx, xchg_int);
281 		for (unsigned n = 0; n < num_ctx; n++) {
282 			obj.handle = gem_open(fds[n], name);
283 			execbuf.flags = engines[n % num_engines];
284 			gem_execbuf(fds[n], &execbuf);
285 			gem_close(fds[n], obj.handle);
286 		}
287 	}
288 	igt_waitchildren();
289 
290 	for (unsigned n = 0; n < num_ctx; n++)
291 		close(fds[n]);
292 	free(fds);
293 	close(fd);
294 }
295 
/* Shared, read-only state handed to each submission thread. */
struct thread {
	int fd;			/* DRM fd owning the contexts and batch */
	uint32_t *all_ctx;	/* ids of all contexts created by threads() */
	unsigned num_ctx;	/* number of entries in all_ctx */
	uint32_t batch;		/* handle of the shared nop batch buffer */
};
302 
thread(void * data)303 static void *thread(void *data)
304 {
305 	struct thread *t = data;
306 	struct drm_i915_gem_execbuffer2 execbuf;
307 	struct drm_i915_gem_exec_object2 obj;
308 	uint32_t *ctx;
309 
310 	memset(&obj, 0, sizeof(obj));
311 	obj.handle = t->batch;
312 
313 	memset(&execbuf, 0, sizeof(execbuf));
314 	execbuf.buffers_ptr = to_user_pointer(&obj);
315 	execbuf.buffer_count = 1;
316 
317 	ctx = malloc(t->num_ctx * sizeof(uint32_t));
318 	igt_assert(ctx);
319 	memcpy(ctx, t->all_ctx, t->num_ctx * sizeof(uint32_t));
320 
321 	igt_until_timeout(150) {
322 		igt_permute_array(ctx, t->num_ctx, xchg_int);
323 		for (unsigned n = 0; n < t->num_ctx; n++) {
324 			execbuf.rsvd1 = ctx[n];
325 			gem_execbuf(t->fd, &execbuf);
326 		}
327 	}
328 
329 	free(ctx);
330 
331 	return NULL;
332 }
333 
threads(void)334 static void threads(void)
335 {
336 	uint32_t bbe = MI_BATCH_BUFFER_END;
337 	pthread_t threads[NUM_THREADS];
338 	struct thread data;
339 
340 	data.fd = drm_open_driver_render(DRIVER_INTEL);
341 	igt_require_gem(data.fd);
342 
343 	gem_require_contexts(data.fd);
344 
345 	data.num_ctx = get_num_contexts(data.fd, false);
346 	data.all_ctx = malloc(data.num_ctx * sizeof(uint32_t));
347 	igt_assert(data.all_ctx);
348 	for (unsigned n = 0; n < data.num_ctx; n++)
349 		data.all_ctx[n] = gem_context_create(data.fd);
350 	data.batch = gem_create(data.fd, 4096);
351 	gem_write(data.fd, data.batch, 0, &bbe, sizeof(bbe));
352 
353 	for (int n = 0; n < NUM_THREADS; n++)
354 		pthread_create(&threads[n], NULL, thread, &data);
355 
356 	for (int n = 0; n < NUM_THREADS; n++)
357 		pthread_join(threads[n], NULL);
358 
359 	close(data.fd);
360 }
361 
igt_main
{
	igt_skip_on_simulation();

	/* thrash contexts on the default engine only... */
	igt_subtest("single")
		single("single", false);
	/* ...and across every engine that can execute a store-dword */
	igt_subtest("engines")
		single("engines", true);

	/* one fd (with its own default context/VM) per context */
	igt_subtest("processes")
		processes();

	/* many threads sharing one fd's pile of contexts */
	igt_subtest("threads")
		threads();
}
377