1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #include "igt.h"
26 #include "igt_sysfs.h"
27 #include <stdio.h>
28 #include <string.h>
29 #include <errno.h>
30 #include <pthread.h>
31 #include <fcntl.h>
32 #include <sys/stat.h>
33 #include <sys/resource.h>
34
35 IGT_TEST_DESCRIPTION("Fill the Gobal GTT with context objects and VMs\n");
36
37 #define NUM_THREADS (2*sysconf(_SC_NPROCESSORS_ONLN))
38
/* Element-swap callback for igt_permute_array() over an int array. */
static void xchg_int(void *array, unsigned i, unsigned j)
{
	int *elems = array;
	int tmp = elems[i];

	elems[i] = elems[j];
	elems[j] = tmp;
}
44
/*
 * Estimate the size in bytes of a single HW context image for the
 * device behind @fd, based on its hardware generation.
 */
static unsigned context_size(int fd)
{
	const int gen = intel_gen(intel_get_drm_devid(fd));

	if (gen >= 0 && gen <= 7)
		return 18 << 12;
	if (gen == 8)
		return 20 << 12;
	if (gen == 9)
		return 22 << 12;

	/* Unknown/newer generations: assume the largest image. */
	return 32 << 12;
}
63
get_num_contexts(int fd,int num_engines)64 static unsigned get_num_contexts(int fd, int num_engines)
65 {
66 uint64_t ggtt_size;
67 unsigned size;
68 unsigned count;
69
70 /* Compute the number of contexts we can allocate to fill the GGTT */
71 ggtt_size = gem_global_aperture_size(fd);
72
73 size = context_size(fd);
74 if (gem_has_execlists(fd)) {
75 size += 4 << 12; /* ringbuffer as well */
76 if (num_engines) /* one per engine with execlists */
77 size *= num_engines;
78 }
79
80 count = 3 * (ggtt_size / size) / 2;
81 igt_info("Creating %lld contexts (assuming of size %lld%s)\n",
82 (long long)count, (long long)size,
83 gem_has_execlists(fd) ? " with execlists" : "");
84
85 intel_require_memory(count, size, CHECK_RAM | CHECK_SWAP);
86 return count;
87 }
88
/*
 * Thrash context images from a single fd: create ~1.5x more contexts
 * than fit in the global GTT, then submit a store-dword batch from every
 * context in random order.  Each context writes its own id into its own
 * slot of a shared scratch buffer, and after each pass we verify the
 * last write per context landed.
 *
 * NOTE(review): @name appears unused here — presumably kept for symmetry
 * with the subtest name; confirm before removing.
 */
static void single(const char *name, bool all_engines)
{
	struct drm_i915_gem_exec_object2 *obj;
	struct drm_i915_gem_relocation_entry *reloc;
	unsigned int engines[16], num_engines, num_ctx;
	uint32_t *ctx, *map, scratch, size;
	int fd, gen;
#define MAX_LOOP 16

	fd = drm_open_driver(DRIVER_INTEL);
	igt_require_gem(fd);
	gem_require_contexts(fd);

	gen = intel_gen(intel_get_drm_devid(fd));

	/* Pick the set of engines to spread submissions over. */
	num_engines = 0;
	if (all_engines) {
		unsigned engine;

		for_each_physical_engine(fd, engine) {
			/* Only engines that can execute MI_STORE_DWORD_IMM. */
			if (!gem_can_store_dword(fd, engine))
				continue;

			engines[num_engines++] = engine;
			if (num_engines == ARRAY_SIZE(engines))
				break;
		}
	} else {
		igt_require(gem_can_store_dword(fd, 0));
		engines[num_engines++] = 0;
	}
	igt_require(num_engines);

	num_ctx = get_num_contexts(fd, num_engines);

	/* One dword slot per context in a CPU-cached scratch bo. */
	size = ALIGN(num_ctx * sizeof(uint32_t), 4096);
	scratch = gem_create(fd, size);
	gem_set_caching(fd, scratch, I915_CACHING_CACHED);
	/* Three exec objects (pad bo, scratch, batch) and two relocs per context. */
	obj = calloc(num_ctx, 3 * sizeof(*obj));
	reloc = calloc(num_ctx, 2 * sizeof(*reloc));

	ctx = malloc(num_ctx * sizeof(uint32_t));
	igt_assert(ctx);
	for (unsigned n = 0; n < num_ctx; n++) {
		ctx[n] = gem_context_create(fd);

		/* Dummy relocation to a private bo, forcing per-context binding. */
		obj[3*n + 0].handle = gem_create(fd, 4096);
		reloc[2*n + 0].target_handle = obj[3*n + 0].handle;
		reloc[2*n + 0].presumed_offset = 0;
		reloc[2*n + 0].offset = 4000;
		reloc[2*n + 0].delta = 0;
		reloc[2*n + 0].read_domains = I915_GEM_DOMAIN_RENDER;
		reloc[2*n + 0].write_domain = I915_GEM_DOMAIN_RENDER;

		/* Relocation patching the store address into the batch. */
		obj[3*n + 1].handle = scratch;
		reloc[2*n + 1].target_handle = scratch;
		reloc[2*n + 1].presumed_offset = 0;
		reloc[2*n + 1].offset = sizeof(uint32_t);
		reloc[2*n + 1].delta = n * sizeof(uint32_t); /* this context's slot */
		reloc[2*n + 1].read_domains = I915_GEM_DOMAIN_RENDER;
		reloc[2*n + 1].write_domain = 0; /* lies! */
		if (gen >= 4 && gen < 8)
			/* gen4-7 MI_STORE_DWORD_IMM has an extra dword before the address */
			reloc[2*n + 1].offset += sizeof(uint32_t);

		obj[3*n + 2].relocs_ptr = to_user_pointer(&reloc[2*n]);
		obj[3*n + 2].relocation_count = 2;
	}

	map = gem_mmap__cpu(fd, scratch, 0, size, PROT_WRITE);
	/* Geometrically increase the pressure: 1x..MAX_LOOP x num_ctx submissions. */
	for (unsigned int loop = 1; loop <= MAX_LOOP; loop <<= 1) {
		const unsigned int count = loop * num_ctx;
		uint32_t *all;

		/* Build a randomised schedule with each context appearing 'loop' times. */
		all = malloc(count * sizeof(uint32_t));
		for (unsigned int n = 0; n < count; n++)
			all[n] = ctx[n % num_ctx];
		igt_permute_array(all, count, xchg_int);

		for (unsigned int n = 0; n < count; n++) {
			const unsigned int r = n % num_ctx;
			struct drm_i915_gem_execbuffer2 execbuf = {
				.buffers_ptr = to_user_pointer(&obj[3*r]),
				.buffer_count = 3,
				.flags = engines[n % num_engines],
				.rsvd1 = all[n], /* context id for this submission */
			};
			/* GPU address of slot r, based on the last known placement. */
			uint64_t offset =
				reloc[2*r + 1].presumed_offset +
				reloc[2*r + 1].delta;
			uint32_t handle = gem_create(fd, 4096);
			uint32_t buf[16];
			int i;

			/* Emit MI_STORE_DWORD_IMM in the layout this gen expects. */
			buf[i = 0] = MI_STORE_DWORD_IMM;
			if (gen >= 8) {
				buf[++i] = offset;
				buf[++i] = offset >> 32;
			} else if (gen >= 4) {
				if (gen < 6)
					buf[i] |= 1 << 22; /* use GGTT */
				buf[++i] = 0;
				buf[++i] = offset;
			} else {
				buf[i]--; /* shorter pre-gen4 encoding */
				buf[++i] = offset;
			}
			buf[++i] = all[n]; /* store the context id itself */
			buf[++i] = MI_BATCH_BUFFER_END;
			gem_write(fd, handle, 0, buf, sizeof(buf));
			obj[3*r + 2].handle = handle;

			gem_execbuf(fd, &execbuf);
			gem_close(fd, handle);
		}

		/*
		 * Note we lied about the write-domain when writing from the
		 * GPU (in order to avoid inter-ring synchronisation), so now
		 * we have to force the synchronisation here.
		 */
		gem_set_domain(fd, scratch,
			       I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
		/* The last num_ctx submissions hit each slot exactly once. */
		for (unsigned int n = count - num_ctx; n < count; n++)
			igt_assert_eq(map[n % num_ctx], all[n]);
		free(all);
	}
	munmap(map, size);

	free(ctx);
	close(fd);
}
220
/*
 * Thrash contexts across many files: open one drm fd per context (each
 * fd carries its own default context), then fork NUM_THREADS children
 * that all submit a shared (flinked) nop batch through every fd in
 * random order.
 */
static void processes(void)
{
	unsigned engines[16], engine;
	int num_engines;
	struct rlimit rlim;
	unsigned num_ctx;
	uint32_t name;
	int fd, *fds;

	fd = drm_open_driver(DRIVER_INTEL);

	num_engines = 0;
	for_each_physical_engine(fd, engine) {
		engines[num_engines++] = engine;
		if (num_engines == ARRAY_SIZE(engines))
			break;
	}

	num_ctx = get_num_contexts(fd, num_engines);

	/* tweak rlimits to allow us to create this many files */
	igt_assert(getrlimit(RLIMIT_NOFILE, &rlim) == 0);
	if (rlim.rlim_cur < ALIGN(num_ctx + 1024, 1024)) {
		rlim.rlim_cur = ALIGN(num_ctx + 1024, 1024);
		if (rlim.rlim_cur > rlim.rlim_max)
			rlim.rlim_max = rlim.rlim_cur;
		/* Raising the hard limit needs privilege; skip if denied. */
		igt_require(setrlimit(RLIMIT_NOFILE, &rlim) == 0);
	}

	/* One fd per context: each open file gets its own default context. */
	fds = malloc(num_ctx * sizeof(int));
	igt_assert(fds);
	for (unsigned n = 0; n < num_ctx; n++) {
		fds[n] = drm_open_driver(DRIVER_INTEL);
		if (fds[n] == -1) {
			int err = errno;
			/* Unwind the fds opened so far before reporting. */
			for (unsigned i = n; i--; )
				close(fds[i]);
			free(fds);
			errno = err;
			igt_assert_f(0, "failed to create context %lld/%lld\n", (long long)n, (long long)num_ctx);
		}
	}

	/* Share a single nop batch between all fds via flink. */
	if (1) {
		uint32_t bbe = MI_BATCH_BUFFER_END;
		name = gem_create(fd, 4096);
		gem_write(fd, name, 0, &bbe, sizeof(bbe));
		name = gem_flink(fd, name);
	}

	igt_fork(child, NUM_THREADS) {
		struct drm_i915_gem_execbuffer2 execbuf;
		struct drm_i915_gem_exec_object2 obj;

		memset(&obj, 0, sizeof(obj));
		memset(&execbuf, 0, sizeof(execbuf));
		execbuf.buffers_ptr = to_user_pointer(&obj);
		execbuf.buffer_count = 1;

		/* Each child walks the fds in its own random order. */
		igt_permute_array(fds, num_ctx, xchg_int);
		for (unsigned n = 0; n < num_ctx; n++) {
			obj.handle = gem_open(fds[n], name);
			execbuf.flags = engines[n % num_engines];
			gem_execbuf(fds[n], &execbuf);
			gem_close(fds[n], obj.handle);
		}
	}
	igt_waitchildren();

	for (unsigned n = 0; n < num_ctx; n++)
		close(fds[n]);
	free(fds);
	close(fd);
}
295
/* Shared parameters handed to every submission thread in threads(). */
struct thread {
	int fd;			/* drm fd shared by all threads */
	uint32_t *all_ctx;	/* ids of every context created by threads() */
	unsigned num_ctx;	/* number of entries in all_ctx */
	uint32_t batch;		/* handle of the shared nop batch buffer */
};
302
thread(void * data)303 static void *thread(void *data)
304 {
305 struct thread *t = data;
306 struct drm_i915_gem_execbuffer2 execbuf;
307 struct drm_i915_gem_exec_object2 obj;
308 uint32_t *ctx;
309
310 memset(&obj, 0, sizeof(obj));
311 obj.handle = t->batch;
312
313 memset(&execbuf, 0, sizeof(execbuf));
314 execbuf.buffers_ptr = to_user_pointer(&obj);
315 execbuf.buffer_count = 1;
316
317 ctx = malloc(t->num_ctx * sizeof(uint32_t));
318 igt_assert(ctx);
319 memcpy(ctx, t->all_ctx, t->num_ctx * sizeof(uint32_t));
320
321 igt_until_timeout(150) {
322 igt_permute_array(ctx, t->num_ctx, xchg_int);
323 for (unsigned n = 0; n < t->num_ctx; n++) {
324 execbuf.rsvd1 = ctx[n];
325 gem_execbuf(t->fd, &execbuf);
326 }
327 }
328
329 free(ctx);
330
331 return NULL;
332 }
333
threads(void)334 static void threads(void)
335 {
336 uint32_t bbe = MI_BATCH_BUFFER_END;
337 pthread_t threads[NUM_THREADS];
338 struct thread data;
339
340 data.fd = drm_open_driver_render(DRIVER_INTEL);
341 igt_require_gem(data.fd);
342
343 gem_require_contexts(data.fd);
344
345 data.num_ctx = get_num_contexts(data.fd, false);
346 data.all_ctx = malloc(data.num_ctx * sizeof(uint32_t));
347 igt_assert(data.all_ctx);
348 for (unsigned n = 0; n < data.num_ctx; n++)
349 data.all_ctx[n] = gem_context_create(data.fd);
350 data.batch = gem_create(data.fd, 4096);
351 gem_write(data.fd, data.batch, 0, &bbe, sizeof(bbe));
352
353 for (int n = 0; n < NUM_THREADS; n++)
354 pthread_create(&threads[n], NULL, thread, &data);
355
356 for (int n = 0; n < NUM_THREADS; n++)
357 pthread_join(threads[n], NULL);
358
359 close(data.fd);
360 }
361
igt_main
{
	igt_skip_on_simulation();

	/* One fd, many contexts, submissions on the default engine only. */
	igt_subtest("single")
		single("single", false);
	/* As above, but spread submissions across all capable engines. */
	igt_subtest("engines")
		single("engines", true);

	/* One fd (and default context) per file, exercised from forked children. */
	igt_subtest("processes")
		processes();

	/* One fd, many contexts, exercised concurrently from pthreads. */
	igt_subtest("threads")
		threads();
}
377