/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <[email protected]>
 *
 */

#include "igt.h"
#include <limits.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <time.h>
#include "drm.h"

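/*
 * Local copies of the execbuffer2 flags, so the test still builds against
 * kernel headers that predate I915_EXEC_NO_RELOC and I915_EXEC_HANDLE_LUT.
 */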
#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)

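/*
 * Subtest mode flags: INTERRUPTIBLE runs the submission loop while being
 * bombarded with signals, QUEUE uses gem_queue_create() rather than plain
 * gem_context_create() contexts.
 */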
#define INTERRUPTIBLE 0x1
#define QUEUE 0x2

static double elapsed(const struct timespec *start, const struct timespec *end)
{
	return ((end->tv_sec - start->tv_sec) +
		(end->tv_nsec - start->tv_nsec)*1e-9);
}

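/*
 * Estimate how many nop batches can be submitted within the timeout by
 * timing a fixed number of submissions across a pool of contexts on each
 * engine. Returns the smallest (i.e. safest) estimate across all engines.
 */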
static int measure_qlen(int fd,
			struct drm_i915_gem_execbuffer2 *execbuf,
			const struct intel_engine_data *engines,
			int timeout)
{
	const struct drm_i915_gem_exec_object2 * const obj =
		(struct drm_i915_gem_exec_object2 *)(uintptr_t)execbuf->buffers_ptr;
	uint32_t ctx[64];
	int min = INT_MAX, max = 0;

	for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
		ctx[i] = gem_context_create(fd);
		gem_context_set_all_engines(fd, ctx[i]);
	}

	for (unsigned int n = 0; n < engines->nengines; n++) {
		uint64_t saved = execbuf->flags;
		struct timespec tv = {};
		int q;

		execbuf->flags |= engines->engines[n].flags;

		for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
			execbuf->rsvd1 = ctx[i];
			gem_execbuf(fd, execbuf);
		}
		gem_sync(fd, obj->handle);

		igt_nsec_elapsed(&tv);
		for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
			execbuf->rsvd1 = ctx[i];
			gem_execbuf(fd, execbuf);
		}
		gem_sync(fd, obj->handle);

		/*
		 * Be conservative and aim not to overshoot timeout, so scale
		 * down by 8 for hopefully a max of 12.5% error.
		 */
		q = ARRAY_SIZE(ctx) * timeout * 1e9 / igt_nsec_elapsed(&tv) / 8 + 1;
		if (q < min)
			min = q;
		if (q > max)
			max = q;

		execbuf->flags = saved;
	}

	for (int i = 0; i < ARRAY_SIZE(ctx); i++)
		gem_context_destroy(fd, ctx[i]);

	igt_debug("Estimated qlen: {min:%d, max:%d}\n", min, max);
	return min;
}

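/*
 * Measure the context-switch rate on a single engine by cycling through
 * 64 contexts as fast as possible, optionally from several forked
 * children at once, and report the mean time per submission.
 */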
static void single(int fd, uint32_t handle,
		   const struct intel_execution_engine2 *e2,
		   unsigned flags,
		   const int ncpus,
		   int timeout)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	struct drm_i915_gem_relocation_entry reloc;
	uint32_t contexts[64];
	struct {
		double elapsed;
		unsigned long count;
	} *shared;
	int n;

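	/* Results are reported back from the forked children via a shared page. */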
	shared = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
	igt_assert(shared != MAP_FAILED);

	for (n = 0; n < 64; n++) {
		if (flags & QUEUE)
			contexts[n] = gem_queue_create(fd);
		else
			contexts[n] = gem_context_create(fd);

		if (gem_context_has_engine_map(fd, 0))
			gem_context_set_all_engines(fd, contexts[n]);
	}

	memset(&obj, 0, sizeof(obj));
	obj.handle = handle;

	if (flags & INTERRUPTIBLE) {
		/* Be tricksy and force a relocation every batch so that
		 * we don't emit the batch but just do MI_SET_CONTEXT
		 */
		memset(&reloc, 0, sizeof(reloc));
		reloc.offset = 1024;
		reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
		obj.relocs_ptr = to_user_pointer(&reloc);
		obj.relocation_count = 1;
	}

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;
	execbuf.rsvd1 = contexts[0];
	execbuf.flags = e2->flags;
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
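	/*
	 * Probe for the NO_RELOC/HANDLE_LUT fast path; if a second
	 * submission is rejected, fall back to the plain engine flags
	 * with a real relocation target.
	 */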
	igt_require(__gem_execbuf(fd, &execbuf) == 0);
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags = e2->flags;
		reloc.target_handle = obj.handle;
		gem_execbuf(fd, &execbuf);
	}
	gem_sync(fd, handle);

	igt_fork(child, ncpus) {
		struct timespec start, now;
		unsigned int count = 0;

		/* Warmup to bind all objects into each ctx before we begin */
		for (int i = 0; i < ARRAY_SIZE(contexts); i++) {
			execbuf.rsvd1 = contexts[i];
			gem_execbuf(fd, &execbuf);
		}
		gem_sync(fd, handle);

		clock_gettime(CLOCK_MONOTONIC, &start);
		do {
			igt_while_interruptible(flags & INTERRUPTIBLE) {
				for (int loop = 0; loop < 64; loop++) {
					execbuf.rsvd1 = contexts[loop % 64];
					reloc.presumed_offset = -1;
					gem_execbuf(fd, &execbuf);
				}
				count += 64;
			}
			clock_gettime(CLOCK_MONOTONIC, &now);
		} while (elapsed(&start, &now) < timeout);
		gem_sync(fd, handle);
		clock_gettime(CLOCK_MONOTONIC, &now);

		igt_info("[%d] %s: %'u cycles: %.3fus%s\n",
			 child, e2->name, count,
			 elapsed(&start, &now) * 1e6 / count,
			 flags & INTERRUPTIBLE ? " (interruptible)" : "");

		shared[child].elapsed = elapsed(&start, &now);
		shared[child].count = count;
	}
	igt_waitchildren();

	if (ncpus > 1) {
		unsigned long total = 0;
		double max = 0;

		for (n = 0; n < ncpus; n++) {
			total += shared[n].count;
			if (shared[n].elapsed > max)
				max = shared[n].elapsed;
		}

		igt_info("Total %s: %'lu cycles: %.3fus%s\n",
			 e2->name, total, max * 1e6 / total,
			 flags & INTERRUPTIBLE ? " (interruptible)" : "");
	}

	for (n = 0; n < 64; n++)
		gem_context_destroy(fd, contexts[n]);

	munmap(shared, 4096);
}

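/*
 * Measure context-switch throughput on all engines at once, sweeping the
 * number of contexts cycled through on each engine.
 */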
static void all(int fd, uint32_t handle, unsigned flags, int timeout)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj[2];
	struct intel_engine_data engines = { };
	uint32_t contexts[65];
	int n, qlen;

	engines = intel_init_engine_list(fd, 0);
	igt_require(engines.nengines);

	for (n = 0; n < ARRAY_SIZE(contexts); n++) {
		if (flags & QUEUE)
			contexts[n] = gem_queue_create(fd);
		else
			contexts[n] = gem_context_create(fd);

		gem_context_set_all_engines(fd, contexts[n]);
	}

	memset(obj, 0, sizeof(obj));
	obj[1].handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(obj + 1);
	execbuf.buffer_count = 1;
	execbuf.rsvd1 = contexts[0];
	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
	igt_require(__gem_execbuf(fd, &execbuf) == 0);
	gem_sync(fd, handle);

	qlen = measure_qlen(fd, &execbuf, &engines, timeout);
	igt_info("Using timing depth of %d batches\n", qlen);

	execbuf.buffers_ptr = to_user_pointer(obj);
	execbuf.buffer_count = 2;

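	/* Sweep the context count around each power of two: pot-1, pot, pot+1. */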
	for (int pot = 2; pot <= 64; pot *= 2) {
		for (int nctx = pot - 1; nctx <= pot + 1; nctx++) {
			igt_fork(child, engines.nengines) {
				struct timespec start, now;
				unsigned int count = 0;

				obj[0].handle = gem_create(fd, 4096);
				execbuf.flags |= engines.engines[child].flags;
				for (int loop = 0;
				     loop < ARRAY_SIZE(contexts);
				     loop++) {
					execbuf.rsvd1 = contexts[loop];
					gem_execbuf(fd, &execbuf);
				}
				gem_sync(fd, obj[0].handle);

				clock_gettime(CLOCK_MONOTONIC, &start);
				do {
					for (int loop = 0; loop < qlen; loop++) {
						execbuf.rsvd1 = contexts[loop % nctx];
						gem_execbuf(fd, &execbuf);
					}
					count += qlen;
					gem_sync(fd, obj[0].handle);
					clock_gettime(CLOCK_MONOTONIC, &now);
				} while (elapsed(&start, &now) < timeout);
				gem_sync(fd, obj[0].handle);
				clock_gettime(CLOCK_MONOTONIC, &now);
				gem_close(fd, obj[0].handle);

				igt_info("[%d:%d] %s: %'u cycles: %.3fus%s (elapsed: %.3fs)\n",
					 nctx, child,
					 engines.engines[child].name, count,
					 elapsed(&start, &now) * 1e6 / count,
					 flags & INTERRUPTIBLE ?
					 " (interruptible)" : "",
					 elapsed(&start, &now));
			}
			igt_waitchildren();
		}
	}

	for (n = 0; n < ARRAY_SIZE(contexts); n++)
		gem_context_destroy(fd, contexts[n]);
}

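/*
 * Subtest matrix: for each engine (legacy execbuf ring selectors first,
 * then the physical engines), run light/heavy and single/forked variants
 * of each phase, followed by the all-engine throughput tests.
 */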
igt_main
{
	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	const struct intel_execution_engine2 *e2;
	const struct intel_execution_engine *e;
	static const struct {
		const char *name;
		unsigned int flags;
		bool (*require)(int fd);
	} phases[] = {
		{ "", 0, NULL },
		{ "-interruptible", INTERRUPTIBLE, NULL },
		{ "-queue", QUEUE, gem_has_queues },
		{ "-queue-interruptible", QUEUE | INTERRUPTIBLE, gem_has_queues },
		{ }
	};
	uint32_t light = 0, heavy;
	int fd = -1;

	igt_fixture {
		const uint32_t bbe = MI_BATCH_BUFFER_END;

		fd = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(fd);

		gem_require_contexts(fd);

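		/*
		 * "light" is a bare MI_BATCH_BUFFER_END; "heavy" places it
		 * at the end of a 4MiB batch so each submission executes
		 * through 4MiB of nops.
		 */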
		light = gem_create(fd, 4096);
		gem_write(fd, light, 0, &bbe, sizeof(bbe));

		heavy = gem_create(fd, 4096*1024);
		gem_write(fd, heavy, 4096*1024-sizeof(bbe), &bbe, sizeof(bbe));

		igt_fork_hang_detector(fd);
	}

	/* Legacy testing must be first. */
	for (e = intel_execution_engines; e->name; e++) {
		struct intel_execution_engine2 e2__;

		e2__ = gem_eb_flags_to_engine(e->exec_id | e->flags);
		if (e2__.flags == -1)
			continue; /* I915_EXEC_BSD with no ring selectors */

		e2 = &e2__;

		for (typeof(*phases) *p = phases; p->name; p++) {
			igt_subtest_group {
				igt_fixture {
					gem_require_ring(fd, e2->flags);
					if (p->require)
						igt_require(p->require(fd));
				}

				igt_subtest_f("legacy-%s%s", e->name, p->name)
					single(fd, light, e2, p->flags, 1, 5);

				igt_skip_on_simulation();

				igt_subtest_f("legacy-%s-heavy%s",
					      e->name, p->name)
					single(fd, heavy, e2, p->flags, 1, 5);
				igt_subtest_f("legacy-%s-forked%s",
					      e->name, p->name)
					single(fd, light, e2, p->flags, ncpus, 150);
				igt_subtest_f("legacy-%s-forked-heavy%s",
					      e->name, p->name)
					single(fd, heavy, e2, p->flags, ncpus, 150);
			}
		}
	}

	/* Must come after legacy subtests. */
	__for_each_physical_engine(fd, e2) {
		for (typeof(*phases) *p = phases; p->name; p++) {
			igt_subtest_group {
				igt_fixture {
					if (p->require)
						igt_require(p->require(fd));
				}

				igt_subtest_f("%s%s", e2->name, p->name)
					single(fd, light, e2, p->flags, 1, 5);

				igt_skip_on_simulation();

				igt_subtest_f("%s-heavy%s", e2->name, p->name)
					single(fd, heavy, e2, p->flags, 1, 5);
				igt_subtest_f("%s-forked%s", e2->name, p->name)
					single(fd, light, e2, p->flags, ncpus, 150);
				igt_subtest_f("%s-forked-heavy%s",
					      e2->name, p->name)
					single(fd, heavy, e2, p->flags, ncpus, 150);
			}
		}
	}

	igt_subtest("all-light")
		all(fd, light, 0, 5);
	igt_subtest("all-heavy")
		all(fd, heavy, 0, 5);

	igt_subtest_group {
		igt_fixture {
			igt_require(gem_has_queues(fd));
		}
		igt_subtest("queue-light")
			all(fd, light, QUEUE, 5);
		igt_subtest("queue-heavy")
			all(fd, heavy, QUEUE, 5);
	}

	igt_fixture {
		igt_stop_hang_detector();
		gem_close(fd, heavy);
		gem_close(fd, light);
		close(fd);
	}
}