/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "igt.h"
#include "igt_rand.h"
#include "igt_sysfs.h"
#include "igt_vgem.h"
#include "i915/gem_ring.h"

#include <sys/ioctl.h>
#include <sys/signal.h>

#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)

#define LOCAL_I915_EXEC_BSD_SHIFT      (13)
#define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)

#define ENGINE_FLAGS  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)

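/* Time difference in seconds between two timespecs. */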
static double elapsed(const struct timespec *start, const struct timespec *end)
{
	return ((end->tv_sec - start->tv_sec) +
		(end->tv_nsec - start->tv_nsec)*1e-9);
}

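/*
 * Swap the handle and presumed offset of two entries in an execobject
 * array; handed to igt_permute_array() to shuffle the shared object list.
 */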
static void xchg_obj(void *array, unsigned i, unsigned j)
{
	struct drm_i915_gem_exec_object2 *obj = array;
	uint64_t tmp;

	tmp = obj[i].handle;
	obj[i].handle = obj[j].handle;
	obj[j].handle = tmp;

	tmp = obj[i].offset;
	obj[i].offset = obj[j].offset;
	obj[j].offset = tmp;
}

#define CONTEXTS 0x1
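/*
 * On every engine, submit a self-recursing batch that keeps ring_size
 * objects marked as being written, then time how long an execbuf that
 * references all of those busy objects takes to return, i.e. the cost of
 * setting up awaits on a large amount of outstanding work. With CONTEXTS,
 * each engine additionally runs in a freshly created context per iteration.
 */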
static void wide(int fd, int ring_size, int timeout, unsigned int flags)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	const int gen = intel_gen(intel_get_drm_devid(fd));
	struct {
		struct drm_i915_gem_exec_object2 *obj;
		struct drm_i915_gem_exec_object2 exec[2];
		struct drm_i915_gem_relocation_entry reloc;
		struct drm_i915_gem_execbuffer2 execbuf;
		uint32_t *cmd;
	} *exec;
	struct drm_i915_gem_exec_object2 *obj;
	struct drm_i915_gem_execbuffer2 execbuf;
	unsigned engines[16];
	unsigned nengine, engine;
	unsigned long count;
	double time;

	nengine = 0;
	for_each_physical_engine(fd, engine)
		engines[nengine++] = engine;
	igt_require(nengine);

	exec = calloc(nengine, sizeof(*exec));
	igt_assert(exec);

	intel_require_memory(nengine*(2 + ring_size), 4096, CHECK_RAM);
	obj = calloc(nengine*ring_size + 1, sizeof(*obj));
	igt_assert(obj);

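	/*
	 * Per-engine setup: create ring_size write targets plus a small batch
	 * mapped write-combining so it can be rewritten on the fly. After an
	 * initial execbuf, the batch is duplicated into the second exec slot
	 * (the batch position) with a relocation pointing back into itself
	 * ("recurse"), ready to be turned into a self-referencing
	 * MI_BATCH_BUFFER_START in the main loop.
	 */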
	for (unsigned e = 0; e < nengine; e++) {
		exec[e].obj = calloc(ring_size, sizeof(*exec[e].obj));
		igt_assert(exec[e].obj);
		for (unsigned n = 0; n < ring_size; n++)  {
			exec[e].obj[n].handle = gem_create(fd, 4096);
			exec[e].obj[n].flags = EXEC_OBJECT_WRITE;

			obj[e*ring_size + n].handle = exec[e].obj[n].handle;
		}

		exec[e].execbuf.buffers_ptr = to_user_pointer(exec[e].exec);
		exec[e].execbuf.buffer_count = 1;
		exec[e].execbuf.flags = (engines[e] |
					 LOCAL_I915_EXEC_NO_RELOC |
					 LOCAL_I915_EXEC_HANDLE_LUT);

		if (flags & CONTEXTS) {
			exec[e].execbuf.rsvd1 = gem_context_create(fd);
		}

		exec[e].exec[0].handle = gem_create(fd, 4096);
		exec[e].cmd = gem_mmap__wc(fd, exec[e].exec[0].handle,
					   0, 4096, PROT_WRITE);

		gem_set_domain(fd, exec[e].exec[0].handle,
			       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
		exec[e].cmd[0] = MI_BATCH_BUFFER_END;

		gem_execbuf(fd, &exec[e].execbuf);
		exec[e].exec[1] = exec[e].exec[0];
		exec[e].execbuf.buffer_count = 2;

		exec[e].reloc.target_handle = 1; /* recurse */
		exec[e].reloc.offset = sizeof(uint32_t);
		exec[e].reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
		if (gen < 4)
			exec[e].reloc.delta = 1;

		exec[e].exec[1].relocs_ptr = to_user_pointer(&exec[e].reloc);
		exec[e].exec[1].relocation_count = 1;
	}
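	/*
	 * The last element of the shared object array is a trivial batch
	 * (just MI_BATCH_BUFFER_END). The combined execbuf lists all
	 * nengine*ring_size objects ahead of it, so each submission must
	 * await every request still writing to those objects.
	 */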
	obj[nengine*ring_size].handle = gem_create(fd, 4096);
	gem_write(fd, obj[nengine*ring_size].handle, 0, &bbe, sizeof(bbe));

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj[nengine*ring_size]);
	execbuf.buffer_count = 1;
	gem_execbuf(fd, &execbuf); /* tag the object as a batch in the GTT */
	execbuf.buffers_ptr = to_user_pointer(obj);
	execbuf.buffer_count = nengine*ring_size + 1;

	intel_detect_and_clear_missed_interrupts(fd);

	time = 0;
	count = 0;
	igt_until_timeout(timeout) {
		struct timespec start, now;
		for (unsigned e = 0; e < nengine; e++) {
			uint64_t address;
			int i;

			if (flags & CONTEXTS) {
				gem_context_destroy(fd, exec[e].execbuf.rsvd1);
				exec[e].execbuf.rsvd1 = gem_context_create(fd);
			}

			exec[e].reloc.presumed_offset = exec[e].exec[1].offset;
			address = (exec[e].reloc.presumed_offset +
				   exec[e].reloc.delta);
			gem_set_domain(fd, exec[e].exec[1].handle,
				       I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);

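			/*
			 * Rewrite the batch into MI_BATCH_BUFFER_START aimed
			 * at its own presumed offset so that it spins,
			 * re-reading itself, until cmd[0] is reset to
			 * MI_BATCH_BUFFER_END below. The operand layout
			 * depends on the GPU generation.
			 */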
			i = 0;
			exec[e].cmd[i] = MI_BATCH_BUFFER_START;
			if (gen >= 8) {
				exec[e].cmd[i] |= 1 << 8 | 1;
				exec[e].cmd[++i] = address;
				exec[e].cmd[++i] = address >> 32;
			} else if (gen >= 6) {
				exec[e].cmd[i] |= 1 << 8;
				exec[e].cmd[++i] = address;
			} else {
				exec[e].cmd[i] |= 2 << 6;
				exec[e].cmd[++i] = address;
			}

			exec[e].exec[0] = obj[nengine*ring_size];
			gem_execbuf(fd, &exec[e].execbuf);

			for (unsigned n = 0; n < ring_size; n++) {
				exec[e].exec[0] = exec[e].obj[n];
				gem_execbuf(fd, &exec[e].execbuf);
				exec[e].obj[n].offset = exec[e].exec[0].offset;
			}
		}

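		/*
		 * Shuffle the object array, then measure how long the execbuf
		 * ioctl takes on each engine while every object it references
		 * is still busy being written by the spinning batches.
		 */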
		igt_permute_array(obj, nengine*ring_size, xchg_obj);

		clock_gettime(CLOCK_MONOTONIC, &start);
		for (unsigned e = 0; e < nengine; e++) {
			execbuf.flags = (engines[e] |
					 LOCAL_I915_EXEC_NO_RELOC |
					 LOCAL_I915_EXEC_HANDLE_LUT);
			gem_execbuf(fd, &execbuf);
		}
		clock_gettime(CLOCK_MONOTONIC, &now);
		time += elapsed(&start, &now);
		count += nengine;

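		/*
		 * Overwrite each recursing batch with MI_BATCH_BUFFER_END so
		 * the spinners terminate; the barrier pushes the WC writes
		 * out before the next iteration.
		 */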
		for (unsigned e = 0; e < nengine; e++)
			exec[e].cmd[0] = MI_BATCH_BUFFER_END;
		__sync_synchronize();
	}

	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);

	igt_info("%s: %'lu cycles: %.3fus\n",
		 __func__, count, time*1e6 / count);

	gem_close(fd, obj[nengine*ring_size].handle);
	free(obj);

	for (unsigned e = 0; e < nengine; e++) {
		if (flags & CONTEXTS)
			gem_context_destroy(fd, exec[e].execbuf.rsvd1);

		for (unsigned n = 0; n < ring_size; n++)
			gem_close(fd, exec[e].obj[n].handle);
		free(exec[e].obj);

		munmap(exec[e].cmd, 4096);
		gem_close(fd, exec[e].exec[1].handle);
	}
	free(exec);
}

igt_main
{
	int ring_size = 0;
	int device = -1;

	igt_fixture {

		device = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(device);
		gem_submission_print_method(device);

		ring_size = gem_measure_ring_inflight(device, ALL_ENGINES, 0) - 10;
		if (!gem_has_execlists(device))
			ring_size /= 2;
		igt_info("Ring size: %d batches\n", ring_size);
		igt_require(ring_size > 0);

		igt_fork_hang_detector(device);
	}

	igt_subtest("wide-all")
		wide(device, ring_size, 20, 0);

	igt_subtest("wide-contexts") {
		gem_require_contexts(device);
		wide(device, ring_size, 20, CONTEXTS);
	}

	igt_fixture {
		igt_stop_hang_detector();
		close(device);
	}
}