xref: /aosp_15_r20/external/igt-gpu-tools/benchmarks/gem_busy.c (revision d83cc019efdc2edc6c4b16e9034a3ceb8d35d77c)
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Chris Wilson <[email protected]>
25  *
26  */
27 
28 #include <unistd.h>
29 #include <stdlib.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <string.h>
33 #include <fcntl.h>
34 #include <inttypes.h>
35 #include <errno.h>
36 #include <sys/stat.h>
37 #include <sys/poll.h>
38 #include <sys/ioctl.h>
39 #include <sys/time.h>
40 #include <time.h>
41 
42 #include "drm.h"
43 #include "ioctl_wrappers.h"
44 #include "drmtest.h"
45 #include "intel_chipset.h"
46 #include "intel_reg.h"
47 #include "igt_stats.h"
48 #include "i915/gem_mman.h"
49 
/* Local copies of execbuf flag bits; loop() ORs them into execbuf.flags. */
#define LOCAL_I915_EXEC_NO_RELOC	(1<<11)
#define LOCAL_I915_EXEC_HANDLE_LUT	(1<<12)

/* Two-bit field starting at bit 13, cleared along with the ring selector. */
#define LOCAL_I915_EXEC_BSD_SHIFT	(13)
#define LOCAL_I915_EXEC_BSD_MASK	(3 << LOCAL_I915_EXEC_BSD_SHIFT)

/* Every execbuf flag bit that loop() rewrites when switching engine. */
#define ENGINE_FLAGS	(I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)

/* Benchmark mode bits: collected by main(), interpreted by loop(). */
#define WRITE	0x1	/* obj[0] is marked as written by the batch */
#define IDLE	0x2	/* skip the submission, query an idle object */
#define DMABUF	0x4	/* children poll() an exported dma-buf fd */
#define WAIT	0x8	/* children issue the GEM_WAIT ioctl */
#define SYNC	0x10	/* children poll() the merged out-fence fd */
#define SYNCOBJ	0x20	/* children issue the syncobj wait ioctl */
64 
/*
 * execbuf flag set by loop() when it points cliprects_ptr/num_cliprects at
 * a local_gem_exec_fence entry.
 */
#define LOCAL_I915_EXEC_FENCE_ARRAY (1 << 19)

/* Bits for local_gem_exec_fence.flags. */
#define LOCAL_EXEC_FENCE_WAIT (1 << 0)
#define LOCAL_EXEC_FENCE_SIGNAL (1 << 1)

/* One fence entry: a syncobj handle plus LOCAL_EXEC_FENCE_* flags. */
struct local_gem_exec_fence {
	uint32_t handle;
	uint32_t flags;
};
72 
gem_busy(int fd,uint32_t handle)73 static void gem_busy(int fd, uint32_t handle)
74 {
75 	struct drm_i915_gem_busy busy = { .handle = handle };
76 	ioctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
77 }
78 
gem_wait__busy(int fd,uint32_t handle)79 static void gem_wait__busy(int fd, uint32_t handle)
80 {
81 	struct drm_i915_gem_wait wait = { .bo_handle = handle };
82 	ioctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
83 }
84 
/*
 * Return the time from @start to @end in nanoseconds, as a double.
 *
 * The result is negative when @end precedes @start.
 */
static double elapsed(const struct timespec *start,
		      const struct timespec *end)
{
	const time_t whole_seconds = end->tv_sec - start->tv_sec;
	const long leftover_ns = end->tv_nsec - start->tv_nsec;

	return 1e9 * whole_seconds + leftover_ns;
}
91 
92 struct sync_merge_data {
93 	char    name[32];
94 	__s32   fd2;
95 	__s32   fence;
96 	__u32   flags;
97 	__u32   pad;
98 };
99 
100 #define SYNC_IOC_MAGIC         '>'
101 #define SYNC_IOC_MERGE         _IOWR(SYNC_IOC_MAGIC, 3, struct sync_merge_data)
102 
sync_merge(int fd1,int fd2)103 static int sync_merge(int fd1, int fd2)
104 {
105 	struct sync_merge_data data;
106 
107 	if (fd1 == -1)
108 		return dup(fd2);
109 
110 	if (fd2 == -1)
111 		return dup(fd1);
112 
113 	memset(&data, 0, sizeof(data));
114 	data.fd2 = fd2;
115 	strcpy(data.name, "i965");
116 
117 	if (ioctl(fd1, SYNC_IOC_MERGE, &data))
118 		return -errno;
119 
120 	return data.fence;
121 }
122 
__syncobj_create(int fd)123 static uint32_t __syncobj_create(int fd)
124 {
125 	struct local_syncobj_create {
126 		uint32_t handle, flags;
127 	} arg;
128 #define LOCAL_IOCTL_SYNCOBJ_CREATE        DRM_IOWR(0xBF, struct local_syncobj_create)
129 
130 	memset(&arg, 0, sizeof(arg));
131 	ioctl(fd, LOCAL_IOCTL_SYNCOBJ_CREATE, &arg);
132 
133 	return arg.handle;
134 }
135 
/*
 * Create a syncobj on @fd and return its handle.
 *
 * Asserts (via igt_assert_neq) that the handle returned by
 * __syncobj_create() is non-zero.
 */
static uint32_t syncobj_create(int fd)
{
	uint32_t ret;

	/* Expression kept as-is: the assert macro reports it on failure. */
	igt_assert_neq((ret = __syncobj_create(fd)), 0);

	return ret;
}
144 
145 #define LOCAL_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
146 #define LOCAL_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
147 struct local_syncobj_wait {
148        __u64 handles;
149        /* absolute timeout */
150        __s64 timeout_nsec;
151        __u32 count_handles;
152        __u32 flags;
153        __u32 first_signaled; /* only valid when not waiting all */
154        __u32 pad;
155 };
156 #define LOCAL_IOCTL_SYNCOBJ_WAIT	DRM_IOWR(0xC3, struct local_syncobj_wait)
__syncobj_wait(int fd,struct local_syncobj_wait * args)157 static int __syncobj_wait(int fd, struct local_syncobj_wait *args)
158 {
159 	int err = 0;
160 	if (drmIoctl(fd, LOCAL_IOCTL_SYNCOBJ_WAIT, args))
161 		err = -errno;
162 	return err;
163 }
164 
loop(unsigned ring,int reps,int ncpus,unsigned flags)165 static int loop(unsigned ring, int reps, int ncpus, unsigned flags)
166 {
167 	struct drm_i915_gem_execbuffer2 execbuf;
168 	struct drm_i915_gem_exec_object2 obj[2];
169 	struct drm_i915_gem_relocation_entry reloc[2];
170 	struct local_gem_exec_fence syncobj;
171 	unsigned engines[16];
172 	unsigned nengine;
173 	uint32_t *batch;
174 	double *shared;
175 	int fd, i, gen;
176 	int dmabuf;
177 
178 	shared = mmap(0, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
179 
180 	fd = drm_open_driver(DRIVER_INTEL);
181 	gen = intel_gen(intel_get_drm_devid(fd));
182 
183 	memset(obj, 0, sizeof(obj));
184 	obj[0].handle = gem_create(fd, 4096);
185 	if (flags & WRITE)
186 		obj[0].flags = EXEC_OBJECT_WRITE;
187 	obj[1].handle = gem_create(fd, 4096);
188 	if (gem_mmap__has_wc(fd))
189 		batch = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_WRITE);
190 	else
191 		batch = gem_mmap__gtt(fd, obj[1].handle, 4096, PROT_WRITE);
192 	gem_set_domain(fd, obj[1].handle,
193 			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
194 	batch[0] = MI_BATCH_BUFFER_END;
195 
196 	memset(&execbuf, 0, sizeof(execbuf));
197 	execbuf.buffers_ptr = to_user_pointer(obj);
198 	execbuf.buffer_count = 2;
199 	execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
200 	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
201 	if (__gem_execbuf(fd, &execbuf)) {
202 		execbuf.flags = 0;
203 		if (__gem_execbuf(fd, &execbuf))
204 			return 77;
205 	}
206 
207 	if (flags & SYNCOBJ) {
208 		syncobj.handle = syncobj_create(fd);
209 		syncobj.flags = LOCAL_EXEC_FENCE_SIGNAL;
210 
211 		execbuf.cliprects_ptr = to_user_pointer(&syncobj);
212 		execbuf.num_cliprects = 1;
213 		execbuf.flags |= LOCAL_I915_EXEC_FENCE_ARRAY;
214 	}
215 
216 	if (ring == -1) {
217 		nengine = 0;
218 		for (ring = 1; ring < 16; ring++) {
219 			execbuf.flags &= ~ENGINE_FLAGS;
220 			execbuf.flags |= ring;
221 			if (__gem_execbuf(fd, &execbuf) == 0)
222 				engines[nengine++] = ring;
223 		}
224 	} else {
225 		nengine = 1;
226 		engines[0] = ring;
227 	}
228 
229 	obj[1].relocs_ptr = to_user_pointer(reloc);
230 	obj[1].relocation_count = 2;
231 
232 	if (flags & DMABUF)
233 		dmabuf = prime_handle_to_fd(fd, obj[0].handle);
234 
235 	gem_set_domain(fd, obj[1].handle,
236 			I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
237 
238 	reloc[0].target_handle = obj[1].handle; /* recurse */
239 	reloc[0].presumed_offset = obj[1].offset;
240 	reloc[0].offset = sizeof(uint32_t);
241 	reloc[0].delta = 0;
242 	if (gen < 4)
243 		reloc[0].delta = 1;
244 	reloc[0].read_domains = I915_GEM_DOMAIN_COMMAND;
245 	reloc[0].write_domain = 0;
246 
247 	reloc[1].target_handle = obj[0].handle;
248 	reloc[1].presumed_offset = obj[0].offset;
249 	reloc[1].offset = 1024;
250 	reloc[1].delta = 0;
251 	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
252 	reloc[1].write_domain = 0;
253 	if (flags & WRITE)
254 		reloc[1].write_domain = I915_GEM_DOMAIN_RENDER;
255 
256 	while (reps--) {
257 		int fence = -1;
258 		memset(shared, 0, 4096);
259 
260 		gem_set_domain(fd, obj[1].handle,
261 			       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
262 		sleep(1); /* wait for the hw to go back to sleep */
263 		batch[i = 0] = MI_BATCH_BUFFER_START;
264 		if (gen >= 8) {
265 			batch[i] |= 1 << 8 | 1;
266 			batch[++i] = obj[1].offset;
267 			batch[++i] = obj[1].offset >> 32;
268 		} else if (gen >= 6) {
269 			batch[i] |= 1 << 8;
270 			batch[++i] = obj[1].offset;
271 		} else {
272 			batch[i] |= 2 << 6;
273 			batch[++i] = obj[1].offset;
274 			if (gen < 4)
275 				batch[i] |= 1;
276 		}
277 
278 		if ((flags & IDLE) == 0) {
279 			for (int n = 0; n < nengine; n++) {
280 				execbuf.flags &= ~(3 << 16);
281 				if (flags & SYNC)
282 					execbuf.flags |= 1 << 17;
283 				execbuf.flags &= ~ENGINE_FLAGS;
284 				execbuf.flags |= engines[n];
285 				gem_execbuf_wr(fd, &execbuf);
286 				if (execbuf.flags & (1 << 17))
287 					fence = sync_merge(fence, execbuf.rsvd2 >> 32);
288 			}
289 		}
290 
291 		igt_fork(child, ncpus) {
292 			struct timespec start, end;
293 			unsigned count = 0;
294 
295 			clock_gettime(CLOCK_MONOTONIC, &start);
296 			do {
297 				if (flags & DMABUF) {
298 					struct pollfd pfd = { .fd = dmabuf, .events = POLLOUT };
299 					for (int inner = 0; inner < 1024; inner++)
300 						poll(&pfd, 1, 0);
301 				} else if (flags & SYNCOBJ) {
302 					struct local_syncobj_wait arg = {
303 						.handles = to_user_pointer(&syncobj.handle),
304 						.count_handles = 1,
305 					};
306 
307 					for (int inner = 0; inner < 1024; inner++)
308 						__syncobj_wait(fd, &arg);
309 				} else if (flags & SYNC) {
310 					struct pollfd pfd = { .fd = fence, .events = POLLOUT };
311 					for (int inner = 0; inner < 1024; inner++)
312 						poll(&pfd, 1, 0);
313 				} else if (flags & WAIT) {
314 					for (int inner = 0; inner < 1024; inner++)
315 						gem_wait__busy(fd, obj[0].handle);
316 				} else {
317 					for (int inner = 0; inner < 1024; inner++)
318 						gem_busy(fd, obj[0].handle);
319 				}
320 
321 				clock_gettime(CLOCK_MONOTONIC, &end);
322 				count += 1024;
323 			} while (elapsed(&start, &end) < 2e9);
324 
325 			clock_gettime(CLOCK_MONOTONIC, &end);
326 			shared[child] = elapsed(&start, &end) / count;
327 		}
328 		igt_waitchildren();
329 
330 		batch[0] = MI_BATCH_BUFFER_END;
331 		if (fence != -1)
332 			close(fence);
333 
334 		for (int child = 0; child < ncpus; child++)
335 			shared[ncpus] += shared[child];
336 		printf("%7.3f\n", shared[ncpus] / ncpus);
337 	}
338 	return 0;
339 }
340 
main(int argc,char ** argv)341 int main(int argc, char **argv)
342 {
343 	unsigned ring = I915_EXEC_RENDER;
344 	unsigned flags = 0;
345 	int reps = 1;
346 	int ncpus = 1;
347 	int c;
348 
349 	while ((c = getopt (argc, argv, "e:r:dfsSwWI")) != -1) {
350 		switch (c) {
351 		case 'e':
352 			if (strcmp(optarg, "rcs") == 0)
353 				ring = I915_EXEC_RENDER;
354 			else if (strcmp(optarg, "vcs") == 0)
355 				ring = I915_EXEC_BSD;
356 			else if (strcmp(optarg, "bcs") == 0)
357 				ring = I915_EXEC_BLT;
358 			else if (strcmp(optarg, "vecs") == 0)
359 				ring = I915_EXEC_VEBOX;
360 			else if (strcmp(optarg, "all") == 0)
361 				ring = -1;
362 			else
363 				ring = atoi(optarg);
364 			break;
365 
366 		case 'r':
367 			reps = atoi(optarg);
368 			if (reps < 1)
369 				reps = 1;
370 			break;
371 
372 		case 'f':
373 			ncpus = sysconf(_SC_NPROCESSORS_ONLN);
374 			break;
375 
376 		case 'd':
377 			flags |= DMABUF;
378 			break;
379 
380 		case 'w':
381 			flags |= WAIT;
382 			break;
383 
384 		case 's':
385 			flags |= SYNC;
386 			break;
387 
388 		case 'S':
389 			flags |= SYNCOBJ;
390 			break;
391 
392 		case 'W':
393 			flags |= WRITE;
394 			break;
395 
396 		case 'I':
397 			flags |= IDLE;
398 			break;
399 		default:
400 			break;
401 		}
402 	}
403 
404 	return loop(ring, reps, ncpus, flags);
405 }
406