/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <[email protected]>
 *
 */

/*
 * Testcase: Test the relocations through the CPU domain
 *
 * Attempt to stress test performing relocations whilst the batch is in the
 * CPU domain.
 *
 * A freshly allocated buffer starts in the CPU domain, and the pwrite
 * should also be performed whilst in the CPU domain, so we should
 * execute the relocations within the CPU domain. If for any reason one of
 * those steps should land it in the GTT domain, we take the secondary
 * precaution of filling the mappable portion of the GATT.
 *
 * In order to detect whether a relocation fails, we first fill a target
 * buffer with a sequence of invalid commands that would cause the GPU to
 * hang immediately, and then attempt to overwrite them with a legal, if
 * short, batchbuffer. Then, when we come to execute the bo, if the
 * relocations fail and we copy across either all zeros or garbage, the
 * GPU will hang.
 */

#include "igt.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/time.h>

#include <drm.h>

#include "intel_bufmgr.h"
#define MI_INSTR(opcode, flags) ((opcode) << 23 | (flags))

IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");

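/*
 * The *_emit_store_addr() helpers below emit an MI_STORE_DWORD_IMM but
 * deliberately leave the address and data dwords untouched, so they keep
 * the 0xff fill of the template: the relocation supplies the address, and
 * the implicit all-ones data is what later scribbles over the BB_START
 * address. Since the MI length field encodes (total dwords - 2), the
 * shorter/longer per-generation forms need the -1/+1 adjustments to the
 * 4-dword gen4 encoding.
 */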
static uint32_t *
gen2_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
{
	*cs++ = MI_STORE_DWORD_IMM - 1;
	addr->offset += sizeof(*cs);
	cs += 1; /* addr */
	cs += 1; /* value: implicit 0xffffffff */
	return cs;
}

static uint32_t *
gen4_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
{
	*cs++ = MI_STORE_DWORD_IMM;
	*cs++ = 0;
	addr->offset += 2 * sizeof(*cs);
	cs += 1; /* addr */
	cs += 1; /* value: implicit 0xffffffff */
	return cs;
}

static uint32_t *
gen8_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
{
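	/* bit 21 selects the qword store; +1 lengthens it for the extra data dword */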
	*cs++ = (MI_STORE_DWORD_IMM | 1 << 21) + 1;
	addr->offset += sizeof(*cs);
	igt_assert((addr->delta & 7) == 0);
	cs += 2; /* addr */
	cs += 2; /* value: implicit 0xffffffffffffffff */
	return cs;
}

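/*
 * The MI_BATCH_BUFFER_START flags vary across generations: 2 << 6 selects
 * the GTT address space on older parts, 1 << 8 marks the batch non-secure
 * from gen4 onwards (Haswell moves that bit to 1 << 13), while gen2 flags
 * a non-secure batch via bit 0 of the address itself (the delta adjustment
 * below). gen8+ takes a 64-bit address, hence the +1 on the length.
 */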
static uint32_t *
gen2_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
{
	*cs++ = MI_BATCH_BUFFER_START | 2 << 6;
	addr->offset += sizeof(*cs);
	addr->delta += 1;
	cs += 1; /* addr */
	return cs;
}

static uint32_t *
gen4_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
{
	*cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8;
	addr->offset += sizeof(*cs);
	cs += 1; /* addr */
	return cs;
}

static uint32_t *
gen6_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
{
	*cs++ = MI_BATCH_BUFFER_START | 1 << 8;
	addr->offset += sizeof(*cs);
	cs += 1; /* addr */
	return cs;
}

static uint32_t *
hsw_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
{
	*cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8 | 1 << 13;
	addr->offset += sizeof(*cs);
	cs += 1; /* addr */
	return cs;
}

static uint32_t *
gen8_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
{
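	/*
	 * Keep the BB_START address field qword-aligned: its offset later
	 * becomes the target delta of the restoring store, which
	 * gen8_emit_store_addr() asserts to be 8-byte aligned.
	 */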
	if (((uintptr_t)cs & 7) == 0) {
		*cs++ = MI_NOOP; /* align addr for MI_STORE_DWORD_IMM */
		addr->offset += sizeof(*cs);
	}

	*cs++ = MI_BATCH_BUFFER_START + 1;
	addr->offset += sizeof(*cs);
	cs += 2; /* addr */

	return cs;
}

static void *
create_tmpl(int i915, struct drm_i915_gem_relocation_entry *reloc)
{
	const uint32_t devid = intel_get_drm_devid(i915);
	const int gen = intel_gen(devid);
	uint32_t *(*emit_store_addr)(uint32_t *cs,
				     struct drm_i915_gem_relocation_entry *addr);
	uint32_t *(*emit_bb_start)(uint32_t *cs,
				   struct drm_i915_gem_relocation_entry *reloc);
	void *tmpl;

	if (gen >= 8)
		emit_store_addr = gen8_emit_store_addr;
	else if (gen >= 4)
		emit_store_addr = gen4_emit_store_addr;
	else
		emit_store_addr = gen2_emit_store_addr;

	if (gen >= 8)
		emit_bb_start = gen8_emit_bb_start;
	else if (IS_HASWELL(devid))
		emit_bb_start = hsw_emit_bb_start;
	else if (gen >= 6)
		emit_bb_start = gen6_emit_bb_start;
	else if (gen >= 4)
		emit_bb_start = gen4_emit_bb_start;
	else
		emit_bb_start = gen2_emit_bb_start;

	tmpl = malloc(4096);
	igt_assert(tmpl);
	memset(tmpl, 0xff, 4096);
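	/* 0xff bytes decode as invalid commands; executing them hangs the GPU */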

	/* Jump over the booby traps to the end */
	reloc[0].delta = 64;
	emit_bb_start(tmpl, &reloc[0]);

	/* Restore the bad address to catch missing relocs */
	reloc[1].offset = 64;
	reloc[1].delta = reloc[0].offset;
	*emit_store_addr(tmpl + 64, &reloc[1]) = MI_BATCH_BUFFER_END;

	return tmpl;
}
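
/*
 * For illustration, on gen4 the template above works out to roughly this
 * layout (offsets in bytes; the trap words are the 0xff fill):
 *
 *   0x000: MI_BATCH_BUFFER_START --------.   jump over the traps
 *   0x004: <reloc[0]: address of 0x040>  |
 *   0x008: 0xffffffff (traps) ...        |
 *   0x040: MI_STORE_DWORD_IMM <----------'   restore the bad address
 *   0x044: 0
 *   0x048: <reloc[1]: address of 0x004>
 *   0x04c: 0xffffffff (implicit value)
 *   0x050: MI_BATCH_BUFFER_END
 *
 * After each run, the store puts ~0 back into the BB_START address dword,
 * so if a later relocation is skipped or botched, BB_START jumps to a
 * garbage address and the GPU hangs.
 */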

static void run_test(int i915, int count)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_relocation_entry reloc[2];
	struct drm_i915_gem_exec_object2 obj;

	uint32_t *handles;
	uint32_t *tmpl;

	handles = malloc(count * sizeof(uint32_t));
	igt_assert(handles);

	memset(reloc, 0, sizeof(reloc));
	tmpl = create_tmpl(i915, reloc);
	for (int i = 0; i < count; i++) {
		handles[i] = gem_create(i915, 4096);
		gem_write(i915, handles[i], 0, tmpl, 4096);
	}
	free(tmpl);

	memset(&obj, 0, sizeof(obj));
	obj.relocs_ptr = to_user_pointer(reloc);
	obj.relocation_count = ARRAY_SIZE(reloc);

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;

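	/*
	 * Setting presumed_offset to -1 never matches the kernel's view of
	 * the object, so each execbuf below is forced to process the
	 * relocations rather than skip them as already up to date.
	 */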
	/* fill the entire gart with batches and run them */
	for (int i = 0; i < count; i++) {
		obj.handle = handles[i];

		reloc[0].target_handle = obj.handle;
		reloc[0].presumed_offset = -1;
		reloc[1].target_handle = obj.handle;
		reloc[1].presumed_offset = -1;

		gem_execbuf(i915, &execbuf);
	}

	/* And again in reverse to try and catch the relocation code out */
	for (int i = 0; i < count; i++) {
		obj.handle = handles[count - i - 1];

		reloc[0].target_handle = obj.handle;
		reloc[0].presumed_offset = -1;
		reloc[1].target_handle = obj.handle;
		reloc[1].presumed_offset = -1;

		gem_execbuf(i915, &execbuf);
	}

	/* Third time unlucky? */
	for (int i = 0; i < count; i++) {
		obj.handle = handles[i];

		reloc[0].target_handle = obj.handle;
		reloc[0].presumed_offset = -1;
		reloc[1].target_handle = obj.handle;
		reloc[1].presumed_offset = -1;

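		/*
		 * Move the batch back into the CPU domain so that the
		 * relocation write itself has to be performed through the
		 * CPU domain, the path this test targets.
		 */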
		gem_set_domain(i915, obj.handle,
			       I915_GEM_DOMAIN_CPU,
			       I915_GEM_DOMAIN_CPU);

		gem_execbuf(i915, &execbuf);
	}

	for (int i = 0; i < count; i++)
		gem_close(i915, handles[i]);
	free(handles);
}

igt_main
{
	int i915;

	igt_fixture {
		i915 = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(i915);

		/* could use BLT_FILL instead for gen2 */
		igt_require(gem_can_store_dword(i915, 0));

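		/* a missed relocation manifests as a GPU hang, caught here */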
		igt_fork_hang_detector(i915);
	}

	igt_subtest("basic")
		run_test(i915, 1);

	igt_subtest("full") {
		uint64_t aper_size = gem_mappable_aperture_size();
		unsigned long count = aper_size / 4096 + 1;
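		/* i.e. one more 4KiB batch than the mappable aperture holds */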

		intel_require_memory(count, 4096, CHECK_RAM);

		run_test(i915, count);
	}

	igt_subtest("forked") {
		uint64_t aper_size = gem_mappable_aperture_size();
		unsigned long count = aper_size / 4096 + 1;
		int ncpus = sysconf(_SC_NPROCESSORS_ONLN);

		intel_require_memory(count, 4096, CHECK_RAM);

		igt_fork(child, ncpus)
			run_test(i915, count / ncpus + 1);
		igt_waitchildren();
	}

	igt_fixture {
		igt_stop_hang_detector();
	}
}