/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <[email protected]>
 *
 */

/*
 * Testcase: Test the relocations through the CPU domain
 *
 * Attempt to stress test performing relocations whilst the batch is in the
 * CPU domain.
 *
 * A freshly allocated buffer starts in the CPU domain, and the pwrite
 * should also be performed whilst in the CPU domain, and so we should
 * execute the relocations within the CPU domain. If for any reason one of
 * those steps should land it in the GTT domain, we take the secondary
 * precaution of filling the mappable portion of the GATT.
 *
 * In order to detect whether a relocation fails, we first fill a target
 * buffer with a sequence of invalid commands that would cause the GPU to
 * hang immediately, and then attempt to overwrite them with a legal, if
 * short, batchbuffer using a BLT. Then, when we come to execute the bo, if
 * the relocations fail and we copy across either all zeros or garbage, the
 * GPU will hang.
 */

#include "igt.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/time.h>

#include <drm.h>

#include "intel_bufmgr.h"

#define MI_INSTR(opcode, flags) ((opcode) << 23 | (flags))

IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");

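/*
 * Per-generation helpers to emit an MI_STORE_DWORD_IMM into the template.
 * Each variant advances the relocation entry's offset so that it points at
 * the address dword(s) of the instruction; the value operand is skipped and
 * so remains the 0xffffffff poison from the template fill.
 */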
static uint32_t *
gen2_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
{
        *cs++ = MI_STORE_DWORD_IMM - 1;
        addr->offset += sizeof(*cs);
        cs += 1; /* addr */
        cs += 1; /* value: implicit 0xffffffff */
        return cs;
}

static uint32_t *
gen4_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
{
        *cs++ = MI_STORE_DWORD_IMM;
        *cs++ = 0;
        addr->offset += 2 * sizeof(*cs);
        cs += 1; /* addr */
        cs += 1; /* value: implicit 0xffffffff */
        return cs;
}

static uint32_t *
gen8_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
{
        *cs++ = (MI_STORE_DWORD_IMM | 1 << 21) + 1;
        addr->offset += sizeof(*cs);
        igt_assert((addr->delta & 7) == 0);
        cs += 2; /* addr */
        cs += 2; /* value: implicit 0xffffffffffffffff */
        return cs;
}

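/*
 * Per-generation helpers to emit an MI_BATCH_BUFFER_START at the head of
 * the template. Again the relocation entry's offset is advanced to point
 * at the address dword(s) so that execbuf patches in the final address of
 * the jump target.
 */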
static uint32_t *
gen2_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
{
        *cs++ = MI_BATCH_BUFFER_START | 2 << 6;
        addr->offset += sizeof(*cs);
        addr->delta += 1;
        cs += 1; /* addr */
        return cs;
}

static uint32_t *
gen4_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
{
        *cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8;
        addr->offset += sizeof(*cs);
        cs += 1; /* addr */
        return cs;
}

static uint32_t *
gen6_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
{
        *cs++ = MI_BATCH_BUFFER_START | 1 << 8;
        addr->offset += sizeof(*cs);
        cs += 1; /* addr */
        return cs;
}

static uint32_t *
hsw_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
{
        *cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8 | 1 << 13;
        addr->offset += sizeof(*cs);
        cs += 1; /* addr */
        return cs;
}

static uint32_t *
gen8_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
{
        if (((uintptr_t)cs & 7) == 0) {
                *cs++ = MI_NOOP; /* align addr for MI_STORE_DWORD_IMM */
                addr->offset += sizeof(*cs);
        }

        *cs++ = MI_BATCH_BUFFER_START + 1;
        addr->offset += sizeof(*cs);
        cs += 2; /* addr */

        return cs;
}

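/*
 * Build the 4KiB batch template: the page is filled with 0xff poison
 * (invalid commands), a batchbuffer-start at offset 0 jumps over the
 * poison to offset 64, and a store-dword there re-poisons the jump's
 * address field before terminating with MI_BATCH_BUFFER_END. Both
 * instructions rely on their relocation being applied each time, so a
 * skipped or corrupted relocation sends the GPU into the poison and it
 * hangs.
 */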
static void *
create_tmpl(int i915, struct drm_i915_gem_relocation_entry *reloc)
{
        const uint32_t devid = intel_get_drm_devid(i915);
        const int gen = intel_gen(devid);
        uint32_t *(*emit_store_addr)(uint32_t *cs,
                                     struct drm_i915_gem_relocation_entry *addr);
        uint32_t *(*emit_bb_start)(uint32_t *cs,
                                   struct drm_i915_gem_relocation_entry *reloc);
        void *tmpl;

        if (gen >= 8)
                emit_store_addr = gen8_emit_store_addr;
        else if (gen >= 4)
                emit_store_addr = gen4_emit_store_addr;
        else
                emit_store_addr = gen2_emit_store_addr;

        if (gen >= 8)
                emit_bb_start = gen8_emit_bb_start;
        else if (IS_HASWELL(devid))
                emit_bb_start = hsw_emit_bb_start;
        else if (gen >= 6)
                emit_bb_start = gen6_emit_bb_start;
        else if (gen >= 4)
                emit_bb_start = gen4_emit_bb_start;
        else
                emit_bb_start = gen2_emit_bb_start;

        tmpl = malloc(4096);
        igt_assert(tmpl);
        memset(tmpl, 0xff, 4096);

        /* Jump over the booby traps to the end */
        reloc[0].delta = 64;
        emit_bb_start(tmpl, &reloc[0]);

        /* Restore the bad address to catch missing relocs */
        reloc[1].offset = 64;
        reloc[1].delta = reloc[0].offset;
        *emit_store_addr(tmpl + 64, &reloc[1]) = MI_BATCH_BUFFER_END;

        return tmpl;
}

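/*
 * Create 'count' batches from the template using pwrite, so each bo should
 * still be in the CPU domain, then execute them all forwards, backwards,
 * and once more after explicitly moving each bo back into the CPU domain,
 * checking that the relocations are performed correctly every time.
 */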
static void run_test(int i915, int count)
{
        struct drm_i915_gem_execbuffer2 execbuf;
        struct drm_i915_gem_relocation_entry reloc[2];
        struct drm_i915_gem_exec_object2 obj;

        uint32_t *handles;
        uint32_t *tmpl;

        handles = malloc(count * sizeof(uint32_t));
        igt_assert(handles);

        memset(reloc, 0, sizeof(reloc));
        tmpl = create_tmpl(i915, reloc);
        for (int i = 0; i < count; i++) {
                handles[i] = gem_create(i915, 4096);
                gem_write(i915, handles[i], 0, tmpl, 4096);
        }
        free(tmpl);

        memset(&obj, 0, sizeof(obj));
        obj.relocs_ptr = to_user_pointer(reloc);
        obj.relocation_count = ARRAY_SIZE(reloc);

        memset(&execbuf, 0, sizeof(execbuf));
        execbuf.buffers_ptr = to_user_pointer(&obj);
        execbuf.buffer_count = 1;

        /* fill the entire gart with batches and run them */
        for (int i = 0; i < count; i++) {
                obj.handle = handles[i];

                reloc[0].target_handle = obj.handle;
                reloc[0].presumed_offset = -1;
                reloc[1].target_handle = obj.handle;
                reloc[1].presumed_offset = -1;

                gem_execbuf(i915, &execbuf);
        }

        /* And again in reverse to try and catch the relocation code out */
        for (int i = 0; i < count; i++) {
                obj.handle = handles[count - i - 1];

                reloc[0].target_handle = obj.handle;
                reloc[0].presumed_offset = -1;
                reloc[1].target_handle = obj.handle;
                reloc[1].presumed_offset = -1;

                gem_execbuf(i915, &execbuf);
        }

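        /*
         * For the final pass, explicitly move each batch back into the CPU
         * domain before executing it, so the kernel has to process the
         * relocations whilst the batch is in the CPU domain, which is the
         * path this test targets.
         */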
        /* Third time unlucky? */
        for (int i = 0; i < count; i++) {
                obj.handle = handles[i];

                reloc[0].target_handle = obj.handle;
                reloc[0].presumed_offset = -1;
                reloc[1].target_handle = obj.handle;
                reloc[1].presumed_offset = -1;

                gem_set_domain(i915, obj.handle,
                               I915_GEM_DOMAIN_CPU,
                               I915_GEM_DOMAIN_CPU);

                gem_execbuf(i915, &execbuf);
        }

        for (int i = 0; i < count; i++)
                gem_close(i915, handles[i]);
        free(handles);
}

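/*
 * The subtests rely on the hang detector forked in the fixture below: if a
 * relocation is missed, the poisoned commands are executed, the GPU hangs
 * and the detector fails the test.
 */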
igt_main
{
        int i915;

        igt_fixture {
                i915 = drm_open_driver(DRIVER_INTEL);
                igt_require_gem(i915);

                /* could use BLT_FILL instead for gen2 */
                igt_require(gem_can_store_dword(i915, 0));

                igt_fork_hang_detector(i915);
        }

        igt_subtest("basic")
                run_test(i915, 1);

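        /*
         * "full" and "forked" create one more 4KiB batch than fits in the
         * mappable aperture, so not every bo can remain resident there
         * while the relocations are performed.
         */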
        igt_subtest("full") {
                uint64_t aper_size = gem_mappable_aperture_size();
                unsigned long count = aper_size / 4096 + 1;

                intel_require_memory(count, 4096, CHECK_RAM);

                run_test(i915, count);
        }

        igt_subtest("forked") {
                uint64_t aper_size = gem_mappable_aperture_size();
                unsigned long count = aper_size / 4096 + 1;
                int ncpus = sysconf(_SC_NPROCESSORS_ONLN);

                intel_require_memory(count, 4096, CHECK_RAM);

                igt_fork(child, ncpus)
                        run_test(i915, count / ncpus + 1);
                igt_waitchildren();
        }

        igt_fixture {
                igt_stop_hang_detector();
        }
}