/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <[email protected]>
 *
 */

#include "igt.h"
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <pthread.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include "drm.h"

#define OBJECT_SIZE 1024*1024
#define CHUNK_SIZE 32

#define COPY_BLT_CMD (2<<29|0x53<<22|0x6)
#define BLT_WRITE_ALPHA (1<<21)
#define BLT_WRITE_RGB (1<<20)
#define BLT_WRITE_ARGB (BLT_WRITE_ALPHA | BLT_WRITE_RGB)

#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
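/* With HANDLE_LUT, relocation target_handle values are indices into the
 * execobject array rather than GEM handles; kernels lacking the flag reject
 * the execbuf, in which case the tests fall back to plain handles.
 */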

IGT_TEST_DESCRIPTION("Test of streaming writes into active GPU sources");
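
/*
 * Two flavours of streaming writes are exercised:
 *
 * test_streaming: the CPU streams fresh data into the source object
 * (through a CPU, GTT or WC mapping) while pre-built blits copy it,
 * chunk by chunk, into the destination, which is then checked.
 *
 * test_batch: the batch buffer itself is the streaming target; each
 * copy command is written through the chosen mapping immediately
 * before it is executed via batch_start_offset.
 */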

#define SRC 0
#define DST 1
#define BATCH 2

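/* Alias the handle/offset fields of the shared execobject array. */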
#define src exec[SRC].handle
#define src_offset exec[SRC].offset
#define dst exec[DST].handle
#define dst_offset exec[DST].offset

static void test_streaming(int fd, int mode, int sync)
{
	const int has_64bit_reloc = intel_gen(intel_get_drm_devid(fd)) >= 8;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 exec[3];
	struct drm_i915_gem_relocation_entry reloc[128];
	uint32_t tmp[] = { MI_BATCH_BUFFER_END };
	uint64_t __src_offset, __dst_offset;
	uint32_t *s, *d;
	uint32_t offset;
	struct {
		uint32_t handle;
		uint64_t offset;
	} *batch;
	int i, n;

	memset(exec, 0, sizeof(exec));
	exec[SRC].handle = gem_create(fd, OBJECT_SIZE);
	exec[DST].handle = gem_create(fd, OBJECT_SIZE);

	switch (mode) {
	case 0: /* cpu/snoop */
		gem_set_caching(fd, src, I915_CACHING_CACHED);
		s = gem_mmap__cpu(fd, src, 0, OBJECT_SIZE,
				  PROT_READ | PROT_WRITE);
		break;
	case 1: /* gtt */
		s = gem_mmap__gtt(fd, src, OBJECT_SIZE,
				  PROT_READ | PROT_WRITE);
		break;
	case 2: /* wc */
		s = gem_mmap__wc(fd, src, 0, OBJECT_SIZE,
				 PROT_READ | PROT_WRITE);
		break;
	}
	*s = 0; /* fault the object into the mappable range first (for GTT) */

	d = gem_mmap__cpu(fd, dst, 0, OBJECT_SIZE, PROT_READ);

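	/*
	 * Submit a dummy execbuf with dst as the (no-op) batch so that both
	 * objects are bound and their GTT offsets are reported back.
	 */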
	gem_write(fd, dst, 0, tmp, sizeof(tmp));
	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(exec);
	execbuf.buffer_count = 2;
	execbuf.flags = LOCAL_I915_EXEC_HANDLE_LUT;
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags = 0;
		igt_require(__gem_execbuf(fd, &execbuf) == 0);
	}
	/* We assume that the active objects are fixed to avoid relocations */
	__src_offset = src_offset;
	__dst_offset = dst_offset;

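	/*
	 * Each 64-byte copy command needs two relocations: one at dword 4
	 * for the destination address and one at dword 7 (8 on gen8+, where
	 * addresses are 64-bit) for the source address.
	 */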
	memset(reloc, 0, sizeof(reloc));
	for (i = 0; i < 64; i++) {
		reloc[2*i+0].offset = 64*i + 4 * sizeof(uint32_t);
		reloc[2*i+0].delta = 0;
		reloc[2*i+0].target_handle = execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT ? DST : dst;
		reloc[2*i+0].presumed_offset = dst_offset;
		reloc[2*i+0].read_domains = I915_GEM_DOMAIN_RENDER;
		reloc[2*i+0].write_domain = I915_GEM_DOMAIN_RENDER;

		reloc[2*i+1].offset = 64*i + 7 * sizeof(uint32_t);
		if (has_64bit_reloc)
			reloc[2*i+1].offset += sizeof(uint32_t);
		reloc[2*i+1].delta = 0;
		reloc[2*i+1].target_handle = execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT ? SRC : src;
		reloc[2*i+1].presumed_offset = src_offset;
		reloc[2*i+1].read_domains = I915_GEM_DOMAIN_RENDER;
		reloc[2*i+1].write_domain = 0;
	}
	gem_execbuf(fd, &execbuf);
	igt_assert_eq_u64(__src_offset, src_offset);
	igt_assert_eq_u64(__dst_offset, dst_offset);

	exec[DST].flags = EXEC_OBJECT_WRITE;
	exec[BATCH].relocation_count = 2;
	execbuf.buffer_count = 3;
	execbuf.flags |= I915_EXEC_NO_RELOC;
	if (gem_has_blt(fd))
		execbuf.flags |= I915_EXEC_BLT;

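	/*
	 * Pre-build every copy command up front: one 4096-byte batch object
	 * per 64 chunks, each command occupying a 64-byte slot terminated by
	 * MI_BATCH_BUFFER_END so it can be executed individually via
	 * batch_start_offset.
	 */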
	batch = malloc(sizeof(*batch) * (OBJECT_SIZE / CHUNK_SIZE / 64));
	for (i = n = 0; i < OBJECT_SIZE / CHUNK_SIZE / 64; i++) {
		uint32_t *base;

		batch[i].handle = gem_create(fd, 4096);
		batch[i].offset = 0;

		base = gem_mmap__cpu(fd, batch[i].handle, 0, 4096, PROT_WRITE);
		gem_set_domain(fd, batch[i].handle,
			       I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);

		for (int j = 0; j < 64; j++) {
			unsigned x = (n * CHUNK_SIZE) % 4096 >> 2;
			unsigned y = (n * CHUNK_SIZE) / 4096;
			uint32_t *b = base + 16 * j;
			int k = 0;

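			/*
			 * XY_SRC_COPY_BLT: treat both objects as 4096-byte
			 * pitch, 32bpp surfaces and copy one 8-pixel (32-byte)
			 * span from (x, y) in src to (x, y) in dst with
			 * ROP 0xcc (copy). On gen8+ the addresses are 64-bit,
			 * so the command grows by two dwords.
			 */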
			b[k] = COPY_BLT_CMD | BLT_WRITE_ARGB;
			if (has_64bit_reloc)
				b[k] += 2;
			k++;
			b[k++] = 0xcc << 16 | 1 << 25 | 1 << 24 | 4096;
			b[k++] = (y << 16) | x;
			b[k++] = ((y+1) << 16) | (x + (CHUNK_SIZE >> 2));
			b[k++] = dst_offset;
			if (has_64bit_reloc)
				b[k++] = dst_offset >> 32;
			b[k++] = (y << 16) | x;
			b[k++] = 4096;
			b[k++] = src_offset;
			if (has_64bit_reloc)
				b[k++] = src_offset >> 32;
			b[k++] = MI_BATCH_BUFFER_END;

			n++;
		}

		munmap(base, 4096);
	}

	for (int pass = 0; pass < 256; pass++) {
		int domain = mode ? I915_GEM_DOMAIN_GTT : I915_GEM_DOMAIN_CPU;
		gem_set_domain(fd, src, domain, domain);

		if (pass == 0) {
			for (i = 0; i < OBJECT_SIZE/4; i++)
				s[i] = i;
		}

		/* Now copy from the src to the dst in 32-byte chunks */
		for (offset = 0; offset < OBJECT_SIZE; offset += CHUNK_SIZE) {
			int b;

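			/*
			 * For every pass after the first, refill this chunk
			 * from the CPU. With sync we wait for the GPU
			 * (set_domain) before each write; without it the
			 * writes stream into the object while earlier copies
			 * may still be executing.
			 */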
			if (pass) {
				if (sync)
					gem_set_domain(fd, src, domain, domain);
				for (i = 0; i < CHUNK_SIZE/4; i++)
					s[offset/4 + i] = (OBJECT_SIZE*pass + offset)/4 + i;
			}

			igt_assert(exec[DST].flags & EXEC_OBJECT_WRITE);

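			/*
			 * Select the pre-built command for this chunk: batch
			 * object b, slot n within it, executed from its
			 * 64-byte offset.
			 */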
			b = offset / CHUNK_SIZE / 64;
			n = offset / CHUNK_SIZE % 64;
			exec[BATCH].relocs_ptr = to_user_pointer(reloc + 2*n);
			exec[BATCH].handle = batch[b].handle;
			exec[BATCH].offset = batch[b].offset;
			execbuf.batch_start_offset = 64*n;

			gem_execbuf(fd, &execbuf);
			igt_assert_eq_u64(__src_offset, src_offset);
			igt_assert_eq_u64(__dst_offset, dst_offset);

			batch[b].offset = exec[BATCH].offset;
		}

		gem_set_domain(fd, dst, I915_GEM_DOMAIN_CPU, 0);
		for (offset = 0; offset < OBJECT_SIZE/4; offset++)
			igt_assert_eq(pass*OBJECT_SIZE/4 + offset, d[offset]);
	}

	for (i = 0; i < OBJECT_SIZE / CHUNK_SIZE / 64; i++)
		gem_close(fd, batch[i].handle);
	free(batch);

	munmap(s, OBJECT_SIZE);
	gem_close(fd, src);
	munmap(d, OBJECT_SIZE);
	gem_close(fd, dst);
}

static void test_batch(int fd, int mode, int reverse)
{
	const int has_64bit_reloc = intel_gen(intel_get_drm_devid(fd)) >= 8;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 exec[3];
	struct drm_i915_gem_relocation_entry reloc[2];
	uint32_t tmp[] = { MI_BATCH_BUFFER_END };
	uint64_t __src_offset, __dst_offset;
	bool need_64b_start_offset = true;
	uint64_t batch_size;
	uint32_t *s, *d;
	uint32_t *base;
	uint32_t offset;

	memset(exec, 0, sizeof(exec));
	exec[DST].handle = gem_create(fd, OBJECT_SIZE);
	exec[SRC].handle = gem_create(fd, OBJECT_SIZE);

	s = gem_mmap__wc(fd, src, 0, OBJECT_SIZE, PROT_READ | PROT_WRITE);

	d = gem_mmap__cpu(fd, dst, 0, OBJECT_SIZE, PROT_READ);

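	/*
	 * Here the batch itself is the streaming target: each copy command is
	 * written through the chosen mapping just before it is executed. The
	 * relocation target encoding depends on whether HANDLE_LUT is in use,
	 * so decide the execbuf flags before filling in the relocations.
	 */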
	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(exec);
	execbuf.buffer_count = 3;
	execbuf.flags = LOCAL_I915_EXEC_HANDLE_LUT;
	if (gem_has_blt(fd))
		execbuf.flags |= I915_EXEC_BLT;

	memset(reloc, 0, sizeof(reloc));
	reloc[0].offset = 4 * sizeof(uint32_t);
	reloc[0].delta = 0;
	reloc[0].target_handle = execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT ? DST : dst;
	reloc[0].presumed_offset = dst_offset;
	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;

	reloc[1].offset = 7 * sizeof(uint32_t);
	if (has_64bit_reloc)
		reloc[1].offset += sizeof(uint32_t);
	reloc[1].delta = 0;
	reloc[1].target_handle = execbuf.flags & LOCAL_I915_EXEC_HANDLE_LUT ? SRC : src;
	reloc[1].presumed_offset = src_offset;
	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
	reloc[1].write_domain = 0;

	batch_size = ALIGN(OBJECT_SIZE / CHUNK_SIZE * 128, 4096);
	exec[BATCH].relocs_ptr = to_user_pointer(reloc);
	exec[BATCH].relocation_count = 2;
	exec[BATCH].handle = gem_create(fd, batch_size);

	switch (mode) {
	case 0: /* cpu/snoop */
		igt_require(gem_has_llc(fd));
		base = gem_mmap__cpu(fd, exec[BATCH].handle, 0, batch_size,
				     PROT_READ | PROT_WRITE);
		break;
	case 1: /* gtt */
		base = gem_mmap__gtt(fd, exec[BATCH].handle, batch_size,
				     PROT_READ | PROT_WRITE);
		break;
	case 2: /* wc */
		base = gem_mmap__wc(fd, exec[BATCH].handle, 0, batch_size,
				    PROT_READ | PROT_WRITE);
		break;
	}
	*base = 0; /* fault the object into the mappable range first */

	gem_write(fd, exec[BATCH].handle, 0, tmp, sizeof(tmp));
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags &= ~LOCAL_I915_EXEC_HANDLE_LUT;
		reloc[0].target_handle = dst;
		reloc[1].target_handle = src;
		gem_execbuf(fd, &execbuf);
	}
	execbuf.flags |= I915_EXEC_NO_RELOC;
	exec[DST].flags = EXEC_OBJECT_WRITE;
	/* We assume that the active objects are fixed to avoid relocations */
	exec[BATCH].relocation_count = 0;
	__src_offset = src_offset;
	__dst_offset = dst_offset;

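	/*
	 * Prime the batch for the chosen mapping (the offset variable is
	 * reused to hold the domain here).
	 */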
	offset = mode ? I915_GEM_DOMAIN_GTT : I915_GEM_DOMAIN_CPU;
	gem_set_domain(fd, exec[BATCH].handle, offset, offset);
	for (int pass = 0; pass < 256; pass++) {
		gem_set_domain(fd, src, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
		for (offset = 0; offset < OBJECT_SIZE/4; offset++)
			s[offset] = OBJECT_SIZE*pass/4 + offset;

		/* Now copy from the src to the dst in 32-byte chunks */
		for (offset = 0; offset < OBJECT_SIZE / CHUNK_SIZE; offset++) {
			unsigned x = (offset * CHUNK_SIZE) % 4096 >> 2;
			unsigned y = (offset * CHUNK_SIZE) / 4096;
			int k;

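			/*
			 * Each command lives in its own 128-byte slot; with
			 * reverse, the slots are consumed from the end of the
			 * batch back towards the start.
			 */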
			execbuf.batch_start_offset = 128 * offset;
			if (!need_64b_start_offset)
				execbuf.batch_start_offset += 8 * (pass & 7);
			igt_assert(execbuf.batch_start_offset <= batch_size - 64);
			if (reverse)
				execbuf.batch_start_offset = batch_size - execbuf.batch_start_offset - 64;
			igt_assert(execbuf.batch_start_offset <= batch_size - 64);
			k = execbuf.batch_start_offset / 4;

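			/*
			 * Stream the XY_SRC_COPY_BLT for this chunk into the
			 * batch through the CPU/GTT/WC mapping and execute it
			 * immediately.
			 */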
			base[k] = COPY_BLT_CMD | BLT_WRITE_ARGB;
			if (has_64bit_reloc)
				base[k] += 2;
			k++;
			base[k++] = 0xcc << 16 | 1 << 25 | 1 << 24 | 4096;
			base[k++] = (y << 16) | x;
			base[k++] = ((y+1) << 16) | (x + (CHUNK_SIZE >> 2));
			base[k++] = dst_offset;
			if (has_64bit_reloc)
				base[k++] = dst_offset >> 32;
			base[k++] = (y << 16) | x;
			base[k++] = 4096;
			base[k++] = src_offset;
			if (has_64bit_reloc)
				base[k++] = src_offset >> 32;
			base[k++] = MI_BATCH_BUFFER_END;

			igt_assert(exec[DST].flags & EXEC_OBJECT_WRITE);
			gem_execbuf(fd, &execbuf);
			igt_assert_eq_u64(__src_offset, src_offset);
			igt_assert_eq_u64(__dst_offset, dst_offset);
		}

		gem_set_domain(fd, dst, I915_GEM_DOMAIN_CPU, 0);
		for (offset = 0; offset < OBJECT_SIZE/4; offset++)
			igt_assert_eq(pass*OBJECT_SIZE/4 + offset, d[offset]);
	}

	munmap(base, batch_size);
	gem_close(fd, exec[BATCH].handle);

	munmap(s, OBJECT_SIZE);
	gem_close(fd, src);
	munmap(d, OBJECT_SIZE);
	gem_close(fd, dst);
}

igt_main
{
	int fd, sync;

	igt_fixture {
		fd = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(fd);
	}

	for (sync = 2; sync--; ) {
		igt_subtest_f("cpu%s", sync ? "-sync" : "")
			test_streaming(fd, 0, sync);
		igt_subtest_f("gtt%s", sync ? "-sync" : "")
			test_streaming(fd, 1, sync);
		igt_subtest_f("wc%s", sync ? "-sync" : "")
			test_streaming(fd, 2, sync);
	}

	igt_subtest("batch-cpu")
		test_batch(fd, 0, 0);
	igt_subtest("batch-gtt")
		test_batch(fd, 1, 0);
	igt_subtest("batch-wc")
		test_batch(fd, 2, 0);
	igt_subtest("batch-reverse-cpu")
		test_batch(fd, 0, 1);
	igt_subtest("batch-reverse-gtt")
		test_batch(fd, 1, 1);
	igt_subtest("batch-reverse-wc")
		test_batch(fd, 2, 1);

	igt_fixture
		close(fd);
}