/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Daniel Vetter <[email protected]>
 *
 */

/** @file gem_set_tiling_vs_blt.c
 *
 * Testcase: Check for proper synchronization of tiling changes vs. tiled GPU
 * access.
 *
 * The blitter on gen3 and earlier needs properly set up fences, which also
 * means that for untiled blits we must not set up a fence before that blt has
 * finished.
 *
 * Current kernels have a bug there, but it's pretty hard to hit because you
 * need:
 * - a blt on an untiled object which is aligned correctly for tiling,
 * - a set_tiling to switch that object to a tiled layout,
 * - another blt without any intervening CPU access that uses this object.
 *
 * The testcase has been extended to also check tiled->untiled and
 * tiled->tiled transitions (i.e. changing the stride).
 */

#include "igt.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <stdbool.h>
#include "drm.h"
#include "intel_bufmgr.h"

IGT_TEST_DESCRIPTION("Check for proper synchronization of tiling changes vs."
		     " tiled gpu access.");

static drm_intel_bufmgr *bufmgr;
struct intel_batchbuffer *batch;
uint32_t devid;

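/* 1 MiB test surface; for a given stride it is interpreted as a
 * TEST_WIDTH(stride) x TEST_HEIGHT(stride) array of 32bpp pixels. */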
#define TEST_SIZE (1024*1024)
#define TEST_STRIDE (4*1024)
#define TEST_HEIGHT(stride)	(TEST_SIZE/(stride))
#define TEST_WIDTH(stride)	((stride)/4)

uint32_t data[TEST_SIZE/4];

static void do_test(uint32_t tiling, unsigned stride,
		    uint32_t tiling_after, unsigned stride_after)
{
	drm_intel_bo *busy_bo, *test_bo, *target_bo;
	int i, ret;
	uint32_t *ptr;
	uint32_t test_bo_handle;
	uint32_t blt_stride, blt_bits;
	bool tiling_changed = false;

	igt_info("filling ring .. ");
	busy_bo = drm_intel_bo_alloc(bufmgr, "busy bo bo", 16*1024*1024, 4096);

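	/* Queue a pile of dummy blits on busy_bo so the blitter ring stays
	 * busy for a while; the tiling games below then race against this
	 * outstanding GPU work. */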
	for (i = 0; i < 250; i++) {
		BLIT_COPY_BATCH_START(0);
		OUT_BATCH((3 << 24) | /* 32 bits */
			  (0xcc << 16) | /* copy ROP */
			  2*1024*4);
		OUT_BATCH(0 << 16 | 1024);
		OUT_BATCH((2048) << 16 | (2048));
		OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
		OUT_BATCH(0 << 16 | 0);
		OUT_BATCH(2*1024*4);
		OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
		ADVANCE_BATCH();

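		/* On gen6+ additionally emit a clip setup command (harmless,
		 * clipping is not enabled in the copies) to eat up some more
		 * ring space per iteration. */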
		if (batch->gen >= 6) {
			BEGIN_BATCH(3, 0);
			OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
			OUT_BATCH(0);
			OUT_BATCH(0);
			ADVANCE_BATCH();
		}
	}
	intel_batchbuffer_flush(batch);

	igt_info("playing tricks .. ");
	/* first allocate the target so it gets out of the way of playing funky
	 * tricks */
	target_bo = drm_intel_bo_alloc(bufmgr, "target bo", TEST_SIZE, 4096);

	/* allocate buffer with parameters _after_ transition we want to check
	 * and touch it, so that it's properly aligned in the gtt. */
	test_bo = drm_intel_bo_alloc(bufmgr, "tiled busy bo", TEST_SIZE, 4096);
	test_bo_handle = test_bo->handle;
	ret = drm_intel_bo_set_tiling(test_bo, &tiling_after, stride_after);
	igt_assert_eq(ret, 0);
	drm_intel_gem_bo_map_gtt(test_bo);
	ptr = test_bo->virtual;
	*ptr = 0;
	ptr = NULL;
	drm_intel_gem_bo_unmap_gtt(test_bo);

	drm_intel_bo_unreference(test_bo);

	test_bo = NULL;

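	/* Reallocate: with bo reuse enabled, libdrm should hand back the very
	 * same GEM object (same handle), still bound at the GTT offset that
	 * was just prepared with the _after_ parameters. */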
	/* note we need a bo bigger than batches, otherwise the buffer reuse
	 * trick will fail. */
	test_bo = drm_intel_bo_alloc(bufmgr, "busy bo", TEST_SIZE, 4096);
	/* double check that the reuse trick worked */
	igt_assert(test_bo_handle == test_bo->handle);

	test_bo_handle = test_bo->handle;
	/* ensure we have the right tiling before we start. */
	ret = drm_intel_bo_set_tiling(test_bo, &tiling, stride);
	igt_assert_eq(ret, 0);

	if (tiling == I915_TILING_NONE) {
		drm_intel_bo_subdata(test_bo, 0, TEST_SIZE, data);
	} else {
		drm_intel_gem_bo_map_gtt(test_bo);
		ptr = test_bo->virtual;
		memcpy(ptr, data, TEST_SIZE);
		ptr = NULL;
		drm_intel_gem_bo_unmap_gtt(test_bo);
	}

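	/* On gen4+ the blitter expects the pitch of tiled surfaces in dwords
	 * and needs the src-tiled bit set in the command. */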
	blt_stride = stride;
	blt_bits = 0;
	if (intel_gen(devid) >= 4 && tiling != I915_TILING_NONE) {
		blt_stride /= 4;
		blt_bits = XY_SRC_COPY_BLT_SRC_TILED;
	}

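	/* Blit the whole pattern from test_bo (with the _before_ tiling
	 * parameters) into target_bo; this keeps test_bo busy on the GPU. */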
	BLIT_COPY_BATCH_START(blt_bits);
	OUT_BATCH((3 << 24) | /* 32 bits */
		  (0xcc << 16) | /* copy ROP */
		  stride);
	OUT_BATCH(0 << 16 | 0);
	OUT_BATCH((TEST_HEIGHT(stride)) << 16 | (TEST_WIDTH(stride)));
	OUT_RELOC_FENCED(target_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
	OUT_BATCH(0 << 16 | 0);
	OUT_BATCH(blt_stride);
	OUT_RELOC_FENCED(test_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
	ADVANCE_BATCH();
	intel_batchbuffer_flush(batch);

	drm_intel_bo_unreference(test_bo);

	test_bo = drm_intel_bo_alloc_for_render(bufmgr, "tiled busy bo", TEST_SIZE, 4096);
	/* double check that the reuse trick worked */
	igt_assert(test_bo_handle == test_bo->handle);
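	/* This is the interesting part: the object is still busy from the blit
	 * above, so the kernel must not rearrange the fence setup for the new
	 * tiling until that blit has actually completed. */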
	ret = drm_intel_bo_set_tiling(test_bo, &tiling_after, stride_after);
	igt_assert_eq(ret, 0);

	/* Note: We don't care about gen4+ here because the blitter doesn't use
	 * fences there. So not setting tiling flags on the tiled buffer is ok.
	 */
	BLIT_COPY_BATCH_START(0);
	OUT_BATCH((3 << 24) | /* 32 bits */
		  (0xcc << 16) | /* copy ROP */
		  stride_after);
	OUT_BATCH(0 << 16 | 0);
	OUT_BATCH((1) << 16 | (1));
	OUT_RELOC_FENCED(test_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
	OUT_BATCH(0 << 16 | 0);
	OUT_BATCH(stride_after);
	OUT_RELOC_FENCED(test_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
	ADVANCE_BATCH();
	intel_batchbuffer_flush(batch);

	/* Now try to trick the kernel into changing the fencing too early. */

	igt_info("checking .. ");
	memset(data, 0, TEST_SIZE);
	drm_intel_bo_get_subdata(target_bo, 0, TEST_SIZE, data);
	for (i = 0; i < TEST_SIZE/4; i++)
		igt_assert(data[i] == i);

	/* check whether tiling on the test_bo actually changed. */
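	/* The pattern was written into test_bo through the old layout; if the
	 * kernel really switched tilings, reading it back through a GTT
	 * mapping (detiled with the new fence) no longer matches data[]. */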
	drm_intel_gem_bo_map_gtt(test_bo);
	ptr = test_bo->virtual;
	for (i = 0; i < TEST_SIZE/4; i++)
		if (ptr[i] != data[i])
			tiling_changed = true;
	ptr = NULL;
	drm_intel_gem_bo_unmap_gtt(test_bo);
	igt_assert(tiling_changed);

	drm_intel_bo_unreference(test_bo);
	drm_intel_bo_unreference(target_bo);
	drm_intel_bo_unreference(busy_bo);
	igt_info("done\n");
}

int fd;

igt_main
{
	int i;
	uint32_t tiling, tiling_after;

	igt_skip_on_simulation();

	igt_fixture {
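		/* reference pattern: data[i] == i, checked again after the copy */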
		for (i = 0; i < 1024*256; i++)
			data[i] = i;

		fd = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(fd);

		bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
		drm_intel_bufmgr_gem_enable_reuse(bufmgr);
		devid = intel_get_drm_devid(fd);
		batch = intel_batchbuffer_alloc(bufmgr, devid);
	}

	igt_subtest("untiled-to-tiled") {
		tiling = I915_TILING_NONE;
		tiling_after = I915_TILING_X;
		do_test(tiling, TEST_STRIDE, tiling_after, TEST_STRIDE);
		igt_assert(tiling == I915_TILING_NONE);
		igt_assert(tiling_after == I915_TILING_X);
	}

	igt_subtest("tiled-to-untiled") {
		tiling = I915_TILING_X;
		tiling_after = I915_TILING_NONE;
		do_test(tiling, TEST_STRIDE, tiling_after, TEST_STRIDE);
		igt_assert(tiling == I915_TILING_X);
		igt_assert(tiling_after == I915_TILING_NONE);
	}

	igt_subtest("tiled-to-tiled") {
		tiling = I915_TILING_X;
		tiling_after = I915_TILING_X;
		do_test(tiling, TEST_STRIDE/2, tiling_after, TEST_STRIDE);
		igt_assert(tiling == I915_TILING_X);
		igt_assert(tiling_after == I915_TILING_X);
	}
}