1 /*
2 * Copyright © 2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <fcntl.h>
25 #include <string.h>
26 #include <xf86drm.h>
27 #include <sys/mman.h>
28
29 #include <gtest/gtest.h>
30
31 #include "c99_compat.h"
32 #include "common/intel_gem.h"
33 #include "dev/intel_device_info.h"
34 #include "intel_gem.h"
35 #include "isl/isl.h"
36 #include "drm-uapi/i915_drm.h"
37 #include "genxml/gen_macros.h"
38 #include "util/macros.h"
39
40 class mi_builder_test;
41
/* Minimal (GEM handle, byte offset) pair used as the __gen_address_type for
 * the genxml pack helpers: it identifies a location inside one of the two
 * test BOs without committing to a GPU virtual address up front.
 */
struct address {
   uint32_t gem_handle; /* GEM buffer object the address points into */
   uint32_t offset;     /* byte offset within that BO */
};
46
47 #define __gen_address_type struct address
48 #define __gen_user_data ::mi_builder_test
49
50 uint64_t __gen_combine_address(mi_builder_test *test, void *location,
51 struct address addr, uint32_t delta);
52 void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords);
53 struct address __gen_get_batch_address(mi_builder_test *test,
54 void *location);
55 bool *__gen_get_write_fencing_status(mi_builder_test *test);
56
57 struct address
__gen_address_offset(address addr,uint64_t offset)58 __gen_address_offset(address addr, uint64_t offset)
59 {
60 addr.offset += offset;
61 return addr;
62 }
63
/* Scratch register the tests may clobber freely.  On HSW+ we can use an
 * MI_MATH ALU GPR; on older gens we borrow a 3DPRIM register that nothing
 * in these tests otherwise touches.
 */
#if GFX_VERx10 >= 75
#define RSVD_TEMP_REG 0x2678 /* MI_ALU_REG15 */
#else
#define RSVD_TEMP_REG 0x2430 /* GFX7_3DPRIM_START_VERTEX */
#endif
/* Leave one ALU GPR (the one above) out of the builder's allocator. */
#define MI_BUILDER_NUM_ALLOC_GPRS 15
/* Layout of the data BO: input region at 0, output region at 2048. */
#define INPUT_DATA_OFFSET 0
#define OUTPUT_DATA_OFFSET 2048

#define MI_BUILDER_CAN_WRITE_BATCH GFX_VER >= 8

/* Token-pasting shims so emit_cmd() below can derive the genxml length,
 * bias, header, and pack function from a single command name.
 */
#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

#include "genxml/genX_pack.h"
#include "mi_builder.h"

/* Emit a raw genxml command into the batch.  The for-loop trick gives the
 * caller a block scope in which to fill in `name`; the command is packed
 * into the batch exactly once when the block exits.
 */
#define emit_cmd(cmd, name)                                           \
   for (struct cmd name = { __genxml_cmd_header(cmd) },               \
        *_dst = (struct cmd *) emit_dwords(__genxml_cmd_length(cmd)); \
        __builtin_expect(_dst != NULL, 1);                            \
        __genxml_cmd_pack(cmd)(this, (void *)_dst, &name), _dst = NULL)
89 #include <vector>
90
/* GTest fixture that owns a DRM render node, a batch BO, and a data BO, and
 * wraps an mi_builder so each test can emit MI commands, execute them, and
 * inspect the results the GPU wrote into the data BO.
 */
class mi_builder_test : public ::testing::Test {
public:
   mi_builder_test();
   ~mi_builder_test();

   void SetUp();

   /* Reserve the next num_dwords dwords at the end of the batch. */
   void *emit_dwords(int num_dwords);
   /* Terminate the batch, submit it, and block until it completes. */
   void submit_batch();

   /* Address of a byte offset inside the input region of the data BO. */
   inline address in_addr(uint32_t offset)
   {
      address addr;
      addr.gem_handle = data_bo_handle;
      addr.offset = INPUT_DATA_OFFSET + offset;
      return addr;
   }

   /* Address of a byte offset inside the output region of the data BO. */
   inline address out_addr(uint32_t offset)
   {
      address addr;
      addr.gem_handle = data_bo_handle;
      addr.offset = OUTPUT_DATA_OFFSET + offset;
      return addr;
   }

   /* mi_value views of the input/output regions, 64- and 32-bit wide. */
   inline mi_value in_mem64(uint32_t offset)
   {
      return mi_mem64(in_addr(offset));
   }

   inline mi_value in_mem32(uint32_t offset)
   {
      return mi_mem32(in_addr(offset));
   }

   inline mi_value out_mem64(uint32_t offset)
   {
      return mi_mem64(out_addr(offset));
   }

   inline mi_value out_mem32(uint32_t offset)
   {
      return mi_mem32(out_addr(offset));
   }

   int fd;                 /* render-node fd; -1 until SetUp() finds a device */
   uint32_t ctx_id;        /* GEM context the batches execute in */
   intel_device_info devinfo;

   uint32_t batch_bo_handle;
#if GFX_VER >= 8
   uint64_t batch_bo_addr; /* softpinned GPU address of the batch BO */
#endif
   uint32_t batch_offset;  /* byte offset of the next free dword in the batch */
   void *batch_map;        /* CPU mapping of the batch BO */

#if GFX_VER < 8
   /* Relocations accumulated by __gen_combine_address on pre-softpin HW. */
   std::vector<drm_i915_gem_relocation_entry> relocs;
#endif

   uint32_t data_bo_handle;
#if GFX_VER >= 8
   uint64_t data_bo_addr;  /* softpinned GPU address of the data BO */
#endif
   void *data_map;         /* CPU mapping of the data BO */
   char *input;            /* data_map + INPUT_DATA_OFFSET */
   char *output;           /* data_map + OUTPUT_DATA_OFFSET */
   uint64_t canary;        /* garbage pattern the data BO is pre-filled with */

   bool write_fence_status;

   mi_builder b;
};
165
/* Only mark the fd invalid here; all real setup happens in SetUp() so that
 * gtest ASSERT_* macros can be used for failure reporting.
 */
mi_builder_test::mi_builder_test() :
   fd(-1)
{ }
169
~mi_builder_test()170 mi_builder_test::~mi_builder_test()
171 {
172 close(fd);
173 }
174
175 // 1 MB of batch should be enough for anyone, right?
176 #define BATCH_BO_SIZE (256 * 4096)
177 #define DATA_BO_SIZE 4096
178
void
mi_builder_test::SetUp()
{
   drmDevicePtr devices[8];
   int max_devices = drmGetDevices2(0, devices, 8);

   /* Scan for an Intel render node whose generation matches the GFX_VER
    * this test binary was compiled for.
    */
   int i;
   for (i = 0; i < max_devices; i++) {
      if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
          devices[i]->bustype == DRM_BUS_PCI &&
          devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
         fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
         if (fd < 0)
            continue;

         /* We don't really need to do this when running on hardware because
          * we can just pull it from the drmDevice. However, without doing
          * this, intel_dump_gpu gets a bit of heartburn and we can't use the
          * --device option with it.
          */
         int device_id;
         ASSERT_TRUE(intel_gem_get_param(fd, I915_PARAM_CHIPSET_ID, &device_id))
            << strerror(errno);

         ASSERT_TRUE(intel_get_device_info_from_fd(fd, &devinfo, -1, -1));
         /* GFX_VERx10 == 75 builds must run on HSW and nothing else. */
         if (devinfo.ver != GFX_VER ||
             (devinfo.platform == INTEL_PLATFORM_HSW) != (GFX_VERx10 == 75)) {
            close(fd);
            fd = -1;
            continue;
         }


         /* Found a device! */
         break;
      }
   }
   ASSERT_TRUE(i < max_devices) << "Failed to find a DRM device";

   ASSERT_TRUE(intel_gem_create_context(fd, &ctx_id)) << strerror(errno);

   if (GFX_VER >= 8) {
      /* On gfx8+, we require softpin */
      int has_softpin;
      ASSERT_TRUE(intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &has_softpin))
         << strerror(errno);
      ASSERT_TRUE(has_softpin);
   }

   // Create the batch buffer
   drm_i915_gem_create gem_create = drm_i915_gem_create();
   gem_create.size = BATCH_BO_SIZE;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
                      (void *)&gem_create), 0) << strerror(errno);
   batch_bo_handle = gem_create.handle;
#if GFX_VER >= 8
   /* Arbitrary fixed high address for softpinning the batch. */
   batch_bo_addr = 0xffffffffdff70000ULL;
#endif

   if (devinfo.has_caching_uapi) {
      /* Map the BO cached so CPU reads of GPU results are cheap. */
      drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
      gem_caching.handle = batch_bo_handle;
      gem_caching.caching = I915_CACHING_CACHED;
      ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
                         (void *)&gem_caching), 0) << strerror(errno);
   }

   if (devinfo.has_mmap_offset) {
      /* Newer kernels: mmap via the MMAP_OFFSET uAPI. */
      drm_i915_gem_mmap_offset gem_mmap_offset = drm_i915_gem_mmap_offset();
      gem_mmap_offset.handle = batch_bo_handle;
      gem_mmap_offset.flags = devinfo.has_local_mem ?
                              I915_MMAP_OFFSET_FIXED :
                              I915_MMAP_OFFSET_WC;
      ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET,
                         &gem_mmap_offset), 0) << strerror(errno);

      batch_map = mmap(NULL, BATCH_BO_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                       fd, gem_mmap_offset.offset);
      ASSERT_NE(batch_map, MAP_FAILED) << strerror(errno);
   } else {
      /* Legacy kernels: the GEM_MMAP ioctl returns a CPU pointer directly. */
      drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
      gem_mmap.handle = batch_bo_handle;
      gem_mmap.offset = 0;
      gem_mmap.size = BATCH_BO_SIZE;
      gem_mmap.flags = 0;
      ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
                         (void *)&gem_mmap), 0) << strerror(errno);
      batch_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
   }

   // Start the batch at zero
   batch_offset = 0;

   // Create the data buffer
   gem_create = drm_i915_gem_create();
   gem_create.size = DATA_BO_SIZE;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
                      (void *)&gem_create), 0) << strerror(errno);
   data_bo_handle = gem_create.handle;
#if GFX_VER >= 8
   data_bo_addr = 0xffffffffefff0000ULL;
#endif

   if (devinfo.has_caching_uapi) {
      drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
      gem_caching.handle = data_bo_handle;
      gem_caching.caching = I915_CACHING_CACHED;
      ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
                         (void *)&gem_caching), 0) << strerror(errno);
   }

   if (devinfo.has_mmap_offset) {
      drm_i915_gem_mmap_offset gem_mmap_offset = drm_i915_gem_mmap_offset();
      gem_mmap_offset.handle = data_bo_handle;
      gem_mmap_offset.flags = devinfo.has_local_mem ?
                              I915_MMAP_OFFSET_FIXED :
                              I915_MMAP_OFFSET_WC;
      ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET,
                         &gem_mmap_offset), 0) << strerror(errno);

      data_map = mmap(NULL, DATA_BO_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                      fd, gem_mmap_offset.offset);
      ASSERT_NE(data_map, MAP_FAILED) << strerror(errno);
   } else {
      drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
      gem_mmap.handle = data_bo_handle;
      gem_mmap.offset = 0;
      gem_mmap.size = DATA_BO_SIZE;
      gem_mmap.flags = 0;
      ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
                         (void *)&gem_mmap), 0) << strerror(errno);
      data_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
   }

   input = (char *)data_map + INPUT_DATA_OFFSET;
   output = (char *)data_map + OUTPUT_DATA_OFFSET;

   // Fill the test data with garbage
   memset(data_map, 139, DATA_BO_SIZE);
   /* canary mirrors the garbage pattern so tests can assert that bytes
    * adjacent to a store were left untouched.
    */
   memset(&canary, 139, sizeof(canary));

   write_fence_status = false;

   struct isl_device isl_dev;
   isl_device_init(&isl_dev, &devinfo);
   mi_builder_init(&b, &devinfo, this);
   const uint32_t mocs = isl_mocs(&isl_dev, 0, false);
   mi_builder_set_mocs(&b, mocs);
}
328
329 void *
emit_dwords(int num_dwords)330 mi_builder_test::emit_dwords(int num_dwords)
331 {
332 void *ptr = (void *)((char *)batch_map + batch_offset);
333 batch_offset += num_dwords * 4;
334 assert(batch_offset < BATCH_BO_SIZE);
335 return ptr;
336 }
337
void
mi_builder_test::submit_batch()
{
   mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);

   // Round batch up to an even number of dwords.
   /* batch_offset is always a multiple of 4, so bit 2 set means an odd
    * dword count; pad with a single MI_NOOP in that case.
    */
   if (batch_offset & 4)
      mi_builder_emit(&b, GENX(MI_NOOP), noop);

   /* Two objects: [0] the data BO (LUT index 0), [1] the batch BO.  The
    * relocation code in __gen_combine_address relies on these indices.
    */
   drm_i915_gem_exec_object2 objects[2];
   memset(objects, 0, sizeof(objects));

   objects[0].handle = data_bo_handle;
   objects[0].relocation_count = 0;
   objects[0].relocs_ptr = 0;
#if GFX_VER >= 8 /* On gfx8+, we pin everything */
   objects[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
                      EXEC_OBJECT_PINNED |
                      EXEC_OBJECT_WRITE;
   objects[0].offset = data_bo_addr;
#else
   objects[0].flags = EXEC_OBJECT_WRITE;
   objects[0].offset = -1;
#endif

   objects[1].handle = batch_bo_handle;
#if GFX_VER >= 8 /* On gfx8+, we don't use relocations */
   objects[1].relocation_count = 0;
   objects[1].relocs_ptr = 0;
   objects[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
                      EXEC_OBJECT_PINNED;
   objects[1].offset = batch_bo_addr;
#else
   objects[1].relocation_count = relocs.size();
   objects[1].relocs_ptr = (uintptr_t)(void *)&relocs[0];
   objects[1].flags = 0;
   objects[1].offset = -1;
#endif

   drm_i915_gem_execbuffer2 execbuf = drm_i915_gem_execbuffer2();
   execbuf.buffers_ptr = (uintptr_t)(void *)objects;
   execbuf.buffer_count = 2;
   execbuf.batch_start_offset = 0;
   execbuf.batch_len = batch_offset;
   /* HANDLE_LUT lets relocations reference objects by array index. */
   execbuf.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER;
   execbuf.rsvd1 = ctx_id;

   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
                      (void *)&execbuf), 0) << strerror(errno);

   /* Block until the GPU is done so the tests can read results on the CPU. */
   drm_i915_gem_wait gem_wait = drm_i915_gem_wait();
   gem_wait.bo_handle = batch_bo_handle;
   gem_wait.timeout_ns = INT64_MAX;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT,
                      (void *)&gem_wait), 0) << strerror(errno);
}
394
/* Resolve a (BO, offset) address into the 64-bit value packed into a
 * command.  On gfx8+ the BOs are softpinned, so the final GPU address is
 * known; on older gens this appends a relocation entry (side effect) and
 * returns only the delta for the kernel to fix up at execbuf time.
 */
uint64_t
__gen_combine_address(mi_builder_test *test, void *location,
                      address addr, uint32_t delta)
{
#if GFX_VER >= 8
   uint64_t addr_u64 = addr.gem_handle == test->data_bo_handle ?
                       test->data_bo_addr : test->batch_bo_addr;
   return addr_u64 + addr.offset + delta;
#else
   drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry();
   /* HANDLE_LUT indices: 0 = data BO, 1 = batch BO (see submit_batch). */
   reloc.target_handle = addr.gem_handle == test->data_bo_handle ? 0 : 1;
   reloc.delta = addr.offset + delta;
   reloc.offset = (char *)location - (char *)test->batch_map;
   reloc.presumed_offset = -1;
   test->relocs.push_back(reloc);

   return reloc.delta;
#endif
}
414
/* mi_builder callback: expose the fixture's write-fencing flag. */
bool *
__gen_get_write_fencing_status(mi_builder_test *test)
{
   return &test->write_fence_status;
}
420
/* mi_builder callback: allocate batch space from the fixture. */
void *
__gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords)
{
   return test->emit_dwords(num_dwords);
}
426
/* mi_builder callback: turn a CPU pointer into the batch back into an
 * (BO, offset) address so self-modifying batch writes can be addressed.
 */
struct address
__gen_get_batch_address(mi_builder_test *test, void *location)
{
   assert(location >= test->batch_map);
   size_t offset = (char *)location - (char *)test->batch_map;
   assert(offset < BATCH_BO_SIZE);
   assert(offset <= UINT32_MAX);

   return (struct address) {
      .gem_handle = test->batch_bo_handle,
      .offset = (uint32_t)offset,
   };
}
440
441 #include "genxml/genX_pack.h"
442 #include "mi_builder.h"
443
/* Store a 64-bit immediate through 64- and 32-bit memory destinations and
 * check that the 32-bit store leaves the neighbouring dword untouched.
 */
TEST_F(mi_builder_test, imm_mem)
{
   const uint64_t value = 0x0123456789abcdef;

   mi_store(&b, out_mem64(0), mi_imm(value));
   mi_store(&b, out_mem32(8), mi_imm(value));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   /* The dword after a 32-bit store must still hold the canary garbage. */
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
460
461 /* mem -> mem copies are only supported on HSW+ */
462 #if GFX_VERx10 >= 75
/* Memory-to-memory copies in every width combination (64/32 source and
 * destination), checking truncation, zero-extension, and that adjacent
 * canary dwords survive the narrower stores.
 */
TEST_F(mi_builder_test, mem_mem)
{
   const uint64_t value = 0x0123456789abcdef;
   *(uint64_t *)input = value;

   mi_store(&b, out_mem64(0), in_mem64(0));
   mi_store(&b, out_mem32(8), in_mem64(0));
   mi_store(&b, out_mem32(16), in_mem32(0));
   mi_store(&b, out_mem64(24), in_mem32(0));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);

   // 32 -> 64
   /* A 32-bit source zero-extends into a 64-bit destination. */
   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}
489 #endif
490
/* Immediate-to-register stores, 64- and 32-bit wide.  The register is
 * poisoned with the canary first so the 32-bit store can be shown to leave
 * the register's high dword alone.
 */
TEST_F(mi_builder_test, imm_reg)
{
   const uint64_t value = 0x0123456789abcdef;

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(value));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), mi_imm(value));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   /* High dword keeps the canary: the 32-bit write didn't clobber it. */
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
512
/* Memory-to-register loads in every width combination, with the register
 * canary-poisoned before each load, then dumped to output memory for
 * inspection.
 */
TEST_F(mi_builder_test, mem_reg)
{
   const uint64_t value = 0x0123456789abcdef;
   *(uint64_t *)input = value;

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(16), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(24), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);

   // 32 -> 64
   /* 32-bit load into a 64-bit register zero-extends. */
   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}
550
/* GPU memset: fill a region of the output buffer with a dword pattern and
 * verify every dword landed.
 */
TEST_F(mi_builder_test, memset)
{
   const unsigned fill_bytes = 256;

   mi_memset(&b, out_addr(0), 0xdeadbeef, fill_bytes);

   submit_batch();

   const uint32_t *words = (const uint32_t *)output;
   const unsigned word_count = fill_bytes / sizeof(*words);
   for (unsigned w = 0; w < word_count; w++)
      EXPECT_EQ(words[w], 0xdeadbeef);
}
563
/* GPU memcpy: fill the input region with a byte ramp, copy it to the output
 * region on the GPU, and verify the bytes.
 */
TEST_F(mi_builder_test, memcpy)
{
   const unsigned memcpy_size = 256;

   uint8_t *in_u8 = (uint8_t *)input;
   for (unsigned i = 0; i < memcpy_size; i++)
      in_u8[i] = i;

   /* Use memcpy_size here rather than repeating the literal 256 so the copy
    * length can't silently drift from the fill/check loops.
    */
   mi_memcpy(&b, out_addr(0), in_addr(0), memcpy_size);

   submit_batch();

   uint8_t *out_u8 = (uint8_t *)output;
   for (unsigned i = 0; i < memcpy_size; i++)
      EXPECT_EQ(out_u8[i], i);
}
580
581 /* Start of MI_MATH section */
582 #if GFX_VERx10 >= 75
583
584 #define EXPECT_EQ_IMM(x, imm) EXPECT_EQ(x, mi_value_to_u64(imm))
585
/* Bitwise NOT through MI_MATH: single and double inversion, 64- and 32-bit
 * sources and destinations, both halves of the 64-bit input.
 */
TEST_F(mi_builder_test, inot)
{
   const uint64_t value = 0x0123456789abcdef;
   const uint32_t value_lo = (uint32_t)value;
   const uint32_t value_hi = (uint32_t)(value >> 32);
   memcpy(input, &value, sizeof(value));

   mi_store(&b, out_mem64(0),  mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem64(8),  mi_inot(&b, mi_inot(&b, in_mem64(0))));
   mi_store(&b, out_mem64(16), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem64(24), mi_inot(&b, in_mem32(4)));
   mi_store(&b, out_mem32(32), mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem32(36), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem32(40), mi_inot(&b, mi_inot(&b, in_mem32(0))));
   mi_store(&b, out_mem32(44), mi_inot(&b, in_mem32(4)));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0),  ~value);
   /* Double inversion is the identity. */
   EXPECT_EQ(*(uint64_t *)(output + 8),  value);
   /* 32-bit sources zero-extend before the 64-bit NOT. */
   EXPECT_EQ(*(uint64_t *)(output + 16), ~(uint64_t)value_lo);
   EXPECT_EQ(*(uint64_t *)(output + 24), ~(uint64_t)value_hi);
   EXPECT_EQ(*(uint32_t *)(output + 32), (uint32_t)~value);
   EXPECT_EQ(*(uint32_t *)(output + 36), (uint32_t)~value_lo);
   EXPECT_EQ(*(uint32_t *)(output + 40), (uint32_t)value_lo);
   EXPECT_EQ(*(uint32_t *)(output + 44), (uint32_t)~value_hi);
}
613
614 /* Test adding of immediates of all kinds including
615 *
616 * - All zeroes
617 * - All ones
618 * - inverted constants
619 */
/* Test adding of immediates of all kinds including
 *
 *  - All zeroes
 *  - All ones
 *  - inverted constants
 *
 * Both operand orders are exercised, plus the dedicated mi_iadd_imm helper.
 */
TEST_F(mi_builder_test, add_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   const uint64_t add = 0xdeadbeefac0ffee2;
   memcpy(input, &value, sizeof(value));

   mi_store(&b, out_mem64(0),
            mi_iadd(&b, in_mem64(0), mi_imm(0)));
   mi_store(&b, out_mem64(8),
            mi_iadd(&b, in_mem64(0), mi_imm(-1)));
   mi_store(&b, out_mem64(16),
            mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(0))));
   mi_store(&b, out_mem64(24),
            mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(-1))));
   mi_store(&b, out_mem64(32),
            mi_iadd(&b, in_mem64(0), mi_imm(add)));
   mi_store(&b, out_mem64(40),
            mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(add))));
   mi_store(&b, out_mem64(48),
            mi_iadd(&b, mi_imm(0), in_mem64(0)));
   mi_store(&b, out_mem64(56),
            mi_iadd(&b, mi_imm(-1), in_mem64(0)));
   mi_store(&b, out_mem64(64),
            mi_iadd(&b, mi_inot(&b, mi_imm(0)), in_mem64(0)));
   mi_store(&b, out_mem64(72),
            mi_iadd(&b, mi_inot(&b, mi_imm(-1)), in_mem64(0)));
   mi_store(&b, out_mem64(80),
            mi_iadd(&b, mi_imm(add), in_mem64(0)));
   mi_store(&b, out_mem64(88),
            mi_iadd(&b, mi_inot(&b, mi_imm(add)), in_mem64(0)));

   // And some add_imm just for good measure
   mi_store(&b, out_mem64(96), mi_iadd_imm(&b, in_mem64(0), 0));
   mi_store(&b, out_mem64(104), mi_iadd_imm(&b, in_mem64(0), add));

   submit_batch();

   /* ~0 == -1 and ~(-1) == 0, so the mi_inot variants mirror the direct
    * immediate results.
    */
   EXPECT_EQ(*(uint64_t *)(output + 0),   value);
   EXPECT_EQ(*(uint64_t *)(output + 8),   value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 16),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 24),  value);
   EXPECT_EQ(*(uint64_t *)(output + 32),  value + add);
   EXPECT_EQ(*(uint64_t *)(output + 40),  value + ~add);
   EXPECT_EQ(*(uint64_t *)(output + 48),  value);
   EXPECT_EQ(*(uint64_t *)(output + 56),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 64),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 72),  value);
   EXPECT_EQ(*(uint64_t *)(output + 80),  value + add);
   EXPECT_EQ(*(uint64_t *)(output + 88),  value + ~add);
   EXPECT_EQ(*(uint64_t *)(output + 96),  value);
   EXPECT_EQ(*(uint64_t *)(output + 104), value + add);
}
672
/* Unsigned comparisons (<, >=, ==, !=) over the full cross product of eight
 * edge-case values.  Each (i, j) pair gets a 32-byte output slot holding the
 * four comparison results; the GPU results are checked against the
 * builder's own constant-folded evaluation of the same expression.
 */
TEST_F(mi_builder_test, ult_uge_ieq_ine)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         mi_store(&b, out_mem64(i * 256 + j * 32 + 0),
                  mi_ult(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 8),
                  mi_uge(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 16),
                  mi_ieq(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 24),
                  mi_ine(&b, in_mem64(i * 8), in_mem64(j * 8)));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         uint64_t *out_u64 = (uint64_t *)(output + i * 256 + j * 32);
         /* mi_imm operands are folded on the CPU, giving the reference. */
         EXPECT_EQ_IMM(out_u64[0], mi_ult(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[1], mi_uge(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[2], mi_ieq(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[3], mi_ine(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
      }
   }
}
716
/* Zero / non-zero predicates over edge-case values straddling the 32/64-bit
 * boundary.  The array is deliberately sized 8: the trailing entries are
 * zero-initialized and simply re-test the zero case.
 */
TEST_F(mi_builder_test, z_nz)
{
   uint64_t values[8] = {
      0,
      1,
      UINT32_MAX,
      /* Intended edge case is 1ull << 32.  Without the 64-bit cast,
       * UINT32_MAX + 1 is evaluated in 32-bit unsigned arithmetic and
       * wraps to 0, silently re-testing zero instead of 0x100000000.
       */
      (uint64_t)UINT32_MAX + 1,
      UINT64_MAX,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      mi_store(&b, out_mem64(i * 16 + 0), mi_nz(&b, in_mem64(i * 8)));
      mi_store(&b, out_mem64(i * 16 + 8), mi_z(&b, in_mem64(i * 8)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      uint64_t *out_u64 = (uint64_t *)(output + i * 16);
      /* Compare against the builder's CPU-side constant folding. */
      EXPECT_EQ_IMM(out_u64[0], mi_nz(&b, mi_imm(values[i])));
      EXPECT_EQ_IMM(out_u64[1], mi_z(&b, mi_imm(values[i])));
   }
}
741
/* Bitwise AND of two 64-bit memory operands, checked against the builder's
 * constant-folded result.
 */
TEST_F(mi_builder_test, iand)
{
   const uint64_t operands[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, operands, sizeof(operands));

   mi_store(&b, out_mem64(0), mi_iand(&b, in_mem64(0), in_mem64(8)));

   submit_batch();

   const uint64_t gpu_result = *(uint64_t *)output;
   EXPECT_EQ_IMM(gpu_result, mi_iand(&b, mi_imm(operands[0]),
                                         mi_imm(operands[1])));
}
757
758 #if GFX_VER >= 8
/* Relocated immediate stores: the store is emitted first with a placeholder
 * and the actual immediate is patched into the batch afterwards via the
 * returned token (requires batch writability, gfx8+).
 */
TEST_F(mi_builder_test, imm_mem_relocated)
{
   const uint64_t value = 0x0123456789abcdef;

   struct mi_reloc_imm_token r0 = mi_store_relocated_imm(&b, out_mem64(0));
   struct mi_reloc_imm_token r1 = mi_store_relocated_imm(&b, out_mem32(8));

   /* Patch the real values in after the commands were emitted. */
   mi_relocate_store_imm(r0, value);
   mi_relocate_store_imm(r1, value);

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
778
/* Same as imm_mem_relocated but targeting a register: canary then value are
 * patched in after emission, and the register contents are dumped to memory
 * for checking.  The 32-bit variant must leave the high dword's canary.
 */
TEST_F(mi_builder_test, imm_reg_relocated)
{
   const uint64_t value = 0x0123456789abcdef;

   struct mi_reloc_imm_token r0, r1;

   r0 = mi_store_relocated_imm(&b, mi_reg64(RSVD_TEMP_REG));
   r1 = mi_store_relocated_imm(&b, mi_reg64(RSVD_TEMP_REG));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_relocate_store_imm(r0, canary);
   mi_relocate_store_imm(r1, value);

   r0 = mi_store_relocated_imm(&b, mi_reg64(RSVD_TEMP_REG));
   r1 = mi_store_relocated_imm(&b, mi_reg32(RSVD_TEMP_REG));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   mi_relocate_store_imm(r0, canary);
   mi_relocate_store_imm(r1, value);

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0), value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
808 #endif // GFX_VER >= 8
809
810 #if GFX_VERx10 >= 125
/* Variable left shift (shift count read from memory, XE_HP+), checked
 * against the builder's constant-folded reference.
 */
TEST_F(mi_builder_test, ishl)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   /* Shift counts live right after the value in the input region. */
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      mi_store(&b, out_mem64(i * 8),
               mi_ishl(&b, in_mem64(0), in_mem32(8 + i * 4)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishl(&b, mi_imm(value), mi_imm(shifts[i])));
   }
}
831
/* Variable unsigned (logical) right shift, same structure as the ishl test:
 * shift counts from memory, reference from constant folding.
 */
TEST_F(mi_builder_test, ushr)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      mi_store(&b, out_mem64(i * 8),
               mi_ushr(&b, in_mem64(0), in_mem32(8 + i * 4)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr(&b, mi_imm(value), mi_imm(shifts[i])));
   }
}
852
/* Unsigned right shift by immediate, exercising every count from 0 through
 * 64 inclusive, checked against the builder's constant folding.
 */
TEST_F(mi_builder_test, ushr_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned shift = 0; shift <= max_shift; shift++)
      mi_store(&b, out_mem64(shift * 8), mi_ushr_imm(&b, in_mem64(0), shift));

   submit_batch();

   for (unsigned shift = 0; shift <= max_shift; shift++) {
      const uint64_t gpu_result = *(uint64_t *)(output + shift * 8);
      EXPECT_EQ_IMM(gpu_result, mi_ushr_imm(&b, mi_imm(value), shift));
   }
}
870
/* Variable arithmetic right shift over one positive and one negative-msb
 * value, so sign extension is exercised.  Output slot for pair (i, j) is
 * i * 8 + j * 16, which is unique for i < 2, j < 7.
 */
TEST_F(mi_builder_test, ishr)
{
   const uint64_t values[] = {
      0x0123456789abcdef,
      0xfedcba9876543210,
   };
   memcpy(input, values, sizeof(values));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 16, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
         mi_store(&b, out_mem64(i * 8 + j * 16),
                  mi_ishr(&b, in_mem64(i * 8), in_mem32(16 + j * 4)));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8 + j * 16),
                       mi_ishr(&b, mi_imm(values[i]), mi_imm(shifts[j])));
      }
   }
}
898
/* Arithmetic right shift by immediate for every count from 0 through 64
 * inclusive, checked against the builder's constant folding.
 */
TEST_F(mi_builder_test, ishr_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned shift = 0; shift <= max_shift; shift++)
      mi_store(&b, out_mem64(shift * 8), mi_ishr_imm(&b, in_mem64(0), shift));

   submit_batch();

   for (unsigned shift = 0; shift <= max_shift; shift++) {
      const uint64_t gpu_result = *(uint64_t *)(output + shift * 8);
      EXPECT_EQ_IMM(gpu_result, mi_ishr_imm(&b, mi_imm(value), shift));
   }
}
916 #endif /* if GFX_VERx10 >= 125 */
917
/* Multiply-by-immediate over two 64-bit operands and twenty 32-bit
 * multipliers, checked against the builder's constant-folded reference.
 * Each operand row gets 160 bytes of output (20 results x 8 bytes).
 */
TEST_F(mi_builder_test, imul_imm)
{
   uint64_t lhs[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, lhs, sizeof(lhs));

   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t rhs[20] = {
      1, 2, 3, 5,
      10800, 193, 64, 40,
      3796, 256, 88, 473,
      1421, 706, 175, 850,
      39, 38985, 1941, 17,
   };

   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
         mi_store(&b, out_mem64(i * 160 + j * 8),
                  mi_imul_imm(&b, in_mem64(i * 8), rhs[j]));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + i * 160 + j * 8),
                       mi_imul_imm(&b, mi_imm(lhs[i]), rhs[j]));
      }
   }
}
954
/* Left shift by immediate for every count from 0 through 64 inclusive,
 * checked against the builder's constant folding.
 */
TEST_F(mi_builder_test, ishl_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned shift = 0; shift <= max_shift; shift++)
      mi_store(&b, out_mem64(shift * 8), mi_ishl_imm(&b, in_mem64(0), shift));

   submit_batch();

   for (unsigned shift = 0; shift <= max_shift; shift++) {
      const uint64_t gpu_result = *(uint64_t *)(output + shift * 8);
      EXPECT_EQ_IMM(gpu_result, mi_ishl_imm(&b, mi_imm(value), shift));
   }
}
972
/* 32-bit unsigned right shift by immediate for every count from 0 through
 * 64 inclusive, checked against the builder's constant folding.
 */
TEST_F(mi_builder_test, ushr32_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned shift = 0; shift <= max_shift; shift++)
      mi_store(&b, out_mem64(shift * 8), mi_ushr32_imm(&b, in_mem64(0), shift));

   submit_batch();

   for (unsigned shift = 0; shift <= max_shift; shift++) {
      const uint64_t gpu_result = *(uint64_t *)(output + shift * 8);
      EXPECT_EQ_IMM(gpu_result, mi_ushr32_imm(&b, mi_imm(value), shift));
   }
}
990
/* 32-bit unsigned division by immediate over the full cross product of
 * twenty dividends and twenty divisors, checked against the builder's
 * constant-folded reference.  Row stride is 80 bytes (20 x 4).
 */
TEST_F(mi_builder_test, udiv32_imm)
{
   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t values[20] = {
      1, 2, 3, 5,
      10800, 193, 64, 40,
      3796, 256, 88, 473,
      1421, 706, 175, 850,
      39, 38985, 1941, 17,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         mi_store(&b, out_mem32(i * 80 + j * 4),
                  mi_udiv32_imm(&b, in_mem32(i * 4), values[j]));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         EXPECT_EQ_IMM(*(uint32_t *)(output + i * 80 + j * 4),
                       mi_udiv32_imm(&b, mi_imm(values[i]), values[j]));
      }
   }
}
1022
TEST_F(mi_builder_test, store_if)
{
   const uint64_t qword_val = 0xb453b411deadc0deull;
   const uint32_t dword_val = 0x1337d00d;

   /* Enable predication, then emit the stores that should land. */
   emit_cmd(GENX(MI_PREDICATE), pred) {
      pred.LoadOperation = LOAD_LOAD;
      pred.CombineOperation = COMBINE_SET;
      pred.CompareOperation = COMPARE_TRUE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(qword_val));
   mi_store_if(&b, out_mem32(8), mi_imm(dword_val));

   /* Flip the predicate to false and emit garbage stores that must be
    * dropped.
    */
   emit_cmd(GENX(MI_PREDICATE), pred) {
      pred.LoadOperation = LOAD_LOAD;
      pred.CombineOperation = COMBINE_SET;
      pred.CompareOperation = COMPARE_FALSE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(0xd0d0d0d0d0d0d0d0ull));
   mi_store_if(&b, out_mem32(8), mi_imm(0xc000c000));

   submit_batch();

   /* Only the first pair of stores should be visible, and the dword just
    * past the 32-bit store must still hold the canary pattern.
    */
   EXPECT_EQ(*(uint64_t *)(output + 0), qword_val);
   EXPECT_EQ(*(uint32_t *)(output + 8), dword_val);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
1054
1055 #endif /* GFX_VERx10 >= 75 */
1056
1057 #if GFX_VERx10 >= 125
1058
1059 /*
1060 * Indirect load/store tests. Only available on XE_HP+
1061 */
1062
TEST_F(mi_builder_test, load_mem64_offset)
{
   const uint64_t data[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, data, sizeof(data));

   /* Byte offsets into data[], placed right after it in the input buffer. */
   const uint32_t byte_offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, byte_offsets, sizeof(byte_offsets));

   /* Each iteration loads a qword indirectly: base address plus an offset
    * that is itself read from memory.
    */
   for (unsigned i = 0; i < ARRAY_SIZE(byte_offsets); i++) {
      mi_store(&b, out_mem64(i * 8),
               mi_load_mem64_offset(&b, in_addr(0), in_mem32(i * 4 + 64)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(byte_offsets); i++)
      EXPECT_EQ(*(uint64_t *)(output + i * 8), data[byte_offsets[i] / 8]);
}
1090
TEST_F(mi_builder_test, store_mem64_offset)
{
   const uint64_t data[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, data, sizeof(data));

   /* Byte offsets into the output buffer, placed after data[] in input. */
   const uint32_t byte_offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, byte_offsets, sizeof(byte_offsets));

   /* Each iteration stores a qword indirectly: base address plus an offset
    * that is itself read from memory.
    */
   for (unsigned i = 0; i < ARRAY_SIZE(byte_offsets); i++) {
      mi_store_mem64_offset(&b, out_addr(0), in_mem32(i * 4 + 64),
                            in_mem64(i * 8));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(byte_offsets); i++)
      EXPECT_EQ(*(uint64_t *)(output + byte_offsets[i]), data[i]);
}
1118
1119 #endif /* GFX_VERx10 >= 125 */
1120
1121 #if GFX_VER >= 9
1122
1123 /*
1124 * Control-flow tests. Only available on Gfx9+
1125 */
1126
TEST_F(mi_builder_test, goto)
{
   const uint64_t expected = 0xb453b411deadc0deull;

   mi_store(&b, out_mem64(0), mi_imm(expected));

   struct mi_goto_target target = MI_GOTO_TARGET_INIT;
   mi_goto(&b, &target);

   /* Jumped over: must not clobber the value written above. */
   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_goto_target(&b, &target);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), expected);
}
1145
1146 #define MI_PREDICATE_RESULT 0x2418
1147
TEST_F(mi_builder_test, goto_if)
{
   const uint64_t vals[] = {
      0xb453b411deadc0deull,
      0x0123456789abcdefull,
      0,
   };

   mi_store(&b, out_mem64(0), mi_imm(vals[0]));

   /* Predicate is false here, so this conditional goto is not taken and
    * execution falls through to the next store.
    */
   emit_cmd(GENX(MI_PREDICATE), pred) {
      pred.LoadOperation = LOAD_LOAD;
      pred.CombineOperation = COMBINE_SET;
      pred.CompareOperation = COMPARE_FALSE;
   }

   struct mi_goto_target target = MI_GOTO_TARGET_INIT;
   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &target);

   mi_store(&b, out_mem64(0), mi_imm(vals[1]));

   /* Now make the predicate true; this goto jumps past the final store. */
   emit_cmd(GENX(MI_PREDICATE), pred) {
      pred.LoadOperation = LOAD_LOAD;
      pred.CombineOperation = COMBINE_SET;
      pred.CompareOperation = COMPARE_TRUE;
   }

   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &target);

   /* Skipped by the taken goto above. */
   mi_store(&b, out_mem64(0), mi_imm(vals[2]));

   mi_goto_target(&b, &target);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), vals[1]);
}
1186
TEST_F(mi_builder_test, loop_simple)
{
   const uint64_t iterations = 8;

   /* out_mem64(0) doubles as the loop counter. */
   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(iterations)));
      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), iterations);
}
1203
TEST_F(mi_builder_test, loop_break)
{
   mi_loop(&b) {
      mi_store(&b, out_mem64(0), mi_imm(1));

      /* Condition is constant-false: this break never fires. */
      mi_break_if(&b, mi_imm(0));

      mi_store(&b, out_mem64(0), mi_imm(2));

      /* Unconditional break: the store of 3 below is unreachable. */
      mi_break(&b);

      mi_store(&b, out_mem64(0), mi_imm(3));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), 2);
}
1222
TEST_F(mi_builder_test, loop_continue)
{
   const uint64_t iterations = 8;

   /* Qword 0 is the loop counter, qword 1 is a marker value. */
   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(iterations)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      /* Jump back to the loop head: the store of 10 never executes. */
      mi_continue(&b);

      mi_store(&b, out_mem64(8), mi_imm(10));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), iterations);
   EXPECT_EQ(*(uint64_t *)(output + 8), 5);
}
1246
TEST_F(mi_builder_test, loop_continue_if)
{
   const uint64_t iterations = 8;

   /* Qword 0 is the loop counter, qword 1 is a marker value. */
   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(iterations)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      /* Predicate false: this conditional continue is not taken. */
      emit_cmd(GENX(MI_PREDICATE), pred) {
         pred.LoadOperation = LOAD_LOAD;
         pred.CombineOperation = COMBINE_SET;
         pred.CompareOperation = COMPARE_FALSE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      mi_store(&b, out_mem64(8), mi_imm(10));

      /* Predicate true: this continue fires, skipping the store of 15. */
      emit_cmd(GENX(MI_PREDICATE), pred) {
         pred.LoadOperation = LOAD_LOAD;
         pred.CombineOperation = COMBINE_SET;
         pred.CompareOperation = COMPARE_TRUE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      mi_store(&b, out_mem64(8), mi_imm(15));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), iterations);
   EXPECT_EQ(*(uint64_t *)(output + 8), 10);
}
1286 #endif /* GFX_VER >= 9 */
1287