xref: /aosp_15_r20/external/mesa3d/src/intel/common/tests/mi_builder_test.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2019 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <fcntl.h>
25 #include <string.h>
26 #include <xf86drm.h>
27 #include <sys/mman.h>
28 
29 #include <gtest/gtest.h>
30 
31 #include "c99_compat.h"
32 #include "common/intel_gem.h"
33 #include "dev/intel_device_info.h"
34 #include "intel_gem.h"
35 #include "isl/isl.h"
36 #include "drm-uapi/i915_drm.h"
37 #include "genxml/gen_macros.h"
38 #include "util/macros.h"
39 
40 class mi_builder_test;
41 
/* A GPU address expressed as a GEM buffer object plus a byte offset into
 * it.  Resolved to a real (or relocated) graphics address by
 * __gen_combine_address() below.
 */
struct address {
   uint32_t gem_handle;  /* GEM handle of the BO (batch or data BO) */
   uint32_t offset;      /* byte offset within that BO */
};
46 
/* Glue expected by the genxml packing code: addresses are the (BO handle,
 * offset) pairs above, and the "user data" threaded through every packing
 * callback is the test fixture itself.
 */
#define __gen_address_type struct address
#define __gen_user_data ::mi_builder_test

/* Callbacks invoked by mi_builder.h / genX_pack.h; implemented further
 * down, once the fixture class has been defined.
 */
uint64_t __gen_combine_address(mi_builder_test *test, void *location,
                               struct address addr, uint32_t delta);
void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords);
struct address __gen_get_batch_address(mi_builder_test *test,
                                       void *location);
bool *__gen_get_write_fencing_status(mi_builder_test *test);
56 
57 struct address
__gen_address_offset(address addr,uint64_t offset)58 __gen_address_offset(address addr, uint64_t offset)
59 {
60    addr.offset += offset;
61    return addr;
62 }
63 
/* Scratch MI register used by tests that need a temporary.  On HSW+ we can
 * take ALU GPR15; older parts have no ALU GPRs, so a 3DPRIM register is
 * borrowed instead (safe here since these tests never draw).
 */
#if GFX_VERx10 >= 75
#define RSVD_TEMP_REG 0x2678 /* MI_ALU_REG15 */
#else
#define RSVD_TEMP_REG 0x2430 /* GFX7_3DPRIM_START_VERTEX */
#endif
/* 15, presumably because GPR15 is reserved above — confirm against
 * mi_builder.h's allocator. */
#define MI_BUILDER_NUM_ALLOC_GPRS 15
/* The single 4k data BO is split in two: input data first, output at 2k. */
#define INPUT_DATA_OFFSET 0
#define OUTPUT_DATA_OFFSET 2048

#define MI_BUILDER_CAN_WRITE_BATCH GFX_VER >= 8

/* Map a genxml command struct name to its generated metadata symbols. */
#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

#include "genxml/genX_pack.h"
#include "mi_builder.h"

/* Emit one command into the batch: reserves space via emit_dwords(), lets
 * the for-body (run exactly once) fill in the struct's fields, then packs
 * it into the reserved dwords on loop exit.
 */
#define emit_cmd(cmd, name)                                           \
   for (struct cmd name = { __genxml_cmd_header(cmd) },               \
        *_dst = (struct cmd *) emit_dwords(__genxml_cmd_length(cmd)); \
        __builtin_expect(_dst != NULL, 1);                            \
        __genxml_cmd_pack(cmd)(this, (void *)_dst, &name), _dst = NULL)
88 
89 #include <vector>
90 
91 class mi_builder_test : public ::testing::Test {
92 public:
93    mi_builder_test();
94    ~mi_builder_test();
95 
96    void SetUp();
97 
98    void *emit_dwords(int num_dwords);
99    void submit_batch();
100 
in_addr(uint32_t offset)101    inline address in_addr(uint32_t offset)
102    {
103       address addr;
104       addr.gem_handle = data_bo_handle;
105       addr.offset = INPUT_DATA_OFFSET + offset;
106       return addr;
107    }
108 
out_addr(uint32_t offset)109    inline address out_addr(uint32_t offset)
110    {
111       address addr;
112       addr.gem_handle = data_bo_handle;
113       addr.offset = OUTPUT_DATA_OFFSET + offset;
114       return addr;
115    }
116 
in_mem64(uint32_t offset)117    inline mi_value in_mem64(uint32_t offset)
118    {
119       return mi_mem64(in_addr(offset));
120    }
121 
in_mem32(uint32_t offset)122    inline mi_value in_mem32(uint32_t offset)
123    {
124       return mi_mem32(in_addr(offset));
125    }
126 
out_mem64(uint32_t offset)127    inline mi_value out_mem64(uint32_t offset)
128    {
129       return mi_mem64(out_addr(offset));
130    }
131 
out_mem32(uint32_t offset)132    inline mi_value out_mem32(uint32_t offset)
133    {
134       return mi_mem32(out_addr(offset));
135    }
136 
137    int fd;
138    uint32_t ctx_id;
139    intel_device_info devinfo;
140 
141    uint32_t batch_bo_handle;
142 #if GFX_VER >= 8
143    uint64_t batch_bo_addr;
144 #endif
145    uint32_t batch_offset;
146    void *batch_map;
147 
148 #if GFX_VER < 8
149    std::vector<drm_i915_gem_relocation_entry> relocs;
150 #endif
151 
152    uint32_t data_bo_handle;
153 #if GFX_VER >= 8
154    uint64_t data_bo_addr;
155 #endif
156    void *data_map;
157    char *input;
158    char *output;
159    uint64_t canary;
160 
161    bool write_fence_status;
162 
163    mi_builder b;
164 };
165 
mi_builder_test()166 mi_builder_test::mi_builder_test() :
167   fd(-1)
168 { }
169 
~mi_builder_test()170 mi_builder_test::~mi_builder_test()
171 {
172    close(fd);
173 }
174 
// 1 MB of batch should be enough for anyone, right?
#define BATCH_BO_SIZE (256 * 4096)
/* 4k data BO: first half is the input region, second half the output
 * region (see INPUT_DATA_OFFSET / OUTPUT_DATA_OFFSET). */
#define DATA_BO_SIZE 4096
178 
179 void
SetUp()180 mi_builder_test::SetUp()
181 {
182    drmDevicePtr devices[8];
183    int max_devices = drmGetDevices2(0, devices, 8);
184 
185    int i;
186    for (i = 0; i < max_devices; i++) {
187       if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
188           devices[i]->bustype == DRM_BUS_PCI &&
189           devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
190          fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
191          if (fd < 0)
192             continue;
193 
194          /* We don't really need to do this when running on hardware because
195           * we can just pull it from the drmDevice.  However, without doing
196           * this, intel_dump_gpu gets a bit of heartburn and we can't use the
197           * --device option with it.
198           */
199          int device_id;
200          ASSERT_TRUE(intel_gem_get_param(fd, I915_PARAM_CHIPSET_ID, &device_id))
201                << strerror(errno);
202 
203          ASSERT_TRUE(intel_get_device_info_from_fd(fd, &devinfo, -1, -1));
204          if (devinfo.ver != GFX_VER ||
205              (devinfo.platform == INTEL_PLATFORM_HSW) != (GFX_VERx10 == 75)) {
206             close(fd);
207             fd = -1;
208             continue;
209          }
210 
211 
212          /* Found a device! */
213          break;
214       }
215    }
216    ASSERT_TRUE(i < max_devices) << "Failed to find a DRM device";
217 
218    ASSERT_TRUE(intel_gem_create_context(fd, &ctx_id)) << strerror(errno);
219 
220    if (GFX_VER >= 8) {
221       /* On gfx8+, we require softpin */
222       int has_softpin;
223       ASSERT_TRUE(intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &has_softpin))
224             << strerror(errno);
225       ASSERT_TRUE(has_softpin);
226    }
227 
228    // Create the batch buffer
229    drm_i915_gem_create gem_create = drm_i915_gem_create();
230    gem_create.size = BATCH_BO_SIZE;
231    ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
232                       (void *)&gem_create), 0) << strerror(errno);
233    batch_bo_handle = gem_create.handle;
234 #if GFX_VER >= 8
235    batch_bo_addr = 0xffffffffdff70000ULL;
236 #endif
237 
238    if (devinfo.has_caching_uapi) {
239       drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
240       gem_caching.handle = batch_bo_handle;
241       gem_caching.caching = I915_CACHING_CACHED;
242       ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
243                          (void *)&gem_caching), 0) << strerror(errno);
244    }
245 
246    if (devinfo.has_mmap_offset) {
247       drm_i915_gem_mmap_offset gem_mmap_offset = drm_i915_gem_mmap_offset();
248       gem_mmap_offset.handle = batch_bo_handle;
249       gem_mmap_offset.flags = devinfo.has_local_mem ?
250                               I915_MMAP_OFFSET_FIXED :
251                               I915_MMAP_OFFSET_WC;
252       ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET,
253                          &gem_mmap_offset), 0) << strerror(errno);
254 
255       batch_map = mmap(NULL, BATCH_BO_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
256                        fd, gem_mmap_offset.offset);
257       ASSERT_NE(batch_map, MAP_FAILED) << strerror(errno);
258    } else {
259       drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
260       gem_mmap.handle = batch_bo_handle;
261       gem_mmap.offset = 0;
262       gem_mmap.size = BATCH_BO_SIZE;
263       gem_mmap.flags = 0;
264       ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
265                       (void *)&gem_mmap), 0) << strerror(errno);
266       batch_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
267    }
268 
269    // Start the batch at zero
270    batch_offset = 0;
271 
272    // Create the data buffer
273    gem_create = drm_i915_gem_create();
274    gem_create.size = DATA_BO_SIZE;
275    ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
276                       (void *)&gem_create), 0) << strerror(errno);
277    data_bo_handle = gem_create.handle;
278 #if GFX_VER >= 8
279    data_bo_addr = 0xffffffffefff0000ULL;
280 #endif
281 
282    if (devinfo.has_caching_uapi) {
283       drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
284       gem_caching.handle = data_bo_handle;
285       gem_caching.caching = I915_CACHING_CACHED;
286       ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
287                          (void *)&gem_caching), 0) << strerror(errno);
288    }
289 
290    if (devinfo.has_mmap_offset) {
291       drm_i915_gem_mmap_offset gem_mmap_offset = drm_i915_gem_mmap_offset();
292       gem_mmap_offset.handle = data_bo_handle;
293       gem_mmap_offset.flags = devinfo.has_local_mem ?
294                               I915_MMAP_OFFSET_FIXED :
295                               I915_MMAP_OFFSET_WC;
296       ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET,
297                          &gem_mmap_offset), 0) << strerror(errno);
298 
299       data_map = mmap(NULL, DATA_BO_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
300                       fd, gem_mmap_offset.offset);
301       ASSERT_NE(data_map, MAP_FAILED) << strerror(errno);
302    } else {
303       drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
304       gem_mmap.handle = data_bo_handle;
305       gem_mmap.offset = 0;
306       gem_mmap.size = DATA_BO_SIZE;
307       gem_mmap.flags = 0;
308       ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
309                          (void *)&gem_mmap), 0) << strerror(errno);
310       data_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
311    }
312 
313    input = (char *)data_map + INPUT_DATA_OFFSET;
314    output = (char *)data_map + OUTPUT_DATA_OFFSET;
315 
316    // Fill the test data with garbage
317    memset(data_map, 139, DATA_BO_SIZE);
318    memset(&canary, 139, sizeof(canary));
319 
320    write_fence_status = false;
321 
322    struct isl_device isl_dev;
323    isl_device_init(&isl_dev, &devinfo);
324    mi_builder_init(&b, &devinfo, this);
325    const uint32_t mocs = isl_mocs(&isl_dev, 0, false);
326    mi_builder_set_mocs(&b, mocs);
327 }
328 
329 void *
emit_dwords(int num_dwords)330 mi_builder_test::emit_dwords(int num_dwords)
331 {
332    void *ptr = (void *)((char *)batch_map + batch_offset);
333    batch_offset += num_dwords * 4;
334    assert(batch_offset < BATCH_BO_SIZE);
335    return ptr;
336 }
337 
/* Terminate the batch, submit it on the render ring, and block until the
 * GPU has finished so the tests can read results from the data BO.
 */
void
mi_builder_test::submit_batch()
{
   mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);

   // Round batch up to an even number of dwords.
   /* An odd dword count leaves batch_offset % 8 == 4; pad with a NOOP. */
   if (batch_offset & 4)
      mi_builder_emit(&b, GENX(MI_NOOP), noop);

   /* Object order matters: with I915_EXEC_HANDLE_LUT, the relocation
    * target_handle values written by __gen_combine_address() are indices
    * into this array (0 = data BO, 1 = batch BO). */
   drm_i915_gem_exec_object2 objects[2];
   memset(objects, 0, sizeof(objects));

   objects[0].handle = data_bo_handle;
   objects[0].relocation_count = 0;
   objects[0].relocs_ptr = 0;
#if GFX_VER >= 8 /* On gfx8+, we pin everything */
   objects[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
                      EXEC_OBJECT_PINNED |
                      EXEC_OBJECT_WRITE;
   objects[0].offset = data_bo_addr;
#else
   objects[0].flags = EXEC_OBJECT_WRITE;
   objects[0].offset = -1;
#endif

   objects[1].handle = batch_bo_handle;
#if GFX_VER >= 8 /* On gfx8+, we don't use relocations */
   objects[1].relocation_count = 0;
   objects[1].relocs_ptr = 0;
   objects[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
                      EXEC_OBJECT_PINNED;
   objects[1].offset = batch_bo_addr;
#else
   /* Hand the kernel the relocations accumulated while emitting. */
   objects[1].relocation_count = relocs.size();
   objects[1].relocs_ptr = (uintptr_t)(void *)&relocs[0];
   objects[1].flags = 0;
   objects[1].offset = -1;
#endif

   drm_i915_gem_execbuffer2 execbuf = drm_i915_gem_execbuffer2();
   execbuf.buffers_ptr = (uintptr_t)(void *)objects;
   execbuf.buffer_count = 2;
   execbuf.batch_start_offset = 0;
   execbuf.batch_len = batch_offset;
   execbuf.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER;
   execbuf.rsvd1 = ctx_id;

   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
                      (void *)&execbuf), 0) << strerror(errno);

   /* Wait for completion so the CPU-side EXPECTs see the GPU's writes. */
   drm_i915_gem_wait gem_wait = drm_i915_gem_wait();
   gem_wait.bo_handle = batch_bo_handle;
   gem_wait.timeout_ns = INT64_MAX;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT,
                      (void *)&gem_wait), 0) << strerror(errno);
}
394 
/* Resolve a (BO, offset) address for the genxml packer.
 *
 * On gfx8+ both BOs are softpinned at fixed addresses, so the final 64-bit
 * VA can be computed directly.  On older gens we instead queue a relocation
 * entry for the kernel to patch at execbuf time and emit the presumed value.
 */
uint64_t
__gen_combine_address(mi_builder_test *test, void *location,
                      address addr, uint32_t delta)
{
#if GFX_VER >= 8
   /* Only two BOs exist: anything that isn't the data BO is the batch. */
   uint64_t addr_u64 = addr.gem_handle == test->data_bo_handle ?
                       test->data_bo_addr : test->batch_bo_addr;
   return addr_u64 + addr.offset + delta;
#else
   drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry();
   /* With I915_EXEC_HANDLE_LUT (see submit_batch()), target_handle is an
    * index into the execbuf object array: 0 = data BO, 1 = batch BO. */
   reloc.target_handle = addr.gem_handle == test->data_bo_handle ? 0 : 1;
   reloc.delta = addr.offset + delta;
   reloc.offset = (char *)location - (char *)test->batch_map;
   reloc.presumed_offset = -1;
   test->relocs.push_back(reloc);

   /* Emit just the delta; the kernel rewrites this dword at execbuf. */
   return reloc.delta;
#endif
}
414 
415 bool *
__gen_get_write_fencing_status(mi_builder_test * test)416 __gen_get_write_fencing_status(mi_builder_test *test)
417 {
418    return &test->write_fence_status;
419 }
420 
421 void *
__gen_get_batch_dwords(mi_builder_test * test,unsigned num_dwords)422 __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords)
423 {
424    return test->emit_dwords(num_dwords);
425 }
426 
427 struct address
__gen_get_batch_address(mi_builder_test * test,void * location)428 __gen_get_batch_address(mi_builder_test *test, void *location)
429 {
430    assert(location >= test->batch_map);
431    size_t offset = (char *)location - (char *)test->batch_map;
432    assert(offset < BATCH_BO_SIZE);
433    assert(offset <= UINT32_MAX);
434 
435    return (struct address) {
436       .gem_handle = test->batch_bo_handle,
437       .offset = (uint32_t)offset,
438    };
439 }
440 
441 #include "genxml/genX_pack.h"
442 #include "mi_builder.h"
443 
/* Store immediates to memory at both widths and verify the 32-bit store
 * does not disturb the neighboring dword. */
TEST_F(mi_builder_test, imm_mem)
{
   const uint64_t value = 0x0123456789abcdef;

   mi_store(&b, out_mem64(0), mi_imm(value));
   mi_store(&b, out_mem32(8), mi_imm(value));

   submit_batch();

   uint64_t q;
   uint32_t d;

   /* 64-bit immediate lands intact. */
   memcpy(&q, output + 0, sizeof(q));
   EXPECT_EQ(q, value);

   /* 32-bit store writes only the low dword; the next one keeps the
    * canary pattern the BO was filled with. */
   memcpy(&d, output + 8, sizeof(d));
   EXPECT_EQ(d, (uint32_t)value);
   memcpy(&d, output + 12, sizeof(d));
   EXPECT_EQ(d, (uint32_t)canary);
}
460 
461 /* mem -> mem copies are only supported on HSW+ */
462 #if GFX_VERx10 >= 75
/* Memory-to-memory copies at every width combination (HSW+ only). */
TEST_F(mi_builder_test, mem_mem)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   mi_store(&b, out_mem64(0),   in_mem64(0));
   mi_store(&b, out_mem32(8),   in_mem64(0));
   mi_store(&b, out_mem32(16),  in_mem32(0));
   mi_store(&b, out_mem64(24),  in_mem32(0));

   submit_batch();

   const uint64_t *out64 = (const uint64_t *)output;
   const uint32_t *out32 = (const uint32_t *)output;

   EXPECT_EQ(out64[0], value);                     /* 64 -> 64 */

   EXPECT_EQ(out32[2], (uint32_t)value);           /* 64 -> 32 */
   EXPECT_EQ(out32[3], (uint32_t)canary);          /* neighbor untouched */

   EXPECT_EQ(out32[4], (uint32_t)value);           /* 32 -> 32 */
   EXPECT_EQ(out32[5], (uint32_t)canary);          /* neighbor untouched */

   EXPECT_EQ(out64[3], (uint64_t)(uint32_t)value); /* 32 -> 64, zero-extended */
}
489 #endif
490 
/* Store immediates into the scratch register at both widths, then dump
 * the full register to memory to see exactly which bits were written. */
TEST_F(mi_builder_test, imm_reg)
{
   const uint64_t value = 0x0123456789abcdef;

   /* Poison the register with the canary first so the check below can
    * tell whether the store under test really wrote (all of) it. */
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(value));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), mi_imm(value));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   const uint64_t *out64 = (const uint64_t *)output;
   const uint32_t *out32 = (const uint32_t *)output;

   EXPECT_EQ(out64[0], value);            /* 64 -> 64 */

   EXPECT_EQ(out32[2], (uint32_t)value);  /* 64 -> 32: low half replaced */
   EXPECT_EQ(out32[3], (uint32_t)canary); /* high half keeps the canary */
}
512 
/* Load from memory into the scratch register at every width combination.
 * Each sequence poisons the register with the canary first, loads, then
 * dumps the whole 64-bit register so partial writes are visible. */
TEST_F(mi_builder_test, mem_reg)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(16), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(24), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   const uint64_t *out64 = (const uint64_t *)output;
   const uint32_t *out32 = (const uint32_t *)output;

   EXPECT_EQ(out64[0], value);                     /* 64 -> 64 */

   EXPECT_EQ(out32[2], (uint32_t)value);           /* 64 -> 32 */
   EXPECT_EQ(out32[3], (uint32_t)canary);

   EXPECT_EQ(out32[4], (uint32_t)value);           /* 32 -> 32 */
   EXPECT_EQ(out32[5], (uint32_t)canary);

   EXPECT_EQ(out64[3], (uint64_t)(uint32_t)value); /* 32 -> 64, zero-extended */
}
550 
/* mi_memset must fill the whole range with the 32-bit pattern. */
TEST_F(mi_builder_test, memset)
{
   const unsigned memset_size = 256;
   const uint32_t pattern = 0xdeadbeef;

   mi_memset(&b, out_addr(0), pattern, memset_size);

   submit_batch();

   const uint32_t *words = (const uint32_t *)output;
   const unsigned num_words = memset_size / sizeof(*words);
   for (unsigned w = 0; w < num_words; w++)
      EXPECT_EQ(words[w], pattern);
}
563 
/* mi_memcpy must copy the input region byte-for-byte. */
TEST_F(mi_builder_test, memcpy)
{
   const unsigned memcpy_size = 256;

   /* Fill the source with a recognizable 0..255 ramp. */
   uint8_t *in_u8 = (uint8_t *)input;
   for (unsigned i = 0; i < memcpy_size; i++)
      in_u8[i] = i;

   /* Use memcpy_size (was a hard-coded 256) so the copy length always
    * matches what the loops above and below actually touch. */
   mi_memcpy(&b, out_addr(0), in_addr(0), memcpy_size);

   submit_batch();

   uint8_t *out_u8 = (uint8_t *)output;
   for (unsigned i = 0; i < memcpy_size; i++)
      EXPECT_EQ(out_u8[i], i);
}
580 
581 /* Start of MI_MATH section */
582 #if GFX_VERx10 >= 75
583 
/* Compare a value read back from the output buffer against the builder's
 * own constant-folded evaluation of the same expression on immediates. */
#define EXPECT_EQ_IMM(x, imm) EXPECT_EQ(x, mi_value_to_u64(imm))
585 
/* Bitwise NOT across every source/destination width combination,
 * including double-NOT (which must be the identity). */
TEST_F(mi_builder_test, inot)
{
   const uint64_t value = 0x0123456789abcdef;
   const uint32_t value_lo = (uint32_t)value;
   const uint32_t value_hi = (uint32_t)(value >> 32);
   memcpy(input, &value, sizeof(value));

   /* 64-bit destinations at offsets 0..24, 32-bit ones at 32..44.  The
    * in_mem32(4) cases read the high dword of the input value. */
   mi_store(&b, out_mem64(0),  mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem64(8),  mi_inot(&b, mi_inot(&b, in_mem64(0))));
   mi_store(&b, out_mem64(16), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem64(24), mi_inot(&b, in_mem32(4)));
   mi_store(&b, out_mem32(32), mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem32(36), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem32(40), mi_inot(&b, mi_inot(&b, in_mem32(0))));
   mi_store(&b, out_mem32(44), mi_inot(&b, in_mem32(4)));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0),  ~value);
   EXPECT_EQ(*(uint64_t *)(output + 8),  value);              /* ~~x == x */
   EXPECT_EQ(*(uint64_t *)(output + 16), ~(uint64_t)value_lo);
   EXPECT_EQ(*(uint64_t *)(output + 24), ~(uint64_t)value_hi);
   EXPECT_EQ(*(uint32_t *)(output + 32), (uint32_t)~value);
   EXPECT_EQ(*(uint32_t *)(output + 36), (uint32_t)~value_lo);
   EXPECT_EQ(*(uint32_t *)(output + 40), (uint32_t)value_lo); /* ~~x == x */
   EXPECT_EQ(*(uint32_t *)(output + 44), (uint32_t)~value_hi);
}
613 
614 /* Test adding of immediates of all kinds including
615  *
616  *  - All zeroes
617  *  - All ones
618  *  - inverted constants
619  */
TEST_F(mi_builder_test, add_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   const uint64_t add = 0xdeadbeefac0ffee2;
   memcpy(input, &value, sizeof(value));

   /* First half: immediate on the right-hand side. */
   mi_store(&b, out_mem64(0),
                mi_iadd(&b, in_mem64(0), mi_imm(0)));
   mi_store(&b, out_mem64(8),
                mi_iadd(&b, in_mem64(0), mi_imm(-1)));
   mi_store(&b, out_mem64(16),
                mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(0))));
   mi_store(&b, out_mem64(24),
                mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(-1))));
   mi_store(&b, out_mem64(32),
                mi_iadd(&b, in_mem64(0), mi_imm(add)));
   mi_store(&b, out_mem64(40),
                mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(add))));
   /* Second half: same additions with operands swapped, to exercise the
    * immediate on the left-hand side as well. */
   mi_store(&b, out_mem64(48),
                mi_iadd(&b, mi_imm(0), in_mem64(0)));
   mi_store(&b, out_mem64(56),
                mi_iadd(&b, mi_imm(-1), in_mem64(0)));
   mi_store(&b, out_mem64(64),
                mi_iadd(&b, mi_inot(&b, mi_imm(0)), in_mem64(0)));
   mi_store(&b, out_mem64(72),
                mi_iadd(&b, mi_inot(&b, mi_imm(-1)), in_mem64(0)));
   mi_store(&b, out_mem64(80),
                mi_iadd(&b, mi_imm(add), in_mem64(0)));
   mi_store(&b, out_mem64(88),
                mi_iadd(&b, mi_inot(&b, mi_imm(add)), in_mem64(0)));

   // And some add_imm just for good measure
   mi_store(&b, out_mem64(96), mi_iadd_imm(&b, in_mem64(0), 0));
   mi_store(&b, out_mem64(104), mi_iadd_imm(&b, in_mem64(0), add));

   submit_batch();

   /* ~0 == -1 and ~-1 == 0, hence the value-1 / value expectations for
    * the mi_inot variants. */
   EXPECT_EQ(*(uint64_t *)(output + 0),   value);
   EXPECT_EQ(*(uint64_t *)(output + 8),   value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 16),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 24),  value);
   EXPECT_EQ(*(uint64_t *)(output + 32),  value + add);
   EXPECT_EQ(*(uint64_t *)(output + 40),  value + ~add);
   EXPECT_EQ(*(uint64_t *)(output + 48),  value);
   EXPECT_EQ(*(uint64_t *)(output + 56),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 64),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 72),  value);
   EXPECT_EQ(*(uint64_t *)(output + 80),  value + add);
   EXPECT_EQ(*(uint64_t *)(output + 88),  value + ~add);
   EXPECT_EQ(*(uint64_t *)(output + 96),  value);
   EXPECT_EQ(*(uint64_t *)(output + 104), value + add);
}
672 
/* Exhaustively compare every pair from an 8-value set with the four
 * comparison ops.  Output layout: 256 bytes per left operand, 32 bytes
 * (four 64-bit results: ult, uge, ieq, ine) per right operand. */
TEST_F(mi_builder_test, ult_uge_ieq_ine)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         mi_store(&b, out_mem64(i * 256 + j * 32 + 0),
                      mi_ult(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 8),
                      mi_uge(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 16),
                      mi_ieq(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 24),
                      mi_ine(&b, in_mem64(i * 8), in_mem64(j * 8)));
      }
   }

   submit_batch();

   /* The GPU results must match the builder's own immediate folding. */
   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         uint64_t *out_u64 = (uint64_t *)(output + i * 256 + j * 32);
         EXPECT_EQ_IMM(out_u64[0], mi_ult(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[1], mi_uge(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[2], mi_ieq(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[3], mi_ine(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
      }
   }
}
716 
/* Zero / non-zero tests around the 32- and 64-bit boundaries.  The array
 * is deliberately larger than the initializer list; the trailing
 * zero-initialized entries just re-test 0. */
TEST_F(mi_builder_test, z_nz)
{
   uint64_t values[8] = {
      0,
      1,
      UINT32_MAX,
      /* Promote before adding: plain "UINT32_MAX + 1" is unsigned-int
       * arithmetic and wraps to 0, so 2^32 was never actually tested. */
      (uint64_t)UINT32_MAX + 1,
      UINT64_MAX,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      mi_store(&b, out_mem64(i * 16 + 0), mi_nz(&b, in_mem64(i * 8)));
      mi_store(&b, out_mem64(i * 16 + 8), mi_z(&b, in_mem64(i * 8)));
   }

   submit_batch();

   /* Check against the builder's immediate folding of the same ops. */
   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      uint64_t *out_u64 = (uint64_t *)(output + i * 16);
      EXPECT_EQ_IMM(out_u64[0], mi_nz(&b, mi_imm(values[i])));
      EXPECT_EQ_IMM(out_u64[1], mi_z(&b, mi_imm(values[i])));
   }
}
741 
/* Bitwise AND of two 64-bit memory operands. */
TEST_F(mi_builder_test, iand)
{
   const uint64_t values[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, values, sizeof(values));

   mi_store(&b, out_mem64(0), mi_iand(&b, in_mem64(0), in_mem64(8)));

   submit_batch();

   uint64_t result;
   memcpy(&result, output, sizeof(result));
   EXPECT_EQ_IMM(result, mi_iand(&b, mi_imm(values[0]),
                                     mi_imm(values[1])));
}
757 
758 #if GFX_VER >= 8
/* Relocated immediate stores to memory: emit placeholder stores first,
 * then patch the real values in through the returned tokens. */
TEST_F(mi_builder_test, imm_mem_relocated)
{
   const uint64_t value = 0x0123456789abcdef;

   struct mi_reloc_imm_token tok64 = mi_store_relocated_imm(&b, out_mem64(0));
   struct mi_reloc_imm_token tok32 = mi_store_relocated_imm(&b, out_mem32(8));

   mi_relocate_store_imm(tok64, value);
   mi_relocate_store_imm(tok32, value);

   submit_batch();

   const uint64_t *out64 = (const uint64_t *)output;
   const uint32_t *out32 = (const uint32_t *)output;

   EXPECT_EQ(out64[0], value);            /* 64 -> 64 */

   EXPECT_EQ(out32[2], (uint32_t)value);  /* 64 -> 32 */
   EXPECT_EQ(out32[3], (uint32_t)canary); /* neighbor dword untouched */
}
778 
/* Relocated immediate stores to registers: the placeholder stores are
 * emitted (canary then value) and patched via tokens after each group,
 * before the register is dumped out by the following sequence. */
TEST_F(mi_builder_test, imm_reg_relocated)
{
   const uint64_t value = 0x0123456789abcdef;

   struct mi_reloc_imm_token r0, r1;

   /* 64-bit poison, 64-bit store, then dump the register. */
   r0 = mi_store_relocated_imm(&b, mi_reg64(RSVD_TEMP_REG));
   r1 = mi_store_relocated_imm(&b, mi_reg64(RSVD_TEMP_REG));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_relocate_store_imm(r0, canary);
   mi_relocate_store_imm(r1, value);

   /* 64-bit poison, 32-bit store: only the low half may change. */
   r0 = mi_store_relocated_imm(&b, mi_reg64(RSVD_TEMP_REG));
   r1 = mi_store_relocated_imm(&b, mi_reg32(RSVD_TEMP_REG));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   mi_relocate_store_imm(r0, canary);
   mi_relocate_store_imm(r1, value);

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0),  value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
808 #endif // GFX_VER >= 8
809 
810 #if GFX_VERx10 >= 125
/* Left shift by a memory operand, one result qword per shift amount. */
TEST_F(mi_builder_test, ishl)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   const unsigned num_shifts = ARRAY_SIZE(shifts);

   for (unsigned s = 0; s < num_shifts; s++) {
      mi_store(&b, out_mem64(s * 8),
                   mi_ishl(&b, in_mem64(0), in_mem32(8 + s * 4)));
   }

   submit_batch();

   /* Compare against the builder's own immediate folding. */
   const uint64_t *out64 = (const uint64_t *)output;
   for (unsigned s = 0; s < num_shifts; s++)
      EXPECT_EQ_IMM(out64[s], mi_ishl(&b, mi_imm(value), mi_imm(shifts[s])));
}
831 
/* Unsigned (logical) right shift by a memory operand. */
TEST_F(mi_builder_test, ushr)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   const unsigned num_shifts = ARRAY_SIZE(shifts);

   for (unsigned s = 0; s < num_shifts; s++) {
      mi_store(&b, out_mem64(s * 8),
                   mi_ushr(&b, in_mem64(0), in_mem32(8 + s * 4)));
   }

   submit_batch();

   /* Compare against the builder's own immediate folding. */
   const uint64_t *out64 = (const uint64_t *)output;
   for (unsigned s = 0; s < num_shifts; s++)
      EXPECT_EQ_IMM(out64[s], mi_ushr(&b, mi_imm(value), mi_imm(shifts[s])));
}
852 
/* Unsigned right shift by immediate counts 0..64 inclusive. */
TEST_F(mi_builder_test, ushr_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned shift = 0; shift <= max_shift; shift++)
      mi_store(&b, out_mem64(shift * 8), mi_ushr_imm(&b, in_mem64(0), shift));

   submit_batch();

   const uint64_t *out64 = (const uint64_t *)output;
   for (unsigned shift = 0; shift <= max_shift; shift++)
      EXPECT_EQ_IMM(out64[shift], mi_ushr_imm(&b, mi_imm(value), shift));
}
870 
/* Arithmetic (sign-extending) right shift, with one positive and one
 * negative value so sign extension is actually exercised.  Output layout
 * interleaves the two values: result for (value i, shift j) lives at
 * byte offset i*8 + j*16. */
TEST_F(mi_builder_test, ishr)
{
   const uint64_t values[] = {
      0x0123456789abcdef,
      0xfedcba9876543210,
   };
   memcpy(input, values, sizeof(values));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 16, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
         mi_store(&b, out_mem64(i * 8 + j * 16),
                      mi_ishr(&b, in_mem64(i * 8), in_mem32(16 + j * 4)));
      }
   }

   submit_batch();

   /* Compare against the builder's own immediate folding. */
   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8 + j * 16),
                       mi_ishr(&b, mi_imm(values[i]), mi_imm(shifts[j])));
      }
   }
}
898 
TEST_F(mi_builder_test, ishr_imm)
{
   /* 64-bit operand stored at input offset 0. */
   const uint64_t src = 0x0123456789abcdef;
   memcpy(input, &src, sizeof(src));

   const unsigned max_shift = 64;

   /* One immediate arithmetic shift for every count in [0, 64]. */
   for (unsigned s = 0; s <= max_shift; s++)
      mi_store(&b, out_mem64(s * 8), mi_ishr_imm(&b, in_mem64(0), s));

   submit_batch();

   /* Every GPU result must match the CPU-side immediate evaluation. */
   for (unsigned s = 0; s <= max_shift; s++)
      EXPECT_EQ_IMM(*(uint64_t *)(output + s * 8),
                    mi_ishr_imm(&b, mi_imm(src), s));
}
916 #endif /* if GFX_VERx10 >= 125 */
917 
TEST_F(mi_builder_test, imul_imm)
{
   /* Two 64-bit multiplicands at the start of the input buffer. */
   uint64_t lhs[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, lhs, sizeof(lhs));

   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t rhs[20] = {
      1,       2,       3,       5,
      10800,   193,     64,      40,
      3796,    256,     88,      473,
      1421,    706,     175,     850,
      39,      38985,   1941,    17,
   };

   /* Each lhs value gets a row of QWord products, one per rhs entry.
    * Derive the row stride from the array size instead of hard-coding
    * 160 so the layout stays correct if rhs[] ever changes size.
    */
   const unsigned row_stride = ARRAY_SIZE(rhs) * sizeof(uint64_t);

   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
         mi_store(&b, out_mem64(i * row_stride + j * 8),
                      mi_imul_imm(&b, in_mem64(i * 8), rhs[j]));
      }
   }

   submit_batch();

   /* Compare every GPU product against the immediate (CPU) evaluation. */
   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + i * row_stride + j * 8),
                       mi_imul_imm(&b, mi_imm(lhs[i]), rhs[j]));
      }
   }
}
954 
TEST_F(mi_builder_test, ishl_imm)
{
   /* 64-bit operand stored at input offset 0. */
   const uint64_t src = 0x0123456789abcdef;
   memcpy(input, &src, sizeof(src));

   const unsigned max_shift = 64;

   /* One immediate left shift for every count in [0, 64]. */
   for (unsigned s = 0; s <= max_shift; s++)
      mi_store(&b, out_mem64(s * 8), mi_ishl_imm(&b, in_mem64(0), s));

   submit_batch();

   /* Every GPU result must match the CPU-side immediate evaluation. */
   for (unsigned s = 0; s <= max_shift; s++)
      EXPECT_EQ_IMM(*(uint64_t *)(output + s * 8),
                    mi_ishl_imm(&b, mi_imm(src), s));
}
972 
TEST_F(mi_builder_test, ushr32_imm)
{
   /* 64-bit operand stored at input offset 0. */
   const uint64_t src = 0x0123456789abcdef;
   memcpy(input, &src, sizeof(src));

   const unsigned max_shift = 64;

   /* One 32-bit immediate unsigned shift for every count in [0, 64]. */
   for (unsigned s = 0; s <= max_shift; s++)
      mi_store(&b, out_mem64(s * 8), mi_ushr32_imm(&b, in_mem64(0), s));

   submit_batch();

   /* Every GPU result must match the CPU-side immediate evaluation. */
   for (unsigned s = 0; s <= max_shift; s++)
      EXPECT_EQ_IMM(*(uint64_t *)(output + s * 8),
                    mi_ushr32_imm(&b, mi_imm(src), s));
}
990 
TEST_F(mi_builder_test, udiv32_imm)
{
   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t values[20] = {
      1,       2,       3,       5,
      10800,   193,     64,      40,
      3796,    256,     88,      473,
      1421,    706,     175,     850,
      39,      38985,   1941,    17,
   };
   memcpy(input, values, sizeof(values));

   /* Each dividend gets a row of DWord quotients, one per divisor.
    * Derive the row stride from the array instead of hard-coding 80 so
    * the layout stays correct if values[] ever changes size.
    */
   const unsigned row_stride = sizeof(values);

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         mi_store(&b, out_mem32(i * row_stride + j * 4),
                      mi_udiv32_imm(&b, in_mem32(i * 4), values[j]));
      }
   }

   submit_batch();

   /* Compare every GPU quotient against the immediate (CPU) evaluation. */
   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         EXPECT_EQ_IMM(*(uint32_t *)(output + i * row_stride + j * 4),
                       mi_udiv32_imm(&b, mi_imm(values[i]), values[j]));
      }
   }
}
1022 
TEST_F(mi_builder_test, store_if)
{
   uint64_t u64 = 0xb453b411deadc0deull;
   uint32_t u32 = 0x1337d00d;

   /* Write values with the predicate enabled */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_TRUE;
   }

   /* With COMPARE_TRUE set above, both predicated stores should land. */
   mi_store_if(&b, out_mem64(0), mi_imm(u64));
   mi_store_if(&b, out_mem32(8), mi_imm(u32));

   /* Set predicate to false, write garbage that shouldn't land */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_FALSE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(0xd0d0d0d0d0d0d0d0ull));
   mi_store_if(&b, out_mem32(8), mi_imm(0xc000c000));

   submit_batch();

   /* The predicated-off stores must not have overwritten the first two
    * writes, and the canary DWord past the written region must be
    * untouched (i.e. the 32-bit store did not spill past its size).
    */
   EXPECT_EQ(*(uint64_t *)(output + 0), u64);
   EXPECT_EQ(*(uint32_t *)(output + 8), u32);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
1054 
1055 #endif /* GFX_VERx10 >= 75 */
1056 
1057 #if GFX_VERx10 >= 125
1058 
1059 /*
1060  * Indirect load/store tests.  Only available on XE_HP+
1061  */
1062 
TEST_F(mi_builder_test, load_mem64_offset)
{
   /* Eight QWord payloads at the start of the input buffer. */
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   /* Source byte offsets (a permutation of the QWord slots), stored as
    * DWords right after the payload data.
    */
   uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, offsets, sizeof(offsets));

   /* Gather: load each value through an offset read from memory. */
   for (unsigned k = 0; k < ARRAY_SIZE(offsets); k++)
      mi_store(&b, out_mem64(k * 8),
               mi_load_mem64_offset(&b, in_addr(0), in_mem32(k * 4 + 64)));

   submit_batch();

   /* Slot k must hold the value the k-th offset pointed at. */
   for (unsigned k = 0; k < ARRAY_SIZE(offsets); k++)
      EXPECT_EQ(*(uint64_t *)(output + k * 8), values[offsets[k] / 8]);
}
1090 
TEST_F(mi_builder_test, store_mem64_offset)
{
   /* Eight QWord payloads at the start of the input buffer. */
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   /* Destination byte offsets (a permutation of the QWord slots),
    * stored as DWords right after the payload data.
    */
   uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, offsets, sizeof(offsets));

   /* Scatter: store each value through an offset read from memory. */
   for (unsigned k = 0; k < ARRAY_SIZE(offsets); k++)
      mi_store_mem64_offset(&b, out_addr(0), in_mem32(k * 4 + 64),
                                in_mem64(k * 8));

   submit_batch();

   /* Each value must have landed at its scattered destination. */
   for (unsigned k = 0; k < ARRAY_SIZE(offsets); k++)
      EXPECT_EQ(*(uint64_t *)(output + offsets[k]), values[k]);
}
1118 
1119 #endif /* GFX_VERx10 >= 125 */
1120 
1121 #if GFX_VER >= 9
1122 
1123 /*
1124  * Control-flow tests.  Only available on Gfx9+
1125  */
1126 
TEST_F(mi_builder_test, goto)
{
   const uint64_t expected = 0xb453b411deadc0deull;

   mi_store(&b, out_mem64(0), mi_imm(expected));

   struct mi_goto_target target = MI_GOTO_TARGET_INIT;
   mi_goto(&b, &target);

   /* Jumped over by the unconditional goto, so this store must never
    * reach memory.
    */
   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_goto_target(&b, &target);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), expected);
}
1145 
1146 #define MI_PREDICATE_RESULT  0x2418
1147 
TEST_F(mi_builder_test, goto_if)
{
   const uint64_t values[] = {
      0xb453b411deadc0deull,
      0x0123456789abcdefull,
      0,
   };

   /* Baseline; overwritten below only if the first goto falls through. */
   mi_store(&b, out_mem64(0), mi_imm(values[0]));

   /* Load a false predicate result: the conditional goto that follows
    * must NOT be taken.
    */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_FALSE;
   }

   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);

   /* Reached because the goto above fell through. */
   mi_store(&b, out_mem64(0), mi_imm(values[1]));

   /* Load a true predicate result: this conditional goto IS taken. */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_TRUE;
   }

   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);

   /* This one should be skipped */
   mi_store(&b, out_mem64(0), mi_imm(values[2]));

   mi_goto_target(&b, &t);

   submit_batch();

   /* values[1] proves the first goto fell through and the second one
    * jumped over the values[2] store.
    */
   EXPECT_EQ(*(uint64_t *)(output + 0), values[1]);
}
1186 
TEST_F(mi_builder_test, loop_simple)
{
   const uint64_t n_iters = 8;

   /* QWord 0 doubles as the loop counter; start it at zero. */
   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_loop(&b) {
      /* Exit once the counter reaches the iteration target. */
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(n_iters)));
      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), n_iters);
}
1203 
TEST_F(mi_builder_test, loop_break)
{
   mi_loop(&b) {
      mi_store(&b, out_mem64(0), mi_imm(1));

      /* Condition is a constant false, so this break must not fire. */
      mi_break_if(&b, mi_imm(0));

      mi_store(&b, out_mem64(0), mi_imm(2));

      /* Unconditional break: exits the loop right after storing 2... */
      mi_break(&b);

      /* ...so this store must never execute. */
      mi_store(&b, out_mem64(0), mi_imm(3));
   }

   submit_batch();

   /* 2 proves the conditional break fell through and the unconditional
    * break skipped the final store.
    */
   EXPECT_EQ(*(uint64_t *)(output + 0), 2);
}
1222 
TEST_F(mi_builder_test, loop_continue)
{
   const uint64_t n_iters = 8;

   /* QWord 0 is the iteration counter, QWord 1 the sentinel value. */
   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      /* Stop once the counter has reached the iteration target. */
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(n_iters)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      /* Jump back to the loop head unconditionally... */
      mi_continue(&b);

      /* ...so the sentinel must never be overwritten with 10. */
      mi_store(&b, out_mem64(8), mi_imm(10));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), n_iters);
   EXPECT_EQ(*(uint64_t *)(output + 8), 5);
}
1246 
TEST_F(mi_builder_test, loop_continue_if)
{
   const uint64_t loop_count = 8;

   /* QWord 0 is the iteration counter, QWord 1 the sentinel value. */
   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      /* Exit once the counter reaches loop_count. */
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      /* Load a false predicate result: the continue that follows must
       * NOT be taken, so execution proceeds to the store of 10.
       */
      emit_cmd(GENX(MI_PREDICATE), mip) {
         mip.LoadOperation    = LOAD_LOAD;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_FALSE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      mi_store(&b, out_mem64(8), mi_imm(10));

      /* Load a true predicate result: this continue IS taken every
       * iteration, skipping the store of 15 below.
       */
      emit_cmd(GENX(MI_PREDICATE), mip) {
         mip.LoadOperation    = LOAD_LOAD;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_TRUE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      mi_store(&b, out_mem64(8), mi_imm(15));
   }

   submit_batch();

   /* Sentinel value 10 proves the first continue fell through and the
    * second one skipped the store of 15.
    */
   EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
   EXPECT_EQ(*(uint64_t *)(output + 8), 10);
}
1286 #endif /* GFX_VER >= 9 */
1287