xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/elk/elk_test_eu_validate.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <gtest/gtest.h>
25 #include "elk_disasm_info.h"
26 #include "elk_eu.h"
27 #include "elk_eu_defines.h"
28 #include "util/bitset.h"
29 #include "util/ralloc.h"
30 
/* Platform short names the test suite is instantiated for. Each name is
 * turned into a PCI device id by validation_test::SetUp().
 */
static const struct intel_gfx_info {
   const char *name;
} gfx_names[] = {
   { "brw" },
   { "g4x" },
   { "ilk" },
   { "snb" },
   { "ivb" },
   { "hsw" },
   { "byt" },
   { "bdw" },
   { "chv" },
};
44 
45 class validation_test: public ::testing::TestWithParam<struct intel_gfx_info> {
46    virtual void SetUp();
47 
48 public:
49    validation_test();
50    virtual ~validation_test();
51 
52    struct elk_isa_info isa;
53    struct elk_codegen *p;
54    struct intel_device_info devinfo;
55 };
56 
validation_test()57 validation_test::validation_test()
58 {
59    p = rzalloc(NULL, struct elk_codegen);
60    memset(&devinfo, 0, sizeof(devinfo));
61 }
62 
~validation_test()63 validation_test::~validation_test()
64 {
65    ralloc_free(p);
66 }
67 
SetUp()68 void validation_test::SetUp()
69 {
70    struct intel_gfx_info info = GetParam();
71    int devid = intel_device_name_to_pci_device_id(info.name);
72 
73    intel_get_device_info_from_pci_id(devid, &devinfo);
74 
75    elk_init_isa_info(&isa, &devinfo);
76 
77    elk_init_codegen(&isa, p, p);
78 }
79 
80 struct gfx_name {
81    template <class ParamType>
82    std::string
operator ()gfx_name83    operator()(const ::testing::TestParamInfo<ParamType>& info) const {
84       return info.param.name;
85    }
86 };
87 
88 INSTANTIATE_TEST_SUITE_P(
89    eu_assembly, validation_test,
90    ::testing::ValuesIn(gfx_names),
91    gfx_name()
92 );
93 
94 static bool
validate(struct elk_codegen * p)95 validate(struct elk_codegen *p)
96 {
97    const bool print = getenv("TEST_DEBUG");
98    struct elk_disasm_info *disasm = elk_disasm_initialize(p->isa, NULL);
99 
100    if (print) {
101       elk_disasm_new_inst_group(disasm, 0);
102       elk_disasm_new_inst_group(disasm, p->next_insn_offset);
103    }
104 
105    bool ret = elk_validate_instructions(p->isa, p->store, 0,
106                                         p->next_insn_offset, disasm);
107 
108    if (print) {
109       elk_dump_assembly(p->store, 0, p->next_insn_offset, disasm, NULL);
110    }
111    ralloc_free(disasm);
112 
113    return ret;
114 }
115 
/* Shorthands used by the tests below. They expect a `struct elk_codegen *p`
 * to be in scope where noted.
 */
/* Most recently emitted instruction of p. */
#define last_inst (&p->store[p->nr_insn - 1])
/* 8-wide view of GRF 0, subregister 0. */
#define g0 elk_vec8_grf(0, 0)
#define acc0 elk_acc_reg(8)
#define null elk_null_reg()
#define zero elk_imm_f(0.0f)
121 
122 static void
clear_instructions(struct elk_codegen * p)123 clear_instructions(struct elk_codegen *p)
124 {
125    p->next_insn_offset = 0;
126    p->nr_insn = 0;
127 }
128 
/* Baseline check: a plain ADD of g0 with itself is expected to validate. */
TEST_P(validation_test, sanity)
{
   const struct elk_reg reg = g0;

   elk_ADD(p, reg, reg, reg);

   EXPECT_TRUE(validate(p));
}
135 
/* A MOV whose source is the null register is expected to be rejected. */
TEST_P(validation_test, src0_null_reg)
{
   const struct elk_reg dst = g0;
   const struct elk_reg src = null;

   elk_MOV(p, dst, src);

   EXPECT_FALSE(validate(p));
}
142 
/* An ADD whose second source is the null register is expected to be
 * rejected.
 */
TEST_P(validation_test, src1_null_reg)
{
   const struct elk_reg reg = g0;
   const struct elk_reg src1 = null;

   elk_ADD(p, reg, reg, src1);

   EXPECT_FALSE(validate(p));
}
149 
/* A SIN math instruction fed from the null register is expected to be
 * rejected, whichever of the two math emitters the platform uses.
 */
TEST_P(validation_test, math_src0_null_reg)
{
   if (devinfo.ver < 6)
      elk_gfx4_math(p, g0, ELK_MATH_FUNCTION_SIN, 0, null, ELK_MATH_PRECISION_FULL);
   else
      elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_SIN, null, null);

   EXPECT_FALSE(validate(p));
}
160 
/* A two-operand math instruction (POW) with a null second source is expected
 * to be rejected on Gfx6+.
 */
TEST_P(validation_test, math_src1_null_reg)
{
   /* Math instructions on Gfx4/5 are actually SEND messages with payloads;
    * src1 is an immediate message descriptor set by elk_gfx4_math, so there
    * is nothing to test there.
    */
   if (devinfo.ver < 6)
      return;

   elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_POW, g0, null);
   EXPECT_FALSE(validate(p));
}
172 
/* Hardware opcode 46 means different things per generation:
 *   - "push" on Gfx4 and Gfx5
 *   - "fork" on Gfx6
 *   - reserved on Gfx7
 *   - "goto" on Gfx8+
 * so it is expected to be rejected only on Gfx7.
 */
TEST_P(validation_test, opcode46)
{
   elk_next_insn(p, elk_opcode_decode(&isa, 46));

   const bool reserved = devinfo.ver == 7;

   EXPECT_EQ(!reserved, validate(p));
}
188 
/* Every defined execution size must validate, while the two encodings that
 * follow ELK_EXECUTE_32 must be rejected.
 */
TEST_P(validation_test, invalid_exec_size_encoding)
{
   const struct {
      enum elk_execution_size exec_size;
      bool expected_result;
   } test_case[] = {
      { ELK_EXECUTE_1,  true },
      { ELK_EXECUTE_2,  true },
      { ELK_EXECUTE_4,  true },
      { ELK_EXECUTE_8,  true },
      { ELK_EXECUTE_16, true },
      { ELK_EXECUTE_32, true },

      { static_cast<enum elk_execution_size>(static_cast<int>(ELK_EXECUTE_32) + 1), false },
      { static_cast<enum elk_execution_size>(static_cast<int>(ELK_EXECUTE_32) + 2), false },
   };

   for (const auto &tc : test_case) {
      elk_MOV(p, g0, g0);
      auto *inst = last_inst;

      elk_inst_set_exec_size(&devinfo, inst, tc.exec_size);
      elk_inst_set_src0_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
      elk_inst_set_dst_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);

      /* A scalar execution gets a <0;1,0> source region, everything else
       * gets <2;2,1>.
       */
      const bool scalar = tc.exec_size == ELK_EXECUTE_1;
      elk_inst_set_src0_vstride(&devinfo, inst,
                                scalar ? ELK_VERTICAL_STRIDE_0 : ELK_VERTICAL_STRIDE_2);
      elk_inst_set_src0_width(&devinfo, inst,
                              scalar ? ELK_WIDTH_1 : ELK_WIDTH_2);
      elk_inst_set_src0_hstride(&devinfo, inst,
                                scalar ? ELK_HORIZONTAL_STRIDE_0 : ELK_HORIZONTAL_STRIDE_1);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
228 
/* Using the message register file as a destination (MOV) or as a math source
 * is expected to validate up to Gfx6 and to be rejected afterwards.
 */
TEST_P(validation_test, invalid_file_encoding)
{
   const bool expect_valid = !(devinfo.ver > 6);

   /* Message register file as the destination of a MOV. */
   elk_MOV(p, g0, g0);
   elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_MESSAGE_REGISTER_FILE, ELK_REGISTER_TYPE_F);

   EXPECT_EQ(expect_valid, validate(p));

   clear_instructions(p);

   /* Message register file as src0 of a math instruction. */
   if (devinfo.ver >= 6)
      elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_SIN, g0, null);
   else
      elk_gfx4_math(p, g0, ELK_MATH_FUNCTION_SIN, 0, g0, ELK_MATH_PRECISION_FULL);
   elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_MESSAGE_REGISTER_FILE, ELK_REGISTER_TYPE_F);

   EXPECT_EQ(expect_valid, validate(p));
}
255 
TEST_P(validation_test,invalid_type_encoding)256 TEST_P(validation_test, invalid_type_encoding)
257 {
258    enum elk_reg_file files[2] = {
259       ELK_GENERAL_REGISTER_FILE,
260       ELK_IMMEDIATE_VALUE,
261    };
262 
263    for (unsigned i = 0; i < ARRAY_SIZE(files); i++) {
264       const enum elk_reg_file file = files[i];
265       const int num_bits = devinfo.ver >= 8 ? 4 : 3;
266       const int num_encodings = 1 << num_bits;
267 
268       /* The data types are encoded into <num_bits> bits to be used in hardware
269        * instructions, so keep a record in a bitset the invalid patterns so
270        * they can be verified to be invalid when used.
271        */
272       BITSET_DECLARE(invalid_encodings, num_encodings);
273 
274       const struct {
275          enum elk_reg_type type;
276          bool expected_result;
277       } test_case[] = {
278          { ELK_REGISTER_TYPE_NF, devinfo.ver == 11 && file != IMM },
279          { ELK_REGISTER_TYPE_DF, devinfo.has_64bit_float && (devinfo.ver >= 8 || file != IMM) },
280          { ELK_REGISTER_TYPE_F,  true },
281          { ELK_REGISTER_TYPE_HF, devinfo.ver >= 8 },
282          { ELK_REGISTER_TYPE_VF, file == IMM },
283          { ELK_REGISTER_TYPE_Q,  devinfo.has_64bit_int },
284          { ELK_REGISTER_TYPE_UQ, devinfo.has_64bit_int },
285          { ELK_REGISTER_TYPE_D,  true },
286          { ELK_REGISTER_TYPE_UD, true },
287          { ELK_REGISTER_TYPE_W,  true },
288          { ELK_REGISTER_TYPE_UW, true },
289          { ELK_REGISTER_TYPE_B,  file == FIXED_GRF },
290          { ELK_REGISTER_TYPE_UB, file == FIXED_GRF },
291          { ELK_REGISTER_TYPE_V,  file == IMM },
292          { ELK_REGISTER_TYPE_UV, devinfo.ver >= 6 && file == IMM },
293       };
294 
295       /* Initially assume all hardware encodings are invalid */
296       BITSET_ONES(invalid_encodings);
297 
298       elk_set_default_exec_size(p, ELK_EXECUTE_4);
299 
300       for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
301          if (test_case[i].expected_result) {
302             unsigned hw_type = elk_reg_type_to_hw_type(&devinfo, file, test_case[i].type);
303             if (hw_type != INVALID_REG_TYPE) {
304                /* ... and remove valid encodings from the set */
305                assert(BITSET_TEST(invalid_encodings, hw_type));
306                BITSET_CLEAR(invalid_encodings, hw_type);
307             }
308 
309             if (file == FIXED_GRF) {
310                struct elk_reg g = retype(g0, test_case[i].type);
311                elk_MOV(p, g, g);
312                elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
313                elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
314                elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
315             } else {
316                enum elk_reg_type t;
317 
318                switch (test_case[i].type) {
319                case ELK_REGISTER_TYPE_V:
320                   t = ELK_REGISTER_TYPE_W;
321                   break;
322                case ELK_REGISTER_TYPE_UV:
323                   t = ELK_REGISTER_TYPE_UW;
324                   break;
325                case ELK_REGISTER_TYPE_VF:
326                   t = ELK_REGISTER_TYPE_F;
327                   break;
328                default:
329                   t = test_case[i].type;
330                   break;
331                }
332 
333                struct elk_reg g = retype(g0, t);
334                elk_MOV(p, g, retype(elk_imm_w(0), test_case[i].type));
335             }
336 
337             EXPECT_TRUE(validate(p));
338 
339             clear_instructions(p);
340          }
341       }
342 
343       /* The remaining encodings in invalid_encodings do not have a mapping
344        * from ELK_REGISTER_TYPE_* and must be invalid. Verify that invalid
345        * encodings are rejected by the validator.
346        */
347       int e;
348       BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
349          if (file == FIXED_GRF) {
350             elk_MOV(p, g0, g0);
351             elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
352             elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
353             elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
354          } else {
355             elk_MOV(p, g0, elk_imm_w(0));
356          }
357          elk_inst_set_dst_reg_hw_type(&devinfo, last_inst, e);
358          elk_inst_set_src0_reg_hw_type(&devinfo, last_inst, e);
359 
360          EXPECT_FALSE(validate(p));
361 
362          clear_instructions(p);
363       }
364    }
365 }
366 
TEST_P(validation_test,invalid_type_encoding_3src_a16)367 TEST_P(validation_test, invalid_type_encoding_3src_a16)
368 {
369    /* 3-src instructions in align16 mode only supported on Gfx6-10 */
370    if (devinfo.ver < 6)
371       return;
372 
373    const int num_bits = devinfo.ver >= 8 ? 3 : 2;
374    const int num_encodings = 1 << num_bits;
375 
376    /* The data types are encoded into <num_bits> bits to be used in hardware
377     * instructions, so keep a record in a bitset the invalid patterns so
378     * they can be verified to be invalid when used.
379     */
380    BITSET_DECLARE(invalid_encodings, num_encodings);
381 
382    const struct {
383       enum elk_reg_type type;
384       bool expected_result;
385    } test_case[] = {
386       { ELK_REGISTER_TYPE_DF, devinfo.ver >= 7  },
387       { ELK_REGISTER_TYPE_F,  true },
388       { ELK_REGISTER_TYPE_HF, devinfo.ver >= 8  },
389       { ELK_REGISTER_TYPE_D,  devinfo.ver >= 7  },
390       { ELK_REGISTER_TYPE_UD, devinfo.ver >= 7  },
391    };
392 
393    /* Initially assume all hardware encodings are invalid */
394    BITSET_ONES(invalid_encodings);
395 
396    elk_set_default_access_mode(p, ELK_ALIGN_16);
397    elk_set_default_exec_size(p, ELK_EXECUTE_4);
398 
399    for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
400       if (test_case[i].expected_result) {
401          unsigned hw_type = elk_reg_type_to_a16_hw_3src_type(&devinfo, test_case[i].type);
402          if (hw_type != INVALID_HW_REG_TYPE) {
403             /* ... and remove valid encodings from the set */
404             assert(BITSET_TEST(invalid_encodings, hw_type));
405             BITSET_CLEAR(invalid_encodings, hw_type);
406          }
407 
408          struct elk_reg g = retype(g0, test_case[i].type);
409          if (!elk_reg_type_is_integer(test_case[i].type)) {
410             elk_MAD(p, g, g, g, g);
411          } else {
412             elk_BFE(p, g, g, g, g);
413          }
414 
415          EXPECT_TRUE(validate(p));
416 
417          clear_instructions(p);
418       }
419    }
420 
421    /* The remaining encodings in invalid_encodings do not have a mapping
422     * from ELK_REGISTER_TYPE_* and must be invalid. Verify that invalid
423     * encodings are rejected by the validator.
424     */
425    int e;
426    BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
427       for (unsigned i = 0; i < 2; i++) {
428          if (i == 0) {
429             elk_MAD(p, g0, g0, g0, g0);
430          } else {
431             elk_BFE(p, g0, g0, g0, g0);
432          }
433 
434          elk_inst_set_3src_a16_dst_hw_type(&devinfo, last_inst, e);
435          elk_inst_set_3src_a16_src_hw_type(&devinfo, last_inst, e);
436 
437          EXPECT_FALSE(validate(p));
438 
439          clear_instructions(p);
440 
441          if (devinfo.ver == 6)
442             break;
443       }
444    }
445 }
446 
447 TEST_P(validation_test, 3src_inst_access_mode)
448 {
449    /* 3-src instructions only supported on Gfx6+ */
450    if (devinfo.ver < 6)
451       return;
452 
453    const struct {
454       unsigned mode;
455       bool expected_result;
456    } test_case[] = {
457       { ELK_ALIGN_1,  false},
458       { ELK_ALIGN_16, true },
459    };
460 
461    for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
462       elk_set_default_access_mode(p, ELK_ALIGN_16);
463 
464       elk_MAD(p, g0, g0, g0, g0);
465       elk_inst_set_access_mode(&devinfo, last_inst, test_case[i].mode);
466 
467       EXPECT_EQ(test_case[i].expected_result, validate(p));
468 
469       clear_instructions(p);
470    }
471 }
472 
473 /* When the Execution Data Type is wider than the destination data type, the
474  * destination must [...] specify a HorzStride equal to the ratio in sizes of
475  * the two data types.
476  */
TEST_P(validation_test,dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size)477 TEST_P(validation_test, dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size)
478 {
479    elk_ADD(p, g0, g0, g0);
480    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
481    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
482    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
483 
484    EXPECT_FALSE(validate(p));
485 
486    clear_instructions(p);
487 
488    elk_ADD(p, g0, g0, g0);
489    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
490    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
491    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
492    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
493 
494    EXPECT_TRUE(validate(p));
495 }
496 
497 /* When the Execution Data Type is wider than the destination data type, the
498  * destination must be aligned as required by the wider execution data type
499  * [...]
500  */
TEST_P(validation_test,dst_subreg_must_be_aligned_to_exec_type_size)501 TEST_P(validation_test, dst_subreg_must_be_aligned_to_exec_type_size)
502 {
503    elk_ADD(p, g0, g0, g0);
504    elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 2);
505    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
506    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
507    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
508    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
509 
510    EXPECT_FALSE(validate(p));
511 
512    clear_instructions(p);
513 
514    elk_ADD(p, g0, g0, g0);
515    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_4);
516    elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 8);
517    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
518    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
519    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
520    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
521    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
522    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
523    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
524    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
525    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
526    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
527 
528    EXPECT_TRUE(validate(p));
529 }
530 
531 /* ExecSize must be greater than or equal to Width. */
TEST_P(validation_test,exec_size_less_than_width)532 TEST_P(validation_test, exec_size_less_than_width)
533 {
534    elk_ADD(p, g0, g0, g0);
535    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_16);
536 
537    EXPECT_FALSE(validate(p));
538 
539    clear_instructions(p);
540 
541    elk_ADD(p, g0, g0, g0);
542    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_16);
543 
544    EXPECT_FALSE(validate(p));
545 }
546 
547 /* If ExecSize = Width and HorzStride ≠ 0,
548  * VertStride must be set to Width * HorzStride.
549  */
TEST_P(validation_test,vertical_stride_is_width_by_horizontal_stride)550 TEST_P(validation_test, vertical_stride_is_width_by_horizontal_stride)
551 {
552    elk_ADD(p, g0, g0, g0);
553    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
554 
555    EXPECT_FALSE(validate(p));
556 
557    clear_instructions(p);
558 
559    elk_ADD(p, g0, g0, g0);
560    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
561 
562    EXPECT_FALSE(validate(p));
563 }
564 
565 /* If Width = 1, HorzStride must be 0 regardless of the values
566  * of ExecSize and VertStride.
567  */
TEST_P(validation_test,horizontal_stride_must_be_0_if_width_is_1)568 TEST_P(validation_test, horizontal_stride_must_be_0_if_width_is_1)
569 {
570    elk_ADD(p, g0, g0, g0);
571    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
572    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_1);
573    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
574 
575    EXPECT_FALSE(validate(p));
576 
577    clear_instructions(p);
578 
579    elk_ADD(p, g0, g0, g0);
580    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
581    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_1);
582    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
583 
584    EXPECT_FALSE(validate(p));
585 }
586 
587 /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
TEST_P(validation_test,scalar_region_must_be_0_1_0)588 TEST_P(validation_test, scalar_region_must_be_0_1_0)
589 {
590    struct elk_reg g0_0 = elk_vec1_grf(0, 0);
591 
592    elk_ADD(p, g0, g0, g0_0);
593    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_1);
594    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_1);
595    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_1);
596    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
597 
598    EXPECT_FALSE(validate(p));
599 
600    clear_instructions(p);
601 
602    elk_ADD(p, g0, g0_0, g0);
603    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_1);
604    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_1);
605    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_1);
606    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
607 
608    EXPECT_FALSE(validate(p));
609 }
610 
611 /* If VertStride = HorzStride = 0, Width must be 1 regardless of the value
612  * of ExecSize.
613  */
TEST_P(validation_test,zero_stride_implies_0_1_0)614 TEST_P(validation_test, zero_stride_implies_0_1_0)
615 {
616    elk_ADD(p, g0, g0, g0);
617    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
618    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_2);
619    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
620 
621    EXPECT_FALSE(validate(p));
622 
623    clear_instructions(p);
624 
625    elk_ADD(p, g0, g0, g0);
626    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
627    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_2);
628    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
629 
630    EXPECT_FALSE(validate(p));
631 }
632 
633 /* Dst.HorzStride must not be 0. */
TEST_P(validation_test,dst_horizontal_stride_0)634 TEST_P(validation_test, dst_horizontal_stride_0)
635 {
636    elk_ADD(p, g0, g0, g0);
637    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
638 
639    EXPECT_FALSE(validate(p));
640 
641    clear_instructions(p);
642 
643    elk_set_default_access_mode(p, ELK_ALIGN_16);
644 
645    elk_ADD(p, g0, g0, g0);
646    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
647 
648    EXPECT_FALSE(validate(p));
649 }
650 
651 /* VertStride must be used to cross ELK_GENERAL_REGISTER_FILE register boundaries. This rule implies
652  * that elements within a 'Width' cannot cross ELK_GENERAL_REGISTER_FILE boundaries.
653  */
TEST_P(validation_test,must_not_cross_grf_boundary_in_a_width)654 TEST_P(validation_test, must_not_cross_grf_boundary_in_a_width)
655 {
656    elk_ADD(p, g0, g0, g0);
657    elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 4);
658 
659    EXPECT_FALSE(validate(p));
660 
661    clear_instructions(p);
662 
663    elk_ADD(p, g0, g0, g0);
664    elk_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 4);
665 
666    EXPECT_FALSE(validate(p));
667 
668    clear_instructions(p);
669 
670    elk_ADD(p, g0, g0, g0);
671    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
672    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
673    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
674 
675    EXPECT_FALSE(validate(p));
676 
677    clear_instructions(p);
678 
679    elk_ADD(p, g0, g0, g0);
680    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
681    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
682    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
683 
684    EXPECT_FALSE(validate(p));
685 }
686 
687 /* Destination Horizontal must be 1 in Align16 */
TEST_P(validation_test,dst_hstride_on_align16_must_be_1)688 TEST_P(validation_test, dst_hstride_on_align16_must_be_1)
689 {
690    elk_set_default_access_mode(p, ELK_ALIGN_16);
691 
692    elk_ADD(p, g0, g0, g0);
693    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
694 
695    EXPECT_FALSE(validate(p));
696 
697    clear_instructions(p);
698 
699    elk_ADD(p, g0, g0, g0);
700    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
701 
702    EXPECT_TRUE(validate(p));
703 }
704 
705 /* VertStride must be 0 or 4 in Align16 */
TEST_P(validation_test,vstride_on_align16_must_be_0_or_4)706 TEST_P(validation_test, vstride_on_align16_must_be_0_or_4)
707 {
708    const struct {
709       enum elk_vertical_stride vstride;
710       bool expected_result;
711    } vstride[] = {
712       { ELK_VERTICAL_STRIDE_0, true },
713       { ELK_VERTICAL_STRIDE_1, false },
714       { ELK_VERTICAL_STRIDE_2, devinfo.verx10 >= 75 },
715       { ELK_VERTICAL_STRIDE_4, true },
716       { ELK_VERTICAL_STRIDE_8, false },
717       { ELK_VERTICAL_STRIDE_16, false },
718       { ELK_VERTICAL_STRIDE_32, false },
719       { ELK_VERTICAL_STRIDE_ONE_DIMENSIONAL, false },
720    };
721 
722    elk_set_default_access_mode(p, ELK_ALIGN_16);
723 
724    for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
725       elk_ADD(p, g0, g0, g0);
726       elk_inst_set_src0_vstride(&devinfo, last_inst, vstride[i].vstride);
727 
728       EXPECT_EQ(vstride[i].expected_result, validate(p));
729 
730       clear_instructions(p);
731    }
732 
733    for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
734       elk_ADD(p, g0, g0, g0);
735       elk_inst_set_src1_vstride(&devinfo, last_inst, vstride[i].vstride);
736 
737       EXPECT_EQ(vstride[i].expected_result, validate(p));
738 
739       clear_instructions(p);
740    }
741 }
742 
743 /* In Direct Addressing mode, a source cannot span more than 2 adjacent ELK_GENERAL_REGISTER_FILE
744  * registers.
745  */
TEST_P(validation_test,source_cannot_span_more_than_2_registers)746 TEST_P(validation_test, source_cannot_span_more_than_2_registers)
747 {
748    elk_ADD(p, g0, g0, g0);
749    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_32);
750    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
751    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
752    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
753    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
754    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_8);
755    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
756 
757    EXPECT_FALSE(validate(p));
758 
759    clear_instructions(p);
760 
761    elk_ADD(p, g0, g0, g0);
762    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
763    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
764    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
765    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
766    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
767    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_8);
768    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
769    elk_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 2);
770 
771    EXPECT_TRUE(validate(p));
772 
773    clear_instructions(p);
774 
775    elk_ADD(p, g0, g0, g0);
776    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
777 
778    EXPECT_TRUE(validate(p));
779 }
780 
781 /* A destination cannot span more than 2 adjacent ELK_GENERAL_REGISTER_FILE registers. */
TEST_P(validation_test,destination_cannot_span_more_than_2_registers)782 TEST_P(validation_test, destination_cannot_span_more_than_2_registers)
783 {
784    elk_ADD(p, g0, g0, g0);
785    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_32);
786    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
787    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
788    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
789    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
790 
791    EXPECT_FALSE(validate(p));
792 
793    clear_instructions(p);
794 
795    elk_ADD(p, g0, g0, g0);
796    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_8);
797    elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 6);
798    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_4);
799    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
800    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
801    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
802    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
803    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
804    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
805    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
806    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
807    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
808 
809    EXPECT_TRUE(validate(p));
810 }
811 
TEST_P(validation_test,src_region_spans_two_regs_dst_region_spans_one)812 TEST_P(validation_test, src_region_spans_two_regs_dst_region_spans_one)
813 {
814    /* Writes to dest are to the lower OWord */
815    elk_ADD(p, g0, g0, g0);
816    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
817    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
818    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
819    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
820    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
821    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
822 
823    EXPECT_TRUE(validate(p));
824 
825    clear_instructions(p);
826 
827    /* Writes to dest are to the upper OWord */
828    elk_ADD(p, g0, g0, g0);
829    elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 16);
830    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
831    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
832    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
833    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
834    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
835    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
836 
837    EXPECT_TRUE(validate(p));
838 
839    clear_instructions(p);
840 
841    /* Writes to dest are evenly split between OWords */
842    elk_ADD(p, g0, g0, g0);
843    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
844    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
845    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
846    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
847    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
848    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_8);
849    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
850 
851    EXPECT_TRUE(validate(p));
852 
853    clear_instructions(p);
854 
855    /* Writes to dest are uneven between OWords */
856    elk_ADD(p, g0, g0, g0);
857    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_4);
858    elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 10);
859    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
860    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
861    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
862    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
863    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
864    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
865    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
866    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_2);
867    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
868 
869    EXPECT_FALSE(validate(p));
870 }
871 
TEST_P(validation_test, dst_elements_must_be_evenly_split_between_registers)
{
   /* ADD whose destination starts at sub-register 4: expected to be
    * rejected.
    */
   {
      elk_ADD(p, g0, g0, g0);
      auto *add = last_inst;
      elk_inst_set_dst_da1_subreg_nr(&devinfo, add, 4);

      EXPECT_FALSE(validate(p));
   }

   clear_instructions(p);

   /* ADD with the execution size raised to 16: expected to validate. */
   {
      elk_ADD(p, g0, g0, g0);
      auto *add = last_inst;
      elk_inst_set_exec_size(&devinfo, add, ELK_EXECUTE_16);

      EXPECT_TRUE(validate(p));
   }

   clear_instructions(p);

   /* The remaining cases use elk_gfx6_math() and only run for ver >= 6. */
   if (devinfo.ver < 6)
      return;

   /* Unmodified math instruction: expected to validate. */
   elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_SIN, g0, null);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* Same math instruction with the destination moved to sub-register 4:
    * expected to be rejected.
    */
   {
      elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_SIN, g0, null);
      auto *math = last_inst;
      elk_inst_set_dst_da1_subreg_nr(&devinfo, math, 4);

      EXPECT_FALSE(validate(p));
   }
}
901 
TEST_P(validation_test, two_src_two_dst_source_offsets_must_be_same)
{
   /* SIMD4 ADD with a stride-4 destination; src0 starts at sub-register 16
    * with region <2;1,0> and src1 uses region <4;4,1>.  Expected to be
    * rejected up to ver 7 and accepted from ver 8 on.
    */
   {
      elk_ADD(p, g0, g0, g0);
      auto *add = last_inst;
      elk_inst_set_exec_size(&devinfo, add, ELK_EXECUTE_4);
      elk_inst_set_dst_hstride(&devinfo, add, ELK_HORIZONTAL_STRIDE_4);
      elk_inst_set_src0_da1_subreg_nr(&devinfo, add, 16);
      elk_inst_set_src0_vstride(&devinfo, add, ELK_VERTICAL_STRIDE_2);
      elk_inst_set_src0_width(&devinfo, add, ELK_WIDTH_1);
      elk_inst_set_src0_hstride(&devinfo, add, ELK_HORIZONTAL_STRIDE_0);
      elk_inst_set_src1_vstride(&devinfo, add, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src1_width(&devinfo, add, ELK_WIDTH_4);
      elk_inst_set_src1_hstride(&devinfo, add, ELK_HORIZONTAL_STRIDE_1);

      if (devinfo.ver >= 8) {
         EXPECT_TRUE(validate(p));
      } else {
         EXPECT_FALSE(validate(p));
      }
   }

   clear_instructions(p);

   /* Same shape, but src0 stays at sub-register 0 with region <4;1,0> and
    * src1 uses region <8;2,1>.  Expected to validate everywhere.
    */
   {
      elk_ADD(p, g0, g0, g0);
      auto *add = last_inst;
      elk_inst_set_exec_size(&devinfo, add, ELK_EXECUTE_4);
      elk_inst_set_dst_hstride(&devinfo, add, ELK_HORIZONTAL_STRIDE_4);
      elk_inst_set_src0_vstride(&devinfo, add, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src0_width(&devinfo, add, ELK_WIDTH_1);
      elk_inst_set_src0_hstride(&devinfo, add, ELK_HORIZONTAL_STRIDE_0);
      elk_inst_set_src1_vstride(&devinfo, add, ELK_VERTICAL_STRIDE_8);
      elk_inst_set_src1_width(&devinfo, add, ELK_WIDTH_2);
      elk_inst_set_src1_hstride(&devinfo, add, ELK_HORIZONTAL_STRIDE_1);

      EXPECT_TRUE(validate(p));
   }
}
935 
TEST_P(validation_test, two_src_two_dst_each_dst_must_be_derived_from_one_src)
{
   /* Both cases are expected to be rejected up to ver 7 and accepted from
    * ver 8 on.
    */

   /* SIMD16 MOV of W data: stride-2 destination, source starting at
    * sub-register 8 with region <4;4,1>.
    */
   {
      elk_MOV(p, g0, g0);
      auto *mov = last_inst;
      elk_inst_set_exec_size(&devinfo, mov, ELK_EXECUTE_16);
      elk_inst_set_dst_file_type(&devinfo, mov, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
      elk_inst_set_dst_hstride(&devinfo, mov, ELK_HORIZONTAL_STRIDE_2);
      elk_inst_set_src0_file_type(&devinfo, mov, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
      elk_inst_set_src0_da1_subreg_nr(&devinfo, mov, 8);
      elk_inst_set_src0_vstride(&devinfo, mov, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src0_width(&devinfo, mov, ELK_WIDTH_4);
      elk_inst_set_src0_hstride(&devinfo, mov, ELK_HORIZONTAL_STRIDE_1);

      if (devinfo.ver >= 8) {
         EXPECT_TRUE(validate(p));
      } else {
         EXPECT_FALSE(validate(p));
      }
   }

   clear_instructions(p);

   /* MOV with the destination at sub-register 16 and the source at
    * sub-register 8 with region <2;2,1>.
    */
   {
      elk_MOV(p, g0, g0);
      auto *mov = last_inst;
      elk_inst_set_dst_da1_subreg_nr(&devinfo, mov, 16);
      elk_inst_set_src0_da1_subreg_nr(&devinfo, mov, 8);
      elk_inst_set_src0_vstride(&devinfo, mov, ELK_VERTICAL_STRIDE_2);
      elk_inst_set_src0_width(&devinfo, mov, ELK_WIDTH_2);
      elk_inst_set_src0_hstride(&devinfo, mov, ELK_HORIZONTAL_STRIDE_1);

      if (devinfo.ver >= 8) {
         EXPECT_TRUE(validate(p));
      } else {
         EXPECT_FALSE(validate(p));
      }
   }
}
969 
TEST_P(validation_test,one_src_two_dst)970 TEST_P(validation_test, one_src_two_dst)
971 {
972    struct elk_reg g0_0 = elk_vec1_grf(0, 0);
973 
974    elk_ADD(p, g0, g0_0, g0_0);
975    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
976 
977    EXPECT_TRUE(validate(p));
978 
979    clear_instructions(p);
980 
981    elk_ADD(p, g0, g0, g0);
982    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
983    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
984    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
985    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
986 
987    EXPECT_TRUE(validate(p));
988 
989    clear_instructions(p);
990 
991    elk_ADD(p, g0, g0, g0);
992    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
993    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
994    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
995    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
996 
997    if (devinfo.ver >= 8) {
998       EXPECT_TRUE(validate(p));
999    } else {
1000       EXPECT_FALSE(validate(p));
1001    }
1002 
1003    clear_instructions(p);
1004 
1005    elk_ADD(p, g0, g0, g0);
1006    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
1007    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
1008    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1009    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1010 
1011    if (devinfo.ver >= 8) {
1012       EXPECT_TRUE(validate(p));
1013    } else {
1014       EXPECT_FALSE(validate(p));
1015    }
1016 
1017    clear_instructions(p);
1018 
1019    elk_ADD(p, g0, g0, g0);
1020    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
1021    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
1022    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1023    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1024    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1025    elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
1026    elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_1);
1027    elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
1028 
1029    if (devinfo.ver >= 8) {
1030       EXPECT_TRUE(validate(p));
1031    } else {
1032       EXPECT_FALSE(validate(p));
1033    }
1034 
1035    clear_instructions(p);
1036 
1037    elk_ADD(p, g0, g0, g0);
1038    elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
1039    elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
1040    elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1041    elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1042    elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
1043    elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_1);
1044    elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
1045    elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
1046 
1047    if (devinfo.ver >= 8) {
1048       EXPECT_TRUE(validate(p));
1049    } else {
1050       EXPECT_FALSE(validate(p));
1051    }
1052 }
1053 
TEST_P(validation_test,packed_byte_destination)1054 TEST_P(validation_test, packed_byte_destination)
1055 {
1056    static const struct {
1057       enum elk_reg_type dst_type;
1058       enum elk_reg_type src_type;
1059       bool neg, abs, sat;
1060       bool expected_result;
1061    } move[] = {
1062       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UB, 0, 0, 0, true },
1063       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_B , 0, 0, 0, true },
1064       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_B , 0, 0, 0, true },
1065       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_UB, 0, 0, 0, true },
1066 
1067       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UB, 1, 0, 0, false },
1068       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_B , 1, 0, 0, false },
1069       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_B , 1, 0, 0, false },
1070       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_UB, 1, 0, 0, false },
1071 
1072       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UB, 0, 1, 0, false },
1073       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_B , 0, 1, 0, false },
1074       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_B , 0, 1, 0, false },
1075       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_UB, 0, 1, 0, false },
1076 
1077       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UB, 0, 0, 1, false },
1078       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_B , 0, 0, 1, false },
1079       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_B , 0, 0, 1, false },
1080       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_UB, 0, 0, 1, false },
1081 
1082       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UW, 0, 0, 0, false },
1083       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_W , 0, 0, 0, false },
1084       { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UD, 0, 0, 0, false },
1085       { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_D , 0, 0, 0, false },
1086    };
1087 
1088    for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
1089       elk_MOV(p, retype(g0, move[i].dst_type), retype(g0, move[i].src_type));
1090       elk_inst_set_src0_negate(&devinfo, last_inst, move[i].neg);
1091       elk_inst_set_src0_abs(&devinfo, last_inst, move[i].abs);
1092       elk_inst_set_saturate(&devinfo, last_inst, move[i].sat);
1093 
1094       EXPECT_EQ(move[i].expected_result, validate(p));
1095 
1096       clear_instructions(p);
1097    }
1098 
1099    elk_SEL(p, retype(g0, ELK_REGISTER_TYPE_UB),
1100               retype(g0, ELK_REGISTER_TYPE_UB),
1101               retype(g0, ELK_REGISTER_TYPE_UB));
1102    elk_inst_set_pred_control(&devinfo, last_inst, ELK_PREDICATE_NORMAL);
1103 
1104    EXPECT_FALSE(validate(p));
1105 
1106    clear_instructions(p);
1107 
1108    elk_SEL(p, retype(g0, ELK_REGISTER_TYPE_B),
1109               retype(g0, ELK_REGISTER_TYPE_B),
1110               retype(g0, ELK_REGISTER_TYPE_B));
1111    elk_inst_set_pred_control(&devinfo, last_inst, ELK_PREDICATE_NORMAL);
1112 
1113    EXPECT_FALSE(validate(p));
1114 }
1115 
TEST_P(validation_test, byte_destination_relaxed_alignment)
{
   /* Predicated SEL from W sources into a B destination with stride 2,
    * destination at sub-register 0: expected to validate.
    */
   {
      elk_SEL(p, retype(g0, ELK_REGISTER_TYPE_B),
                 retype(g0, ELK_REGISTER_TYPE_W),
                 retype(g0, ELK_REGISTER_TYPE_W));
      auto *sel = last_inst;
      elk_inst_set_pred_control(&devinfo, sel, ELK_PREDICATE_NORMAL);
      elk_inst_set_dst_hstride(&devinfo, sel, ELK_HORIZONTAL_STRIDE_2);

      EXPECT_TRUE(validate(p));
   }

   clear_instructions(p);

   /* Same instruction with the destination moved to sub-register 1:
    * expected to validate only when verx10 >= 45.
    */
   {
      elk_SEL(p, retype(g0, ELK_REGISTER_TYPE_B),
                 retype(g0, ELK_REGISTER_TYPE_W),
                 retype(g0, ELK_REGISTER_TYPE_W));
      auto *sel = last_inst;
      elk_inst_set_pred_control(&devinfo, sel, ELK_PREDICATE_NORMAL);
      elk_inst_set_dst_hstride(&devinfo, sel, ELK_HORIZONTAL_STRIDE_2);
      elk_inst_set_dst_da1_subreg_nr(&devinfo, sel, 1);

      if (devinfo.verx10 < 45) {
         EXPECT_FALSE(validate(p));
      } else {
         EXPECT_TRUE(validate(p));
      }
   }
}
1141 
TEST_P(validation_test,byte_64bit_conversion)1142 TEST_P(validation_test, byte_64bit_conversion)
1143 {
1144    static const struct {
1145       enum elk_reg_type dst_type;
1146       enum elk_reg_type src_type;
1147       unsigned dst_stride;
1148       bool expected_result;
1149    } inst[] = {
1150 #define INST(dst_type, src_type, dst_stride, expected_result)             \
1151       {                                                                   \
1152          ELK_REGISTER_TYPE_##dst_type,                                    \
1153          ELK_REGISTER_TYPE_##src_type,                                    \
1154          ELK_HORIZONTAL_STRIDE_##dst_stride,                              \
1155          expected_result,                                                 \
1156       }
1157 
1158       INST(B,   Q, 1, false),
1159       INST(B,  UQ, 1, false),
1160       INST(B,  DF, 1, false),
1161       INST(UB,  Q, 1, false),
1162       INST(UB, UQ, 1, false),
1163       INST(UB, DF, 1, false),
1164 
1165       INST(B,   Q, 2, false),
1166       INST(B,  UQ, 2, false),
1167       INST(B , DF, 2, false),
1168       INST(UB,  Q, 2, false),
1169       INST(UB, UQ, 2, false),
1170       INST(UB, DF, 2, false),
1171 
1172       INST(B,   Q, 4, false),
1173       INST(B,  UQ, 4, false),
1174       INST(B,  DF, 4, false),
1175       INST(UB,  Q, 4, false),
1176       INST(UB, UQ, 4, false),
1177       INST(UB, DF, 4, false),
1178 
1179 #undef INST
1180    };
1181 
1182    if (devinfo.ver < 8)
1183       return;
1184 
1185    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1186       if (!devinfo.has_64bit_float &&
1187           inst[i].src_type == ELK_REGISTER_TYPE_DF)
1188          continue;
1189 
1190       if (!devinfo.has_64bit_int &&
1191           (inst[i].src_type == ELK_REGISTER_TYPE_Q ||
1192            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
1193          continue;
1194 
1195       elk_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
1196       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1197       EXPECT_EQ(inst[i].expected_result, validate(p));
1198 
1199       clear_instructions(p);
1200    }
1201 }
1202 
TEST_P(validation_test,half_float_conversion)1203 TEST_P(validation_test, half_float_conversion)
1204 {
1205    static const struct {
1206       enum elk_reg_type dst_type;
1207       enum elk_reg_type src_type;
1208       unsigned dst_stride;
1209       unsigned dst_subnr;
1210       bool expected_result_bdw;
1211       bool expected_result_chv;
1212    } inst[] = {
1213 #define INST(dst_type, src_type, dst_stride, dst_subnr,                     \
1214              expected_result_bdw, expected_result_chv)                      \
1215       {                                                                     \
1216          ELK_REGISTER_TYPE_##dst_type,                                      \
1217          ELK_REGISTER_TYPE_##src_type,                                      \
1218          ELK_HORIZONTAL_STRIDE_##dst_stride,                                \
1219          dst_subnr,                                                         \
1220          expected_result_bdw,                                               \
1221          expected_result_chv,                                               \
1222       }
1223 
1224       /* MOV to half-float destination */
1225       INST(HF,  B, 1, 0, false, false), /* 0 */
1226       INST(HF,  W, 1, 0, false, false),
1227       INST(HF, HF, 1, 0, true,  true),
1228       INST(HF, HF, 1, 2, true,  true),
1229       INST(HF,  D, 1, 0, false, false),
1230       INST(HF,  F, 1, 0, false, true),
1231       INST(HF,  Q, 1, 0, false, false),
1232       INST(HF,  B, 2, 0, true,  true),
1233       INST(HF,  B, 2, 2, false, false),
1234       INST(HF,  W, 2, 0, true,  true),
1235       INST(HF,  W, 2, 2, false, false), /* 10 */
1236       INST(HF, HF, 2, 0, true,  true),
1237       INST(HF, HF, 2, 2, true,  true),
1238       INST(HF,  D, 2, 0, true,  true),
1239       INST(HF,  D, 2, 2, false, false),
1240       INST(HF,  F, 2, 0, true,  true),
1241       INST(HF,  F, 2, 2, false, true),
1242       INST(HF,  Q, 2, 0, false, false),
1243       INST(HF, DF, 2, 0, false, false),
1244       INST(HF,  B, 4, 0, false, false),
1245       INST(HF,  W, 4, 0, false, false), /* 20 */
1246       INST(HF, HF, 4, 0, true,  true),
1247       INST(HF, HF, 4, 2, true,  true),
1248       INST(HF,  D, 4, 0, false, false),
1249       INST(HF,  F, 4, 0, false, false),
1250       INST(HF,  Q, 4, 0, false, false),
1251       INST(HF, DF, 4, 0, false, false),
1252 
1253       /* MOV from half-float source */
1254       INST( B, HF, 1, 0, false, false),
1255       INST( W, HF, 1, 0, false, false),
1256       INST( D, HF, 1, 0, true,  true),
1257       INST( D, HF, 1, 4, true,  true),  /* 30 */
1258       INST( F, HF, 1, 0, true,  true),
1259       INST( F, HF, 1, 4, true,  true),
1260       INST( Q, HF, 1, 0, false, false),
1261       INST(DF, HF, 1, 0, false, false),
1262       INST( B, HF, 2, 0, false, false),
1263       INST( W, HF, 2, 0, true,  true),
1264       INST( W, HF, 2, 2, false, false),
1265       INST( D, HF, 2, 0, false, false),
1266       INST( F, HF, 2, 0, true,  true),
1267       INST( B, HF, 4, 0, true,  true),  /* 40 */
1268       INST( B, HF, 4, 1, false, false),
1269       INST( W, HF, 4, 0, false, false),
1270 
1271 #undef INST
1272    };
1273 
1274    if (devinfo.ver < 8)
1275       return;
1276 
1277    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1278       if (!devinfo.has_64bit_float &&
1279           (inst[i].dst_type == ELK_REGISTER_TYPE_DF ||
1280            inst[i].src_type == ELK_REGISTER_TYPE_DF))
1281          continue;
1282 
1283       if (!devinfo.has_64bit_int &&
1284           (inst[i].dst_type == ELK_REGISTER_TYPE_Q ||
1285            inst[i].dst_type == ELK_REGISTER_TYPE_UQ ||
1286            inst[i].src_type == ELK_REGISTER_TYPE_Q ||
1287            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
1288          continue;
1289 
1290       elk_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
1291 
1292       elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_4);
1293 
1294       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1295       elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
1296 
1297       if (inst[i].src_type == ELK_REGISTER_TYPE_B) {
1298          elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1299          elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_2);
1300          elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
1301       } else {
1302          elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1303          elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
1304          elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
1305       }
1306 
1307       if (devinfo.platform == INTEL_PLATFORM_CHV) {
1308          EXPECT_EQ(inst[i].expected_result_chv, validate(p)) <<
1309             "Failing test is: " << i;
1310       } else {
1311          EXPECT_EQ(inst[i].expected_result_bdw, validate(p)) <<
1312             "Failing test is: " << i;
1313       }
1314 
1315       clear_instructions(p);
1316    }
1317 }
1318 
TEST_P(validation_test,mixed_float_source_indirect_addressing)1319 TEST_P(validation_test, mixed_float_source_indirect_addressing)
1320 {
1321    static const struct {
1322       enum elk_reg_type dst_type;
1323       enum elk_reg_type src0_type;
1324       enum elk_reg_type src1_type;
1325       unsigned dst_stride;
1326       bool dst_indirect;
1327       bool src0_indirect;
1328       bool expected_result;
1329       bool gfx125_expected_result;
1330    } inst[] = {
1331 #define INST(dst_type, src0_type, src1_type,                              \
1332              dst_stride, dst_indirect, src0_indirect, expected_result,    \
1333              gfx125_expected_result)                                      \
1334       {                                                                   \
1335          ELK_REGISTER_TYPE_##dst_type,                                    \
1336          ELK_REGISTER_TYPE_##src0_type,                                   \
1337          ELK_REGISTER_TYPE_##src1_type,                                   \
1338          ELK_HORIZONTAL_STRIDE_##dst_stride,                              \
1339          dst_indirect,                                                    \
1340          src0_indirect,                                                   \
1341          expected_result,                                                 \
1342          gfx125_expected_result,                                          \
1343       }
1344 
1345       /* Source and dest are mixed float: indirect src addressing not allowed */
1346       INST(HF,  F,  F, 2, false, false, true,  true),
1347       INST(HF,  F,  F, 2, true,  false, true,  true),
1348       INST(HF,  F,  F, 2, false, true,  false, false),
1349       INST(HF,  F,  F, 2, true,  true,  false, false),
1350       INST( F, HF,  F, 1, false, false, true,  false),
1351       INST( F, HF,  F, 1, true,  false, true,  false),
1352       INST( F, HF,  F, 1, false, true,  false, false),
1353       INST( F, HF,  F, 1, true,  true,  false, false),
1354 
1355       INST(HF, HF,  F, 2, false, false, true,  false),
1356       INST(HF, HF,  F, 2, true,  false, true,  false),
1357       INST(HF, HF,  F, 2, false, true,  false, false),
1358       INST(HF, HF,  F, 2, true,  true,  false, false),
1359       INST( F,  F, HF, 1, false, false, true,  false),
1360       INST( F,  F, HF, 1, true,  false, true,  false),
1361       INST( F,  F, HF, 1, false, true,  false, false),
1362       INST( F,  F, HF, 1, true,  true,  false, false),
1363 
1364 #undef INST
1365    };
1366 
1367    if (devinfo.ver < 8)
1368       return;
1369 
1370    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1371       elk_ADD(p, retype(g0, inst[i].dst_type),
1372                  retype(g0, inst[i].src0_type),
1373                  retype(g0, inst[i].src1_type));
1374 
1375       elk_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_indirect);
1376       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1377       elk_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src0_indirect);
1378 
1379       EXPECT_EQ(inst[i].expected_result, validate(p));
1380 
1381       clear_instructions(p);
1382    }
1383 }
1384 
TEST_P(validation_test,mixed_float_align1_simd16)1385 TEST_P(validation_test, mixed_float_align1_simd16)
1386 {
1387    static const struct {
1388       unsigned exec_size;
1389       enum elk_reg_type dst_type;
1390       enum elk_reg_type src0_type;
1391       enum elk_reg_type src1_type;
1392       unsigned dst_stride;
1393       bool expected_result;
1394       bool gfx125_expected_result;
1395    } inst[] = {
1396 #define INST(exec_size, dst_type, src0_type, src1_type,                   \
1397              dst_stride, expected_result, gfx125_expected_result)         \
1398       {                                                                   \
1399          ELK_EXECUTE_##exec_size,                                         \
1400          ELK_REGISTER_TYPE_##dst_type,                                    \
1401          ELK_REGISTER_TYPE_##src0_type,                                   \
1402          ELK_REGISTER_TYPE_##src1_type,                                   \
1403          ELK_HORIZONTAL_STRIDE_##dst_stride,                              \
1404          expected_result,                                                 \
1405          gfx125_expected_result,                                          \
1406       }
1407 
1408       /* No SIMD16 in mixed mode when destination is packed f16 */
1409       INST( 8, HF,  F, HF, 2, true,  false),
1410       INST(16, HF, HF,  F, 2, true,  false),
1411       INST(16, HF, HF,  F, 1, false, false),
1412       INST(16, HF,  F, HF, 1, false, false),
1413 
1414       /* No SIMD16 in mixed mode when destination is f32 */
1415       INST( 8,  F, HF,  F, 1, true,  false),
1416       INST( 8,  F,  F, HF, 1, true,  false),
1417       INST(16,  F, HF,  F, 1, false, false),
1418       INST(16,  F,  F, HF, 1, false, false),
1419 
1420 #undef INST
1421    };
1422 
1423    if (devinfo.ver < 8)
1424       return;
1425 
1426    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1427       elk_ADD(p, retype(g0, inst[i].dst_type),
1428                  retype(g0, inst[i].src0_type),
1429                  retype(g0, inst[i].src1_type));
1430 
1431       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
1432 
1433       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1434 
1435       EXPECT_EQ(inst[i].expected_result, validate(p));
1436 
1437       clear_instructions(p);
1438    }
1439 }
1440 
TEST_P(validation_test,mixed_float_align1_packed_fp16_dst_acc_read_offset_0)1441 TEST_P(validation_test, mixed_float_align1_packed_fp16_dst_acc_read_offset_0)
1442 {
1443    static const struct {
1444       enum elk_reg_type dst_type;
1445       enum elk_reg_type src0_type;
1446       enum elk_reg_type src1_type;
1447       unsigned dst_stride;
1448       bool read_acc;
1449       unsigned subnr;
1450       bool expected_result_bdw;
1451       bool expected_result_chv_skl;
1452       bool expected_result_gfx125;
1453    } inst[] = {
1454 #define INST(dst_type, src0_type, src1_type, dst_stride, read_acc, subnr,   \
1455              expected_result_bdw, expected_result_chv_skl,                  \
1456              expected_result_gfx125)                                        \
1457       {                                                                     \
1458          ELK_REGISTER_TYPE_##dst_type,                                      \
1459          ELK_REGISTER_TYPE_##src0_type,                                     \
1460          ELK_REGISTER_TYPE_##src1_type,                                     \
1461          ELK_HORIZONTAL_STRIDE_##dst_stride,                                \
1462          read_acc,                                                          \
1463          subnr,                                                             \
1464          expected_result_bdw,                                               \
1465          expected_result_chv_skl,                                           \
1466          expected_result_gfx125,                                            \
1467       }
1468 
1469       /* Destination is not packed */
1470       INST(HF, HF,  F, 2, true,  0, true, true, false),
1471       INST(HF, HF,  F, 2, true,  2, true, true, false),
1472       INST(HF, HF,  F, 2, true,  4, true, true, false),
1473       INST(HF, HF,  F, 2, true,  8, true, true, false),
1474       INST(HF, HF,  F, 2, true, 16, true, true, false),
1475 
1476       /* Destination is packed, we don't read acc */
1477       INST(HF, HF,  F, 1, false,  0, false, true, false),
1478       INST(HF, HF,  F, 1, false,  2, false, true, false),
1479       INST(HF, HF,  F, 1, false,  4, false, true, false),
1480       INST(HF, HF,  F, 1, false,  8, false, true, false),
1481       INST(HF, HF,  F, 1, false, 16, false, true, false),
1482 
1483       /* Destination is packed, we read acc */
1484       INST(HF, HF,  F, 1, true,  0, false, false, false),
1485       INST(HF, HF,  F, 1, true,  2, false, false, false),
1486       INST(HF, HF,  F, 1, true,  4, false, false, false),
1487       INST(HF, HF,  F, 1, true,  8, false, false, false),
1488       INST(HF, HF,  F, 1, true, 16, false, false, false),
1489 
1490 #undef INST
1491    };
1492 
1493    if (devinfo.ver < 8)
1494       return;
1495 
1496    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1497       elk_ADD(p, retype(g0, inst[i].dst_type),
1498                  retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type),
1499                  retype(g0, inst[i].src1_type));
1500 
1501       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1502 
1503       elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].subnr);
1504 
1505       if (devinfo.verx10 >= 125)
1506          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p));
1507       else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9)
1508          EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
1509       else
1510          EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
1511 
1512       clear_instructions(p);
1513    }
1514 }
1515 
TEST_P(validation_test,mixed_float_fp16_dest_with_acc)1516 TEST_P(validation_test, mixed_float_fp16_dest_with_acc)
1517 {
1518    static const struct {
1519       unsigned exec_size;
1520       unsigned opcode;
1521       enum elk_reg_type dst_type;
1522       enum elk_reg_type src0_type;
1523       enum elk_reg_type src1_type;
1524       unsigned dst_stride;
1525       bool read_acc;
1526       bool expected_result_bdw;
1527       bool expected_result_chv_skl;
1528       bool expected_result_gfx125;
1529    } inst[] = {
1530 #define INST(exec_size, opcode, dst_type, src0_type, src1_type,           \
1531              dst_stride, read_acc,expected_result_bdw,                    \
1532              expected_result_chv_skl, expected_result_gfx125)             \
1533       {                                                                   \
1534          ELK_EXECUTE_##exec_size,                                         \
1535          ELK_OPCODE_##opcode,                                             \
1536          ELK_REGISTER_TYPE_##dst_type,                                    \
1537          ELK_REGISTER_TYPE_##src0_type,                                   \
1538          ELK_REGISTER_TYPE_##src1_type,                                   \
1539          ELK_HORIZONTAL_STRIDE_##dst_stride,                              \
1540          read_acc,                                                        \
1541          expected_result_bdw,                                             \
1542          expected_result_chv_skl,                                         \
1543          expected_result_gfx125,                                          \
1544       }
1545 
1546       /* Packed fp16 dest with implicit acc needs hstride=2 */
1547       INST(8, MAC, HF, HF,  F, 1, false, false, false, false),
1548       INST(8, MAC, HF, HF,  F, 2, false, true,  true,  false),
1549       INST(8, MAC, HF,  F, HF, 1, false, false, false, false),
1550       INST(8, MAC, HF,  F, HF, 2, false, true,  true,  false),
1551 
1552       /* Packed fp16 dest with explicit acc needs hstride=2 */
1553       INST(8, ADD, HF, HF,  F, 1, true,  false, false, false),
1554       INST(8, ADD, HF, HF,  F, 2, true,  true,  true,  false),
1555       INST(8, ADD, HF,  F, HF, 1, true,  false, false, false),
1556       INST(8, ADD, HF,  F, HF, 2, true,  true,  true,  false),
1557 
1558       /* If destination is not fp16, restriction doesn't apply */
1559       INST(8, MAC,  F, HF,  F, 1, false, true, true, false),
1560       INST(8, MAC,  F, HF,  F, 2, false, true, true, false),
1561 
1562       /* If there is no implicit/explicit acc, restriction doesn't apply */
1563       INST(8, ADD, HF, HF,  F, 1, false, false, true, false),
1564       INST(8, ADD, HF, HF,  F, 2, false, true,  true, false),
1565       INST(8, ADD, HF,  F, HF, 1, false, false, true, false),
1566       INST(8, ADD, HF,  F, HF, 2, false, true,  true, false),
1567       INST(8, ADD,  F, HF,  F, 1, false, true,  true, false),
1568       INST(8, ADD,  F, HF,  F, 2, false, true,  true, false),
1569 
1570 #undef INST
1571    };
1572 
1573    if (devinfo.ver < 8)
1574       return;
1575 
1576    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1577       if (inst[i].opcode == ELK_OPCODE_MAC) {
1578          elk_MAC(p, retype(g0, inst[i].dst_type),
1579                     retype(g0, inst[i].src0_type),
1580                     retype(g0, inst[i].src1_type));
1581       } else {
1582          assert(inst[i].opcode == ELK_OPCODE_ADD);
1583          elk_ADD(p, retype(g0, inst[i].dst_type),
1584                     retype(inst[i].read_acc ? acc0: g0, inst[i].src0_type),
1585                     retype(g0, inst[i].src1_type));
1586       }
1587 
1588       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
1589 
1590       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1591 
1592       if (devinfo.verx10 >= 125)
1593          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p));
1594       else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9)
1595          EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
1596       else
1597          EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
1598 
1599       clear_instructions(p);
1600    }
1601 }
1602 
TEST_P(validation_test,mixed_float_align1_math_strided_fp16_inputs)1603 TEST_P(validation_test, mixed_float_align1_math_strided_fp16_inputs)
1604 {
1605    static const struct {
1606       enum elk_reg_type dst_type;
1607       enum elk_reg_type src0_type;
1608       enum elk_reg_type src1_type;
1609       unsigned dst_stride;
1610       unsigned src0_stride;
1611       unsigned src1_stride;
1612       bool expected_result;
1613       bool expected_result_gfx125;
1614    } inst[] = {
1615 #define INST(dst_type, src0_type, src1_type,                              \
1616              dst_stride, src0_stride, src1_stride, expected_result,       \
1617              expected_result_125)                                         \
1618       {                                                                   \
1619          ELK_REGISTER_TYPE_##dst_type,                                    \
1620          ELK_REGISTER_TYPE_##src0_type,                                   \
1621          ELK_REGISTER_TYPE_##src1_type,                                   \
1622          ELK_HORIZONTAL_STRIDE_##dst_stride,                              \
1623          ELK_HORIZONTAL_STRIDE_##src0_stride,                             \
1624          ELK_HORIZONTAL_STRIDE_##src1_stride,                             \
1625          expected_result,                                                 \
1626          expected_result_125,                                             \
1627       }
1628 
1629       INST(HF, HF,  F, 2, 2, 1, true,  false),
1630       INST(HF,  F, HF, 2, 1, 2, true,  false),
1631       INST(HF,  F, HF, 1, 1, 2, true,  false),
1632       INST(HF,  F, HF, 2, 1, 1, false, false),
1633       INST(HF, HF,  F, 2, 1, 1, false, false),
1634       INST(HF, HF,  F, 1, 1, 1, false, false),
1635       INST(HF, HF,  F, 2, 1, 1, false, false),
1636       INST( F, HF,  F, 1, 1, 1, false, false),
1637       INST( F,  F, HF, 1, 1, 2, true,  false),
1638       INST( F, HF, HF, 1, 2, 1, false, false),
1639       INST( F, HF, HF, 1, 2, 2, true,  false),
1640 
1641 #undef INST
1642    };
1643 
1644    /* No half-float math in gfx8 */
1645    if (devinfo.ver < 9)
1646       return;
1647 
1648    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1649       elk_gfx6_math(p, retype(g0, inst[i].dst_type),
1650                    ELK_MATH_FUNCTION_POW,
1651                    retype(g0, inst[i].src0_type),
1652                    retype(g0, inst[i].src1_type));
1653 
1654       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1655 
1656       elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1657       elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
1658       elk_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src0_stride);
1659 
1660       elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1661       elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
1662       elk_inst_set_src1_hstride(&devinfo, last_inst, inst[i].src1_stride);
1663 
1664       if (devinfo.verx10 >= 125)
1665          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p));
1666       else
1667          EXPECT_EQ(inst[i].expected_result, validate(p));
1668 
1669       clear_instructions(p);
1670    }
1671 }
1672 
TEST_P(validation_test,mixed_float_align1_packed_fp16_dst)1673 TEST_P(validation_test, mixed_float_align1_packed_fp16_dst)
1674 {
1675    static const struct {
1676       unsigned exec_size;
1677       enum elk_reg_type dst_type;
1678       enum elk_reg_type src0_type;
1679       enum elk_reg_type src1_type;
1680       unsigned dst_stride;
1681       unsigned dst_subnr;
1682       bool expected_result_bdw;
1683       bool expected_result_chv_skl;
1684       bool expected_result_gfx125;
1685    } inst[] = {
1686 #define INST(exec_size, dst_type, src0_type, src1_type, dst_stride, dst_subnr, \
1687              expected_result_bdw, expected_result_chv_skl,                     \
1688              expected_result_gfx125)                                           \
1689       {                                                                        \
1690          ELK_EXECUTE_##exec_size,                                              \
1691          ELK_REGISTER_TYPE_##dst_type,                                         \
1692          ELK_REGISTER_TYPE_##src0_type,                                        \
1693          ELK_REGISTER_TYPE_##src1_type,                                        \
1694          ELK_HORIZONTAL_STRIDE_##dst_stride,                                   \
1695          dst_subnr,                                                            \
1696          expected_result_bdw,                                                  \
1697          expected_result_chv_skl,                                              \
1698          expected_result_gfx125                                                \
1699       }
1700 
1701       /* SIMD8 packed fp16 dst won't cross oword boundaries if region is
1702        * oword-aligned
1703        */
1704       INST( 8, HF, HF,  F, 1,  0, false, true,  false),
1705       INST( 8, HF, HF,  F, 1,  2, false, false, false),
1706       INST( 8, HF, HF,  F, 1,  4, false, false, false),
1707       INST( 8, HF, HF,  F, 1,  8, false, false, false),
1708       INST( 8, HF, HF,  F, 1, 16, false, true,  false),
1709 
1710       /* SIMD16 packed fp16 always crosses oword boundaries */
1711       INST(16, HF, HF,  F, 1,  0, false, false, false),
1712       INST(16, HF, HF,  F, 1,  2, false, false, false),
1713       INST(16, HF, HF,  F, 1,  4, false, false, false),
1714       INST(16, HF, HF,  F, 1,  8, false, false, false),
1715       INST(16, HF, HF,  F, 1, 16, false, false, false),
1716 
1717       /* If destination is not packed (or not fp16) we can cross oword
1718        * boundaries
1719        */
1720       INST( 8, HF, HF,  F, 2,  0, true, true, false),
1721       INST( 8,  F, HF,  F, 1,  0, true, true, false),
1722 
1723 #undef INST
1724    };
1725 
1726    if (devinfo.ver < 8)
1727       return;
1728 
1729    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1730       elk_ADD(p, retype(g0, inst[i].dst_type),
1731                  retype(g0, inst[i].src0_type),
1732                  retype(g0, inst[i].src1_type));
1733 
1734       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1735       elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
1736 
1737       elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1738       elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
1739       elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
1740 
1741       elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
1742       elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
1743       elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
1744 
1745       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
1746 
1747       if (devinfo.verx10 >= 125)
1748          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p));
1749       else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9)
1750          EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
1751       else
1752          EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
1753 
1754       clear_instructions(p);
1755    }
1756 }
1757 
TEST_P(validation_test,mixed_float_align16_packed_data)1758 TEST_P(validation_test, mixed_float_align16_packed_data)
1759 {
1760    static const struct {
1761       enum elk_reg_type dst_type;
1762       enum elk_reg_type src0_type;
1763       enum elk_reg_type src1_type;
1764       unsigned src0_vstride;
1765       unsigned src1_vstride;
1766       bool expected_result;
1767    } inst[] = {
1768 #define INST(dst_type, src0_type, src1_type,                              \
1769              src0_vstride, src1_vstride, expected_result)                 \
1770       {                                                                   \
1771          ELK_REGISTER_TYPE_##dst_type,                                    \
1772          ELK_REGISTER_TYPE_##src0_type,                                   \
1773          ELK_REGISTER_TYPE_##src1_type,                                   \
1774          ELK_VERTICAL_STRIDE_##src0_vstride,                              \
1775          ELK_VERTICAL_STRIDE_##src1_vstride,                              \
1776          expected_result,                                                 \
1777       }
1778 
1779       /* We only test with F destination because there is a restriction
1780        * by which F->HF conversions need to be DWord aligned but Align16 also
1781        * requires that destination horizontal stride is 1.
1782        */
1783       INST(F,  F, HF, 4, 4, true),
1784       INST(F,  F, HF, 2, 4, false),
1785       INST(F,  F, HF, 4, 2, false),
1786       INST(F,  F, HF, 0, 4, false),
1787       INST(F,  F, HF, 4, 0, false),
1788       INST(F, HF,  F, 4, 4, true),
1789       INST(F, HF,  F, 4, 2, false),
1790       INST(F, HF,  F, 2, 4, false),
1791       INST(F, HF,  F, 0, 4, false),
1792       INST(F, HF,  F, 4, 0, false),
1793 
1794 #undef INST
1795    };
1796 
1797    if (devinfo.ver < 8 || devinfo.ver >= 11)
1798       return;
1799 
1800    elk_set_default_access_mode(p, ELK_ALIGN_16);
1801 
1802    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1803       elk_ADD(p, retype(g0, inst[i].dst_type),
1804                  retype(g0, inst[i].src0_type),
1805                  retype(g0, inst[i].src1_type));
1806 
1807       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
1808       elk_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);
1809 
1810       EXPECT_EQ(inst[i].expected_result, validate(p));
1811 
1812       clear_instructions(p);
1813    }
1814 }
1815 
TEST_P(validation_test, mixed_float_align16_no_simd16)
{
   /* The checks below only run for Gfx8 up to (not including) Gfx11. */
   const bool in_range = devinfo.ver >= 8 && devinfo.ver < 11;
   if (!in_range)
      return;

   /* Each row is an ADD with the given execution size and operand types,
    * plus the expected validate() verdict.
    */
   static const struct {
      unsigned exec_size;
      enum elk_reg_type dst_type;
      enum elk_reg_type src0_type;
      enum elk_reg_type src1_type;
      bool expected_result;
   } cases[] = {
#define INST(width, dst, src0, src1, valid)                               \
      {                                                                   \
         ELK_EXECUTE_##width,                                             \
         ELK_REGISTER_TYPE_##dst,                                         \
         ELK_REGISTER_TYPE_##src0,                                        \
         ELK_REGISTER_TYPE_##src1,                                        \
         valid,                                                           \
      }

      /* Only F destinations are exercised: F->HF conversions have to be
       * DWord aligned, while Align16 also requires a destination horizontal
       * stride of 1.
       *
       * NOTE(review): in each group of three, the last row repeats the
       * first one verbatim.
       */
      INST( 8,  F,  F, HF, true),
      INST( 8,  F, HF,  F, true),
      INST( 8,  F,  F, HF, true),
      INST(16,  F,  F, HF, false),
      INST(16,  F, HF,  F, false),
      INST(16,  F,  F, HF, false),

#undef INST
   };

   /* Every instruction emitted from here on is built in Align16 mode. */
   elk_set_default_access_mode(p, ELK_ALIGN_16);

   for (const auto &c : cases) {
      elk_ADD(p, retype(g0, c.dst_type),
                 retype(g0, c.src0_type),
                 retype(g0, c.src1_type));

      elk_inst_set_exec_size(&devinfo, last_inst, c.exec_size);

      /* Both sources get a vertical stride of 4 in every row. */
      elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);

      EXPECT_EQ(c.expected_result, validate(p));

      clear_instructions(p);
   }
}
1868 
TEST_P(validation_test, mixed_float_align16_no_acc_read)
{
   /* The checks below only run for Gfx8 up to (not including) Gfx11. */
   const bool in_range = devinfo.ver >= 8 && devinfo.ver < 11;
   if (!in_range)
      return;

   /* Each row is an ADD with the given operand types; read_acc selects
    * acc0 instead of g0 as src0.  The last column is the expected
    * validate() verdict.
    */
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src0_type;
      enum elk_reg_type src1_type;
      bool read_acc;
      bool expected_result;
   } cases[] = {
#define INST(dst, src0, src1, acc, valid)                                 \
      {                                                                   \
         ELK_REGISTER_TYPE_##dst,                                         \
         ELK_REGISTER_TYPE_##src0,                                        \
         ELK_REGISTER_TYPE_##src1,                                        \
         acc,                                                             \
         valid,                                                           \
      }

      /* Only F destinations are exercised: F->HF conversions have to be
       * DWord aligned, while Align16 also requires a destination horizontal
       * stride of 1.
       */
      INST( F,  F, HF, false, true),
      INST( F,  F, HF, true,  false),
      INST( F, HF,  F, false, true),
      INST( F, HF,  F, true,  false),

#undef INST
   };

   /* Every instruction emitted from here on is built in Align16 mode. */
   elk_set_default_access_mode(p, ELK_ALIGN_16);

   for (const auto &c : cases) {
      elk_ADD(p, retype(g0, c.dst_type),
                 retype(c.read_acc ? acc0 : g0, c.src0_type),
                 retype(g0, c.src1_type));

      /* Both sources get a vertical stride of 4 in every row. */
      elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);

      EXPECT_EQ(c.expected_result, validate(p));

      clear_instructions(p);
   }
}
1917 
TEST_P(validation_test,mixed_float_align16_math_packed_format)1918 TEST_P(validation_test, mixed_float_align16_math_packed_format)
1919 {
1920    static const struct {
1921       enum elk_reg_type dst_type;
1922       enum elk_reg_type src0_type;
1923       enum elk_reg_type src1_type;
1924       unsigned src0_vstride;
1925       unsigned src1_vstride;
1926       bool expected_result;
1927    } inst[] = {
1928 #define INST(dst_type, src0_type, src1_type,                              \
1929              src0_vstride, src1_vstride, expected_result)                 \
1930       {                                                                   \
1931          ELK_REGISTER_TYPE_##dst_type,                                    \
1932          ELK_REGISTER_TYPE_##src0_type,                                   \
1933          ELK_REGISTER_TYPE_##src1_type,                                   \
1934          ELK_VERTICAL_STRIDE_##src0_vstride,                              \
1935          ELK_VERTICAL_STRIDE_##src1_vstride,                              \
1936          expected_result,                                                 \
1937       }
1938 
1939       /* We only test with F destination because there is a restriction
1940        * by which F->HF conversions need to be DWord aligned but Align16 also
1941        * requires that destination horizontal stride is 1.
1942        */
1943       INST( F, HF,  F, 4, 0, false),
1944       INST( F, HF, HF, 4, 4, true),
1945       INST( F,  F, HF, 4, 0, false),
1946       INST( F,  F, HF, 2, 4, false),
1947       INST( F,  F, HF, 4, 2, false),
1948       INST( F, HF, HF, 0, 4, false),
1949 
1950 #undef INST
1951    };
1952 
1953    /* Align16 Math for mixed float mode is not supported in gfx8 */
1954    if (devinfo.ver < 9 || devinfo.ver >= 11)
1955       return;
1956 
1957    elk_set_default_access_mode(p, ELK_ALIGN_16);
1958 
1959    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1960       elk_gfx6_math(p, retype(g0, inst[i].dst_type),
1961                    ELK_MATH_FUNCTION_POW,
1962                    retype(g0, inst[i].src0_type),
1963                    retype(g0, inst[i].src1_type));
1964 
1965       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
1966       elk_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);
1967 
1968       EXPECT_EQ(inst[i].expected_result, validate(p));
1969 
1970       clear_instructions(p);
1971    }
1972 }
1973 
TEST_P(validation_test, vector_immediate_destination_alignment)
{
   /* Each row is a MOV from a vector-immediate source type into g0 retyped
    * to dst_type, with the destination subregister number and execution
    * size overridden, plus the expected validate() verdict.
    */
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src_type;
      unsigned subnr;
      unsigned exec_size;
      bool expected_result;
   } moves[] = {
#define INST(dst, src, subreg, width, valid)                              \
      {                                                                   \
         ELK_REGISTER_TYPE_##dst,                                         \
         ELK_REGISTER_TYPE_##src,                                         \
         subreg,                                                          \
         ELK_EXECUTE_##width,                                             \
         valid,                                                           \
      }

      INST(F, VF,  0, 4, true ),
      INST(F, VF, 16, 4, true ),
      INST(F, VF,  1, 4, false),

      INST(W, V,   0, 8, true ),
      INST(W, V,  16, 8, true ),
      INST(W, V,   1, 8, false),

      INST(W, UV,  0, 8, true ),
      INST(W, UV, 16, 8, true ),
      INST(W, UV,  1, 8, false),

#undef INST
   };

   /* UV type is Gfx6+ */
   const bool skip_uv = devinfo.ver < 6;

   for (const auto &m : moves) {
      if (skip_uv && m.src_type == ELK_REGISTER_TYPE_UV)
         continue;

      elk_MOV(p, retype(g0, m.dst_type), retype(zero, m.src_type));
      elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, m.subnr);
      elk_inst_set_exec_size(&devinfo, last_inst, m.exec_size);

      EXPECT_EQ(m.expected_result, validate(p));

      clear_instructions(p);
   }
}
2011 
TEST_P(validation_test, vector_immediate_destination_stride)
{
   /* Each row is a MOV from a vector-immediate source type into g0 retyped
    * to dst_type, with the destination horizontal stride overridden, plus
    * the expected validate() verdict.
    */
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src_type;
      unsigned stride;
      bool expected_result;
   } moves[] = {
#define INST(dst, src, hstride, valid)                                    \
      {                                                                   \
         ELK_REGISTER_TYPE_##dst,                                         \
         ELK_REGISTER_TYPE_##src,                                         \
         ELK_HORIZONTAL_STRIDE_##hstride,                                 \
         valid,                                                           \
      }

      INST(F, VF, 1, true ),
      INST(F, VF, 2, false),
      INST(D, VF, 1, true ),
      INST(D, VF, 2, false),
      INST(W, VF, 2, true ),
      INST(B, VF, 4, true ),

      INST(W, V,  1, true ),
      INST(W, V,  2, false),
      INST(W, V,  4, false),
      INST(B, V,  2, true ),

      INST(W, UV, 1, true ),
      INST(W, UV, 2, false),
      INST(W, UV, 4, false),
      INST(B, UV, 2, true ),

#undef INST
   };

   /* UV type is Gfx6+ */
   const bool skip_uv = devinfo.ver < 6;

   for (const auto &m : moves) {
      if (skip_uv && m.src_type == ELK_REGISTER_TYPE_UV)
         continue;

      elk_MOV(p, retype(g0, m.dst_type), retype(zero, m.src_type));
      elk_inst_set_dst_hstride(&devinfo, last_inst, m.stride);

      EXPECT_EQ(m.expected_result, validate(p));

      clear_instructions(p);
   }
}
2052 
TEST_P(validation_test,qword_low_power_align1_regioning_restrictions)2053 TEST_P(validation_test, qword_low_power_align1_regioning_restrictions)
2054 {
2055    static const struct {
2056       enum elk_opcode opcode;
2057       unsigned exec_size;
2058 
2059       enum elk_reg_type dst_type;
2060       unsigned dst_subreg;
2061       unsigned dst_stride;
2062 
2063       enum elk_reg_type src_type;
2064       unsigned src_subreg;
2065       unsigned src_vstride;
2066       unsigned src_width;
2067       unsigned src_hstride;
2068 
2069       bool expected_result;
2070    } inst[] = {
2071 #define INST(opcode, exec_size, dst_type, dst_subreg, dst_stride, src_type,    \
2072              src_subreg, src_vstride, src_width, src_hstride, expected_result) \
2073       {                                                                        \
2074          ELK_OPCODE_##opcode,                                                  \
2075          ELK_EXECUTE_##exec_size,                                              \
2076          ELK_REGISTER_TYPE_##dst_type,                                         \
2077          dst_subreg,                                                           \
2078          ELK_HORIZONTAL_STRIDE_##dst_stride,                                   \
2079          ELK_REGISTER_TYPE_##src_type,                                         \
2080          src_subreg,                                                           \
2081          ELK_VERTICAL_STRIDE_##src_vstride,                                    \
2082          ELK_WIDTH_##src_width,                                                \
2083          ELK_HORIZONTAL_STRIDE_##src_hstride,                                  \
2084          expected_result,                                                      \
2085       }
2086 
2087       /* Some instruction that violate no restrictions, as a control */
2088       INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
2089       INST(MOV, 4, Q,  0, 1, Q,  0, 4, 4, 1, true ),
2090       INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),
2091 
2092       INST(MOV, 4, DF, 0, 1, F,  0, 8, 4, 2, true ),
2093       INST(MOV, 4, Q,  0, 1, D,  0, 8, 4, 2, true ),
2094       INST(MOV, 4, UQ, 0, 1, UD, 0, 8, 4, 2, true ),
2095 
2096       INST(MOV, 4, F,  0, 2, DF, 0, 4, 4, 1, true ),
2097       INST(MOV, 4, D,  0, 2, Q,  0, 4, 4, 1, true ),
2098       INST(MOV, 4, UD, 0, 2, UQ, 0, 4, 4, 1, true ),
2099 
2100       INST(MUL, 8, D,  0, 2, D,  0, 8, 4, 2, true ),
2101       INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),
2102 
2103       /* Something with subreg nrs */
2104       INST(MOV, 2, DF, 8, 1, DF, 8, 2, 2, 1, true ),
2105       INST(MOV, 2, Q,  8, 1, Q,  8, 2, 2, 1, true ),
2106       INST(MOV, 2, UQ, 8, 1, UQ, 8, 2, 2, 1, true ),
2107 
2108       INST(MUL, 2, D,  4, 2, D,  4, 4, 2, 2, true ),
2109       INST(MUL, 2, UD, 4, 2, UD, 4, 4, 2, 2, true ),
2110 
2111       /* The PRMs say that for CHV, BXT:
2112        *
2113        *    When source or destination datatype is 64b or operation is integer
2114        *    DWord multiply, regioning in Align1 must follow these rules:
2115        *
2116        *    1. Source and Destination horizontal stride must be aligned to the
2117        *       same qword.
2118        */
2119       INST(MOV, 4, DF, 0, 2, DF, 0, 4, 4, 1, false),
2120       INST(MOV, 4, Q,  0, 2, Q,  0, 4, 4, 1, false),
2121       INST(MOV, 4, UQ, 0, 2, UQ, 0, 4, 4, 1, false),
2122 
2123       INST(MOV, 4, DF, 0, 2, F,  0, 8, 4, 2, false),
2124       INST(MOV, 4, Q,  0, 2, D,  0, 8, 4, 2, false),
2125       INST(MOV, 4, UQ, 0, 2, UD, 0, 8, 4, 2, false),
2126 
2127       INST(MOV, 4, DF, 0, 2, F,  0, 4, 4, 1, false),
2128       INST(MOV, 4, Q,  0, 2, D,  0, 4, 4, 1, false),
2129       INST(MOV, 4, UQ, 0, 2, UD, 0, 4, 4, 1, false),
2130 
2131       INST(MUL, 4, D,  0, 2, D,  0, 4, 4, 1, false),
2132       INST(MUL, 4, UD, 0, 2, UD, 0, 4, 4, 1, false),
2133 
2134       INST(MUL, 4, D,  0, 1, D,  0, 8, 4, 2, false),
2135       INST(MUL, 4, UD, 0, 1, UD, 0, 8, 4, 2, false),
2136 
2137       /*    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. */
2138       INST(MOV, 4, DF, 0, 1, DF, 0, 0, 2, 1, false),
2139       INST(MOV, 4, Q,  0, 1, Q,  0, 0, 2, 1, false),
2140       INST(MOV, 4, UQ, 0, 1, UQ, 0, 0, 2, 1, false),
2141 
2142       INST(MOV, 4, DF, 0, 1, F,  0, 0, 2, 2, false),
2143       INST(MOV, 4, Q,  0, 1, D,  0, 0, 2, 2, false),
2144       INST(MOV, 4, UQ, 0, 1, UD, 0, 0, 2, 2, false),
2145 
2146       INST(MOV, 8, F,  0, 2, DF, 0, 0, 2, 1, false),
2147       INST(MOV, 8, D,  0, 2, Q,  0, 0, 2, 1, false),
2148       INST(MOV, 8, UD, 0, 2, UQ, 0, 0, 2, 1, false),
2149 
2150       INST(MUL, 8, D,  0, 2, D,  0, 0, 4, 2, false),
2151       INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),
2152 
2153       INST(MUL, 8, D,  0, 2, D,  0, 0, 4, 2, false),
2154       INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),
2155 
2156       /*    3. Source and Destination offset must be the same, except the case
2157        *       of scalar source.
2158        */
2159       INST(MOV, 2, DF, 8, 1, DF, 0, 2, 2, 1, false),
2160       INST(MOV, 2, Q,  8, 1, Q,  0, 2, 2, 1, false),
2161       INST(MOV, 2, UQ, 8, 1, UQ, 0, 2, 2, 1, false),
2162 
2163       INST(MOV, 2, DF, 0, 1, DF, 8, 2, 2, 1, false),
2164       INST(MOV, 2, Q,  0, 1, Q,  8, 2, 2, 1, false),
2165       INST(MOV, 2, UQ, 0, 1, UQ, 8, 2, 2, 1, false),
2166 
2167       INST(MUL, 4, D,  4, 2, D,  0, 4, 2, 2, false),
2168       INST(MUL, 4, UD, 4, 2, UD, 0, 4, 2, 2, false),
2169 
2170       INST(MUL, 4, D,  0, 2, D,  4, 4, 2, 2, false),
2171       INST(MUL, 4, UD, 0, 2, UD, 4, 4, 2, 2, false),
2172 
2173       INST(MOV, 2, DF, 8, 1, DF, 0, 0, 1, 0, true ),
2174       INST(MOV, 2, Q,  8, 1, Q,  0, 0, 1, 0, true ),
2175       INST(MOV, 2, UQ, 8, 1, UQ, 0, 0, 1, 0, true ),
2176 
2177       INST(MOV, 2, DF, 8, 1, F,  4, 0, 1, 0, true ),
2178       INST(MOV, 2, Q,  8, 1, D,  4, 0, 1, 0, true ),
2179       INST(MOV, 2, UQ, 8, 1, UD, 4, 0, 1, 0, true ),
2180 
2181       INST(MUL, 4, D,  4, 1, D,  0, 0, 1, 0, true ),
2182       INST(MUL, 4, UD, 4, 1, UD, 0, 0, 1, 0, true ),
2183 
2184       INST(MUL, 4, D,  0, 1, D,  4, 0, 1, 0, true ),
2185       INST(MUL, 4, UD, 0, 1, UD, 4, 0, 1, 0, true ),
2186 
2187 #undef INST
2188    };
2189 
2190    /* These restrictions only apply to Gfx8+ */
2191    if (devinfo.ver < 8)
2192       return;
2193 
2194    /* NoDDChk/NoDDClr does not exist on Gfx12+ */
2195    if (devinfo.ver >= 12)
2196       return;
2197 
2198    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2199       if (!devinfo.has_64bit_float &&
2200           (inst[i].dst_type == ELK_REGISTER_TYPE_DF ||
2201            inst[i].src_type == ELK_REGISTER_TYPE_DF))
2202          continue;
2203 
2204       if (!devinfo.has_64bit_int &&
2205           (inst[i].dst_type == ELK_REGISTER_TYPE_Q ||
2206            inst[i].dst_type == ELK_REGISTER_TYPE_UQ ||
2207            inst[i].src_type == ELK_REGISTER_TYPE_Q ||
2208            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
2209          continue;
2210 
2211       if (inst[i].opcode == ELK_OPCODE_MOV) {
2212          elk_MOV(p, retype(g0, inst[i].dst_type),
2213                     retype(g0, inst[i].src_type));
2214       } else {
2215          assert(inst[i].opcode == ELK_OPCODE_MUL);
2216          elk_MUL(p, retype(g0, inst[i].dst_type),
2217                     retype(g0, inst[i].src_type),
2218                     retype(zero, inst[i].src_type));
2219       }
2220       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2221 
2222       elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subreg);
2223       elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].src_subreg);
2224 
2225       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2226 
2227       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2228       elk_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2229       elk_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2230 
2231       if (devinfo.platform == INTEL_PLATFORM_CHV) {
2232          EXPECT_EQ(inst[i].expected_result, validate(p));
2233       } else {
2234          EXPECT_TRUE(validate(p));
2235       }
2236 
2237       clear_instructions(p);
2238    }
2239 }
2240 
TEST_P(validation_test,qword_low_power_no_indirect_addressing)2241 TEST_P(validation_test, qword_low_power_no_indirect_addressing)
2242 {
2243    static const struct {
2244       enum elk_opcode opcode;
2245       unsigned exec_size;
2246 
2247       enum elk_reg_type dst_type;
2248       bool dst_is_indirect;
2249       unsigned dst_stride;
2250 
2251       enum elk_reg_type src_type;
2252       bool src_is_indirect;
2253       unsigned src_vstride;
2254       unsigned src_width;
2255       unsigned src_hstride;
2256 
2257       bool expected_result;
2258    } inst[] = {
2259 #define INST(opcode, exec_size, dst_type, dst_is_indirect, dst_stride,         \
2260              src_type, src_is_indirect, src_vstride, src_width, src_hstride,   \
2261              expected_result)                                                  \
2262       {                                                                        \
2263          ELK_OPCODE_##opcode,                                                  \
2264          ELK_EXECUTE_##exec_size,                                              \
2265          ELK_REGISTER_TYPE_##dst_type,                                         \
2266          dst_is_indirect,                                                      \
2267          ELK_HORIZONTAL_STRIDE_##dst_stride,                                   \
2268          ELK_REGISTER_TYPE_##src_type,                                         \
2269          src_is_indirect,                                                      \
2270          ELK_VERTICAL_STRIDE_##src_vstride,                                    \
2271          ELK_WIDTH_##src_width,                                                \
2272          ELK_HORIZONTAL_STRIDE_##src_hstride,                                  \
2273          expected_result,                                                      \
2274       }
2275 
2276       /* Some instruction that violate no restrictions, as a control */
2277       INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
2278       INST(MOV, 4, Q,  0, 1, Q,  0, 4, 4, 1, true ),
2279       INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),
2280 
2281       INST(MUL, 8, D,  0, 2, D,  0, 8, 4, 2, true ),
2282       INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),
2283 
2284       INST(MOV, 4, F,  1, 1, F,  0, 4, 4, 1, true ),
2285       INST(MOV, 4, F,  0, 1, F,  1, 4, 4, 1, true ),
2286       INST(MOV, 4, F,  1, 1, F,  1, 4, 4, 1, true ),
2287 
2288       /* The PRMs say that for CHV, BXT:
2289        *
2290        *    When source or destination datatype is 64b or operation is integer
2291        *    DWord multiply, indirect addressing must not be used.
2292        */
2293       INST(MOV, 4, DF, 1, 1, DF, 0, 4, 4, 1, false),
2294       INST(MOV, 4, Q,  1, 1, Q,  0, 4, 4, 1, false),
2295       INST(MOV, 4, UQ, 1, 1, UQ, 0, 4, 4, 1, false),
2296 
2297       INST(MOV, 4, DF, 0, 1, DF, 1, 4, 4, 1, false),
2298       INST(MOV, 4, Q,  0, 1, Q,  1, 4, 4, 1, false),
2299       INST(MOV, 4, UQ, 0, 1, UQ, 1, 4, 4, 1, false),
2300 
2301       INST(MOV, 4, DF, 1, 1, F,  0, 8, 4, 2, false),
2302       INST(MOV, 4, Q,  1, 1, D,  0, 8, 4, 2, false),
2303       INST(MOV, 4, UQ, 1, 1, UD, 0, 8, 4, 2, false),
2304 
2305       INST(MOV, 4, DF, 0, 1, F,  1, 8, 4, 2, false),
2306       INST(MOV, 4, Q,  0, 1, D,  1, 8, 4, 2, false),
2307       INST(MOV, 4, UQ, 0, 1, UD, 1, 8, 4, 2, false),
2308 
2309       INST(MOV, 4, F,  1, 2, DF, 0, 4, 4, 1, false),
2310       INST(MOV, 4, D,  1, 2, Q,  0, 4, 4, 1, false),
2311       INST(MOV, 4, UD, 1, 2, UQ, 0, 4, 4, 1, false),
2312 
2313       INST(MOV, 4, F,  0, 2, DF, 1, 4, 4, 1, false),
2314       INST(MOV, 4, D,  0, 2, Q,  1, 4, 4, 1, false),
2315       INST(MOV, 4, UD, 0, 2, UQ, 1, 4, 4, 1, false),
2316 
2317       INST(MUL, 8, D,  1, 2, D,  0, 8, 4, 2, false),
2318       INST(MUL, 8, UD, 1, 2, UD, 0, 8, 4, 2, false),
2319 
2320       INST(MUL, 8, D,  0, 2, D,  1, 8, 4, 2, false),
2321       INST(MUL, 8, UD, 0, 2, UD, 1, 8, 4, 2, false),
2322 
2323 #undef INST
2324    };
2325 
2326    /* These restrictions only apply to Gfx8+ */
2327    if (devinfo.ver < 8)
2328       return;
2329 
2330    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2331       if (!devinfo.has_64bit_float &&
2332           (inst[i].dst_type == ELK_REGISTER_TYPE_DF ||
2333            inst[i].src_type == ELK_REGISTER_TYPE_DF))
2334          continue;
2335 
2336       if (!devinfo.has_64bit_int &&
2337           (inst[i].dst_type == ELK_REGISTER_TYPE_Q ||
2338            inst[i].dst_type == ELK_REGISTER_TYPE_UQ ||
2339            inst[i].src_type == ELK_REGISTER_TYPE_Q ||
2340            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
2341          continue;
2342 
2343       if (inst[i].opcode == ELK_OPCODE_MOV) {
2344          elk_MOV(p, retype(g0, inst[i].dst_type),
2345                     retype(g0, inst[i].src_type));
2346       } else {
2347          assert(inst[i].opcode == ELK_OPCODE_MUL);
2348          elk_MUL(p, retype(g0, inst[i].dst_type),
2349                     retype(g0, inst[i].src_type),
2350                     retype(zero, inst[i].src_type));
2351       }
2352       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2353 
2354       elk_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_is_indirect);
2355       elk_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src_is_indirect);
2356 
2357       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2358 
2359       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2360       elk_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2361       elk_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2362 
2363       if (devinfo.platform == INTEL_PLATFORM_CHV) {
2364          EXPECT_EQ(inst[i].expected_result, validate(p));
2365       } else {
2366          EXPECT_TRUE(validate(p));
2367       }
2368 
2369       clear_instructions(p);
2370    }
2371 }
2372 
TEST_P(validation_test,qword_low_power_no_64bit_arf)2373 TEST_P(validation_test, qword_low_power_no_64bit_arf)
2374 {
2375    static const struct {
2376       enum elk_opcode opcode;
2377       unsigned exec_size;
2378 
2379       struct elk_reg dst;
2380       enum elk_reg_type dst_type;
2381       unsigned dst_stride;
2382 
2383       struct elk_reg src;
2384       enum elk_reg_type src_type;
2385       unsigned src_vstride;
2386       unsigned src_width;
2387       unsigned src_hstride;
2388 
2389       bool acc_wr;
2390       bool expected_result;
2391    } inst[] = {
2392 #define INST(opcode, exec_size, dst, dst_type, dst_stride,                     \
2393              src, src_type, src_vstride, src_width, src_hstride,               \
2394              acc_wr, expected_result)                                          \
2395       {                                                                        \
2396          ELK_OPCODE_##opcode,                                                  \
2397          ELK_EXECUTE_##exec_size,                                              \
2398          dst,                                                                  \
2399          ELK_REGISTER_TYPE_##dst_type,                                         \
2400          ELK_HORIZONTAL_STRIDE_##dst_stride,                                   \
2401          src,                                                                  \
2402          ELK_REGISTER_TYPE_##src_type,                                         \
2403          ELK_VERTICAL_STRIDE_##src_vstride,                                    \
2404          ELK_WIDTH_##src_width,                                                \
2405          ELK_HORIZONTAL_STRIDE_##src_hstride,                                  \
2406          acc_wr,                                                               \
2407          expected_result,                                                      \
2408       }
2409 
2410       /* Some instruction that violate no restrictions, as a control */
2411       INST(MOV, 4, g0,   DF, 1, g0,   F,  4, 2, 2, 0, true ),
2412       INST(MOV, 4, g0,   F,  2, g0,   DF, 4, 4, 1, 0, true ),
2413 
2414       INST(MOV, 4, g0,   Q,  1, g0,   D,  4, 2, 2, 0, true ),
2415       INST(MOV, 4, g0,   D,  2, g0,   Q,  4, 4, 1, 0, true ),
2416 
2417       INST(MOV, 4, g0,   UQ, 1, g0,   UD, 4, 2, 2, 0, true ),
2418       INST(MOV, 4, g0,   UD, 2, g0,   UQ, 4, 4, 1, 0, true ),
2419 
2420       INST(MOV, 4, null, F,  1, g0,   F,  4, 4, 1, 0, true ),
2421       INST(MOV, 4, acc0, F,  1, g0,   F,  4, 4, 1, 0, true ),
2422       INST(MOV, 4, g0,   F,  1, acc0, F,  4, 4, 1, 0, true ),
2423 
2424       INST(MOV, 4, null, D,  1, g0,   D,  4, 4, 1, 0, true ),
2425       INST(MOV, 4, acc0, D,  1, g0,   D,  4, 4, 1, 0, true ),
2426       INST(MOV, 4, g0,   D,  1, acc0, D,  4, 4, 1, 0, true ),
2427 
2428       INST(MOV, 4, null, UD, 1, g0,   UD, 4, 4, 1, 0, true ),
2429       INST(MOV, 4, acc0, UD, 1, g0,   UD, 4, 4, 1, 0, true ),
2430       INST(MOV, 4, g0,   UD, 1, acc0, UD, 4, 4, 1, 0, true ),
2431 
2432       INST(MUL, 4, g0,   D,  2, g0,   D,  4, 2, 2, 0, true ),
2433       INST(MUL, 4, g0,   UD, 2, g0,   UD, 4, 2, 2, 0, true ),
2434 
2435       /* The PRMs say that for CHV, BXT:
2436        *
2437        *    ARF registers must never be used with 64b datatype or when
2438        *    operation is integer DWord multiply.
2439        */
2440       INST(MOV, 4, acc0, DF, 1, g0,   F,  4, 2, 2, 0, false),
2441       INST(MOV, 4, g0,   DF, 1, acc0, F,  4, 2, 2, 0, false),
2442 
2443       INST(MOV, 4, acc0, Q,  1, g0,   D,  4, 2, 2, 0, false),
2444       INST(MOV, 4, g0,   Q,  1, acc0, D,  4, 2, 2, 0, false),
2445 
2446       INST(MOV, 4, acc0, UQ, 1, g0,   UD, 4, 2, 2, 0, false),
2447       INST(MOV, 4, g0,   UQ, 1, acc0, UD, 4, 2, 2, 0, false),
2448 
2449       INST(MOV, 4, acc0, F,  2, g0,   DF, 4, 4, 1, 0, false),
2450       INST(MOV, 4, g0,   F,  2, acc0, DF, 4, 4, 1, 0, false),
2451 
2452       INST(MOV, 4, acc0, D,  2, g0,   Q,  4, 4, 1, 0, false),
2453       INST(MOV, 4, g0,   D,  2, acc0, Q,  4, 4, 1, 0, false),
2454 
2455       INST(MOV, 4, acc0, UD, 2, g0,   UQ, 4, 4, 1, 0, false),
2456       INST(MOV, 4, g0,   UD, 2, acc0, UQ, 4, 4, 1, 0, false),
2457 
2458       INST(MUL, 4, acc0, D,  2, g0,   D,  4, 2, 2, 0, false),
2459       INST(MUL, 4, acc0, UD, 2, g0,   UD, 4, 2, 2, 0, false),
2460       /* MUL cannot have integer accumulator sources, so don't test that */
2461 
2462       /* We assume that the restriction does not apply to the null register */
2463       INST(MOV, 4, null, DF, 1, g0,   F,  4, 2, 2, 0, true ),
2464       INST(MOV, 4, null, Q,  1, g0,   D,  4, 2, 2, 0, true ),
2465       INST(MOV, 4, null, UQ, 1, g0,   UD, 4, 2, 2, 0, true ),
2466 
2467       /* Check implicit accumulator write control */
2468       INST(MOV, 4, null, DF, 1, g0,   F,  4, 2, 2, 1, false),
2469       INST(MUL, 4, null, DF, 1, g0,   F,  4, 2, 2, 1, false),
2470 
2471 #undef INST
2472    };
2473 
2474    /* These restrictions only apply to Gfx8+ */
2475    if (devinfo.ver < 8)
2476       return;
2477 
2478    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2479       if (!devinfo.has_64bit_float &&
2480           (inst[i].dst_type == ELK_REGISTER_TYPE_DF ||
2481            inst[i].src_type == ELK_REGISTER_TYPE_DF))
2482          continue;
2483 
2484       if (!devinfo.has_64bit_int &&
2485           (inst[i].dst_type == ELK_REGISTER_TYPE_Q ||
2486            inst[i].dst_type == ELK_REGISTER_TYPE_UQ ||
2487            inst[i].src_type == ELK_REGISTER_TYPE_Q ||
2488            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
2489          continue;
2490 
2491       if (inst[i].opcode == ELK_OPCODE_MOV) {
2492          elk_MOV(p, retype(inst[i].dst, inst[i].dst_type),
2493                     retype(inst[i].src, inst[i].src_type));
2494       } else {
2495          assert(inst[i].opcode == ELK_OPCODE_MUL);
2496          elk_MUL(p, retype(inst[i].dst, inst[i].dst_type),
2497                     retype(inst[i].src, inst[i].src_type),
2498                     retype(zero, inst[i].src_type));
2499          elk_inst_set_opcode(&isa, last_inst, inst[i].opcode);
2500       }
2501       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2502       elk_inst_set_acc_wr_control(&devinfo, last_inst, inst[i].acc_wr);
2503 
2504       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2505 
2506       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2507       elk_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2508       elk_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2509 
2510       /* Note: The Broadwell PRM also lists the restriction that destination
2511        * of DWord multiplication cannot be the accumulator.
2512        */
2513       if (devinfo.platform == INTEL_PLATFORM_CHV ||
2514           (devinfo.ver == 8 &&
2515            inst[i].opcode == ELK_OPCODE_MUL &&
2516            elk_inst_dst_reg_file(&devinfo, last_inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
2517            elk_inst_dst_da_reg_nr(&devinfo, last_inst) != ELK_ARF_NULL)) {
2518          EXPECT_EQ(inst[i].expected_result, validate(p));
2519       } else {
2520          EXPECT_TRUE(validate(p));
2521       }
2522 
2523       clear_instructions(p);
2524    }
2525 
2526    if (!devinfo.has_64bit_float)
2527       return;
2528 
2529    /* MAC implicitly reads the accumulator */
2530    elk_MAC(p, retype(g0, ELK_REGISTER_TYPE_DF),
2531               retype(stride(g0, 4, 4, 1), ELK_REGISTER_TYPE_DF),
2532               retype(stride(g0, 4, 4, 1), ELK_REGISTER_TYPE_DF));
2533    if (devinfo.platform == INTEL_PLATFORM_CHV) {
2534       EXPECT_FALSE(validate(p));
2535    } else {
2536       EXPECT_TRUE(validate(p));
2537    }
2538 }
2539 
TEST_P(validation_test,align16_64_bit_integer)2540 TEST_P(validation_test, align16_64_bit_integer)
2541 {
2542    static const struct {
2543       enum elk_opcode opcode;
2544       unsigned exec_size;
2545 
2546       enum elk_reg_type dst_type;
2547       enum elk_reg_type src_type;
2548 
2549       bool expected_result;
2550    } inst[] = {
2551 #define INST(opcode, exec_size, dst_type, src_type, expected_result)           \
2552       {                                                                        \
2553          ELK_OPCODE_##opcode,                                                  \
2554          ELK_EXECUTE_##exec_size,                                              \
2555          ELK_REGISTER_TYPE_##dst_type,                                         \
2556          ELK_REGISTER_TYPE_##src_type,                                         \
2557          expected_result,                                                      \
2558       }
2559 
2560       /* Some instruction that violate no restrictions, as a control */
2561       INST(MOV, 2, Q,  D,  true ),
2562       INST(MOV, 2, UQ, UD, true ),
2563       INST(MOV, 2, DF, F,  true ),
2564 
2565       INST(ADD, 2, Q,  D,  true ),
2566       INST(ADD, 2, UQ, UD, true ),
2567       INST(ADD, 2, DF, F,  true ),
2568 
2569       /* The PRMs say that for BDW, SKL:
2570        *
2571        *    If Align16 is required for an operation with QW destination and non-QW
2572        *    source datatypes, the execution size cannot exceed 2.
2573        */
2574 
2575       INST(MOV, 4, Q,  D,  false),
2576       INST(MOV, 4, UQ, UD, false),
2577       INST(MOV, 4, DF, F,  false),
2578 
2579       INST(ADD, 4, Q,  D,  false),
2580       INST(ADD, 4, UQ, UD, false),
2581       INST(ADD, 4, DF, F,  false),
2582 
2583 #undef INST
2584    };
2585 
2586    /* 64-bit integer types exist on Gfx8+ */
2587    if (devinfo.ver < 8)
2588       return;
2589 
2590    /* Align16 does not exist on Gfx11+ */
2591    if (devinfo.ver >= 11)
2592       return;
2593 
2594    elk_set_default_access_mode(p, ELK_ALIGN_16);
2595 
2596    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2597       if (inst[i].opcode == ELK_OPCODE_MOV) {
2598          elk_MOV(p, retype(g0, inst[i].dst_type),
2599                     retype(g0, inst[i].src_type));
2600       } else {
2601          assert(inst[i].opcode == ELK_OPCODE_ADD);
2602          elk_ADD(p, retype(g0, inst[i].dst_type),
2603                     retype(g0, inst[i].src_type),
2604                     retype(g0, inst[i].src_type));
2605       }
2606       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2607 
2608       EXPECT_EQ(inst[i].expected_result, validate(p));
2609 
2610       clear_instructions(p);
2611    }
2612 }
2613 
TEST_P(validation_test,qword_low_power_no_depctrl)2614 TEST_P(validation_test, qword_low_power_no_depctrl)
2615 {
2616    static const struct {
2617       enum elk_opcode opcode;
2618       unsigned exec_size;
2619 
2620       enum elk_reg_type dst_type;
2621       unsigned dst_stride;
2622 
2623       enum elk_reg_type src_type;
2624       unsigned src_vstride;
2625       unsigned src_width;
2626       unsigned src_hstride;
2627 
2628       bool no_dd_check;
2629       bool no_dd_clear;
2630 
2631       bool expected_result;
2632    } inst[] = {
2633 #define INST(opcode, exec_size, dst_type, dst_stride,                          \
2634              src_type, src_vstride, src_width, src_hstride,                    \
2635              no_dd_check, no_dd_clear, expected_result)                        \
2636       {                                                                        \
2637          ELK_OPCODE_##opcode,                                                  \
2638          ELK_EXECUTE_##exec_size,                                              \
2639          ELK_REGISTER_TYPE_##dst_type,                                         \
2640          ELK_HORIZONTAL_STRIDE_##dst_stride,                                   \
2641          ELK_REGISTER_TYPE_##src_type,                                         \
2642          ELK_VERTICAL_STRIDE_##src_vstride,                                    \
2643          ELK_WIDTH_##src_width,                                                \
2644          ELK_HORIZONTAL_STRIDE_##src_hstride,                                  \
2645          no_dd_check,                                                          \
2646          no_dd_clear,                                                          \
2647          expected_result,                                                      \
2648       }
2649 
2650       /* Some instruction that violate no restrictions, as a control */
2651       INST(MOV, 4, DF, 1, F,  8, 4, 2, 0, 0, true ),
2652       INST(MOV, 4, Q,  1, D,  8, 4, 2, 0, 0, true ),
2653       INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 0, true ),
2654 
2655       INST(MOV, 4, F,  2, DF, 4, 4, 1, 0, 0, true ),
2656       INST(MOV, 4, D,  2, Q,  4, 4, 1, 0, 0, true ),
2657       INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 0, true ),
2658 
2659       INST(MUL, 8, D,  2, D,  8, 4, 2, 0, 0, true ),
2660       INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 0, true ),
2661 
2662       INST(MOV, 4, F,  1, F,  4, 4, 1, 1, 1, true ),
2663 
2664       /* The PRMs say that for CHV, BXT:
2665        *
2666        *    When source or destination datatype is 64b or operation is integer
2667        *    DWord multiply, DepCtrl must not be used.
2668        */
2669       INST(MOV, 4, DF, 1, F,  8, 4, 2, 1, 0, false),
2670       INST(MOV, 4, Q,  1, D,  8, 4, 2, 1, 0, false),
2671       INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 1, 0, false),
2672 
2673       INST(MOV, 4, F,  2, DF, 4, 4, 1, 1, 0, false),
2674       INST(MOV, 4, D,  2, Q,  4, 4, 1, 1, 0, false),
2675       INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 1, 0, false),
2676 
2677       INST(MOV, 4, DF, 1, F,  8, 4, 2, 0, 1, false),
2678       INST(MOV, 4, Q,  1, D,  8, 4, 2, 0, 1, false),
2679       INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 1, false),
2680 
2681       INST(MOV, 4, F,  2, DF, 4, 4, 1, 0, 1, false),
2682       INST(MOV, 4, D,  2, Q,  4, 4, 1, 0, 1, false),
2683       INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 1, false),
2684 
2685       INST(MUL, 8, D,  2, D,  8, 4, 2, 1, 0, false),
2686       INST(MUL, 8, UD, 2, UD, 8, 4, 2, 1, 0, false),
2687 
2688       INST(MUL, 8, D,  2, D,  8, 4, 2, 0, 1, false),
2689       INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 1, false),
2690 
2691 #undef INST
2692    };
2693 
2694    /* These restrictions only apply to Gfx8+ */
2695    if (devinfo.ver < 8)
2696       return;
2697 
2698    /* NoDDChk/NoDDClr does not exist on Gfx12+ */
2699    if (devinfo.ver >= 12)
2700       return;
2701 
2702    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2703       if (!devinfo.has_64bit_float &&
2704           (inst[i].dst_type == ELK_REGISTER_TYPE_DF ||
2705            inst[i].src_type == ELK_REGISTER_TYPE_DF))
2706          continue;
2707 
2708       if (!devinfo.has_64bit_int &&
2709           (inst[i].dst_type == ELK_REGISTER_TYPE_Q ||
2710            inst[i].dst_type == ELK_REGISTER_TYPE_UQ ||
2711            inst[i].src_type == ELK_REGISTER_TYPE_Q ||
2712            inst[i].src_type == ELK_REGISTER_TYPE_UQ))
2713          continue;
2714 
2715       if (inst[i].opcode == ELK_OPCODE_MOV) {
2716          elk_MOV(p, retype(g0, inst[i].dst_type),
2717                     retype(g0, inst[i].src_type));
2718       } else {
2719          assert(inst[i].opcode == ELK_OPCODE_MUL);
2720          elk_MUL(p, retype(g0, inst[i].dst_type),
2721                     retype(g0, inst[i].src_type),
2722                     retype(zero, inst[i].src_type));
2723       }
2724       elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2725 
2726       elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2727 
2728       elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2729       elk_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2730       elk_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2731 
2732       elk_inst_set_no_dd_check(&devinfo, last_inst, inst[i].no_dd_check);
2733       elk_inst_set_no_dd_clear(&devinfo, last_inst, inst[i].no_dd_clear);
2734 
2735       if (devinfo.platform == INTEL_PLATFORM_CHV) {
2736          EXPECT_EQ(inst[i].expected_result, validate(p));
2737       } else {
2738          EXPECT_TRUE(validate(p));
2739       }
2740 
2741       clear_instructions(p);
2742    }
2743 }
2744