xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/test_eu_validate.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <gtest/gtest.h>
25 #include "brw_disasm_info.h"
26 #include "brw_eu.h"
27 #include "brw_eu_defines.h"
28 #include "util/bitset.h"
29 #include "util/ralloc.h"
30 
/* Platform short names the test suite is instantiated for.  SetUp() hands
 * each name to intel_device_name_to_pci_device_id() to look up the device.
 */
static const struct intel_gfx_info {
   const char *name;
} gfx_names[] = {
   { "skl" }, { "bxt" }, { "kbl" }, { "aml" },
   { "glk" }, { "cfl" }, { "whl" }, { "cml" },
   { "icl" }, { "ehl" }, { "jsl" },
   { "tgl" }, { "rkl" }, { "dg1" }, { "adl" },
   { "sg1" }, { "rpl" },
   { "dg2" },
   { "mtl" },
};
54 
55 class validation_test: public ::testing::TestWithParam<struct intel_gfx_info> {
56    virtual void SetUp();
57 
58 public:
59    validation_test();
60    virtual ~validation_test();
61 
62    struct brw_isa_info isa;
63    struct brw_codegen *p;
64    struct intel_device_info devinfo;
65 };
66 
validation_test()67 validation_test::validation_test()
68 {
69    p = rzalloc(NULL, struct brw_codegen);
70    memset(&devinfo, 0, sizeof(devinfo));
71 }
72 
~validation_test()73 validation_test::~validation_test()
74 {
75    ralloc_free(p);
76 }
77 
SetUp()78 void validation_test::SetUp()
79 {
80    struct intel_gfx_info info = GetParam();
81    int devid = intel_device_name_to_pci_device_id(info.name);
82 
83    intel_get_device_info_from_pci_id(devid, &devinfo);
84 
85    brw_init_isa_info(&isa, &devinfo);
86 
87    brw_init_codegen(&isa, p, p);
88 }
89 
90 struct gfx_name {
91    template <class ParamType>
92    std::string
operator ()gfx_name93    operator()(const ::testing::TestParamInfo<ParamType>& info) const {
94       return info.param.name;
95    }
96 };
97 
98 INSTANTIATE_TEST_SUITE_P(
99    eu_assembly, validation_test,
100    ::testing::ValuesIn(gfx_names),
101    gfx_name()
102 );
103 
104 static bool
validate(struct brw_codegen * p)105 validate(struct brw_codegen *p)
106 {
107    const bool print = getenv("TEST_DEBUG");
108    struct disasm_info *disasm = disasm_initialize(p->isa, NULL);
109 
110    if (print) {
111       disasm_new_inst_group(disasm, 0);
112       disasm_new_inst_group(disasm, p->next_insn_offset);
113    }
114 
115    bool ret = brw_validate_instructions(p->isa, p->store, 0,
116                                         p->next_insn_offset, disasm);
117 
118    if (print) {
119       dump_assembly(p->store, 0, p->next_insn_offset, disasm, NULL);
120    }
121    ralloc_free(disasm);
122 
123    return ret;
124 }
125 
/* Shorthands used by the test bodies.  last_inst expects a variable named
 * `p` in scope and yields a pointer to the most recently emitted instruction.
 */
#define last_inst    (p->store + (p->nr_insn - 1))
#define g0           (brw_vec8_grf(0, 0))
#define acc0         (brw_acc_reg(8))
#define null         (brw_null_reg())
#define zero         (brw_imm_f(0.0f))
131 
132 static void
clear_instructions(struct brw_codegen * p)133 clear_instructions(struct brw_codegen *p)
134 {
135    p->next_insn_offset = 0;
136    p->nr_insn = 0;
137 }
138 
/* Baseline: an ADD whose operands are all g0 must be accepted. */
TEST_P(validation_test, sanity)
{
   const struct brw_reg reg = g0;

   brw_ADD(p, reg, reg, reg);

   EXPECT_TRUE(validate(p));
}
145 
/* A MOV reading the null register as src0 must be rejected. */
TEST_P(validation_test, src0_null_reg)
{
   const struct brw_reg dst = g0;
   const struct brw_reg src = null;

   brw_MOV(p, dst, src);

   EXPECT_FALSE(validate(p));
}
152 
/* An ADD reading the null register as src1 must be rejected. */
TEST_P(validation_test, src1_null_reg)
{
   const struct brw_reg reg = g0;
   const struct brw_reg src1 = null;

   brw_ADD(p, reg, reg, src1);

   EXPECT_FALSE(validate(p));
}
159 
/* A SIN math instruction with null for both sources must be rejected. */
TEST_P(validation_test, math_src0_null_reg)
{
   const struct brw_reg dst = g0;
   const struct brw_reg none = null;

   gfx6_math(p, dst, BRW_MATH_FUNCTION_SIN, none, none);

   EXPECT_FALSE(validate(p));
}
166 
/* A POW math instruction whose second source is null must be rejected. */
TEST_P(validation_test, math_src1_null_reg)
{
   const struct brw_reg reg = g0;
   const struct brw_reg src1 = null;

   gfx6_math(p, reg, BRW_MATH_FUNCTION_POW, reg, src1);

   EXPECT_FALSE(validate(p));
}
172 
/* Hardware opcode 46 has meant different things over time:
 *
 *    "push" on Gen 4 and 5
 *    "fork" on Gen 6
 *    reserved on Gen 7
 *    "goto" on Gfx8+
 *
 * Emit whatever brw_opcode_decode() maps it to and expect it to validate.
 */
TEST_P(validation_test, opcode46)
{
   const auto opcode = brw_opcode_decode(&isa, 46);

   brw_next_insn(p, opcode);

   EXPECT_TRUE(validate(p));
}
184 
/* Every defined execution size must validate on a W-typed MOV, while the two
 * encodings past BRW_EXECUTE_32 must be rejected.
 */
TEST_P(validation_test, invalid_exec_size_encoding)
{
   const struct {
      enum brw_execution_size exec_size;
      bool expected_result;
   } test_case[] = {
      { BRW_EXECUTE_1,      true  },
      { BRW_EXECUTE_2,      true  },
      { BRW_EXECUTE_4,      true  },
      { BRW_EXECUTE_8,      true  },
      { BRW_EXECUTE_16,     true  },
      { BRW_EXECUTE_32,     true  },

      { static_cast<enum brw_execution_size>(BRW_EXECUTE_32 + 1), false },
      { static_cast<enum brw_execution_size>(BRW_EXECUTE_32 + 2), false },
   };

   for (const auto &tc : test_case) {
      brw_MOV(p, g0, g0);
      auto *const insn = last_inst;

      brw_inst_set_exec_size(&devinfo, insn, tc.exec_size);
      brw_inst_set_src0_file_type(&devinfo, insn, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_dst_file_type(&devinfo, insn, FIXED_GRF, BRW_TYPE_W);

      /* A single-channel instruction gets a <0;1,0> source region; every
       * other execution size uses <2;2,1>.
       */
      const bool scalar = tc.exec_size == BRW_EXECUTE_1;
      brw_inst_set_src0_vstride(&devinfo, insn,
                                scalar ? BRW_VERTICAL_STRIDE_0
                                       : BRW_VERTICAL_STRIDE_2);
      brw_inst_set_src0_width(&devinfo, insn,
                              scalar ? BRW_WIDTH_1 : BRW_WIDTH_2);
      brw_inst_set_src0_hstride(&devinfo, insn,
                                scalar ? BRW_HORIZONTAL_STRIDE_0
                                       : BRW_HORIZONTAL_STRIDE_1);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
224 
TEST_P(validation_test,invalid_type_encoding)225 TEST_P(validation_test, invalid_type_encoding)
226 {
227    enum brw_reg_file files[2] = {
228       FIXED_GRF,
229       IMM,
230    };
231 
232    for (unsigned i = 0; i < ARRAY_SIZE(files); i++) {
233       const enum brw_reg_file file = files[i];
234       const int num_bits = 4;
235       const int num_encodings = 1 << num_bits;
236 
237       /* The data types are encoded into <num_bits> bits to be used in hardware
238        * instructions, so keep a record in a bitset the invalid patterns so
239        * they can be verified to be invalid when used.
240        */
241       BITSET_DECLARE(invalid_encodings, num_encodings);
242 
243       const struct {
244          enum brw_reg_type type;
245          bool expected_result;
246       } test_case[] = {
247          { BRW_TYPE_DF, devinfo.has_64bit_float },
248          { BRW_TYPE_F,  true },
249          { BRW_TYPE_HF, true },
250          { BRW_TYPE_VF, file == IMM },
251          { BRW_TYPE_Q,  devinfo.has_64bit_int },
252          { BRW_TYPE_UQ, devinfo.has_64bit_int },
253          { BRW_TYPE_D,  true },
254          { BRW_TYPE_UD, true },
255          { BRW_TYPE_W,  true },
256          { BRW_TYPE_UW, true },
257          { BRW_TYPE_B,  file == FIXED_GRF },
258          { BRW_TYPE_UB, file == FIXED_GRF },
259          { BRW_TYPE_V,  file == IMM },
260          { BRW_TYPE_UV, file == IMM },
261       };
262 
263       /* Initially assume all hardware encodings are invalid */
264       BITSET_ONES(invalid_encodings);
265 
266       brw_set_default_exec_size(p, BRW_EXECUTE_4);
267 
268       for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
269          if (test_case[i].expected_result) {
270             unsigned hw_type = brw_type_encode(&devinfo, file, test_case[i].type);
271             if (hw_type != INVALID_HW_REG_TYPE) {
272                /* ... and remove valid encodings from the set */
273                assert(BITSET_TEST(invalid_encodings, hw_type));
274                BITSET_CLEAR(invalid_encodings, hw_type);
275             }
276 
277             if (file == FIXED_GRF) {
278                struct brw_reg g = retype(g0, test_case[i].type);
279                brw_MOV(p, g, g);
280                brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
281                brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
282                brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
283             } else {
284                enum brw_reg_type t;
285 
286                switch (test_case[i].type) {
287                case BRW_TYPE_V:
288                   t = BRW_TYPE_W;
289                   break;
290                case BRW_TYPE_UV:
291                   t = BRW_TYPE_UW;
292                   break;
293                case BRW_TYPE_VF:
294                   t = BRW_TYPE_F;
295                   break;
296                default:
297                   t = test_case[i].type;
298                   break;
299                }
300 
301                struct brw_reg g = retype(g0, t);
302                brw_MOV(p, g, retype(brw_imm_w(0), test_case[i].type));
303             }
304 
305             EXPECT_TRUE(validate(p));
306 
307             clear_instructions(p);
308          }
309       }
310 
311       /* The remaining encodings in invalid_encodings do not have a mapping
312        * from BRW_TYPE_* and must be invalid. Verify that invalid
313        * encodings are rejected by the validator.
314        */
315       int e;
316       BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
317          if (file == FIXED_GRF) {
318             brw_MOV(p, g0, g0);
319             brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
320             brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
321             brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
322          } else {
323             brw_MOV(p, g0, brw_imm_w(0));
324          }
325          brw_inst_set_dst_reg_hw_type(&devinfo, last_inst, e);
326          brw_inst_set_src0_reg_hw_type(&devinfo, last_inst, e);
327 
328          EXPECT_FALSE(validate(p));
329 
330          clear_instructions(p);
331       }
332    }
333 }
334 
TEST_P(validation_test,invalid_type_encoding_3src_a16)335 TEST_P(validation_test, invalid_type_encoding_3src_a16)
336 {
337    /* 3-src instructions in align16 mode only supported on Gfx6-10 */
338    if (devinfo.ver < 6 || devinfo.ver > 10)
339       return;
340 
341    const int num_bits = devinfo.ver >= 8 ? 3 : 2;
342    const int num_encodings = 1 << num_bits;
343 
344    /* The data types are encoded into <num_bits> bits to be used in hardware
345     * instructions, so keep a record in a bitset the invalid patterns so
346     * they can be verified to be invalid when used.
347     */
348    BITSET_DECLARE(invalid_encodings, num_encodings);
349 
350    const struct {
351       enum brw_reg_type type;
352       bool expected_result;
353    } test_case[] = {
354       { BRW_TYPE_DF, devinfo.ver >= 7  },
355       { BRW_TYPE_F,  true },
356       { BRW_TYPE_HF, devinfo.ver >= 8  },
357       { BRW_TYPE_D,  devinfo.ver >= 7  },
358       { BRW_TYPE_UD, devinfo.ver >= 7  },
359    };
360 
361    /* Initially assume all hardware encodings are invalid */
362    BITSET_ONES(invalid_encodings);
363 
364    brw_set_default_access_mode(p, BRW_ALIGN_16);
365    brw_set_default_exec_size(p, BRW_EXECUTE_4);
366 
367    for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
368       if (test_case[i].expected_result) {
369          unsigned hw_type =
370             brw_type_encode_for_3src(&devinfo, test_case[i].type);
371          if (hw_type != INVALID_HW_REG_TYPE) {
372             /* ... and remove valid encodings from the set */
373             assert(BITSET_TEST(invalid_encodings, hw_type));
374             BITSET_CLEAR(invalid_encodings, hw_type);
375          }
376 
377          struct brw_reg g = retype(g0, test_case[i].type);
378          if (!brw_type_is_int(test_case[i].type)) {
379             brw_MAD(p, g, g, g, g);
380          } else {
381             brw_BFE(p, g, g, g, g);
382          }
383 
384          EXPECT_TRUE(validate(p));
385 
386          clear_instructions(p);
387       }
388    }
389 
390    /* The remaining encodings in invalid_encodings do not have a mapping
391     * from BRW_TYPE_* and must be invalid. Verify that invalid
392     * encodings are rejected by the validator.
393     */
394    int e;
395    BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
396       for (unsigned i = 0; i < 2; i++) {
397          if (i == 0) {
398             brw_MAD(p, g0, g0, g0, g0);
399          } else {
400             brw_BFE(p, g0, g0, g0, g0);
401          }
402 
403          brw_inst_set_3src_a16_dst_hw_type(&devinfo, last_inst, e);
404          brw_inst_set_3src_a16_src_hw_type(&devinfo, last_inst, e);
405 
406          EXPECT_FALSE(validate(p));
407 
408          clear_instructions(p);
409 
410          if (devinfo.ver == 6)
411             break;
412       }
413    }
414 }
415 
TEST_P(validation_test,invalid_type_encoding_3src_a1)416 TEST_P(validation_test, invalid_type_encoding_3src_a1)
417 {
418    /* 3-src instructions in align1 mode only supported on Gfx10+ */
419    if (devinfo.ver < 10)
420       return;
421 
422    const int num_bits = 3 + 1 /* for exec_type */;
423    const int num_encodings = 1 << num_bits;
424 
425    /* The data types are encoded into <num_bits> bits to be used in hardware
426     * instructions, so keep a record in a bitset the invalid patterns so
427     * they can be verified to be invalid when used.
428     */
429    BITSET_DECLARE(invalid_encodings, num_encodings);
430 
431    const struct {
432       enum brw_reg_type type;
433       unsigned exec_type;
434       bool expected_result;
435    } test_case[] = {
436 #define E(x) ((unsigned)BRW_ALIGN1_3SRC_EXEC_TYPE_##x)
437       { BRW_TYPE_DF, E(FLOAT), devinfo.has_64bit_float },
438       { BRW_TYPE_F,  E(FLOAT), true  },
439       { BRW_TYPE_HF, E(FLOAT), true  },
440       { BRW_TYPE_D,  E(INT),   true  },
441       { BRW_TYPE_UD, E(INT),   true  },
442       { BRW_TYPE_W,  E(INT),   true  },
443       { BRW_TYPE_UW, E(INT),   true  },
444 
445       /* There are no ternary instructions that can operate on B-type sources
446        * on Gfx11-12. Src1/Src2 cannot be B-typed either.
447        */
448       { BRW_TYPE_B,  E(INT),   false },
449       { BRW_TYPE_UB, E(INT),   false },
450    };
451 
452    /* Initially assume all hardware encodings are invalid */
453    BITSET_ONES(invalid_encodings);
454 
455    brw_set_default_access_mode(p, BRW_ALIGN_1);
456    brw_set_default_exec_size(p, BRW_EXECUTE_4);
457 
458    for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
459       if (test_case[i].expected_result) {
460          unsigned hw_type =
461             brw_type_encode_for_3src(&devinfo, test_case[i].type);
462          unsigned hw_exec_type = hw_type | (test_case[i].exec_type << 3);
463          if (hw_type != INVALID_HW_REG_TYPE) {
464             /* ... and remove valid encodings from the set */
465             assert(BITSET_TEST(invalid_encodings, hw_exec_type));
466             BITSET_CLEAR(invalid_encodings, hw_exec_type);
467          }
468 
469          struct brw_reg g = retype(g0, test_case[i].type);
470          if (!brw_type_is_int(test_case[i].type)) {
471             brw_MAD(p, g, g, g, g);
472          } else {
473             brw_BFE(p, g, g, g, g);
474          }
475 
476          EXPECT_TRUE(validate(p));
477 
478          clear_instructions(p);
479       }
480    }
481 
482    /* The remaining encodings in invalid_encodings do not have a mapping
483     * from BRW_TYPE_* and must be invalid. Verify that invalid
484     * encodings are rejected by the validator.
485     */
486    int e;
487    BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
488       const unsigned hw_type = e & 0x7;
489       const unsigned exec_type = e >> 3;
490 
491       for (unsigned i = 0; i < 2; i++) {
492          if (i == 0) {
493             brw_MAD(p, g0, g0, g0, g0);
494             brw_inst_set_3src_a1_exec_type(&devinfo, last_inst, BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
495          } else {
496             brw_CSEL(p, g0, g0, g0, g0);
497             brw_inst_set_3src_cond_modifier(&devinfo, last_inst, BRW_CONDITIONAL_NZ);
498             brw_inst_set_3src_a1_exec_type(&devinfo, last_inst, BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
499          }
500 
501          brw_inst_set_3src_a1_exec_type(&devinfo, last_inst, exec_type);
502          brw_inst_set_3src_a1_dst_hw_type (&devinfo, last_inst, hw_type);
503          brw_inst_set_3src_a1_src0_hw_type(&devinfo, last_inst, hw_type);
504          brw_inst_set_3src_a1_src1_hw_type(&devinfo, last_inst, hw_type);
505          brw_inst_set_3src_a1_src2_hw_type(&devinfo, last_inst, hw_type);
506 
507          EXPECT_FALSE(validate(p));
508 
509          clear_instructions(p);
510       }
511    }
512 }
513 
514 TEST_P(validation_test, 3src_inst_access_mode)
515 {
516    /* 3-src instructions only supported on Gfx6+ */
517    if (devinfo.ver < 6)
518       return;
519 
520    /* No access mode bit on Gfx12+ */
521    if (devinfo.ver >= 12)
522       return;
523 
524    const struct {
525       unsigned mode;
526       bool expected_result;
527    } test_case[] = {
528       { BRW_ALIGN_1,  devinfo.ver >= 10 },
529       { BRW_ALIGN_16, devinfo.ver <= 10 },
530    };
531 
532    for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
533       if (devinfo.ver < 10)
534          brw_set_default_access_mode(p, BRW_ALIGN_16);
535 
536       brw_MAD(p, g0, g0, g0, g0);
537       brw_inst_set_access_mode(&devinfo, last_inst, test_case[i].mode);
538 
539       EXPECT_EQ(test_case[i].expected_result, validate(p));
540 
541       clear_instructions(p);
542    }
543 }
544 
545 /* When the Execution Data Type is wider than the destination data type, the
546  * destination must [...] specify a HorzStride equal to the ratio in sizes of
547  * the two data types.
548  */
TEST_P(validation_test,dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size)549 TEST_P(validation_test, dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size)
550 {
551    brw_ADD(p, g0, g0, g0);
552    brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
553    brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
554    brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
555 
556    EXPECT_FALSE(validate(p));
557 
558    clear_instructions(p);
559 
560    brw_ADD(p, g0, g0, g0);
561    brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
562    brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
563    brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
564    brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
565 
566    EXPECT_TRUE(validate(p));
567 }
568 
569 /* When the Execution Data Type is wider than the destination data type, the
570  * destination must be aligned as required by the wider execution data type
571  * [...]
572  */
TEST_P(validation_test,dst_subreg_must_be_aligned_to_exec_type_size)573 TEST_P(validation_test, dst_subreg_must_be_aligned_to_exec_type_size)
574 {
575    brw_ADD(p, g0, g0, g0);
576    brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 2);
577    brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
578    brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
579    brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
580    brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
581 
582    EXPECT_FALSE(validate(p));
583 
584    clear_instructions(p);
585 
586    brw_ADD(p, g0, g0, g0);
587    brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
588    brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 8);
589    brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
590    brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
591    brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
592    brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
593    brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
594    brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
595    brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
596    brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
597    brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
598    brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
599 
600    EXPECT_TRUE(validate(p));
601 }
602 
603 /* ExecSize must be greater than or equal to Width. */
TEST_P(validation_test,exec_size_less_than_width)604 TEST_P(validation_test, exec_size_less_than_width)
605 {
606    brw_ADD(p, g0, g0, g0);
607    brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_16);
608 
609    EXPECT_FALSE(validate(p));
610 
611    clear_instructions(p);
612 
613    brw_ADD(p, g0, g0, g0);
614    brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_16);
615 
616    EXPECT_FALSE(validate(p));
617 }
618 
619 /* If ExecSize = Width and HorzStride ≠ 0,
620  * VertStride must be set to Width * HorzStride.
621  */
TEST_P(validation_test,vertical_stride_is_width_by_horizontal_stride)622 TEST_P(validation_test, vertical_stride_is_width_by_horizontal_stride)
623 {
624    brw_ADD(p, g0, g0, g0);
625    brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
626 
627    EXPECT_FALSE(validate(p));
628 
629    clear_instructions(p);
630 
631    brw_ADD(p, g0, g0, g0);
632    brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
633 
634    EXPECT_FALSE(validate(p));
635 }
636 
637 /* If Width = 1, HorzStride must be 0 regardless of the values
638  * of ExecSize and VertStride.
639  */
TEST_P(validation_test,horizontal_stride_must_be_0_if_width_is_1)640 TEST_P(validation_test, horizontal_stride_must_be_0_if_width_is_1)
641 {
642    brw_ADD(p, g0, g0, g0);
643    brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
644    brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
645    brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
646 
647    EXPECT_FALSE(validate(p));
648 
649    clear_instructions(p);
650 
651    brw_ADD(p, g0, g0, g0);
652    brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
653    brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_1);
654    brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
655 
656    EXPECT_FALSE(validate(p));
657 }
658 
659 /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
TEST_P(validation_test,scalar_region_must_be_0_1_0)660 TEST_P(validation_test, scalar_region_must_be_0_1_0)
661 {
662    struct brw_reg g0_0 = brw_vec1_grf(0, 0);
663 
664    brw_ADD(p, g0, g0, g0_0);
665    brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_1);
666    brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_1);
667    brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
668    brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
669 
670    EXPECT_FALSE(validate(p));
671 
672    clear_instructions(p);
673 
674    brw_ADD(p, g0, g0_0, g0);
675    brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_1);
676    brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_1);
677    brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_1);
678    brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
679 
680    EXPECT_FALSE(validate(p));
681 }
682 
683 /* If VertStride = HorzStride = 0, Width must be 1 regardless of the value
684  * of ExecSize.
685  */
TEST_P(validation_test,zero_stride_implies_0_1_0)686 TEST_P(validation_test, zero_stride_implies_0_1_0)
687 {
688    brw_ADD(p, g0, g0, g0);
689    brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
690    brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
691    brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
692 
693    EXPECT_FALSE(validate(p));
694 
695    clear_instructions(p);
696 
697    brw_ADD(p, g0, g0, g0);
698    brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
699    brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_2);
700    brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
701 
702    EXPECT_FALSE(validate(p));
703 }
704 
705 /* Dst.HorzStride must not be 0. */
TEST_P(validation_test,dst_horizontal_stride_0)706 TEST_P(validation_test, dst_horizontal_stride_0)
707 {
708    brw_ADD(p, g0, g0, g0);
709    brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
710 
711    EXPECT_FALSE(validate(p));
712 
713    clear_instructions(p);
714 
715    /* Align16 does not exist on Gfx11+ */
716    if (devinfo.ver >= 11)
717       return;
718 
719    brw_set_default_access_mode(p, BRW_ALIGN_16);
720 
721    brw_ADD(p, g0, g0, g0);
722    brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
723 
724    EXPECT_FALSE(validate(p));
725 }
726 
727 /* VertStride must be used to cross FIXED_GRF register boundaries. This rule implies
728  * that elements within a 'Width' cannot cross FIXED_GRF boundaries.
729  */
TEST_P(validation_test,must_not_cross_grf_boundary_in_a_width)730 TEST_P(validation_test, must_not_cross_grf_boundary_in_a_width)
731 {
732    brw_ADD(p, g0, g0, g0);
733    brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 4);
734 
735    EXPECT_FALSE(validate(p));
736 
737    clear_instructions(p);
738 
739    brw_ADD(p, g0, g0, g0);
740    brw_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 4);
741 
742    EXPECT_FALSE(validate(p));
743 
744    clear_instructions(p);
745 
746    brw_ADD(p, g0, g0, g0);
747    brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
748    brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
749    brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
750 
751    EXPECT_FALSE(validate(p));
752 
753    clear_instructions(p);
754 
755    brw_ADD(p, g0, g0, g0);
756    brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
757    brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
758    brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
759 
760    EXPECT_FALSE(validate(p));
761 }
762 
763 /* Destination Horizontal must be 1 in Align16 */
TEST_P(validation_test,dst_hstride_on_align16_must_be_1)764 TEST_P(validation_test, dst_hstride_on_align16_must_be_1)
765 {
766    /* Align16 does not exist on Gfx11+ */
767    if (devinfo.ver >= 11)
768       return;
769 
770    brw_set_default_access_mode(p, BRW_ALIGN_16);
771 
772    brw_ADD(p, g0, g0, g0);
773    brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
774 
775    EXPECT_FALSE(validate(p));
776 
777    clear_instructions(p);
778 
779    brw_ADD(p, g0, g0, g0);
780    brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
781 
782    EXPECT_TRUE(validate(p));
783 }
784 
785 /* VertStride must be 0 or 4 in Align16 */
TEST_P(validation_test,vstride_on_align16_must_be_0_or_4)786 TEST_P(validation_test, vstride_on_align16_must_be_0_or_4)
787 {
788    /* Align16 does not exist on Gfx11+ */
789    if (devinfo.ver >= 11)
790       return;
791 
792    const struct {
793       enum brw_vertical_stride vstride;
794       bool expected_result;
795    } vstride[] = {
796       { BRW_VERTICAL_STRIDE_0, true },
797       { BRW_VERTICAL_STRIDE_1, false },
798       { BRW_VERTICAL_STRIDE_2, devinfo.verx10 >= 75 },
799       { BRW_VERTICAL_STRIDE_4, true },
800       { BRW_VERTICAL_STRIDE_8, false },
801       { BRW_VERTICAL_STRIDE_16, false },
802       { BRW_VERTICAL_STRIDE_32, false },
803       { BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL, false },
804    };
805 
806    brw_set_default_access_mode(p, BRW_ALIGN_16);
807 
808    for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
809       brw_ADD(p, g0, g0, g0);
810       brw_inst_set_src0_vstride(&devinfo, last_inst, vstride[i].vstride);
811 
812       EXPECT_EQ(vstride[i].expected_result, validate(p));
813 
814       clear_instructions(p);
815    }
816 
817    for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
818       brw_ADD(p, g0, g0, g0);
819       brw_inst_set_src1_vstride(&devinfo, last_inst, vstride[i].vstride);
820 
821       EXPECT_EQ(vstride[i].expected_result, validate(p));
822 
823       clear_instructions(p);
824    }
825 }
826 
827 /* In Direct Addressing mode, a source cannot span more than 2 adjacent FIXED_GRF
828  * registers.
829  */
TEST_P(validation_test,source_cannot_span_more_than_2_registers)830 TEST_P(validation_test, source_cannot_span_more_than_2_registers)
831 {
832    brw_ADD(p, g0, g0, g0);
833    brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_32);
834    brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
835    brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
836    brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
837    brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
838    brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_8);
839    brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
840 
841    EXPECT_FALSE(validate(p));
842 
843    clear_instructions(p);
844 
845    brw_ADD(p, g0, g0, g0);
846    brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
847    brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
848    brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
849    brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
850    brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
851    brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_8);
852    brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
853    brw_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 2);
854 
855    EXPECT_TRUE(validate(p));
856 
857    clear_instructions(p);
858 
859    brw_ADD(p, g0, g0, g0);
860    brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
861 
862    EXPECT_TRUE(validate(p));
863 }
864 
865 /* A destination cannot span more than 2 adjacent FIXED_GRF registers. */
TEST_P(validation_test, destination_cannot_span_more_than_2_registers)
{
   /* SIMD32 ADD on W operands with a destination horizontal stride of 2:
    * the validator must reject it.
    */
   {
      brw_ADD(p, g0, g0, g0);
      auto *const add = last_inst;

      brw_inst_set_exec_size(&devinfo, add, BRW_EXECUTE_32);
      brw_inst_set_dst_hstride(&devinfo, add, BRW_HORIZONTAL_STRIDE_2);
      brw_inst_set_dst_file_type(&devinfo, add, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src0_file_type(&devinfo, add, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src1_file_type(&devinfo, add, FIXED_GRF, BRW_TYPE_W);

      EXPECT_FALSE(validate(p));
   }

   clear_instructions(p);

   /* SIMD8 ADD on W operands; destination starts at subregister 6 with a
    * horizontal stride of 4 and both sources use a vstride-16 / width-4 /
    * hstride-1 region: the validator must accept it.
    */
   {
      brw_ADD(p, g0, g0, g0);
      auto *const add = last_inst;

      brw_inst_set_exec_size(&devinfo, add, BRW_EXECUTE_8);
      brw_inst_set_dst_da1_subreg_nr(&devinfo, add, 6);
      brw_inst_set_dst_hstride(&devinfo, add, BRW_HORIZONTAL_STRIDE_4);
      brw_inst_set_dst_file_type(&devinfo, add, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src0_file_type(&devinfo, add, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src0_vstride(&devinfo, add, BRW_VERTICAL_STRIDE_16);
      brw_inst_set_src0_width(&devinfo, add, BRW_WIDTH_4);
      brw_inst_set_src0_hstride(&devinfo, add, BRW_HORIZONTAL_STRIDE_1);
      brw_inst_set_src1_file_type(&devinfo, add, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src1_vstride(&devinfo, add, BRW_VERTICAL_STRIDE_16);
      brw_inst_set_src1_width(&devinfo, add, BRW_WIDTH_4);
      brw_inst_set_src1_hstride(&devinfo, add, BRW_HORIZONTAL_STRIDE_1);

      EXPECT_TRUE(validate(p));
   }
}
895 
TEST_P(validation_test,src_region_spans_two_regs_dst_region_spans_one)896 TEST_P(validation_test, src_region_spans_two_regs_dst_region_spans_one)
897 {
898    /* Writes to dest are to the lower OWord */
899    brw_ADD(p, g0, g0, g0);
900    brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
901    brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
902    brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
903    brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
904    brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
905    brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
906 
907    EXPECT_TRUE(validate(p));
908 
909    clear_instructions(p);
910 
911    /* Writes to dest are to the upper OWord */
912    brw_ADD(p, g0, g0, g0);
913    brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 16);
914    brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
915    brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
916    brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
917    brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
918    brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
919    brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
920 
921    EXPECT_TRUE(validate(p));
922 
923    clear_instructions(p);
924 
925    /* Writes to dest are evenly split between OWords */
926    brw_ADD(p, g0, g0, g0);
927    brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
928    brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
929    brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
930    brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
931    brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
932    brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_8);
933    brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
934 
935    EXPECT_TRUE(validate(p));
936 
937    clear_instructions(p);
938 
939    /* Writes to dest are uneven between OWords */
940    brw_ADD(p, g0, g0, g0);
941    brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
942    brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 10);
943    brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
944    brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
945    brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
946    brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
947    brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
948    brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
949    brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
950    brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_2);
951    brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
952 
953    if (devinfo.ver >= 9) {
954       EXPECT_TRUE(validate(p));
955    } else {
956       EXPECT_FALSE(validate(p));
957    }
958 }
959 
/* Checks how the validator treats a destination that starts at a non-zero
 * subregister offset, for a plain ADD and for a math (SIN) instruction.  All
 * platforms in gfx_names are Gfx9+, so only the Gfx12.5 boundary needs a
 * runtime check.
 */
TEST_P(validation_test, dst_elements_must_be_evenly_split_between_registers)
{
   /* ADD with the destination at subregister 4: accepted before Gfx12.5,
    * rejected from Gfx12.5 on.
    */
   brw_ADD(p, g0, g0, g0);
   brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 4);

   if (devinfo.verx10 < 125) {
      EXPECT_TRUE(validate(p));
   } else {
      EXPECT_FALSE(validate(p));
   }

   clear_instructions(p);

   /* SIMD16 ADD with the destination at its default offset: accepted. */
   brw_ADD(p, g0, g0, g0);
   brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* Math instruction with the destination at its default offset: accepted. */
   gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* Math instruction with the destination at subregister 4: rejected. */
   gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);
   brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 4);

   EXPECT_FALSE(validate(p));
}
993 
/* SIMD4 ADDs with a destination horizontal stride of 4 and differing source
 * regions/offsets.  On the platforms in gfx_names (Gfx9+) both instructions
 * validate until Gfx12.5, where they are rejected.
 */
TEST_P(validation_test, two_src_two_dst_source_offsets_must_be_same)
{
   /* src0 starts at subregister 16 with a vstride-2 / width-1 / hstride-0
    * region; src1 uses vstride-4 / width-4 / hstride-1.
    */
   brw_ADD(p, g0, g0, g0);
   brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
   brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_4);
   brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 16);
   brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_2);
   brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
   brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
   brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
   brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
   brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);

   if (devinfo.verx10 >= 125) {
      EXPECT_FALSE(validate(p));
   } else {
      EXPECT_TRUE(validate(p));
   }

   clear_instructions(p);

   /* src0 uses vstride-4 / width-1 / hstride-0; src1 uses vstride-8 /
    * width-2 / hstride-1.
    */
   brw_ADD(p, g0, g0, g0);
   brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
   brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_4);
   brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
   brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
   brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
   brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_8);
   brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_2);
   brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);

   if (devinfo.verx10 >= 125) {
      EXPECT_FALSE(validate(p));
   } else {
      EXPECT_TRUE(validate(p));
   }
}
1030 
/* MOVs whose source region starts at subregister 8.  On the platforms in
 * gfx_names (Gfx9+) the first form always validates and the second one
 * validates until Gfx12.5.
 */
TEST_P(validation_test, two_src_two_dst_each_dst_must_be_derived_from_one_src)
{
   /* SIMD16 MOV on W operands: destination hstride 2, source at subregister
    * 8 with a vstride-4 / width-4 / hstride-1 region.
    */
   brw_MOV(p, g0, g0);
   brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
   brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
   brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 8);
   brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
   brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
   brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);

   /* Accepted on Gfx9+, which covers every platform under test. */
   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* MOV with the destination at subregister 16 and the source at
    * subregister 8 with a vstride-2 / width-2 / hstride-1 region.
    */
   brw_MOV(p, g0, g0);
   brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 16);
   brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 8);
   brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_2);
   brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
   brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);

   if (devinfo.verx10 >= 125) {
      EXPECT_FALSE(validate(p));
   } else {
      EXPECT_TRUE(validate(p));
   }
}
1064 
TEST_P(validation_test,one_src_two_dst)1065 TEST_P(validation_test, one_src_two_dst)
1066 {
1067    struct brw_reg g0_0 = brw_vec1_grf(0, 0);
1068 
1069    brw_ADD(p, g0, g0_0, g0_0);
1070    brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
1071 
1072    EXPECT_TRUE(validate(p));
1073 
1074    clear_instructions(p);
1075 
1076    brw_ADD(p, g0, g0, g0);
1077    brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
1078    brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
1079    brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
1080    brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
1081 
1082    EXPECT_TRUE(validate(p));
1083 
1084    clear_instructions(p);
1085 
1086    brw_ADD(p, g0, g0, g0);
1087    brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
1088    brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
1089    brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
1090    brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
1091 
1092    if (devinfo.ver >= 8) {
1093       EXPECT_TRUE(validate(p));
1094    } else {
1095       EXPECT_FALSE(validate(p));
1096    }
1097 
1098    clear_instructions(p);
1099 
1100    brw_ADD(p, g0, g0, g0);
1101    brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
1102    brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
1103    brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
1104    brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
1105 
1106    if (devinfo.ver >= 8) {
1107       EXPECT_TRUE(validate(p));
1108    } else {
1109       EXPECT_FALSE(validate(p));
1110    }
1111 
1112    clear_instructions(p);
1113 
1114    brw_ADD(p, g0, g0, g0);
1115    brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
1116    brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
1117    brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
1118    brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
1119    brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
1120    brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
1121    brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_1);
1122    brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
1123 
1124    if (devinfo.ver >= 8) {
1125       EXPECT_TRUE(validate(p));
1126    } else {
1127       EXPECT_FALSE(validate(p));
1128    }
1129 
1130    clear_instructions(p);
1131 
1132    brw_ADD(p, g0, g0, g0);
1133    brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
1134    brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
1135    brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
1136    brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
1137    brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
1138    brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
1139    brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
1140    brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
1141 
1142    if (devinfo.ver >= 8) {
1143       EXPECT_TRUE(validate(p));
1144    } else {
1145       EXPECT_FALSE(validate(p));
1146    }
1147 }
1148 
TEST_P(validation_test,packed_byte_destination)1149 TEST_P(validation_test, packed_byte_destination)
1150 {
1151    static const struct {
1152       enum brw_reg_type dst_type;
1153       enum brw_reg_type src_type;
1154       bool neg, abs, sat;
1155       bool expected_result;
1156    } move[] = {
1157       { BRW_TYPE_UB, BRW_TYPE_UB, 0, 0, 0, true },
1158       { BRW_TYPE_B , BRW_TYPE_B , 0, 0, 0, true },
1159       { BRW_TYPE_UB, BRW_TYPE_B , 0, 0, 0, true },
1160       { BRW_TYPE_B , BRW_TYPE_UB, 0, 0, 0, true },
1161 
1162       { BRW_TYPE_UB, BRW_TYPE_UB, 1, 0, 0, false },
1163       { BRW_TYPE_B , BRW_TYPE_B , 1, 0, 0, false },
1164       { BRW_TYPE_UB, BRW_TYPE_B , 1, 0, 0, false },
1165       { BRW_TYPE_B , BRW_TYPE_UB, 1, 0, 0, false },
1166 
1167       { BRW_TYPE_UB, BRW_TYPE_UB, 0, 1, 0, false },
1168       { BRW_TYPE_B , BRW_TYPE_B , 0, 1, 0, false },
1169       { BRW_TYPE_UB, BRW_TYPE_B , 0, 1, 0, false },
1170       { BRW_TYPE_B , BRW_TYPE_UB, 0, 1, 0, false },
1171 
1172       { BRW_TYPE_UB, BRW_TYPE_UB, 0, 0, 1, false },
1173       { BRW_TYPE_B , BRW_TYPE_B , 0, 0, 1, false },
1174       { BRW_TYPE_UB, BRW_TYPE_B , 0, 0, 1, false },
1175       { BRW_TYPE_B , BRW_TYPE_UB, 0, 0, 1, false },
1176 
1177       { BRW_TYPE_UB, BRW_TYPE_UW, 0, 0, 0, false },
1178       { BRW_TYPE_B , BRW_TYPE_W , 0, 0, 0, false },
1179       { BRW_TYPE_UB, BRW_TYPE_UD, 0, 0, 0, false },
1180       { BRW_TYPE_B , BRW_TYPE_D , 0, 0, 0, false },
1181    };
1182 
1183    for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
1184       brw_MOV(p, retype(g0, move[i].dst_type), retype(g0, move[i].src_type));
1185       brw_inst_set_src0_negate(&devinfo, last_inst, move[i].neg);
1186       brw_inst_set_src0_abs(&devinfo, last_inst, move[i].abs);
1187       brw_inst_set_saturate(&devinfo, last_inst, move[i].sat);
1188 
1189       EXPECT_EQ(move[i].expected_result, validate(p));
1190 
1191       clear_instructions(p);
1192    }
1193 
1194    brw_SEL(p, retype(g0, BRW_TYPE_UB),
1195               retype(g0, BRW_TYPE_UB),
1196               retype(g0, BRW_TYPE_UB));
1197    brw_inst_set_pred_control(&devinfo, last_inst, BRW_PREDICATE_NORMAL);
1198 
1199    EXPECT_FALSE(validate(p));
1200 
1201    clear_instructions(p);
1202 
1203    brw_SEL(p, retype(g0, BRW_TYPE_B),
1204               retype(g0, BRW_TYPE_B),
1205               retype(g0, BRW_TYPE_B));
1206    brw_inst_set_pred_control(&devinfo, last_inst, BRW_PREDICATE_NORMAL);
1207 
1208    EXPECT_FALSE(validate(p));
1209 }
1210 
TEST_P(validation_test, byte_destination_relaxed_alignment)
{
   const struct brw_reg byte_dst = retype(g0, BRW_TYPE_B);
   const struct brw_reg word_src = retype(g0, BRW_TYPE_W);

   /* Predicated SEL from W sources into a B destination with a horizontal
    * stride of 2, destination at its default subregister: accepted.
    */
   {
      brw_SEL(p, byte_dst, word_src, word_src);
      auto *const sel = last_inst;

      brw_inst_set_pred_control(&devinfo, sel, BRW_PREDICATE_NORMAL);
      brw_inst_set_dst_hstride(&devinfo, sel, BRW_HORIZONTAL_STRIDE_2);

      EXPECT_TRUE(validate(p));
   }

   clear_instructions(p);

   /* Same instruction with the destination moved to subregister 1: still
    * accepted.
    */
   {
      brw_SEL(p, byte_dst, word_src, word_src);
      auto *const sel = last_inst;

      brw_inst_set_pred_control(&devinfo, sel, BRW_PREDICATE_NORMAL);
      brw_inst_set_dst_hstride(&devinfo, sel, BRW_HORIZONTAL_STRIDE_2);
      brw_inst_set_dst_da1_subreg_nr(&devinfo, sel, 1);

      EXPECT_TRUE(validate(p));
   }
}
1232 
TEST_P(validation_test,byte_64bit_conversion)1233 TEST_P(validation_test, byte_64bit_conversion)
1234 {
1235    static const struct {
1236       enum brw_reg_type dst_type;
1237       enum brw_reg_type src_type;
1238       unsigned dst_stride;
1239       bool expected_result;
1240    } inst[] = {
1241 #define INST(dst_type, src_type, dst_stride, expected_result)             \
1242       {                                                                   \
1243          BRW_TYPE_##dst_type,                                             \
1244          BRW_TYPE_##src_type,                                             \
1245          BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
1246          expected_result,                                                 \
1247       }
1248 
1249       INST(B,   Q, 1, false),
1250       INST(B,  UQ, 1, false),
1251       INST(B,  DF, 1, false),
1252       INST(UB,  Q, 1, false),
1253       INST(UB, UQ, 1, false),
1254       INST(UB, DF, 1, false),
1255 
1256       INST(B,   Q, 2, false),
1257       INST(B,  UQ, 2, false),
1258       INST(B , DF, 2, false),
1259       INST(UB,  Q, 2, false),
1260       INST(UB, UQ, 2, false),
1261       INST(UB, DF, 2, false),
1262 
1263       INST(B,   Q, 4, false),
1264       INST(B,  UQ, 4, false),
1265       INST(B,  DF, 4, false),
1266       INST(UB,  Q, 4, false),
1267       INST(UB, UQ, 4, false),
1268       INST(UB, DF, 4, false),
1269 
1270 #undef INST
1271    };
1272 
1273    if (devinfo.ver < 8)
1274       return;
1275 
1276    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1277       if (!devinfo.has_64bit_float &&
1278           inst[i].src_type == BRW_TYPE_DF)
1279          continue;
1280 
1281       if (!devinfo.has_64bit_int &&
1282           (inst[i].src_type == BRW_TYPE_Q ||
1283            inst[i].src_type == BRW_TYPE_UQ))
1284          continue;
1285 
1286       brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
1287       brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1288       EXPECT_EQ(inst[i].expected_result, validate(p));
1289 
1290       clear_instructions(p);
1291    }
1292 }
1293 
TEST_P(validation_test,half_float_conversion)1294 TEST_P(validation_test, half_float_conversion)
1295 {
1296    static const struct {
1297       enum brw_reg_type dst_type;
1298       enum brw_reg_type src_type;
1299       unsigned dst_stride;
1300       unsigned dst_subnr;
1301       bool expected_result_gfx9;
1302       bool expected_result_gfx125;
1303    } inst[] = {
1304 #define INST(dst_type, src_type, dst_stride, dst_subnr,                     \
1305              expected_result_gfx9,                                          \
1306              expected_result_gfx125)                                        \
1307       {                                                                     \
1308          BRW_TYPE_##dst_type,                                               \
1309          BRW_TYPE_##src_type,                                               \
1310          BRW_HORIZONTAL_STRIDE_##dst_stride,                                \
1311          dst_subnr,                                                         \
1312          expected_result_gfx9,                                              \
1313          expected_result_gfx125,                                            \
1314       }
1315 
1316       /* MOV to half-float destination */
1317       INST(HF,  B, 1, 0, false, false), /* 0 */
1318       INST(HF,  W, 1, 0, false, false),
1319       INST(HF, HF, 1, 0, true,  true),
1320       INST(HF, HF, 1, 2, true,  false),
1321       INST(HF,  D, 1, 0, false, false),
1322       INST(HF,  F, 1, 0, true,  false),
1323       INST(HF,  Q, 1, 0, false, false),
1324       INST(HF,  B, 2, 0, true,  false),
1325       INST(HF,  B, 2, 2, false, false),
1326       INST(HF,  W, 2, 0, true,  false),
1327       INST(HF,  W, 2, 2, false, false), /* 10 */
1328       INST(HF, HF, 2, 0, true,  false),
1329       INST(HF, HF, 2, 2, true,  false),
1330       INST(HF,  D, 2, 0, true,  true),
1331       INST(HF,  D, 2, 2, false, false),
1332       INST(HF,  F, 2, 0, true,  true),
1333       INST(HF,  F, 2, 2, true,  false),
1334       INST(HF,  Q, 2, 0, false, false),
1335       INST(HF, DF, 2, 0, false, false),
1336       INST(HF,  B, 4, 0, false, false),
1337       INST(HF,  W, 4, 0, false, false), /* 20 */
1338       INST(HF, HF, 4, 0, true,  false),
1339       INST(HF, HF, 4, 2, true,  false),
1340       INST(HF,  D, 4, 0, false, false),
1341       INST(HF,  F, 4, 0, false, false),
1342       INST(HF,  Q, 4, 0, false, false),
1343       INST(HF, DF, 4, 0, false, false),
1344 
1345       /* MOV from half-float source */
1346       INST( B, HF, 1, 0, false, false),
1347       INST( W, HF, 1, 0, false, false),
1348       INST( D, HF, 1, 0, true,  true),
1349       INST( D, HF, 1, 4, true,  true),  /* 30 */
1350       INST( F, HF, 1, 0, true,  false),
1351       INST( F, HF, 1, 4, true,  false),
1352       INST( Q, HF, 1, 0, false, false),
1353       INST(DF, HF, 1, 0, false, false),
1354       INST( B, HF, 2, 0, false, false),
1355       INST( W, HF, 2, 0, true,  true),
1356       INST( W, HF, 2, 2, false, false),
1357       INST( D, HF, 2, 0, false, false),
1358       INST( F, HF, 2, 0, true,  false),
1359       INST( B, HF, 4, 0, true,  true),  /* 40 */
1360       INST( B, HF, 4, 1, false, false),
1361       INST( W, HF, 4, 0, false, false),
1362 
1363 #undef INST
1364    };
1365 
1366    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1367       if (!devinfo.has_64bit_float &&
1368           (inst[i].dst_type == BRW_TYPE_DF ||
1369            inst[i].src_type == BRW_TYPE_DF))
1370          continue;
1371 
1372       if (!devinfo.has_64bit_int &&
1373           (inst[i].dst_type == BRW_TYPE_Q ||
1374            inst[i].dst_type == BRW_TYPE_UQ ||
1375            inst[i].src_type == BRW_TYPE_Q ||
1376            inst[i].src_type == BRW_TYPE_UQ))
1377          continue;
1378 
1379       brw_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
1380 
1381       brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
1382 
1383       brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1384       brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
1385 
1386       if (inst[i].src_type == BRW_TYPE_B) {
1387          brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1388          brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
1389          brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
1390       } else {
1391          brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1392          brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
1393          brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
1394       }
1395 
1396       if (devinfo.verx10 >= 125) {
1397          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p)) <<
1398             "Failing test is: " << i;
1399       } else {
1400          EXPECT_EQ(inst[i].expected_result_gfx9, validate(p)) <<
1401             "Failing test is: " << i;
1402       }
1403 
1404       clear_instructions(p);
1405    }
1406 }
1407 
TEST_P(validation_test,mixed_float_source_indirect_addressing)1408 TEST_P(validation_test, mixed_float_source_indirect_addressing)
1409 {
1410    static const struct {
1411       enum brw_reg_type dst_type;
1412       enum brw_reg_type src0_type;
1413       enum brw_reg_type src1_type;
1414       unsigned dst_stride;
1415       bool dst_indirect;
1416       bool src0_indirect;
1417       bool expected_result;
1418       bool gfx125_expected_result;
1419    } inst[] = {
1420 #define INST(dst_type, src0_type, src1_type,                              \
1421              dst_stride, dst_indirect, src0_indirect, expected_result,    \
1422              gfx125_expected_result)                                      \
1423       {                                                                   \
1424          BRW_TYPE_##dst_type,                                             \
1425          BRW_TYPE_##src0_type,                                            \
1426          BRW_TYPE_##src1_type,                                            \
1427          BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
1428          dst_indirect,                                                    \
1429          src0_indirect,                                                   \
1430          expected_result,                                                 \
1431          gfx125_expected_result,                                          \
1432       }
1433 
1434       /* Source and dest are mixed float: indirect src addressing not allowed */
1435       INST(HF,  F,  F, 2, false, false, true,  true),
1436       INST(HF,  F,  F, 2, true,  false, true,  true),
1437       INST(HF,  F,  F, 2, false, true,  false, false),
1438       INST(HF,  F,  F, 2, true,  true,  false, false),
1439       INST( F, HF,  F, 1, false, false, true,  false),
1440       INST( F, HF,  F, 1, true,  false, true,  false),
1441       INST( F, HF,  F, 1, false, true,  false, false),
1442       INST( F, HF,  F, 1, true,  true,  false, false),
1443 
1444       INST(HF, HF,  F, 2, false, false, true,  false),
1445       INST(HF, HF,  F, 2, true,  false, true,  false),
1446       INST(HF, HF,  F, 2, false, true,  false, false),
1447       INST(HF, HF,  F, 2, true,  true,  false, false),
1448       INST( F,  F, HF, 1, false, false, true,  false),
1449       INST( F,  F, HF, 1, true,  false, true,  false),
1450       INST( F,  F, HF, 1, false, true,  false, false),
1451       INST( F,  F, HF, 1, true,  true,  false, false),
1452 
1453 #undef INST
1454    };
1455 
1456    if (devinfo.ver < 8)
1457       return;
1458 
1459    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1460       brw_ADD(p, retype(g0, inst[i].dst_type),
1461                  retype(g0, inst[i].src0_type),
1462                  retype(g0, inst[i].src1_type));
1463 
1464       brw_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_indirect);
1465       brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1466       brw_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src0_indirect);
1467 
1468       if (devinfo.verx10 >= 125) {
1469          EXPECT_EQ(inst[i].gfx125_expected_result, validate(p));
1470       } else {
1471          EXPECT_EQ(inst[i].expected_result, validate(p));
1472       }
1473 
1474       clear_instructions(p);
1475    }
1476 }
1477 
TEST_P(validation_test,mixed_float_align1_simd16)1478 TEST_P(validation_test, mixed_float_align1_simd16)
1479 {
1480    static const struct {
1481       unsigned exec_size;
1482       enum brw_reg_type dst_type;
1483       enum brw_reg_type src0_type;
1484       enum brw_reg_type src1_type;
1485       unsigned dst_stride;
1486       bool expected_result;
1487       bool gfx125_expected_result;
1488    } inst[] = {
1489 #define INST(exec_size, dst_type, src0_type, src1_type,                   \
1490              dst_stride, expected_result, gfx125_expected_result)         \
1491       {                                                                   \
1492          BRW_EXECUTE_##exec_size,                                         \
1493          BRW_TYPE_##dst_type,                                             \
1494          BRW_TYPE_##src0_type,                                            \
1495          BRW_TYPE_##src1_type,                                            \
1496          BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
1497          expected_result,                                                 \
1498          gfx125_expected_result,                                          \
1499       }
1500 
1501       /* No SIMD16 in mixed mode when destination is packed f16 */
1502       INST( 8, HF,  F, HF, 2, true,  false),
1503       INST(16, HF, HF,  F, 2, true,  false),
1504       INST(16, HF, HF,  F, 1, false, false),
1505       INST(16, HF,  F, HF, 1, false, false),
1506 
1507       /* No SIMD16 in mixed mode when destination is f32 */
1508       INST( 8,  F, HF,  F, 1, true,  false),
1509       INST( 8,  F,  F, HF, 1, true,  false),
1510       INST(16,  F, HF,  F, 1, false, false),
1511       INST(16,  F,  F, HF, 1, false, false),
1512 
1513 #undef INST
1514    };
1515 
1516    if (devinfo.ver < 8)
1517       return;
1518 
1519    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1520       brw_ADD(p, retype(g0, inst[i].dst_type),
1521                  retype(g0, inst[i].src0_type),
1522                  retype(g0, inst[i].src1_type));
1523 
1524       brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
1525 
1526       brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1527 
1528       if (devinfo.verx10 >= 125) {
1529          EXPECT_EQ(inst[i].gfx125_expected_result, validate(p));
1530       } else {
1531          EXPECT_EQ(inst[i].expected_result, validate(p));
1532       }
1533 
1534       clear_instructions(p);
1535    }
1536 }
1537 
TEST_P(validation_test,mixed_float_align1_packed_fp16_dst_acc_read_offset_0)1538 TEST_P(validation_test, mixed_float_align1_packed_fp16_dst_acc_read_offset_0)
1539 {
1540    static const struct {
1541       enum brw_reg_type dst_type;
1542       enum brw_reg_type src0_type;
1543       enum brw_reg_type src1_type;
1544       unsigned dst_stride;
1545       bool read_acc;
1546       unsigned subnr;
1547       bool expected_result_bdw;
1548       bool expected_result_chv_skl;
1549       bool expected_result_gfx125;
1550    } inst[] = {
1551 #define INST(dst_type, src0_type, src1_type, dst_stride, read_acc, subnr,   \
1552              expected_result_bdw, expected_result_chv_skl,                  \
1553              expected_result_gfx125)                                        \
1554       {                                                                     \
1555          BRW_TYPE_##dst_type,                                               \
1556          BRW_TYPE_##src0_type,                                              \
1557          BRW_TYPE_##src1_type,                                              \
1558          BRW_HORIZONTAL_STRIDE_##dst_stride,                                \
1559          read_acc,                                                          \
1560          subnr,                                                             \
1561          expected_result_bdw,                                               \
1562          expected_result_chv_skl,                                           \
1563          expected_result_gfx125,                                            \
1564       }
1565 
1566       /* Destination is not packed */
1567       INST(HF, HF,  F, 2, true,  0, true, true, false),
1568       INST(HF, HF,  F, 2, true,  2, true, true, false),
1569       INST(HF, HF,  F, 2, true,  4, true, true, false),
1570       INST(HF, HF,  F, 2, true,  8, true, true, false),
1571       INST(HF, HF,  F, 2, true, 16, true, true, false),
1572 
1573       /* Destination is packed, we don't read acc */
1574       INST(HF, HF,  F, 1, false,  0, false, true, false),
1575       INST(HF, HF,  F, 1, false,  2, false, true, false),
1576       INST(HF, HF,  F, 1, false,  4, false, true, false),
1577       INST(HF, HF,  F, 1, false,  8, false, true, false),
1578       INST(HF, HF,  F, 1, false, 16, false, true, false),
1579 
1580       /* Destination is packed, we read acc */
1581       INST(HF, HF,  F, 1, true,  0, false, false, false),
1582       INST(HF, HF,  F, 1, true,  2, false, false, false),
1583       INST(HF, HF,  F, 1, true,  4, false, false, false),
1584       INST(HF, HF,  F, 1, true,  8, false, false, false),
1585       INST(HF, HF,  F, 1, true, 16, false, false, false),
1586 
1587 #undef INST
1588    };
1589 
1590    if (devinfo.ver < 8)
1591       return;
1592 
1593    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1594       brw_ADD(p, retype(g0, inst[i].dst_type),
1595                  retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type),
1596                  retype(g0, inst[i].src1_type));
1597 
1598       brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1599 
1600       brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].subnr);
1601 
1602       if (devinfo.verx10 >= 125)
1603          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p));
1604       else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9)
1605          EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
1606       else
1607          EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
1608 
1609       clear_instructions(p);
1610    }
1611 }
1612 
TEST_P(validation_test,mixed_float_fp16_dest_with_acc)1613 TEST_P(validation_test, mixed_float_fp16_dest_with_acc)
1614 {
1615    static const struct {
1616       unsigned exec_size;
1617       unsigned opcode;
1618       enum brw_reg_type dst_type;
1619       enum brw_reg_type src0_type;
1620       enum brw_reg_type src1_type;
1621       unsigned dst_stride;
1622       bool read_acc;
1623       bool expected_result_bdw;
1624       bool expected_result_chv_skl;
1625       bool expected_result_gfx125;
1626    } inst[] = {
1627 #define INST(exec_size, opcode, dst_type, src0_type, src1_type,           \
1628              dst_stride, read_acc,expected_result_bdw,                    \
1629              expected_result_chv_skl, expected_result_gfx125)             \
1630       {                                                                   \
1631          BRW_EXECUTE_##exec_size,                                         \
1632          BRW_OPCODE_##opcode,                                             \
1633          BRW_TYPE_##dst_type,                                             \
1634          BRW_TYPE_##src0_type,                                            \
1635          BRW_TYPE_##src1_type,                                            \
1636          BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
1637          read_acc,                                                        \
1638          expected_result_bdw,                                             \
1639          expected_result_chv_skl,                                         \
1640          expected_result_gfx125,                                          \
1641       }
1642 
1643       /* Packed fp16 dest with implicit acc needs hstride=2 */
1644       INST(8, MAC, HF, HF,  F, 1, false, false, false, false),
1645       INST(8, MAC, HF, HF,  F, 2, false, true,  true,  false),
1646       INST(8, MAC, HF,  F, HF, 1, false, false, false, false),
1647       INST(8, MAC, HF,  F, HF, 2, false, true,  true,  false),
1648 
1649       /* Packed fp16 dest with explicit acc needs hstride=2 */
1650       INST(8, ADD, HF, HF,  F, 1, true,  false, false, false),
1651       INST(8, ADD, HF, HF,  F, 2, true,  true,  true,  false),
1652       INST(8, ADD, HF,  F, HF, 1, true,  false, false, false),
1653       INST(8, ADD, HF,  F, HF, 2, true,  true,  true,  false),
1654 
1655       /* If destination is not fp16, restriction doesn't apply */
1656       INST(8, MAC,  F, HF,  F, 1, false, true, true, false),
1657       INST(8, MAC,  F, HF,  F, 2, false, true, true, false),
1658 
1659       /* If there is no implicit/explicit acc, restriction doesn't apply */
1660       INST(8, ADD, HF, HF,  F, 1, false, false, true, false),
1661       INST(8, ADD, HF, HF,  F, 2, false, true,  true, false),
1662       INST(8, ADD, HF,  F, HF, 1, false, false, true, false),
1663       INST(8, ADD, HF,  F, HF, 2, false, true,  true, false),
1664       INST(8, ADD,  F, HF,  F, 1, false, true,  true, false),
1665       INST(8, ADD,  F, HF,  F, 2, false, true,  true, false),
1666 
1667 #undef INST
1668    };
1669 
1670    if (devinfo.ver < 8)
1671       return;
1672 
1673    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1674       if (inst[i].opcode == BRW_OPCODE_MAC) {
1675          brw_MAC(p, retype(g0, inst[i].dst_type),
1676                     retype(g0, inst[i].src0_type),
1677                     retype(g0, inst[i].src1_type));
1678       } else {
1679          assert(inst[i].opcode == BRW_OPCODE_ADD);
1680          brw_ADD(p, retype(g0, inst[i].dst_type),
1681                     retype(inst[i].read_acc ? acc0: g0, inst[i].src0_type),
1682                     retype(g0, inst[i].src1_type));
1683       }
1684 
1685       brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
1686 
1687       brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1688 
1689       if (devinfo.verx10 >= 125)
1690          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p));
1691       else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9)
1692          EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
1693       else
1694          EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
1695 
1696       clear_instructions(p);
1697    }
1698 }
1699 
TEST_P(validation_test,mixed_float_align1_math_strided_fp16_inputs)1700 TEST_P(validation_test, mixed_float_align1_math_strided_fp16_inputs)
1701 {
1702    static const struct {
1703       enum brw_reg_type dst_type;
1704       enum brw_reg_type src0_type;
1705       enum brw_reg_type src1_type;
1706       unsigned dst_stride;
1707       unsigned src0_stride;
1708       unsigned src1_stride;
1709       bool expected_result;
1710       bool expected_result_gfx125;
1711    } inst[] = {
1712 #define INST(dst_type, src0_type, src1_type,                              \
1713              dst_stride, src0_stride, src1_stride, expected_result,       \
1714              expected_result_125)                                         \
1715       {                                                                   \
1716          BRW_TYPE_##dst_type,                                             \
1717          BRW_TYPE_##src0_type,                                            \
1718          BRW_TYPE_##src1_type,                                            \
1719          BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
1720          BRW_HORIZONTAL_STRIDE_##src0_stride,                             \
1721          BRW_HORIZONTAL_STRIDE_##src1_stride,                             \
1722          expected_result,                                                 \
1723          expected_result_125,                                             \
1724       }
1725 
1726       INST(HF, HF,  F, 2, 2, 1, true,  false),
1727       INST(HF,  F, HF, 2, 1, 2, true,  false),
1728       INST(HF,  F, HF, 1, 1, 2, true,  false),
1729       INST(HF,  F, HF, 2, 1, 1, false, false),
1730       INST(HF, HF,  F, 2, 1, 1, false, false),
1731       INST(HF, HF,  F, 1, 1, 1, false, false),
1732       INST(HF, HF,  F, 2, 1, 1, false, false),
1733       INST( F, HF,  F, 1, 1, 1, false, false),
1734       INST( F,  F, HF, 1, 1, 2, true,  false),
1735       INST( F, HF, HF, 1, 2, 1, false, false),
1736       INST( F, HF, HF, 1, 2, 2, true,  false),
1737 
1738 #undef INST
1739    };
1740 
1741    /* No half-float math in gfx8 */
1742    if (devinfo.ver < 9)
1743       return;
1744 
1745    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1746       gfx6_math(p, retype(g0, inst[i].dst_type),
1747                    BRW_MATH_FUNCTION_POW,
1748                    retype(g0, inst[i].src0_type),
1749                    retype(g0, inst[i].src1_type));
1750 
1751       brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1752 
1753       brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1754       brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
1755       brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src0_stride);
1756 
1757       brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1758       brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
1759       brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].src1_stride);
1760 
1761       if (devinfo.verx10 >= 125)
1762          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p));
1763       else
1764          EXPECT_EQ(inst[i].expected_result, validate(p));
1765 
1766       clear_instructions(p);
1767    }
1768 }
1769 
TEST_P(validation_test,mixed_float_align1_packed_fp16_dst)1770 TEST_P(validation_test, mixed_float_align1_packed_fp16_dst)
1771 {
1772    static const struct {
1773       unsigned exec_size;
1774       enum brw_reg_type dst_type;
1775       enum brw_reg_type src0_type;
1776       enum brw_reg_type src1_type;
1777       unsigned dst_stride;
1778       unsigned dst_subnr;
1779       bool expected_result_bdw;
1780       bool expected_result_chv_skl;
1781       bool expected_result_gfx125;
1782    } inst[] = {
1783 #define INST(exec_size, dst_type, src0_type, src1_type, dst_stride, dst_subnr, \
1784              expected_result_bdw, expected_result_chv_skl,                     \
1785              expected_result_gfx125)                                           \
1786       {                                                                        \
1787          BRW_EXECUTE_##exec_size,                                              \
1788          BRW_TYPE_##dst_type,                                                  \
1789          BRW_TYPE_##src0_type,                                                 \
1790          BRW_TYPE_##src1_type,                                                 \
1791          BRW_HORIZONTAL_STRIDE_##dst_stride,                                   \
1792          dst_subnr,                                                            \
1793          expected_result_bdw,                                                  \
1794          expected_result_chv_skl,                                              \
1795          expected_result_gfx125                                                \
1796       }
1797 
1798       /* SIMD8 packed fp16 dst won't cross oword boundaries if region is
1799        * oword-aligned
1800        */
1801       INST( 8, HF, HF,  F, 1,  0, false, true,  false),
1802       INST( 8, HF, HF,  F, 1,  2, false, false, false),
1803       INST( 8, HF, HF,  F, 1,  4, false, false, false),
1804       INST( 8, HF, HF,  F, 1,  8, false, false, false),
1805       INST( 8, HF, HF,  F, 1, 16, false, true,  false),
1806 
1807       /* SIMD16 packed fp16 always crosses oword boundaries */
1808       INST(16, HF, HF,  F, 1,  0, false, false, false),
1809       INST(16, HF, HF,  F, 1,  2, false, false, false),
1810       INST(16, HF, HF,  F, 1,  4, false, false, false),
1811       INST(16, HF, HF,  F, 1,  8, false, false, false),
1812       INST(16, HF, HF,  F, 1, 16, false, false, false),
1813 
1814       /* If destination is not packed (or not fp16) we can cross oword
1815        * boundaries
1816        */
1817       INST( 8, HF, HF,  F, 2,  0, true, true, false),
1818       INST( 8,  F, HF,  F, 1,  0, true, true, false),
1819 
1820 #undef INST
1821    };
1822 
1823    if (devinfo.ver < 8)
1824       return;
1825 
1826    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1827       brw_ADD(p, retype(g0, inst[i].dst_type),
1828                  retype(g0, inst[i].src0_type),
1829                  retype(g0, inst[i].src1_type));
1830 
1831       brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
1832       brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);
1833 
1834       brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1835       brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
1836       brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
1837 
1838       brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
1839       brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
1840       brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
1841 
1842       brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
1843 
1844       if (devinfo.verx10 >= 125)
1845          EXPECT_EQ(inst[i].expected_result_gfx125, validate(p));
1846       else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9)
1847          EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p));
1848       else
1849          EXPECT_EQ(inst[i].expected_result_bdw, validate(p));
1850 
1851       clear_instructions(p);
1852    }
1853 }
1854 
TEST_P(validation_test,mixed_float_align16_packed_data)1855 TEST_P(validation_test, mixed_float_align16_packed_data)
1856 {
1857    static const struct {
1858       enum brw_reg_type dst_type;
1859       enum brw_reg_type src0_type;
1860       enum brw_reg_type src1_type;
1861       unsigned src0_vstride;
1862       unsigned src1_vstride;
1863       bool expected_result;
1864    } inst[] = {
1865 #define INST(dst_type, src0_type, src1_type,                              \
1866              src0_vstride, src1_vstride, expected_result)                 \
1867       {                                                                   \
1868          BRW_TYPE_##dst_type,                                             \
1869          BRW_TYPE_##src0_type,                                            \
1870          BRW_TYPE_##src1_type,                                            \
1871          BRW_VERTICAL_STRIDE_##src0_vstride,                              \
1872          BRW_VERTICAL_STRIDE_##src1_vstride,                              \
1873          expected_result,                                                 \
1874       }
1875 
1876       /* We only test with F destination because there is a restriction
1877        * by which F->HF conversions need to be DWord aligned but Align16 also
1878        * requires that destination horizontal stride is 1.
1879        */
1880       INST(F,  F, HF, 4, 4, true),
1881       INST(F,  F, HF, 2, 4, false),
1882       INST(F,  F, HF, 4, 2, false),
1883       INST(F,  F, HF, 0, 4, false),
1884       INST(F,  F, HF, 4, 0, false),
1885       INST(F, HF,  F, 4, 4, true),
1886       INST(F, HF,  F, 4, 2, false),
1887       INST(F, HF,  F, 2, 4, false),
1888       INST(F, HF,  F, 0, 4, false),
1889       INST(F, HF,  F, 4, 0, false),
1890 
1891 #undef INST
1892    };
1893 
1894    if (devinfo.ver < 8 || devinfo.ver >= 11)
1895       return;
1896 
1897    brw_set_default_access_mode(p, BRW_ALIGN_16);
1898 
1899    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
1900       brw_ADD(p, retype(g0, inst[i].dst_type),
1901                  retype(g0, inst[i].src0_type),
1902                  retype(g0, inst[i].src1_type));
1903 
1904       brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
1905       brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);
1906 
1907       EXPECT_EQ(inst[i].expected_result, validate(p));
1908 
1909       clear_instructions(p);
1910    }
1911 }
1912 
/* Mixed-float mode, Align16: execution size.
 *
 * Runs only when 8 <= devinfo.ver < 11.  With the default access mode set to
 * Align16, every row emits an ADD with the given operand types, overrides the
 * execution size, forces both source vertical strides to 4, and compares
 * validate() with expected_result.  The table expects the SIMD8 rows to
 * validate and the SIMD16 rows to be rejected.
 */
TEST_P(validation_test, mixed_float_align16_no_simd16)
{
   static const struct {
      unsigned exec_size;
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      bool expected_result;
   } inst[] = {
#define INST(exec_size, dst_type, src0_type, src1_type, expected_result)  \
      {                                                                   \
         BRW_EXECUTE_##exec_size,                                         \
         BRW_TYPE_##dst_type,                                             \
         BRW_TYPE_##src0_type,                                            \
         BRW_TYPE_##src1_type,                                            \
         expected_result,                                                 \
      }

      /* We only test with F destination because there is a restriction
       * by which F->HF conversions need to be DWord aligned but Align16 also
       * requires that destination horizontal stride is 1.
       *
       * NOTE(review): the third row of each exec-size group repeats the
       * first one -- confirm whether another type combination was intended.
       */
      INST( 8,  F,  F, HF, true),
      INST( 8,  F, HF,  F, true),
      INST( 8,  F,  F, HF, true),
      INST(16,  F,  F, HF, false),
      INST(16,  F, HF,  F, false),
      INST(16,  F,  F, HF, false),

#undef INST
   };

   if (devinfo.ver < 8 || devinfo.ver >= 11)
      return;

   brw_set_default_access_mode(p, BRW_ALIGN_16);

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      brw_ADD(p, retype(g0, inst[i].dst_type),
                 retype(g0, inst[i].src0_type),
                 retype(g0, inst[i].src1_type));

      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);

      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);

      EXPECT_EQ(inst[i].expected_result, validate(p));

      clear_instructions(p);
   }
}
1965 
/* Mixed-float mode, Align16: accumulator as a source.
 *
 * Runs only when 8 <= devinfo.ver < 11.  With the default access mode set to
 * Align16, every row emits an ADD whose src0 is acc0 when read_acc is set
 * (g0 otherwise), forces both source vertical strides to 4, and compares
 * validate() with expected_result.  The table expects the rows that read the
 * accumulator to be rejected.
 */
TEST_P(validation_test, mixed_float_align16_no_acc_read)
{
   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      bool read_acc;
      bool expected_result;
   } inst[] = {
#define INST(dst_type, src0_type, src1_type, read_acc, expected_result)   \
      {                                                                   \
         BRW_TYPE_##dst_type,                                             \
         BRW_TYPE_##src0_type,                                            \
         BRW_TYPE_##src1_type,                                            \
         read_acc,                                                        \
         expected_result,                                                 \
      }

      /* We only test with F destination because there is a restriction
       * by which F->HF conversions need to be DWord aligned but Align16 also
       * requires that destination horizontal stride is 1.
       */
      INST( F,  F, HF, false, true),
      INST( F,  F, HF, true,  false),
      INST( F, HF,  F, false, true),
      INST( F, HF,  F, true,  false),

#undef INST
   };

   if (devinfo.ver < 8 || devinfo.ver >= 11)
      return;

   brw_set_default_access_mode(p, BRW_ALIGN_16);

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      brw_ADD(p, retype(g0, inst[i].dst_type),
                 retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type),
                 retype(g0, inst[i].src1_type));

      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);

      EXPECT_EQ(inst[i].expected_result, validate(p));

      clear_instructions(p);
   }
}
2014 
TEST_P(validation_test,mixed_float_align16_math_packed_format)2015 TEST_P(validation_test, mixed_float_align16_math_packed_format)
2016 {
2017    static const struct {
2018       enum brw_reg_type dst_type;
2019       enum brw_reg_type src0_type;
2020       enum brw_reg_type src1_type;
2021       unsigned src0_vstride;
2022       unsigned src1_vstride;
2023       bool expected_result;
2024    } inst[] = {
2025 #define INST(dst_type, src0_type, src1_type,                              \
2026              src0_vstride, src1_vstride, expected_result)                 \
2027       {                                                                   \
2028          BRW_TYPE_##dst_type,                                             \
2029          BRW_TYPE_##src0_type,                                            \
2030          BRW_TYPE_##src1_type,                                            \
2031          BRW_VERTICAL_STRIDE_##src0_vstride,                              \
2032          BRW_VERTICAL_STRIDE_##src1_vstride,                              \
2033          expected_result,                                                 \
2034       }
2035 
2036       /* We only test with F destination because there is a restriction
2037        * by which F->HF conversions need to be DWord aligned but Align16 also
2038        * requires that destination horizontal stride is 1.
2039        */
2040       INST( F, HF,  F, 4, 0, false),
2041       INST( F, HF, HF, 4, 4, true),
2042       INST( F,  F, HF, 4, 0, false),
2043       INST( F,  F, HF, 2, 4, false),
2044       INST( F,  F, HF, 4, 2, false),
2045       INST( F, HF, HF, 0, 4, false),
2046 
2047 #undef INST
2048    };
2049 
2050    /* Align16 Math for mixed float mode is not supported in gfx8 */
2051    if (devinfo.ver < 9 || devinfo.ver >= 11)
2052       return;
2053 
2054    brw_set_default_access_mode(p, BRW_ALIGN_16);
2055 
2056    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2057       gfx6_math(p, retype(g0, inst[i].dst_type),
2058                    BRW_MATH_FUNCTION_POW,
2059                    retype(g0, inst[i].src0_type),
2060                    retype(g0, inst[i].src1_type));
2061 
2062       brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
2063       brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);
2064 
2065       EXPECT_EQ(inst[i].expected_result, validate(p));
2066 
2067       clear_instructions(p);
2068    }
2069 }
2070 
/* Destination sub-register alignment for MOVs from vector-typed sources.
 *
 * Every row emits a MOV from `zero` retyped to a vector type (VF, V or UV)
 * into g0, overrides the destination sub-register number and the execution
 * size, and compares validate() with expected_result.  The table expects
 * sub-register numbers 0 and 16 to validate and 1 to be rejected.
 */
TEST_P(validation_test, vector_immediate_destination_alignment)
{
   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src_type;
      unsigned subnr;
      unsigned exec_size;
      bool expected_result;
   } move[] = {
      { BRW_TYPE_F, BRW_TYPE_VF,  0, BRW_EXECUTE_4, true  },
      { BRW_TYPE_F, BRW_TYPE_VF, 16, BRW_EXECUTE_4, true  },
      { BRW_TYPE_F, BRW_TYPE_VF,  1, BRW_EXECUTE_4, false },

      { BRW_TYPE_W, BRW_TYPE_V,   0, BRW_EXECUTE_8, true  },
      { BRW_TYPE_W, BRW_TYPE_V,  16, BRW_EXECUTE_8, true  },
      { BRW_TYPE_W, BRW_TYPE_V,   1, BRW_EXECUTE_8, false },

      { BRW_TYPE_W, BRW_TYPE_UV,  0, BRW_EXECUTE_8, true  },
      { BRW_TYPE_W, BRW_TYPE_UV, 16, BRW_EXECUTE_8, true  },
      { BRW_TYPE_W, BRW_TYPE_UV,  1, BRW_EXECUTE_8, false },
   };

   for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
      /* UV type is Gfx6+ */
      if (devinfo.ver < 6 &&
          move[i].src_type == BRW_TYPE_UV)
         continue;

      brw_MOV(p, retype(g0, move[i].dst_type), retype(zero, move[i].src_type));
      brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, move[i].subnr);
      brw_inst_set_exec_size(&devinfo, last_inst, move[i].exec_size);

      EXPECT_EQ(move[i].expected_result, validate(p));

      clear_instructions(p);
   }
}
2108 
/* Destination horizontal stride for MOVs from vector-typed sources.
 *
 * Every row emits a MOV from `zero` retyped to a vector type (VF, V or UV)
 * into g0 with the given destination type, overrides the destination
 * horizontal stride, and compares validate() with expected_result.
 */
TEST_P(validation_test, vector_immediate_destination_stride)
{
   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src_type;
      unsigned stride;
      bool expected_result;
   } move[] = {
      { BRW_TYPE_F, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_1, true  },
      { BRW_TYPE_F, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_2, false },
      { BRW_TYPE_D, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_1, true  },
      { BRW_TYPE_D, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_2, false },
      { BRW_TYPE_W, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_2, true  },
      { BRW_TYPE_B, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_4, true  },

      { BRW_TYPE_W, BRW_TYPE_V,  BRW_HORIZONTAL_STRIDE_1, true  },
      { BRW_TYPE_W, BRW_TYPE_V,  BRW_HORIZONTAL_STRIDE_2, false },
      { BRW_TYPE_W, BRW_TYPE_V,  BRW_HORIZONTAL_STRIDE_4, false },
      { BRW_TYPE_B, BRW_TYPE_V,  BRW_HORIZONTAL_STRIDE_2, true  },

      { BRW_TYPE_W, BRW_TYPE_UV, BRW_HORIZONTAL_STRIDE_1, true  },
      { BRW_TYPE_W, BRW_TYPE_UV, BRW_HORIZONTAL_STRIDE_2, false },
      { BRW_TYPE_W, BRW_TYPE_UV, BRW_HORIZONTAL_STRIDE_4, false },
      { BRW_TYPE_B, BRW_TYPE_UV, BRW_HORIZONTAL_STRIDE_2, true  },
   };

   for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
      /* UV type is Gfx6+ */
      if (devinfo.ver < 6 &&
          move[i].src_type == BRW_TYPE_UV)
         continue;

      brw_MOV(p, retype(g0, move[i].dst_type), retype(zero, move[i].src_type));
      brw_inst_set_dst_hstride(&devinfo, last_inst, move[i].stride);

      EXPECT_EQ(move[i].expected_result, validate(p));

      clear_instructions(p);
   }
}
2149 
TEST_P(validation_test,qword_low_power_align1_regioning_restrictions)2150 TEST_P(validation_test, qword_low_power_align1_regioning_restrictions)
2151 {
2152    static const struct {
2153       enum opcode opcode;
2154       unsigned exec_size;
2155 
2156       enum brw_reg_type dst_type;
2157       unsigned dst_subreg;
2158       unsigned dst_stride;
2159 
2160       enum brw_reg_type src_type;
2161       unsigned src_subreg;
2162       unsigned src_vstride;
2163       unsigned src_width;
2164       unsigned src_hstride;
2165 
2166       bool expected_result;
2167    } inst[] = {
2168 #define INST(opcode, exec_size, dst_type, dst_subreg, dst_stride, src_type,    \
2169              src_subreg, src_vstride, src_width, src_hstride, expected_result) \
2170       {                                                                        \
2171          BRW_OPCODE_##opcode,                                                  \
2172          BRW_EXECUTE_##exec_size,                                              \
2173          BRW_TYPE_##dst_type,                                                  \
2174          dst_subreg,                                                           \
2175          BRW_HORIZONTAL_STRIDE_##dst_stride,                                   \
2176          BRW_TYPE_##src_type,                                                  \
2177          src_subreg,                                                           \
2178          BRW_VERTICAL_STRIDE_##src_vstride,                                    \
2179          BRW_WIDTH_##src_width,                                                \
2180          BRW_HORIZONTAL_STRIDE_##src_hstride,                                  \
2181          expected_result,                                                      \
2182       }
2183 
2184       /* Some instruction that violate no restrictions, as a control */
2185       INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
2186       INST(MOV, 4, Q,  0, 1, Q,  0, 4, 4, 1, true ),
2187       INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),
2188 
2189       INST(MOV, 4, DF, 0, 1, F,  0, 8, 4, 2, true ),
2190       INST(MOV, 4, Q,  0, 1, D,  0, 8, 4, 2, true ),
2191       INST(MOV, 4, UQ, 0, 1, UD, 0, 8, 4, 2, true ),
2192 
2193       INST(MOV, 4, F,  0, 2, DF, 0, 4, 4, 1, true ),
2194       INST(MOV, 4, D,  0, 2, Q,  0, 4, 4, 1, true ),
2195       INST(MOV, 4, UD, 0, 2, UQ, 0, 4, 4, 1, true ),
2196 
2197       INST(MUL, 8, D,  0, 2, D,  0, 8, 4, 2, true ),
2198       INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),
2199 
2200       /* Something with subreg nrs */
2201       INST(MOV, 2, DF, 8, 1, DF, 8, 2, 2, 1, true ),
2202       INST(MOV, 2, Q,  8, 1, Q,  8, 2, 2, 1, true ),
2203       INST(MOV, 2, UQ, 8, 1, UQ, 8, 2, 2, 1, true ),
2204 
2205       INST(MUL, 2, D,  4, 2, D,  4, 4, 2, 2, true ),
2206       INST(MUL, 2, UD, 4, 2, UD, 4, 4, 2, 2, true ),
2207 
2208       /* The PRMs say that for CHV, BXT:
2209        *
2210        *    When source or destination datatype is 64b or operation is integer
2211        *    DWord multiply, regioning in Align1 must follow these rules:
2212        *
2213        *    1. Source and Destination horizontal stride must be aligned to the
2214        *       same qword.
2215        */
2216       INST(MOV, 4, DF, 0, 2, DF, 0, 4, 4, 1, false),
2217       INST(MOV, 4, Q,  0, 2, Q,  0, 4, 4, 1, false),
2218       INST(MOV, 4, UQ, 0, 2, UQ, 0, 4, 4, 1, false),
2219 
2220       INST(MOV, 4, DF, 0, 2, F,  0, 8, 4, 2, false),
2221       INST(MOV, 4, Q,  0, 2, D,  0, 8, 4, 2, false),
2222       INST(MOV, 4, UQ, 0, 2, UD, 0, 8, 4, 2, false),
2223 
2224       INST(MOV, 4, DF, 0, 2, F,  0, 4, 4, 1, false),
2225       INST(MOV, 4, Q,  0, 2, D,  0, 4, 4, 1, false),
2226       INST(MOV, 4, UQ, 0, 2, UD, 0, 4, 4, 1, false),
2227 
2228       INST(MUL, 4, D,  0, 2, D,  0, 4, 4, 1, false),
2229       INST(MUL, 4, UD, 0, 2, UD, 0, 4, 4, 1, false),
2230 
2231       INST(MUL, 4, D,  0, 1, D,  0, 8, 4, 2, false),
2232       INST(MUL, 4, UD, 0, 1, UD, 0, 8, 4, 2, false),
2233 
2234       /*    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. */
2235       INST(MOV, 4, DF, 0, 1, DF, 0, 0, 2, 1, false),
2236       INST(MOV, 4, Q,  0, 1, Q,  0, 0, 2, 1, false),
2237       INST(MOV, 4, UQ, 0, 1, UQ, 0, 0, 2, 1, false),
2238 
2239       INST(MOV, 4, DF, 0, 1, F,  0, 0, 2, 2, false),
2240       INST(MOV, 4, Q,  0, 1, D,  0, 0, 2, 2, false),
2241       INST(MOV, 4, UQ, 0, 1, UD, 0, 0, 2, 2, false),
2242 
2243       INST(MOV, 8, F,  0, 2, DF, 0, 0, 2, 1, false),
2244       INST(MOV, 8, D,  0, 2, Q,  0, 0, 2, 1, false),
2245       INST(MOV, 8, UD, 0, 2, UQ, 0, 0, 2, 1, false),
2246 
2247       INST(MUL, 8, D,  0, 2, D,  0, 0, 4, 2, false),
2248       INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),
2249 
2250       INST(MUL, 8, D,  0, 2, D,  0, 0, 4, 2, false),
2251       INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),
2252 
2253       /*    3. Source and Destination offset must be the same, except the case
2254        *       of scalar source.
2255        */
2256       INST(MOV, 2, DF, 8, 1, DF, 0, 2, 2, 1, false),
2257       INST(MOV, 2, Q,  8, 1, Q,  0, 2, 2, 1, false),
2258       INST(MOV, 2, UQ, 8, 1, UQ, 0, 2, 2, 1, false),
2259 
2260       INST(MOV, 2, DF, 0, 1, DF, 8, 2, 2, 1, false),
2261       INST(MOV, 2, Q,  0, 1, Q,  8, 2, 2, 1, false),
2262       INST(MOV, 2, UQ, 0, 1, UQ, 8, 2, 2, 1, false),
2263 
2264       INST(MUL, 4, D,  4, 2, D,  0, 4, 2, 2, false),
2265       INST(MUL, 4, UD, 4, 2, UD, 0, 4, 2, 2, false),
2266 
2267       INST(MUL, 4, D,  0, 2, D,  4, 4, 2, 2, false),
2268       INST(MUL, 4, UD, 0, 2, UD, 4, 4, 2, 2, false),
2269 
2270       INST(MOV, 2, DF, 8, 1, DF, 0, 0, 1, 0, true ),
2271       INST(MOV, 2, Q,  8, 1, Q,  0, 0, 1, 0, true ),
2272       INST(MOV, 2, UQ, 8, 1, UQ, 0, 0, 1, 0, true ),
2273 
2274       INST(MOV, 2, DF, 8, 1, F,  4, 0, 1, 0, true ),
2275       INST(MOV, 2, Q,  8, 1, D,  4, 0, 1, 0, true ),
2276       INST(MOV, 2, UQ, 8, 1, UD, 4, 0, 1, 0, true ),
2277 
2278       INST(MUL, 4, D,  4, 1, D,  0, 0, 1, 0, true ),
2279       INST(MUL, 4, UD, 4, 1, UD, 0, 0, 1, 0, true ),
2280 
2281       INST(MUL, 4, D,  0, 1, D,  4, 0, 1, 0, true ),
2282       INST(MUL, 4, UD, 0, 1, UD, 4, 0, 1, 0, true ),
2283 
2284 #undef INST
2285    };
2286 
2287    /* These restrictions only apply to Gfx8+ */
2288    if (devinfo.ver < 8)
2289       return;
2290 
2291    /* NoDDChk/NoDDClr does not exist on Gfx12+ */
2292    if (devinfo.ver >= 12)
2293       return;
2294 
2295    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2296       if (!devinfo.has_64bit_float &&
2297           (inst[i].dst_type == BRW_TYPE_DF ||
2298            inst[i].src_type == BRW_TYPE_DF))
2299          continue;
2300 
2301       if (!devinfo.has_64bit_int &&
2302           (inst[i].dst_type == BRW_TYPE_Q ||
2303            inst[i].dst_type == BRW_TYPE_UQ ||
2304            inst[i].src_type == BRW_TYPE_Q ||
2305            inst[i].src_type == BRW_TYPE_UQ))
2306          continue;
2307 
2308       if (inst[i].opcode == BRW_OPCODE_MOV) {
2309          brw_MOV(p, retype(g0, inst[i].dst_type),
2310                     retype(g0, inst[i].src_type));
2311       } else {
2312          assert(inst[i].opcode == BRW_OPCODE_MUL);
2313          brw_MUL(p, retype(g0, inst[i].dst_type),
2314                     retype(g0, inst[i].src_type),
2315                     retype(zero, inst[i].src_type));
2316       }
2317       brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2318 
2319       brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subreg);
2320       brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].src_subreg);
2321 
2322       brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2323 
2324       brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2325       brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2326       brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2327 
2328       if (devinfo.platform == INTEL_PLATFORM_CHV ||
2329           intel_device_info_is_9lp(&devinfo)) {
2330          EXPECT_EQ(inst[i].expected_result, validate(p));
2331       } else {
2332          EXPECT_TRUE(validate(p));
2333       }
2334 
2335       clear_instructions(p);
2336    }
2337 }
2338 
TEST_P(validation_test,qword_low_power_no_indirect_addressing)2339 TEST_P(validation_test, qword_low_power_no_indirect_addressing)
2340 {
2341    static const struct {
2342       enum opcode opcode;
2343       unsigned exec_size;
2344 
2345       enum brw_reg_type dst_type;
2346       bool dst_is_indirect;
2347       unsigned dst_stride;
2348 
2349       enum brw_reg_type src_type;
2350       bool src_is_indirect;
2351       unsigned src_vstride;
2352       unsigned src_width;
2353       unsigned src_hstride;
2354 
2355       bool expected_result;
2356    } inst[] = {
2357 #define INST(opcode, exec_size, dst_type, dst_is_indirect, dst_stride,       \
2358              src_type, src_is_indirect, src_vstride, src_width, src_hstride, \
2359              expected_result)                                                \
2360       {                                                                      \
2361          BRW_OPCODE_##opcode,                                                \
2362          BRW_EXECUTE_##exec_size,                                            \
2363          BRW_TYPE_##dst_type,                                                \
2364          dst_is_indirect,                                                    \
2365          BRW_HORIZONTAL_STRIDE_##dst_stride,                                 \
2366          BRW_TYPE_##src_type,                                                \
2367          src_is_indirect,                                                    \
2368          BRW_VERTICAL_STRIDE_##src_vstride,                                  \
2369          BRW_WIDTH_##src_width,                                              \
2370          BRW_HORIZONTAL_STRIDE_##src_hstride,                                \
2371          expected_result,                                                    \
2372       }
2373 
2374       /* Some instruction that violate no restrictions, as a control */
2375       INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
2376       INST(MOV, 4, Q,  0, 1, Q,  0, 4, 4, 1, true ),
2377       INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),
2378 
2379       INST(MUL, 8, D,  0, 2, D,  0, 8, 4, 2, true ),
2380       INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),
2381 
2382       INST(MOV, 4, F,  1, 1, F,  0, 4, 4, 1, true ),
2383       INST(MOV, 4, F,  0, 1, F,  1, 4, 4, 1, true ),
2384       INST(MOV, 4, F,  1, 1, F,  1, 4, 4, 1, true ),
2385 
2386       /* The PRMs say that for CHV, BXT:
2387        *
2388        *    When source or destination datatype is 64b or operation is integer
2389        *    DWord multiply, indirect addressing must not be used.
2390        */
2391       INST(MOV, 4, DF, 1, 1, DF, 0, 4, 4, 1, false),
2392       INST(MOV, 4, Q,  1, 1, Q,  0, 4, 4, 1, false),
2393       INST(MOV, 4, UQ, 1, 1, UQ, 0, 4, 4, 1, false),
2394 
2395       INST(MOV, 4, DF, 0, 1, DF, 1, 4, 4, 1, false),
2396       INST(MOV, 4, Q,  0, 1, Q,  1, 4, 4, 1, false),
2397       INST(MOV, 4, UQ, 0, 1, UQ, 1, 4, 4, 1, false),
2398 
2399       INST(MOV, 4, DF, 1, 1, F,  0, 8, 4, 2, false),
2400       INST(MOV, 4, Q,  1, 1, D,  0, 8, 4, 2, false),
2401       INST(MOV, 4, UQ, 1, 1, UD, 0, 8, 4, 2, false),
2402 
2403       INST(MOV, 4, DF, 0, 1, F,  1, 8, 4, 2, false),
2404       INST(MOV, 4, Q,  0, 1, D,  1, 8, 4, 2, false),
2405       INST(MOV, 4, UQ, 0, 1, UD, 1, 8, 4, 2, false),
2406 
2407       INST(MOV, 4, F,  1, 2, DF, 0, 4, 4, 1, false),
2408       INST(MOV, 4, D,  1, 2, Q,  0, 4, 4, 1, false),
2409       INST(MOV, 4, UD, 1, 2, UQ, 0, 4, 4, 1, false),
2410 
2411       INST(MOV, 4, F,  0, 2, DF, 1, 4, 4, 1, false),
2412       INST(MOV, 4, D,  0, 2, Q,  1, 4, 4, 1, false),
2413       INST(MOV, 4, UD, 0, 2, UQ, 1, 4, 4, 1, false),
2414 
2415       INST(MUL, 8, D,  1, 2, D,  0, 8, 4, 2, false),
2416       INST(MUL, 8, UD, 1, 2, UD, 0, 8, 4, 2, false),
2417 
2418       INST(MUL, 8, D,  0, 2, D,  1, 8, 4, 2, false),
2419       INST(MUL, 8, UD, 0, 2, UD, 1, 8, 4, 2, false),
2420 
2421 #undef INST
2422    };
2423 
2424    /* These restrictions only apply to Gfx8+ */
2425    if (devinfo.ver < 8)
2426       return;
2427 
2428    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2429       if (!devinfo.has_64bit_float &&
2430           (inst[i].dst_type == BRW_TYPE_DF ||
2431            inst[i].src_type == BRW_TYPE_DF))
2432          continue;
2433 
2434       if (!devinfo.has_64bit_int &&
2435           (inst[i].dst_type == BRW_TYPE_Q ||
2436            inst[i].dst_type == BRW_TYPE_UQ ||
2437            inst[i].src_type == BRW_TYPE_Q ||
2438            inst[i].src_type == BRW_TYPE_UQ))
2439          continue;
2440 
2441       if (inst[i].opcode == BRW_OPCODE_MOV) {
2442          brw_MOV(p, retype(g0, inst[i].dst_type),
2443                     retype(g0, inst[i].src_type));
2444       } else {
2445          assert(inst[i].opcode == BRW_OPCODE_MUL);
2446          brw_MUL(p, retype(g0, inst[i].dst_type),
2447                     retype(g0, inst[i].src_type),
2448                     retype(zero, inst[i].src_type));
2449       }
2450       brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2451 
2452       brw_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_is_indirect);
2453       brw_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src_is_indirect);
2454 
2455       brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2456 
2457       brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2458       brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2459       brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2460 
2461       if (devinfo.platform == INTEL_PLATFORM_CHV ||
2462           intel_device_info_is_9lp(&devinfo)) {
2463          EXPECT_EQ(inst[i].expected_result, validate(p));
2464       } else {
2465          EXPECT_TRUE(validate(p));
2466       }
2467 
2468       clear_instructions(p);
2469    }
2470 }
2471 
TEST_P(validation_test,qword_low_power_no_64bit_arf)2472 TEST_P(validation_test, qword_low_power_no_64bit_arf)
2473 {
2474    static const struct {
2475       enum opcode opcode;
2476       unsigned exec_size;
2477 
2478       struct brw_reg dst;
2479       enum brw_reg_type dst_type;
2480       unsigned dst_stride;
2481 
2482       struct brw_reg src;
2483       enum brw_reg_type src_type;
2484       unsigned src_vstride;
2485       unsigned src_width;
2486       unsigned src_hstride;
2487 
2488       bool acc_wr;
2489       bool expected_result;
2490    } inst[] = {
2491 #define INST(opcode, exec_size, dst, dst_type, dst_stride,        \
2492              src, src_type, src_vstride, src_width, src_hstride,  \
2493              acc_wr, expected_result)                             \
2494       {                                                           \
2495          BRW_OPCODE_##opcode,                                     \
2496          BRW_EXECUTE_##exec_size,                                 \
2497          dst,                                                     \
2498          BRW_TYPE_##dst_type,                                     \
2499          BRW_HORIZONTAL_STRIDE_##dst_stride,                      \
2500          src,                                                     \
2501          BRW_TYPE_##src_type,                                     \
2502          BRW_VERTICAL_STRIDE_##src_vstride,                       \
2503          BRW_WIDTH_##src_width,                                   \
2504          BRW_HORIZONTAL_STRIDE_##src_hstride,                     \
2505          acc_wr,                                                  \
2506          expected_result,                                         \
2507       }
2508 
2509       /* Some instruction that violate no restrictions, as a control */
2510       INST(MOV, 4, g0,   DF, 1, g0,   F,  4, 2, 2, 0, true ),
2511       INST(MOV, 4, g0,   F,  2, g0,   DF, 4, 4, 1, 0, true ),
2512 
2513       INST(MOV, 4, g0,   Q,  1, g0,   D,  4, 2, 2, 0, true ),
2514       INST(MOV, 4, g0,   D,  2, g0,   Q,  4, 4, 1, 0, true ),
2515 
2516       INST(MOV, 4, g0,   UQ, 1, g0,   UD, 4, 2, 2, 0, true ),
2517       INST(MOV, 4, g0,   UD, 2, g0,   UQ, 4, 4, 1, 0, true ),
2518 
2519       INST(MOV, 4, null, F,  1, g0,   F,  4, 4, 1, 0, true ),
2520       INST(MOV, 4, acc0, F,  1, g0,   F,  4, 4, 1, 0, true ),
2521       INST(MOV, 4, g0,   F,  1, acc0, F,  4, 4, 1, 0, true ),
2522 
2523       INST(MOV, 4, null, D,  1, g0,   D,  4, 4, 1, 0, true ),
2524       INST(MOV, 4, acc0, D,  1, g0,   D,  4, 4, 1, 0, true ),
2525       INST(MOV, 4, g0,   D,  1, acc0, D,  4, 4, 1, 0, true ),
2526 
2527       INST(MOV, 4, null, UD, 1, g0,   UD, 4, 4, 1, 0, true ),
2528       INST(MOV, 4, acc0, UD, 1, g0,   UD, 4, 4, 1, 0, true ),
2529       INST(MOV, 4, g0,   UD, 1, acc0, UD, 4, 4, 1, 0, true ),
2530 
2531       INST(MUL, 4, g0,   D,  2, g0,   D,  4, 2, 2, 0, true ),
2532       INST(MUL, 4, g0,   UD, 2, g0,   UD, 4, 2, 2, 0, true ),
2533 
2534       /* The PRMs say that for CHV, BXT:
2535        *
2536        *    ARF registers must never be used with 64b datatype or when
2537        *    operation is integer DWord multiply.
2538        */
2539       INST(MOV, 4, acc0, DF, 1, g0,   F,  4, 2, 2, 0, false),
2540       INST(MOV, 4, g0,   DF, 1, acc0, F,  4, 2, 2, 0, false),
2541 
2542       INST(MOV, 4, acc0, Q,  1, g0,   D,  4, 2, 2, 0, false),
2543       INST(MOV, 4, g0,   Q,  1, acc0, D,  4, 2, 2, 0, false),
2544 
2545       INST(MOV, 4, acc0, UQ, 1, g0,   UD, 4, 2, 2, 0, false),
2546       INST(MOV, 4, g0,   UQ, 1, acc0, UD, 4, 2, 2, 0, false),
2547 
2548       INST(MOV, 4, acc0, F,  2, g0,   DF, 4, 4, 1, 0, false),
2549       INST(MOV, 4, g0,   F,  2, acc0, DF, 4, 4, 1, 0, false),
2550 
2551       INST(MOV, 4, acc0, D,  2, g0,   Q,  4, 4, 1, 0, false),
2552       INST(MOV, 4, g0,   D,  2, acc0, Q,  4, 4, 1, 0, false),
2553 
2554       INST(MOV, 4, acc0, UD, 2, g0,   UQ, 4, 4, 1, 0, false),
2555       INST(MOV, 4, g0,   UD, 2, acc0, UQ, 4, 4, 1, 0, false),
2556 
2557       INST(MUL, 4, acc0, D,  2, g0,   D,  4, 2, 2, 0, false),
2558       INST(MUL, 4, acc0, UD, 2, g0,   UD, 4, 2, 2, 0, false),
2559       /* MUL cannot have integer accumulator sources, so don't test that */
2560 
2561       /* We assume that the restriction does not apply to the null register */
2562       INST(MOV, 4, null, DF, 1, g0,   F,  4, 2, 2, 0, true ),
2563       INST(MOV, 4, null, Q,  1, g0,   D,  4, 2, 2, 0, true ),
2564       INST(MOV, 4, null, UQ, 1, g0,   UD, 4, 2, 2, 0, true ),
2565 
2566       /* Check implicit accumulator write control */
2567       INST(MOV, 4, null, DF, 1, g0,   F,  4, 2, 2, 1, false),
2568       INST(MUL, 4, null, DF, 1, g0,   F,  4, 2, 2, 1, false),
2569 
2570 #undef INST
2571    };
2572 
2573    /* These restrictions only apply to Gfx8+ */
2574    if (devinfo.ver < 8)
2575       return;
2576 
2577    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2578       if (!devinfo.has_64bit_float &&
2579           (inst[i].dst_type == BRW_TYPE_DF ||
2580            inst[i].src_type == BRW_TYPE_DF))
2581          continue;
2582 
2583       if (!devinfo.has_64bit_int &&
2584           (inst[i].dst_type == BRW_TYPE_Q ||
2585            inst[i].dst_type == BRW_TYPE_UQ ||
2586            inst[i].src_type == BRW_TYPE_Q ||
2587            inst[i].src_type == BRW_TYPE_UQ))
2588          continue;
2589 
2590       if (inst[i].opcode == BRW_OPCODE_MOV) {
2591          brw_MOV(p, retype(inst[i].dst, inst[i].dst_type),
2592                     retype(inst[i].src, inst[i].src_type));
2593       } else {
2594          assert(inst[i].opcode == BRW_OPCODE_MUL);
2595          brw_MUL(p, retype(inst[i].dst, inst[i].dst_type),
2596                     retype(inst[i].src, inst[i].src_type),
2597                     retype(zero, inst[i].src_type));
2598          brw_inst_set_opcode(&isa, last_inst, inst[i].opcode);
2599       }
2600       brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2601       brw_inst_set_acc_wr_control(&devinfo, last_inst, inst[i].acc_wr);
2602 
2603       brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2604 
2605       brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2606       brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2607       brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2608 
2609       /* Note: The Broadwell PRM also lists the restriction that destination
2610        * of DWord multiplication cannot be the accumulator.
2611        */
2612       if (devinfo.platform == INTEL_PLATFORM_CHV ||
2613           intel_device_info_is_9lp(&devinfo) ||
2614           (devinfo.ver == 8 &&
2615            inst[i].opcode == BRW_OPCODE_MUL &&
2616            brw_inst_dst_reg_file(&devinfo, last_inst) == ARF &&
2617            brw_inst_dst_da_reg_nr(&devinfo, last_inst) != BRW_ARF_NULL)) {
2618          EXPECT_EQ(inst[i].expected_result, validate(p));
2619       } else {
2620          EXPECT_TRUE(validate(p));
2621       }
2622 
2623       clear_instructions(p);
2624    }
2625 
2626    if (!devinfo.has_64bit_float)
2627       return;
2628 
2629    /* MAC implicitly reads the accumulator */
2630    brw_MAC(p, retype(g0, BRW_TYPE_DF),
2631               retype(stride(g0, 4, 4, 1), BRW_TYPE_DF),
2632               retype(stride(g0, 4, 4, 1), BRW_TYPE_DF));
2633    if (devinfo.platform == INTEL_PLATFORM_CHV ||
2634        intel_device_info_is_9lp(&devinfo)) {
2635       EXPECT_FALSE(validate(p));
2636    } else {
2637       EXPECT_TRUE(validate(p));
2638    }
2639 }
2640 
TEST_P(validation_test,align16_64_bit_integer)2641 TEST_P(validation_test, align16_64_bit_integer)
2642 {
2643    static const struct {
2644       enum opcode opcode;
2645       unsigned exec_size;
2646 
2647       enum brw_reg_type dst_type;
2648       enum brw_reg_type src_type;
2649 
2650       bool expected_result;
2651    } inst[] = {
2652 #define INST(opcode, exec_size, dst_type, src_type, expected_result)  \
2653       {                                                               \
2654          BRW_OPCODE_##opcode,                                         \
2655          BRW_EXECUTE_##exec_size,                                     \
2656          BRW_TYPE_##dst_type,                                         \
2657          BRW_TYPE_##src_type,                                         \
2658          expected_result,                                             \
2659       }
2660 
2661       /* Some instruction that violate no restrictions, as a control */
2662       INST(MOV, 2, Q,  D,  true ),
2663       INST(MOV, 2, UQ, UD, true ),
2664       INST(MOV, 2, DF, F,  true ),
2665 
2666       INST(ADD, 2, Q,  D,  true ),
2667       INST(ADD, 2, UQ, UD, true ),
2668       INST(ADD, 2, DF, F,  true ),
2669 
2670       /* The PRMs say that for BDW, SKL:
2671        *
2672        *    If Align16 is required for an operation with QW destination and non-QW
2673        *    source datatypes, the execution size cannot exceed 2.
2674        */
2675 
2676       INST(MOV, 4, Q,  D,  false),
2677       INST(MOV, 4, UQ, UD, false),
2678       INST(MOV, 4, DF, F,  false),
2679 
2680       INST(ADD, 4, Q,  D,  false),
2681       INST(ADD, 4, UQ, UD, false),
2682       INST(ADD, 4, DF, F,  false),
2683 
2684 #undef INST
2685    };
2686 
2687    /* 64-bit integer types exist on Gfx8+ */
2688    if (devinfo.ver < 8)
2689       return;
2690 
2691    /* Align16 does not exist on Gfx11+ */
2692    if (devinfo.ver >= 11)
2693       return;
2694 
2695    brw_set_default_access_mode(p, BRW_ALIGN_16);
2696 
2697    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2698       if (inst[i].opcode == BRW_OPCODE_MOV) {
2699          brw_MOV(p, retype(g0, inst[i].dst_type),
2700                     retype(g0, inst[i].src_type));
2701       } else {
2702          assert(inst[i].opcode == BRW_OPCODE_ADD);
2703          brw_ADD(p, retype(g0, inst[i].dst_type),
2704                     retype(g0, inst[i].src_type),
2705                     retype(g0, inst[i].src_type));
2706       }
2707       brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2708 
2709       EXPECT_EQ(inst[i].expected_result, validate(p));
2710 
2711       clear_instructions(p);
2712    }
2713 }
2714 
TEST_P(validation_test,qword_low_power_no_depctrl)2715 TEST_P(validation_test, qword_low_power_no_depctrl)
2716 {
2717    static const struct {
2718       enum opcode opcode;
2719       unsigned exec_size;
2720 
2721       enum brw_reg_type dst_type;
2722       unsigned dst_stride;
2723 
2724       enum brw_reg_type src_type;
2725       unsigned src_vstride;
2726       unsigned src_width;
2727       unsigned src_hstride;
2728 
2729       bool no_dd_check;
2730       bool no_dd_clear;
2731 
2732       bool expected_result;
2733    } inst[] = {
2734 #define INST(opcode, exec_size, dst_type, dst_stride,        \
2735              src_type, src_vstride, src_width, src_hstride,  \
2736              no_dd_check, no_dd_clear, expected_result)      \
2737       {                                                      \
2738          BRW_OPCODE_##opcode,                                \
2739          BRW_EXECUTE_##exec_size,                            \
2740          BRW_TYPE_##dst_type,                                \
2741          BRW_HORIZONTAL_STRIDE_##dst_stride,                 \
2742          BRW_TYPE_##src_type,                                \
2743          BRW_VERTICAL_STRIDE_##src_vstride,                  \
2744          BRW_WIDTH_##src_width,                              \
2745          BRW_HORIZONTAL_STRIDE_##src_hstride,                \
2746          no_dd_check,                                        \
2747          no_dd_clear,                                        \
2748          expected_result,                                    \
2749       }
2750 
2751       /* Some instruction that violate no restrictions, as a control */
2752       INST(MOV, 4, DF, 1, F,  8, 4, 2, 0, 0, true ),
2753       INST(MOV, 4, Q,  1, D,  8, 4, 2, 0, 0, true ),
2754       INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 0, true ),
2755 
2756       INST(MOV, 4, F,  2, DF, 4, 4, 1, 0, 0, true ),
2757       INST(MOV, 4, D,  2, Q,  4, 4, 1, 0, 0, true ),
2758       INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 0, true ),
2759 
2760       INST(MUL, 8, D,  2, D,  8, 4, 2, 0, 0, true ),
2761       INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 0, true ),
2762 
2763       INST(MOV, 4, F,  1, F,  4, 4, 1, 1, 1, true ),
2764 
2765       /* The PRMs say that for CHV, BXT:
2766        *
2767        *    When source or destination datatype is 64b or operation is integer
2768        *    DWord multiply, DepCtrl must not be used.
2769        */
2770       INST(MOV, 4, DF, 1, F,  8, 4, 2, 1, 0, false),
2771       INST(MOV, 4, Q,  1, D,  8, 4, 2, 1, 0, false),
2772       INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 1, 0, false),
2773 
2774       INST(MOV, 4, F,  2, DF, 4, 4, 1, 1, 0, false),
2775       INST(MOV, 4, D,  2, Q,  4, 4, 1, 1, 0, false),
2776       INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 1, 0, false),
2777 
2778       INST(MOV, 4, DF, 1, F,  8, 4, 2, 0, 1, false),
2779       INST(MOV, 4, Q,  1, D,  8, 4, 2, 0, 1, false),
2780       INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 1, false),
2781 
2782       INST(MOV, 4, F,  2, DF, 4, 4, 1, 0, 1, false),
2783       INST(MOV, 4, D,  2, Q,  4, 4, 1, 0, 1, false),
2784       INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 1, false),
2785 
2786       INST(MUL, 8, D,  2, D,  8, 4, 2, 1, 0, false),
2787       INST(MUL, 8, UD, 2, UD, 8, 4, 2, 1, 0, false),
2788 
2789       INST(MUL, 8, D,  2, D,  8, 4, 2, 0, 1, false),
2790       INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 1, false),
2791 
2792 #undef INST
2793    };
2794 
2795    /* These restrictions only apply to Gfx8+ */
2796    if (devinfo.ver < 8)
2797       return;
2798 
2799    /* NoDDChk/NoDDClr does not exist on Gfx12+ */
2800    if (devinfo.ver >= 12)
2801       return;
2802 
2803    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2804       if (!devinfo.has_64bit_float &&
2805           (inst[i].dst_type == BRW_TYPE_DF ||
2806            inst[i].src_type == BRW_TYPE_DF))
2807          continue;
2808 
2809       if (!devinfo.has_64bit_int &&
2810           (inst[i].dst_type == BRW_TYPE_Q ||
2811            inst[i].dst_type == BRW_TYPE_UQ ||
2812            inst[i].src_type == BRW_TYPE_Q ||
2813            inst[i].src_type == BRW_TYPE_UQ))
2814          continue;
2815 
2816       if (inst[i].opcode == BRW_OPCODE_MOV) {
2817          brw_MOV(p, retype(g0, inst[i].dst_type),
2818                     retype(g0, inst[i].src_type));
2819       } else {
2820          assert(inst[i].opcode == BRW_OPCODE_MUL);
2821          brw_MUL(p, retype(g0, inst[i].dst_type),
2822                     retype(g0, inst[i].src_type),
2823                     retype(zero, inst[i].src_type));
2824       }
2825       brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
2826 
2827       brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
2828 
2829       brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
2830       brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
2831       brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);
2832 
2833       brw_inst_set_no_dd_check(&devinfo, last_inst, inst[i].no_dd_check);
2834       brw_inst_set_no_dd_clear(&devinfo, last_inst, inst[i].no_dd_clear);
2835 
2836       if (devinfo.platform == INTEL_PLATFORM_CHV ||
2837           intel_device_info_is_9lp(&devinfo)) {
2838          EXPECT_EQ(inst[i].expected_result, validate(p));
2839       } else {
2840          EXPECT_TRUE(validate(p));
2841       }
2842 
2843       clear_instructions(p);
2844    }
2845 }
2846 
TEST_P(validation_test,gfx11_no_byte_src_1_2)2847 TEST_P(validation_test, gfx11_no_byte_src_1_2)
2848 {
2849    static const struct {
2850       enum opcode opcode;
2851       unsigned access_mode;
2852 
2853       enum brw_reg_type dst_type;
2854       struct {
2855          enum brw_reg_type type;
2856          unsigned vstride;
2857          unsigned width;
2858          unsigned hstride;
2859       } srcs[3];
2860 
2861       int  gfx_ver;
2862       bool expected_result;
2863    } inst[] = {
2864 #define INST(opcode, access_mode, dst_type,                             \
2865              src0_type, src0_vstride, src0_width, src0_hstride,         \
2866              src1_type, src1_vstride, src1_width, src1_hstride,         \
2867              src2_type,                                                 \
2868              gfx_ver, expected_result)                                  \
2869       {                                                                 \
2870          BRW_OPCODE_##opcode,                                           \
2871          BRW_ALIGN_##access_mode,                                       \
2872          BRW_TYPE_##dst_type,                                           \
2873          {                                                              \
2874             {                                                           \
2875                BRW_TYPE_##src0_type,                                    \
2876                BRW_VERTICAL_STRIDE_##src0_vstride,                      \
2877                BRW_WIDTH_##src0_width,                                  \
2878                BRW_HORIZONTAL_STRIDE_##src0_hstride,                    \
2879             },                                                          \
2880             {                                                           \
2881                BRW_TYPE_##src1_type,                                    \
2882                BRW_VERTICAL_STRIDE_##src1_vstride,                      \
2883                BRW_WIDTH_##src1_width,                                  \
2884                BRW_HORIZONTAL_STRIDE_##src1_hstride,                    \
2885             },                                                          \
2886             {                                                           \
2887                BRW_TYPE_##src2_type,                                    \
2888             },                                                          \
2889          },                                                             \
2890          gfx_ver,                                                       \
2891          expected_result,                                               \
2892       }
2893 
2894       /* Passes on < 11 */
2895       INST(MOV, 16,  F, B, 2, 4, 0, UD, 0, 4, 0,  D,  8, true ),
2896       INST(ADD, 16, UD, F, 0, 4, 0, UB, 0, 1, 0,  D,  7, true ),
2897       INST(MAD, 16,  D, B, 0, 4, 0, UB, 0, 1, 0,  B, 10, true ),
2898 
2899       /* Fails on 11+ */
2900       INST(MAD,  1, UB, W, 1, 1, 0,  D, 0, 4, 0,  B, 11, false ),
2901       INST(MAD,  1, UB, W, 1, 1, 1, UB, 1, 1, 0,  W, 11, false ),
2902       INST(ADD,  1,  W, W, 1, 4, 1,  B, 1, 1, 0,  D, 11, false ),
2903 
2904       /* Passes on 11+ */
2905       INST(MOV,  1,  W, B, 8, 8, 1,  D, 8, 8, 1,  D, 11, true ),
2906       INST(ADD,  1, UD, B, 8, 8, 1,  W, 8, 8, 1,  D, 11, true ),
2907       INST(MAD,  1,  B, B, 0, 1, 0,  D, 0, 4, 0,  W, 11, true ),
2908 
2909 #undef INST
2910    };
2911 
2912 
2913    for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
2914       /* Skip instruction not meant for this gfx_ver. */
2915       if (devinfo.ver != inst[i].gfx_ver)
2916          continue;
2917 
2918       brw_push_insn_state(p);
2919 
2920       brw_set_default_exec_size(p, BRW_EXECUTE_8);
2921       brw_set_default_access_mode(p, inst[i].access_mode);
2922 
2923       switch (inst[i].opcode) {
2924       case BRW_OPCODE_MOV:
2925          brw_MOV(p, retype(g0, inst[i].dst_type),
2926                     retype(g0, inst[i].srcs[0].type));
2927          brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
2928          brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
2929          break;
2930       case BRW_OPCODE_ADD:
2931          brw_ADD(p, retype(g0, inst[i].dst_type),
2932                     retype(g0, inst[i].srcs[0].type),
2933                     retype(g0, inst[i].srcs[1].type));
2934          brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
2935          brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width);
2936          brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
2937          brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].srcs[1].vstride);
2938          brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width);
2939          brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].srcs[1].hstride);
2940          break;
2941       case BRW_OPCODE_MAD:
2942          brw_MAD(p, retype(g0, inst[i].dst_type),
2943                     retype(g0, inst[i].srcs[0].type),
2944                     retype(g0, inst[i].srcs[1].type),
2945                     retype(g0, inst[i].srcs[2].type));
2946          brw_inst_set_3src_a1_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
2947          brw_inst_set_3src_a1_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
2948          brw_inst_set_3src_a1_src1_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
2949          brw_inst_set_3src_a1_src1_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
2950          break;
2951       default:
2952          unreachable("invalid opcode");
2953       }
2954 
2955       brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
2956 
2957       brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width);
2958       brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width);
2959 
2960       brw_pop_insn_state(p);
2961 
2962       EXPECT_EQ(inst[i].expected_result, validate(p));
2963 
2964       clear_instructions(p);
2965    }
2966 }
2967 
/* Emits ADD3 with every destination/source type combination in the table and
 * compares the validator's verdict with the row's expectation.  Skipped when
 * devinfo.verx10 < 125.
 */
TEST_P(validation_test, add3_source_types)
{
   static const struct {
      enum brw_reg_type dst;
      enum brw_reg_type src0;
      enum brw_reg_type src1;
      enum brw_reg_type src2;
      bool expected;
   } cases[] = {
/* Column order: dst type, src0 type, src1 type, src2 type, expected result. */
#define INST(dtype, s0type, s1type, s2type, ok)                           \
      {                                                                   \
         BRW_TYPE_##dtype,                                                \
         BRW_TYPE_##s0type,                                               \
         BRW_TYPE_##s1type,                                               \
         BRW_TYPE_##s2type,                                               \
         ok,                                                              \
      }

      /* Float and byte types: expected to be rejected */
      INST( F,  F,  F,  F, false),
      INST(HF, HF, HF, HF, false),
      INST( B,  B,  B,  B, false),
      INST(UB, UB, UB, UB, false),

      /* Uniform word / dword integer types: expected to be accepted */
      INST( W,  W,  W,  W, true),
      INST(UW, UW, UW, UW, true),
      INST( D,  D,  D,  D, true),
      INST(UD, UD, UD, UD, true),

      /* Mixed word / dword integer types: expected to be accepted */
      INST( W,  D,  W,  W, true),
      INST(UW, UW, UD, UW, true),
      INST( D,  D,  W,  D, true),
      INST(UD, UD, UD, UW, true),
#undef INST
   };


   if (devinfo.verx10 < 125)
      return;

   for (const auto &tc : cases) {
      const struct brw_reg dst_reg  = retype(g0, tc.dst);
      const struct brw_reg src0_reg = retype(g0, tc.src0);
      const struct brw_reg src1_reg = retype(g0, tc.src1);
      const struct brw_reg src2_reg = retype(g0, tc.src2);

      brw_ADD3(p, dst_reg, src0_reg, src1_reg, src2_reg);

      EXPECT_EQ(tc.expected, validate(p));

      clear_instructions(p);
   }
}
3019 
/* Emits ADD3 with an immediate in either the src0 or the src2 slot and
 * compares the validator's verdict with the row's expectation.  Each row
 * names the register type used for the destination and the non-immediate
 * sources, the immediate's type, and which source slot (0 or 2) receives the
 * immediate.  Skipped when devinfo.verx10 < 125.
 */
TEST_P(validation_test, add3_immediate_types)
{
   static const struct {
      enum brw_reg_type reg_type;
      enum brw_reg_type imm_type;
      unsigned imm_src;
      bool expected_result;
   } inst[] = {
#define INST(reg_type, imm_type, imm_src, expected_result)  \
      {                                                     \
         BRW_TYPE_##reg_type,                               \
         BRW_TYPE_##imm_type,                               \
         imm_src,                                           \
         expected_result,                                   \
      }

      /* Word-sized immediates: expected to be accepted */
      INST( W,  W,  0, true),
      INST( W,  W,  2, true),
      INST(UW, UW,  0, true),
      INST(UW, UW,  2, true),
      INST( D,  W,  0, true),
      INST(UD,  W,  2, true),
      INST( D, UW,  0, true),
      /* Was a second copy of (UW, UW, 2); the D/UD pattern of this group
       * calls for a UD register with a UW immediate in src2.
       */
      INST(UD, UW,  2, true),

      /* DWord-sized immediates: expected to be rejected */
      INST( W,  D,  0, false),
      INST( W,  D,  2, false),
      INST(UW, UD,  0, false),
      INST(UW, UD,  2, false),
      INST( D,  D,  0, false),
      INST(UD,  D,  2, false),
      INST( D, UD,  0, false),
      /* Was a second copy of (UW, UD, 2); see the note in the group above. */
      INST(UD, UD,  2, false),
#undef INST
   };


   if (devinfo.verx10 < 125)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      /* Exactly one of src0 / src2 is replaced by an immediate of imm_type;
       * src1 and the remaining source stay GRF operands of reg_type.
       */
      brw_ADD3(p,
               retype(g0, inst[i].reg_type),
               inst[i].imm_src == 0 ? retype(brw_imm_d(0x1234), inst[i].imm_type)
                                    : retype(g0, inst[i].reg_type),
               retype(g0, inst[i].reg_type),
               inst[i].imm_src == 2 ? retype(brw_imm_d(0x2143), inst[i].imm_type)
                                    : retype(g0, inst[i].reg_type));

      EXPECT_EQ(inst[i].expected_result, validate(p));

      clear_instructions(p);
   }
}
3074 
/**
 * Emits one DPAS per systolic depth encoding and expects the validator to
 * accept only BRW_SYSTOLIC_DEPTH_8.
 */
TEST_P(validation_test, dpas_sdepth)
{
   if (devinfo.verx10 < 125)
      return;

   static const enum gfx12_systolic_depth all_depths[] = {
      BRW_SYSTOLIC_DEPTH_16,
      BRW_SYSTOLIC_DEPTH_2,
      BRW_SYSTOLIC_DEPTH_4,
      BRW_SYSTOLIC_DEPTH_8,
   };

   /* The operands are the same for every depth, so build them once. */
   const struct brw_reg dst = retype(brw_vec8_grf(0, 0), BRW_TYPE_F);
   const struct brw_reg src1 = retype(brw_vec8_grf(16, 0), BRW_TYPE_HF);
   const struct brw_reg src2 = retype(brw_vec8_grf(32, 0), BRW_TYPE_HF);

   for (const enum gfx12_systolic_depth sdepth : all_depths) {
      brw_DPAS(p, sdepth, 8, dst, null, src1, src2);

      const bool should_validate = (sdepth == BRW_SYSTOLIC_DEPTH_8);

      EXPECT_EQ(should_validate, validate(p)) <<
         "Encoded systolic depth value is: " << sdepth;

      clear_instructions(p);
   }
}
3104 
/**
 * Emits the same DPAS under every default execution size and expects the
 * validator to accept it only for BRW_EXECUTE_8.
 */
TEST_P(validation_test, dpas_exec_size)
{
   if (devinfo.verx10 < 125)
      return;

   static const enum brw_execution_size exec_sizes[] = {
      BRW_EXECUTE_1,
      BRW_EXECUTE_2,
      BRW_EXECUTE_4,
      BRW_EXECUTE_8,
      BRW_EXECUTE_16,
      BRW_EXECUTE_32,
   };

   /* Only the execution size varies; the operands are fixed. */
   const struct brw_reg dst = retype(brw_vec8_grf(0, 0), BRW_TYPE_F);
   const struct brw_reg src1 = retype(brw_vec8_grf(16, 0), BRW_TYPE_HF);
   const struct brw_reg src2 = retype(brw_vec8_grf(32, 0), BRW_TYPE_HF);

   for (const enum brw_execution_size exec_size : exec_sizes) {
      brw_set_default_exec_size(p, exec_size);

      brw_DPAS(p, BRW_SYSTOLIC_DEPTH_8, 8, dst, null, src1, src2);

      const bool should_validate = (exec_size == BRW_EXECUTE_8);

      /* The message prints 1 << encoding rather than the raw enum value. */
      EXPECT_EQ(should_validate, validate(p)) <<
         "Exec size = " << (1u << exec_size);

      clear_instructions(p);
   }

   /* Put the default execution size back to BRW_EXECUTE_8 (presumably the
    * value the fixture started with) once all sizes have been tried.
    */
   brw_set_default_exec_size(p, BRW_EXECUTE_8);
}
3140 
TEST_P(validation_test,dpas_sub_byte_precision)3141 TEST_P(validation_test, dpas_sub_byte_precision)
3142 {
3143    if (devinfo.verx10 < 125)
3144       return;
3145 
3146    static const struct {
3147       brw_reg_type dst_type;
3148       brw_reg_type src0_type;
3149       brw_reg_type src1_type;
3150       enum gfx12_sub_byte_precision src1_prec;
3151       brw_reg_type src2_type;
3152       enum gfx12_sub_byte_precision src2_prec;
3153       bool expected_result;
3154    } test_vectors[] = {
3155       {
3156          BRW_TYPE_F,
3157          BRW_TYPE_F,
3158          BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_NONE,
3159          BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_NONE,
3160          true,
3161       },
3162       {
3163          BRW_TYPE_F,
3164          BRW_TYPE_F,
3165          BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_NONE,
3166          BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_4BIT,
3167          false,
3168       },
3169       {
3170          BRW_TYPE_F,
3171          BRW_TYPE_F,
3172          BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_NONE,
3173          BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_2BIT,
3174          false,
3175       },
3176       {
3177          BRW_TYPE_F,
3178          BRW_TYPE_F,
3179          BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_4BIT,
3180          BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_NONE,
3181          false,
3182       },
3183       {
3184          BRW_TYPE_F,
3185          BRW_TYPE_F,
3186          BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_2BIT,
3187          BRW_TYPE_HF, BRW_SUB_BYTE_PRECISION_NONE,
3188          false,
3189       },
3190 
3191       {
3192          BRW_TYPE_UD,
3193          BRW_TYPE_UD,
3194          BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_NONE,
3195          BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_NONE,
3196          true,
3197       },
3198       {
3199          BRW_TYPE_UD,
3200          BRW_TYPE_UD,
3201          BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_NONE,
3202          BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_4BIT,
3203          true,
3204       },
3205       {
3206          BRW_TYPE_UD,
3207          BRW_TYPE_UD,
3208          BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_NONE,
3209          BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_2BIT,
3210          true,
3211       },
3212       {
3213          BRW_TYPE_UD,
3214          BRW_TYPE_UD,
3215          BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_NONE,
3216          BRW_TYPE_UB, (enum gfx12_sub_byte_precision) 3,
3217          false,
3218       },
3219       {
3220          BRW_TYPE_UD,
3221          BRW_TYPE_UD,
3222          BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_4BIT,
3223          BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_NONE,
3224          true,
3225       },
3226       {
3227          BRW_TYPE_UD,
3228          BRW_TYPE_UD,
3229          BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_2BIT,
3230          BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_NONE,
3231          true,
3232       },
3233       {
3234          BRW_TYPE_UD,
3235          BRW_TYPE_UD,
3236          BRW_TYPE_UB, (enum gfx12_sub_byte_precision) 3,
3237          BRW_TYPE_UB, BRW_SUB_BYTE_PRECISION_NONE,
3238          false,
3239       },
3240    };
3241 
3242    for (unsigned i = 0; i < ARRAY_SIZE(test_vectors); i++) {
3243       brw_inst *inst =
3244          brw_DPAS(p,
3245                   BRW_SYSTOLIC_DEPTH_8,
3246                   8,
3247                   retype(brw_vec8_grf(0, 0), test_vectors[i].dst_type),
3248                   retype(brw_vec8_grf(16, 0), test_vectors[i].src0_type),
3249                   retype(brw_vec8_grf(32, 0), test_vectors[i].src1_type),
3250                   retype(brw_vec8_grf(48, 0), test_vectors[i].src2_type));
3251 
3252       brw_inst_set_dpas_3src_src1_subbyte(&devinfo, inst,
3253                                           test_vectors[i].src1_prec);
3254       brw_inst_set_dpas_3src_src2_subbyte(&devinfo, inst,
3255                                           test_vectors[i].src2_prec);
3256 
3257       EXPECT_EQ(test_vectors[i].expected_result, validate(p)) <<
3258          "test vector index = " << i;
3259 
3260       clear_instructions(p);
3261    }
3262 }
3263 
TEST_P(validation_test,dpas_types)3264 TEST_P(validation_test, dpas_types)
3265 {
3266    if (devinfo.verx10 < 125)
3267       return;
3268 
3269 #define TV(a, b, c, d, r) \
3270    { BRW_TYPE_ ## a, BRW_TYPE_ ## b, BRW_TYPE_ ## c, BRW_TYPE_ ## d, r }
3271 
3272    static const struct {
3273       brw_reg_type dst_type;
3274       brw_reg_type src0_type;
3275       brw_reg_type src1_type;
3276       brw_reg_type src2_type;
3277       bool expected_result;
3278    } test_vectors[] = {
3279       TV( F,  F, HF, HF, true),
3280       TV( F, HF, HF, HF, false),
3281       TV(HF,  F, HF, HF, false),
3282       TV( F,  F,  F, HF, false),
3283       TV( F,  F, HF,  F, false),
3284 
3285       TV(DF, DF, DF, DF, false),
3286       TV(DF, DF, DF,  F, false),
3287       TV(DF, DF,  F, DF, false),
3288       TV(DF,  F, DF, DF, false),
3289       TV(DF, DF, DF, HF, false),
3290       TV(DF, DF, HF, DF, false),
3291       TV(DF, HF, DF, DF, false),
3292 
3293       TV(UD, UD, UB, UB, true),
3294       TV(UD, UD, UB, UD, false),
3295       TV(UD, UD, UD, UB, false),
3296       TV(UD, UD, UB, UW, false),
3297       TV(UD, UD, UW, UB, false),
3298 
3299       TV(UD, UB, UB, UB, false),
3300       TV(UD, UW, UB, UB, false),
3301 
3302       TV(UQ, UQ, UB, UB, false),
3303       TV(UQ, UQ, UB, UQ, false),
3304       TV(UQ, UQ, UQ, UB, false),
3305       TV(UQ, UQ, UB, UW, false),
3306       TV(UQ, UQ, UW, UB, false),
3307 
3308       TV( D,  D,  B,  B, true),
3309       TV( D,  D,  B, UB, true),
3310       TV( D,  D, UB,  B, true),
3311       TV( D, UD,  B,  B, true),
3312 
3313       TV( D,  D,  B,  D, false),
3314       TV( D,  D,  D,  B, false),
3315       TV( D,  D,  B,  W, false),
3316       TV( D,  D,  W,  B, false),
3317 
3318       TV( D,  B,  B,  B, false),
3319       TV( D,  W,  B,  B, false),
3320 
3321       TV( Q,  Q,  B,  B, false),
3322       TV( Q,  Q,  B,  Q, false),
3323       TV( Q,  Q,  Q,  B, false),
3324       TV( Q,  Q,  B,  W, false),
3325       TV( Q,  Q,  W,  B, false),
3326 
3327       TV(UD, UD, UB,  B, false),
3328       TV(UD, UD,  B, UB, false),
3329       TV(UD,  D, UB, UB, false),
3330    };
3331 
3332 #undef TV
3333 
3334    for (unsigned i = 0; i < ARRAY_SIZE(test_vectors); i++) {
3335       brw_DPAS(p,
3336                BRW_SYSTOLIC_DEPTH_8,
3337                8,
3338                retype(brw_vec8_grf(0, 0), test_vectors[i].dst_type),
3339                retype(brw_vec8_grf(16, 0), test_vectors[i].src0_type),
3340                retype(brw_vec8_grf(32, 0), test_vectors[i].src1_type),
3341                retype(brw_vec8_grf(48, 0), test_vectors[i].src2_type));
3342 
3343       EXPECT_EQ(test_vectors[i].expected_result, validate(p)) <<
3344          "test vector index = " << i;
3345 
3346       clear_instructions(p);
3347    }
3348 }
3349 
TEST_P(validation_test,dpas_src_subreg_nr)3350 TEST_P(validation_test, dpas_src_subreg_nr)
3351 {
3352    if (devinfo.verx10 < 125)
3353       return;
3354 
3355 #define TV(dt, od, t0, o0, t1, o1, o2, r) \
3356    { BRW_TYPE_ ## dt, od, BRW_TYPE_ ## t0, o0, BRW_TYPE_ ## t1, o1, o2, r }
3357 
3358    static const struct {
3359       brw_reg_type dst_type;
3360       unsigned dst_subnr;
3361       brw_reg_type src0_type;
3362       unsigned src0_subnr;
3363       brw_reg_type src1_src2_type;
3364       unsigned src1_subnr;
3365       unsigned src2_subnr;
3366       bool expected_result;
3367    } test_vectors[] = {
3368       TV( F,  0,  F,  0, HF,  0,  0, true),
3369       TV( D,  0,  D,  0,  B,  0,  0, true),
3370       TV( D,  0,  D,  0, UB,  0,  0, true),
3371       TV( D,  0, UD,  0,  B,  0,  0, true),
3372 
3373       TV( F,  1,  F,  0, HF,  0,  0, false),
3374       TV( F,  2,  F,  0, HF,  0,  0, false),
3375       TV( F,  3,  F,  0, HF,  0,  0, false),
3376       TV( F,  4,  F,  0, HF,  0,  0, false),
3377       TV( F,  5,  F,  0, HF,  0,  0, false),
3378       TV( F,  6,  F,  0, HF,  0,  0, false),
3379       TV( F,  7,  F,  0, HF,  0,  0, false),
3380 
3381       TV( F,  0,  F,  1, HF,  0,  0, false),
3382       TV( F,  0,  F,  2, HF,  0,  0, false),
3383       TV( F,  0,  F,  3, HF,  0,  0, false),
3384       TV( F,  0,  F,  4, HF,  0,  0, false),
3385       TV( F,  0,  F,  5, HF,  0,  0, false),
3386       TV( F,  0,  F,  6, HF,  0,  0, false),
3387       TV( F,  0,  F,  7, HF,  0,  0, false),
3388 
3389       TV( F,  0,  F,  0, HF,  1,  0, false),
3390       TV( F,  0,  F,  0, HF,  2,  0, false),
3391       TV( F,  0,  F,  0, HF,  3,  0, false),
3392       TV( F,  0,  F,  0, HF,  4,  0, false),
3393       TV( F,  0,  F,  0, HF,  5,  0, false),
3394       TV( F,  0,  F,  0, HF,  6,  0, false),
3395       TV( F,  0,  F,  0, HF,  7,  0, false),
3396       TV( F,  0,  F,  0, HF,  8,  0, false),
3397       TV( F,  0,  F,  0, HF,  9,  0, false),
3398       TV( F,  0,  F,  0, HF, 10,  0, false),
3399       TV( F,  0,  F,  0, HF, 11,  0, false),
3400       TV( F,  0,  F,  0, HF, 12,  0, false),
3401       TV( F,  0,  F,  0, HF, 13,  0, false),
3402       TV( F,  0,  F,  0, HF, 14,  0, false),
3403       TV( F,  0,  F,  0, HF, 15,  0, false),
3404 
3405       TV( F,  0,  F,  0, HF,  0,  1, false),
3406       TV( F,  0,  F,  0, HF,  0,  2, false),
3407       TV( F,  0,  F,  0, HF,  0,  3, false),
3408       TV( F,  0,  F,  0, HF,  0,  4, false),
3409       TV( F,  0,  F,  0, HF,  0,  5, false),
3410       TV( F,  0,  F,  0, HF,  0,  6, false),
3411       TV( F,  0,  F,  0, HF,  0,  7, false),
3412       TV( F,  0,  F,  0, HF,  0,  8, false),
3413       TV( F,  0,  F,  0, HF,  0,  9, false),
3414       TV( F,  0,  F,  0, HF,  0, 10, false),
3415       TV( F,  0,  F,  0, HF,  0, 11, false),
3416       TV( F,  0,  F,  0, HF,  0, 12, false),
3417       TV( F,  0,  F,  0, HF,  0, 13, false),
3418       TV( F,  0,  F,  0, HF,  0, 14, false),
3419       TV( F,  0,  F,  0, HF,  0, 15, false),
3420 
3421       /* These meet the requirements, but they specify a subnr that is part of
3422        * the next register. It is currently not possible to specify a subnr of
3423        * 32 for the B and UB values because brw_reg::subnr is only 5 bits.
3424        */
3425       TV( F, 16,  F,  0, HF,  0,  0, false),
3426       TV( F,  0,  F, 16, HF,  0,  0, false),
3427       TV( F,  0,  F,  0, HF,  0, 16, false),
3428 
3429       TV( D, 16,  D,  0,  B,  0,  0, false),
3430       TV( D,  0,  D, 16,  B,  0,  0, false),
3431    };
3432 
3433 #undef TV
3434 
3435    for (unsigned i = 0; i < ARRAY_SIZE(test_vectors); i++) {
3436       struct brw_reg dst =
3437          retype(brw_vec8_grf( 0, 0), test_vectors[i].dst_type);
3438       struct brw_reg src0 =
3439          retype(brw_vec8_grf(16, 0), test_vectors[i].src0_type);
3440       struct brw_reg src1 =
3441          retype(brw_vec8_grf(32, 0), test_vectors[i].src1_src2_type);
3442       struct brw_reg src2 =
3443          retype(brw_vec8_grf(48, 0), test_vectors[i].src1_src2_type);
3444 
3445       /* subnr for DPAS is in units of datatype precision instead of bytes as
3446        * it is for every other instruction. Set the value by hand instead of
3447        * using byte_offset() or similar.
3448        */
3449       dst.subnr = test_vectors[i].dst_subnr;
3450       src0.subnr = test_vectors[i].src0_subnr;
3451       src1.subnr = test_vectors[i].src1_subnr;
3452       src2.subnr = test_vectors[i].src2_subnr;
3453 
3454       brw_DPAS(p, BRW_SYSTOLIC_DEPTH_8, 8, dst, src0, src1, src2);
3455 
3456       EXPECT_EQ(test_vectors[i].expected_result, validate(p)) <<
3457          "test vector index = " << i;
3458 
3459       clear_instructions(p);
3460    }
3461 }
3462