1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <gtest/gtest.h>
25 #include "brw_disasm_info.h"
26 #include "brw_eu.h"
27 #include "brw_eu_defines.h"
28 #include "util/bitset.h"
29 #include "util/ralloc.h"
30
/* Short platform names the validation tests are instantiated for.  Each
 * entry becomes one test parameter; the name is also what
 * validation_test::SetUp() passes to intel_device_name_to_pci_device_id().
 */
static const struct intel_gfx_info {
   const char *name;
} gfx_names[] = {
   { "skl" },
   { "bxt" },
   { "kbl" },
   { "aml" },
   { "glk" },
   { "cfl" },
   { "whl" },
   { "cml" },
   { "icl" },
   { "ehl" },
   { "jsl" },
   { "tgl" },
   { "rkl" },
   { "dg1" },
   { "adl" },
   { "sg1" },
   { "rpl" },
   { "dg2" },
   { "mtl" },
};
54
55 class validation_test: public ::testing::TestWithParam<struct intel_gfx_info> {
56 virtual void SetUp();
57
58 public:
59 validation_test();
60 virtual ~validation_test();
61
62 struct brw_isa_info isa;
63 struct brw_codegen *p;
64 struct intel_device_info devinfo;
65 };
66
validation_test()67 validation_test::validation_test()
68 {
69 p = rzalloc(NULL, struct brw_codegen);
70 memset(&devinfo, 0, sizeof(devinfo));
71 }
72
~validation_test()73 validation_test::~validation_test()
74 {
75 ralloc_free(p);
76 }
77
SetUp()78 void validation_test::SetUp()
79 {
80 struct intel_gfx_info info = GetParam();
81 int devid = intel_device_name_to_pci_device_id(info.name);
82
83 intel_get_device_info_from_pci_id(devid, &devinfo);
84
85 brw_init_isa_info(&isa, &devinfo);
86
87 brw_init_codegen(&isa, p, p);
88 }
89
90 struct gfx_name {
91 template <class ParamType>
92 std::string
operator ()gfx_name93 operator()(const ::testing::TestParamInfo<ParamType>& info) const {
94 return info.param.name;
95 }
96 };
97
98 INSTANTIATE_TEST_SUITE_P(
99 eu_assembly, validation_test,
100 ::testing::ValuesIn(gfx_names),
101 gfx_name()
102 );
103
104 static bool
validate(struct brw_codegen * p)105 validate(struct brw_codegen *p)
106 {
107 const bool print = getenv("TEST_DEBUG");
108 struct disasm_info *disasm = disasm_initialize(p->isa, NULL);
109
110 if (print) {
111 disasm_new_inst_group(disasm, 0);
112 disasm_new_inst_group(disasm, p->next_insn_offset);
113 }
114
115 bool ret = brw_validate_instructions(p->isa, p->store, 0,
116 p->next_insn_offset, disasm);
117
118 if (print) {
119 dump_assembly(p->store, 0, p->next_insn_offset, disasm, NULL);
120 }
121 ralloc_free(disasm);
122
123 return ret;
124 }
125
/* Shorthands for the tests below.
 *
 * last_inst expands to a pointer to the most recently emitted instruction and
 * requires a brw_codegen pointer named `p` in scope; the rest are frequently
 * used register and immediate operands.
 */
#define last_inst (p->store + (p->nr_insn - 1))
#define g0        brw_vec8_grf(0, 0)
#define acc0      brw_acc_reg(8)
#define null      brw_null_reg()
#define zero      brw_imm_f(0.0f)
131
132 static void
clear_instructions(struct brw_codegen * p)133 clear_instructions(struct brw_codegen *p)
134 {
135 p->next_insn_offset = 0;
136 p->nr_insn = 0;
137 }
138
/* A plain ADD on g0 must pass validation. */
TEST_P(validation_test, sanity)
{
   brw_ADD(p, g0, g0, g0);

   const bool valid = validate(p);
   EXPECT_TRUE(valid);
}
145
/* A MOV whose src0 is the null register must be rejected. */
TEST_P(validation_test, src0_null_reg)
{
   brw_MOV(p, g0, null);

   const bool valid = validate(p);
   EXPECT_FALSE(valid);
}
152
/* An ADD whose src1 is the null register must be rejected. */
TEST_P(validation_test, src1_null_reg)
{
   brw_ADD(p, g0, g0, null);

   const bool valid = validate(p);
   EXPECT_FALSE(valid);
}
159
/* A SIN math instruction with a null src0 must be rejected. */
TEST_P(validation_test, math_src0_null_reg)
{
   gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, null, null);

   const bool valid = validate(p);
   EXPECT_FALSE(valid);
}
166
/* A POW math instruction with a null src1 must be rejected. */
TEST_P(validation_test, math_src1_null_reg)
{
   gfx6_math(p, g0, BRW_MATH_FUNCTION_POW, g0, null);

   const bool valid = validate(p);
   EXPECT_FALSE(valid);
}
172
/* Hardware opcode 46 has changed meaning across generations:
 *
 *    "push" on Gen 4 and 5
 *    "fork" on Gen 6
 *    reserved on Gen 7
 *    "goto" on Gfx8+
 *
 * Emit whatever it decodes to for the current platform and expect the
 * validator to accept it.
 */
TEST_P(validation_test, opcode46)
{
   const auto opcode = brw_opcode_decode(&isa, 46);
   brw_next_insn(p, opcode);

   EXPECT_TRUE(validate(p));
}
184
/* Every defined execution size must validate; the two encodings past
 * BRW_EXECUTE_32 must be rejected.
 */
TEST_P(validation_test, invalid_exec_size_encoding)
{
   struct exec_size_case {
      enum brw_execution_size exec_size;
      bool expected_result;
   };

   const exec_size_case cases[] = {
      { BRW_EXECUTE_1,  true },
      { BRW_EXECUTE_2,  true },
      { BRW_EXECUTE_4,  true },
      { BRW_EXECUTE_8,  true },
      { BRW_EXECUTE_16, true },
      { BRW_EXECUTE_32, true },

      { static_cast<enum brw_execution_size>(static_cast<int>(BRW_EXECUTE_32) + 1), false },
      { static_cast<enum brw_execution_size>(static_cast<int>(BRW_EXECUTE_32) + 2), false },
   };

   for (const exec_size_case &tc : cases) {
      brw_MOV(p, g0, g0);
      auto *const inst = last_inst;

      brw_inst_set_exec_size(&devinfo, inst, tc.exec_size);
      brw_inst_set_src0_file_type(&devinfo, inst, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_dst_file_type(&devinfo, inst, FIXED_GRF, BRW_TYPE_W);

      /* Use a <0;1,0> source region for the scalar case and <2;2,1> for
       * every other execution size.
       */
      const bool scalar = tc.exec_size == BRW_EXECUTE_1;
      brw_inst_set_src0_vstride(&devinfo, inst,
                                scalar ? BRW_VERTICAL_STRIDE_0
                                       : BRW_VERTICAL_STRIDE_2);
      brw_inst_set_src0_width(&devinfo, inst,
                              scalar ? BRW_WIDTH_1 : BRW_WIDTH_2);
      brw_inst_set_src0_hstride(&devinfo, inst,
                                scalar ? BRW_HORIZONTAL_STRIDE_0
                                       : BRW_HORIZONTAL_STRIDE_1);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
224
/* For both GRF and immediate sources: every register type expected to be
 * valid must pass validation, and every hardware type encoding that no valid
 * type maps to must be rejected.
 */
TEST_P(validation_test, invalid_type_encoding)
{
   const enum brw_reg_file files[] = { FIXED_GRF, IMM };

   for (const enum brw_reg_file file : files) {
      const int num_bits = 4;
      const int num_encodings = 1 << num_bits;

      /* The data types are encoded into <num_bits> bits to be used in
       * hardware instructions, so track the invalid bit patterns in a bitset
       * so that they can be verified to be rejected when used.
       */
      BITSET_DECLARE(invalid_encodings, num_encodings);

      const struct {
         enum brw_reg_type type;
         bool expected_result;
      } test_case[] = {
         { BRW_TYPE_DF, devinfo.has_64bit_float },
         { BRW_TYPE_F,  true },
         { BRW_TYPE_HF, true },
         { BRW_TYPE_VF, file == IMM },
         { BRW_TYPE_Q,  devinfo.has_64bit_int },
         { BRW_TYPE_UQ, devinfo.has_64bit_int },
         { BRW_TYPE_D,  true },
         { BRW_TYPE_UD, true },
         { BRW_TYPE_W,  true },
         { BRW_TYPE_UW, true },
         { BRW_TYPE_B,  file == FIXED_GRF },
         { BRW_TYPE_UB, file == FIXED_GRF },
         { BRW_TYPE_V,  file == IMM },
         { BRW_TYPE_UV, file == IMM },
      };

      /* Initially assume all hardware encodings are invalid */
      BITSET_ONES(invalid_encodings);

      brw_set_default_exec_size(p, BRW_EXECUTE_4);

      for (const auto &tc : test_case) {
         if (!tc.expected_result)
            continue;

         const unsigned hw_type = brw_type_encode(&devinfo, file, tc.type);
         if (hw_type != INVALID_HW_REG_TYPE) {
            /* ... and remove valid encodings from the set */
            assert(BITSET_TEST(invalid_encodings, hw_type));
            BITSET_CLEAR(invalid_encodings, hw_type);
         }

         if (file == FIXED_GRF) {
            const struct brw_reg reg = retype(g0, tc.type);
            brw_MOV(p, reg, reg);

            auto *const inst = last_inst;
            brw_inst_set_src0_vstride(&devinfo, inst, BRW_VERTICAL_STRIDE_4);
            brw_inst_set_src0_width(&devinfo, inst, BRW_WIDTH_4);
            brw_inst_set_src0_hstride(&devinfo, inst, BRW_HORIZONTAL_STRIDE_1);
         } else {
            /* Vector immediates are written to a destination of the
             * matching scalar type; all other types are used as-is.
             */
            enum brw_reg_type dst_type = tc.type;
            if (tc.type == BRW_TYPE_V)
               dst_type = BRW_TYPE_W;
            else if (tc.type == BRW_TYPE_UV)
               dst_type = BRW_TYPE_UW;
            else if (tc.type == BRW_TYPE_VF)
               dst_type = BRW_TYPE_F;

            const struct brw_reg dst = retype(g0, dst_type);
            brw_MOV(p, dst, retype(brw_imm_w(0), tc.type));
         }

         EXPECT_TRUE(validate(p));

         clear_instructions(p);
      }

      /* The remaining encodings in invalid_encodings do not have a mapping
       * from BRW_TYPE_* and must be invalid. Verify that invalid
       * encodings are rejected by the validator.
       */
      int encoding;
      BITSET_FOREACH_SET(encoding, invalid_encodings, num_encodings) {
         if (file == FIXED_GRF) {
            brw_MOV(p, g0, g0);

            auto *const inst = last_inst;
            brw_inst_set_src0_vstride(&devinfo, inst, BRW_VERTICAL_STRIDE_4);
            brw_inst_set_src0_width(&devinfo, inst, BRW_WIDTH_4);
            brw_inst_set_src0_hstride(&devinfo, inst, BRW_HORIZONTAL_STRIDE_1);
         } else {
            brw_MOV(p, g0, brw_imm_w(0));
         }

         auto *const inst = last_inst;
         brw_inst_set_dst_reg_hw_type(&devinfo, inst, encoding);
         brw_inst_set_src0_reg_hw_type(&devinfo, inst, encoding);

         EXPECT_FALSE(validate(p));

         clear_instructions(p);
      }
   }
}
334
/* Align16 3-source instructions: every type expected to be valid must pass
 * validation, and every hardware type encoding that no valid type maps to
 * must be rejected for both MAD and BFE (MAD only on ver 6).
 */
TEST_P(validation_test, invalid_type_encoding_3src_a16)
{
   /* 3-src instructions in align16 mode only supported on Gfx6-10 */
   if (devinfo.ver < 6 || devinfo.ver > 10)
      return;

   const int num_bits = devinfo.ver >= 8 ? 3 : 2;
   const int num_encodings = 1 << num_bits;

   /* The data types are encoded into <num_bits> bits to be used in hardware
    * instructions, so keep a record in a bitset the invalid patterns so
    * they can be verified to be invalid when used.
    *
    * num_encodings is only known at run time, so the storage is sized by the
    * compile-time maximum (3 bits) instead; sizing it by num_encodings would
    * make it a variable-length array, which is not standard C++.  Only the
    * first num_encodings bits are ever inspected below.
    */
   constexpr int max_encodings = 1 << 3;
   BITSET_DECLARE(invalid_encodings, max_encodings);

   const struct {
      enum brw_reg_type type;
      bool expected_result;
   } test_case[] = {
      { BRW_TYPE_DF, devinfo.ver >= 7 },
      { BRW_TYPE_F,  true },
      { BRW_TYPE_HF, devinfo.ver >= 8 },
      { BRW_TYPE_D,  devinfo.ver >= 7 },
      { BRW_TYPE_UD, devinfo.ver >= 7 },
   };

   /* Initially assume all hardware encodings are invalid */
   BITSET_ONES(invalid_encodings);

   brw_set_default_access_mode(p, BRW_ALIGN_16);
   brw_set_default_exec_size(p, BRW_EXECUTE_4);

   for (const auto &tc : test_case) {
      if (!tc.expected_result)
         continue;

      const unsigned hw_type = brw_type_encode_for_3src(&devinfo, tc.type);
      if (hw_type != INVALID_HW_REG_TYPE) {
         /* ... and remove valid encodings from the set */
         assert(BITSET_TEST(invalid_encodings, hw_type));
         BITSET_CLEAR(invalid_encodings, hw_type);
      }

      /* Non-integer types are exercised with MAD, integer types with BFE. */
      const struct brw_reg g = retype(g0, tc.type);
      if (!brw_type_is_int(tc.type)) {
         brw_MAD(p, g, g, g, g);
      } else {
         brw_BFE(p, g, g, g, g);
      }

      EXPECT_TRUE(validate(p));

      clear_instructions(p);
   }

   /* The remaining encodings in invalid_encodings do not have a mapping
    * from BRW_TYPE_* and must be invalid. Verify that invalid
    * encodings are rejected by the validator.
    */
   int e;
   BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
      for (unsigned i = 0; i < 2; i++) {
         if (i == 0) {
            brw_MAD(p, g0, g0, g0, g0);
         } else {
            brw_BFE(p, g0, g0, g0, g0);
         }

         brw_inst_set_3src_a16_dst_hw_type(&devinfo, last_inst, e);
         brw_inst_set_3src_a16_src_hw_type(&devinfo, last_inst, e);

         EXPECT_FALSE(validate(p));

         clear_instructions(p);

         /* On ver 6 only the MAD variant is tried. */
         if (devinfo.ver == 6)
            break;
      }
   }
}
415
/* Align1 3-source instructions: every (type, execution type) pair expected
 * to be valid must pass validation, and every remaining combination of
 * hardware type and execution-type bit must be rejected.
 */
TEST_P(validation_test, invalid_type_encoding_3src_a1)
{
   /* 3-src instructions in align1 mode only supported on Gfx10+ */
   if (devinfo.ver < 10)
      return;

   const int num_bits = 3 + 1 /* for exec_type */;
   const int num_encodings = 1 << num_bits;

   /* The data types are encoded into <num_bits> bits to be used in hardware
    * instructions, so track the invalid bit patterns in a bitset so that
    * they can be verified to be rejected when used.
    */
   BITSET_DECLARE(invalid_encodings, num_encodings);

   const struct {
      enum brw_reg_type type;
      unsigned exec_type;
      bool expected_result;
   } test_case[] = {
#define E(x) ((unsigned)BRW_ALIGN1_3SRC_EXEC_TYPE_##x)
      { BRW_TYPE_DF, E(FLOAT), devinfo.has_64bit_float },
      { BRW_TYPE_F,  E(FLOAT), true },
      { BRW_TYPE_HF, E(FLOAT), true },
      { BRW_TYPE_D,  E(INT),   true },
      { BRW_TYPE_UD, E(INT),   true },
      { BRW_TYPE_W,  E(INT),   true },
      { BRW_TYPE_UW, E(INT),   true },

      /* There are no ternary instructions that can operate on B-type sources
       * on Gfx11-12. Src1/Src2 cannot be B-typed either.
       */
      { BRW_TYPE_B,  E(INT),   false },
      { BRW_TYPE_UB, E(INT),   false },
   };

   /* Initially assume all hardware encodings are invalid */
   BITSET_ONES(invalid_encodings);

   brw_set_default_access_mode(p, BRW_ALIGN_1);
   brw_set_default_exec_size(p, BRW_EXECUTE_4);

   for (const auto &tc : test_case) {
      if (!tc.expected_result)
         continue;

      const unsigned hw_type = brw_type_encode_for_3src(&devinfo, tc.type);

      /* The execution type occupies the bit above the 3-bit hardware type. */
      const unsigned hw_exec_type = hw_type | (tc.exec_type << 3);
      if (hw_type != INVALID_HW_REG_TYPE) {
         /* ... and remove valid encodings from the set */
         assert(BITSET_TEST(invalid_encodings, hw_exec_type));
         BITSET_CLEAR(invalid_encodings, hw_exec_type);
      }

      /* Non-integer types are exercised with MAD, integer types with BFE. */
      const struct brw_reg reg = retype(g0, tc.type);
      if (brw_type_is_int(tc.type)) {
         brw_BFE(p, reg, reg, reg, reg);
      } else {
         brw_MAD(p, reg, reg, reg, reg);
      }

      EXPECT_TRUE(validate(p));

      clear_instructions(p);
   }

   /* The remaining encodings in invalid_encodings do not have a mapping
    * from BRW_TYPE_* and must be invalid. Verify that invalid
    * encodings are rejected by the validator.
    */
   int encoding;
   BITSET_FOREACH_SET(encoding, invalid_encodings, num_encodings) {
      const unsigned hw_type = encoding & 0x7;
      const unsigned exec_type = encoding >> 3;

      for (unsigned variant = 0; variant < 2; variant++) {
         if (variant == 0) {
            brw_MAD(p, g0, g0, g0, g0);
            brw_inst_set_3src_a1_exec_type(&devinfo, last_inst, BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT);
         } else {
            brw_CSEL(p, g0, g0, g0, g0);
            brw_inst_set_3src_cond_modifier(&devinfo, last_inst, BRW_CONDITIONAL_NZ);
            brw_inst_set_3src_a1_exec_type(&devinfo, last_inst, BRW_ALIGN1_3SRC_EXEC_TYPE_INT);
         }

         /* Overwrite the execution type and all four operand types with the
          * encoding under test.
          */
         auto *const inst = last_inst;
         brw_inst_set_3src_a1_exec_type(&devinfo, inst, exec_type);
         brw_inst_set_3src_a1_dst_hw_type (&devinfo, inst, hw_type);
         brw_inst_set_3src_a1_src0_hw_type(&devinfo, inst, hw_type);
         brw_inst_set_3src_a1_src1_hw_type(&devinfo, inst, hw_type);
         brw_inst_set_3src_a1_src2_hw_type(&devinfo, inst, hw_type);

         EXPECT_FALSE(validate(p));

         clear_instructions(p);
      }
   }
}
513
514 TEST_P(validation_test, 3src_inst_access_mode)
515 {
516 /* 3-src instructions only supported on Gfx6+ */
517 if (devinfo.ver < 6)
518 return;
519
520 /* No access mode bit on Gfx12+ */
521 if (devinfo.ver >= 12)
522 return;
523
524 const struct {
525 unsigned mode;
526 bool expected_result;
527 } test_case[] = {
528 { BRW_ALIGN_1, devinfo.ver >= 10 },
529 { BRW_ALIGN_16, devinfo.ver <= 10 },
530 };
531
532 for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
533 if (devinfo.ver < 10)
534 brw_set_default_access_mode(p, BRW_ALIGN_16);
535
536 brw_MAD(p, g0, g0, g0, g0);
537 brw_inst_set_access_mode(&devinfo, last_inst, test_case[i].mode);
538
539 EXPECT_EQ(test_case[i].expected_result, validate(p));
540
541 clear_instructions(p);
542 }
543 }
544
545 /* When the Execution Data Type is wider than the destination data type, the
546 * destination must [...] specify a HorzStride equal to the ratio in sizes of
547 * the two data types.
548 */
TEST_P(validation_test,dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size)549 TEST_P(validation_test, dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size)
550 {
551 brw_ADD(p, g0, g0, g0);
552 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
553 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
554 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
555
556 EXPECT_FALSE(validate(p));
557
558 clear_instructions(p);
559
560 brw_ADD(p, g0, g0, g0);
561 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
562 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
563 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
564 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
565
566 EXPECT_TRUE(validate(p));
567 }
568
569 /* When the Execution Data Type is wider than the destination data type, the
570 * destination must be aligned as required by the wider execution data type
571 * [...]
572 */
TEST_P(validation_test,dst_subreg_must_be_aligned_to_exec_type_size)573 TEST_P(validation_test, dst_subreg_must_be_aligned_to_exec_type_size)
574 {
575 brw_ADD(p, g0, g0, g0);
576 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 2);
577 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
578 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
579 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
580 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
581
582 EXPECT_FALSE(validate(p));
583
584 clear_instructions(p);
585
586 brw_ADD(p, g0, g0, g0);
587 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
588 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 8);
589 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
590 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
591 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
592 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
593 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
594 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
595 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
596 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
597 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
598 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
599
600 EXPECT_TRUE(validate(p));
601 }
602
603 /* ExecSize must be greater than or equal to Width. */
TEST_P(validation_test,exec_size_less_than_width)604 TEST_P(validation_test, exec_size_less_than_width)
605 {
606 brw_ADD(p, g0, g0, g0);
607 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_16);
608
609 EXPECT_FALSE(validate(p));
610
611 clear_instructions(p);
612
613 brw_ADD(p, g0, g0, g0);
614 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_16);
615
616 EXPECT_FALSE(validate(p));
617 }
618
619 /* If ExecSize = Width and HorzStride ≠ 0,
620 * VertStride must be set to Width * HorzStride.
621 */
TEST_P(validation_test,vertical_stride_is_width_by_horizontal_stride)622 TEST_P(validation_test, vertical_stride_is_width_by_horizontal_stride)
623 {
624 brw_ADD(p, g0, g0, g0);
625 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
626
627 EXPECT_FALSE(validate(p));
628
629 clear_instructions(p);
630
631 brw_ADD(p, g0, g0, g0);
632 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
633
634 EXPECT_FALSE(validate(p));
635 }
636
637 /* If Width = 1, HorzStride must be 0 regardless of the values
638 * of ExecSize and VertStride.
639 */
TEST_P(validation_test,horizontal_stride_must_be_0_if_width_is_1)640 TEST_P(validation_test, horizontal_stride_must_be_0_if_width_is_1)
641 {
642 brw_ADD(p, g0, g0, g0);
643 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
644 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
645 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
646
647 EXPECT_FALSE(validate(p));
648
649 clear_instructions(p);
650
651 brw_ADD(p, g0, g0, g0);
652 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
653 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_1);
654 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
655
656 EXPECT_FALSE(validate(p));
657 }
658
659 /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
TEST_P(validation_test,scalar_region_must_be_0_1_0)660 TEST_P(validation_test, scalar_region_must_be_0_1_0)
661 {
662 struct brw_reg g0_0 = brw_vec1_grf(0, 0);
663
664 brw_ADD(p, g0, g0, g0_0);
665 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_1);
666 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_1);
667 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
668 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
669
670 EXPECT_FALSE(validate(p));
671
672 clear_instructions(p);
673
674 brw_ADD(p, g0, g0_0, g0);
675 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_1);
676 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_1);
677 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_1);
678 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
679
680 EXPECT_FALSE(validate(p));
681 }
682
683 /* If VertStride = HorzStride = 0, Width must be 1 regardless of the value
684 * of ExecSize.
685 */
TEST_P(validation_test,zero_stride_implies_0_1_0)686 TEST_P(validation_test, zero_stride_implies_0_1_0)
687 {
688 brw_ADD(p, g0, g0, g0);
689 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
690 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
691 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
692
693 EXPECT_FALSE(validate(p));
694
695 clear_instructions(p);
696
697 brw_ADD(p, g0, g0, g0);
698 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
699 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_2);
700 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
701
702 EXPECT_FALSE(validate(p));
703 }
704
705 /* Dst.HorzStride must not be 0. */
TEST_P(validation_test,dst_horizontal_stride_0)706 TEST_P(validation_test, dst_horizontal_stride_0)
707 {
708 brw_ADD(p, g0, g0, g0);
709 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
710
711 EXPECT_FALSE(validate(p));
712
713 clear_instructions(p);
714
715 /* Align16 does not exist on Gfx11+ */
716 if (devinfo.ver >= 11)
717 return;
718
719 brw_set_default_access_mode(p, BRW_ALIGN_16);
720
721 brw_ADD(p, g0, g0, g0);
722 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
723
724 EXPECT_FALSE(validate(p));
725 }
726
727 /* VertStride must be used to cross FIXED_GRF register boundaries. This rule implies
728 * that elements within a 'Width' cannot cross FIXED_GRF boundaries.
729 */
TEST_P(validation_test,must_not_cross_grf_boundary_in_a_width)730 TEST_P(validation_test, must_not_cross_grf_boundary_in_a_width)
731 {
732 brw_ADD(p, g0, g0, g0);
733 brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 4);
734
735 EXPECT_FALSE(validate(p));
736
737 clear_instructions(p);
738
739 brw_ADD(p, g0, g0, g0);
740 brw_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 4);
741
742 EXPECT_FALSE(validate(p));
743
744 clear_instructions(p);
745
746 brw_ADD(p, g0, g0, g0);
747 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
748 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
749 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
750
751 EXPECT_FALSE(validate(p));
752
753 clear_instructions(p);
754
755 brw_ADD(p, g0, g0, g0);
756 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
757 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
758 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
759
760 EXPECT_FALSE(validate(p));
761 }
762
763 /* Destination Horizontal must be 1 in Align16 */
TEST_P(validation_test,dst_hstride_on_align16_must_be_1)764 TEST_P(validation_test, dst_hstride_on_align16_must_be_1)
765 {
766 /* Align16 does not exist on Gfx11+ */
767 if (devinfo.ver >= 11)
768 return;
769
770 brw_set_default_access_mode(p, BRW_ALIGN_16);
771
772 brw_ADD(p, g0, g0, g0);
773 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
774
775 EXPECT_FALSE(validate(p));
776
777 clear_instructions(p);
778
779 brw_ADD(p, g0, g0, g0);
780 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
781
782 EXPECT_TRUE(validate(p));
783 }
784
785 /* VertStride must be 0 or 4 in Align16 */
TEST_P(validation_test,vstride_on_align16_must_be_0_or_4)786 TEST_P(validation_test, vstride_on_align16_must_be_0_or_4)
787 {
788 /* Align16 does not exist on Gfx11+ */
789 if (devinfo.ver >= 11)
790 return;
791
792 const struct {
793 enum brw_vertical_stride vstride;
794 bool expected_result;
795 } vstride[] = {
796 { BRW_VERTICAL_STRIDE_0, true },
797 { BRW_VERTICAL_STRIDE_1, false },
798 { BRW_VERTICAL_STRIDE_2, devinfo.verx10 >= 75 },
799 { BRW_VERTICAL_STRIDE_4, true },
800 { BRW_VERTICAL_STRIDE_8, false },
801 { BRW_VERTICAL_STRIDE_16, false },
802 { BRW_VERTICAL_STRIDE_32, false },
803 { BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL, false },
804 };
805
806 brw_set_default_access_mode(p, BRW_ALIGN_16);
807
808 for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
809 brw_ADD(p, g0, g0, g0);
810 brw_inst_set_src0_vstride(&devinfo, last_inst, vstride[i].vstride);
811
812 EXPECT_EQ(vstride[i].expected_result, validate(p));
813
814 clear_instructions(p);
815 }
816
817 for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
818 brw_ADD(p, g0, g0, g0);
819 brw_inst_set_src1_vstride(&devinfo, last_inst, vstride[i].vstride);
820
821 EXPECT_EQ(vstride[i].expected_result, validate(p));
822
823 clear_instructions(p);
824 }
825 }
826
827 /* In Direct Addressing mode, a source cannot span more than 2 adjacent FIXED_GRF
828 * registers.
829 */
TEST_P(validation_test,source_cannot_span_more_than_2_registers)830 TEST_P(validation_test, source_cannot_span_more_than_2_registers)
831 {
832 brw_ADD(p, g0, g0, g0);
833 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_32);
834 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
835 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
836 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
837 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
838 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_8);
839 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
840
841 EXPECT_FALSE(validate(p));
842
843 clear_instructions(p);
844
845 brw_ADD(p, g0, g0, g0);
846 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
847 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
848 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
849 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
850 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
851 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_8);
852 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
853 brw_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 2);
854
855 EXPECT_TRUE(validate(p));
856
857 clear_instructions(p);
858
859 brw_ADD(p, g0, g0, g0);
860 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
861
862 EXPECT_TRUE(validate(p));
863 }
864
865 /* A destination cannot span more than 2 adjacent FIXED_GRF registers. */
TEST_P(validation_test,destination_cannot_span_more_than_2_registers)866 TEST_P(validation_test, destination_cannot_span_more_than_2_registers)
867 {
868 brw_ADD(p, g0, g0, g0);
869 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_32);
870 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
871 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
872 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
873 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
874
875 EXPECT_FALSE(validate(p));
876
877 clear_instructions(p);
878
879 brw_ADD(p, g0, g0, g0);
880 brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_8);
881 brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 6);
882 brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_4);
883 brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
884 brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
885 brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
886 brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
887 brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
888 brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
889 brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_16);
890 brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
891 brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);
892
893 EXPECT_TRUE(validate(p));
894 }
895
/* W-typed ADDs whose src1 region uses a vertical stride of 16 while the
 * destination stays within one register.  The first three layouts are
 * always accepted; the uneven one is accepted only on ver >= 9.
 */
TEST_P(validation_test, src_region_spans_two_regs_dst_region_spans_one)
{
   /* Writes to dest are to the lower OWord */
   {
      brw_ADD(p, g0, g0, g0);
      auto *const inst = last_inst;
      brw_inst_set_dst_file_type(&devinfo, inst, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src0_file_type(&devinfo, inst, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src1_file_type(&devinfo, inst, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src1_vstride(&devinfo, inst, BRW_VERTICAL_STRIDE_16);
      brw_inst_set_src1_width(&devinfo, inst, BRW_WIDTH_4);
      brw_inst_set_src1_hstride(&devinfo, inst, BRW_HORIZONTAL_STRIDE_2);

      EXPECT_TRUE(validate(p));

      clear_instructions(p);
   }

   /* Writes to dest are to the upper OWord */
   {
      brw_ADD(p, g0, g0, g0);
      auto *const inst = last_inst;
      brw_inst_set_dst_da1_subreg_nr(&devinfo, inst, 16);
      brw_inst_set_dst_file_type(&devinfo, inst, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src0_file_type(&devinfo, inst, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src1_file_type(&devinfo, inst, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src1_vstride(&devinfo, inst, BRW_VERTICAL_STRIDE_16);
      brw_inst_set_src1_width(&devinfo, inst, BRW_WIDTH_4);
      brw_inst_set_src1_hstride(&devinfo, inst, BRW_HORIZONTAL_STRIDE_2);

      EXPECT_TRUE(validate(p));

      clear_instructions(p);
   }

   /* Writes to dest are evenly split between OWords */
   {
      brw_ADD(p, g0, g0, g0);
      auto *const inst = last_inst;
      brw_inst_set_exec_size(&devinfo, inst, BRW_EXECUTE_16);
      brw_inst_set_dst_file_type(&devinfo, inst, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src0_file_type(&devinfo, inst, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src1_file_type(&devinfo, inst, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src1_vstride(&devinfo, inst, BRW_VERTICAL_STRIDE_16);
      brw_inst_set_src1_width(&devinfo, inst, BRW_WIDTH_8);
      brw_inst_set_src1_hstride(&devinfo, inst, BRW_HORIZONTAL_STRIDE_2);

      EXPECT_TRUE(validate(p));

      clear_instructions(p);
   }

   /* Writes to dest are uneven between OWords */
   {
      brw_ADD(p, g0, g0, g0);
      auto *const inst = last_inst;
      brw_inst_set_exec_size(&devinfo, inst, BRW_EXECUTE_4);
      brw_inst_set_dst_da1_subreg_nr(&devinfo, inst, 10);
      brw_inst_set_dst_file_type(&devinfo, inst, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src0_file_type(&devinfo, inst, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src0_vstride(&devinfo, inst, BRW_VERTICAL_STRIDE_4);
      brw_inst_set_src0_width(&devinfo, inst, BRW_WIDTH_4);
      brw_inst_set_src0_hstride(&devinfo, inst, BRW_HORIZONTAL_STRIDE_1);
      brw_inst_set_src1_file_type(&devinfo, inst, FIXED_GRF, BRW_TYPE_W);
      brw_inst_set_src1_vstride(&devinfo, inst, BRW_VERTICAL_STRIDE_16);
      brw_inst_set_src1_width(&devinfo, inst, BRW_WIDTH_2);
      brw_inst_set_src1_hstride(&devinfo, inst, BRW_HORIZONTAL_STRIDE_1);

      const bool uneven_split_allowed = devinfo.ver >= 9;
      EXPECT_EQ(uneven_split_allowed, validate(p));
   }
}
959
/* Exercises the validator with destinations that start part-way into a
 * register, first for a plain ADD and then for a math instruction.
 */
TEST_P(validation_test, dst_elements_must_be_evenly_split_between_registers)
{
   /* An ADD whose destination sub-register offset is 4 is only expected to
    * validate on ver >= 9 platforms that are older than verx10 125.
    */
   const bool offset_dst_is_valid = devinfo.ver >= 9 && devinfo.verx10 < 125;

   brw_ADD(p, g0, g0, g0);
   brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 4);
   EXPECT_EQ(offset_dst_is_valid, validate(p));
   clear_instructions(p);

   /* The same ADD at SIMD16 with an unmodified destination offset is valid
    * everywhere.
    */
   brw_ADD(p, g0, g0, g0);
   brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
   EXPECT_TRUE(validate(p));
   clear_instructions(p);

   /* The math checks below are only run from ver 6 onwards. */
   if (devinfo.ver < 6)
      return;

   gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);
   EXPECT_TRUE(validate(p));
   clear_instructions(p);

   /* Math with the destination moved to sub-register offset 4 must be
    * rejected.
    */
   gfx6_math(p, g0, BRW_MATH_FUNCTION_SIN, g0, null);
   brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 4);
   EXPECT_FALSE(validate(p));
}
993
/* Builds two SIMD4 ADDs with a dst hstride of 4 and differing source regions,
 * and checks on which platforms the validator accepts them.
 */
TEST_P(validation_test, two_src_two_dst_source_offsets_must_be_same)
{
   const bool gfx125_or_later = devinfo.verx10 >= 125;

   /* src0 is <2;1,0> starting at sub-register offset 16, src1 is <4;4,1>.
    * Rejected on ver <= 7 and on verx10 >= 125, accepted in between.
    */
   brw_ADD(p, g0, g0, g0);
   brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
   brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_4);
   brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 16);
   brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_2);
   brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
   brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
   brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
   brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
   brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);

   EXPECT_EQ(devinfo.ver > 7 && !gfx125_or_later, validate(p));

   clear_instructions(p);

   /* src0 is <4;1,0> with no sub-register offset, src1 is <8;2,1>.
    * Only rejected on verx10 >= 125.
    */
   brw_ADD(p, g0, g0, g0);
   brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);
   brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_4);
   brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
   brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
   brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
   brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_8);
   brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_2);
   brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);

   EXPECT_EQ(!gfx125_or_later, validate(p));
}
1030
/* Builds two MOVs whose source starts at sub-register offset 8 and checks on
 * which platforms the validator accepts each of them.
 */
TEST_P(validation_test, two_src_two_dst_each_dst_must_be_derived_from_one_src)
{
   const bool newer_than_gfx7 = devinfo.ver > 7;

   /* SIMD16 MOV of W data: dst hstride 2, src0 <4;4,1> at offset 8.
    * Rejected only on ver <= 7.
    */
   brw_MOV(p, g0, g0);
   brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
   brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
   brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 8);
   brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
   brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
   brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);

   EXPECT_EQ(newer_than_gfx7, validate(p));

   clear_instructions(p);

   /* MOV with dst at offset 16 and src0 <2;2,1> at offset 8.
    * Rejected on ver <= 7 and again on verx10 >= 125.
    */
   brw_MOV(p, g0, g0);
   brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 16);
   brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 8);
   brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_2);
   brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_2);
   brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);

   EXPECT_EQ(newer_than_gfx7 && devinfo.verx10 < 125, validate(p));
}
1064
/* Runs a series of SIMD16 ADDs with different operand types and regions.
 * The first two must always validate; the remaining four are expected to
 * validate only when devinfo.ver >= 8.
 */
TEST_P(validation_test, one_src_two_dst)
{
   const struct brw_reg g0_0 = brw_vec1_grf(0, 0);
   const bool gfx8_or_later = devinfo.ver >= 8;

   /* Both sources are the single-element register g0.0. */
   brw_ADD(p, g0, g0_0, g0_0);
   brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* D destination, W src0, D src1. */
   brw_ADD(p, g0, g0, g0);
   brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
   brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
   brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* D destination, D src0, W src1. */
   brw_ADD(p, g0, g0, g0);
   brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
   brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
   brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
   brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);

   EXPECT_EQ(gfx8_or_later, validate(p));

   clear_instructions(p);

   /* D destination, both sources W. */
   brw_ADD(p, g0, g0, g0);
   brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
   brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_D);
   brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);

   EXPECT_EQ(gfx8_or_later, validate(p));

   clear_instructions(p);

   /* W destination with hstride 2, W sources, src1 is the scalar <0;1,0>. */
   brw_ADD(p, g0, g0, g0);
   brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
   brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
   brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
   brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_1);
   brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);

   EXPECT_EQ(gfx8_or_later, validate(p));

   clear_instructions(p);

   /* Same as above, but with src0 as the scalar <0;1,0> operand. */
   brw_ADD(p, g0, g0, g0);
   brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_16);
   brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
   brw_inst_set_dst_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_inst_set_src0_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);
   brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_0);
   brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_1);
   brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_0);
   brw_inst_set_src1_file_type(&devinfo, last_inst, FIXED_GRF, BRW_TYPE_W);

   EXPECT_EQ(gfx8_or_later, validate(p));
}
1148
/* Checks byte-typed destinations: a table of raw MOVs between byte and wider
 * integer types with optional source modifiers / saturate, followed by two
 * predicated SELs on byte operands that must be rejected.
 *
 * On failure the index of the offending table row is reported, matching the
 * diagnostics of the other table-driven tests in this file.
 */
TEST_P(validation_test, packed_byte_destination)
{
   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src_type;
      bool neg, abs, sat;
      bool expected_result;
   } move[] = {
      /* Plain byte-to-byte moves are accepted. */
      { BRW_TYPE_UB, BRW_TYPE_UB, 0, 0, 0, true },
      { BRW_TYPE_B , BRW_TYPE_B , 0, 0, 0, true },
      { BRW_TYPE_UB, BRW_TYPE_B , 0, 0, 0, true },
      { BRW_TYPE_B , BRW_TYPE_UB, 0, 0, 0, true },

      /* Source negate is rejected. */
      { BRW_TYPE_UB, BRW_TYPE_UB, 1, 0, 0, false },
      { BRW_TYPE_B , BRW_TYPE_B , 1, 0, 0, false },
      { BRW_TYPE_UB, BRW_TYPE_B , 1, 0, 0, false },
      { BRW_TYPE_B , BRW_TYPE_UB, 1, 0, 0, false },

      /* Source abs is rejected. */
      { BRW_TYPE_UB, BRW_TYPE_UB, 0, 1, 0, false },
      { BRW_TYPE_B , BRW_TYPE_B , 0, 1, 0, false },
      { BRW_TYPE_UB, BRW_TYPE_B , 0, 1, 0, false },
      { BRW_TYPE_B , BRW_TYPE_UB, 0, 1, 0, false },

      /* Saturate is rejected. */
      { BRW_TYPE_UB, BRW_TYPE_UB, 0, 0, 1, false },
      { BRW_TYPE_B , BRW_TYPE_B , 0, 0, 1, false },
      { BRW_TYPE_UB, BRW_TYPE_B , 0, 0, 1, false },
      { BRW_TYPE_B , BRW_TYPE_UB, 0, 0, 1, false },

      /* Word and dword sources are rejected. */
      { BRW_TYPE_UB, BRW_TYPE_UW, 0, 0, 0, false },
      { BRW_TYPE_B , BRW_TYPE_W , 0, 0, 0, false },
      { BRW_TYPE_UB, BRW_TYPE_UD, 0, 0, 0, false },
      { BRW_TYPE_B , BRW_TYPE_D , 0, 0, 0, false },
   };

   for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
      const auto &t = move[i];

      brw_MOV(p, retype(g0, t.dst_type), retype(g0, t.src_type));
      brw_inst_set_src0_negate(&devinfo, last_inst, t.neg);
      brw_inst_set_src0_abs(&devinfo, last_inst, t.abs);
      brw_inst_set_saturate(&devinfo, last_inst, t.sat);

      EXPECT_EQ(t.expected_result, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }

   /* A predicated SEL on unsigned byte operands must not validate. */
   brw_SEL(p, retype(g0, BRW_TYPE_UB),
              retype(g0, BRW_TYPE_UB),
              retype(g0, BRW_TYPE_UB));
   brw_inst_set_pred_control(&devinfo, last_inst, BRW_PREDICATE_NORMAL);

   EXPECT_FALSE(validate(p)) << "Failing test is: predicated SEL on UB";

   clear_instructions(p);

   /* Likewise for signed byte operands. */
   brw_SEL(p, retype(g0, BRW_TYPE_B),
              retype(g0, BRW_TYPE_B),
              retype(g0, BRW_TYPE_B));
   brw_inst_set_pred_control(&devinfo, last_inst, BRW_PREDICATE_NORMAL);

   EXPECT_FALSE(validate(p)) << "Failing test is: predicated SEL on B";
}
1210
/* A predicated SEL from W sources into a B destination with hstride 2 must
 * validate, both with the destination offset untouched and with it moved to
 * sub-register offset 1.
 */
TEST_P(validation_test, byte_destination_relaxed_alignment)
{
   /* Emits the predicated W -> B SEL with a destination hstride of 2. */
   const auto emit_strided_byte_sel = [&]() {
      brw_SEL(p, retype(g0, BRW_TYPE_B),
                 retype(g0, BRW_TYPE_W),
                 retype(g0, BRW_TYPE_W));
      brw_inst_set_pred_control(&devinfo, last_inst, BRW_PREDICATE_NORMAL);
      brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_2);
   };

   emit_strided_byte_sel();
   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* Same instruction, destination shifted by one byte. */
   emit_strided_byte_sel();
   brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 1);
   EXPECT_TRUE(validate(p));
}
1232
/* MOVs from 64-bit sources (Q, UQ, DF) into byte destinations must be
 * rejected for every destination hstride listed in the table.
 *
 * Rows whose source type is not supported by the device (has_64bit_float /
 * has_64bit_int) are skipped.  On failure the index of the offending row is
 * reported.
 */
TEST_P(validation_test, byte_64bit_conversion)
{
#define INST(dst_type, src_type, dst_stride, expected_result)             \
      {                                                                   \
         BRW_TYPE_##dst_type,                                             \
         BRW_TYPE_##src_type,                                             \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
         expected_result,                                                 \
      }

   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src_type;
      unsigned dst_stride;
      bool expected_result;
   } cases[] = {
      INST( B,  Q, 1, false),
      INST( B, UQ, 1, false),
      INST( B, DF, 1, false),
      INST(UB,  Q, 1, false),
      INST(UB, UQ, 1, false),
      INST(UB, DF, 1, false),

      INST( B,  Q, 2, false),
      INST( B, UQ, 2, false),
      INST( B, DF, 2, false),
      INST(UB,  Q, 2, false),
      INST(UB, UQ, 2, false),
      INST(UB, DF, 2, false),

      INST( B,  Q, 4, false),
      INST( B, UQ, 4, false),
      INST( B, DF, 4, false),
      INST(UB,  Q, 4, false),
      INST(UB, UQ, 4, false),
      INST(UB, DF, 4, false),
   };

#undef INST

   if (devinfo.ver < 8)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(cases); i++) {
      const auto &t = cases[i];

      const bool src_is_df = t.src_type == BRW_TYPE_DF;
      const bool src_is_64bit_int = t.src_type == BRW_TYPE_Q ||
                                    t.src_type == BRW_TYPE_UQ;

      /* Skip source types the device cannot encode at all. */
      if ((src_is_df && !devinfo.has_64bit_float) ||
          (src_is_64bit_int && !devinfo.has_64bit_int))
         continue;

      brw_MOV(p, retype(g0, t.dst_type), retype(g0, t.src_type));
      brw_inst_set_dst_hstride(&devinfo, last_inst, t.dst_stride);

      EXPECT_EQ(t.expected_result, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }
}
1293
/* Table-driven check of SIMD4 MOV conversions to and from half-float.
 *
 * Each row gives the destination/source types, the destination hstride and
 * sub-register offset, and the expected validation result: one column for
 * platforms with verx10 < 125 and one for verx10 >= 125.  Rows that use a
 * 64-bit type the device lacks are skipped.
 */
TEST_P(validation_test, half_float_conversion)
{
#define INST(dst_type, src_type, dst_stride, dst_subnr,                     \
             expected_result_gfx9,                                          \
             expected_result_gfx125)                                        \
      {                                                                     \
         BRW_TYPE_##dst_type,                                               \
         BRW_TYPE_##src_type,                                               \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                                \
         dst_subnr,                                                         \
         expected_result_gfx9,                                              \
         expected_result_gfx125,                                            \
      }

   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src_type;
      unsigned dst_stride;
      unsigned dst_subnr;
      bool expected_result_gfx9;
      bool expected_result_gfx125;
   } cases[] = {
      /* MOV to half-float destination */
      INST(HF,  B, 1, 0, false, false), /* 0 */
      INST(HF,  W, 1, 0, false, false),
      INST(HF, HF, 1, 0, true,  true),
      INST(HF, HF, 1, 2, true,  false),
      INST(HF,  D, 1, 0, false, false),
      INST(HF,  F, 1, 0, true,  false),
      INST(HF,  Q, 1, 0, false, false),
      INST(HF,  B, 2, 0, true,  false),
      INST(HF,  B, 2, 2, false, false),
      INST(HF,  W, 2, 0, true,  false),
      INST(HF,  W, 2, 2, false, false), /* 10 */
      INST(HF, HF, 2, 0, true,  false),
      INST(HF, HF, 2, 2, true,  false),
      INST(HF,  D, 2, 0, true,  true),
      INST(HF,  D, 2, 2, false, false),
      INST(HF,  F, 2, 0, true,  true),
      INST(HF,  F, 2, 2, true,  false),
      INST(HF,  Q, 2, 0, false, false),
      INST(HF, DF, 2, 0, false, false),
      INST(HF,  B, 4, 0, false, false),
      INST(HF,  W, 4, 0, false, false), /* 20 */
      INST(HF, HF, 4, 0, true,  false),
      INST(HF, HF, 4, 2, true,  false),
      INST(HF,  D, 4, 0, false, false),
      INST(HF,  F, 4, 0, false, false),
      INST(HF,  Q, 4, 0, false, false),
      INST(HF, DF, 4, 0, false, false),

      /* MOV from half-float source */
      INST( B, HF, 1, 0, false, false),
      INST( W, HF, 1, 0, false, false),
      INST( D, HF, 1, 0, true,  true),
      INST( D, HF, 1, 4, true,  true),  /* 30 */
      INST( F, HF, 1, 0, true,  false),
      INST( F, HF, 1, 4, true,  false),
      INST( Q, HF, 1, 0, false, false),
      INST(DF, HF, 1, 0, false, false),
      INST( B, HF, 2, 0, false, false),
      INST( W, HF, 2, 0, true,  true),
      INST( W, HF, 2, 2, false, false),
      INST( D, HF, 2, 0, false, false),
      INST( F, HF, 2, 0, true,  false),
      INST( B, HF, 4, 0, true,  true),  /* 40 */
      INST( B, HF, 4, 1, false, false),
      INST( W, HF, 4, 0, false, false),
   };

#undef INST

   const bool use_gfx125_column = devinfo.verx10 >= 125;

   for (unsigned i = 0; i < ARRAY_SIZE(cases); i++) {
      const auto &t = cases[i];

      const bool uses_df = t.dst_type == BRW_TYPE_DF ||
                           t.src_type == BRW_TYPE_DF;
      const bool uses_64bit_int = t.dst_type == BRW_TYPE_Q ||
                                  t.dst_type == BRW_TYPE_UQ ||
                                  t.src_type == BRW_TYPE_Q ||
                                  t.src_type == BRW_TYPE_UQ;

      /* Skip rows that need a 64-bit type the device does not have. */
      if ((uses_df && !devinfo.has_64bit_float) ||
          (uses_64bit_int && !devinfo.has_64bit_int))
         continue;

      brw_MOV(p, retype(g0, t.dst_type), retype(g0, t.src_type));

      brw_inst_set_exec_size(&devinfo, last_inst, BRW_EXECUTE_4);

      brw_inst_set_dst_hstride(&devinfo, last_inst, t.dst_stride);
      brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, t.dst_subnr);

      /* Byte sources use a <4;2,2> region, everything else <4;4,1>. */
      const bool byte_src = t.src_type == BRW_TYPE_B;
      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_inst_set_src0_width(&devinfo, last_inst,
                              byte_src ? BRW_WIDTH_2 : BRW_WIDTH_4);
      brw_inst_set_src0_hstride(&devinfo, last_inst,
                                byte_src ? BRW_HORIZONTAL_STRIDE_2
                                         : BRW_HORIZONTAL_STRIDE_1);

      const bool expected = use_gfx125_column ? t.expected_result_gfx125
                                              : t.expected_result_gfx9;
      EXPECT_EQ(expected, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }
}
1407
/* Table-driven check of mixed HF/F ADDs with direct or indirect addressing on
 * the destination and on src0.
 *
 * Each row carries two expected results: one for verx10 < 125 and one for
 * verx10 >= 125.  On failure the index of the offending row is reported,
 * matching the diagnostics of half_float_conversion.
 */
TEST_P(validation_test, mixed_float_source_indirect_addressing)
{
#define INST(dst_type, src0_type, src1_type,                              \
             dst_stride, dst_indirect, src0_indirect, expected_result,    \
             gfx125_expected_result)                                      \
      {                                                                   \
         BRW_TYPE_##dst_type,                                             \
         BRW_TYPE_##src0_type,                                            \
         BRW_TYPE_##src1_type,                                            \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
         dst_indirect,                                                    \
         src0_indirect,                                                   \
         expected_result,                                                 \
         gfx125_expected_result,                                          \
      }

   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      bool dst_indirect;
      bool src0_indirect;
      bool expected_result;
      bool gfx125_expected_result;
   } cases[] = {
      /* Source and dest are mixed float: indirect src addressing not allowed */
      INST(HF,  F,  F, 2, false, false, true,  true),
      INST(HF,  F,  F, 2, true,  false, true,  true),
      INST(HF,  F,  F, 2, false, true,  false, false),
      INST(HF,  F,  F, 2, true,  true,  false, false),
      INST( F, HF,  F, 1, false, false, true,  false),
      INST( F, HF,  F, 1, true,  false, true,  false),
      INST( F, HF,  F, 1, false, true,  false, false),
      INST( F, HF,  F, 1, true,  true,  false, false),

      INST(HF, HF,  F, 2, false, false, true,  false),
      INST(HF, HF,  F, 2, true,  false, true,  false),
      INST(HF, HF,  F, 2, false, true,  false, false),
      INST(HF, HF,  F, 2, true,  true,  false, false),
      INST( F,  F, HF, 1, false, false, true,  false),
      INST( F,  F, HF, 1, true,  false, true,  false),
      INST( F,  F, HF, 1, false, true,  false, false),
      INST( F,  F, HF, 1, true,  true,  false, false),
   };

#undef INST

   if (devinfo.ver < 8)
      return;

   const bool use_gfx125_column = devinfo.verx10 >= 125;

   for (unsigned i = 0; i < ARRAY_SIZE(cases); i++) {
      const auto &t = cases[i];

      brw_ADD(p, retype(g0, t.dst_type),
                 retype(g0, t.src0_type),
                 retype(g0, t.src1_type));

      /* The bool flags are passed straight through as the address mode. */
      brw_inst_set_dst_address_mode(&devinfo, last_inst, t.dst_indirect);
      brw_inst_set_dst_hstride(&devinfo, last_inst, t.dst_stride);
      brw_inst_set_src0_address_mode(&devinfo, last_inst, t.src0_indirect);

      const bool expected = use_gfx125_column ? t.gfx125_expected_result
                                              : t.expected_result;
      EXPECT_EQ(expected, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }
}
1477
/* Table-driven check of mixed HF/F ADDs at SIMD8 and SIMD16 with different
 * destination types and strides.
 *
 * Each row carries two expected results: one for verx10 < 125 and one for
 * verx10 >= 125.  On failure the index of the offending row is reported,
 * matching the diagnostics of half_float_conversion.
 */
TEST_P(validation_test, mixed_float_align1_simd16)
{
#define INST(exec_size, dst_type, src0_type, src1_type,                   \
             dst_stride, expected_result, gfx125_expected_result)         \
      {                                                                   \
         BRW_EXECUTE_##exec_size,                                         \
         BRW_TYPE_##dst_type,                                             \
         BRW_TYPE_##src0_type,                                            \
         BRW_TYPE_##src1_type,                                            \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
         expected_result,                                                 \
         gfx125_expected_result,                                          \
      }

   static const struct {
      unsigned exec_size;
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      bool expected_result;
      bool gfx125_expected_result;
   } cases[] = {
      /* No SIMD16 in mixed mode when destination is packed f16 */
      INST( 8, HF,  F, HF, 2, true,  false),
      INST(16, HF, HF,  F, 2, true,  false),
      INST(16, HF, HF,  F, 1, false, false),
      INST(16, HF,  F, HF, 1, false, false),

      /* No SIMD16 in mixed mode when destination is f32 */
      INST( 8,  F, HF,  F, 1, true,  false),
      INST( 8,  F,  F, HF, 1, true,  false),
      INST(16,  F, HF,  F, 1, false, false),
      INST(16,  F,  F, HF, 1, false, false),
   };

#undef INST

   if (devinfo.ver < 8)
      return;

   const bool use_gfx125_column = devinfo.verx10 >= 125;

   for (unsigned i = 0; i < ARRAY_SIZE(cases); i++) {
      const auto &t = cases[i];

      brw_ADD(p, retype(g0, t.dst_type),
                 retype(g0, t.src0_type),
                 retype(g0, t.src1_type));

      brw_inst_set_exec_size(&devinfo, last_inst, t.exec_size);

      brw_inst_set_dst_hstride(&devinfo, last_inst, t.dst_stride);

      const bool expected = use_gfx125_column ? t.gfx125_expected_result
                                              : t.expected_result;
      EXPECT_EQ(expected, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }
}
1537
/* Table-driven check of mixed-float ADDs with an HF destination whose src0 is
 * either g0 or the accumulator, at various src0 sub-register offsets.
 *
 * Each row carries three expected results; the column is chosen per platform:
 * verx10 >= 125, then CHV or ver >= 9, otherwise the "bdw" column.  On
 * failure the index of the offending row is reported, matching the
 * diagnostics of half_float_conversion.
 */
TEST_P(validation_test, mixed_float_align1_packed_fp16_dst_acc_read_offset_0)
{
#define INST(dst_type, src0_type, src1_type, dst_stride, read_acc, subnr,   \
             expected_result_bdw, expected_result_chv_skl,                  \
             expected_result_gfx125)                                        \
      {                                                                     \
         BRW_TYPE_##dst_type,                                               \
         BRW_TYPE_##src0_type,                                              \
         BRW_TYPE_##src1_type,                                              \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                                \
         read_acc,                                                          \
         subnr,                                                             \
         expected_result_bdw,                                               \
         expected_result_chv_skl,                                           \
         expected_result_gfx125,                                            \
      }

   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      bool read_acc;
      unsigned subnr;
      bool expected_result_bdw;
      bool expected_result_chv_skl;
      bool expected_result_gfx125;
   } cases[] = {
      /* Destination is not packed */
      INST(HF, HF, F, 2, true,  0, true, true, false),
      INST(HF, HF, F, 2, true,  2, true, true, false),
      INST(HF, HF, F, 2, true,  4, true, true, false),
      INST(HF, HF, F, 2, true,  8, true, true, false),
      INST(HF, HF, F, 2, true, 16, true, true, false),

      /* Destination is packed, we don't read acc */
      INST(HF, HF, F, 1, false,  0, false, true, false),
      INST(HF, HF, F, 1, false,  2, false, true, false),
      INST(HF, HF, F, 1, false,  4, false, true, false),
      INST(HF, HF, F, 1, false,  8, false, true, false),
      INST(HF, HF, F, 1, false, 16, false, true, false),

      /* Destination is packed, we read acc */
      INST(HF, HF, F, 1, true,  0, false, false, false),
      INST(HF, HF, F, 1, true,  2, false, false, false),
      INST(HF, HF, F, 1, true,  4, false, false, false),
      INST(HF, HF, F, 1, true,  8, false, false, false),
      INST(HF, HF, F, 1, true, 16, false, false, false),
   };

#undef INST

   if (devinfo.ver < 8)
      return;

   const bool use_gfx125_column = devinfo.verx10 >= 125;
   const bool use_chv_skl_column =
      devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9;

   for (unsigned i = 0; i < ARRAY_SIZE(cases); i++) {
      const auto &t = cases[i];

      /* src0 comes from the accumulator when the row asks for it. */
      brw_ADD(p, retype(g0, t.dst_type),
                 retype(t.read_acc ? acc0 : g0, t.src0_type),
                 retype(g0, t.src1_type));

      brw_inst_set_dst_hstride(&devinfo, last_inst, t.dst_stride);

      brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, t.subnr);

      const bool expected =
         use_gfx125_column  ? t.expected_result_gfx125 :
         use_chv_skl_column ? t.expected_result_chv_skl :
                              t.expected_result_bdw;
      EXPECT_EQ(expected, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }
}
1612
/* Table-driven check of mixed-float MAC and ADD instructions with different
 * destination types/strides, where ADD rows may read src0 from the
 * accumulator.
 *
 * Each row carries three expected results; the column is chosen per platform:
 * verx10 >= 125, then CHV or ver >= 9, otherwise the "bdw" column.  On
 * failure the index of the offending row is reported, matching the
 * diagnostics of half_float_conversion.
 */
TEST_P(validation_test, mixed_float_fp16_dest_with_acc)
{
#define INST(exec_size, opcode, dst_type, src0_type, src1_type,           \
             dst_stride, read_acc,expected_result_bdw,                    \
             expected_result_chv_skl, expected_result_gfx125)             \
      {                                                                   \
         BRW_EXECUTE_##exec_size,                                         \
         BRW_OPCODE_##opcode,                                             \
         BRW_TYPE_##dst_type,                                             \
         BRW_TYPE_##src0_type,                                            \
         BRW_TYPE_##src1_type,                                            \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
         read_acc,                                                        \
         expected_result_bdw,                                             \
         expected_result_chv_skl,                                         \
         expected_result_gfx125,                                          \
      }

   static const struct {
      unsigned exec_size;
      unsigned opcode;
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      bool read_acc;
      bool expected_result_bdw;
      bool expected_result_chv_skl;
      bool expected_result_gfx125;
   } cases[] = {
      /* Packed fp16 dest with implicit acc needs hstride=2 */
      INST(8, MAC, HF, HF,  F, 1, false, false, false, false),
      INST(8, MAC, HF, HF,  F, 2, false, true,  true,  false),
      INST(8, MAC, HF,  F, HF, 1, false, false, false, false),
      INST(8, MAC, HF,  F, HF, 2, false, true,  true,  false),

      /* Packed fp16 dest with explicit acc needs hstride=2 */
      INST(8, ADD, HF, HF,  F, 1, true,  false, false, false),
      INST(8, ADD, HF, HF,  F, 2, true,  true,  true,  false),
      INST(8, ADD, HF,  F, HF, 1, true,  false, false, false),
      INST(8, ADD, HF,  F, HF, 2, true,  true,  true,  false),

      /* If destination is not fp16, restriction doesn't apply */
      INST(8, MAC,  F, HF,  F, 1, false, true,  true,  false),
      INST(8, MAC,  F, HF,  F, 2, false, true,  true,  false),

      /* If there is no implicit/explicit acc, restriction doesn't apply */
      INST(8, ADD, HF, HF,  F, 1, false, false, true,  false),
      INST(8, ADD, HF, HF,  F, 2, false, true,  true,  false),
      INST(8, ADD, HF,  F, HF, 1, false, false, true,  false),
      INST(8, ADD, HF,  F, HF, 2, false, true,  true,  false),
      INST(8, ADD,  F, HF,  F, 1, false, true,  true,  false),
      INST(8, ADD,  F, HF,  F, 2, false, true,  true,  false),
   };

#undef INST

   if (devinfo.ver < 8)
      return;

   const bool use_gfx125_column = devinfo.verx10 >= 125;
   const bool use_chv_skl_column =
      devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9;

   for (unsigned i = 0; i < ARRAY_SIZE(cases); i++) {
      const auto &t = cases[i];

      if (t.opcode == BRW_OPCODE_MAC) {
         /* MAC rows always take src0 from g0; read_acc is not consulted. */
         brw_MAC(p, retype(g0, t.dst_type),
                    retype(g0, t.src0_type),
                    retype(g0, t.src1_type));
      } else {
         assert(t.opcode == BRW_OPCODE_ADD);
         brw_ADD(p, retype(g0, t.dst_type),
                    retype(t.read_acc ? acc0 : g0, t.src0_type),
                    retype(g0, t.src1_type));
      }

      brw_inst_set_exec_size(&devinfo, last_inst, t.exec_size);

      brw_inst_set_dst_hstride(&devinfo, last_inst, t.dst_stride);

      const bool expected =
         use_gfx125_column  ? t.expected_result_gfx125 :
         use_chv_skl_column ? t.expected_result_chv_skl :
                              t.expected_result_bdw;
      EXPECT_EQ(expected, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }
}
1699
/* Table-driven check of mixed HF/F POW math instructions with different
 * destination and source horizontal strides.
 *
 * Each row carries two expected results: one for verx10 < 125 and one for
 * verx10 >= 125.  On failure the index of the offending row is reported,
 * matching the diagnostics of half_float_conversion.
 */
TEST_P(validation_test, mixed_float_align1_math_strided_fp16_inputs)
{
#define INST(dst_type, src0_type, src1_type,                              \
             dst_stride, src0_stride, src1_stride, expected_result,       \
             expected_result_gfx125)                                      \
      {                                                                   \
         BRW_TYPE_##dst_type,                                             \
         BRW_TYPE_##src0_type,                                            \
         BRW_TYPE_##src1_type,                                            \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                              \
         BRW_HORIZONTAL_STRIDE_##src0_stride,                             \
         BRW_HORIZONTAL_STRIDE_##src1_stride,                             \
         expected_result,                                                 \
         expected_result_gfx125,                                          \
      }

   static const struct {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      unsigned src0_stride;
      unsigned src1_stride;
      bool expected_result;
      bool expected_result_gfx125;
   } cases[] = {
      INST(HF, HF,  F, 2, 2, 1, true,  false),
      INST(HF,  F, HF, 2, 1, 2, true,  false),
      INST(HF,  F, HF, 1, 1, 2, true,  false),
      INST(HF,  F, HF, 2, 1, 1, false, false),
      INST(HF, HF,  F, 2, 1, 1, false, false),
      INST(HF, HF,  F, 1, 1, 1, false, false),
      /* NOTE(review): this row repeats the (HF, HF, F, 2, 1, 1) case two
       * rows above; kept so row indices stay stable.
       */
      INST(HF, HF,  F, 2, 1, 1, false, false),
      INST( F, HF,  F, 1, 1, 1, false, false),
      INST( F,  F, HF, 1, 1, 2, true,  false),
      INST( F, HF, HF, 1, 2, 1, false, false),
      INST( F, HF, HF, 1, 2, 2, true,  false),
   };

#undef INST

   /* No half-float math in gfx8 */
   if (devinfo.ver < 9)
      return;

   const bool use_gfx125_column = devinfo.verx10 >= 125;

   for (unsigned i = 0; i < ARRAY_SIZE(cases); i++) {
      const auto &t = cases[i];

      gfx6_math(p, retype(g0, t.dst_type),
                   BRW_MATH_FUNCTION_POW,
                   retype(g0, t.src0_type),
                   retype(g0, t.src1_type));

      brw_inst_set_dst_hstride(&devinfo, last_inst, t.dst_stride);

      /* Both sources use vstride 4 / width 4; only hstride varies per row. */
      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
      brw_inst_set_src0_hstride(&devinfo, last_inst, t.src0_stride);

      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
      brw_inst_set_src1_hstride(&devinfo, last_inst, t.src1_stride);

      const bool expected = use_gfx125_column ? t.expected_result_gfx125
                                              : t.expected_result;
      EXPECT_EQ(expected, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }
}
1769
/* Table-driven check of mixed-float ADDs at SIMD8/SIMD16 with different
 * destination types, strides and sub-register offsets.
 *
 * Each row carries three expected results; the column is chosen per platform:
 * verx10 >= 125, then CHV or ver >= 9, otherwise the "bdw" column.  On
 * failure the index of the offending row is reported, matching the
 * diagnostics of half_float_conversion.
 */
TEST_P(validation_test, mixed_float_align1_packed_fp16_dst)
{
#define INST(exec_size, dst_type, src0_type, src1_type, dst_stride, dst_subnr, \
             expected_result_bdw, expected_result_chv_skl,                     \
             expected_result_gfx125)                                           \
      {                                                                        \
         BRW_EXECUTE_##exec_size,                                              \
         BRW_TYPE_##dst_type,                                                  \
         BRW_TYPE_##src0_type,                                                 \
         BRW_TYPE_##src1_type,                                                 \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                                   \
         dst_subnr,                                                            \
         expected_result_bdw,                                                  \
         expected_result_chv_skl,                                              \
         expected_result_gfx125                                                \
      }

   static const struct {
      unsigned exec_size;
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned dst_stride;
      unsigned dst_subnr;
      bool expected_result_bdw;
      bool expected_result_chv_skl;
      bool expected_result_gfx125;
   } cases[] = {
      /* SIMD8 packed fp16 dst won't cross oword boundaries if region is
       * oword-aligned
       */
      INST( 8, HF, HF, F, 1,  0, false, true,  false),
      INST( 8, HF, HF, F, 1,  2, false, false, false),
      INST( 8, HF, HF, F, 1,  4, false, false, false),
      INST( 8, HF, HF, F, 1,  8, false, false, false),
      INST( 8, HF, HF, F, 1, 16, false, true,  false),

      /* SIMD16 packed fp16 always crosses oword boundaries */
      INST(16, HF, HF, F, 1,  0, false, false, false),
      INST(16, HF, HF, F, 1,  2, false, false, false),
      INST(16, HF, HF, F, 1,  4, false, false, false),
      INST(16, HF, HF, F, 1,  8, false, false, false),
      INST(16, HF, HF, F, 1, 16, false, false, false),

      /* If destination is not packed (or not fp16) we can cross oword
       * boundaries
       */
      INST( 8, HF, HF, F, 2,  0, true, true, false),
      INST( 8,  F, HF, F, 1,  0, true, true, false),
   };

#undef INST

   if (devinfo.ver < 8)
      return;

   const bool use_gfx125_column = devinfo.verx10 >= 125;
   const bool use_chv_skl_column =
      devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9;

   for (unsigned i = 0; i < ARRAY_SIZE(cases); i++) {
      const auto &t = cases[i];

      brw_ADD(p, retype(g0, t.dst_type),
                 retype(g0, t.src0_type),
                 retype(g0, t.src1_type));

      brw_inst_set_dst_hstride(&devinfo, last_inst, t.dst_stride);
      brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, t.dst_subnr);

      /* Both sources use the <4;4,1> region. */
      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_inst_set_src0_width(&devinfo, last_inst, BRW_WIDTH_4);
      brw_inst_set_src0_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);

      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_inst_set_src1_width(&devinfo, last_inst, BRW_WIDTH_4);
      brw_inst_set_src1_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);

      brw_inst_set_exec_size(&devinfo, last_inst, t.exec_size);

      const bool expected =
         use_gfx125_column  ? t.expected_result_gfx125 :
         use_chv_skl_column ? t.expected_result_chv_skl :
                              t.expected_result_bdw;
      EXPECT_EQ(expected, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }
}
1854
/* Mixed-float (F/HF) ADD emitted in Align16 mode where only the vertical
 * stride of the two sources varies.  Each row records whether validate()
 * is expected to accept the resulting instruction.
 */
TEST_P(validation_test, mixed_float_align16_packed_data)
{
   struct vstride_case {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned src0_vstride;
      unsigned src1_vstride;
      bool expected_result;
   };

#define INST(dst_type, src0_type, src1_type,                               \
             src0_vstride, src1_vstride, expected_result)                  \
   {                                                                       \
      BRW_TYPE_##dst_type,                                                 \
      BRW_TYPE_##src0_type,                                                \
      BRW_TYPE_##src1_type,                                                \
      BRW_VERTICAL_STRIDE_##src0_vstride,                                  \
      BRW_VERTICAL_STRIDE_##src1_vstride,                                  \
      expected_result,                                                     \
   }

   static const vstride_case cases[] = {
      /* We only test with F destination because there is a restriction
       * by which F->HF conversions need to be DWord aligned but Align16 also
       * requires that destination horizontal stride is 1.
       */
      INST(F,  F, HF, 4, 4, true),
      INST(F,  F, HF, 2, 4, false),
      INST(F,  F, HF, 4, 2, false),
      INST(F,  F, HF, 0, 4, false),
      INST(F,  F, HF, 4, 0, false),
      INST(F, HF,  F, 4, 4, true),
      INST(F, HF,  F, 4, 2, false),
      INST(F, HF,  F, 2, 4, false),
      INST(F, HF,  F, 0, 4, false),
      INST(F, HF,  F, 4, 0, false),
   };

#undef INST

   /* The body only runs for 8 <= ver < 11. */
   const bool ver_in_range = devinfo.ver >= 8 && devinfo.ver < 11;
   if (!ver_in_range)
      return;

   brw_set_default_access_mode(p, BRW_ALIGN_16);

   for (const vstride_case &tc : cases) {
      brw_ADD(p, retype(g0, tc.dst_type),
                 retype(g0, tc.src0_type),
                 retype(g0, tc.src1_type));

      /* Override only the vertical strides on the instruction just emitted. */
      brw_inst_set_src0_vstride(&devinfo, last_inst, tc.src0_vstride);
      brw_inst_set_src1_vstride(&devinfo, last_inst, tc.src1_vstride);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
1912
/* Mixed-float (F/HF) ADD emitted in Align16 mode at SIMD8 and SIMD16.
 * The table expects validate() to accept the SIMD8 forms and to reject the
 * SIMD16 forms.
 */
TEST_P(validation_test, mixed_float_align16_no_simd16)
{
   static const struct {
      unsigned exec_size;
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      bool expected_result;
   } inst[] = {
#define INST(exec_size, dst_type, src0_type, src1_type, expected_result)   \
      {                                                                    \
         BRW_EXECUTE_##exec_size,                                          \
         BRW_TYPE_##dst_type,                                              \
         BRW_TYPE_##src0_type,                                             \
         BRW_TYPE_##src1_type,                                             \
         expected_result,                                                  \
      }

      /* We only test with F destination because there is a restriction
       * by which F->HF conversions need to be DWord aligned but Align16 also
       * requires that destination horizontal stride is 1.
       *
       * One row per distinct (exec size, source type order) combination.
       */
      INST( 8,  F,  F, HF, true),
      INST( 8,  F, HF,  F, true),
      INST(16,  F,  F, HF, false),
      INST(16,  F, HF,  F, false),

#undef INST
   };

   /* The body only runs for 8 <= ver < 11. */
   if (devinfo.ver < 8 || devinfo.ver >= 11)
      return;

   brw_set_default_access_mode(p, BRW_ALIGN_16);

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      brw_ADD(p, retype(g0, inst[i].dst_type),
                 retype(g0, inst[i].src0_type),
                 retype(g0, inst[i].src1_type));

      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);

      /* Both sources use vertical stride 4 so that only the execution size
       * differs between rows.
       */
      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);

      EXPECT_EQ(inst[i].expected_result, validate(p));

      clear_instructions(p);
   }
}
1965
/* Mixed-float (F/HF) ADD emitted in Align16 mode, with source 0 taken either
 * from g0 or from acc0.  The table expects validate() to reject the rows
 * that read acc0.
 */
TEST_P(validation_test, mixed_float_align16_no_acc_read)
{
   struct acc_read_case {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      bool read_acc;
      bool expected_result;
   };

#define INST(dst_type, src0_type, src1_type, read_acc, expected_result)    \
   {                                                                       \
      BRW_TYPE_##dst_type,                                                 \
      BRW_TYPE_##src0_type,                                                \
      BRW_TYPE_##src1_type,                                                \
      read_acc,                                                            \
      expected_result,                                                     \
   }

   static const acc_read_case cases[] = {
      /* We only test with F destination because there is a restriction
       * by which F->HF conversions need to be DWord aligned but Align16 also
       * requires that destination horizontal stride is 1.
       */
      INST(F,  F, HF, false, true),
      INST(F,  F, HF, true,  false),
      INST(F, HF,  F, false, true),
      INST(F, HF,  F, true,  false),
   };

#undef INST

   /* The body only runs for 8 <= ver < 11. */
   const bool ver_in_range = devinfo.ver >= 8 && devinfo.ver < 11;
   if (!ver_in_range)
      return;

   brw_set_default_access_mode(p, BRW_ALIGN_16);

   for (const acc_read_case &tc : cases) {
      /* Source 0 is the only operand that may come from the accumulator. */
      const struct brw_reg src0_reg = tc.read_acc ? acc0 : g0;

      brw_ADD(p, retype(g0, tc.dst_type),
                 retype(src0_reg, tc.src0_type),
                 retype(g0, tc.src1_type));

      brw_inst_set_src0_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);
      brw_inst_set_src1_vstride(&devinfo, last_inst, BRW_VERTICAL_STRIDE_4);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
2014
/* Mixed-float (F/HF) POW math instruction emitted in Align16 mode where only
 * the source vertical strides and source types vary.  Each row records
 * whether validate() is expected to accept the instruction.
 */
TEST_P(validation_test, mixed_float_align16_math_packed_format)
{
   struct math_case {
      enum brw_reg_type dst_type;
      enum brw_reg_type src0_type;
      enum brw_reg_type src1_type;
      unsigned src0_vstride;
      unsigned src1_vstride;
      bool expected_result;
   };

#define INST(dst_type, src0_type, src1_type,                               \
             src0_vstride, src1_vstride, expected_result)                  \
   {                                                                       \
      BRW_TYPE_##dst_type,                                                 \
      BRW_TYPE_##src0_type,                                                \
      BRW_TYPE_##src1_type,                                                \
      BRW_VERTICAL_STRIDE_##src0_vstride,                                  \
      BRW_VERTICAL_STRIDE_##src1_vstride,                                  \
      expected_result,                                                     \
   }

   static const math_case cases[] = {
      /* We only test with F destination because there is a restriction
       * by which F->HF conversions need to be DWord aligned but Align16 also
       * requires that destination horizontal stride is 1.
       */
      INST(F, HF,  F, 4, 0, false),
      INST(F, HF, HF, 4, 4, true),
      INST(F,  F, HF, 4, 0, false),
      INST(F,  F, HF, 2, 4, false),
      INST(F,  F, HF, 4, 2, false),
      INST(F, HF, HF, 0, 4, false),
   };

#undef INST

   /* Align16 Math for mixed float mode is not supported in gfx8, so the
    * body only runs for 9 <= ver < 11.
    */
   const bool ver_in_range = devinfo.ver >= 9 && devinfo.ver < 11;
   if (!ver_in_range)
      return;

   brw_set_default_access_mode(p, BRW_ALIGN_16);

   for (const math_case &tc : cases) {
      gfx6_math(p, retype(g0, tc.dst_type),
                   BRW_MATH_FUNCTION_POW,
                   retype(g0, tc.src0_type),
                   retype(g0, tc.src1_type));

      brw_inst_set_src0_vstride(&devinfo, last_inst, tc.src0_vstride);
      brw_inst_set_src1_vstride(&devinfo, last_inst, tc.src1_vstride);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
2070
/* MOV from a vector-immediate typed source (VF, V, UV) into g0 with different
 * destination sub-register numbers.  The table expects validate() to accept
 * sub-register 0 and 16 and to reject sub-register 1.
 */
TEST_P(validation_test, vector_immediate_destination_alignment)
{
   struct alignment_case {
      enum brw_reg_type dst_type;
      enum brw_reg_type src_type;
      unsigned subnr;
      unsigned exec_size;
      bool expected_result;
   };

   static const alignment_case cases[] = {
      { BRW_TYPE_F, BRW_TYPE_VF,  0, BRW_EXECUTE_4, true  },
      { BRW_TYPE_F, BRW_TYPE_VF, 16, BRW_EXECUTE_4, true  },
      { BRW_TYPE_F, BRW_TYPE_VF,  1, BRW_EXECUTE_4, false },

      { BRW_TYPE_W, BRW_TYPE_V,   0, BRW_EXECUTE_8, true  },
      { BRW_TYPE_W, BRW_TYPE_V,  16, BRW_EXECUTE_8, true  },
      { BRW_TYPE_W, BRW_TYPE_V,   1, BRW_EXECUTE_8, false },

      { BRW_TYPE_W, BRW_TYPE_UV,  0, BRW_EXECUTE_8, true  },
      { BRW_TYPE_W, BRW_TYPE_UV, 16, BRW_EXECUTE_8, true  },
      { BRW_TYPE_W, BRW_TYPE_UV,  1, BRW_EXECUTE_8, false },
   };

   /* UV type is Gfx6+ */
   const bool uv_supported = devinfo.ver >= 6;

   for (const alignment_case &tc : cases) {
      if (!uv_supported && tc.src_type == BRW_TYPE_UV)
         continue;

      brw_MOV(p, retype(g0, tc.dst_type), retype(zero, tc.src_type));
      brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, tc.subnr);
      brw_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
2108
/* MOV from a vector-immediate typed source (VF, V, UV) into g0 with different
 * destination types and horizontal strides.  Each row records whether
 * validate() is expected to accept the combination.
 */
TEST_P(validation_test, vector_immediate_destination_stride)
{
   struct stride_case {
      enum brw_reg_type dst_type;
      enum brw_reg_type src_type;
      unsigned stride;
      bool expected_result;
   };

   static const stride_case cases[] = {
      { BRW_TYPE_F, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_1, true  },
      { BRW_TYPE_F, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_2, false },
      { BRW_TYPE_D, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_1, true  },
      { BRW_TYPE_D, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_2, false },
      { BRW_TYPE_W, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_2, true  },
      { BRW_TYPE_B, BRW_TYPE_VF, BRW_HORIZONTAL_STRIDE_4, true  },

      { BRW_TYPE_W, BRW_TYPE_V,  BRW_HORIZONTAL_STRIDE_1, true  },
      { BRW_TYPE_W, BRW_TYPE_V,  BRW_HORIZONTAL_STRIDE_2, false },
      { BRW_TYPE_W, BRW_TYPE_V,  BRW_HORIZONTAL_STRIDE_4, false },
      { BRW_TYPE_B, BRW_TYPE_V,  BRW_HORIZONTAL_STRIDE_2, true  },

      { BRW_TYPE_W, BRW_TYPE_UV, BRW_HORIZONTAL_STRIDE_1, true  },
      { BRW_TYPE_W, BRW_TYPE_UV, BRW_HORIZONTAL_STRIDE_2, false },
      { BRW_TYPE_W, BRW_TYPE_UV, BRW_HORIZONTAL_STRIDE_4, false },
      { BRW_TYPE_B, BRW_TYPE_UV, BRW_HORIZONTAL_STRIDE_2, true  },
   };

   /* UV type is Gfx6+ */
   const bool uv_supported = devinfo.ver >= 6;

   for (const stride_case &tc : cases) {
      if (!uv_supported && tc.src_type == BRW_TYPE_UV)
         continue;

      brw_MOV(p, retype(g0, tc.dst_type), retype(zero, tc.src_type));
      brw_inst_set_dst_hstride(&devinfo, last_inst, tc.stride);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
2149
/* Align1 regioning rules for 64-bit types and integer DWord multiply.
 *
 * Each row builds a MOV or MUL on g0 and then overrides exec size,
 * sub-register numbers and region parameters.  The row's expected result is
 * only checked on CHV or when intel_device_info_is_9lp() is true; on every
 * other platform the instruction is expected to validate.
 */
TEST_P(validation_test, qword_low_power_align1_regioning_restrictions)
{
   static const struct {
      enum opcode opcode;
      unsigned exec_size;

      enum brw_reg_type dst_type;
      unsigned dst_subreg;
      unsigned dst_stride;

      enum brw_reg_type src_type;
      unsigned src_subreg;
      unsigned src_vstride;
      unsigned src_width;
      unsigned src_hstride;

      bool expected_result;
   } inst[] = {
#define INST(opcode, exec_size, dst_type, dst_subreg, dst_stride, src_type,    \
             src_subreg, src_vstride, src_width, src_hstride, expected_result) \
      {                                                                        \
         BRW_OPCODE_##opcode,                                                  \
         BRW_EXECUTE_##exec_size,                                              \
         BRW_TYPE_##dst_type,                                                  \
         dst_subreg,                                                           \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                                   \
         BRW_TYPE_##src_type,                                                  \
         src_subreg,                                                           \
         BRW_VERTICAL_STRIDE_##src_vstride,                                    \
         BRW_WIDTH_##src_width,                                                \
         BRW_HORIZONTAL_STRIDE_##src_hstride,                                  \
         expected_result,                                                      \
      }

      /* Some instruction that violate no restrictions, as a control */
      INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
      INST(MOV, 4, Q,  0, 1, Q,  0, 4, 4, 1, true ),
      INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),

      INST(MOV, 4, DF, 0, 1, F,  0, 8, 4, 2, true ),
      INST(MOV, 4, Q,  0, 1, D,  0, 8, 4, 2, true ),
      INST(MOV, 4, UQ, 0, 1, UD, 0, 8, 4, 2, true ),

      INST(MOV, 4, F,  0, 2, DF, 0, 4, 4, 1, true ),
      INST(MOV, 4, D,  0, 2, Q,  0, 4, 4, 1, true ),
      INST(MOV, 4, UD, 0, 2, UQ, 0, 4, 4, 1, true ),

      INST(MUL, 8, D,  0, 2, D,  0, 8, 4, 2, true ),
      INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),

      /* Something with subreg nrs */
      INST(MOV, 2, DF, 8, 1, DF, 8, 2, 2, 1, true ),
      INST(MOV, 2, Q,  8, 1, Q,  8, 2, 2, 1, true ),
      INST(MOV, 2, UQ, 8, 1, UQ, 8, 2, 2, 1, true ),

      INST(MUL, 2, D,  4, 2, D,  4, 4, 2, 2, true ),
      INST(MUL, 2, UD, 4, 2, UD, 4, 4, 2, 2, true ),

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, regioning in Align1 must follow these rules:
       *
       *    1. Source and Destination horizontal stride must be aligned to the
       *       same qword.
       */
      INST(MOV, 4, DF, 0, 2, DF, 0, 4, 4, 1, false),
      INST(MOV, 4, Q,  0, 2, Q,  0, 4, 4, 1, false),
      INST(MOV, 4, UQ, 0, 2, UQ, 0, 4, 4, 1, false),

      INST(MOV, 4, DF, 0, 2, F,  0, 8, 4, 2, false),
      INST(MOV, 4, Q,  0, 2, D,  0, 8, 4, 2, false),
      INST(MOV, 4, UQ, 0, 2, UD, 0, 8, 4, 2, false),

      INST(MOV, 4, DF, 0, 2, F,  0, 4, 4, 1, false),
      INST(MOV, 4, Q,  0, 2, D,  0, 4, 4, 1, false),
      INST(MOV, 4, UQ, 0, 2, UD, 0, 4, 4, 1, false),

      INST(MUL, 4, D,  0, 2, D,  0, 4, 4, 1, false),
      INST(MUL, 4, UD, 0, 2, UD, 0, 4, 4, 1, false),

      INST(MUL, 4, D,  0, 1, D,  0, 8, 4, 2, false),
      INST(MUL, 4, UD, 0, 1, UD, 0, 8, 4, 2, false),

      /*    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. */
      INST(MOV, 4, DF, 0, 1, DF, 0, 0, 2, 1, false),
      INST(MOV, 4, Q,  0, 1, Q,  0, 0, 2, 1, false),
      INST(MOV, 4, UQ, 0, 1, UQ, 0, 0, 2, 1, false),

      INST(MOV, 4, DF, 0, 1, F,  0, 0, 2, 2, false),
      INST(MOV, 4, Q,  0, 1, D,  0, 0, 2, 2, false),
      INST(MOV, 4, UQ, 0, 1, UD, 0, 0, 2, 2, false),

      INST(MOV, 8, F,  0, 2, DF, 0, 0, 2, 1, false),
      INST(MOV, 8, D,  0, 2, Q,  0, 0, 2, 1, false),
      INST(MOV, 8, UD, 0, 2, UQ, 0, 0, 2, 1, false),

      INST(MUL, 8, D,  0, 2, D,  0, 0, 4, 2, false),
      INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),

      /*    3. Source and Destination offset must be the same, except the case
       *       of scalar source.
       */
      INST(MOV, 2, DF, 8, 1, DF, 0, 2, 2, 1, false),
      INST(MOV, 2, Q,  8, 1, Q,  0, 2, 2, 1, false),
      INST(MOV, 2, UQ, 8, 1, UQ, 0, 2, 2, 1, false),

      INST(MOV, 2, DF, 0, 1, DF, 8, 2, 2, 1, false),
      INST(MOV, 2, Q,  0, 1, Q,  8, 2, 2, 1, false),
      INST(MOV, 2, UQ, 0, 1, UQ, 8, 2, 2, 1, false),

      INST(MUL, 4, D,  4, 2, D,  0, 4, 2, 2, false),
      INST(MUL, 4, UD, 4, 2, UD, 0, 4, 2, 2, false),

      INST(MUL, 4, D,  0, 2, D,  4, 4, 2, 2, false),
      INST(MUL, 4, UD, 0, 2, UD, 4, 4, 2, 2, false),

      INST(MOV, 2, DF, 8, 1, DF, 0, 0, 1, 0, true ),
      INST(MOV, 2, Q,  8, 1, Q,  0, 0, 1, 0, true ),
      INST(MOV, 2, UQ, 8, 1, UQ, 0, 0, 1, 0, true ),

      INST(MOV, 2, DF, 8, 1, F,  4, 0, 1, 0, true ),
      INST(MOV, 2, Q,  8, 1, D,  4, 0, 1, 0, true ),
      INST(MOV, 2, UQ, 8, 1, UD, 4, 0, 1, 0, true ),

      INST(MUL, 4, D,  4, 1, D,  0, 0, 1, 0, true ),
      INST(MUL, 4, UD, 4, 1, UD, 0, 0, 1, 0, true ),

      INST(MUL, 4, D,  0, 1, D,  4, 0, 1, 0, true ),
      INST(MUL, 4, UD, 0, 1, UD, 4, 0, 1, 0, true ),

#undef INST
   };

   /* These restrictions only apply to Gfx8+ */
   if (devinfo.ver < 8)
      return;

   /* The test is skipped on Gfx12+.
    * NOTE(review): this test never programs NoDDChk/NoDDClr, so the skip is
    * not about those bits; the actual reason is not visible here - confirm
    * and document it.
    */
   if (devinfo.ver >= 12)
      return;

   /* The table's expectations are only checked on these platforms; the
    * check does not depend on the row, so compute it once.
    */
   const bool restrictions_apply =
      devinfo.platform == INTEL_PLATFORM_CHV ||
      intel_device_info_is_9lp(&devinfo);

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      /* Skip rows whose types the device does not support. */
      if (!devinfo.has_64bit_float &&
          (inst[i].dst_type == BRW_TYPE_DF ||
           inst[i].src_type == BRW_TYPE_DF))
         continue;

      if (!devinfo.has_64bit_int &&
          (inst[i].dst_type == BRW_TYPE_Q ||
           inst[i].dst_type == BRW_TYPE_UQ ||
           inst[i].src_type == BRW_TYPE_Q ||
           inst[i].src_type == BRW_TYPE_UQ))
         continue;

      if (inst[i].opcode == BRW_OPCODE_MOV) {
         brw_MOV(p, retype(g0, inst[i].dst_type),
                    retype(g0, inst[i].src_type));
      } else {
         assert(inst[i].opcode == BRW_OPCODE_MUL);
         brw_MUL(p, retype(g0, inst[i].dst_type),
                    retype(g0, inst[i].src_type),
                    retype(zero, inst[i].src_type));
      }
      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);

      brw_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subreg);
      brw_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].src_subreg);

      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);

      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
      brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
      brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);

      if (restrictions_apply) {
         EXPECT_EQ(inst[i].expected_result, validate(p));
      } else {
         EXPECT_TRUE(validate(p));
      }

      clear_instructions(p);
   }
}
2338
/* Indirect addressing combined with 64-bit types or integer DWord multiply.
 *
 * Each row builds a MOV or MUL on g0 and then overrides the destination and
 * source 0 address modes plus the region parameters.  The row's expected
 * result is only checked on CHV or when intel_device_info_is_9lp() is true;
 * elsewhere the instruction is expected to validate.
 */
TEST_P(validation_test, qword_low_power_no_indirect_addressing)
{
   struct indirect_case {
      enum opcode opcode;
      unsigned exec_size;

      enum brw_reg_type dst_type;
      bool dst_is_indirect;
      unsigned dst_stride;

      enum brw_reg_type src_type;
      bool src_is_indirect;
      unsigned src_vstride;
      unsigned src_width;
      unsigned src_hstride;

      bool expected_result;
   };

#define INST(opcode, exec_size, dst_type, dst_is_indirect, dst_stride,         \
             src_type, src_is_indirect, src_vstride, src_width, src_hstride,   \
             expected_result)                                                  \
   {                                                                           \
      BRW_OPCODE_##opcode,                                                     \
      BRW_EXECUTE_##exec_size,                                                 \
      BRW_TYPE_##dst_type,                                                     \
      dst_is_indirect,                                                         \
      BRW_HORIZONTAL_STRIDE_##dst_stride,                                      \
      BRW_TYPE_##src_type,                                                     \
      src_is_indirect,                                                         \
      BRW_VERTICAL_STRIDE_##src_vstride,                                       \
      BRW_WIDTH_##src_width,                                                   \
      BRW_HORIZONTAL_STRIDE_##src_hstride,                                     \
      expected_result,                                                         \
   }

   static const indirect_case cases[] = {
      /* Some instruction that violate no restrictions, as a control */
      INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
      INST(MOV, 4, Q,  0, 1, Q,  0, 4, 4, 1, true ),
      INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),

      INST(MUL, 8, D,  0, 2, D,  0, 8, 4, 2, true ),
      INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),

      INST(MOV, 4, F,  1, 1, F,  0, 4, 4, 1, true ),
      INST(MOV, 4, F,  0, 1, F,  1, 4, 4, 1, true ),
      INST(MOV, 4, F,  1, 1, F,  1, 4, 4, 1, true ),

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, indirect addressing must not be used.
       */
      INST(MOV, 4, DF, 1, 1, DF, 0, 4, 4, 1, false),
      INST(MOV, 4, Q,  1, 1, Q,  0, 4, 4, 1, false),
      INST(MOV, 4, UQ, 1, 1, UQ, 0, 4, 4, 1, false),

      INST(MOV, 4, DF, 0, 1, DF, 1, 4, 4, 1, false),
      INST(MOV, 4, Q,  0, 1, Q,  1, 4, 4, 1, false),
      INST(MOV, 4, UQ, 0, 1, UQ, 1, 4, 4, 1, false),

      INST(MOV, 4, DF, 1, 1, F,  0, 8, 4, 2, false),
      INST(MOV, 4, Q,  1, 1, D,  0, 8, 4, 2, false),
      INST(MOV, 4, UQ, 1, 1, UD, 0, 8, 4, 2, false),

      INST(MOV, 4, DF, 0, 1, F,  1, 8, 4, 2, false),
      INST(MOV, 4, Q,  0, 1, D,  1, 8, 4, 2, false),
      INST(MOV, 4, UQ, 0, 1, UD, 1, 8, 4, 2, false),

      INST(MOV, 4, F,  1, 2, DF, 0, 4, 4, 1, false),
      INST(MOV, 4, D,  1, 2, Q,  0, 4, 4, 1, false),
      INST(MOV, 4, UD, 1, 2, UQ, 0, 4, 4, 1, false),

      INST(MOV, 4, F,  0, 2, DF, 1, 4, 4, 1, false),
      INST(MOV, 4, D,  0, 2, Q,  1, 4, 4, 1, false),
      INST(MOV, 4, UD, 0, 2, UQ, 1, 4, 4, 1, false),

      INST(MUL, 8, D,  1, 2, D,  0, 8, 4, 2, false),
      INST(MUL, 8, UD, 1, 2, UD, 0, 8, 4, 2, false),

      INST(MUL, 8, D,  0, 2, D,  1, 8, 4, 2, false),
      INST(MUL, 8, UD, 0, 2, UD, 1, 8, 4, 2, false),
   };

#undef INST

   /* These restrictions only apply to Gfx8+ */
   if (devinfo.ver < 8)
      return;

   /* True when either operand of a row uses one of the 64-bit integer types. */
   const auto is_qword_int = [](enum brw_reg_type type) {
      return type == BRW_TYPE_Q || type == BRW_TYPE_UQ;
   };

   for (const indirect_case &tc : cases) {
      const bool uses_df = tc.dst_type == BRW_TYPE_DF ||
                           tc.src_type == BRW_TYPE_DF;
      const bool uses_qword_int = is_qword_int(tc.dst_type) ||
                                  is_qword_int(tc.src_type);

      /* Skip rows whose types the device does not support. */
      if ((uses_df && !devinfo.has_64bit_float) ||
          (uses_qword_int && !devinfo.has_64bit_int))
         continue;

      if (tc.opcode != BRW_OPCODE_MOV) {
         assert(tc.opcode == BRW_OPCODE_MUL);
         brw_MUL(p, retype(g0, tc.dst_type),
                    retype(g0, tc.src_type),
                    retype(zero, tc.src_type));
      } else {
         brw_MOV(p, retype(g0, tc.dst_type),
                    retype(g0, tc.src_type));
      }
      brw_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      brw_inst_set_dst_address_mode(&devinfo, last_inst, tc.dst_is_indirect);
      brw_inst_set_src0_address_mode(&devinfo, last_inst, tc.src_is_indirect);

      brw_inst_set_dst_hstride(&devinfo, last_inst, tc.dst_stride);

      brw_inst_set_src0_vstride(&devinfo, last_inst, tc.src_vstride);
      brw_inst_set_src0_width(&devinfo, last_inst, tc.src_width);
      brw_inst_set_src0_hstride(&devinfo, last_inst, tc.src_hstride);

      if (devinfo.platform != INTEL_PLATFORM_CHV &&
          !intel_device_info_is_9lp(&devinfo)) {
         EXPECT_TRUE(validate(p));
      } else {
         EXPECT_EQ(tc.expected_result, validate(p));
      }

      clear_instructions(p);
   }
}
2471
/* Use of ARF registers (acc0, null) together with 64-bit types or integer
 * DWord multiply.
 *
 * Each row builds a MOV or MUL between the given dst/src registers, then
 * overrides exec size, AccWrControl and the region parameters.  The row's
 * expected result is checked on CHV, when intel_device_info_is_9lp() is
 * true, or on ver 8 for a MUL whose destination is a non-null ARF; in every
 * other case the instruction is expected to validate.  A final MAC with DF
 * operands covers the implicit accumulator read.
 */
TEST_P(validation_test, qword_low_power_no_64bit_arf)
{
   static const struct {
      enum opcode opcode;
      unsigned exec_size;

      struct brw_reg dst;
      enum brw_reg_type dst_type;
      unsigned dst_stride;

      struct brw_reg src;
      enum brw_reg_type src_type;
      unsigned src_vstride;
      unsigned src_width;
      unsigned src_hstride;

      bool acc_wr;
      bool expected_result;
   } inst[] = {
#define INST(opcode, exec_size, dst, dst_type, dst_stride,                     \
             src, src_type, src_vstride, src_width, src_hstride,               \
             acc_wr, expected_result)                                          \
      {                                                                        \
         BRW_OPCODE_##opcode,                                                  \
         BRW_EXECUTE_##exec_size,                                              \
         dst,                                                                  \
         BRW_TYPE_##dst_type,                                                  \
         BRW_HORIZONTAL_STRIDE_##dst_stride,                                   \
         src,                                                                  \
         BRW_TYPE_##src_type,                                                  \
         BRW_VERTICAL_STRIDE_##src_vstride,                                    \
         BRW_WIDTH_##src_width,                                                \
         BRW_HORIZONTAL_STRIDE_##src_hstride,                                  \
         acc_wr,                                                               \
         expected_result,                                                      \
      }

      /* Some instruction that violate no restrictions, as a control */
      INST(MOV, 4, g0,   DF, 1, g0,   F,  4, 2, 2, 0, true ),
      INST(MOV, 4, g0,   F,  2, g0,   DF, 4, 4, 1, 0, true ),

      INST(MOV, 4, g0,   Q,  1, g0,   D,  4, 2, 2, 0, true ),
      INST(MOV, 4, g0,   D,  2, g0,   Q,  4, 4, 1, 0, true ),

      INST(MOV, 4, g0,   UQ, 1, g0,   UD, 4, 2, 2, 0, true ),
      INST(MOV, 4, g0,   UD, 2, g0,   UQ, 4, 4, 1, 0, true ),

      INST(MOV, 4, null, F,  1, g0,   F,  4, 4, 1, 0, true ),
      INST(MOV, 4, acc0, F,  1, g0,   F,  4, 4, 1, 0, true ),
      INST(MOV, 4, g0,   F,  1, acc0, F,  4, 4, 1, 0, true ),

      INST(MOV, 4, null, D,  1, g0,   D,  4, 4, 1, 0, true ),
      INST(MOV, 4, acc0, D,  1, g0,   D,  4, 4, 1, 0, true ),
      INST(MOV, 4, g0,   D,  1, acc0, D,  4, 4, 1, 0, true ),

      INST(MOV, 4, null, UD, 1, g0,   UD, 4, 4, 1, 0, true ),
      INST(MOV, 4, acc0, UD, 1, g0,   UD, 4, 4, 1, 0, true ),
      INST(MOV, 4, g0,   UD, 1, acc0, UD, 4, 4, 1, 0, true ),

      INST(MUL, 4, g0,   D,  2, g0,   D,  4, 2, 2, 0, true ),
      INST(MUL, 4, g0,   UD, 2, g0,   UD, 4, 2, 2, 0, true ),

      /* The PRMs say that for CHV, BXT:
       *
       *    ARF registers must never be used with 64b datatype or when
       *    operation is integer DWord multiply.
       */
      INST(MOV, 4, acc0, DF, 1, g0,   F,  4, 2, 2, 0, false),
      INST(MOV, 4, g0,   DF, 1, acc0, F,  4, 2, 2, 0, false),

      INST(MOV, 4, acc0, Q,  1, g0,   D,  4, 2, 2, 0, false),
      INST(MOV, 4, g0,   Q,  1, acc0, D,  4, 2, 2, 0, false),

      INST(MOV, 4, acc0, UQ, 1, g0,   UD, 4, 2, 2, 0, false),
      INST(MOV, 4, g0,   UQ, 1, acc0, UD, 4, 2, 2, 0, false),

      INST(MOV, 4, acc0, F,  2, g0,   DF, 4, 4, 1, 0, false),
      INST(MOV, 4, g0,   F,  2, acc0, DF, 4, 4, 1, 0, false),

      INST(MOV, 4, acc0, D,  2, g0,   Q,  4, 4, 1, 0, false),
      INST(MOV, 4, g0,   D,  2, acc0, Q,  4, 4, 1, 0, false),

      INST(MOV, 4, acc0, UD, 2, g0,   UQ, 4, 4, 1, 0, false),
      INST(MOV, 4, g0,   UD, 2, acc0, UQ, 4, 4, 1, 0, false),

      INST(MUL, 4, acc0, D,  2, g0,   D,  4, 2, 2, 0, false),
      INST(MUL, 4, acc0, UD, 2, g0,   UD, 4, 2, 2, 0, false),
      /* MUL cannot have integer accumulator sources, so don't test that */

      /* We assume that the restriction does not apply to the null register */
      INST(MOV, 4, null, DF, 1, g0,   F,  4, 2, 2, 0, true ),
      INST(MOV, 4, null, Q,  1, g0,   D,  4, 2, 2, 0, true ),
      INST(MOV, 4, null, UQ, 1, g0,   UD, 4, 2, 2, 0, true ),

      /* Check implicit accumulator write control */
      INST(MOV, 4, null, DF, 1, g0,   F,  4, 2, 2, 1, false),
      INST(MUL, 4, null, DF, 1, g0,   F,  4, 2, 2, 1, false),

#undef INST
   };

   /* These restrictions only apply to Gfx8+ */
   if (devinfo.ver < 8)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      /* Skip rows whose types the device does not support. */
      if (!devinfo.has_64bit_float &&
          (inst[i].dst_type == BRW_TYPE_DF ||
           inst[i].src_type == BRW_TYPE_DF))
         continue;

      if (!devinfo.has_64bit_int &&
          (inst[i].dst_type == BRW_TYPE_Q ||
           inst[i].dst_type == BRW_TYPE_UQ ||
           inst[i].src_type == BRW_TYPE_Q ||
           inst[i].src_type == BRW_TYPE_UQ))
         continue;

      /* The emit helper already produces the row's opcode, so the opcode is
       * not re-set afterwards (this matches the other qword_low_power tests).
       */
      if (inst[i].opcode == BRW_OPCODE_MOV) {
         brw_MOV(p, retype(inst[i].dst, inst[i].dst_type),
                    retype(inst[i].src, inst[i].src_type));
      } else {
         assert(inst[i].opcode == BRW_OPCODE_MUL);
         brw_MUL(p, retype(inst[i].dst, inst[i].dst_type),
                    retype(inst[i].src, inst[i].src_type),
                    retype(zero, inst[i].src_type));
      }
      brw_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);
      brw_inst_set_acc_wr_control(&devinfo, last_inst, inst[i].acc_wr);

      brw_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);

      brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src_vstride);
      brw_inst_set_src0_width(&devinfo, last_inst, inst[i].src_width);
      brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src_hstride);

      /* Note: The Broadwell PRM also lists the restriction that destination
       * of DWord multiplication cannot be the accumulator.
       */
      if (devinfo.platform == INTEL_PLATFORM_CHV ||
          intel_device_info_is_9lp(&devinfo) ||
          (devinfo.ver == 8 &&
           inst[i].opcode == BRW_OPCODE_MUL &&
           brw_inst_dst_reg_file(&devinfo, last_inst) == ARF &&
           brw_inst_dst_da_reg_nr(&devinfo, last_inst) != BRW_ARF_NULL)) {
         EXPECT_EQ(inst[i].expected_result, validate(p));
      } else {
         EXPECT_TRUE(validate(p));
      }

      clear_instructions(p);
   }

   /* The MAC check below uses DF operands, so it needs 64-bit float. */
   if (!devinfo.has_64bit_float)
      return;

   /* MAC implicitly reads the accumulator */
   brw_MAC(p, retype(g0, BRW_TYPE_DF),
              retype(stride(g0, 4, 4, 1), BRW_TYPE_DF),
              retype(stride(g0, 4, 4, 1), BRW_TYPE_DF));
   if (devinfo.platform == INTEL_PLATFORM_CHV ||
       intel_device_info_is_9lp(&devinfo)) {
      EXPECT_FALSE(validate(p));
   } else {
      EXPECT_TRUE(validate(p));
   }
}
2640
/* Align16 MOV/ADD with a 64-bit destination type and a 32-bit source type at
 * execution sizes 2 and 4.  The table expects validate() to accept size 2
 * and to reject size 4.
 */
TEST_P(validation_test, align16_64_bit_integer)
{
   struct exec_size_case {
      enum opcode opcode;
      unsigned exec_size;

      enum brw_reg_type dst_type;
      enum brw_reg_type src_type;

      bool expected_result;
   };

#define INST(opcode, exec_size, dst_type, src_type, expected_result)           \
   {                                                                           \
      BRW_OPCODE_##opcode,                                                     \
      BRW_EXECUTE_##exec_size,                                                 \
      BRW_TYPE_##dst_type,                                                     \
      BRW_TYPE_##src_type,                                                     \
      expected_result,                                                         \
   }

   static const exec_size_case cases[] = {
      /* Some instruction that violate no restrictions, as a control */
      INST(MOV, 2, Q,  D,  true ),
      INST(MOV, 2, UQ, UD, true ),
      INST(MOV, 2, DF, F,  true ),

      INST(ADD, 2, Q,  D,  true ),
      INST(ADD, 2, UQ, UD, true ),
      INST(ADD, 2, DF, F,  true ),

      /* The PRMs say that for BDW, SKL:
       *
       *    If Align16 is required for an operation with QW destination and non-QW
       *    source datatypes, the execution size cannot exceed 2.
       */

      INST(MOV, 4, Q,  D,  false),
      INST(MOV, 4, UQ, UD, false),
      INST(MOV, 4, DF, F,  false),

      INST(ADD, 4, Q,  D,  false),
      INST(ADD, 4, UQ, UD, false),
      INST(ADD, 4, DF, F,  false),
   };

#undef INST

   /* 64-bit integer types exist on Gfx8+, and Align16 does not exist on
    * Gfx11+, so the body only runs for 8 <= ver < 11.
    */
   if (devinfo.ver < 8 || devinfo.ver >= 11)
      return;

   brw_set_default_access_mode(p, BRW_ALIGN_16);

   for (const exec_size_case &tc : cases) {
      if (tc.opcode != BRW_OPCODE_MOV) {
         assert(tc.opcode == BRW_OPCODE_ADD);
         brw_ADD(p, retype(g0, tc.dst_type),
                    retype(g0, tc.src_type),
                    retype(g0, tc.src_type));
      } else {
         brw_MOV(p, retype(g0, tc.dst_type),
                    retype(g0, tc.src_type));
      }
      brw_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
2714
/* NoDDChk/NoDDClr combined with 64-bit types or integer DWord multiply.
 *
 * Each row builds a MOV or MUL on g0 and then overrides exec size, region
 * parameters and the two dependency-control bits.  The row's expected
 * result is only checked on CHV or when intel_device_info_is_9lp() is true;
 * elsewhere the instruction is expected to validate.
 */
TEST_P(validation_test, qword_low_power_no_depctrl)
{
   struct depctrl_case {
      enum opcode opcode;
      unsigned exec_size;

      enum brw_reg_type dst_type;
      unsigned dst_stride;

      enum brw_reg_type src_type;
      unsigned src_vstride;
      unsigned src_width;
      unsigned src_hstride;

      bool no_dd_check;
      bool no_dd_clear;

      bool expected_result;
   };

#define INST(opcode, exec_size, dst_type, dst_stride,                          \
             src_type, src_vstride, src_width, src_hstride,                    \
             no_dd_check, no_dd_clear, expected_result)                        \
   {                                                                           \
      BRW_OPCODE_##opcode,                                                     \
      BRW_EXECUTE_##exec_size,                                                 \
      BRW_TYPE_##dst_type,                                                     \
      BRW_HORIZONTAL_STRIDE_##dst_stride,                                      \
      BRW_TYPE_##src_type,                                                     \
      BRW_VERTICAL_STRIDE_##src_vstride,                                       \
      BRW_WIDTH_##src_width,                                                   \
      BRW_HORIZONTAL_STRIDE_##src_hstride,                                     \
      no_dd_check,                                                             \
      no_dd_clear,                                                             \
      expected_result,                                                         \
   }

   static const depctrl_case cases[] = {
      /* Some instruction that violate no restrictions, as a control */
      INST(MOV, 4, DF, 1, F,  8, 4, 2, 0, 0, true ),
      INST(MOV, 4, Q,  1, D,  8, 4, 2, 0, 0, true ),
      INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 0, true ),

      INST(MOV, 4, F,  2, DF, 4, 4, 1, 0, 0, true ),
      INST(MOV, 4, D,  2, Q,  4, 4, 1, 0, 0, true ),
      INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 0, true ),

      INST(MUL, 8, D,  2, D,  8, 4, 2, 0, 0, true ),
      INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 0, true ),

      INST(MOV, 4, F,  1, F,  4, 4, 1, 1, 1, true ),

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, DepCtrl must not be used.
       */
      INST(MOV, 4, DF, 1, F,  8, 4, 2, 1, 0, false),
      INST(MOV, 4, Q,  1, D,  8, 4, 2, 1, 0, false),
      INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 1, 0, false),

      INST(MOV, 4, F,  2, DF, 4, 4, 1, 1, 0, false),
      INST(MOV, 4, D,  2, Q,  4, 4, 1, 1, 0, false),
      INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 1, 0, false),

      INST(MOV, 4, DF, 1, F,  8, 4, 2, 0, 1, false),
      INST(MOV, 4, Q,  1, D,  8, 4, 2, 0, 1, false),
      INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 1, false),

      INST(MOV, 4, F,  2, DF, 4, 4, 1, 0, 1, false),
      INST(MOV, 4, D,  2, Q,  4, 4, 1, 0, 1, false),
      INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 1, false),

      INST(MUL, 8, D,  2, D,  8, 4, 2, 1, 0, false),
      INST(MUL, 8, UD, 2, UD, 8, 4, 2, 1, 0, false),

      INST(MUL, 8, D,  2, D,  8, 4, 2, 0, 1, false),
      INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 1, false),
   };

#undef INST

   /* These restrictions only apply to Gfx8+, and NoDDChk/NoDDClr does not
    * exist on Gfx12+, so the body only runs for 8 <= ver < 12.
    */
   if (devinfo.ver < 8 || devinfo.ver >= 12)
      return;

   /* True when either operand of a row uses one of the 64-bit integer types. */
   const auto is_qword_int = [](enum brw_reg_type type) {
      return type == BRW_TYPE_Q || type == BRW_TYPE_UQ;
   };

   for (const depctrl_case &tc : cases) {
      const bool uses_df = tc.dst_type == BRW_TYPE_DF ||
                           tc.src_type == BRW_TYPE_DF;
      const bool uses_qword_int = is_qword_int(tc.dst_type) ||
                                  is_qword_int(tc.src_type);

      /* Skip rows whose types the device does not support. */
      if ((uses_df && !devinfo.has_64bit_float) ||
          (uses_qword_int && !devinfo.has_64bit_int))
         continue;

      if (tc.opcode != BRW_OPCODE_MOV) {
         assert(tc.opcode == BRW_OPCODE_MUL);
         brw_MUL(p, retype(g0, tc.dst_type),
                    retype(g0, tc.src_type),
                    retype(zero, tc.src_type));
      } else {
         brw_MOV(p, retype(g0, tc.dst_type),
                    retype(g0, tc.src_type));
      }
      brw_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      brw_inst_set_dst_hstride(&devinfo, last_inst, tc.dst_stride);

      brw_inst_set_src0_vstride(&devinfo, last_inst, tc.src_vstride);
      brw_inst_set_src0_width(&devinfo, last_inst, tc.src_width);
      brw_inst_set_src0_hstride(&devinfo, last_inst, tc.src_hstride);

      brw_inst_set_no_dd_check(&devinfo, last_inst, tc.no_dd_check);
      brw_inst_set_no_dd_clear(&devinfo, last_inst, tc.no_dd_clear);

      if (devinfo.platform != INTEL_PLATFORM_CHV &&
          !intel_device_info_is_9lp(&devinfo)) {
         EXPECT_TRUE(validate(p));
      } else {
         EXPECT_EQ(tc.expected_result, validate(p));
      }

      clear_instructions(p);
   }
}
2846
/**
 * Exercises the Gfx11 byte-source restriction on src1/src2 using MOV, ADD and
 * MAD instructions built from a table of test vectors.
 *
 * Each vector carries the gfx_ver it was written for and is skipped unless
 * that value equals devinfo.ver.  Judging by the vectors, a byte type (B/UB)
 * in src1 or src2 is expected to be rejected on Gfx11, while a byte type in
 * the destination or src0 is expected to be accepted.
 */
TEST_P(validation_test, gfx11_no_byte_src_1_2)
{
   static const struct {
      enum opcode opcode;
      unsigned access_mode;

      enum brw_reg_type dst_type;
      struct {
         enum brw_reg_type type;
         unsigned vstride;
         unsigned width;
         unsigned hstride;
      } srcs[3];

      int  gfx_ver;
      bool expected_result;
   } inst[] = {
#define INST(opcode, access_mode, dst_type,                             \
             src0_type, src0_vstride, src0_width, src0_hstride,         \
             src1_type, src1_vstride, src1_width, src1_hstride,         \
             src2_type,                                                 \
             gfx_ver, expected_result)                                  \
      {                                                                 \
         BRW_OPCODE_##opcode,                                           \
         BRW_ALIGN_##access_mode,                                       \
         BRW_TYPE_##dst_type,                                           \
         {                                                              \
            {                                                           \
               BRW_TYPE_##src0_type,                                    \
               BRW_VERTICAL_STRIDE_##src0_vstride,                      \
               BRW_WIDTH_##src0_width,                                  \
               BRW_HORIZONTAL_STRIDE_##src0_hstride,                    \
            },                                                          \
            {                                                           \
               BRW_TYPE_##src1_type,                                    \
               BRW_VERTICAL_STRIDE_##src1_vstride,                      \
               BRW_WIDTH_##src1_width,                                  \
               BRW_HORIZONTAL_STRIDE_##src1_hstride,                    \
            },                                                          \
            {                                                           \
               BRW_TYPE_##src2_type,                                    \
            },                                                          \
         },                                                             \
         gfx_ver,                                                       \
         expected_result,                                               \
      }

      /* Passes on < 11
       *
       * NOTE(review): the device list at the top of this file starts at
       * "skl"; confirm that some parameter still reports ver 7, 8 or 10,
       * otherwise these three vectors are never executed.
       */
      INST(MOV, 16,  F,  B, 2, 4, 0, UD, 0, 4, 0,  D,  8, true ),
      INST(ADD, 16, UD,  F, 0, 4, 0, UB, 0, 1, 0,  D,  7, true ),
      INST(MAD, 16,  D,  B, 0, 4, 0, UB, 0, 1, 0,  B, 10, true ),

      /* Fails on 11+ */
      INST(MAD,  1, UB,  W, 1, 1, 0,  D, 0, 4, 0,  B, 11, false ),
      INST(MAD,  1, UB,  W, 1, 1, 1, UB, 1, 1, 0,  W, 11, false ),
      INST(ADD,  1,  W,  W, 1, 4, 1,  B, 1, 1, 0,  D, 11, false ),

      /* Passes on 11+ */
      INST(MOV,  1,  W,  B, 8, 8, 1,  D, 8, 8, 1,  D, 11, true ),
      INST(ADD,  1, UD,  B, 8, 8, 1,  W, 8, 8, 1,  D, 11, true ),
      INST(MAD,  1,  B,  B, 0, 1, 0,  D, 0, 4, 0,  W, 11, true ),

#undef INST
   };

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      /* Skip instruction not meant for this gfx_ver. */
      if (devinfo.ver != inst[i].gfx_ver)
         continue;

      /* The default exec size and access mode are changed per vector, so
       * save the codegen state here and restore it below.
       */
      brw_push_insn_state(p);

      brw_set_default_exec_size(p, BRW_EXECUTE_8);
      brw_set_default_access_mode(p, inst[i].access_mode);

      switch (inst[i].opcode) {
      case BRW_OPCODE_MOV:
         brw_MOV(p, retype(g0, inst[i].dst_type),
                    retype(g0, inst[i].srcs[0].type));
         brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
         brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
         break;
      case BRW_OPCODE_ADD:
         brw_ADD(p, retype(g0, inst[i].dst_type),
                    retype(g0, inst[i].srcs[0].type),
                    retype(g0, inst[i].srcs[1].type));
         brw_inst_set_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
         brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width);
         brw_inst_set_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
         brw_inst_set_src1_vstride(&devinfo, last_inst, inst[i].srcs[1].vstride);
         brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width);
         brw_inst_set_src1_hstride(&devinfo, last_inst, inst[i].srcs[1].hstride);
         break;
      case BRW_OPCODE_MAD:
         brw_MAD(p, retype(g0, inst[i].dst_type),
                    retype(g0, inst[i].srcs[0].type),
                    retype(g0, inst[i].srcs[1].type),
                    retype(g0, inst[i].srcs[2].type));
         brw_inst_set_3src_a1_src0_vstride(&devinfo, last_inst, inst[i].srcs[0].vstride);
         brw_inst_set_3src_a1_src0_hstride(&devinfo, last_inst, inst[i].srcs[0].hstride);
         /* src1 takes its region from its own table columns (srcs[1]), not
          * from a copy of src0's.
          */
         brw_inst_set_3src_a1_src1_vstride(&devinfo, last_inst, inst[i].srcs[1].vstride);
         brw_inst_set_3src_a1_src1_hstride(&devinfo, last_inst, inst[i].srcs[1].hstride);
         break;
      default:
         unreachable("invalid opcode");
      }

      brw_inst_set_dst_hstride(&devinfo, last_inst, BRW_HORIZONTAL_STRIDE_1);

      /* NOTE(review): these are the two-source width setters and they are
       * applied to every opcode, including MAD, which uses the 3src_a1
       * setters above -- confirm this is intended for three-source
       * instructions.
       */
      brw_inst_set_src0_width(&devinfo, last_inst, inst[i].srcs[0].width);
      brw_inst_set_src1_width(&devinfo, last_inst, inst[i].srcs[1].width);

      brw_pop_insn_state(p);

      EXPECT_EQ(inst[i].expected_result, validate(p));

      clear_instructions(p);
   }
}
2967
/* Runs ADD3 through the validator with a table of destination/source type
 * combinations.  The all-F, all-HF, all-B and all-UB vectors are expected to
 * be rejected; the word and dword vectors, including the mixed-width ones,
 * are expected to be accepted.  Nothing is checked when verx10 < 125.
 */
TEST_P(validation_test, add3_source_types)
{
   if (devinfo.verx10 < 125)
      return;

   static const struct {
      enum brw_reg_type dst;
      enum brw_reg_type src0;
      enum brw_reg_type src1;
      enum brw_reg_type src2;
      bool valid;
   } vectors[] = {
#define INST(d, s0, s1, s2, ok) \
      { BRW_TYPE_##d, BRW_TYPE_##s0, BRW_TYPE_##s1, BRW_TYPE_##s2, ok }

      /* Float and byte types. */
      INST( F,  F,  F,  F, false),
      INST(HF, HF, HF, HF, false),
      INST( B,  B,  B,  B, false),
      INST(UB, UB, UB, UB, false),

      /* Uniform word / dword types. */
      INST( W,  W,  W,  W, true),
      INST(UW, UW, UW, UW, true),
      INST( D,  D,  D,  D, true),
      INST(UD, UD, UD, UD, true),

      /* Word and dword mixed within one instruction. */
      INST( W,  D,  W,  W, true),
      INST(UW, UW, UD, UW, true),
      INST( D,  D,  W,  D, true),
      INST(UD, UD, UD, UW, true),
#undef INST
   };

   for (const auto &v : vectors) {
      const struct brw_reg dst  = retype(g0, v.dst);
      const struct brw_reg src0 = retype(g0, v.src0);
      const struct brw_reg src1 = retype(g0, v.src1);
      const struct brw_reg src2 = retype(g0, v.src2);

      brw_ADD3(p, dst, src0, src1, src2);

      EXPECT_EQ(v.valid, validate(p));

      clear_instructions(p);
   }
}
3019
/**
 * Checks which immediate types the validator accepts for ADD3.
 *
 * Every vector builds an ADD3 whose destination and non-immediate sources are
 * g0 retyped to reg_type.  imm_src selects the source (0 or 2) that is
 * replaced by an immediate retyped to imm_type; src1 is always a register.
 * Vectors with a W/UW immediate are expected to validate, vectors with a
 * D/UD immediate are expected to be rejected.  Nothing is checked when
 * verx10 < 125.
 */
TEST_P(validation_test, add3_immediate_types)
{
   static const struct {
      enum brw_reg_type reg_type;
      enum brw_reg_type imm_type;
      unsigned imm_src;
      bool expected_result;
   } inst[] = {
#define INST(reg_type, imm_type, imm_src, expected_result)              \
      {                                                                 \
         BRW_TYPE_##reg_type,                                           \
         BRW_TYPE_##imm_type,                                           \
         imm_src,                                                       \
         expected_result,                                               \
      }

      /* 16-bit immediates. */
      INST( W,  W, 0, true),
      INST( W,  W, 2, true),
      INST(UW, UW, 0, true),
      INST(UW, UW, 2, true),
      INST( D,  W, 0, true),
      INST(UD,  W, 2, true),
      INST( D, UW, 0, true),
      /* Was a second copy of (UW, UW, 2); cover a UD register with a UW
       * immediate in src2 instead, completing the D/UD pattern above.
       */
      INST(UD, UW, 2, true),

      /* 32-bit immediates. */
      INST( W,  D, 0, false),
      INST( W,  D, 2, false),
      INST(UW, UD, 0, false),
      INST(UW, UD, 2, false),
      INST( D,  D, 0, false),
      INST(UD,  D, 2, false),
      INST( D, UD, 0, false),
      /* Was a second copy of (UW, UD, 2); cover a UD register with a UD
       * immediate in src2 instead.
       */
      INST(UD, UD, 2, false),
#undef INST
   };

   if (devinfo.verx10 < 125)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      const struct brw_reg reg = retype(g0, inst[i].reg_type);

      /* Only the source named by imm_src becomes an immediate. */
      const struct brw_reg src0 =
         inst[i].imm_src == 0 ? retype(brw_imm_d(0x1234), inst[i].imm_type)
                              : reg;
      const struct brw_reg src2 =
         inst[i].imm_src == 2 ? retype(brw_imm_d(0x2143), inst[i].imm_type)
                              : reg;

      brw_ADD3(p, reg, src0, reg, src2);

      EXPECT_EQ(inst[i].expected_result, validate(p));

      clear_instructions(p);
   }
}
3074
/* Builds a DPAS for each systolic depth encoding and expects only
 * BRW_SYSTOLIC_DEPTH_8 to pass validation.  Nothing is checked when
 * verx10 < 125.
 */
TEST_P(validation_test, dpas_sdepth)
{
   if (devinfo.verx10 < 125)
      return;

   static const enum gfx12_systolic_depth depth[] = {
      BRW_SYSTOLIC_DEPTH_16,
      BRW_SYSTOLIC_DEPTH_2,
      BRW_SYSTOLIC_DEPTH_4,
      BRW_SYSTOLIC_DEPTH_8,
   };

   /* The operands are the same for every depth. */
   const struct brw_reg dst  = retype(brw_vec8_grf(0, 0), BRW_TYPE_F);
   const struct brw_reg src1 = retype(brw_vec8_grf(16, 0), BRW_TYPE_HF);
   const struct brw_reg src2 = retype(brw_vec8_grf(32, 0), BRW_TYPE_HF);

   for (const enum gfx12_systolic_depth sdepth : depth) {
      brw_DPAS(p, sdepth, 8, dst, null, src1, src2);

      const bool should_pass = (sdepth == BRW_SYSTOLIC_DEPTH_8);

      EXPECT_EQ(should_pass, validate(p)) <<
         "Encoded systolic depth value is: " << sdepth;

      clear_instructions(p);
   }
}
3104
/* Builds the same DPAS under each default execution size and expects only
 * BRW_EXECUTE_8 to pass validation.  The default execution size is put back
 * to BRW_EXECUTE_8 once all sizes were tried.  Nothing is checked when
 * verx10 < 125.
 */
TEST_P(validation_test, dpas_exec_size)
{
   if (devinfo.verx10 < 125)
      return;

   static const enum brw_execution_size test_vectors[] = {
      BRW_EXECUTE_1,
      BRW_EXECUTE_2,
      BRW_EXECUTE_4,
      BRW_EXECUTE_8,
      BRW_EXECUTE_16,
      BRW_EXECUTE_32,
   };

   /* The operands are the same for every execution size. */
   const struct brw_reg dst  = retype(brw_vec8_grf(0, 0), BRW_TYPE_F);
   const struct brw_reg src1 = retype(brw_vec8_grf(16, 0), BRW_TYPE_HF);
   const struct brw_reg src2 = retype(brw_vec8_grf(32, 0), BRW_TYPE_HF);

   for (const enum brw_execution_size exec_size : test_vectors) {
      brw_set_default_exec_size(p, exec_size);

      brw_DPAS(p, BRW_SYSTOLIC_DEPTH_8, 8, dst, null, src1, src2);

      const bool should_pass = (exec_size == BRW_EXECUTE_8);

      /* The enum holds the log2 encoding; print the channel count. */
      EXPECT_EQ(should_pass, validate(p)) <<
         "Exec size = " << (1u << exec_size);

      clear_instructions(p);
   }

   brw_set_default_exec_size(p, BRW_EXECUTE_8);
}
3140
/* Builds DPAS instructions and then writes the src1/src2 sub-byte precision
 * fields by hand.  With HF sources any precision other than NONE is expected
 * to be rejected; with UB sources NONE, 4BIT and 2BIT are expected to pass
 * and the raw value 3 is expected to be rejected.  Nothing is checked when
 * verx10 < 125.
 */
TEST_P(validation_test, dpas_sub_byte_precision)
{
   if (devinfo.verx10 < 125)
      return;

   /* Short names for the precision columns of the table below. */
   static const enum gfx12_sub_byte_precision prec_none =
      BRW_SUB_BYTE_PRECISION_NONE;
   static const enum gfx12_sub_byte_precision prec_4bit =
      BRW_SUB_BYTE_PRECISION_4BIT;
   static const enum gfx12_sub_byte_precision prec_2bit =
      BRW_SUB_BYTE_PRECISION_2BIT;
   static const enum gfx12_sub_byte_precision prec_raw3 =
      (enum gfx12_sub_byte_precision) 3;

#define TV(dt, t0, t1, p1, t2, p2, r) \
   { BRW_TYPE_ ## dt, BRW_TYPE_ ## t0, BRW_TYPE_ ## t1, p1, BRW_TYPE_ ## t2, p2, r }

   static const struct {
      brw_reg_type dst_type;
      brw_reg_type src0_type;
      brw_reg_type src1_type;
      enum gfx12_sub_byte_precision src1_prec;
      brw_reg_type src2_type;
      enum gfx12_sub_byte_precision src2_prec;
      bool expected_result;
   } test_vectors[] = {
      /* Half-float sources. */
      TV( F,  F, HF, prec_none, HF, prec_none, true),
      TV( F,  F, HF, prec_none, HF, prec_4bit, false),
      TV( F,  F, HF, prec_none, HF, prec_2bit, false),
      TV( F,  F, HF, prec_4bit, HF, prec_none, false),
      TV( F,  F, HF, prec_2bit, HF, prec_none, false),

      /* Unsigned byte sources. */
      TV(UD, UD, UB, prec_none, UB, prec_none, true),
      TV(UD, UD, UB, prec_none, UB, prec_4bit, true),
      TV(UD, UD, UB, prec_none, UB, prec_2bit, true),
      TV(UD, UD, UB, prec_none, UB, prec_raw3, false),
      TV(UD, UD, UB, prec_4bit, UB, prec_none, true),
      TV(UD, UD, UB, prec_2bit, UB, prec_none, true),
      TV(UD, UD, UB, prec_raw3, UB, prec_none, false),
   };

#undef TV

   for (unsigned idx = 0; idx < ARRAY_SIZE(test_vectors); idx++) {
      const auto &tv = test_vectors[idx];

      const struct brw_reg dst  = retype(brw_vec8_grf(0, 0), tv.dst_type);
      const struct brw_reg src0 = retype(brw_vec8_grf(16, 0), tv.src0_type);
      const struct brw_reg src1 = retype(brw_vec8_grf(32, 0), tv.src1_type);
      const struct brw_reg src2 = retype(brw_vec8_grf(48, 0), tv.src2_type);

      brw_inst *dpas = brw_DPAS(p, BRW_SYSTOLIC_DEPTH_8, 8,
                                dst, src0, src1, src2);

      /* Overwrite the precision fields on the emitted instruction. */
      brw_inst_set_dpas_3src_src1_subbyte(&devinfo, dpas, tv.src1_prec);
      brw_inst_set_dpas_3src_src2_subbyte(&devinfo, dpas, tv.src2_prec);

      EXPECT_EQ(tv.expected_result, validate(p)) <<
         "test vector index = " << idx;

      clear_instructions(p);
   }
}
3263
/* Runs DPAS through the validator with a table of destination/source type
 * combinations and the expected verdict for each.  Nothing is checked when
 * verx10 < 125.
 */
TEST_P(validation_test, dpas_types)
{
   if (devinfo.verx10 < 125)
      return;

#define TV(dst, s0, s1, s2, ok) \
   { BRW_TYPE_ ## dst, BRW_TYPE_ ## s0, BRW_TYPE_ ## s1, BRW_TYPE_ ## s2, ok }

   static const struct {
      brw_reg_type dst;
      brw_reg_type src0;
      brw_reg_type src1;
      brw_reg_type src2;
      bool valid;
   } vectors[] = {
      /* Columns: dst, src0, src1, src2, expected verdict. */
      TV( F,  F, HF, HF, true),
      TV( F, HF, HF, HF, false),
      TV(HF,  F, HF, HF, false),
      TV( F,  F,  F, HF, false),
      TV( F,  F, HF,  F, false),

      TV(DF, DF, DF, DF, false),
      TV(DF, DF, DF,  F, false),
      TV(DF, DF,  F, DF, false),
      TV(DF,  F, DF, DF, false),
      TV(DF, DF, DF, HF, false),
      TV(DF, DF, HF, DF, false),
      TV(DF, HF, DF, DF, false),

      TV(UD, UD, UB, UB, true),
      TV(UD, UD, UB, UD, false),
      TV(UD, UD, UD, UB, false),
      TV(UD, UD, UB, UW, false),
      TV(UD, UD, UW, UB, false),

      TV(UD, UB, UB, UB, false),
      TV(UD, UW, UB, UB, false),

      TV(UQ, UQ, UB, UB, false),
      TV(UQ, UQ, UB, UQ, false),
      TV(UQ, UQ, UQ, UB, false),
      TV(UQ, UQ, UB, UW, false),
      TV(UQ, UQ, UW, UB, false),

      TV( D,  D,  B,  B, true),
      TV( D,  D,  B, UB, true),
      TV( D,  D, UB,  B, true),
      TV( D, UD,  B,  B, true),

      TV( D,  D,  B,  D, false),
      TV( D,  D,  D,  B, false),
      TV( D,  D,  B,  W, false),
      TV( D,  D,  W,  B, false),

      TV( D,  B,  B,  B, false),
      TV( D,  W,  B,  B, false),

      TV( Q,  Q,  B,  B, false),
      TV( Q,  Q,  B,  Q, false),
      TV( Q,  Q,  Q,  B, false),
      TV( Q,  Q,  B,  W, false),
      TV( Q,  Q,  W,  B, false),

      TV(UD, UD, UB,  B, false),
      TV(UD, UD,  B, UB, false),
      TV(UD,  D, UB, UB, false),
   };

#undef TV

   for (unsigned idx = 0; idx < ARRAY_SIZE(vectors); idx++) {
      const auto &tv = vectors[idx];

      const struct brw_reg dst  = retype(brw_vec8_grf(0, 0), tv.dst);
      const struct brw_reg src0 = retype(brw_vec8_grf(16, 0), tv.src0);
      const struct brw_reg src1 = retype(brw_vec8_grf(32, 0), tv.src1);
      const struct brw_reg src2 = retype(brw_vec8_grf(48, 0), tv.src2);

      brw_DPAS(p, BRW_SYSTOLIC_DEPTH_8, 8, dst, src0, src1, src2);

      EXPECT_EQ(tv.valid, validate(p)) <<
         "test vector index = " << idx;

      clear_instructions(p);
   }
}
3349
/* Builds DPAS instructions whose destination and sources carry hand-written
 * subnr values.  Only the vectors with every subnr equal to zero are expected
 * to pass validation.  Nothing is checked when verx10 < 125.
 */
TEST_P(validation_test, dpas_src_subreg_nr)
{
   if (devinfo.verx10 < 125)
      return;

#define TV(dst_t, dst_sub, s0_t, s0_sub, s12_t, s1_sub, s2_sub, ok) \
   { BRW_TYPE_ ## dst_t, dst_sub, BRW_TYPE_ ## s0_t, s0_sub,        \
     BRW_TYPE_ ## s12_t, s1_sub, s2_sub, ok }

   static const struct {
      brw_reg_type dst_type;
      unsigned dst_sub;
      brw_reg_type src0_type;
      unsigned src0_sub;
      brw_reg_type src12_type;   /* shared by src1 and src2 */
      unsigned src1_sub;
      unsigned src2_sub;
      bool valid;
   } vectors[] = {
      /* All subnr zero. */
      TV( F,  0,  F,  0, HF,  0,  0, true),
      TV( D,  0,  D,  0,  B,  0,  0, true),
      TV( D,  0,  D,  0, UB,  0,  0, true),
      TV( D,  0, UD,  0,  B,  0,  0, true),

      /* Non-zero destination subnr. */
      TV( F,  1,  F,  0, HF,  0,  0, false),
      TV( F,  2,  F,  0, HF,  0,  0, false),
      TV( F,  3,  F,  0, HF,  0,  0, false),
      TV( F,  4,  F,  0, HF,  0,  0, false),
      TV( F,  5,  F,  0, HF,  0,  0, false),
      TV( F,  6,  F,  0, HF,  0,  0, false),
      TV( F,  7,  F,  0, HF,  0,  0, false),

      /* Non-zero src0 subnr. */
      TV( F,  0,  F,  1, HF,  0,  0, false),
      TV( F,  0,  F,  2, HF,  0,  0, false),
      TV( F,  0,  F,  3, HF,  0,  0, false),
      TV( F,  0,  F,  4, HF,  0,  0, false),
      TV( F,  0,  F,  5, HF,  0,  0, false),
      TV( F,  0,  F,  6, HF,  0,  0, false),
      TV( F,  0,  F,  7, HF,  0,  0, false),

      /* Non-zero src1 subnr. */
      TV( F,  0,  F,  0, HF,  1,  0, false),
      TV( F,  0,  F,  0, HF,  2,  0, false),
      TV( F,  0,  F,  0, HF,  3,  0, false),
      TV( F,  0,  F,  0, HF,  4,  0, false),
      TV( F,  0,  F,  0, HF,  5,  0, false),
      TV( F,  0,  F,  0, HF,  6,  0, false),
      TV( F,  0,  F,  0, HF,  7,  0, false),
      TV( F,  0,  F,  0, HF,  8,  0, false),
      TV( F,  0,  F,  0, HF,  9,  0, false),
      TV( F,  0,  F,  0, HF, 10,  0, false),
      TV( F,  0,  F,  0, HF, 11,  0, false),
      TV( F,  0,  F,  0, HF, 12,  0, false),
      TV( F,  0,  F,  0, HF, 13,  0, false),
      TV( F,  0,  F,  0, HF, 14,  0, false),
      TV( F,  0,  F,  0, HF, 15,  0, false),

      /* Non-zero src2 subnr. */
      TV( F,  0,  F,  0, HF,  0,  1, false),
      TV( F,  0,  F,  0, HF,  0,  2, false),
      TV( F,  0,  F,  0, HF,  0,  3, false),
      TV( F,  0,  F,  0, HF,  0,  4, false),
      TV( F,  0,  F,  0, HF,  0,  5, false),
      TV( F,  0,  F,  0, HF,  0,  6, false),
      TV( F,  0,  F,  0, HF,  0,  7, false),
      TV( F,  0,  F,  0, HF,  0,  8, false),
      TV( F,  0,  F,  0, HF,  0,  9, false),
      TV( F,  0,  F,  0, HF,  0, 10, false),
      TV( F,  0,  F,  0, HF,  0, 11, false),
      TV( F,  0,  F,  0, HF,  0, 12, false),
      TV( F,  0,  F,  0, HF,  0, 13, false),
      TV( F,  0,  F,  0, HF,  0, 14, false),
      TV( F,  0,  F,  0, HF,  0, 15, false),

      /* These meet the requirements, but the subnr they specify belongs to
       * the next register.  A subnr of 32 cannot currently be expressed for
       * the B and UB values because brw_reg::subnr is only 5 bits.
       */
      TV( F, 16,  F,  0, HF,  0,  0, false),
      TV( F,  0,  F, 16, HF,  0,  0, false),
      TV( F,  0,  F,  0, HF,  0, 16, false),

      TV( D, 16,  D,  0,  B,  0,  0, false),
      TV( D,  0,  D, 16,  B,  0,  0, false),
   };

#undef TV

   for (unsigned idx = 0; idx < ARRAY_SIZE(vectors); idx++) {
      const auto &tv = vectors[idx];

      /* For DPAS, subnr is in units of datatype precision rather than bytes
       * as it is for every other instruction, so each value is stored
       * directly instead of going through byte_offset() or similar.
       */
      struct brw_reg dst = retype(brw_vec8_grf(0, 0), tv.dst_type);
      dst.subnr = tv.dst_sub;

      struct brw_reg src0 = retype(brw_vec8_grf(16, 0), tv.src0_type);
      src0.subnr = tv.src0_sub;

      struct brw_reg src1 = retype(brw_vec8_grf(32, 0), tv.src12_type);
      src1.subnr = tv.src1_sub;

      struct brw_reg src2 = retype(brw_vec8_grf(48, 0), tv.src12_type);
      src2.subnr = tv.src2_sub;

      brw_DPAS(p, BRW_SYSTOLIC_DEPTH_8, 8, dst, src0, src1, src2);

      EXPECT_EQ(tv.valid, validate(p)) <<
         "test vector index = " << idx;

      clear_instructions(p);
   }
}
3462