1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <gtest/gtest.h>
25 #include "elk_disasm_info.h"
26 #include "elk_eu.h"
27 #include "elk_eu_defines.h"
28 #include "util/bitset.h"
29 #include "util/ralloc.h"
30
/* Parameter type for validation_test: one short platform name per entry.
 * The name is handed to intel_device_name_to_pci_device_id() in
 * validation_test::SetUp() and is also returned by the gfx_name functor as
 * the name of the test instantiation.
 */
struct intel_gfx_info {
   const char *name;
};

/* Platforms every test in this file is instantiated for. */
static const struct intel_gfx_info gfx_names[] = {
   { "brw" },
   { "g4x" },
   { "ilk" },
   { "snb" },
   { "ivb" },
   { "hsw" },
   { "byt" },
   { "bdw" },
   { "chv" },
};
44
45 class validation_test: public ::testing::TestWithParam<struct intel_gfx_info> {
46 virtual void SetUp();
47
48 public:
49 validation_test();
50 virtual ~validation_test();
51
52 struct elk_isa_info isa;
53 struct elk_codegen *p;
54 struct intel_device_info devinfo;
55 };
56
validation_test()57 validation_test::validation_test()
58 {
59 p = rzalloc(NULL, struct elk_codegen);
60 memset(&devinfo, 0, sizeof(devinfo));
61 }
62
/* Frees the codegen state that the constructor allocated with rzalloc(). */
validation_test::~validation_test()
{
   ralloc_free(p);
}
67
SetUp()68 void validation_test::SetUp()
69 {
70 struct intel_gfx_info info = GetParam();
71 int devid = intel_device_name_to_pci_device_id(info.name);
72
73 intel_get_device_info_from_pci_id(devid, &devinfo);
74
75 elk_init_isa_info(&isa, &devinfo);
76
77 elk_init_codegen(&isa, p, p);
78 }
79
80 struct gfx_name {
81 template <class ParamType>
82 std::string
operator ()gfx_name83 operator()(const ::testing::TestParamInfo<ParamType>& info) const {
84 return info.param.name;
85 }
86 };
87
/* Instantiate every validation_test once per entry of gfx_names, under the
 * "eu_assembly" prefix, using gfx_name to name each instantiation.
 */
INSTANTIATE_TEST_SUITE_P(
   eu_assembly, validation_test,
   ::testing::ValuesIn(gfx_names),
   gfx_name()
);
93
94 static bool
validate(struct elk_codegen * p)95 validate(struct elk_codegen *p)
96 {
97 const bool print = getenv("TEST_DEBUG");
98 struct elk_disasm_info *disasm = elk_disasm_initialize(p->isa, NULL);
99
100 if (print) {
101 elk_disasm_new_inst_group(disasm, 0);
102 elk_disasm_new_inst_group(disasm, p->next_insn_offset);
103 }
104
105 bool ret = elk_validate_instructions(p->isa, p->store, 0,
106 p->next_insn_offset, disasm);
107
108 if (print) {
109 elk_dump_assembly(p->store, 0, p->next_insn_offset, disasm, NULL);
110 }
111 ralloc_free(disasm);
112
113 return ret;
114 }
115
/* Shorthands used throughout the tests. */

/* Address of the last instruction currently in p->store; requires a
 * struct elk_codegen pointer named p in scope and at least one instruction.
 */
#define last_inst    (&p->store[p->nr_insn - 1])
/* Register operand built by elk_vec8_grf(0, 0). */
#define g0           elk_vec8_grf(0, 0)
/* Register operand built by elk_acc_reg(8). */
#define acc0         elk_acc_reg(8)
/* The null register operand. */
#define null         elk_null_reg()
/* Float immediate 0.0. */
#define zero         elk_imm_f(0.0f)
121
122 static void
clear_instructions(struct elk_codegen * p)123 clear_instructions(struct elk_codegen *p)
124 {
125 p->next_insn_offset = 0;
126 p->nr_insn = 0;
127 }
128
/* Baseline: an ADD of g0 with itself is expected to validate. */
TEST_P(validation_test, sanity)
{
   elk_ADD(p, g0, g0, g0);

   const bool ok = validate(p);
   EXPECT_TRUE(ok);
}
135
/* A MOV whose src0 is the null register is expected to be rejected. */
TEST_P(validation_test, src0_null_reg)
{
   elk_MOV(p, g0, null);

   const bool ok = validate(p);
   EXPECT_FALSE(ok);
}
142
/* An ADD whose src1 is the null register is expected to be rejected. */
TEST_P(validation_test, src1_null_reg)
{
   elk_ADD(p, g0, g0, null);

   const bool ok = validate(p);
   EXPECT_FALSE(ok);
}
149
/* A math instruction with a null src0 is expected to be rejected.  The
 * instruction is emitted with whichever math helper matches the platform
 * version.
 */
TEST_P(validation_test, math_src0_null_reg)
{
   if (devinfo.ver < 6) {
      elk_gfx4_math(p, g0, ELK_MATH_FUNCTION_SIN, 0, null, ELK_MATH_PRECISION_FULL);
   } else {
      elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_SIN, null, null);
   }

   const bool ok = validate(p);
   EXPECT_FALSE(ok);
}
160
/* A two-source math instruction (POW) with a null src1 is expected to be
 * rejected on Gfx6+.
 */
TEST_P(validation_test, math_src1_null_reg)
{
   if (devinfo.ver < 6) {
      /* Math instructions on Gfx4/5 are actually SEND messages with payloads.
       * src1 is an immediate message descriptor set by elk_gfx4_math, so
       * there is nothing to check here.
       */
      return;
   }

   elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_POW, g0, null);
   EXPECT_FALSE(validate(p));
}
172
/* Hardware opcode 46 means different things per generation:
 *
 *    Gfx4 and Gfx5: "push"
 *    Gfx6:          "fork"
 *    Gfx7:          reserved
 *    Gfx8+:         "goto"
 *
 * so it must be rejected on Gfx7 only.
 */
TEST_P(validation_test, opcode46)
{
   elk_next_insn(p, elk_opcode_decode(&isa, 46));

   const bool is_reserved = devinfo.ver == 7;
   EXPECT_EQ(!is_reserved, validate(p));
}
188
/* Every defined execution size must validate; the two encodings following
 * ELK_EXECUTE_32 must be rejected.
 */
TEST_P(validation_test, invalid_exec_size_encoding)
{
   const struct {
      enum elk_execution_size exec_size;
      bool expected_result;
   } test_case[] = {
      { ELK_EXECUTE_1,      true  },
      { ELK_EXECUTE_2,      true  },
      { ELK_EXECUTE_4,      true  },
      { ELK_EXECUTE_8,      true  },
      { ELK_EXECUTE_16,     true  },
      { ELK_EXECUTE_32,     true  },

      { (enum elk_execution_size)((int)ELK_EXECUTE_32 + 1), false },
      { (enum elk_execution_size)((int)ELK_EXECUTE_32 + 2), false },
   };

   for (const auto &tc : test_case) {
      elk_MOV(p, g0, g0);
      auto *inst = last_inst;

      elk_inst_set_exec_size(&devinfo, inst, tc.exec_size);
      elk_inst_set_src0_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
      elk_inst_set_dst_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);

      /* Use a <0;1,0> source region for the scalar case and <2;2,1> for
       * everything else.
       */
      if (tc.exec_size != ELK_EXECUTE_1) {
         elk_inst_set_src0_vstride(&devinfo, inst, ELK_VERTICAL_STRIDE_2);
         elk_inst_set_src0_width(&devinfo, inst, ELK_WIDTH_2);
         elk_inst_set_src0_hstride(&devinfo, inst, ELK_HORIZONTAL_STRIDE_1);
      } else {
         elk_inst_set_src0_vstride(&devinfo, inst, ELK_VERTICAL_STRIDE_0);
         elk_inst_set_src0_width(&devinfo, inst, ELK_WIDTH_1);
         elk_inst_set_src0_hstride(&devinfo, inst, ELK_HORIZONTAL_STRIDE_0);
      }

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
228
/* The message register file encoding is only accepted up to Gfx6; on later
 * platforms it must be rejected, both as a MOV destination and as the src0
 * of a math instruction.
 */
TEST_P(validation_test, invalid_file_encoding)
{
   const bool mrf_accepted = devinfo.ver <= 6;

   /* MRF as destination. */
   elk_MOV(p, g0, g0);
   elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_MESSAGE_REGISTER_FILE, ELK_REGISTER_TYPE_F);

   EXPECT_EQ(mrf_accepted, validate(p));

   clear_instructions(p);

   /* MRF as src0 of a math instruction. */
   if (devinfo.ver >= 6) {
      elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_SIN, g0, null);
   } else {
      elk_gfx4_math(p, g0, ELK_MATH_FUNCTION_SIN, 0, g0, ELK_MATH_PRECISION_FULL);
   }
   elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_MESSAGE_REGISTER_FILE, ELK_REGISTER_TYPE_F);

   EXPECT_EQ(mrf_accepted, validate(p));
}
255
/* For both the GRF and the immediate file:
 *
 *  1. every register type expected to be supported on this platform must
 *     validate when used in a MOV, and
 *  2. every hardware type encoding that none of those types maps to must be
 *     rejected.
 */
TEST_P(validation_test, invalid_type_encoding)
{
   enum elk_reg_file files[2] = {
      ELK_GENERAL_REGISTER_FILE,
      ELK_IMMEDIATE_VALUE,
   };

   for (unsigned i = 0; i < ARRAY_SIZE(files); i++) {
      const enum elk_reg_file file = files[i];
      const int num_bits = devinfo.ver >= 8 ? 4 : 3;
      const int num_encodings = 1 << num_bits;

      /* The data types are encoded into <num_bits> bits to be used in hardware
       * instructions, so keep a record in a bitset the invalid patterns so
       * they can be verified to be invalid when used.
       */
      BITSET_DECLARE(invalid_encodings, num_encodings);

      const struct {
         enum elk_reg_type type;
         bool expected_result;
      } test_case[] = {
         { ELK_REGISTER_TYPE_NF, devinfo.ver == 11 && file != IMM },
         { ELK_REGISTER_TYPE_DF, devinfo.has_64bit_float && (devinfo.ver >= 8 || file != IMM) },
         { ELK_REGISTER_TYPE_F,  true },
         { ELK_REGISTER_TYPE_HF, devinfo.ver >= 8 },
         { ELK_REGISTER_TYPE_VF, file == IMM },
         { ELK_REGISTER_TYPE_Q,  devinfo.has_64bit_int },
         { ELK_REGISTER_TYPE_UQ, devinfo.has_64bit_int },
         { ELK_REGISTER_TYPE_D,  true },
         { ELK_REGISTER_TYPE_UD, true },
         { ELK_REGISTER_TYPE_W,  true },
         { ELK_REGISTER_TYPE_UW, true },
         { ELK_REGISTER_TYPE_B,  file == FIXED_GRF },
         { ELK_REGISTER_TYPE_UB, file == FIXED_GRF },
         { ELK_REGISTER_TYPE_V,  file == IMM },
         { ELK_REGISTER_TYPE_UV, devinfo.ver >= 6 && file == IMM },
      };

      /* Initially assume all hardware encodings are invalid */
      BITSET_ONES(invalid_encodings);

      elk_set_default_exec_size(p, ELK_EXECUTE_4);

      /* Iterate by element rather than with a second index named i, which
       * would shadow the index of the enclosing loop over files.
       */
      for (const auto &tc : test_case) {
         if (!tc.expected_result)
            continue;

         /* hw_type is an unsigned hardware encoding, so compare it against
          * the hardware-type sentinel, as the 3-src variant of this test
          * does.
          */
         unsigned hw_type = elk_reg_type_to_hw_type(&devinfo, file, tc.type);
         if (hw_type != INVALID_HW_REG_TYPE) {
            /* ... and remove valid encodings from the set */
            assert(BITSET_TEST(invalid_encodings, hw_type));
            BITSET_CLEAR(invalid_encodings, hw_type);
         }

         if (file == FIXED_GRF) {
            struct elk_reg g = retype(g0, tc.type);
            elk_MOV(p, g, g);
            elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
            elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
            elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
         } else {
            /* The vector immediate types are only used for the source; pick
             * a matching scalar type for the destination.
             */
            enum elk_reg_type t;

            switch (tc.type) {
            case ELK_REGISTER_TYPE_V:
               t = ELK_REGISTER_TYPE_W;
               break;
            case ELK_REGISTER_TYPE_UV:
               t = ELK_REGISTER_TYPE_UW;
               break;
            case ELK_REGISTER_TYPE_VF:
               t = ELK_REGISTER_TYPE_F;
               break;
            default:
               t = tc.type;
               break;
            }

            struct elk_reg g = retype(g0, t);
            elk_MOV(p, g, retype(elk_imm_w(0), tc.type));
         }

         EXPECT_TRUE(validate(p));

         clear_instructions(p);
      }

      /* The remaining encodings in invalid_encodings do not have a mapping
       * from ELK_REGISTER_TYPE_* and must be invalid. Verify that invalid
       * encodings are rejected by the validator.
       */
      int e;
      BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
         if (file == FIXED_GRF) {
            elk_MOV(p, g0, g0);
            elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
            elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
            elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
         } else {
            elk_MOV(p, g0, elk_imm_w(0));
         }
         elk_inst_set_dst_reg_hw_type(&devinfo, last_inst, e);
         elk_inst_set_src0_reg_hw_type(&devinfo, last_inst, e);

         EXPECT_FALSE(validate(p));

         clear_instructions(p);
      }
   }
}
366
/* Align16 three-source variant of the type-encoding test: supported types
 * must validate in a MAD (float types) or BFE (integer types), and every
 * hardware encoding without a mapping must be rejected.
 */
TEST_P(validation_test, invalid_type_encoding_3src_a16)
{
   /* 3-src instructions in align16 mode only supported on Gfx6-10 */
   if (devinfo.ver < 6)
      return;

   const int num_bits = devinfo.ver >= 8 ? 3 : 2;
   const int num_encodings = 1 << num_bits;

   /* Tracks which of the <num_bits>-bit hardware type encodings have no
    * register type mapped to them; those are checked for rejection below.
    */
   BITSET_DECLARE(invalid_encodings, num_encodings);

   const struct {
      enum elk_reg_type type;
      bool expected_result;
   } test_case[] = {
      { ELK_REGISTER_TYPE_DF, devinfo.ver >= 7 },
      { ELK_REGISTER_TYPE_F,  true },
      { ELK_REGISTER_TYPE_HF, devinfo.ver >= 8 },
      { ELK_REGISTER_TYPE_D,  devinfo.ver >= 7 },
      { ELK_REGISTER_TYPE_UD, devinfo.ver >= 7 },
   };

   /* Start with every encoding marked invalid. */
   BITSET_ONES(invalid_encodings);

   elk_set_default_access_mode(p, ELK_ALIGN_16);
   elk_set_default_exec_size(p, ELK_EXECUTE_4);

   for (const auto &tc : test_case) {
      if (!tc.expected_result)
         continue;

      unsigned hw_type = elk_reg_type_to_a16_hw_3src_type(&devinfo, tc.type);
      if (hw_type != INVALID_HW_REG_TYPE) {
         /* This encoding is in use, so it is not an invalid one. */
         assert(BITSET_TEST(invalid_encodings, hw_type));
         BITSET_CLEAR(invalid_encodings, hw_type);
      }

      struct elk_reg g = retype(g0, tc.type);
      if (elk_reg_type_is_integer(tc.type)) {
         elk_BFE(p, g, g, g, g);
      } else {
         elk_MAD(p, g, g, g, g);
      }

      EXPECT_TRUE(validate(p));

      clear_instructions(p);
   }

   /* Whatever is still set in invalid_encodings has no ELK_REGISTER_TYPE_*
    * mapping; the validator must reject it.
    */
   int e;
   BITSET_FOREACH_SET(e, invalid_encodings, num_encodings) {
      /* Pass 0 uses MAD, pass 1 uses BFE; Gfx6 only runs the MAD pass. */
      for (unsigned pass = 0; pass < 2; pass++) {
         if (pass != 0) {
            elk_BFE(p, g0, g0, g0, g0);
         } else {
            elk_MAD(p, g0, g0, g0, g0);
         }

         elk_inst_set_3src_a16_dst_hw_type(&devinfo, last_inst, e);
         elk_inst_set_3src_a16_src_hw_type(&devinfo, last_inst, e);

         EXPECT_FALSE(validate(p));

         clear_instructions(p);

         if (devinfo.ver == 6)
            break;
      }
   }
}
446
447 TEST_P(validation_test, 3src_inst_access_mode)
448 {
449 /* 3-src instructions only supported on Gfx6+ */
450 if (devinfo.ver < 6)
451 return;
452
453 const struct {
454 unsigned mode;
455 bool expected_result;
456 } test_case[] = {
457 { ELK_ALIGN_1, false},
458 { ELK_ALIGN_16, true },
459 };
460
461 for (unsigned i = 0; i < ARRAY_SIZE(test_case); i++) {
462 elk_set_default_access_mode(p, ELK_ALIGN_16);
463
464 elk_MAD(p, g0, g0, g0, g0);
465 elk_inst_set_access_mode(&devinfo, last_inst, test_case[i].mode);
466
467 EXPECT_EQ(test_case[i].expected_result, validate(p));
468
469 clear_instructions(p);
470 }
471 }
472
473 /* When the Execution Data Type is wider than the destination data type, the
474 * destination must [...] specify a HorzStride equal to the ratio in sizes of
475 * the two data types.
476 */
TEST_P(validation_test,dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size)477 TEST_P(validation_test, dest_stride_must_be_equal_to_the_ratio_of_exec_size_to_dest_size)
478 {
479 elk_ADD(p, g0, g0, g0);
480 elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
481 elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
482 elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
483
484 EXPECT_FALSE(validate(p));
485
486 clear_instructions(p);
487
488 elk_ADD(p, g0, g0, g0);
489 elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
490 elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
491 elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
492 elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
493
494 EXPECT_TRUE(validate(p));
495 }
496
497 /* When the Execution Data Type is wider than the destination data type, the
498 * destination must be aligned as required by the wider execution data type
499 * [...]
500 */
TEST_P(validation_test,dst_subreg_must_be_aligned_to_exec_type_size)501 TEST_P(validation_test, dst_subreg_must_be_aligned_to_exec_type_size)
502 {
503 elk_ADD(p, g0, g0, g0);
504 elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 2);
505 elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
506 elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
507 elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
508 elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
509
510 EXPECT_FALSE(validate(p));
511
512 clear_instructions(p);
513
514 elk_ADD(p, g0, g0, g0);
515 elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_4);
516 elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 8);
517 elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
518 elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
519 elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
520 elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
521 elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
522 elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
523 elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
524 elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
525 elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
526 elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
527
528 EXPECT_TRUE(validate(p));
529 }
530
531 /* ExecSize must be greater than or equal to Width. */
TEST_P(validation_test,exec_size_less_than_width)532 TEST_P(validation_test, exec_size_less_than_width)
533 {
534 elk_ADD(p, g0, g0, g0);
535 elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_16);
536
537 EXPECT_FALSE(validate(p));
538
539 clear_instructions(p);
540
541 elk_ADD(p, g0, g0, g0);
542 elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_16);
543
544 EXPECT_FALSE(validate(p));
545 }
546
547 /* If ExecSize = Width and HorzStride ≠ 0,
548 * VertStride must be set to Width * HorzStride.
549 */
TEST_P(validation_test,vertical_stride_is_width_by_horizontal_stride)550 TEST_P(validation_test, vertical_stride_is_width_by_horizontal_stride)
551 {
552 elk_ADD(p, g0, g0, g0);
553 elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
554
555 EXPECT_FALSE(validate(p));
556
557 clear_instructions(p);
558
559 elk_ADD(p, g0, g0, g0);
560 elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
561
562 EXPECT_FALSE(validate(p));
563 }
564
565 /* If Width = 1, HorzStride must be 0 regardless of the values
566 * of ExecSize and VertStride.
567 */
TEST_P(validation_test,horizontal_stride_must_be_0_if_width_is_1)568 TEST_P(validation_test, horizontal_stride_must_be_0_if_width_is_1)
569 {
570 elk_ADD(p, g0, g0, g0);
571 elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
572 elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_1);
573 elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
574
575 EXPECT_FALSE(validate(p));
576
577 clear_instructions(p);
578
579 elk_ADD(p, g0, g0, g0);
580 elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
581 elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_1);
582 elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
583
584 EXPECT_FALSE(validate(p));
585 }
586
587 /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
TEST_P(validation_test,scalar_region_must_be_0_1_0)588 TEST_P(validation_test, scalar_region_must_be_0_1_0)
589 {
590 struct elk_reg g0_0 = elk_vec1_grf(0, 0);
591
592 elk_ADD(p, g0, g0, g0_0);
593 elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_1);
594 elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_1);
595 elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_1);
596 elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
597
598 EXPECT_FALSE(validate(p));
599
600 clear_instructions(p);
601
602 elk_ADD(p, g0, g0_0, g0);
603 elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_1);
604 elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_1);
605 elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_1);
606 elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
607
608 EXPECT_FALSE(validate(p));
609 }
610
611 /* If VertStride = HorzStride = 0, Width must be 1 regardless of the value
612 * of ExecSize.
613 */
TEST_P(validation_test,zero_stride_implies_0_1_0)614 TEST_P(validation_test, zero_stride_implies_0_1_0)
615 {
616 elk_ADD(p, g0, g0, g0);
617 elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
618 elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_2);
619 elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
620
621 EXPECT_FALSE(validate(p));
622
623 clear_instructions(p);
624
625 elk_ADD(p, g0, g0, g0);
626 elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
627 elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_2);
628 elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
629
630 EXPECT_FALSE(validate(p));
631 }
632
633 /* Dst.HorzStride must not be 0. */
TEST_P(validation_test,dst_horizontal_stride_0)634 TEST_P(validation_test, dst_horizontal_stride_0)
635 {
636 elk_ADD(p, g0, g0, g0);
637 elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
638
639 EXPECT_FALSE(validate(p));
640
641 clear_instructions(p);
642
643 elk_set_default_access_mode(p, ELK_ALIGN_16);
644
645 elk_ADD(p, g0, g0, g0);
646 elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
647
648 EXPECT_FALSE(validate(p));
649 }
650
651 /* VertStride must be used to cross ELK_GENERAL_REGISTER_FILE register boundaries. This rule implies
652 * that elements within a 'Width' cannot cross ELK_GENERAL_REGISTER_FILE boundaries.
653 */
TEST_P(validation_test,must_not_cross_grf_boundary_in_a_width)654 TEST_P(validation_test, must_not_cross_grf_boundary_in_a_width)
655 {
656 elk_ADD(p, g0, g0, g0);
657 elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 4);
658
659 EXPECT_FALSE(validate(p));
660
661 clear_instructions(p);
662
663 elk_ADD(p, g0, g0, g0);
664 elk_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 4);
665
666 EXPECT_FALSE(validate(p));
667
668 clear_instructions(p);
669
670 elk_ADD(p, g0, g0, g0);
671 elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
672 elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
673 elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
674
675 EXPECT_FALSE(validate(p));
676
677 clear_instructions(p);
678
679 elk_ADD(p, g0, g0, g0);
680 elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
681 elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
682 elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
683
684 EXPECT_FALSE(validate(p));
685 }
686
687 /* Destination Horizontal must be 1 in Align16 */
TEST_P(validation_test,dst_hstride_on_align16_must_be_1)688 TEST_P(validation_test, dst_hstride_on_align16_must_be_1)
689 {
690 elk_set_default_access_mode(p, ELK_ALIGN_16);
691
692 elk_ADD(p, g0, g0, g0);
693 elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
694
695 EXPECT_FALSE(validate(p));
696
697 clear_instructions(p);
698
699 elk_ADD(p, g0, g0, g0);
700 elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
701
702 EXPECT_TRUE(validate(p));
703 }
704
705 /* VertStride must be 0 or 4 in Align16 */
TEST_P(validation_test,vstride_on_align16_must_be_0_or_4)706 TEST_P(validation_test, vstride_on_align16_must_be_0_or_4)
707 {
708 const struct {
709 enum elk_vertical_stride vstride;
710 bool expected_result;
711 } vstride[] = {
712 { ELK_VERTICAL_STRIDE_0, true },
713 { ELK_VERTICAL_STRIDE_1, false },
714 { ELK_VERTICAL_STRIDE_2, devinfo.verx10 >= 75 },
715 { ELK_VERTICAL_STRIDE_4, true },
716 { ELK_VERTICAL_STRIDE_8, false },
717 { ELK_VERTICAL_STRIDE_16, false },
718 { ELK_VERTICAL_STRIDE_32, false },
719 { ELK_VERTICAL_STRIDE_ONE_DIMENSIONAL, false },
720 };
721
722 elk_set_default_access_mode(p, ELK_ALIGN_16);
723
724 for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
725 elk_ADD(p, g0, g0, g0);
726 elk_inst_set_src0_vstride(&devinfo, last_inst, vstride[i].vstride);
727
728 EXPECT_EQ(vstride[i].expected_result, validate(p));
729
730 clear_instructions(p);
731 }
732
733 for (unsigned i = 0; i < ARRAY_SIZE(vstride); i++) {
734 elk_ADD(p, g0, g0, g0);
735 elk_inst_set_src1_vstride(&devinfo, last_inst, vstride[i].vstride);
736
737 EXPECT_EQ(vstride[i].expected_result, validate(p));
738
739 clear_instructions(p);
740 }
741 }
742
743 /* In Direct Addressing mode, a source cannot span more than 2 adjacent ELK_GENERAL_REGISTER_FILE
744 * registers.
745 */
TEST_P(validation_test,source_cannot_span_more_than_2_registers)746 TEST_P(validation_test, source_cannot_span_more_than_2_registers)
747 {
748 elk_ADD(p, g0, g0, g0);
749 elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_32);
750 elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
751 elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
752 elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
753 elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
754 elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_8);
755 elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
756
757 EXPECT_FALSE(validate(p));
758
759 clear_instructions(p);
760
761 elk_ADD(p, g0, g0, g0);
762 elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
763 elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
764 elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
765 elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
766 elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
767 elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_8);
768 elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
769 elk_inst_set_src1_da1_subreg_nr(&devinfo, last_inst, 2);
770
771 EXPECT_TRUE(validate(p));
772
773 clear_instructions(p);
774
775 elk_ADD(p, g0, g0, g0);
776 elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
777
778 EXPECT_TRUE(validate(p));
779 }
780
781 /* A destination cannot span more than 2 adjacent ELK_GENERAL_REGISTER_FILE registers. */
TEST_P(validation_test,destination_cannot_span_more_than_2_registers)782 TEST_P(validation_test, destination_cannot_span_more_than_2_registers)
783 {
784 elk_ADD(p, g0, g0, g0);
785 elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_32);
786 elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
787 elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
788 elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
789 elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
790
791 EXPECT_FALSE(validate(p));
792
793 clear_instructions(p);
794
795 elk_ADD(p, g0, g0, g0);
796 elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_8);
797 elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 6);
798 elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_4);
799 elk_inst_set_dst_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
800 elk_inst_set_src0_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
801 elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
802 elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
803 elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
804 elk_inst_set_src1_file_type(&devinfo, last_inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
805 elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_16);
806 elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
807 elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);
808
809 EXPECT_TRUE(validate(p));
810 }
811
/* Four W-typed ADDs that differ in where the destination writes land
 * relative to the two OWords of the destination register.  The first three
 * layouts must be accepted; the uneven split in the last one must be
 * rejected.
 */
TEST_P(validation_test, src_region_spans_two_regs_dst_region_spans_one)
{
   /* Case 1: writes to dest are to the lower OWord */
   elk_ADD(p, g0, g0, g0);
   auto *inst = last_inst;
   elk_inst_set_dst_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src0_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src1_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src1_vstride(&devinfo, inst, ELK_VERTICAL_STRIDE_16);
   elk_inst_set_src1_width(&devinfo, inst, ELK_WIDTH_4);
   elk_inst_set_src1_hstride(&devinfo, inst, ELK_HORIZONTAL_STRIDE_2);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* Case 2: writes to dest are to the upper OWord */
   elk_ADD(p, g0, g0, g0);
   inst = last_inst;
   elk_inst_set_dst_da1_subreg_nr(&devinfo, inst, 16);
   elk_inst_set_dst_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src0_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src1_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src1_vstride(&devinfo, inst, ELK_VERTICAL_STRIDE_16);
   elk_inst_set_src1_width(&devinfo, inst, ELK_WIDTH_4);
   elk_inst_set_src1_hstride(&devinfo, inst, ELK_HORIZONTAL_STRIDE_2);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* Case 3: writes to dest are evenly split between OWords */
   elk_ADD(p, g0, g0, g0);
   inst = last_inst;
   elk_inst_set_exec_size(&devinfo, inst, ELK_EXECUTE_16);
   elk_inst_set_dst_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src0_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src1_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src1_vstride(&devinfo, inst, ELK_VERTICAL_STRIDE_16);
   elk_inst_set_src1_width(&devinfo, inst, ELK_WIDTH_8);
   elk_inst_set_src1_hstride(&devinfo, inst, ELK_HORIZONTAL_STRIDE_2);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* Case 4: writes to dest are uneven between OWords */
   elk_ADD(p, g0, g0, g0);
   inst = last_inst;
   elk_inst_set_exec_size(&devinfo, inst, ELK_EXECUTE_4);
   elk_inst_set_dst_da1_subreg_nr(&devinfo, inst, 10);
   elk_inst_set_dst_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src0_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src0_vstride(&devinfo, inst, ELK_VERTICAL_STRIDE_4);
   elk_inst_set_src0_width(&devinfo, inst, ELK_WIDTH_4);
   elk_inst_set_src0_hstride(&devinfo, inst, ELK_HORIZONTAL_STRIDE_1);
   elk_inst_set_src1_file_type(&devinfo, inst, ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src1_vstride(&devinfo, inst, ELK_VERTICAL_STRIDE_16);
   elk_inst_set_src1_width(&devinfo, inst, ELK_WIDTH_2);
   elk_inst_set_src1_hstride(&devinfo, inst, ELK_HORIZONTAL_STRIDE_1);

   EXPECT_FALSE(validate(p));
}
871
/* A destination moved to subregister offset 4 must be rejected, while an
 * exec-size-16 ADD must be accepted.  On Gfx6+ the same offset check is
 * repeated with a math instruction.
 */
TEST_P(validation_test, dst_elements_must_be_evenly_split_between_registers)
{
   /* ADD with the destination at subregister offset 4 */
   elk_ADD(p, g0, g0, g0);
   elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 4);

   EXPECT_FALSE(validate(p));

   clear_instructions(p);

   /* ADD with exec size 16 */
   elk_ADD(p, g0, g0, g0);
   elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* The remaining cases use elk_gfx6_math and only apply to Gfx6+. */
   if (devinfo.ver < 6)
      return;

   /* Math instruction with untouched destination */
   elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_SIN, g0, null);

   EXPECT_TRUE(validate(p));

   clear_instructions(p);

   /* Math instruction with the destination at subregister offset 4 */
   elk_gfx6_math(p, g0, ELK_MATH_FUNCTION_SIN, g0, null);
   elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 4);

   EXPECT_FALSE(validate(p));
}
901
/* Builds two SIMD4 ADDs with a destination horizontal stride of 4 and
 * differing source regions, then checks the validator's verdict on each.
 * The first is expected to be rejected up to ver 7 and accepted afterwards;
 * the second is expected to be accepted everywhere.
 */
TEST_P(validation_test, two_src_two_dst_source_offsets_must_be_same)
{
   /* Case 1: src0 starts at sub-register 16 with vstride 2, width 1,
    * hstride 0; src1 uses vstride 4, width 4, hstride 1.
    */
   elk_ADD(p, g0, g0, g0);
   elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_4);
   elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_4);
   elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 16);
   elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_2);
   elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_1);
   elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
   elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
   elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
   elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);

   /* Only ver <= 7 is expected to reject this encoding. */
   const bool case1_valid = devinfo.ver > 7;
   EXPECT_EQ(case1_valid, validate(p));
   clear_instructions(p);

   /* Case 2: no source sub-register override; src0 uses vstride 4, width 1,
    * hstride 0 and src1 uses vstride 8, width 2, hstride 1.
    */
   elk_ADD(p, g0, g0, g0);
   elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_4);
   elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_4);
   elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
   elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_1);
   elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
   elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_8);
   elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_2);
   elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);

   EXPECT_TRUE(validate(p));
}
935
/* Two MOVs whose source starts at sub-register 8.  Both are expected to be
 * rejected by the validator up to ver 7 and accepted from ver 8 on.
 */
TEST_P(validation_test, two_src_two_dst_each_dst_must_be_derived_from_one_src)
{
   /* devinfo is not modified by the test body, so the expected verdict can
    * be computed once for both cases.
    */
   const bool expect_valid = devinfo.ver > 7;

   /* Case 1: SIMD16 word MOV, destination hstride 2, source at sub-register 8
    * with vstride 4, width 4, hstride 1.
    */
   elk_MOV(p, g0, g0);
   elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
   elk_inst_set_dst_file_type(&devinfo, last_inst,
                              ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
   elk_inst_set_src0_file_type(&devinfo, last_inst,
                               ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 8);
   elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
   elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
   elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);

   EXPECT_EQ(expect_valid, validate(p));
   clear_instructions(p);

   /* Case 2: MOV with the destination at sub-register 16 and the source at
    * sub-register 8 with vstride 2, width 2, hstride 1.
    */
   elk_MOV(p, g0, g0);
   elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 16);
   elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, 8);
   elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_2);
   elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_2);
   elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);

   EXPECT_EQ(expect_valid, validate(p));
}
969
/* Six SIMD16 ADDs with different operand type and region combinations.
 * The first two are expected to validate on every platform; the remaining
 * four are expected to validate only from ver 8 on.
 */
TEST_P(validation_test, one_src_two_dst)
{
   const struct elk_reg scalar_g0 = elk_vec1_grf(0, 0);

   /* Verdict shared by every version-dependent case below. */
   const bool gfx8_or_later = devinfo.ver >= 8;

   /* Both sources are the vec1 register built above. */
   elk_ADD(p, g0, scalar_g0, scalar_g0);
   elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);

   EXPECT_TRUE(validate(p));
   clear_instructions(p);

   /* dst D, src0 W, src1 D. */
   elk_ADD(p, g0, g0, g0);
   elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
   elk_inst_set_dst_file_type(&devinfo, last_inst,
                              ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
   elk_inst_set_src0_file_type(&devinfo, last_inst,
                               ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src1_file_type(&devinfo, last_inst,
                               ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);

   EXPECT_TRUE(validate(p));
   clear_instructions(p);

   /* dst D, src0 D, src1 W. */
   elk_ADD(p, g0, g0, g0);
   elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
   elk_inst_set_dst_file_type(&devinfo, last_inst,
                              ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
   elk_inst_set_src0_file_type(&devinfo, last_inst,
                               ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
   elk_inst_set_src1_file_type(&devinfo, last_inst,
                               ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);

   EXPECT_EQ(gfx8_or_later, validate(p));
   clear_instructions(p);

   /* dst D, src0 W, src1 W. */
   elk_ADD(p, g0, g0, g0);
   elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
   elk_inst_set_dst_file_type(&devinfo, last_inst,
                              ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_D);
   elk_inst_set_src0_file_type(&devinfo, last_inst,
                               ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src1_file_type(&devinfo, last_inst,
                               ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);

   EXPECT_EQ(gfx8_or_later, validate(p));
   clear_instructions(p);

   /* All W, destination hstride 2, src1 with vstride 0, width 1, hstride 0. */
   elk_ADD(p, g0, g0, g0);
   elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
   elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
   elk_inst_set_dst_file_type(&devinfo, last_inst,
                              ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src0_file_type(&devinfo, last_inst,
                               ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src1_file_type(&devinfo, last_inst,
                               ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
   elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_1);
   elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);

   EXPECT_EQ(gfx8_or_later, validate(p));
   clear_instructions(p);

   /* All W, destination hstride 2, src0 with vstride 0, width 1, hstride 0. */
   elk_ADD(p, g0, g0, g0);
   elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_16);
   elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
   elk_inst_set_dst_file_type(&devinfo, last_inst,
                              ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src0_file_type(&devinfo, last_inst,
                               ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);
   elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_0);
   elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_1);
   elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_0);
   elk_inst_set_src1_file_type(&devinfo, last_inst,
                               ELK_GENERAL_REGISTER_FILE, ELK_REGISTER_TYPE_W);

   EXPECT_EQ(gfx8_or_later, validate(p));
}
1053
/* Table-driven check of MOVs with byte-typed destinations, followed by two
 * predicated SELs with byte-typed operands.
 *
 * Each table row gives the destination and source types, whether src0
 * negate, src0 abs and saturate are set, and the verdict validate() must
 * return.  Per the table, only unmodified byte-to-byte moves are accepted;
 * rows with a modifier, saturation or a wider source type are rejected.
 * Both SELs are expected to be rejected.
 */
TEST_P(validation_test, packed_byte_destination)
{
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src_type;
      bool neg, abs, sat;
      bool expected_result;
   } move[] = {
      /* Plain byte moves */
      { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UB, 0, 0, 0, true },
      { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_B , 0, 0, 0, true },
      { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_B , 0, 0, 0, true },
      { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_UB, 0, 0, 0, true },

      /* src0 negate set */
      { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UB, 1, 0, 0, false },
      { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_B , 1, 0, 0, false },
      { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_B , 1, 0, 0, false },
      { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_UB, 1, 0, 0, false },

      /* src0 abs set */
      { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UB, 0, 1, 0, false },
      { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_B , 0, 1, 0, false },
      { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_B , 0, 1, 0, false },
      { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_UB, 0, 1, 0, false },

      /* saturate set */
      { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UB, 0, 0, 1, false },
      { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_B , 0, 0, 1, false },
      { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_B , 0, 0, 1, false },
      { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_UB, 0, 0, 1, false },

      /* Source type wider than a byte */
      { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UW, 0, 0, 0, false },
      { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_W , 0, 0, 0, false },
      { ELK_REGISTER_TYPE_UB, ELK_REGISTER_TYPE_UD, 0, 0, 0, false },
      { ELK_REGISTER_TYPE_B , ELK_REGISTER_TYPE_D , 0, 0, 0, false },
   };

   for (unsigned i = 0; i < ARRAY_SIZE(move); i++) {
      elk_MOV(p, retype(g0, move[i].dst_type), retype(g0, move[i].src_type));
      elk_inst_set_src0_negate(&devinfo, last_inst, move[i].neg);
      elk_inst_set_src0_abs(&devinfo, last_inst, move[i].abs);
      elk_inst_set_saturate(&devinfo, last_inst, move[i].sat);

      /* Report the row index so a failure can be traced back to the table. */
      EXPECT_EQ(move[i].expected_result, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }

   /* Predicated SEL with unsigned byte operands: rejected. */
   elk_SEL(p, retype(g0, ELK_REGISTER_TYPE_UB),
              retype(g0, ELK_REGISTER_TYPE_UB),
              retype(g0, ELK_REGISTER_TYPE_UB));
   elk_inst_set_pred_control(&devinfo, last_inst, ELK_PREDICATE_NORMAL);

   EXPECT_FALSE(validate(p));

   clear_instructions(p);

   /* Predicated SEL with signed byte operands: rejected. */
   elk_SEL(p, retype(g0, ELK_REGISTER_TYPE_B),
              retype(g0, ELK_REGISTER_TYPE_B),
              retype(g0, ELK_REGISTER_TYPE_B));
   elk_inst_set_pred_control(&devinfo, last_inst, ELK_PREDICATE_NORMAL);

   EXPECT_FALSE(validate(p));
}
1115
/* Predicated SEL from word sources into a byte destination with horizontal
 * stride 2.  It must validate with the destination left at its default
 * sub-register; with the destination at sub-register 1 it must validate
 * only when verx10 >= 45.
 */
TEST_P(validation_test, byte_destination_relaxed_alignment)
{
   /* Destination sub-register untouched: accepted everywhere. */
   elk_SEL(p, retype(g0, ELK_REGISTER_TYPE_B),
              retype(g0, ELK_REGISTER_TYPE_W),
              retype(g0, ELK_REGISTER_TYPE_W));
   elk_inst_set_pred_control(&devinfo, last_inst, ELK_PREDICATE_NORMAL);
   elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);

   EXPECT_TRUE(validate(p));
   clear_instructions(p);

   /* Same instruction with the destination at sub-register 1. */
   elk_SEL(p, retype(g0, ELK_REGISTER_TYPE_B),
              retype(g0, ELK_REGISTER_TYPE_W),
              retype(g0, ELK_REGISTER_TYPE_W));
   elk_inst_set_pred_control(&devinfo, last_inst, ELK_PREDICATE_NORMAL);
   elk_inst_set_dst_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_2);
   elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, 1);

   const bool odd_offset_valid = devinfo.verx10 >= 45;
   EXPECT_EQ(odd_offset_valid, validate(p));
}
1141
/* Table-driven check of MOVs from 64-bit source types (Q, UQ, DF) to byte
 * destinations (B, UB) with destination horizontal strides 1, 2 and 4.
 * Every row expects the validator to reject the instruction.
 *
 * Nothing is run below ver 8, and rows whose source type is not supported
 * according to devinfo.has_64bit_float / devinfo.has_64bit_int are skipped.
 */
TEST_P(validation_test, byte_64bit_conversion)
{
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src_type;
      unsigned dst_stride;
      bool expected_result;
   } inst[] = {
#define INST(dst_type, src_type, dst_stride, expected_result) \
      { \
         ELK_REGISTER_TYPE_##dst_type, \
         ELK_REGISTER_TYPE_##src_type, \
         ELK_HORIZONTAL_STRIDE_##dst_stride, \
         expected_result, \
      }

      INST( B,  Q, 1, false),
      INST( B, UQ, 1, false),
      INST( B, DF, 1, false),
      INST(UB,  Q, 1, false),
      INST(UB, UQ, 1, false),
      INST(UB, DF, 1, false),

      INST( B,  Q, 2, false),
      INST( B, UQ, 2, false),
      INST( B , DF, 2, false),
      INST(UB,  Q, 2, false),
      INST(UB, UQ, 2, false),
      INST(UB, DF, 2, false),

      INST( B,  Q, 4, false),
      INST( B, UQ, 4, false),
      INST( B, DF, 4, false),
      INST(UB,  Q, 4, false),
      INST(UB, UQ, 4, false),
      INST(UB, DF, 4, false),

#undef INST
   };

   if (devinfo.ver < 8)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      /* Skip rows that need a 64-bit type this device does not have. */
      if (!devinfo.has_64bit_float &&
          inst[i].src_type == ELK_REGISTER_TYPE_DF)
         continue;

      if (!devinfo.has_64bit_int &&
          (inst[i].src_type == ELK_REGISTER_TYPE_Q ||
           inst[i].src_type == ELK_REGISTER_TYPE_UQ))
         continue;

      elk_MOV(p, retype(g0, inst[i].dst_type), retype(g0, inst[i].src_type));
      elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);

      /* Report the row index so a failure can be traced back to the table. */
      EXPECT_EQ(inst[i].expected_result, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }
}
1202
/* Table-driven check of SIMD4 MOVs that convert to or from half-float.
 *
 * Each row gives the destination and source types, the destination
 * horizontal stride and sub-register number, and two expected verdicts: one
 * used when devinfo.platform is INTEL_PLATFORM_CHV, the other used for every
 * other platform that reaches the loop.  Nothing is run below ver 8, and
 * rows needing an unsupported 64-bit type are skipped.
 */
TEST_P(validation_test, half_float_conversion)
{
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src_type;
      unsigned dst_stride;
      unsigned dst_subnr;
      bool expected_result_bdw;
      bool expected_result_chv;
   } inst[] = {
#define INST(dst_type, src_type, dst_stride, dst_subnr, \
             expected_result_bdw, expected_result_chv) \
      { \
         ELK_REGISTER_TYPE_##dst_type, \
         ELK_REGISTER_TYPE_##src_type, \
         ELK_HORIZONTAL_STRIDE_##dst_stride, \
         dst_subnr, \
         expected_result_bdw, \
         expected_result_chv, \
      }

      /* MOV to half-float destination */
      INST(HF,  B, 1, 0, false, false), /* 0 */
      INST(HF,  W, 1, 0, false, false),
      INST(HF, HF, 1, 0, true,  true),
      INST(HF, HF, 1, 2, true,  true),
      INST(HF,  D, 1, 0, false, false),
      INST(HF,  F, 1, 0, false, true),
      INST(HF,  Q, 1, 0, false, false),
      INST(HF,  B, 2, 0, true,  true),
      INST(HF,  B, 2, 2, false, false),
      INST(HF,  W, 2, 0, true,  true),
      INST(HF,  W, 2, 2, false, false), /* 10 */
      INST(HF, HF, 2, 0, true,  true),
      INST(HF, HF, 2, 2, true,  true),
      INST(HF,  D, 2, 0, true,  true),
      INST(HF,  D, 2, 2, false, false),
      INST(HF,  F, 2, 0, true,  true),
      INST(HF,  F, 2, 2, false, true),
      INST(HF,  Q, 2, 0, false, false),
      INST(HF, DF, 2, 0, false, false),
      INST(HF,  B, 4, 0, false, false),
      INST(HF,  W, 4, 0, false, false), /* 20 */
      INST(HF, HF, 4, 0, true,  true),
      INST(HF, HF, 4, 2, true,  true),
      INST(HF,  D, 4, 0, false, false),
      INST(HF,  F, 4, 0, false, false),
      INST(HF,  Q, 4, 0, false, false),
      INST(HF, DF, 4, 0, false, false),

      /* MOV from half-float source */
      INST( B, HF, 1, 0, false, false),
      INST( W, HF, 1, 0, false, false),
      INST( D, HF, 1, 0, true,  true),
      INST( D, HF, 1, 4, true,  true), /* 30 */
      INST( F, HF, 1, 0, true,  true),
      INST( F, HF, 1, 4, true,  true),
      INST( Q, HF, 1, 0, false, false),
      INST(DF, HF, 1, 0, false, false),
      INST( B, HF, 2, 0, false, false),
      INST( W, HF, 2, 0, true,  true),
      INST( W, HF, 2, 2, false, false),
      INST( D, HF, 2, 0, false, false),
      INST( F, HF, 2, 0, true,  true),
      INST( B, HF, 4, 0, true,  true), /* 40 */
      INST( B, HF, 4, 1, false, false),
      INST( W, HF, 4, 0, false, false),

#undef INST
   };

   if (devinfo.ver < 8)
      return;

   const auto is_64bit_int = [](enum elk_reg_type type) {
      return type == ELK_REGISTER_TYPE_Q || type == ELK_REGISTER_TYPE_UQ;
   };

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      const auto &row = inst[i];

      /* Skip rows that need a 64-bit type this device does not have. */
      const bool uses_df = row.dst_type == ELK_REGISTER_TYPE_DF ||
                           row.src_type == ELK_REGISTER_TYPE_DF;
      if (uses_df && !devinfo.has_64bit_float)
         continue;

      const bool uses_q = is_64bit_int(row.dst_type) ||
                          is_64bit_int(row.src_type);
      if (uses_q && !devinfo.has_64bit_int)
         continue;

      elk_MOV(p, retype(g0, row.dst_type), retype(g0, row.src_type));

      elk_inst_set_exec_size(&devinfo, last_inst, ELK_EXECUTE_4);

      elk_inst_set_dst_hstride(&devinfo, last_inst, row.dst_stride);
      elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, row.dst_subnr);

      /* B sources use width 2 / hstride 2; every other source type uses
       * width 4 / hstride 1.  The vertical stride is 4 in both cases.
       */
      const bool byte_src = row.src_type == ELK_REGISTER_TYPE_B;
      elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src0_width(&devinfo, last_inst,
                              byte_src ? ELK_WIDTH_2 : ELK_WIDTH_4);
      elk_inst_set_src0_hstride(&devinfo, last_inst,
                                byte_src ? ELK_HORIZONTAL_STRIDE_2
                                         : ELK_HORIZONTAL_STRIDE_1);

      const bool expected = devinfo.platform == INTEL_PLATFORM_CHV
                               ? row.expected_result_chv
                               : row.expected_result_bdw;
      EXPECT_EQ(expected, validate(p)) << "Failing test is: " << i;

      clear_instructions(p);
   }
}
1318
/* Table-driven check of mixed F/HF ADDs with the destination and/or src0
 * address mode overridden.
 *
 * Each row gives the operand types, the destination horizontal stride, the
 * values written to the destination and src0 address-mode fields, and the
 * expected verdict.  Per the table, every row with src0_indirect set is
 * rejected and every row with it clear is accepted.  Nothing is run below
 * ver 8.
 *
 * The gfx125_expected_result column is stored in the table but is not read
 * by the loop below.
 */
TEST_P(validation_test, mixed_float_source_indirect_addressing)
{
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src0_type;
      enum elk_reg_type src1_type;
      unsigned dst_stride;
      bool dst_indirect;
      bool src0_indirect;
      bool expected_result;
      bool gfx125_expected_result;
   } inst[] = {
#define INST(dst_type, src0_type, src1_type, \
             dst_stride, dst_indirect, src0_indirect, expected_result, \
             gfx125_expected_result) \
      { \
         ELK_REGISTER_TYPE_##dst_type, \
         ELK_REGISTER_TYPE_##src0_type, \
         ELK_REGISTER_TYPE_##src1_type, \
         ELK_HORIZONTAL_STRIDE_##dst_stride, \
         dst_indirect, \
         src0_indirect, \
         expected_result, \
         gfx125_expected_result, \
      }

      /* Source and dest are mixed float: indirect src addressing not allowed */
      INST(HF,  F,  F, 2, false, false, true,  true),
      INST(HF,  F,  F, 2, true,  false, true,  true),
      INST(HF,  F,  F, 2, false, true,  false, false),
      INST(HF,  F,  F, 2, true,  true,  false, false),
      INST( F, HF,  F, 1, false, false, true,  false),
      INST( F, HF,  F, 1, true,  false, true,  false),
      INST( F, HF,  F, 1, false, true,  false, false),
      INST( F, HF,  F, 1, true,  true,  false, false),

      INST(HF, HF,  F, 2, false, false, true,  false),
      INST(HF, HF,  F, 2, true,  false, true,  false),
      INST(HF, HF,  F, 2, false, true,  false, false),
      INST(HF, HF,  F, 2, true,  true,  false, false),
      INST( F,  F, HF, 1, false, false, true,  false),
      INST( F,  F, HF, 1, true,  false, true,  false),
      INST( F,  F, HF, 1, false, true,  false, false),
      INST( F,  F, HF, 1, true,  true,  false, false),

#undef INST
   };

   if (devinfo.ver < 8)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      elk_ADD(p, retype(g0, inst[i].dst_type),
                 retype(g0, inst[i].src0_type),
                 retype(g0, inst[i].src1_type));

      /* The bools are written directly into the address-mode fields;
       * presumably 1 selects register-indirect addressing -- confirm
       * against the address-mode enum in elk_eu_defines.h.
       */
      elk_inst_set_dst_address_mode(&devinfo, last_inst, inst[i].dst_indirect);
      elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
      elk_inst_set_src0_address_mode(&devinfo, last_inst, inst[i].src0_indirect);

      /* Report the row index so a failure can be traced back to the table. */
      EXPECT_EQ(inst[i].expected_result, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }
}
1384
/* Table-driven check of mixed F/HF ADDs at execution sizes 8 and 16.
 *
 * Each row gives the execution size, operand types, destination horizontal
 * stride and the expected verdict.  Nothing is run below ver 8.
 *
 * The gfx125_expected_result column is stored in the table but is not read
 * by the loop below.
 */
TEST_P(validation_test, mixed_float_align1_simd16)
{
   static const struct {
      unsigned exec_size;
      enum elk_reg_type dst_type;
      enum elk_reg_type src0_type;
      enum elk_reg_type src1_type;
      unsigned dst_stride;
      bool expected_result;
      bool gfx125_expected_result;
   } inst[] = {
#define INST(exec_size, dst_type, src0_type, src1_type, \
             dst_stride, expected_result, gfx125_expected_result) \
      { \
         ELK_EXECUTE_##exec_size, \
         ELK_REGISTER_TYPE_##dst_type, \
         ELK_REGISTER_TYPE_##src0_type, \
         ELK_REGISTER_TYPE_##src1_type, \
         ELK_HORIZONTAL_STRIDE_##dst_stride, \
         expected_result, \
         gfx125_expected_result, \
      }

      /* No SIMD16 in mixed mode when destination is packed f16 */
      INST( 8, HF,  F, HF, 2, true,  false),
      INST(16, HF, HF,  F, 2, true,  false),
      INST(16, HF, HF,  F, 1, false, false),
      INST(16, HF,  F, HF, 1, false, false),

      /* No SIMD16 in mixed mode when destination is f32 */
      INST( 8,  F, HF,  F, 1, true,  false),
      INST( 8,  F,  F, HF, 1, true,  false),
      INST(16,  F, HF,  F, 1, false, false),
      INST(16,  F,  F, HF, 1, false, false),

#undef INST
   };

   if (devinfo.ver < 8)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      elk_ADD(p, retype(g0, inst[i].dst_type),
                 retype(g0, inst[i].src0_type),
                 retype(g0, inst[i].src1_type));

      elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);

      elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);

      /* Report the row index so a failure can be traced back to the table. */
      EXPECT_EQ(inst[i].expected_result, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }
}
1440
/* Table-driven check of HF = HF + F ADDs where src0 is either g0 or acc0
 * and carries a sub-register number taken from the table.
 *
 * Each row gives the operand types, destination horizontal stride, whether
 * src0 is the accumulator, the src0 sub-register number, and three expected
 * verdicts.  The verdict used is chosen from devinfo: the gfx125 column when
 * verx10 >= 125, the chv_skl column on INTEL_PLATFORM_CHV or ver >= 9, and
 * the bdw column otherwise.  Nothing is run below ver 8.
 */
TEST_P(validation_test, mixed_float_align1_packed_fp16_dst_acc_read_offset_0)
{
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src0_type;
      enum elk_reg_type src1_type;
      unsigned dst_stride;
      bool read_acc;
      unsigned subnr;
      bool expected_result_bdw;
      bool expected_result_chv_skl;
      bool expected_result_gfx125;
   } inst[] = {
#define INST(dst_type, src0_type, src1_type, dst_stride, read_acc, subnr, \
             expected_result_bdw, expected_result_chv_skl, \
             expected_result_gfx125) \
      { \
         ELK_REGISTER_TYPE_##dst_type, \
         ELK_REGISTER_TYPE_##src0_type, \
         ELK_REGISTER_TYPE_##src1_type, \
         ELK_HORIZONTAL_STRIDE_##dst_stride, \
         read_acc, \
         subnr, \
         expected_result_bdw, \
         expected_result_chv_skl, \
         expected_result_gfx125, \
      }

      /* Destination is not packed */
      INST(HF, HF, F, 2, true,  0, true, true, false),
      INST(HF, HF, F, 2, true,  2, true, true, false),
      INST(HF, HF, F, 2, true,  4, true, true, false),
      INST(HF, HF, F, 2, true,  8, true, true, false),
      INST(HF, HF, F, 2, true, 16, true, true, false),

      /* Destination is packed, we don't read acc */
      INST(HF, HF, F, 1, false,  0, false, true, false),
      INST(HF, HF, F, 1, false,  2, false, true, false),
      INST(HF, HF, F, 1, false,  4, false, true, false),
      INST(HF, HF, F, 1, false,  8, false, true, false),
      INST(HF, HF, F, 1, false, 16, false, true, false),

      /* Destination is packed, we read acc */
      INST(HF, HF, F, 1, true,  0, false, false, false),
      INST(HF, HF, F, 1, true,  2, false, false, false),
      INST(HF, HF, F, 1, true,  4, false, false, false),
      INST(HF, HF, F, 1, true,  8, false, false, false),
      INST(HF, HF, F, 1, true, 16, false, false, false),

#undef INST
   };

   if (devinfo.ver < 8)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      elk_ADD(p, retype(g0, inst[i].dst_type),
                 retype(inst[i].read_acc ? acc0 : g0, inst[i].src0_type),
                 retype(g0, inst[i].src1_type));

      elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);

      elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, inst[i].subnr);

      /* Every branch reports the row index so a failure can be traced back
       * to the table.
       */
      if (devinfo.verx10 >= 125) {
         EXPECT_EQ(inst[i].expected_result_gfx125, validate(p)) <<
            "Failing test is: " << i;
      } else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9) {
         EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p)) <<
            "Failing test is: " << i;
      } else {
         EXPECT_EQ(inst[i].expected_result_bdw, validate(p)) <<
            "Failing test is: " << i;
      }

      clear_instructions(p);
   }
}
1515
/* Table-driven check of mixed F/HF MAC and ADD instructions.
 *
 * Each row gives the execution size, opcode (MAC or ADD), operand types,
 * destination horizontal stride, whether an ADD reads acc0 as src0, and
 * three expected verdicts.  read_acc is only consulted for ADD rows; MAC
 * rows always use g0 for both sources.  The verdict used is chosen from
 * devinfo: the gfx125 column when verx10 >= 125, the chv_skl column on
 * INTEL_PLATFORM_CHV or ver >= 9, and the bdw column otherwise.  Nothing is
 * run below ver 8.
 */
TEST_P(validation_test, mixed_float_fp16_dest_with_acc)
{
   static const struct {
      unsigned exec_size;
      unsigned opcode;
      enum elk_reg_type dst_type;
      enum elk_reg_type src0_type;
      enum elk_reg_type src1_type;
      unsigned dst_stride;
      bool read_acc;
      bool expected_result_bdw;
      bool expected_result_chv_skl;
      bool expected_result_gfx125;
   } inst[] = {
#define INST(exec_size, opcode, dst_type, src0_type, src1_type, \
             dst_stride, read_acc,expected_result_bdw, \
             expected_result_chv_skl, expected_result_gfx125) \
      { \
         ELK_EXECUTE_##exec_size, \
         ELK_OPCODE_##opcode, \
         ELK_REGISTER_TYPE_##dst_type, \
         ELK_REGISTER_TYPE_##src0_type, \
         ELK_REGISTER_TYPE_##src1_type, \
         ELK_HORIZONTAL_STRIDE_##dst_stride, \
         read_acc, \
         expected_result_bdw, \
         expected_result_chv_skl, \
         expected_result_gfx125, \
      }

      /* Packed fp16 dest with implicit acc needs hstride=2 */
      INST(8, MAC, HF, HF,  F, 1, false, false, false, false),
      INST(8, MAC, HF, HF,  F, 2, false, true,  true,  false),
      INST(8, MAC, HF,  F, HF, 1, false, false, false, false),
      INST(8, MAC, HF,  F, HF, 2, false, true,  true,  false),

      /* Packed fp16 dest with explicit acc needs hstride=2 */
      INST(8, ADD, HF, HF,  F, 1, true,  false, false, false),
      INST(8, ADD, HF, HF,  F, 2, true,  true,  true,  false),
      INST(8, ADD, HF,  F, HF, 1, true,  false, false, false),
      INST(8, ADD, HF,  F, HF, 2, true,  true,  true,  false),

      /* If destination is not fp16, restriction doesn't apply */
      INST(8, MAC,  F, HF,  F, 1, false, true,  true,  false),
      INST(8, MAC,  F, HF,  F, 2, false, true,  true,  false),

      /* If there is no implicit/explicit acc, restriction doesn't apply */
      INST(8, ADD, HF, HF,  F, 1, false, false, true,  false),
      INST(8, ADD, HF, HF,  F, 2, false, true,  true,  false),
      INST(8, ADD, HF,  F, HF, 1, false, false, true,  false),
      INST(8, ADD, HF,  F, HF, 2, false, true,  true,  false),
      INST(8, ADD,  F, HF,  F, 1, false, true,  true,  false),
      INST(8, ADD,  F, HF,  F, 2, false, true,  true,  false),

#undef INST
   };

   if (devinfo.ver < 8)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      if (inst[i].opcode == ELK_OPCODE_MAC) {
         elk_MAC(p, retype(g0, inst[i].dst_type),
                    retype(g0, inst[i].src0_type),
                    retype(g0, inst[i].src1_type));
      } else {
         /* The table only contains MAC and ADD rows. */
         assert(inst[i].opcode == ELK_OPCODE_ADD);
         elk_ADD(p, retype(g0, inst[i].dst_type),
                    retype(inst[i].read_acc ? acc0: g0, inst[i].src0_type),
                    retype(g0, inst[i].src1_type));
      }

      elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);

      elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);

      /* Every branch reports the row index so a failure can be traced back
       * to the table.
       */
      if (devinfo.verx10 >= 125) {
         EXPECT_EQ(inst[i].expected_result_gfx125, validate(p)) <<
            "Failing test is: " << i;
      } else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9) {
         EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p)) <<
            "Failing test is: " << i;
      } else {
         EXPECT_EQ(inst[i].expected_result_bdw, validate(p)) <<
            "Failing test is: " << i;
      }

      clear_instructions(p);
   }
}
1602
/* Table-driven check of mixed F/HF POW math instructions with varying
 * destination and source horizontal strides.
 *
 * Each row gives the operand types, the destination / src0 / src1 horizontal
 * strides, and two expected verdicts: the gfx125 column is used when
 * verx10 >= 125 and the plain column otherwise.  Both sources always use
 * vertical stride 4 and width 4.  Nothing is run below ver 9.
 *
 * NOTE(review): the platform list at the top of this file ends at "chv", so
 * the ver < 9 early return presumably triggers for every parameter and the
 * loop never runs here -- confirm against how devinfo is populated.
 */
TEST_P(validation_test, mixed_float_align1_math_strided_fp16_inputs)
{
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src0_type;
      enum elk_reg_type src1_type;
      unsigned dst_stride;
      unsigned src0_stride;
      unsigned src1_stride;
      bool expected_result;
      bool expected_result_gfx125;
   } inst[] = {
#define INST(dst_type, src0_type, src1_type, \
             dst_stride, src0_stride, src1_stride, expected_result, \
             expected_result_gfx125) \
      { \
         ELK_REGISTER_TYPE_##dst_type, \
         ELK_REGISTER_TYPE_##src0_type, \
         ELK_REGISTER_TYPE_##src1_type, \
         ELK_HORIZONTAL_STRIDE_##dst_stride, \
         ELK_HORIZONTAL_STRIDE_##src0_stride, \
         ELK_HORIZONTAL_STRIDE_##src1_stride, \
         expected_result, \
         expected_result_gfx125, \
      }

      INST(HF, HF,  F, 2, 2, 1, true,  false),
      INST(HF,  F, HF, 2, 1, 2, true,  false),
      INST(HF,  F, HF, 1, 1, 2, true,  false),
      INST(HF,  F, HF, 2, 1, 1, false, false),
      INST(HF, HF,  F, 2, 1, 1, false, false),
      INST(HF, HF,  F, 1, 1, 1, false, false),
      INST(HF, HF,  F, 2, 1, 1, false, false),
      INST( F, HF,  F, 1, 1, 1, false, false),
      INST( F,  F, HF, 1, 1, 2, true,  false),
      INST( F, HF, HF, 1, 2, 1, false, false),
      INST( F, HF, HF, 1, 2, 2, true,  false),

#undef INST
   };

   /* No half-float math in gfx8 */
   if (devinfo.ver < 9)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      elk_gfx6_math(p, retype(g0, inst[i].dst_type),
                       ELK_MATH_FUNCTION_POW,
                       retype(g0, inst[i].src0_type),
                       retype(g0, inst[i].src1_type));

      elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);

      elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
      elk_inst_set_src0_hstride(&devinfo, last_inst, inst[i].src0_stride);

      elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
      elk_inst_set_src1_hstride(&devinfo, last_inst, inst[i].src1_stride);

      /* Both branches report the row index so a failure can be traced back
       * to the table.
       */
      if (devinfo.verx10 >= 125) {
         EXPECT_EQ(inst[i].expected_result_gfx125, validate(p)) <<
            "Failing test is: " << i;
      } else {
         EXPECT_EQ(inst[i].expected_result, validate(p)) <<
            "Failing test is: " << i;
      }

      clear_instructions(p);
   }
}
1672
/* Table-driven check of mixed HF/F ADDs with varying execution size,
 * destination horizontal stride and destination sub-register number.
 *
 * Each row gives the execution size, operand types, destination stride and
 * sub-register, and three expected verdicts.  Both sources always use
 * vertical stride 4, width 4, horizontal stride 1.  The verdict used is
 * chosen from devinfo: the gfx125 column when verx10 >= 125, the chv_skl
 * column on INTEL_PLATFORM_CHV or ver >= 9, and the bdw column otherwise.
 * Nothing is run below ver 8.
 */
TEST_P(validation_test, mixed_float_align1_packed_fp16_dst)
{
   static const struct {
      unsigned exec_size;
      enum elk_reg_type dst_type;
      enum elk_reg_type src0_type;
      enum elk_reg_type src1_type;
      unsigned dst_stride;
      unsigned dst_subnr;
      bool expected_result_bdw;
      bool expected_result_chv_skl;
      bool expected_result_gfx125;
   } inst[] = {
#define INST(exec_size, dst_type, src0_type, src1_type, dst_stride, dst_subnr, \
             expected_result_bdw, expected_result_chv_skl, \
             expected_result_gfx125) \
      { \
         ELK_EXECUTE_##exec_size, \
         ELK_REGISTER_TYPE_##dst_type, \
         ELK_REGISTER_TYPE_##src0_type, \
         ELK_REGISTER_TYPE_##src1_type, \
         ELK_HORIZONTAL_STRIDE_##dst_stride, \
         dst_subnr, \
         expected_result_bdw, \
         expected_result_chv_skl, \
         expected_result_gfx125 \
      }

      /* SIMD8 packed fp16 dst won't cross oword boundaries if region is
       * oword-aligned
       */
      INST( 8, HF, HF, F, 1,  0, false, true,  false),
      INST( 8, HF, HF, F, 1,  2, false, false, false),
      INST( 8, HF, HF, F, 1,  4, false, false, false),
      INST( 8, HF, HF, F, 1,  8, false, false, false),
      INST( 8, HF, HF, F, 1, 16, false, true,  false),

      /* SIMD16 packed fp16 always crosses oword boundaries */
      INST(16, HF, HF, F, 1,  0, false, false, false),
      INST(16, HF, HF, F, 1,  2, false, false, false),
      INST(16, HF, HF, F, 1,  4, false, false, false),
      INST(16, HF, HF, F, 1,  8, false, false, false),
      INST(16, HF, HF, F, 1, 16, false, false, false),

      /* If destination is not packed (or not fp16) we can cross oword
       * boundaries
       */
      INST( 8, HF, HF, F, 2,  0, true, true, false),
      INST( 8,  F, HF, F, 1,  0, true, true, false),

#undef INST
   };

   if (devinfo.ver < 8)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      elk_ADD(p, retype(g0, inst[i].dst_type),
                 retype(g0, inst[i].src0_type),
                 retype(g0, inst[i].src1_type));

      elk_inst_set_dst_hstride(&devinfo, last_inst, inst[i].dst_stride);
      elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, inst[i].dst_subnr);

      elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src0_width(&devinfo, last_inst, ELK_WIDTH_4);
      elk_inst_set_src0_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);

      elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src1_width(&devinfo, last_inst, ELK_WIDTH_4);
      elk_inst_set_src1_hstride(&devinfo, last_inst, ELK_HORIZONTAL_STRIDE_1);

      elk_inst_set_exec_size(&devinfo, last_inst, inst[i].exec_size);

      /* Every branch reports the row index so a failure can be traced back
       * to the table.
       */
      if (devinfo.verx10 >= 125) {
         EXPECT_EQ(inst[i].expected_result_gfx125, validate(p)) <<
            "Failing test is: " << i;
      } else if (devinfo.platform == INTEL_PLATFORM_CHV || devinfo.ver >= 9) {
         EXPECT_EQ(inst[i].expected_result_chv_skl, validate(p)) <<
            "Failing test is: " << i;
      } else {
         EXPECT_EQ(inst[i].expected_result_bdw, validate(p)) <<
            "Failing test is: " << i;
      }

      clear_instructions(p);
   }
}
1757
/* Table-driven check of mixed F/HF ADDs emitted with the default access
 * mode set to ELK_ALIGN_16 and the source vertical strides taken from the
 * table.
 *
 * Each row gives the operand types, the src0 and src1 vertical strides and
 * the expected verdict.  Per the table, only rows with both vertical strides
 * equal to 4 are accepted.  The test only runs for 8 <= ver < 11.
 */
TEST_P(validation_test, mixed_float_align16_packed_data)
{
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src0_type;
      enum elk_reg_type src1_type;
      unsigned src0_vstride;
      unsigned src1_vstride;
      bool expected_result;
   } inst[] = {
#define INST(dst_type, src0_type, src1_type, \
             src0_vstride, src1_vstride, expected_result) \
      { \
         ELK_REGISTER_TYPE_##dst_type, \
         ELK_REGISTER_TYPE_##src0_type, \
         ELK_REGISTER_TYPE_##src1_type, \
         ELK_VERTICAL_STRIDE_##src0_vstride, \
         ELK_VERTICAL_STRIDE_##src1_vstride, \
         expected_result, \
      }

      /* We only test with F destination because there is a restriction
       * by which F->HF conversions need to be DWord aligned but Align16 also
       * requires that destination horizontal stride is 1.
       */
      INST(F,  F, HF, 4, 4, true),
      INST(F,  F, HF, 2, 4, false),
      INST(F,  F, HF, 4, 2, false),
      INST(F,  F, HF, 0, 4, false),
      INST(F,  F, HF, 4, 0, false),
      INST(F, HF,  F, 4, 4, true),
      INST(F, HF,  F, 4, 2, false),
      INST(F, HF,  F, 2, 4, false),
      INST(F, HF,  F, 0, 4, false),
      INST(F, HF,  F, 4, 0, false),

#undef INST
   };

   if (devinfo.ver < 8 || devinfo.ver >= 11)
      return;

   /* Applies to every instruction emitted in the loop below. */
   elk_set_default_access_mode(p, ELK_ALIGN_16);

   for (unsigned i = 0; i < ARRAY_SIZE(inst); i++) {
      elk_ADD(p, retype(g0, inst[i].dst_type),
                 retype(g0, inst[i].src0_type),
                 retype(g0, inst[i].src1_type));

      elk_inst_set_src0_vstride(&devinfo, last_inst, inst[i].src0_vstride);
      elk_inst_set_src1_vstride(&devinfo, last_inst, inst[i].src1_vstride);

      /* Report the row index so a failure can be traced back to the table. */
      EXPECT_EQ(inst[i].expected_result, validate(p)) <<
         "Failing test is: " << i;

      clear_instructions(p);
   }
}
1815
/* Align16 mixed-float ADD at different execution sizes: per the table, the
 * SIMD8 rows are expected to validate and the SIMD16 rows are expected to be
 * rejected.  Each (exec size, operand types) combination appears once.
 */
TEST_P(validation_test, mixed_float_align16_no_simd16)
{
   static const struct {
      unsigned exec_size;
      enum elk_reg_type dst_type;
      enum elk_reg_type src0_type;
      enum elk_reg_type src1_type;
      bool expected_result;
   } inst[] = {
#define INST(exec_size, dst_type, src0_type, src1_type, expected_result) \
      { \
         ELK_EXECUTE_##exec_size, \
         ELK_REGISTER_TYPE_##dst_type, \
         ELK_REGISTER_TYPE_##src0_type, \
         ELK_REGISTER_TYPE_##src1_type, \
         expected_result, \
      }

      /* We only test with F destination because there is a restriction
       * by which F->HF conversions need to be DWord aligned but Align16 also
       * requires that destination horizontal stride is 1.
       */
      INST( 8, F,  F, HF, true),
      INST( 8, F, HF,  F, true),
      INST(16, F,  F, HF, false),
      INST(16, F, HF,  F, false),

#undef INST
   };

   /* Nothing is checked before Gfx8 or from Gfx11 onwards. */
   if (devinfo.ver < 8 || devinfo.ver >= 11)
      return;

   elk_set_default_access_mode(p, ELK_ALIGN_16);

   for (const auto &tc : inst) {
      elk_ADD(p, retype(g0, tc.dst_type),
                 retype(g0, tc.src0_type),
                 retype(g0, tc.src1_type));

      elk_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      /* Both sources use a vertical stride of 4 so that only the execution
       * size differs between rows.
       */
      elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
1868
/* Align16 mixed-float ADD with src0 taken either from g0 or from acc0.  Per
 * the table, the rows that read the accumulator are expected to be rejected.
 */
TEST_P(validation_test, mixed_float_align16_no_acc_read)
{
   /* Nothing is checked before Gfx8 or from Gfx11 onwards. */
   if (devinfo.ver < 8 || devinfo.ver >= 11)
      return;

#define INST(dst_type, src0_type, src1_type, read_acc, expected_result) \
   { \
      ELK_REGISTER_TYPE_##dst_type, \
      ELK_REGISTER_TYPE_##src0_type, \
      ELK_REGISTER_TYPE_##src1_type, \
      read_acc, \
      expected_result, \
   }

   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src0_type;
      enum elk_reg_type src1_type;
      bool read_acc;
      bool expected_result;
   } cases[] = {
      /* We only test with F destination because there is a restriction
       * by which F->HF conversions need to be DWord aligned but Align16 also
       * requires that destination horizontal stride is 1.
       */
      INST( F,  F, HF, false, true),
      INST( F,  F, HF, true,  false),
      INST( F, HF,  F, false, true),
      INST( F, HF,  F, true,  false),
   };

#undef INST

   elk_set_default_access_mode(p, ELK_ALIGN_16);

   for (const auto &tc : cases) {
      /* src0 is the only operand that may come from the accumulator. */
      const auto src0_base = tc.read_acc ? acc0 : g0;

      elk_ADD(p, retype(g0, tc.dst_type),
                 retype(src0_base, tc.src0_type),
                 retype(g0, tc.src1_type));

      elk_inst_set_src0_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);
      elk_inst_set_src1_vstride(&devinfo, last_inst, ELK_VERTICAL_STRIDE_4);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
1917
/* Align16 mixed-float POW math instruction with varying source vertical
 * strides.  In the table only the row with both strides set to 4 is expected
 * to validate.
 */
TEST_P(validation_test, mixed_float_align16_math_packed_format)
{
   /* Align16 Math for mixed float mode is not supported in gfx8.
    *
    * NOTE(review): the platform list at the top of this file ends at chv; if
    * every listed platform is Gfx8 or older this body never runs - confirm.
    */
   if (devinfo.ver < 9 || devinfo.ver >= 11)
      return;

#define INST(dst_type, src0_type, src1_type, \
             src0_vstride, src1_vstride, expected_result) \
   { \
      ELK_REGISTER_TYPE_##dst_type, \
      ELK_REGISTER_TYPE_##src0_type, \
      ELK_REGISTER_TYPE_##src1_type, \
      ELK_VERTICAL_STRIDE_##src0_vstride, \
      ELK_VERTICAL_STRIDE_##src1_vstride, \
      expected_result, \
   }

   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src0_type;
      enum elk_reg_type src1_type;
      unsigned src0_vstride;
      unsigned src1_vstride;
      bool expected_result;
   } cases[] = {
      /* We only test with F destination because there is a restriction
       * by which F->HF conversions need to be DWord aligned but Align16 also
       * requires that destination horizontal stride is 1.
       */
      INST( F, HF,  F, 4, 0, false),
      INST( F, HF, HF, 4, 4, true),
      INST( F,  F, HF, 4, 0, false),
      INST( F,  F, HF, 2, 4, false),
      INST( F,  F, HF, 4, 2, false),
      INST( F, HF, HF, 0, 4, false),
   };

#undef INST

   elk_set_default_access_mode(p, ELK_ALIGN_16);

   for (const auto &tc : cases) {
      const auto dst = retype(g0, tc.dst_type);
      const auto src0 = retype(g0, tc.src0_type);
      const auto src1 = retype(g0, tc.src1_type);

      elk_gfx6_math(p, dst, ELK_MATH_FUNCTION_POW, src0, src1);

      elk_inst_set_src0_vstride(&devinfo, last_inst, tc.src0_vstride);
      elk_inst_set_src1_vstride(&devinfo, last_inst, tc.src1_vstride);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
1973
/* MOV from a vector immediate (VF, V or UV) with different destination
 * subregister numbers.  In the table, subregister numbers 0 and 16 are
 * expected to validate and subregister number 1 is expected to be rejected.
 */
TEST_P(validation_test, vector_immediate_destination_alignment)
{
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src_type;
      unsigned subnr;
      unsigned exec_size;
      bool expected_result;
   } cases[] = {
      { ELK_REGISTER_TYPE_F, ELK_REGISTER_TYPE_VF,  0, ELK_EXECUTE_4, true  },
      { ELK_REGISTER_TYPE_F, ELK_REGISTER_TYPE_VF, 16, ELK_EXECUTE_4, true  },
      { ELK_REGISTER_TYPE_F, ELK_REGISTER_TYPE_VF,  1, ELK_EXECUTE_4, false },

      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,   0, ELK_EXECUTE_8, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,  16, ELK_EXECUTE_8, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,   1, ELK_EXECUTE_8, false },

      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV,  0, ELK_EXECUTE_8, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV, 16, ELK_EXECUTE_8, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV,  1, ELK_EXECUTE_8, false },
   };

   for (const auto &tc : cases) {
      /* UV type is Gfx6+ */
      const bool uv_unavailable =
         tc.src_type == ELK_REGISTER_TYPE_UV && devinfo.ver < 6;
      if (uv_unavailable)
         continue;

      const auto dst = retype(g0, tc.dst_type);
      const auto imm = retype(zero, tc.src_type);

      elk_MOV(p, dst, imm);
      elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, tc.subnr);
      elk_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
2011
/* MOV from a vector immediate (VF, V or UV) with different destination types
 * and horizontal strides; each row records whether that destination
 * type/stride pairing is expected to validate.
 */
TEST_P(validation_test, vector_immediate_destination_stride)
{
   static const struct {
      enum elk_reg_type dst_type;
      enum elk_reg_type src_type;
      unsigned stride;
      bool expected_result;
   } cases[] = {
      { ELK_REGISTER_TYPE_F, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_1, true  },
      { ELK_REGISTER_TYPE_F, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_2, false },
      { ELK_REGISTER_TYPE_D, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_1, true  },
      { ELK_REGISTER_TYPE_D, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_2, false },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_2, true  },
      { ELK_REGISTER_TYPE_B, ELK_REGISTER_TYPE_VF, ELK_HORIZONTAL_STRIDE_4, true  },

      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,  ELK_HORIZONTAL_STRIDE_1, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,  ELK_HORIZONTAL_STRIDE_2, false },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_V,  ELK_HORIZONTAL_STRIDE_4, false },
      { ELK_REGISTER_TYPE_B, ELK_REGISTER_TYPE_V,  ELK_HORIZONTAL_STRIDE_2, true  },

      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV, ELK_HORIZONTAL_STRIDE_1, true  },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV, ELK_HORIZONTAL_STRIDE_2, false },
      { ELK_REGISTER_TYPE_W, ELK_REGISTER_TYPE_UV, ELK_HORIZONTAL_STRIDE_4, false },
      { ELK_REGISTER_TYPE_B, ELK_REGISTER_TYPE_UV, ELK_HORIZONTAL_STRIDE_2, true  },
   };

   for (const auto &tc : cases) {
      /* UV type is Gfx6+ */
      const bool uv_unavailable =
         tc.src_type == ELK_REGISTER_TYPE_UV && devinfo.ver < 6;
      if (uv_unavailable)
         continue;

      const auto dst = retype(g0, tc.dst_type);
      const auto imm = retype(zero, tc.src_type);

      elk_MOV(p, dst, imm);
      elk_inst_set_dst_hstride(&devinfo, last_inst, tc.stride);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
2052
/* Align1 regioning restrictions for 64-bit data types and integer DWord
 * multiplies.  On CHV each row's expected_result is compared against
 * validate(); on every other platform that reaches the loop the instruction
 * is expected to validate regardless of the row's expected_result.
 */
TEST_P(validation_test, qword_low_power_align1_regioning_restrictions)
{
   static const struct {
      enum elk_opcode opcode;
      unsigned exec_size;

      enum elk_reg_type dst_type;
      unsigned dst_subreg;
      unsigned dst_stride;

      enum elk_reg_type src_type;
      unsigned src_subreg;
      unsigned src_vstride;
      unsigned src_width;
      unsigned src_hstride;

      bool expected_result;
   } inst[] = {
#define INST(opcode, exec_size, dst_type, dst_subreg, dst_stride, src_type, \
             src_subreg, src_vstride, src_width, src_hstride, expected_result) \
      { \
         ELK_OPCODE_##opcode, \
         ELK_EXECUTE_##exec_size, \
         ELK_REGISTER_TYPE_##dst_type, \
         dst_subreg, \
         ELK_HORIZONTAL_STRIDE_##dst_stride, \
         ELK_REGISTER_TYPE_##src_type, \
         src_subreg, \
         ELK_VERTICAL_STRIDE_##src_vstride, \
         ELK_WIDTH_##src_width, \
         ELK_HORIZONTAL_STRIDE_##src_hstride, \
         expected_result, \
      }

      /* Some instruction that violate no restrictions, as a control */
      INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
      INST(MOV, 4, Q,  0, 1, Q,  0, 4, 4, 1, true ),
      INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),

      INST(MOV, 4, DF, 0, 1, F,  0, 8, 4, 2, true ),
      INST(MOV, 4, Q,  0, 1, D,  0, 8, 4, 2, true ),
      INST(MOV, 4, UQ, 0, 1, UD, 0, 8, 4, 2, true ),

      INST(MOV, 4, F,  0, 2, DF, 0, 4, 4, 1, true ),
      INST(MOV, 4, D,  0, 2, Q,  0, 4, 4, 1, true ),
      INST(MOV, 4, UD, 0, 2, UQ, 0, 4, 4, 1, true ),

      INST(MUL, 8, D,  0, 2, D,  0, 8, 4, 2, true ),
      INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),

      /* Something with subreg nrs */
      INST(MOV, 2, DF, 8, 1, DF, 8, 2, 2, 1, true ),
      INST(MOV, 2, Q,  8, 1, Q,  8, 2, 2, 1, true ),
      INST(MOV, 2, UQ, 8, 1, UQ, 8, 2, 2, 1, true ),

      INST(MUL, 2, D,  4, 2, D,  4, 4, 2, 2, true ),
      INST(MUL, 2, UD, 4, 2, UD, 4, 4, 2, 2, true ),

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, regioning in Align1 must follow these rules:
       *
       *    1. Source and Destination horizontal stride must be aligned to the
       *       same qword.
       */
      INST(MOV, 4, DF, 0, 2, DF, 0, 4, 4, 1, false),
      INST(MOV, 4, Q,  0, 2, Q,  0, 4, 4, 1, false),
      INST(MOV, 4, UQ, 0, 2, UQ, 0, 4, 4, 1, false),

      INST(MOV, 4, DF, 0, 2, F,  0, 8, 4, 2, false),
      INST(MOV, 4, Q,  0, 2, D,  0, 8, 4, 2, false),
      INST(MOV, 4, UQ, 0, 2, UD, 0, 8, 4, 2, false),

      INST(MOV, 4, DF, 0, 2, F,  0, 4, 4, 1, false),
      INST(MOV, 4, Q,  0, 2, D,  0, 4, 4, 1, false),
      INST(MOV, 4, UQ, 0, 2, UD, 0, 4, 4, 1, false),

      INST(MUL, 4, D,  0, 2, D,  0, 4, 4, 1, false),
      INST(MUL, 4, UD, 0, 2, UD, 0, 4, 4, 1, false),

      INST(MUL, 4, D,  0, 1, D,  0, 8, 4, 2, false),
      INST(MUL, 4, UD, 0, 1, UD, 0, 8, 4, 2, false),

      /*    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. */
      INST(MOV, 4, DF, 0, 1, DF, 0, 0, 2, 1, false),
      INST(MOV, 4, Q,  0, 1, Q,  0, 0, 2, 1, false),
      INST(MOV, 4, UQ, 0, 1, UQ, 0, 0, 2, 1, false),

      INST(MOV, 4, DF, 0, 1, F,  0, 0, 2, 2, false),
      INST(MOV, 4, Q,  0, 1, D,  0, 0, 2, 2, false),
      INST(MOV, 4, UQ, 0, 1, UD, 0, 0, 2, 2, false),

      INST(MOV, 8, F,  0, 2, DF, 0, 0, 2, 1, false),
      INST(MOV, 8, D,  0, 2, Q,  0, 0, 2, 1, false),
      INST(MOV, 8, UD, 0, 2, UQ, 0, 0, 2, 1, false),

      INST(MUL, 8, D,  0, 2, D,  0, 0, 4, 2, false),
      INST(MUL, 8, UD, 0, 2, UD, 0, 0, 4, 2, false),

      /*    3. Source and Destination offset must be the same, except the case
       *       of scalar source.
       */
      INST(MOV, 2, DF, 8, 1, DF, 0, 2, 2, 1, false),
      INST(MOV, 2, Q,  8, 1, Q,  0, 2, 2, 1, false),
      INST(MOV, 2, UQ, 8, 1, UQ, 0, 2, 2, 1, false),

      INST(MOV, 2, DF, 0, 1, DF, 8, 2, 2, 1, false),
      INST(MOV, 2, Q,  0, 1, Q,  8, 2, 2, 1, false),
      INST(MOV, 2, UQ, 0, 1, UQ, 8, 2, 2, 1, false),

      INST(MUL, 4, D,  4, 2, D,  0, 4, 2, 2, false),
      INST(MUL, 4, UD, 4, 2, UD, 0, 4, 2, 2, false),

      INST(MUL, 4, D,  0, 2, D,  4, 4, 2, 2, false),
      INST(MUL, 4, UD, 0, 2, UD, 4, 4, 2, 2, false),

      INST(MOV, 2, DF, 8, 1, DF, 0, 0, 1, 0, true ),
      INST(MOV, 2, Q,  8, 1, Q,  0, 0, 1, 0, true ),
      INST(MOV, 2, UQ, 8, 1, UQ, 0, 0, 1, 0, true ),

      INST(MOV, 2, DF, 8, 1, F,  4, 0, 1, 0, true ),
      INST(MOV, 2, Q,  8, 1, D,  4, 0, 1, 0, true ),
      INST(MOV, 2, UQ, 8, 1, UD, 4, 0, 1, 0, true ),

      INST(MUL, 4, D,  4, 1, D,  0, 0, 1, 0, true ),
      INST(MUL, 4, UD, 4, 1, UD, 0, 0, 1, 0, true ),

      INST(MUL, 4, D,  0, 1, D,  4, 0, 1, 0, true ),
      INST(MUL, 4, UD, 0, 1, UD, 4, 0, 1, 0, true ),

#undef INST
   };

   /* These restrictions only apply to Gfx8+ */
   if (devinfo.ver < 8)
      return;

   /* Gfx12+ is skipped.
    *
    * NOTE(review): the reason is not evident from this test; it never sets
    * NoDDChk/NoDDClr, so the DepCtrl rationale does not apply here - confirm
    * why Gfx12+ is excluded.
    */
   if (devinfo.ver >= 12)
      return;

   for (const auto &tc : inst) {
      /* Skip rows whose 64-bit types the device does not provide. */
      if (!devinfo.has_64bit_float &&
          (tc.dst_type == ELK_REGISTER_TYPE_DF ||
           tc.src_type == ELK_REGISTER_TYPE_DF))
         continue;

      if (!devinfo.has_64bit_int &&
          (tc.dst_type == ELK_REGISTER_TYPE_Q ||
           tc.dst_type == ELK_REGISTER_TYPE_UQ ||
           tc.src_type == ELK_REGISTER_TYPE_Q ||
           tc.src_type == ELK_REGISTER_TYPE_UQ))
         continue;

      if (tc.opcode == ELK_OPCODE_MOV) {
         elk_MOV(p, retype(g0, tc.dst_type),
                    retype(g0, tc.src_type));
      } else {
         assert(tc.opcode == ELK_OPCODE_MUL);
         elk_MUL(p, retype(g0, tc.dst_type),
                    retype(g0, tc.src_type),
                    retype(zero, tc.src_type));
      }
      elk_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      elk_inst_set_dst_da1_subreg_nr(&devinfo, last_inst, tc.dst_subreg);
      elk_inst_set_src0_da1_subreg_nr(&devinfo, last_inst, tc.src_subreg);

      elk_inst_set_dst_hstride(&devinfo, last_inst, tc.dst_stride);

      elk_inst_set_src0_vstride(&devinfo, last_inst, tc.src_vstride);
      elk_inst_set_src0_width(&devinfo, last_inst, tc.src_width);
      elk_inst_set_src0_hstride(&devinfo, last_inst, tc.src_hstride);

      /* Only CHV is checked against the table; elsewhere the instruction
       * must simply validate.
       */
      if (devinfo.platform == INTEL_PLATFORM_CHV) {
         EXPECT_EQ(tc.expected_result, validate(p));
      } else {
         EXPECT_TRUE(validate(p));
      }

      clear_instructions(p);
   }
}
2240
/* Indirect addressing combined with 64-bit data types or integer DWord
 * multiplies.  On CHV each row's expected_result is compared against
 * validate(); on every other Gfx8+ platform the instruction is expected to
 * validate.
 */
TEST_P(validation_test, qword_low_power_no_indirect_addressing)
{
   /* These restrictions only apply to Gfx8+ */
   if (devinfo.ver < 8)
      return;

#define INST(opcode, exec_size, dst_type, dst_is_indirect, dst_stride, \
             src_type, src_is_indirect, src_vstride, src_width, src_hstride, \
             expected_result) \
   { \
      ELK_OPCODE_##opcode, \
      ELK_EXECUTE_##exec_size, \
      ELK_REGISTER_TYPE_##dst_type, \
      dst_is_indirect, \
      ELK_HORIZONTAL_STRIDE_##dst_stride, \
      ELK_REGISTER_TYPE_##src_type, \
      src_is_indirect, \
      ELK_VERTICAL_STRIDE_##src_vstride, \
      ELK_WIDTH_##src_width, \
      ELK_HORIZONTAL_STRIDE_##src_hstride, \
      expected_result, \
   }

   static const struct {
      enum elk_opcode opcode;
      unsigned exec_size;

      enum elk_reg_type dst_type;
      bool dst_is_indirect;
      unsigned dst_stride;

      enum elk_reg_type src_type;
      bool src_is_indirect;
      unsigned src_vstride;
      unsigned src_width;
      unsigned src_hstride;

      bool expected_result;
   } cases[] = {
      /* Some instruction that violate no restrictions, as a control */
      INST(MOV, 4, DF, 0, 1, DF, 0, 4, 4, 1, true ),
      INST(MOV, 4, Q,  0, 1, Q,  0, 4, 4, 1, true ),
      INST(MOV, 4, UQ, 0, 1, UQ, 0, 4, 4, 1, true ),

      INST(MUL, 8, D,  0, 2, D,  0, 8, 4, 2, true ),
      INST(MUL, 8, UD, 0, 2, UD, 0, 8, 4, 2, true ),

      INST(MOV, 4, F,  1, 1, F,  0, 4, 4, 1, true ),
      INST(MOV, 4, F,  0, 1, F,  1, 4, 4, 1, true ),
      INST(MOV, 4, F,  1, 1, F,  1, 4, 4, 1, true ),

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, indirect addressing must not be used.
       */
      INST(MOV, 4, DF, 1, 1, DF, 0, 4, 4, 1, false),
      INST(MOV, 4, Q,  1, 1, Q,  0, 4, 4, 1, false),
      INST(MOV, 4, UQ, 1, 1, UQ, 0, 4, 4, 1, false),

      INST(MOV, 4, DF, 0, 1, DF, 1, 4, 4, 1, false),
      INST(MOV, 4, Q,  0, 1, Q,  1, 4, 4, 1, false),
      INST(MOV, 4, UQ, 0, 1, UQ, 1, 4, 4, 1, false),

      INST(MOV, 4, DF, 1, 1, F,  0, 8, 4, 2, false),
      INST(MOV, 4, Q,  1, 1, D,  0, 8, 4, 2, false),
      INST(MOV, 4, UQ, 1, 1, UD, 0, 8, 4, 2, false),

      INST(MOV, 4, DF, 0, 1, F,  1, 8, 4, 2, false),
      INST(MOV, 4, Q,  0, 1, D,  1, 8, 4, 2, false),
      INST(MOV, 4, UQ, 0, 1, UD, 1, 8, 4, 2, false),

      INST(MOV, 4, F,  1, 2, DF, 0, 4, 4, 1, false),
      INST(MOV, 4, D,  1, 2, Q,  0, 4, 4, 1, false),
      INST(MOV, 4, UD, 1, 2, UQ, 0, 4, 4, 1, false),

      INST(MOV, 4, F,  0, 2, DF, 1, 4, 4, 1, false),
      INST(MOV, 4, D,  0, 2, Q,  1, 4, 4, 1, false),
      INST(MOV, 4, UD, 0, 2, UQ, 1, 4, 4, 1, false),

      INST(MUL, 8, D,  1, 2, D,  0, 8, 4, 2, false),
      INST(MUL, 8, UD, 1, 2, UD, 0, 8, 4, 2, false),

      INST(MUL, 8, D,  0, 2, D,  1, 8, 4, 2, false),
      INST(MUL, 8, UD, 0, 2, UD, 1, 8, 4, 2, false),
   };

#undef INST

   for (const auto &tc : cases) {
      /* Skip rows whose 64-bit types the device does not provide. */
      const bool uses_df = tc.dst_type == ELK_REGISTER_TYPE_DF ||
                           tc.src_type == ELK_REGISTER_TYPE_DF;
      const bool uses_qword = tc.dst_type == ELK_REGISTER_TYPE_Q ||
                              tc.dst_type == ELK_REGISTER_TYPE_UQ ||
                              tc.src_type == ELK_REGISTER_TYPE_Q ||
                              tc.src_type == ELK_REGISTER_TYPE_UQ;

      if ((uses_df && !devinfo.has_64bit_float) ||
          (uses_qword && !devinfo.has_64bit_int))
         continue;

      switch (tc.opcode) {
      case ELK_OPCODE_MOV:
         elk_MOV(p, retype(g0, tc.dst_type),
                    retype(g0, tc.src_type));
         break;
      default:
         assert(tc.opcode == ELK_OPCODE_MUL);
         elk_MUL(p, retype(g0, tc.dst_type),
                    retype(g0, tc.src_type),
                    retype(zero, tc.src_type));
         break;
      }
      elk_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      /* The table's indirect flags are written straight into the address
       * mode fields of the destination and of src0.
       */
      elk_inst_set_dst_address_mode(&devinfo, last_inst, tc.dst_is_indirect);
      elk_inst_set_src0_address_mode(&devinfo, last_inst, tc.src_is_indirect);

      elk_inst_set_dst_hstride(&devinfo, last_inst, tc.dst_stride);

      elk_inst_set_src0_vstride(&devinfo, last_inst, tc.src_vstride);
      elk_inst_set_src0_width(&devinfo, last_inst, tc.src_width);
      elk_inst_set_src0_hstride(&devinfo, last_inst, tc.src_hstride);

      if (devinfo.platform != INTEL_PLATFORM_CHV) {
         EXPECT_TRUE(validate(p));
      } else {
         EXPECT_EQ(tc.expected_result, validate(p));
      }

      clear_instructions(p);
   }
}
2372
/* ARF registers combined with 64-bit data types or integer DWord multiplies.
 *
 * Each row's expected_result is compared against validate() on CHV, and also
 * on Gfx8 for a MUL whose destination is a non-null ARF register.  In all
 * other cases the instruction is expected to validate.  A final DF MAC,
 * which implicitly reads the accumulator, is checked when the device has
 * 64-bit float support.
 */
TEST_P(validation_test, qword_low_power_no_64bit_arf)
{
   static const struct {
      enum elk_opcode opcode;
      unsigned exec_size;

      struct elk_reg dst;
      enum elk_reg_type dst_type;
      unsigned dst_stride;

      struct elk_reg src;
      enum elk_reg_type src_type;
      unsigned src_vstride;
      unsigned src_width;
      unsigned src_hstride;

      bool acc_wr;
      bool expected_result;
   } inst[] = {
#define INST(opcode, exec_size, dst, dst_type, dst_stride, \
             src, src_type, src_vstride, src_width, src_hstride, \
             acc_wr, expected_result) \
      { \
         ELK_OPCODE_##opcode, \
         ELK_EXECUTE_##exec_size, \
         dst, \
         ELK_REGISTER_TYPE_##dst_type, \
         ELK_HORIZONTAL_STRIDE_##dst_stride, \
         src, \
         ELK_REGISTER_TYPE_##src_type, \
         ELK_VERTICAL_STRIDE_##src_vstride, \
         ELK_WIDTH_##src_width, \
         ELK_HORIZONTAL_STRIDE_##src_hstride, \
         acc_wr, \
         expected_result, \
      }

      /* Some instruction that violate no restrictions, as a control */
      INST(MOV, 4, g0,   DF, 1, g0,   F,  4, 2, 2, 0, true ),
      INST(MOV, 4, g0,   F,  2, g0,   DF, 4, 4, 1, 0, true ),

      INST(MOV, 4, g0,   Q,  1, g0,   D,  4, 2, 2, 0, true ),
      INST(MOV, 4, g0,   D,  2, g0,   Q,  4, 4, 1, 0, true ),

      INST(MOV, 4, g0,   UQ, 1, g0,   UD, 4, 2, 2, 0, true ),
      INST(MOV, 4, g0,   UD, 2, g0,   UQ, 4, 4, 1, 0, true ),

      INST(MOV, 4, null, F,  1, g0,   F,  4, 4, 1, 0, true ),
      INST(MOV, 4, acc0, F,  1, g0,   F,  4, 4, 1, 0, true ),
      INST(MOV, 4, g0,   F,  1, acc0, F,  4, 4, 1, 0, true ),

      INST(MOV, 4, null, D,  1, g0,   D,  4, 4, 1, 0, true ),
      INST(MOV, 4, acc0, D,  1, g0,   D,  4, 4, 1, 0, true ),
      INST(MOV, 4, g0,   D,  1, acc0, D,  4, 4, 1, 0, true ),

      INST(MOV, 4, null, UD, 1, g0,   UD, 4, 4, 1, 0, true ),
      INST(MOV, 4, acc0, UD, 1, g0,   UD, 4, 4, 1, 0, true ),
      INST(MOV, 4, g0,   UD, 1, acc0, UD, 4, 4, 1, 0, true ),

      INST(MUL, 4, g0,   D,  2, g0,   D,  4, 2, 2, 0, true ),
      INST(MUL, 4, g0,   UD, 2, g0,   UD, 4, 2, 2, 0, true ),

      /* The PRMs say that for CHV, BXT:
       *
       *    ARF registers must never be used with 64b datatype or when
       *    operation is integer DWord multiply.
       */
      INST(MOV, 4, acc0, DF, 1, g0,   F,  4, 2, 2, 0, false),
      INST(MOV, 4, g0,   DF, 1, acc0, F,  4, 2, 2, 0, false),

      INST(MOV, 4, acc0, Q,  1, g0,   D,  4, 2, 2, 0, false),
      INST(MOV, 4, g0,   Q,  1, acc0, D,  4, 2, 2, 0, false),

      INST(MOV, 4, acc0, UQ, 1, g0,   UD, 4, 2, 2, 0, false),
      INST(MOV, 4, g0,   UQ, 1, acc0, UD, 4, 2, 2, 0, false),

      INST(MOV, 4, acc0, F,  2, g0,   DF, 4, 4, 1, 0, false),
      INST(MOV, 4, g0,   F,  2, acc0, DF, 4, 4, 1, 0, false),

      INST(MOV, 4, acc0, D,  2, g0,   Q,  4, 4, 1, 0, false),
      INST(MOV, 4, g0,   D,  2, acc0, Q,  4, 4, 1, 0, false),

      INST(MOV, 4, acc0, UD, 2, g0,   UQ, 4, 4, 1, 0, false),
      INST(MOV, 4, g0,   UD, 2, acc0, UQ, 4, 4, 1, 0, false),

      INST(MUL, 4, acc0, D,  2, g0,   D,  4, 2, 2, 0, false),
      INST(MUL, 4, acc0, UD, 2, g0,   UD, 4, 2, 2, 0, false),
      /* MUL cannot have integer accumulator sources, so don't test that */

      /* We assume that the restriction does not apply to the null register */
      INST(MOV, 4, null, DF, 1, g0,   F,  4, 2, 2, 0, true ),
      INST(MOV, 4, null, Q,  1, g0,   D,  4, 2, 2, 0, true ),
      INST(MOV, 4, null, UQ, 1, g0,   UD, 4, 2, 2, 0, true ),

      /* Check implicit accumulator write control */
      INST(MOV, 4, null, DF, 1, g0,   F,  4, 2, 2, 1, false),
      INST(MUL, 4, null, DF, 1, g0,   F,  4, 2, 2, 1, false),

#undef INST
   };

   /* These restrictions only apply to Gfx8+ */
   if (devinfo.ver < 8)
      return;

   for (const auto &tc : inst) {
      /* Skip rows whose 64-bit types the device does not provide. */
      if (!devinfo.has_64bit_float &&
          (tc.dst_type == ELK_REGISTER_TYPE_DF ||
           tc.src_type == ELK_REGISTER_TYPE_DF))
         continue;

      if (!devinfo.has_64bit_int &&
          (tc.dst_type == ELK_REGISTER_TYPE_Q ||
           tc.dst_type == ELK_REGISTER_TYPE_UQ ||
           tc.src_type == ELK_REGISTER_TYPE_Q ||
           tc.src_type == ELK_REGISTER_TYPE_UQ))
         continue;

      if (tc.opcode == ELK_OPCODE_MOV) {
         elk_MOV(p, retype(tc.dst, tc.dst_type),
                    retype(tc.src, tc.src_type));
      } else {
         /* The emitted instruction is the MUL built by elk_MUL; the opcode
          * is not rewritten afterwards, matching the other qword tests.
          */
         assert(tc.opcode == ELK_OPCODE_MUL);
         elk_MUL(p, retype(tc.dst, tc.dst_type),
                    retype(tc.src, tc.src_type),
                    retype(zero, tc.src_type));
      }
      elk_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);
      elk_inst_set_acc_wr_control(&devinfo, last_inst, tc.acc_wr);

      elk_inst_set_dst_hstride(&devinfo, last_inst, tc.dst_stride);

      elk_inst_set_src0_vstride(&devinfo, last_inst, tc.src_vstride);
      elk_inst_set_src0_width(&devinfo, last_inst, tc.src_width);
      elk_inst_set_src0_hstride(&devinfo, last_inst, tc.src_hstride);

      /* Note: The Broadwell PRM also lists the restriction that destination
       * of DWord multiplication cannot be the accumulator.
       */
      const bool gfx8_mul_to_arf =
         devinfo.ver == 8 &&
         tc.opcode == ELK_OPCODE_MUL &&
         elk_inst_dst_reg_file(&devinfo, last_inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
         elk_inst_dst_da_reg_nr(&devinfo, last_inst) != ELK_ARF_NULL;

      if (devinfo.platform == INTEL_PLATFORM_CHV || gfx8_mul_to_arf) {
         EXPECT_EQ(tc.expected_result, validate(p));
      } else {
         EXPECT_TRUE(validate(p));
      }

      clear_instructions(p);
   }

   if (!devinfo.has_64bit_float)
      return;

   /* MAC implicitly reads the accumulator */
   elk_MAC(p, retype(g0, ELK_REGISTER_TYPE_DF),
              retype(stride(g0, 4, 4, 1), ELK_REGISTER_TYPE_DF),
              retype(stride(g0, 4, 4, 1), ELK_REGISTER_TYPE_DF));
   if (devinfo.platform == INTEL_PLATFORM_CHV) {
      EXPECT_FALSE(validate(p));
   } else {
      EXPECT_TRUE(validate(p));
   }
}
2539
/* Align16 MOV/ADD with a 64-bit destination and a 32-bit source at execution
 * sizes 2 and 4.  Per the table, execution size 2 is expected to validate
 * and execution size 4 is expected to be rejected.
 */
TEST_P(validation_test, align16_64_bit_integer)
{
   /* 64-bit integer types exist on Gfx8+, and Align16 does not exist on
    * Gfx11+, so only the generations in between are exercised.
    */
   if (devinfo.ver < 8 || devinfo.ver >= 11)
      return;

#define INST(opcode, exec_size, dst_type, src_type, expected_result) \
   { \
      ELK_OPCODE_##opcode, \
      ELK_EXECUTE_##exec_size, \
      ELK_REGISTER_TYPE_##dst_type, \
      ELK_REGISTER_TYPE_##src_type, \
      expected_result, \
   }

   static const struct {
      enum elk_opcode opcode;
      unsigned exec_size;

      enum elk_reg_type dst_type;
      enum elk_reg_type src_type;

      bool expected_result;
   } cases[] = {
      /* Some instruction that violate no restrictions, as a control */
      INST(MOV, 2, Q,  D,  true ),
      INST(MOV, 2, UQ, UD, true ),
      INST(MOV, 2, DF, F,  true ),

      INST(ADD, 2, Q,  D,  true ),
      INST(ADD, 2, UQ, UD, true ),
      INST(ADD, 2, DF, F,  true ),

      /* The PRMs say that for BDW, SKL:
       *
       *    If Align16 is required for an operation with QW destination and non-QW
       *    source datatypes, the execution size cannot exceed 2.
       */

      INST(MOV, 4, Q,  D,  false),
      INST(MOV, 4, UQ, UD, false),
      INST(MOV, 4, DF, F,  false),

      INST(ADD, 4, Q,  D,  false),
      INST(ADD, 4, UQ, UD, false),
      INST(ADD, 4, DF, F,  false),
   };

#undef INST

   elk_set_default_access_mode(p, ELK_ALIGN_16);

   for (const auto &tc : cases) {
      const auto dst = retype(g0, tc.dst_type);
      const auto src = retype(g0, tc.src_type);

      switch (tc.opcode) {
      case ELK_OPCODE_MOV:
         elk_MOV(p, dst, src);
         break;
      default:
         assert(tc.opcode == ELK_OPCODE_ADD);
         /* The ADD uses the same source operand twice. */
         elk_ADD(p, dst, src, src);
         break;
      }
      elk_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      EXPECT_EQ(tc.expected_result, validate(p));

      clear_instructions(p);
   }
}
2613
/* NoDDChk/NoDDClr combined with 64-bit data types or integer DWord
 * multiplies.  On CHV each row's expected_result is compared against
 * validate(); on every other platform that reaches the loop the instruction
 * is expected to validate.
 */
TEST_P(validation_test, qword_low_power_no_depctrl)
{
   /* These restrictions only apply to Gfx8+ */
   if (devinfo.ver < 8)
      return;

   /* NoDDChk/NoDDClr does not exist on Gfx12+ */
   if (devinfo.ver >= 12)
      return;

#define INST(opcode, exec_size, dst_type, dst_stride, \
             src_type, src_vstride, src_width, src_hstride, \
             no_dd_check, no_dd_clear, expected_result) \
   { \
      ELK_OPCODE_##opcode, \
      ELK_EXECUTE_##exec_size, \
      ELK_REGISTER_TYPE_##dst_type, \
      ELK_HORIZONTAL_STRIDE_##dst_stride, \
      ELK_REGISTER_TYPE_##src_type, \
      ELK_VERTICAL_STRIDE_##src_vstride, \
      ELK_WIDTH_##src_width, \
      ELK_HORIZONTAL_STRIDE_##src_hstride, \
      no_dd_check, \
      no_dd_clear, \
      expected_result, \
   }

   static const struct {
      enum elk_opcode opcode;
      unsigned exec_size;

      enum elk_reg_type dst_type;
      unsigned dst_stride;

      enum elk_reg_type src_type;
      unsigned src_vstride;
      unsigned src_width;
      unsigned src_hstride;

      bool no_dd_check;
      bool no_dd_clear;

      bool expected_result;
   } cases[] = {
      /* Some instruction that violate no restrictions, as a control */
      INST(MOV, 4, DF, 1, F,  8, 4, 2, 0, 0, true ),
      INST(MOV, 4, Q,  1, D,  8, 4, 2, 0, 0, true ),
      INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 0, true ),

      INST(MOV, 4, F,  2, DF, 4, 4, 1, 0, 0, true ),
      INST(MOV, 4, D,  2, Q,  4, 4, 1, 0, 0, true ),
      INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 0, true ),

      INST(MUL, 8, D,  2, D,  8, 4, 2, 0, 0, true ),
      INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 0, true ),

      INST(MOV, 4, F,  1, F,  4, 4, 1, 1, 1, true ),

      /* The PRMs say that for CHV, BXT:
       *
       *    When source or destination datatype is 64b or operation is integer
       *    DWord multiply, DepCtrl must not be used.
       */
      INST(MOV, 4, DF, 1, F,  8, 4, 2, 1, 0, false),
      INST(MOV, 4, Q,  1, D,  8, 4, 2, 1, 0, false),
      INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 1, 0, false),

      INST(MOV, 4, F,  2, DF, 4, 4, 1, 1, 0, false),
      INST(MOV, 4, D,  2, Q,  4, 4, 1, 1, 0, false),
      INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 1, 0, false),

      INST(MOV, 4, DF, 1, F,  8, 4, 2, 0, 1, false),
      INST(MOV, 4, Q,  1, D,  8, 4, 2, 0, 1, false),
      INST(MOV, 4, UQ, 1, UD, 8, 4, 2, 0, 1, false),

      INST(MOV, 4, F,  2, DF, 4, 4, 1, 0, 1, false),
      INST(MOV, 4, D,  2, Q,  4, 4, 1, 0, 1, false),
      INST(MOV, 4, UD, 2, UQ, 4, 4, 1, 0, 1, false),

      INST(MUL, 8, D,  2, D,  8, 4, 2, 1, 0, false),
      INST(MUL, 8, UD, 2, UD, 8, 4, 2, 1, 0, false),

      INST(MUL, 8, D,  2, D,  8, 4, 2, 0, 1, false),
      INST(MUL, 8, UD, 2, UD, 8, 4, 2, 0, 1, false),
   };

#undef INST

   for (const auto &tc : cases) {
      /* Skip rows whose 64-bit types the device does not provide. */
      const bool uses_df = tc.dst_type == ELK_REGISTER_TYPE_DF ||
                           tc.src_type == ELK_REGISTER_TYPE_DF;
      const bool uses_qword = tc.dst_type == ELK_REGISTER_TYPE_Q ||
                              tc.dst_type == ELK_REGISTER_TYPE_UQ ||
                              tc.src_type == ELK_REGISTER_TYPE_Q ||
                              tc.src_type == ELK_REGISTER_TYPE_UQ;

      if ((uses_df && !devinfo.has_64bit_float) ||
          (uses_qword && !devinfo.has_64bit_int))
         continue;

      switch (tc.opcode) {
      case ELK_OPCODE_MOV:
         elk_MOV(p, retype(g0, tc.dst_type),
                    retype(g0, tc.src_type));
         break;
      default:
         assert(tc.opcode == ELK_OPCODE_MUL);
         elk_MUL(p, retype(g0, tc.dst_type),
                    retype(g0, tc.src_type),
                    retype(zero, tc.src_type));
         break;
      }
      elk_inst_set_exec_size(&devinfo, last_inst, tc.exec_size);

      elk_inst_set_dst_hstride(&devinfo, last_inst, tc.dst_stride);

      elk_inst_set_src0_vstride(&devinfo, last_inst, tc.src_vstride);
      elk_inst_set_src0_width(&devinfo, last_inst, tc.src_width);
      elk_inst_set_src0_hstride(&devinfo, last_inst, tc.src_hstride);

      /* The dependency-control bits are the variable under test. */
      elk_inst_set_no_dd_check(&devinfo, last_inst, tc.no_dd_check);
      elk_inst_set_no_dd_clear(&devinfo, last_inst, tc.no_dd_clear);

      if (devinfo.platform != INTEL_PLATFORM_CHV) {
         EXPECT_TRUE(validate(p));
      } else {
         EXPECT_EQ(tc.expected_result, validate(p));
      }

      clear_instructions(p);
   }
}
2744