xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/brw_eu_validate.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2015-2019 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /** @file
25  *
26  * This file implements a pass that validates shader assembly.
27  *
28  * The restrictions implemented herein are intended to verify that instructions
29  * in shader assembly do not violate restrictions documented in the graphics
30  * programming reference manuals.
31  *
32  * The restrictions are difficult for humans to quickly verify due to their
33  * complexity and abundance.
34  *
35  * It is critical that this code is thoroughly unit tested because false
36  * results will lead developers astray, which is worse than having no validator
37  * at all. Functional changes to this file without corresponding unit tests (in
38  * test_eu_validate.cpp) will be rejected.
39  */
40 
41 #include <stdlib.h>
42 #include "brw_eu.h"
43 #include "brw_disasm_info.h"
44 
/* We're going to do lots of string concatenation, so this should help.
 *
 * str points at a heap buffer holding len bytes followed by a NUL
 * terminator; a NULL str with len 0 is the empty string with no storage.
 */
struct string {
   char *str;
   size_t len;
};

/**
 * Appends the src.len bytes at src.str to dest, growing dest->str with
 * realloc() and keeping it NUL-terminated.
 *
 * On success dest->len grows by src.len.  If the allocation fails, dest is
 * left exactly as it was (the appended text is dropped) rather than leaking
 * the old buffer and writing through a NULL pointer.
 */
static void
cat(struct string *dest, const struct string src)
{
   const size_t new_len = dest->len + src.len;

   /* Hold on to the old pointer until realloc() is known to have succeeded. */
   char *grown = realloc(dest->str, new_len + 1);
   if (grown == NULL)
      return;

   /* memcpy() from a NULL source is undefined even when the length is 0. */
   if (src.len > 0)
      memcpy(grown + dest->len, src.str, src.len);

   grown[new_len] = '\0';
   dest->str = grown;
   dest->len = new_len;
}
/* Appends the NUL-terminated C string src to the struct string lvalue dest. */
#define CAT(dest, src) cat(&(dest), (struct string){(src), strlen(src)})
60 
61 static bool
contains(const struct string haystack,const struct string needle)62 contains(const struct string haystack, const struct string needle)
63 {
64    return haystack.str && memmem(haystack.str, haystack.len,
65                                  needle.str, needle.len) != NULL;
66 }
67 #define CONTAINS(haystack, needle) \
68    contains(haystack, (struct string){needle, strlen(needle)})
69 
/* Wraps a string literal into one tab-indented "ERROR: ..." line. */
#define error(str)   "\tERROR: " str "\n"
/* Whitespace as wide as the "\tERROR: " prefix produced by error(). */
#define ERROR_INDENT "\t       "

/* Records msg unconditionally; see ERROR_IF. */
#define ERROR(msg) ERROR_IF(true, msg)
/* When cond holds, appends error(msg) to the struct string named error_msg
 * in the enclosing scope, unless that exact text is already present in it.
 * msg must be a string literal because error() relies on literal
 * concatenation.
 */
#define ERROR_IF(cond, msg)                             \
   do {                                                 \
      if ((cond) && !CONTAINS(error_msg, error(msg))) { \
         CAT(error_msg, error(msg));                    \
      }                                                 \
   } while(0)

/* Calls func(isa, inst, args...) with isa, inst and error_msg taken from the
 * enclosing scope.  If the returned struct string has storage, its text is
 * appended to error_msg and the returned buffer is freed.
 */
#define CHECK(func, args...)                             \
   do {                                                  \
      struct string __msg = func(isa, inst, ##args); \
      if (__msg.str) {                                   \
         cat(&error_msg, __msg);                         \
         free(__msg.str);                                \
      }                                                  \
   } while (0)
89 
/* Decodes a stride field: 0 stays 0, any other value v becomes 1 << (v - 1).
 * Every use of the argument is parenthesized so that compound expressions
 * such as STRIDE(a & b) expand with the intended precedence.
 */
#define STRIDE(stride) ((stride) != 0 ? 1 << ((stride) - 1) : 0)
/* Decodes a width field: value v becomes 1 << v. */
#define WIDTH(width)   (1 << (width))
92 
93 static bool
inst_is_send(const struct brw_isa_info * isa,const brw_inst * inst)94 inst_is_send(const struct brw_isa_info *isa, const brw_inst *inst)
95 {
96    switch (brw_inst_opcode(isa, inst)) {
97    case BRW_OPCODE_SEND:
98    case BRW_OPCODE_SENDC:
99    case BRW_OPCODE_SENDS:
100    case BRW_OPCODE_SENDSC:
101       return true;
102    default:
103       return false;
104    }
105 }
106 
107 static bool
inst_is_split_send(const struct brw_isa_info * isa,const brw_inst * inst)108 inst_is_split_send(const struct brw_isa_info *isa, const brw_inst *inst)
109 {
110    const struct intel_device_info *devinfo = isa->devinfo;
111 
112    if (devinfo->ver >= 12) {
113       return inst_is_send(isa, inst);
114    } else {
115       switch (brw_inst_opcode(isa, inst)) {
116       case BRW_OPCODE_SENDS:
117       case BRW_OPCODE_SENDSC:
118          return true;
119       default:
120          return false;
121       }
122    }
123 }
124 
125 static unsigned
signed_type(unsigned type)126 signed_type(unsigned type)
127 {
128    return brw_type_is_uint(type) ? (type | BRW_TYPE_BASE_SINT) : type;
129 }
130 
131 static enum brw_reg_type
inst_dst_type(const struct brw_isa_info * isa,const brw_inst * inst)132 inst_dst_type(const struct brw_isa_info *isa, const brw_inst *inst)
133 {
134    const struct intel_device_info *devinfo = isa->devinfo;
135 
136    return (devinfo->ver < 12 || !inst_is_send(isa, inst)) ?
137       brw_inst_dst_type(devinfo, inst) : BRW_TYPE_D;
138 }
139 
140 static bool
inst_is_raw_move(const struct brw_isa_info * isa,const brw_inst * inst)141 inst_is_raw_move(const struct brw_isa_info *isa, const brw_inst *inst)
142 {
143    const struct intel_device_info *devinfo = isa->devinfo;
144 
145    unsigned dst_type = signed_type(inst_dst_type(isa, inst));
146    unsigned src_type = signed_type(brw_inst_src0_type(devinfo, inst));
147 
148    if (brw_inst_src0_reg_file(devinfo, inst) == IMM) {
149       /* FIXME: not strictly true */
150       if (brw_inst_src0_type(devinfo, inst) == BRW_TYPE_VF ||
151           brw_inst_src0_type(devinfo, inst) == BRW_TYPE_UV ||
152           brw_inst_src0_type(devinfo, inst) == BRW_TYPE_V) {
153          return false;
154       }
155    } else if (brw_inst_src0_negate(devinfo, inst) ||
156               brw_inst_src0_abs(devinfo, inst)) {
157       return false;
158    }
159 
160    return brw_inst_opcode(isa, inst) == BRW_OPCODE_MOV &&
161           brw_inst_saturate(devinfo, inst) == 0 &&
162           dst_type == src_type;
163 }
164 
165 static bool
dst_is_null(const struct intel_device_info * devinfo,const brw_inst * inst)166 dst_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)
167 {
168    return brw_inst_dst_reg_file(devinfo, inst) == ARF &&
169           brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
170 }
171 
172 static bool
src0_is_null(const struct intel_device_info * devinfo,const brw_inst * inst)173 src0_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)
174 {
175    return brw_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT &&
176           brw_inst_src0_reg_file(devinfo, inst) == ARF &&
177           brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
178 }
179 
180 static bool
src1_is_null(const struct intel_device_info * devinfo,const brw_inst * inst)181 src1_is_null(const struct intel_device_info *devinfo, const brw_inst *inst)
182 {
183    return brw_inst_src1_reg_file(devinfo, inst) == ARF &&
184           brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL;
185 }
186 
187 static bool
src0_is_acc(const struct intel_device_info * devinfo,const brw_inst * inst)188 src0_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst)
189 {
190    return brw_inst_src0_reg_file(devinfo, inst) == ARF &&
191           (brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
192 }
193 
194 static bool
src1_is_acc(const struct intel_device_info * devinfo,const brw_inst * inst)195 src1_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst)
196 {
197    return brw_inst_src1_reg_file(devinfo, inst) == ARF &&
198           (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR;
199 }
200 
201 static bool
src0_has_scalar_region(const struct intel_device_info * devinfo,const brw_inst * inst)202 src0_has_scalar_region(const struct intel_device_info *devinfo,
203                        const brw_inst *inst)
204 {
205    return brw_inst_src0_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
206           brw_inst_src0_width(devinfo, inst) == BRW_WIDTH_1 &&
207           brw_inst_src0_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
208 }
209 
210 static bool
src1_has_scalar_region(const struct intel_device_info * devinfo,const brw_inst * inst)211 src1_has_scalar_region(const struct intel_device_info *devinfo,
212                        const brw_inst *inst)
213 {
214    return brw_inst_src1_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 &&
215           brw_inst_src1_width(devinfo, inst) == BRW_WIDTH_1 &&
216           brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0;
217 }
218 
219 static struct string
invalid_values(const struct brw_isa_info * isa,const brw_inst * inst)220 invalid_values(const struct brw_isa_info *isa, const brw_inst *inst)
221 {
222    const struct intel_device_info *devinfo = isa->devinfo;
223 
224    unsigned num_sources = brw_num_sources_from_inst(isa, inst);
225    struct string error_msg = { .str = NULL, .len = 0 };
226 
227    switch ((enum brw_execution_size) brw_inst_exec_size(devinfo, inst)) {
228    case BRW_EXECUTE_1:
229    case BRW_EXECUTE_2:
230    case BRW_EXECUTE_4:
231    case BRW_EXECUTE_8:
232    case BRW_EXECUTE_16:
233    case BRW_EXECUTE_32:
234       break;
235    default:
236       ERROR("invalid execution size");
237       break;
238    }
239 
240    if (error_msg.str)
241       return error_msg;
242 
243    if (devinfo->ver >= 12) {
244       unsigned group_size = 1 << brw_inst_exec_size(devinfo, inst);
245       unsigned qtr_ctrl = brw_inst_qtr_control(devinfo, inst);
246       unsigned nib_ctrl =
247          devinfo->ver == 12 ? brw_inst_nib_control(devinfo, inst) : 0;
248 
249       unsigned chan_off = (qtr_ctrl * 2 + nib_ctrl) << 2;
250       ERROR_IF(chan_off % group_size != 0,
251                "The execution size must be a factor of the chosen offset");
252    }
253 
254    if (inst_is_send(isa, inst))
255       return error_msg;
256 
257    if (error_msg.str)
258       return error_msg;
259 
260    if (num_sources == 3) {
261       if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
262          if (devinfo->ver >= 10) {
263             ERROR_IF(brw_inst_3src_a1_dst_type (devinfo, inst) == BRW_TYPE_INVALID ||
264                      brw_inst_3src_a1_src0_type(devinfo, inst) == BRW_TYPE_INVALID ||
265                      brw_inst_3src_a1_src1_type(devinfo, inst) == BRW_TYPE_INVALID ||
266                      brw_inst_3src_a1_src2_type(devinfo, inst) == BRW_TYPE_INVALID,
267                      "invalid register type encoding");
268          } else {
269             ERROR("Align1 mode not allowed on Gen < 10");
270          }
271       } else {
272          ERROR_IF(brw_inst_3src_a16_dst_type(devinfo, inst) == BRW_TYPE_INVALID ||
273                   brw_inst_3src_a16_src_type(devinfo, inst) == BRW_TYPE_INVALID,
274                   "invalid register type encoding");
275       }
276    } else {
277       ERROR_IF(brw_inst_dst_type (devinfo, inst) == BRW_TYPE_INVALID ||
278                (num_sources > 0 &&
279                 brw_inst_src0_type(devinfo, inst) == BRW_TYPE_INVALID) ||
280                (num_sources > 1 &&
281                 brw_inst_src1_type(devinfo, inst) == BRW_TYPE_INVALID),
282                "invalid register type encoding");
283    }
284 
285    return error_msg;
286 }
287 
288 static struct string
sources_not_null(const struct brw_isa_info * isa,const brw_inst * inst)289 sources_not_null(const struct brw_isa_info *isa,
290                  const brw_inst *inst)
291 {
292    const struct intel_device_info *devinfo = isa->devinfo;
293    unsigned num_sources = brw_num_sources_from_inst(isa, inst);
294    struct string error_msg = { .str = NULL, .len = 0 };
295 
296    /* Nothing to test. 3-src instructions can only have GRF sources, and
297     * there's no bit to control the file.
298     */
299    if (num_sources == 3)
300       return (struct string){};
301 
302    /* Nothing to test.  Split sends can only encode a file in sources that are
303     * allowed to be NULL.
304     */
305    if (inst_is_split_send(isa, inst))
306       return (struct string){};
307 
308    if (num_sources >= 1 && brw_inst_opcode(isa, inst) != BRW_OPCODE_SYNC)
309       ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");
310 
311    if (num_sources == 2)
312       ERROR_IF(src1_is_null(devinfo, inst), "src1 is null");
313 
314    return error_msg;
315 }
316 
317 static struct string
alignment_supported(const struct brw_isa_info * isa,const brw_inst * inst)318 alignment_supported(const struct brw_isa_info *isa,
319                     const brw_inst *inst)
320 {
321    const struct intel_device_info *devinfo = isa->devinfo;
322    struct string error_msg = { .str = NULL, .len = 0 };
323 
324    ERROR_IF(devinfo->ver >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16,
325             "Align16 not supported");
326 
327    return error_msg;
328 }
329 
330 static bool
inst_uses_src_acc(const struct brw_isa_info * isa,const brw_inst * inst)331 inst_uses_src_acc(const struct brw_isa_info *isa,
332                   const brw_inst *inst)
333 {
334    const struct intel_device_info *devinfo = isa->devinfo;
335 
336    /* Check instructions that use implicit accumulator sources */
337    switch (brw_inst_opcode(isa, inst)) {
338    case BRW_OPCODE_MAC:
339    case BRW_OPCODE_MACH:
340       return true;
341    default:
342       break;
343    }
344 
345    /* FIXME: support 3-src instructions */
346    unsigned num_sources = brw_num_sources_from_inst(isa, inst);
347    assert(num_sources < 3);
348 
349    return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst));
350 }
351 
352 static struct string
send_restrictions(const struct brw_isa_info * isa,const brw_inst * inst)353 send_restrictions(const struct brw_isa_info *isa,
354                   const brw_inst *inst)
355 {
356    const struct intel_device_info *devinfo = isa->devinfo;
357 
358    struct string error_msg = { .str = NULL, .len = 0 };
359 
360    if (inst_is_split_send(isa, inst)) {
361       ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == ARF &&
362                brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL,
363                "src1 of split send must be a GRF or NULL");
364 
365       ERROR_IF(brw_inst_eot(devinfo, inst) &&
366                brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
367                "send with EOT must use g112-g127");
368       ERROR_IF(brw_inst_eot(devinfo, inst) &&
369                brw_inst_send_src1_reg_file(devinfo, inst) == FIXED_GRF &&
370                brw_inst_send_src1_reg_nr(devinfo, inst) < 112,
371                "send with EOT must use g112-g127");
372 
373       if (brw_inst_send_src0_reg_file(devinfo, inst) == FIXED_GRF &&
374           brw_inst_send_src1_reg_file(devinfo, inst) == FIXED_GRF) {
375          /* Assume minimums if we don't know */
376          unsigned mlen = 1;
377          if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) {
378             const uint32_t desc = brw_inst_send_desc(devinfo, inst);
379             mlen = brw_message_desc_mlen(devinfo, desc) / reg_unit(devinfo);
380          }
381 
382          unsigned ex_mlen = 1;
383          if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) {
384             const uint32_t ex_desc = brw_inst_sends_ex_desc(devinfo, inst);
385             ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc) /
386                       reg_unit(devinfo);
387          }
388          const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst);
389          const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst);
390          ERROR_IF((src0_reg_nr <= src1_reg_nr &&
391                    src1_reg_nr < src0_reg_nr + mlen) ||
392                   (src1_reg_nr <= src0_reg_nr &&
393                    src0_reg_nr < src1_reg_nr + ex_mlen),
394                    "split send payloads must not overlap");
395       }
396    } else if (inst_is_send(isa, inst)) {
397       ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
398                "send must use direct addressing");
399 
400       ERROR_IF(brw_inst_send_src0_reg_file(devinfo, inst) != FIXED_GRF,
401                "send from non-GRF");
402       ERROR_IF(brw_inst_eot(devinfo, inst) &&
403                brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
404                "send with EOT must use g112-g127");
405 
406       ERROR_IF(!dst_is_null(devinfo, inst) &&
407                (brw_inst_dst_da_reg_nr(devinfo, inst) +
408                 brw_inst_rlen(devinfo, inst) > 127) &&
409                (brw_inst_src0_da_reg_nr(devinfo, inst) +
410                 brw_inst_mlen(devinfo, inst) >
411                 brw_inst_dst_da_reg_nr(devinfo, inst)),
412                "r127 must not be used for return address when there is "
413                "a src and dest overlap");
414    }
415 
416    return error_msg;
417 }
418 
419 static bool
is_unsupported_inst(const struct brw_isa_info * isa,const brw_inst * inst)420 is_unsupported_inst(const struct brw_isa_info *isa,
421                     const brw_inst *inst)
422 {
423    return brw_inst_opcode(isa, inst) == BRW_OPCODE_ILLEGAL;
424 }
425 
426 /**
427  * Returns whether a combination of two types would qualify as mixed float
428  * operation mode
429  */
430 static inline bool
types_are_mixed_float(enum brw_reg_type t0,enum brw_reg_type t1)431 types_are_mixed_float(enum brw_reg_type t0, enum brw_reg_type t1)
432 {
433    return (t0 == BRW_TYPE_F && t1 == BRW_TYPE_HF) ||
434           (t1 == BRW_TYPE_F && t0 == BRW_TYPE_HF);
435 }
436 
437 static enum brw_reg_type
execution_type_for_type(enum brw_reg_type type)438 execution_type_for_type(enum brw_reg_type type)
439 {
440    switch (type) {
441    case BRW_TYPE_DF:
442    case BRW_TYPE_F:
443    case BRW_TYPE_HF:
444       return type;
445 
446    case BRW_TYPE_VF:
447       return BRW_TYPE_F;
448 
449    case BRW_TYPE_Q:
450    case BRW_TYPE_UQ:
451       return BRW_TYPE_Q;
452 
453    case BRW_TYPE_D:
454    case BRW_TYPE_UD:
455       return BRW_TYPE_D;
456 
457    case BRW_TYPE_W:
458    case BRW_TYPE_UW:
459    case BRW_TYPE_B:
460    case BRW_TYPE_UB:
461    case BRW_TYPE_V:
462    case BRW_TYPE_UV:
463       return BRW_TYPE_W;
464    default:
465       unreachable("invalid type");
466    }
467 }
468 
469 /**
470  * Returns the execution type of an instruction \p inst
471  */
472 static enum brw_reg_type
execution_type(const struct brw_isa_info * isa,const brw_inst * inst)473 execution_type(const struct brw_isa_info *isa, const brw_inst *inst)
474 {
475    const struct intel_device_info *devinfo = isa->devinfo;
476 
477    unsigned num_sources = brw_num_sources_from_inst(isa, inst);
478    enum brw_reg_type src0_exec_type, src1_exec_type;
479 
480    /* Execution data type is independent of destination data type, except in
481     * mixed F/HF instructions.
482     */
483    enum brw_reg_type dst_exec_type = inst_dst_type(isa, inst);
484 
485    src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst));
486    if (num_sources == 1) {
487       if (src0_exec_type == BRW_TYPE_HF)
488          return dst_exec_type;
489       return src0_exec_type;
490    }
491 
492    src1_exec_type = execution_type_for_type(brw_inst_src1_type(devinfo, inst));
493    if (types_are_mixed_float(src0_exec_type, src1_exec_type) ||
494        types_are_mixed_float(src0_exec_type, dst_exec_type) ||
495        types_are_mixed_float(src1_exec_type, dst_exec_type)) {
496       return BRW_TYPE_F;
497    }
498 
499    if (src0_exec_type == src1_exec_type)
500       return src0_exec_type;
501 
502    if (src0_exec_type == BRW_TYPE_Q ||
503        src1_exec_type == BRW_TYPE_Q)
504       return BRW_TYPE_Q;
505 
506    if (src0_exec_type == BRW_TYPE_D ||
507        src1_exec_type == BRW_TYPE_D)
508       return BRW_TYPE_D;
509 
510    if (src0_exec_type == BRW_TYPE_W ||
511        src1_exec_type == BRW_TYPE_W)
512       return BRW_TYPE_W;
513 
514    if (src0_exec_type == BRW_TYPE_DF ||
515        src1_exec_type == BRW_TYPE_DF)
516       return BRW_TYPE_DF;
517 
518    unreachable("not reached");
519 }
520 
/**
 * Returns whether a region is packed
 *
 * A packed region has its elements adjacent in memory: no gaps between them,
 * no overlap and no replicated values.  That requires the vertical stride to
 * equal the width, with a horizontal stride of 0 for a single-element row
 * and 1 otherwise.
 */
static bool
is_packed(unsigned vstride, unsigned width, unsigned hstride)
{
   if (vstride != width)
      return false;

   const unsigned packed_hstride = (vstride == 1) ? 0 : 1;
   return hstride == packed_hstride;
}
540 
/**
 * Returns whether a region is linear
 *
 * In a linear region no elements overlap and none are replicated.  Gaps
 * between elements (i.e. strided values) are fine, which is what sets it
 * apart from a packed region.
 */
static bool
is_linear(unsigned vstride, unsigned width, unsigned hstride)
{
   /* Each row starts exactly where the previous one ended. */
   if (vstride == width * hstride)
      return true;

   /* Otherwise only single-element rows with no horizontal stride qualify. */
   return hstride == 0 && width == 1;
}
553 
554 /**
555  * Returns whether an instruction is an explicit or implicit conversion
556  * to/from half-float.
557  */
558 static bool
is_half_float_conversion(const struct brw_isa_info * isa,const brw_inst * inst)559 is_half_float_conversion(const struct brw_isa_info *isa,
560                          const brw_inst *inst)
561 {
562    const struct intel_device_info *devinfo = isa->devinfo;
563 
564    enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
565 
566    unsigned num_sources = brw_num_sources_from_inst(isa, inst);
567    enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
568 
569    if (dst_type != src0_type &&
570        (dst_type == BRW_TYPE_HF || src0_type == BRW_TYPE_HF)) {
571       return true;
572    } else if (num_sources > 1) {
573       enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
574       return dst_type != src1_type &&
575             (dst_type == BRW_TYPE_HF ||
576              src1_type == BRW_TYPE_HF);
577    }
578 
579    return false;
580 }
581 
582 /*
583  * Returns whether an instruction is using mixed float operation mode
584  */
585 static bool
is_mixed_float(const struct brw_isa_info * isa,const brw_inst * inst)586 is_mixed_float(const struct brw_isa_info *isa, const brw_inst *inst)
587 {
588    const struct intel_device_info *devinfo = isa->devinfo;
589 
590    if (inst_is_send(isa, inst))
591       return false;
592 
593    unsigned opcode = brw_inst_opcode(isa, inst);
594    const struct opcode_desc *desc = brw_opcode_desc(isa, opcode);
595    if (desc->ndst == 0)
596       return false;
597 
598    /* FIXME: support 3-src instructions */
599    unsigned num_sources = brw_num_sources_from_inst(isa, inst);
600    assert(num_sources < 3);
601 
602    enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
603    enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
604 
605    if (num_sources == 1)
606       return types_are_mixed_float(src0_type, dst_type);
607 
608    enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
609 
610    return types_are_mixed_float(src0_type, src1_type) ||
611           types_are_mixed_float(src0_type, dst_type) ||
612           types_are_mixed_float(src1_type, dst_type);
613 }
614 
615 /**
616  * Returns whether an instruction is an explicit or implicit conversion
617  * to/from byte.
618  */
619 static bool
is_byte_conversion(const struct brw_isa_info * isa,const brw_inst * inst)620 is_byte_conversion(const struct brw_isa_info *isa,
621                    const brw_inst *inst)
622 {
623    const struct intel_device_info *devinfo = isa->devinfo;
624 
625    enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
626 
627    unsigned num_sources = brw_num_sources_from_inst(isa, inst);
628    enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
629 
630    if (dst_type != src0_type &&
631        (brw_type_size_bytes(dst_type) == 1 ||
632         brw_type_size_bytes(src0_type) == 1)) {
633       return true;
634    } else if (num_sources > 1) {
635       enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
636       return dst_type != src1_type &&
637             (brw_type_size_bytes(dst_type) == 1 ||
638              brw_type_size_bytes(src1_type) == 1);
639    }
640 
641    return false;
642 }
643 
644 /**
645  * Checks restrictions listed in "General Restrictions Based on Operand Types"
646  * in the "Register Region Restrictions" section.
647  */
648 static struct string
general_restrictions_based_on_operand_types(const struct brw_isa_info * isa,const brw_inst * inst)649 general_restrictions_based_on_operand_types(const struct brw_isa_info *isa,
650                                             const brw_inst *inst)
651 {
652    const struct intel_device_info *devinfo = isa->devinfo;
653 
654    const struct opcode_desc *desc =
655       brw_opcode_desc(isa, brw_inst_opcode(isa, inst));
656    unsigned num_sources = brw_num_sources_from_inst(isa, inst);
657    unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
658    struct string error_msg = { .str = NULL, .len = 0 };
659 
660    if (inst_is_send(isa, inst))
661       return error_msg;
662 
663    if (devinfo->ver >= 11) {
664       /* A register type of B or UB for DPAS actually means 4 bytes packed into
665        * a D or UD, so it is allowed.
666        */
667       if (num_sources == 3 && brw_inst_opcode(isa, inst) != BRW_OPCODE_DPAS) {
668          ERROR_IF(brw_type_size_bytes(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 ||
669                   brw_type_size_bytes(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1,
670                   "Byte data type is not supported for src1/2 register regioning. This includes "
671                   "byte broadcast as well.");
672       }
673       if (num_sources == 2) {
674          ERROR_IF(brw_type_size_bytes(brw_inst_src1_type(devinfo, inst)) == 1,
675                   "Byte data type is not supported for src1 register regioning. This includes "
676                   "byte broadcast as well.");
677       }
678    }
679 
680    enum brw_reg_type dst_type;
681 
682    if (num_sources == 3) {
683       if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1)
684          dst_type = brw_inst_3src_a1_dst_type(devinfo, inst);
685       else
686          dst_type = brw_inst_3src_a16_dst_type(devinfo, inst);
687    } else {
688       dst_type = inst_dst_type(isa, inst);
689    }
690 
691    ERROR_IF(dst_type == BRW_TYPE_DF &&
692             !devinfo->has_64bit_float,
693             "64-bit float destination, but platform does not support it");
694 
695    ERROR_IF((dst_type == BRW_TYPE_Q ||
696              dst_type == BRW_TYPE_UQ) &&
697             !devinfo->has_64bit_int,
698             "64-bit int destination, but platform does not support it");
699 
700    for (unsigned s = 0; s < num_sources; s++) {
701       enum brw_reg_type src_type;
702       if (num_sources == 3) {
703          if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
704             switch (s) {
705             case 0: src_type = brw_inst_3src_a1_src0_type(devinfo, inst); break;
706             case 1: src_type = brw_inst_3src_a1_src1_type(devinfo, inst); break;
707             case 2: src_type = brw_inst_3src_a1_src2_type(devinfo, inst); break;
708             default: unreachable("invalid src");
709             }
710          } else {
711             src_type = brw_inst_3src_a16_src_type(devinfo, inst);
712          }
713       } else {
714          switch (s) {
715          case 0: src_type = brw_inst_src0_type(devinfo, inst); break;
716          case 1: src_type = brw_inst_src1_type(devinfo, inst); break;
717          default: unreachable("invalid src");
718          }
719       }
720 
721       ERROR_IF(src_type == BRW_TYPE_DF &&
722                !devinfo->has_64bit_float,
723                "64-bit float source, but platform does not support it");
724 
725       ERROR_IF((src_type == BRW_TYPE_Q ||
726                 src_type == BRW_TYPE_UQ) &&
727                !devinfo->has_64bit_int,
728                "64-bit int source, but platform does not support it");
729       if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 &&
730           num_sources == 3 && brw_type_size_bytes(src_type) > 4) {
731          /* From the Broadwell PRM, Volume 7 "3D Media GPGPU", page 944:
732           *
733           *    "This is applicable to 32b datatypes and 16b datatype. 64b
734           *    datatypes cannot use the replicate control."
735           */
736          switch (s) {
737          case 0:
738             ERROR_IF(brw_inst_3src_a16_src0_rep_ctrl(devinfo, inst),
739                      "RepCtrl must be zero for 64-bit source 0");
740             break;
741          case 1:
742             ERROR_IF(brw_inst_3src_a16_src1_rep_ctrl(devinfo, inst),
743                      "RepCtrl must be zero for 64-bit source 1");
744             break;
745          case 2:
746             ERROR_IF(brw_inst_3src_a16_src2_rep_ctrl(devinfo, inst),
747                      "RepCtrl must be zero for 64-bit source 2");
748             break;
749          default: unreachable("invalid src");
750          }
751       }
752    }
753 
754    if (num_sources == 3)
755       return error_msg;
756 
757    if (exec_size == 1)
758       return error_msg;
759 
760    if (desc->ndst == 0)
761       return error_msg;
762 
763    if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH &&
764        intel_needs_workaround(devinfo, 22016140776)) {
765       /* Wa_22016140776:
766        *
767        *    Scalar broadcast on HF math (packed or unpacked) must not be
768        *    used.  Compiler must use a mov instruction to expand the scalar
769        *    value to a vector before using in a HF (packed or unpacked)
770        *    math operation.
771        */
772       ERROR_IF(brw_inst_src0_type(devinfo, inst) == BRW_TYPE_HF &&
773                src0_has_scalar_region(devinfo, inst),
774                "Scalar broadcast on HF math (packed or unpacked) must not "
775                "be used.");
776 
777       if (num_sources > 1) {
778          ERROR_IF(brw_inst_src1_type(devinfo, inst) == BRW_TYPE_HF &&
779                   src1_has_scalar_region(devinfo, inst),
780                   "Scalar broadcast on HF math (packed or unpacked) must not "
781                   "be used.");
782       }
783    }
784 
785    /* The PRMs say:
786     *
787     *    Where n is the largest element size in bytes for any source or
788     *    destination operand type, ExecSize * n must be <= 64.
789     *
790     * But we do not attempt to enforce it, because it is implied by other
791     * rules:
792     *
793     *    - that the destination stride must match the execution data type
794     *    - sources may not span more than two adjacent GRF registers
795     *    - destination may not span more than two adjacent GRF registers
796     *
797     * In fact, checking it would weaken testing of the other rules.
798     */
799 
800    unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
801    bool dst_type_is_byte =
802       inst_dst_type(isa, inst) == BRW_TYPE_B ||
803       inst_dst_type(isa, inst) == BRW_TYPE_UB;
804 
805    if (dst_type_is_byte) {
806       if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) {
807          if (!inst_is_raw_move(isa, inst))
808             ERROR("Only raw MOV supports a packed-byte destination");
809          return error_msg;
810       }
811    }
812 
813    unsigned exec_type = execution_type(isa, inst);
814    unsigned exec_type_size = brw_type_size_bytes(exec_type);
815    unsigned dst_type_size = brw_type_size_bytes(dst_type);
816 
817    if (is_byte_conversion(isa, inst)) {
818       /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
819        *
820        *    "There is no direct conversion from B/UB to DF or DF to B/UB.
821        *     There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB."
822        *
823        * Even if these restrictions are listed for the MOV instruction, we
824        * validate this more generally, since there is the possibility
825        * of implicit conversions from other instructions.
826        */
827       enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
828       enum brw_reg_type src1_type = num_sources > 1 ?
829                                     brw_inst_src1_type(devinfo, inst) : 0;
830 
831       ERROR_IF(brw_type_size_bytes(dst_type) == 1 &&
832                (brw_type_size_bytes(src0_type) == 8 ||
833                 (num_sources > 1 && brw_type_size_bytes(src1_type) == 8)),
834                "There are no direct conversions between 64-bit types and B/UB");
835 
836       ERROR_IF(brw_type_size_bytes(dst_type) == 8 &&
837                (brw_type_size_bytes(src0_type) == 1 ||
838                 (num_sources > 1 && brw_type_size_bytes(src1_type) == 1)),
839                "There are no direct conversions between 64-bit types and B/UB");
840    }
841 
842    if (is_half_float_conversion(isa, inst)) {
843       /**
844        * A helper to validate used in the validation of the following restriction
845        * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
846        *
847        *    "There is no direct conversion from HF to DF or DF to HF.
848        *     There is no direct conversion from HF to Q/UQ or Q/UQ to HF."
849        *
850        * Even if these restrictions are listed for the MOV instruction, we
851        * validate this more generally, since there is the possibility
852        * of implicit conversions from other instructions, such us implicit
853        * conversion from integer to HF with the ADD instruction in SKL+.
854        */
855       enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
856       enum brw_reg_type src1_type = num_sources > 1 ?
857                                     brw_inst_src1_type(devinfo, inst) : 0;
858       ERROR_IF(dst_type == BRW_TYPE_HF &&
859                (brw_type_size_bytes(src0_type) == 8 ||
860                 (num_sources > 1 && brw_type_size_bytes(src1_type) == 8)),
861                "There are no direct conversions between 64-bit types and HF");
862 
863       ERROR_IF(brw_type_size_bytes(dst_type) == 8 &&
864                (src0_type == BRW_TYPE_HF ||
865                 (num_sources > 1 && src1_type == BRW_TYPE_HF)),
866                "There are no direct conversions between 64-bit types and HF");
867 
868       /* From the BDW+ PRM:
869        *
870        *   "Conversion between Integer and HF (Half Float) must be
871        *    DWord-aligned and strided by a DWord on the destination."
872        *
873        * Also, the above restrictions seems to be expanded on CHV and SKL+ by:
874        *
875        *   "There is a relaxed alignment rule for word destinations. When
876        *    the destination type is word (UW, W, HF), destination data types
877        *    can be aligned to either the lowest word or the second lowest
878        *    word of the execution channel. This means the destination data
879        *    words can be either all in the even word locations or all in the
880        *    odd word locations."
881        *
882        * We do not implement the second rule as is though, since empirical
883        * testing shows inconsistencies:
884        *   - It suggests that packed 16-bit is not allowed, which is not true.
885        *   - It suggests that conversions from Q/DF to W (which need to be
886        *     64-bit aligned on the destination) are not possible, which is
887        *     not true.
888        *
889        * So from this rule we only validate the implication that conversions
890        * from F to HF need to be DWord strided (except in Align1 mixed
891        * float mode where packed fp16 destination is allowed so long as the
892        * destination is oword-aligned).
893        *
894        * Finally, we only validate this for Align1 because Align16 always
895        * requires packed destinations, so these restrictions can't possibly
896        * apply to Align16 mode.
897        */
898       if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
899          if ((dst_type == BRW_TYPE_HF &&
900               (brw_type_is_int(src0_type) ||
901                (num_sources > 1 && brw_type_is_int(src1_type)))) ||
902              (brw_type_is_int(dst_type) &&
903               (src0_type == BRW_TYPE_HF ||
904                (num_sources > 1 && src1_type == BRW_TYPE_HF)))) {
905             ERROR_IF(dst_stride * dst_type_size != 4,
906                      "Conversions between integer and half-float must be "
907                      "strided by a DWord on the destination");
908 
909             unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
910             ERROR_IF(subreg % 4 != 0,
911                      "Conversions between integer and half-float must be "
912                      "aligned to a DWord on the destination");
913          } else if (dst_type == BRW_TYPE_HF) {
914             unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
915             ERROR_IF(dst_stride != 2 &&
916                      !(is_mixed_float(isa, inst) &&
917                        dst_stride == 1 && subreg % 16 == 0),
918                      "Conversions to HF must have either all words in even "
919                      "word locations or all words in odd word locations or "
920                      "be mixed-float with Oword-aligned packed destination");
921          }
922       }
923    }
924 
925    /* There are special regioning rules for mixed-float mode in CHV and SKL that
926     * override the general rule for the ratio of sizes of the destination type
927     * and the execution type. We will add validation for those in a later patch.
928     */
929    bool validate_dst_size_and_exec_size_ratio = !is_mixed_float(isa, inst);
930 
931    if (validate_dst_size_and_exec_size_ratio &&
932        exec_type_size > dst_type_size) {
933       if (!(dst_type_is_byte && inst_is_raw_move(isa, inst))) {
934          ERROR_IF(dst_stride * dst_type_size != exec_type_size,
935                   "Destination stride must be equal to the ratio of the sizes "
936                   "of the execution data type to the destination type");
937       }
938 
939       unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
940 
941       if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
942           brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) {
943          /* The i965 PRM says:
944           *
945           *    Implementation Restriction: The relaxed alignment rule for byte
946           *    destination (#10.5) is not supported.
947           */
948          if (dst_type_is_byte) {
949             ERROR_IF(subreg % exec_type_size != 0 &&
950                      subreg % exec_type_size != 1,
951                      "Destination subreg must be aligned to the size of the "
952                      "execution data type (or to the next lowest byte for byte "
953                      "destinations)");
954          } else {
955             ERROR_IF(subreg % exec_type_size != 0,
956                      "Destination subreg must be aligned to the size of the "
957                      "execution data type");
958          }
959       }
960    }
961 
962    return error_msg;
963 }
964 
965 /**
966  * Checks restrictions listed in "General Restrictions on Regioning Parameters"
967  * in the "Register Region Restrictions" section.
968  */
969 static struct string
general_restrictions_on_region_parameters(const struct brw_isa_info * isa,const brw_inst * inst)970 general_restrictions_on_region_parameters(const struct brw_isa_info *isa,
971                                           const brw_inst *inst)
972 {
973    const struct intel_device_info *devinfo = isa->devinfo;
974 
975    const struct opcode_desc *desc =
976       brw_opcode_desc(isa, brw_inst_opcode(isa, inst));
977    unsigned num_sources = brw_num_sources_from_inst(isa, inst);
978    unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
979    struct string error_msg = { .str = NULL, .len = 0 };
980 
981    if (num_sources == 3)
982       return (struct string){};
983 
984    /* Split sends don't have the bits in the instruction to encode regions so
985     * there's nothing to check.
986     */
987    if (inst_is_split_send(isa, inst))
988       return (struct string){};
989 
990    if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) {
991       if (desc->ndst != 0 && !dst_is_null(devinfo, inst))
992          ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1,
993                   "Destination Horizontal Stride must be 1");
994 
995       if (num_sources >= 1) {
996          ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != IMM &&
997                   brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
998                   brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
999                   brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
1000                   "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
1001       }
1002 
1003       if (num_sources == 2) {
1004          ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != IMM &&
1005                   brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 &&
1006                   brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 &&
1007                   brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
1008                   "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
1009       }
1010 
1011       return error_msg;
1012    }
1013 
1014    for (unsigned i = 0; i < num_sources; i++) {
1015       unsigned vstride, width, hstride, element_size, subreg;
1016       enum brw_reg_type type;
1017 
1018 #define DO_SRC(n)                                                              \
1019       if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
1020           IMM)                                                 \
1021          continue;                                                             \
1022                                                                                \
1023       vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
1024       width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
1025       hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
1026       type = brw_inst_src ## n ## _type(devinfo, inst);                        \
1027       element_size = brw_type_size_bytes(type);                                \
1028       subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst)
1029 
1030       if (i == 0) {
1031          DO_SRC(0);
1032       } else {
1033          DO_SRC(1);
1034       }
1035 #undef DO_SRC
1036 
1037       /* ExecSize must be greater than or equal to Width. */
1038       ERROR_IF(exec_size < width, "ExecSize must be greater than or equal "
1039                                   "to Width");
1040 
1041       /* If ExecSize = Width and HorzStride ≠ 0,
1042        * VertStride must be set to Width * HorzStride.
1043        */
1044       if (exec_size == width && hstride != 0) {
1045          ERROR_IF(vstride != width * hstride,
1046                   "If ExecSize = Width and HorzStride ≠ 0, "
1047                   "VertStride must be set to Width * HorzStride");
1048       }
1049 
1050       /* If Width = 1, HorzStride must be 0 regardless of the values of
1051        * ExecSize and VertStride.
1052        */
1053       if (width == 1) {
1054          ERROR_IF(hstride != 0,
1055                   "If Width = 1, HorzStride must be 0 regardless "
1056                   "of the values of ExecSize and VertStride");
1057       }
1058 
1059       /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
1060       if (exec_size == 1 && width == 1) {
1061          ERROR_IF(vstride != 0 || hstride != 0,
1062                   "If ExecSize = Width = 1, both VertStride "
1063                   "and HorzStride must be 0");
1064       }
1065 
1066       /* If VertStride = HorzStride = 0, Width must be 1 regardless of the
1067        * value of ExecSize.
1068        */
1069       if (vstride == 0 && hstride == 0) {
1070          ERROR_IF(width != 1,
1071                   "If VertStride = HorzStride = 0, Width must be "
1072                   "1 regardless of the value of ExecSize");
1073       }
1074 
1075       /* VertStride must be used to cross GRF register boundaries. This rule
1076        * implies that elements within a 'Width' cannot cross GRF boundaries.
1077        */
1078       unsigned rowbase = subreg;
1079       assert(util_is_power_of_two_nonzero(reg_unit(devinfo)));
1080       unsigned grf_size_shift = ffs(REG_SIZE * reg_unit(devinfo)) - 1;
1081 
1082       for (int y = 0; y < exec_size / width; y++) {
1083          bool spans_grfs = false;
1084          unsigned offset = rowbase;
1085          unsigned first_grf = offset >> grf_size_shift;
1086 
1087          for (int x = 0; x < width; x++) {
1088             const unsigned end_byte = offset + (element_size - 1);
1089             const unsigned end_grf = end_byte >> grf_size_shift;
1090             spans_grfs = end_grf != first_grf;
1091             if (spans_grfs)
1092                break;
1093             offset += hstride * element_size;
1094          }
1095 
1096          rowbase += vstride * element_size;
1097 
1098          if (spans_grfs) {
1099             ERROR("VertStride must be used to cross GRF register boundaries");
1100             break;
1101          }
1102       }
1103    }
1104 
1105    /* Dst.HorzStride must not be 0. */
1106    if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) {
1107       ERROR_IF(brw_inst_dst_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0,
1108                "Destination Horizontal Stride must not be 0");
1109    }
1110 
1111    return error_msg;
1112 }
1113 
1114 static struct string
special_restrictions_for_mixed_float_mode(const struct brw_isa_info * isa,const brw_inst * inst)1115 special_restrictions_for_mixed_float_mode(const struct brw_isa_info *isa,
1116                                           const brw_inst *inst)
1117 {
1118    const struct intel_device_info *devinfo = isa->devinfo;
1119 
1120    struct string error_msg = { .str = NULL, .len = 0 };
1121 
1122    const unsigned opcode = brw_inst_opcode(isa, inst);
1123    const unsigned num_sources = brw_num_sources_from_inst(isa, inst);
1124    if (num_sources >= 3)
1125       return error_msg;
1126 
1127    if (!is_mixed_float(isa, inst))
1128       return error_msg;
1129 
1130    unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
1131    bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16;
1132 
1133    enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
1134    enum brw_reg_type src1_type = num_sources > 1 ?
1135                                  brw_inst_src1_type(devinfo, inst) : 0;
1136    enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst);
1137 
1138    unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1139    bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride);
1140 
1141    /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1142     * Float Operations:
1143     *
1144     *    "Indirect addressing on source is not supported when source and
1145     *     destination data types are mixed float."
1146     */
1147    ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT ||
1148             (num_sources > 1 &&
1149              brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT),
1150             "Indirect addressing on source is not supported when source and "
1151             "destination data types are mixed float");
1152 
1153    /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1154     * Float Operations:
1155     *
1156     *    "No SIMD16 in mixed mode when destination is f32. Instruction
1157     *     execution size must be no more than 8."
1158     */
1159    ERROR_IF(exec_size > 8 && devinfo->ver < 20 &&
1160             dst_type == BRW_TYPE_F &&
1161             opcode != BRW_OPCODE_MOV,
1162             "Mixed float mode with 32-bit float destination is limited "
1163             "to SIMD8");
1164 
1165    if (is_align16) {
1166       /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1167        * Float Operations:
1168        *
1169        *   "In Align16 mode, when half float and float data types are mixed
1170        *    between source operands OR between source and destination operands,
1171        *    the register content are assumed to be packed."
1172        *
1173        * Since Align16 doesn't have a concept of horizontal stride (or width),
1174        * it means that vertical stride must always be 4, since 0 and 2 would
1175        * lead to replicated data, and any other value is disallowed in Align16.
1176        */
1177       ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
1178                "Align16 mixed float mode assumes packed data (vstride must be 4");
1179 
1180       ERROR_IF(num_sources >= 2 &&
1181                brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4,
1182                "Align16 mixed float mode assumes packed data (vstride must be 4");
1183 
1184       /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1185        * Float Operations:
1186        *
1187        *   "For Align16 mixed mode, both input and output packed f16 data
1188        *    must be oword aligned, no oword crossing in packed f16."
1189        *
1190        * The previous rule requires that Align16 operands are always packed,
1191        * and since there is only one bit for Align16 subnr, which represents
1192        * offsets 0B and 16B, this rule is always enforced and we don't need to
1193        * validate it.
1194        */
1195 
1196       /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1197        * Float Operations:
1198        *
1199        *    "No SIMD16 in mixed mode when destination is packed f16 for both
1200        *     Align1 and Align16."
1201        *
1202        * And:
1203        *
1204        *   "In Align16 mode, when half float and float data types are mixed
1205        *    between source operands OR between source and destination operands,
1206        *    the register content are assumed to be packed."
1207        *
1208        * Which implies that SIMD16 is not available in Align16. This is further
1209        * confirmed by:
1210        *
1211        *    "For Align16 mixed mode, both input and output packed f16 data
1212        *     must be oword aligned, no oword crossing in packed f16"
1213        *
1214        * Since oword-aligned packed f16 data would cross oword boundaries when
1215        * the execution size is larger than 8.
1216        */
1217       ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8");
1218 
1219       /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1220        * Float Operations:
1221        *
1222        *    "No accumulator read access for Align16 mixed float."
1223        */
1224       ERROR_IF(inst_uses_src_acc(isa, inst),
1225                "No accumulator read access for Align16 mixed float");
1226    } else {
1227       assert(!is_align16);
1228 
1229       /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1230        * Float Operations:
1231        *
1232        *    "No SIMD16 in mixed mode when destination is packed f16 for both
1233        *     Align1 and Align16."
1234        */
1235       ERROR_IF(exec_size > 8 && dst_is_packed &&
1236                dst_type == BRW_TYPE_HF &&
1237                opcode != BRW_OPCODE_MOV,
1238                "Align1 mixed float mode is limited to SIMD8 when destination "
1239                "is packed half-float");
1240 
1241       /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1242        * Float Operations:
1243        *
1244        *    "Math operations for mixed mode:
1245        *     - In Align1, f16 inputs need to be strided"
1246        */
1247       if (opcode == BRW_OPCODE_MATH) {
1248          if (src0_type == BRW_TYPE_HF) {
1249             ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1,
1250                      "Align1 mixed mode math needs strided half-float inputs");
1251          }
1252 
1253          if (num_sources >= 2 && src1_type == BRW_TYPE_HF) {
1254             ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1,
1255                      "Align1 mixed mode math needs strided half-float inputs");
1256          }
1257       }
1258 
1259       if (dst_type == BRW_TYPE_HF && dst_stride == 1) {
1260          /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1261           * Float Operations:
1262           *
1263           *    "In Align1, destination stride can be smaller than execution
1264           *     type. When destination is stride of 1, 16 bit packed data is
1265           *     updated on the destination. However, output packed f16 data
1266           *     must be oword aligned, no oword crossing in packed f16."
1267           *
1268           * The requirement of not crossing oword boundaries for 16-bit oword
1269           * aligned data means that execution size is limited to 8.
1270           */
1271          unsigned subreg;
1272          if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT)
1273             subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
1274          else
1275             subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst);
1276          ERROR_IF(subreg % 16 != 0,
1277                   "Align1 mixed mode packed half-float output must be "
1278                   "oword aligned");
1279          ERROR_IF(exec_size > 8,
1280                   "Align1 mixed mode packed half-float output must not "
1281                   "cross oword boundaries (max exec size is 8)");
1282 
1283          /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1284           * Float Operations:
1285           *
1286           *    "When source is float or half float from accumulator register and
1287           *     destination is half float with a stride of 1, the source must
1288           *     register aligned. i.e., source must have offset zero."
1289           *
1290           * Align16 mixed float mode doesn't allow accumulator access on sources,
1291           * so we only need to check this for Align1.
1292           */
1293          if (src0_is_acc(devinfo, inst) &&
1294              (src0_type == BRW_TYPE_F ||
1295               src0_type == BRW_TYPE_HF)) {
1296             ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0,
1297                      "Mixed float mode requires register-aligned accumulator "
1298                      "source reads when destination is packed half-float");
1299 
1300          }
1301 
1302          if (num_sources > 1 &&
1303              src1_is_acc(devinfo, inst) &&
1304              (src1_type == BRW_TYPE_F ||
1305               src1_type == BRW_TYPE_HF)) {
1306             ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0,
1307                      "Mixed float mode requires register-aligned accumulator "
1308                      "source reads when destination is packed half-float");
1309          }
1310       }
1311 
1312       /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1313        * Float Operations:
1314        *
1315        *    "No swizzle is allowed when an accumulator is used as an implicit
1316        *     source or an explicit source in an instruction. i.e. when
1317        *     destination is half float with an implicit accumulator source,
1318        *     destination stride needs to be 2."
1319        *
1320        * FIXME: it is not quite clear what the first sentence actually means
1321        *        or its link to the implication described after it, so we only
1322        *        validate the explicit implication, which is clearly described.
1323        */
1324       if (dst_type == BRW_TYPE_HF &&
1325           inst_uses_src_acc(isa, inst)) {
1326          ERROR_IF(dst_stride != 2,
1327                   "Mixed float mode with implicit/explicit accumulator "
1328                   "source and half-float destination requires a stride "
1329                   "of 2 on the destination");
1330       }
1331    }
1332 
1333    return error_msg;
1334 }
1335 
/**
 * Fills \p grf_access_mask for the region described by \p exec_size,
 * \p element_size, \p subreg, \p vstride, \p width and \p hstride.
 *
 * \p grf_access_mask has 32 uint8_t entries, one per channel.  Each entry is
 * a bitmask of the GRFs (relative to the region's base register) touched by
 * that channel's element.  Entries for channels beyond those the region
 * covers are not written, so the caller is expected to pass a zeroed array.
 *
 * Example: the source gX.1<4,2,2>F in an exec_size = 4 instruction gives
 *
 *    access_mask[0] = 0x01 (bytes 7-4 of the 1st grf)
 *    access_mask[1] = 0x01 (bytes 15-12 of the 1st grf)
 *    access_mask[2] = 0x01 (bytes 23-20 of the 1st grf)
 *    access_mask[3] = 0x01 (bytes 31-28 of the 1st grf)
 *    access_mask[4-31] = 0
 *
 * Before Xe2, gX<1,1,0>F with exec_size == 16 gives
 *
 *    access_mask[0] = 0x01 (bytes 3-0 of the 1st grf)
 *    access_mask[1] = 0x01 (bytes 7-4 of the 1st grf)
 *      ...
 *    access_mask[7] = 0x01 (bytes 31-28 of the 1st grf)
 *    access_mask[8] = 0x02 (bytes 3-0 of the 2nd grf)
 *      ...
 *    access_mask[15] = 0x02 (bytes 31-28 of the 2nd grf)
 *    access_mask[16-31] = 0
 *
 * On Xe2 the same gX<1,1,0>F with exec_size == 16 stays within one register:
 *
 *    access_mask[0] = 0x01 (bytes 3-0 of the 1st grf)
 *    access_mask[1] = 0x01 (bytes 7-4 of the 1st grf)
 *      ...
 *    access_mask[7] = 0x01 (bytes 31-28 of the 1st grf)
 *    access_mask[8] = 0x01 (bytes 35-32 of the 1st grf)
 *      ...
 *    access_mask[15] = 0x01 (bytes 63-60 of the 1st grf)
 *    access_mask[16-31] = 0
 */
static void
grfs_accessed(const struct intel_device_info *devinfo,
              uint8_t grf_access_mask[static 32],
              unsigned exec_size, unsigned element_size, unsigned subreg,
              unsigned vstride, unsigned width, unsigned hstride)
{
   assert(util_is_power_of_two_nonzero(reg_unit(devinfo)));

   /* log2 of the GRF size in bytes: 32 bytes scaled by reg_unit. */
   const unsigned grf_size_shift = (5 - 1) + ffs(reg_unit(devinfo));
   const unsigned num_rows = exec_size / width;
   unsigned channel = 0;

   for (unsigned row = 0; row < num_rows; row++) {
      /* Each row starts vstride elements after the previous one. */
      unsigned first_byte = subreg + row * vstride * element_size;

      for (unsigned col = 0; col < width; col++) {
         const unsigned last_byte = first_byte + (element_size - 1);
         const unsigned first_grf = (first_byte >> grf_size_shift) % 8;
         const unsigned last_grf = (last_byte >> grf_size_shift) % 8;

         /* An element may straddle two registers; record both. */
         grf_access_mask[channel] = (1 << first_grf) | (1 << last_grf);
         channel++;

         first_byte += hstride * element_size;
      }
   }

   /* Either nothing was visited (width > exec_size) or every channel was. */
   assert(channel == 0 || channel == exec_size);
}
1402 
/**
 * Counts the distinct registers referenced by a per-channel access mask.
 *
 * All 32 entries of \p grfs_accessed are OR-ed together and the number of
 * set bits in the result is returned.
 */
static int
registers_read(const uint8_t grfs_accessed[static 32])
{
   const uint8_t *const end = grfs_accessed + 32;
   uint8_t combined = 0;

   for (const uint8_t *mask = grfs_accessed; mask != end; mask++)
      combined |= *mask;

   return util_bitcount(combined);
}
1416 
1417 /**
1418  * Checks restrictions listed in "Region Alignment Rules" in the "Register
1419  * Region Restrictions" section.
1420  */
1421 static struct string
region_alignment_rules(const struct brw_isa_info * isa,const brw_inst * inst)1422 region_alignment_rules(const struct brw_isa_info *isa,
1423                        const brw_inst *inst)
1424 {
1425    const struct intel_device_info *devinfo = isa->devinfo;
1426    const struct opcode_desc *desc =
1427       brw_opcode_desc(isa, brw_inst_opcode(isa, inst));
1428    unsigned num_sources = brw_num_sources_from_inst(isa, inst);
1429    unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
1430    uint8_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32];
1431    struct string error_msg = { .str = NULL, .len = 0 };
1432 
1433    if (num_sources == 3)
1434       return (struct string){};
1435 
1436    if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16)
1437       return (struct string){};
1438 
1439    if (inst_is_send(isa, inst))
1440       return (struct string){};
1441 
1442    memset(dst_access_mask, 0, sizeof(dst_access_mask));
1443    memset(src0_access_mask, 0, sizeof(src0_access_mask));
1444    memset(src1_access_mask, 0, sizeof(src1_access_mask));
1445 
1446    for (unsigned i = 0; i < num_sources; i++) {
1447       unsigned vstride, width, hstride, element_size, subreg;
1448       enum brw_reg_type type;
1449 
1450       /* In Direct Addressing mode, a source cannot span more than 2 adjacent
1451        * GRF registers.
1452        */
1453 
1454 #define DO_SRC(n)                                                              \
1455       if (brw_inst_src ## n ## _address_mode(devinfo, inst) !=                 \
1456           BRW_ADDRESS_DIRECT)                                                  \
1457          continue;                                                             \
1458                                                                                \
1459       if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
1460           IMM)                                                 \
1461          continue;                                                             \
1462                                                                                \
1463       vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
1464       width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
1465       hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
1466       type = brw_inst_src ## n ## _type(devinfo, inst);                        \
1467       element_size = brw_type_size_bytes(type);                                \
1468       subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
1469       grfs_accessed(devinfo, src ## n ## _access_mask,                         \
1470                     exec_size, element_size, subreg,                           \
1471                     vstride, width, hstride)
1472 
1473       if (i == 0) {
1474          DO_SRC(0);
1475       } else {
1476          DO_SRC(1);
1477       }
1478 #undef DO_SRC
1479 
1480       unsigned num_vstride = exec_size / width;
1481       unsigned num_hstride = width;
1482       unsigned vstride_elements = (num_vstride - 1) * vstride;
1483       unsigned hstride_elements = (num_hstride - 1) * hstride;
1484       unsigned offset = (vstride_elements + hstride_elements) * element_size +
1485                         subreg;
1486       ERROR_IF(offset >= 64 * reg_unit(devinfo),
1487                "A source cannot span more than 2 adjacent GRF registers");
1488    }
1489 
1490    if (desc->ndst == 0 || dst_is_null(devinfo, inst))
1491       return error_msg;
1492 
1493    unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1494    enum brw_reg_type dst_type = inst_dst_type(isa, inst);
1495    unsigned element_size = brw_type_size_bytes(dst_type);
1496    unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
1497    unsigned offset = ((exec_size - 1) * stride * element_size) + subreg;
1498    ERROR_IF(offset >= 64 * reg_unit(devinfo),
1499             "A destination cannot span more than 2 adjacent GRF registers");
1500 
1501    if (error_msg.str)
1502       return error_msg;
1503 
1504    grfs_accessed(devinfo, dst_access_mask, exec_size, element_size, subreg,
1505                  exec_size == 1 ? 0 : exec_size * stride,
1506                  exec_size == 1 ? 1 : exec_size,
1507                  exec_size == 1 ? 0 : stride);
1508 
1509    unsigned dst_regs = registers_read(dst_access_mask);
1510 
1511    /* The SKL PRM says:
1512     *
1513     *    When destination of MATH instruction spans two registers, the
1514     *    destination elements must be evenly split between the two registers.
1515     *
1516     * It is not known whether this restriction applies to KBL other Gens after
1517     * SKL.
1518     */
1519    if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) {
1520       if (dst_regs == 2) {
1521          unsigned upper_reg_writes = 0, lower_reg_writes = 0;
1522 
1523          for (unsigned i = 0; i < exec_size; i++) {
1524             if (dst_access_mask[i] == 2) {
1525                upper_reg_writes++;
1526             } else {
1527                assert(dst_access_mask[i] == 1);
1528                lower_reg_writes++;
1529             }
1530          }
1531 
1532          ERROR_IF(upper_reg_writes != lower_reg_writes,
1533                   "Writes must be evenly split between the two "
1534                   "destination registers");
1535       }
1536    }
1537 
1538    return error_msg;
1539 }
1540 
1541 static struct string
vector_immediate_restrictions(const struct brw_isa_info * isa,const brw_inst * inst)1542 vector_immediate_restrictions(const struct brw_isa_info *isa,
1543                               const brw_inst *inst)
1544 {
1545    const struct intel_device_info *devinfo = isa->devinfo;
1546 
1547    unsigned num_sources = brw_num_sources_from_inst(isa, inst);
1548    struct string error_msg = { .str = NULL, .len = 0 };
1549 
1550    if (num_sources == 3 || num_sources == 0 ||
1551        (devinfo->ver >= 12 && inst_is_send(isa, inst)))
1552       return (struct string){};
1553 
1554    unsigned file = num_sources == 1 ?
1555                    brw_inst_src0_reg_file(devinfo, inst) :
1556                    brw_inst_src1_reg_file(devinfo, inst);
1557    if (file != IMM)
1558       return (struct string){};
1559 
1560    enum brw_reg_type dst_type = inst_dst_type(isa, inst);
1561    unsigned dst_type_size = brw_type_size_bytes(dst_type);
1562    unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ?
1563                          brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0;
1564    unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1565    enum brw_reg_type type = num_sources == 1 ?
1566                             brw_inst_src0_type(devinfo, inst) :
1567                             brw_inst_src1_type(devinfo, inst);
1568 
1569    /* The PRMs say:
1570     *
1571     *    When an immediate vector is used in an instruction, the destination
1572     *    must be 128-bit aligned with destination horizontal stride equivalent
1573     *    to a word for an immediate integer vector (v) and equivalent to a
1574     *    DWord for an immediate float vector (vf).
1575     *
1576     * The text has not been updated for the addition of the immediate unsigned
1577     * integer vector type (uv) on SNB, but presumably the same restriction
1578     * applies.
1579     */
1580    switch (type) {
1581    case BRW_TYPE_V:
1582    case BRW_TYPE_UV:
1583    case BRW_TYPE_VF:
1584       ERROR_IF(dst_subreg % (128 / 8) != 0,
1585                "Destination must be 128-bit aligned in order to use immediate "
1586                "vector types");
1587 
1588       if (type == BRW_TYPE_VF) {
1589          ERROR_IF(dst_type_size * dst_stride != 4,
1590                   "Destination must have stride equivalent to dword in order "
1591                   "to use the VF type");
1592       } else {
1593          ERROR_IF(dst_type_size * dst_stride != 2,
1594                   "Destination must have stride equivalent to word in order "
1595                   "to use the V or UV type");
1596       }
1597       break;
1598    default:
1599       break;
1600    }
1601 
1602    return error_msg;
1603 }
1604 
1605 static struct string
special_requirements_for_handling_double_precision_data_types(const struct brw_isa_info * isa,const brw_inst * inst)1606 special_requirements_for_handling_double_precision_data_types(
1607                                        const struct brw_isa_info *isa,
1608                                        const brw_inst *inst)
1609 {
1610    const struct intel_device_info *devinfo = isa->devinfo;
1611 
1612    unsigned num_sources = brw_num_sources_from_inst(isa, inst);
1613    struct string error_msg = { .str = NULL, .len = 0 };
1614 
1615    if (num_sources == 3 || num_sources == 0)
1616       return (struct string){};
1617 
1618    /* Split sends don't have types so there's no doubles there. */
1619    if (inst_is_split_send(isa, inst))
1620       return (struct string){};
1621 
1622    enum brw_reg_type exec_type = execution_type(isa, inst);
1623    unsigned exec_type_size = brw_type_size_bytes(exec_type);
1624 
1625    enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst);
1626    enum brw_reg_type dst_type = inst_dst_type(isa, inst);
1627    unsigned dst_type_size = brw_type_size_bytes(dst_type);
1628    unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
1629    unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst);
1630    unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
1631    unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst);
1632 
1633    bool is_integer_dword_multiply =
1634       brw_inst_opcode(isa, inst) == BRW_OPCODE_MUL &&
1635       (brw_inst_src0_type(devinfo, inst) == BRW_TYPE_D ||
1636        brw_inst_src0_type(devinfo, inst) == BRW_TYPE_UD) &&
1637       (brw_inst_src1_type(devinfo, inst) == BRW_TYPE_D ||
1638        brw_inst_src1_type(devinfo, inst) == BRW_TYPE_UD);
1639 
1640    const bool is_double_precision =
1641       dst_type_size == 8 || exec_type_size == 8 || is_integer_dword_multiply;
1642 
1643    for (unsigned i = 0; i < num_sources; i++) {
1644       unsigned vstride, width, hstride, type_size, reg, subreg, address_mode;
1645       bool is_scalar_region;
1646       enum brw_reg_file file;
1647       enum brw_reg_type type;
1648 
1649 #define DO_SRC(n)                                                              \
1650       if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
1651           IMM)                                                 \
1652          continue;                                                             \
1653                                                                                \
1654       is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst);        \
1655       vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));          \
1656       width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));               \
1657       hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));          \
1658       file = brw_inst_src ## n ## _reg_file(devinfo, inst);                    \
1659       type = brw_inst_src ## n ## _type(devinfo, inst);                        \
1660       type_size = brw_type_size_bytes(type);                                   \
1661       reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst);                    \
1662       subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
1663       address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst)
1664 
1665       if (i == 0) {
1666          DO_SRC(0);
1667       } else {
1668          DO_SRC(1);
1669       }
1670 #undef DO_SRC
1671 
1672       const unsigned src_stride = (hstride ? hstride : vstride) * type_size;
1673       const unsigned dst_stride = dst_hstride * dst_type_size;
1674 
1675       /* The PRMs say that for CHV, BXT:
1676        *
1677        *    When source or destination datatype is 64b or operation is integer
1678        *    DWord multiply, regioning in Align1 must follow these rules:
1679        *
1680        *    1. Source and Destination horizontal stride must be aligned to the
1681        *       same qword.
1682        *    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
1683        *    3. Source and Destination offset must be the same, except the case
1684        *       of scalar source.
1685        *
1686        * We assume that the restriction applies to GLK as well.
1687        */
1688       if (is_double_precision &&
1689           brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 &&
1690           intel_device_info_is_9lp(devinfo)) {
1691          ERROR_IF(!is_scalar_region &&
1692                   (src_stride % 8 != 0 ||
1693                    dst_stride % 8 != 0 ||
1694                    src_stride != dst_stride),
1695                   "Source and destination horizontal stride must equal and a "
1696                   "multiple of a qword when the execution type is 64-bit");
1697 
1698          ERROR_IF(vstride != width * hstride,
1699                   "Vstride must be Width * Hstride when the execution type is "
1700                   "64-bit");
1701 
1702          ERROR_IF(!is_scalar_region && dst_subreg != subreg,
1703                   "Source and destination offset must be the same when the "
1704                   "execution type is 64-bit");
1705       }
1706 
1707       /* The PRMs say that for CHV, BXT:
1708        *
1709        *    When source or destination datatype is 64b or operation is integer
1710        *    DWord multiply, indirect addressing must not be used.
1711        *
1712        * We assume that the restriction applies to GLK as well.
1713        */
1714       if (is_double_precision &&
1715           intel_device_info_is_9lp(devinfo)) {
1716          ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode ||
1717                   BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode,
1718                   "Indirect addressing is not allowed when the execution type "
1719                   "is 64-bit");
1720       }
1721 
1722       /* The PRMs say that for CHV, BXT:
1723        *
1724        *    ARF registers must never be used with 64b datatype or when
1725        *    operation is integer DWord multiply.
1726        *
1727        * We assume that the restriction applies to GLK as well.
1728        *
1729        * We assume that the restriction does not apply to the null register.
1730        */
1731       if (is_double_precision &&
1732           intel_device_info_is_9lp(devinfo)) {
1733          ERROR_IF(brw_inst_opcode(isa, inst) == BRW_OPCODE_MAC ||
1734                   brw_inst_acc_wr_control(devinfo, inst) ||
1735                   (ARF == file &&
1736                    reg != BRW_ARF_NULL) ||
1737                   (ARF == dst_file &&
1738                    dst_reg != BRW_ARF_NULL),
1739                   "Architecture registers cannot be used when the execution "
1740                   "type is 64-bit");
1741       }
1742 
1743       /* From the hardware spec section "Register Region Restrictions":
1744        *
1745        * There are two rules:
1746        *
1747        * "In case of all floating point data types used in destination:" and
1748        *
1749        * "In case where source or destination datatype is 64b or operation is
1750        *  integer DWord multiply:"
1751        *
1752        * both of which list the same restrictions:
1753        *
1754        *  "1. Register Regioning patterns where register data bit location
1755        *      of the LSB of the channels are changed between source and
1756        *      destination are not supported on Src0 and Src1 except for
1757        *      broadcast of a scalar.
1758        *
1759        *   2. Explicit ARF registers except null and accumulator must not be
1760        *      used."
1761        */
1762       if (devinfo->verx10 >= 125 &&
1763           (brw_type_is_float(dst_type) ||
1764            is_double_precision)) {
1765          ERROR_IF(!is_scalar_region &&
1766                   BRW_ADDRESS_REGISTER_INDIRECT_REGISTER != address_mode &&
1767                   (!is_linear(vstride, width, hstride) ||
1768                    src_stride != dst_stride ||
1769                    subreg != dst_subreg),
1770                   "Register Regioning patterns where register data bit "
1771                   "location of the LSB of the channels are changed between "
1772                   "source and destination are not supported except for "
1773                   "broadcast of a scalar.");
1774 
1775          ERROR_IF((address_mode == BRW_ADDRESS_DIRECT && file == ARF &&
1776                    reg != BRW_ARF_NULL && !(reg >= BRW_ARF_ACCUMULATOR && reg < BRW_ARF_FLAG)) ||
1777                   (dst_file == ARF &&
1778                    dst_reg != BRW_ARF_NULL && (dst_reg & 0xF0) != BRW_ARF_ACCUMULATOR),
1779                   "Explicit ARF registers except null and accumulator must not "
1780                   "be used.");
1781       }
1782 
1783       /* From the hardware spec section "Register Region Restrictions":
1784        *
1785        * "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float and
1786        *  Quad-Word data must not be used."
1787        */
1788       if (devinfo->verx10 >= 125 &&
1789           (brw_type_is_float(type) || brw_type_size_bytes(type) == 8)) {
1790          ERROR_IF(address_mode == BRW_ADDRESS_REGISTER_INDIRECT_REGISTER &&
1791                   vstride == BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL,
1792                   "Vx1 and VxH indirect addressing for Float, Half-Float, "
1793                   "Double-Float and Quad-Word data must not be used");
1794       }
1795    }
1796 
1797    /* The PRMs say that for BDW, SKL:
1798     *
1799     *    If Align16 is required for an operation with QW destination and non-QW
1800     *    source datatypes, the execution size cannot exceed 2.
1801     *
1802     * We assume that the restriction applies to all Gfx8+ parts.
1803     */
1804    if (is_double_precision) {
1805       enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
1806       enum brw_reg_type src1_type =
1807          num_sources > 1 ? brw_inst_src1_type(devinfo, inst) : src0_type;
1808       unsigned src0_type_size = brw_type_size_bytes(src0_type);
1809       unsigned src1_type_size = brw_type_size_bytes(src1_type);
1810 
1811       ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 &&
1812                dst_type_size == 8 &&
1813                (src0_type_size != 8 || src1_type_size != 8) &&
1814                brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2,
1815                "In Align16 exec size cannot exceed 2 with a QWord destination "
1816                "and a non-QWord source");
1817    }
1818 
1819    /* The PRMs say that for CHV, BXT:
1820     *
1821     *    When source or destination datatype is 64b or operation is integer
1822     *    DWord multiply, DepCtrl must not be used.
1823     *
1824     * We assume that the restriction applies to GLK as well.
1825     */
1826    if (is_double_precision &&
1827        intel_device_info_is_9lp(devinfo)) {
1828       ERROR_IF(brw_inst_no_dd_check(devinfo, inst) ||
1829                brw_inst_no_dd_clear(devinfo, inst),
1830                "DepCtrl is not allowed when the execution type is 64-bit");
1831    }
1832 
1833    return error_msg;
1834 }
1835 
1836 static struct string
instruction_restrictions(const struct brw_isa_info * isa,const brw_inst * inst)1837 instruction_restrictions(const struct brw_isa_info *isa,
1838                          const brw_inst *inst)
1839 {
1840    const struct intel_device_info *devinfo = isa->devinfo;
1841    struct string error_msg = { .str = NULL, .len = 0 };
1842 
1843    /* From Wa_1604601757:
1844     *
1845     * "When multiplying a DW and any lower precision integer, source modifier
1846     *  is not supported."
1847     */
1848    if (devinfo->ver >= 12 &&
1849        brw_inst_opcode(isa, inst) == BRW_OPCODE_MUL) {
1850       enum brw_reg_type exec_type = execution_type(isa, inst);
1851       const bool src0_valid =
1852          brw_type_size_bytes(brw_inst_src0_type(devinfo, inst)) == 4 ||
1853          brw_inst_src0_reg_file(devinfo, inst) == IMM ||
1854          !(brw_inst_src0_negate(devinfo, inst) ||
1855            brw_inst_src0_abs(devinfo, inst));
1856       const bool src1_valid =
1857          brw_type_size_bytes(brw_inst_src1_type(devinfo, inst)) == 4 ||
1858          brw_inst_src1_reg_file(devinfo, inst) == IMM ||
1859          !(brw_inst_src1_negate(devinfo, inst) ||
1860            brw_inst_src1_abs(devinfo, inst));
1861 
1862       ERROR_IF(!brw_type_is_float(exec_type) &&
1863                brw_type_size_bytes(exec_type) == 4 &&
1864                !(src0_valid && src1_valid),
1865                "When multiplying a DW and any lower precision integer, source "
1866                "modifier is not supported.");
1867    }
1868 
1869    if (brw_inst_opcode(isa, inst) == BRW_OPCODE_CMP ||
1870        brw_inst_opcode(isa, inst) == BRW_OPCODE_CMPN) {
1871       ERROR_IF(brw_inst_cond_modifier(devinfo, inst) == BRW_CONDITIONAL_NONE,
1872                "CMP (or CMPN) must have a condition.");
1873    }
1874 
1875    if (brw_inst_opcode(isa, inst) == BRW_OPCODE_SEL) {
1876       ERROR_IF((brw_inst_cond_modifier(devinfo, inst) != BRW_CONDITIONAL_NONE) ==
1877                (brw_inst_pred_control(devinfo, inst) != BRW_PREDICATE_NONE),
1878                "SEL must either be predicated or have a condition modifiers");
1879    }
1880 
1881    if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MUL) {
1882       const enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst);
1883       const enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst);
1884       const enum brw_reg_type dst_type = inst_dst_type(isa, inst);
1885 
1886       /* Page 966 (page 982 of the PDF) of Broadwell PRM volume 2a says:
1887        *
1888        *    When multiplying a DW and any lower precision integer, the DW
1889        *    operand must on src0.
1890        *
1891        * Ivy Bridge, Haswell, Skylake, and Ice Lake PRMs contain the same
1892        * text.
1893        */
1894       ERROR_IF(brw_type_is_int(src1_type) &&
1895                brw_type_size_bytes(src0_type) < 4 &&
1896                brw_type_size_bytes(src1_type) == 4,
1897                "When multiplying a DW and any lower precision integer, the "
1898                "DW operand must be src0.");
1899 
1900       /* Page 971 (page 987 of the PDF), section "Accumulator
1901        * Restrictions," of the Broadwell PRM volume 7 says:
1902        *
1903        *    Integer source operands cannot be accumulators.
1904        *
1905        * The Skylake and Ice Lake PRMs contain the same text.
1906        */
1907       ERROR_IF((src0_is_acc(devinfo, inst) &&
1908                 brw_type_is_int(src0_type)) ||
1909                (src1_is_acc(devinfo, inst) &&
1910                 brw_type_is_int(src1_type)),
1911                "Integer source operands cannot be accumulators.");
1912 
1913       /* Page 935 (page 951 of the PDF) of the Ice Lake PRM volume 2a says:
1914        *
1915        *    When multiplying integer data types, if one of the sources is a
1916        *    DW, the resulting full precision data is stored in the
1917        *    accumulator. However, if the destination data type is either W or
1918        *    DW, the low bits of the result are written to the destination
1919        *    register and the remaining high bits are discarded. This results
1920        *    in undefined Overflow and Sign flags. Therefore, conditional
1921        *    modifiers and saturation (.sat) cannot be used in this case.
1922        *
1923        * Similar text appears in every version of the PRM.
1924        *
1925        * The wording of the last sentence is not very clear.  It could either
1926        * be interpreted as "conditional modifiers combined with saturation
1927        * cannot be used" or "neither conditional modifiers nor saturation can
1928        * be used."  I have interpreted it as the latter primarily because that
1929        * is the more restrictive interpretation.
1930        */
1931       ERROR_IF((src0_type == BRW_TYPE_UD ||
1932                 src0_type == BRW_TYPE_D ||
1933                 src1_type == BRW_TYPE_UD ||
1934                 src1_type == BRW_TYPE_D) &&
1935                (dst_type == BRW_TYPE_UD ||
1936                 dst_type == BRW_TYPE_D ||
1937                 dst_type == BRW_TYPE_UW ||
1938                 dst_type == BRW_TYPE_W) &&
1939                (brw_inst_saturate(devinfo, inst) != 0 ||
1940                 brw_inst_cond_modifier(devinfo, inst) != BRW_CONDITIONAL_NONE),
1941                "Neither Saturate nor conditional modifier allowed with DW "
1942                "integer multiply.");
1943    }
1944 
1945    if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) {
1946       unsigned math_function = brw_inst_math_function(devinfo, inst);
1947       switch (math_function) {
1948       case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
1949       case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
1950       case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: {
1951          /* Page 442 of the Broadwell PRM Volume 2a "Extended Math Function" says:
1952           *    INT DIV function does not support source modifiers.
1953           * Bspec 6647 extends it back to Ivy Bridge.
1954           */
1955          bool src0_valid = !brw_inst_src0_negate(devinfo, inst) &&
1956                            !brw_inst_src0_abs(devinfo, inst);
1957          bool src1_valid = !brw_inst_src1_negate(devinfo, inst) &&
1958                            !brw_inst_src1_abs(devinfo, inst);
1959          ERROR_IF(!src0_valid || !src1_valid,
1960                   "INT DIV function does not support source modifiers.");
1961          break;
1962       }
1963       default:
1964          break;
1965       }
1966    }
1967 
1968    if (brw_inst_opcode(isa, inst) == BRW_OPCODE_DP4A) {
1969       /* Page 396 (page 412 of the PDF) of the DG1 PRM volume 2a says:
1970        *
1971        *    Only one of src0 or src1 operand may be an the (sic) accumulator
1972        *    register (acc#).
1973        */
1974       ERROR_IF(src0_is_acc(devinfo, inst) && src1_is_acc(devinfo, inst),
1975                "Only one of src0 or src1 operand may be an accumulator "
1976                "register (acc#).");
1977 
1978    }
1979 
1980    if (brw_inst_opcode(isa, inst) == BRW_OPCODE_ADD3) {
1981       const enum brw_reg_type dst_type = inst_dst_type(isa, inst);
1982 
1983       ERROR_IF(dst_type != BRW_TYPE_D &&
1984                dst_type != BRW_TYPE_UD &&
1985                dst_type != BRW_TYPE_W &&
1986                dst_type != BRW_TYPE_UW,
1987                "Destination must be integer D, UD, W, or UW type.");
1988 
1989       for (unsigned i = 0; i < 3; i++) {
1990          enum brw_reg_type src_type;
1991 
1992          switch (i) {
1993          case 0: src_type = brw_inst_3src_a1_src0_type(devinfo, inst); break;
1994          case 1: src_type = brw_inst_3src_a1_src1_type(devinfo, inst); break;
1995          case 2: src_type = brw_inst_3src_a1_src2_type(devinfo, inst); break;
1996          default: unreachable("invalid src");
1997          }
1998 
1999          ERROR_IF(src_type != BRW_TYPE_D &&
2000                   src_type != BRW_TYPE_UD &&
2001                   src_type != BRW_TYPE_W &&
2002                   src_type != BRW_TYPE_UW,
2003                   "Source must be integer D, UD, W, or UW type.");
2004 
2005          if (i == 0) {
2006             if (brw_inst_3src_a1_src0_is_imm(devinfo, inst)) {
2007                ERROR_IF(src_type != BRW_TYPE_W &&
2008                         src_type != BRW_TYPE_UW,
2009                         "Immediate source must be integer W or UW type.");
2010             }
2011          } else if (i == 2) {
2012             if (brw_inst_3src_a1_src2_is_imm(devinfo, inst)) {
2013                ERROR_IF(src_type != BRW_TYPE_W &&
2014                         src_type != BRW_TYPE_UW,
2015                         "Immediate source must be integer W or UW type.");
2016             }
2017          }
2018       }
2019    }
2020 
2021    if (brw_inst_opcode(isa, inst) == BRW_OPCODE_OR ||
2022        brw_inst_opcode(isa, inst) == BRW_OPCODE_AND ||
2023        brw_inst_opcode(isa, inst) == BRW_OPCODE_XOR ||
2024        brw_inst_opcode(isa, inst) == BRW_OPCODE_NOT) {
2025       /* While the behavior of the negate source modifier is defined as
2026        * logical not, the behavior of abs source modifier is not
2027        * defined. Disallow it to be safe.
2028        */
2029       ERROR_IF(brw_inst_src0_abs(devinfo, inst),
2030                "Behavior of abs source modifier in logic ops is undefined.");
2031       ERROR_IF(brw_inst_opcode(isa, inst) != BRW_OPCODE_NOT &&
2032                brw_inst_src1_reg_file(devinfo, inst) != IMM &&
2033                brw_inst_src1_abs(devinfo, inst),
2034                "Behavior of abs source modifier in logic ops is undefined.");
2035 
2036       /* Page 479 (page 495 of the PDF) of the Broadwell PRM volume 2a says:
2037        *
2038        *    Source modifier is not allowed if source is an accumulator.
2039        *
2040        * The same text also appears for OR, NOT, and XOR instructions.
2041        */
2042       ERROR_IF((brw_inst_src0_abs(devinfo, inst) ||
2043                 brw_inst_src0_negate(devinfo, inst)) &&
2044                src0_is_acc(devinfo, inst),
2045                "Source modifier is not allowed if source is an accumulator.");
2046       ERROR_IF(brw_num_sources_from_inst(isa, inst) > 1 &&
2047                (brw_inst_src1_abs(devinfo, inst) ||
2048                 brw_inst_src1_negate(devinfo, inst)) &&
2049                src1_is_acc(devinfo, inst),
2050                "Source modifier is not allowed if source is an accumulator.");
2051 
2052       /* Page 479 (page 495 of the PDF) of the Broadwell PRM volume 2a says:
2053        *
2054        *    This operation does not produce sign or overflow conditions. Only
2055        *    the .e/.z or .ne/.nz conditional modifiers should be used.
2056        *
2057        * The same text also appears for OR, NOT, and XOR instructions.
2058        *
2059        * Per the comment around nir_op_imod in brw_fs_nir.cpp, we have
2060        * determined this to not be true. The only conditions that seem
2061        * absolutely sketchy are O, R, and U.  Some OpenGL shaders from Doom
2062        * 2016 have been observed to generate and.g and operate correctly.
2063        */
2064       const enum brw_conditional_mod cmod =
2065          brw_inst_cond_modifier(devinfo, inst);
2066       ERROR_IF(cmod == BRW_CONDITIONAL_O ||
2067                cmod == BRW_CONDITIONAL_R ||
2068                cmod == BRW_CONDITIONAL_U,
2069                "O, R, and U conditional modifiers should not be used.");
2070    }
2071 
2072    if (brw_inst_opcode(isa, inst) == BRW_OPCODE_BFI2) {
2073       ERROR_IF(brw_inst_cond_modifier(devinfo, inst) != BRW_CONDITIONAL_NONE,
2074                "BFI2 cannot have conditional modifier");
2075 
2076       ERROR_IF(brw_inst_saturate(devinfo, inst),
2077                "BFI2 cannot have saturate modifier");
2078 
2079       enum brw_reg_type dst_type;
2080 
2081       if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1)
2082          dst_type = brw_inst_3src_a1_dst_type(devinfo, inst);
2083       else
2084          dst_type = brw_inst_3src_a16_dst_type(devinfo, inst);
2085 
2086       ERROR_IF(dst_type != BRW_TYPE_D &&
2087                dst_type != BRW_TYPE_UD,
2088                "BFI2 destination type must be D or UD");
2089 
2090       for (unsigned s = 0; s < 3; s++) {
2091          enum brw_reg_type src_type;
2092 
2093          if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
2094             switch (s) {
2095             case 0: src_type = brw_inst_3src_a1_src0_type(devinfo, inst); break;
2096             case 1: src_type = brw_inst_3src_a1_src1_type(devinfo, inst); break;
2097             case 2: src_type = brw_inst_3src_a1_src2_type(devinfo, inst); break;
2098             default: unreachable("invalid src");
2099             }
2100          } else {
2101             src_type = brw_inst_3src_a16_src_type(devinfo, inst);
2102          }
2103 
2104          ERROR_IF(src_type != dst_type,
2105                   "BFI2 source type must match destination type");
2106       }
2107    }
2108 
2109    if (brw_inst_opcode(isa, inst) == BRW_OPCODE_CSEL) {
2110       ERROR_IF(brw_inst_pred_control(devinfo, inst) != BRW_PREDICATE_NONE,
2111                "CSEL cannot be predicated");
2112 
2113       /* CSEL is CMP and SEL fused into one. The condition modifier, which
2114        * does not actually modify the flags, controls the built-in comparison.
2115        */
2116       ERROR_IF(brw_inst_cond_modifier(devinfo, inst) == BRW_CONDITIONAL_NONE,
2117                "CSEL must have a condition.");
2118 
2119       enum brw_reg_type dst_type;
2120 
2121       if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1)
2122          dst_type = brw_inst_3src_a1_dst_type(devinfo, inst);
2123       else
2124          dst_type = brw_inst_3src_a16_dst_type(devinfo, inst);
2125 
2126       if (devinfo->ver == 9) {
2127          ERROR_IF(dst_type != BRW_TYPE_F,
2128                   "CSEL destination type must be F");
2129       } else {
2130          ERROR_IF(dst_type != BRW_TYPE_F &&
2131                   dst_type != BRW_TYPE_HF &&
2132                   dst_type != BRW_TYPE_D &&
2133                   dst_type != BRW_TYPE_W &&
2134                   dst_type != BRW_TYPE_UD &&
2135                   dst_type != BRW_TYPE_UW,
2136                   "CSEL destination type must be F, HF, *D, or *W");
2137       }
2138 
2139       for (unsigned s = 0; s < 3; s++) {
2140          enum brw_reg_type src_type;
2141 
2142          if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) {
2143             switch (s) {
2144             case 0: src_type = brw_inst_3src_a1_src0_type(devinfo, inst); break;
2145             case 1: src_type = brw_inst_3src_a1_src1_type(devinfo, inst); break;
2146             case 2: src_type = brw_inst_3src_a1_src2_type(devinfo, inst); break;
2147             default: unreachable("invalid src");
2148             }
2149          } else {
2150             src_type = brw_inst_3src_a16_src_type(devinfo, inst);
2151          }
2152 
2153          if (devinfo->ver == 9) {
2154             ERROR_IF(src_type != BRW_TYPE_F,
2155                      "CSEL source type must be F");
2156          } else {
2157             ERROR_IF(src_type != BRW_TYPE_F && src_type != BRW_TYPE_HF &&
2158                      src_type != BRW_TYPE_D && src_type != BRW_TYPE_UD &&
2159                      src_type != BRW_TYPE_W && src_type != BRW_TYPE_UW,
2160                      "CSEL source type must be F, HF, *D, or *W");
2161 
2162             ERROR_IF(brw_type_is_float(src_type) != brw_type_is_float(dst_type),
2163                      "CSEL cannot mix float and integer types.");
2164 
2165             ERROR_IF(brw_type_size_bytes(src_type) !=
2166                      brw_type_size_bytes(dst_type),
2167                      "CSEL cannot mix different type sizes.");
2168          }
2169       }
2170    }
2171 
2172    if (brw_inst_opcode(isa, inst) == BRW_OPCODE_DPAS) {
2173       ERROR_IF(brw_inst_dpas_3src_sdepth(devinfo, inst) != BRW_SYSTOLIC_DEPTH_8,
2174                "Systolic depth must be 8.");
2175 
2176       const unsigned sdepth = 8;
2177 
2178       const enum brw_reg_type dst_type =
2179          brw_inst_dpas_3src_dst_type(devinfo, inst);
2180       const enum brw_reg_type src0_type =
2181          brw_inst_dpas_3src_src0_type(devinfo, inst);
2182       const enum brw_reg_type src1_type =
2183          brw_inst_dpas_3src_src1_type(devinfo, inst);
2184       const enum brw_reg_type src2_type =
2185          brw_inst_dpas_3src_src2_type(devinfo, inst);
2186 
2187       const enum gfx12_sub_byte_precision src1_sub_byte =
2188          brw_inst_dpas_3src_src1_subbyte(devinfo, inst);
2189 
2190       if (src1_type != BRW_TYPE_B && src1_type != BRW_TYPE_UB) {
2191          ERROR_IF(src1_sub_byte != BRW_SUB_BYTE_PRECISION_NONE,
2192                   "Sub-byte precision must be None for source type larger than Byte.");
2193       } else {
2194          ERROR_IF(src1_sub_byte != BRW_SUB_BYTE_PRECISION_NONE &&
2195                   src1_sub_byte != BRW_SUB_BYTE_PRECISION_4BIT &&
2196                   src1_sub_byte != BRW_SUB_BYTE_PRECISION_2BIT,
2197                   "Invalid sub-byte precision.");
2198       }
2199 
2200       const enum gfx12_sub_byte_precision src2_sub_byte =
2201          brw_inst_dpas_3src_src2_subbyte(devinfo, inst);
2202 
2203       if (src2_type != BRW_TYPE_B && src2_type != BRW_TYPE_UB) {
2204          ERROR_IF(src2_sub_byte != BRW_SUB_BYTE_PRECISION_NONE,
2205                   "Sub-byte precision must be None.");
2206       } else {
2207          ERROR_IF(src2_sub_byte != BRW_SUB_BYTE_PRECISION_NONE &&
2208                   src2_sub_byte != BRW_SUB_BYTE_PRECISION_4BIT &&
2209                   src2_sub_byte != BRW_SUB_BYTE_PRECISION_2BIT,
2210                   "Invalid sub-byte precision.");
2211       }
2212 
2213       const unsigned src1_bits_per_element =
2214          brw_type_size_bits(src1_type) >>
2215          brw_inst_dpas_3src_src1_subbyte(devinfo, inst);
2216 
2217       const unsigned src2_bits_per_element =
2218          brw_type_size_bits(src2_type) >>
2219          brw_inst_dpas_3src_src2_subbyte(devinfo, inst);
2220 
2221       /* The MAX2(1, ...) is just to prevent possible division by 0 later. */
2222       const unsigned ops_per_chan =
2223          MAX2(1, 32 / MAX2(src1_bits_per_element, src2_bits_per_element));
2224 
2225       if (devinfo->ver < 20) {
2226          ERROR_IF(brw_inst_exec_size(devinfo, inst) != BRW_EXECUTE_8,
2227                   "DPAS execution size must be 8.");
2228       } else {
2229          ERROR_IF(brw_inst_exec_size(devinfo, inst) != BRW_EXECUTE_16,
2230                   "DPAS execution size must be 16.");
2231       }
2232 
2233       const unsigned exec_size = devinfo->ver < 20 ? 8 : 16;
2234 
2235       const unsigned dst_subnr  = brw_inst_dpas_3src_dst_subreg_nr(devinfo, inst);
2236       const unsigned src0_subnr = brw_inst_dpas_3src_src0_subreg_nr(devinfo, inst);
2237       const unsigned src1_subnr = brw_inst_dpas_3src_src1_subreg_nr(devinfo, inst);
2238       const unsigned src2_subnr = brw_inst_dpas_3src_src2_subreg_nr(devinfo, inst);
2239 
2240       /* Until HF is supported as dst type, this is effectively subnr == 0. */
2241       ERROR_IF(dst_subnr % exec_size != 0,
2242                "Destination subregister offset must be a multiple of ExecSize.");
2243 
2244       /* Until HF is supported as src0 type, this is effectively subnr == 0. */
2245       ERROR_IF(src0_subnr % exec_size != 0,
2246                "Src0 subregister offset must be a multiple of ExecSize.");
2247 
2248       ERROR_IF(src1_subnr != 0,
2249                "Src1 subregister offsets must be 0.");
2250 
2251       /* In nearly all cases, this effectively requires that src2.subnr be
2252        * 0. It is only when src1 is 8 bits and src2 is 2 or 4 bits that the
2253        * ops_per_chan value can allow non-zero src2.subnr.
2254        */
2255       ERROR_IF(src2_subnr % (sdepth * ops_per_chan) != 0,
2256                "Src2 subregister offset must be a multiple of SystolicDepth "
2257                "times OPS_PER_CHAN.");
2258 
2259       ERROR_IF(dst_subnr * brw_type_size_bytes(dst_type) >= REG_SIZE,
2260                "Destination subregister specifies next register.");
2261 
2262       ERROR_IF(src0_subnr * brw_type_size_bytes(src0_type) >= REG_SIZE,
2263                "Src0 subregister specifies next register.");
2264 
2265       ERROR_IF((src1_subnr * brw_type_size_bytes(src1_type) * src1_bits_per_element) / 8 >= REG_SIZE,
2266                "Src1 subregister specifies next register.");
2267 
2268       ERROR_IF((src2_subnr * brw_type_size_bytes(src2_type) * src2_bits_per_element) / 8 >= REG_SIZE,
2269                "Src2 subregister specifies next register.");
2270 
2271       if (brw_inst_3src_atomic_control(devinfo, inst)) {
2272          /* FINISHME: When we start emitting DPAS with Atomic set, figure out
2273           * a way to validate it. Also add a test in test_eu_validate.cpp.
2274           */
2275          ERROR_IF(true,
2276                   "When instruction option Atomic is used it must be follwed by a "
2277                   "DPAS instruction.");
2278       }
2279 
2280       if (brw_inst_dpas_3src_exec_type(devinfo, inst) ==
2281           BRW_ALIGN1_3SRC_EXEC_TYPE_FLOAT) {
2282          ERROR_IF(dst_type != BRW_TYPE_F,
2283                   "DPAS destination type must be F.");
2284          ERROR_IF(src0_type != BRW_TYPE_F,
2285                   "DPAS src0 type must be F.");
2286          ERROR_IF(src1_type != BRW_TYPE_HF,
2287                   "DPAS src1 type must be HF.");
2288          ERROR_IF(src2_type != BRW_TYPE_HF,
2289                   "DPAS src2 type must be HF.");
2290       } else {
2291          ERROR_IF(dst_type != BRW_TYPE_D &&
2292                   dst_type != BRW_TYPE_UD,
2293                   "DPAS destination type must be D or UD.");
2294          ERROR_IF(src0_type != BRW_TYPE_D &&
2295                   src0_type != BRW_TYPE_UD,
2296                   "DPAS src0 type must be D or UD.");
2297          ERROR_IF(src1_type != BRW_TYPE_B &&
2298                   src1_type != BRW_TYPE_UB,
2299                   "DPAS src1 base type must be B or UB.");
2300          ERROR_IF(src2_type != BRW_TYPE_B &&
2301                   src2_type != BRW_TYPE_UB,
2302                   "DPAS src2 base type must be B or UB.");
2303 
2304          if (brw_type_is_uint(dst_type)) {
2305             ERROR_IF(!brw_type_is_uint(src0_type) ||
2306                      !brw_type_is_uint(src1_type) ||
2307                      !brw_type_is_uint(src2_type),
2308                      "If any source datatype is signed, destination datatype "
2309                      "must be signed.");
2310          }
2311       }
2312 
2313       /* FINISHME: Additional restrictions mentioned in the Bspec that are not
2314        * yet enforced here:
2315        *
2316        *    - General Accumulator registers access is not supported. This is
2317        *      currently enforced in brw_dpas_three_src (brw_eu_emit.c).
2318        *
2319        *    - Given any combination of datatypes in the sources of a DPAS
2320        *      instruction, the boundaries of a register should not be crossed.
2321        */
2322    }
2323 
2324    return error_msg;
2325 }
2326 
2327 static struct string
send_descriptor_restrictions(const struct brw_isa_info * isa,const brw_inst * inst)2328 send_descriptor_restrictions(const struct brw_isa_info *isa,
2329                              const brw_inst *inst)
2330 {
2331    const struct intel_device_info *devinfo = isa->devinfo;
2332    struct string error_msg = { .str = NULL, .len = 0 };
2333 
2334    if (inst_is_split_send(isa, inst)) {
2335       /* We can only validate immediate descriptors */
2336       if (brw_inst_send_sel_reg32_desc(devinfo, inst))
2337          return error_msg;
2338    } else if (inst_is_send(isa, inst)) {
2339       /* We can only validate immediate descriptors */
2340       if (brw_inst_src1_reg_file(devinfo, inst) != IMM)
2341          return error_msg;
2342    } else {
2343       return error_msg;
2344    }
2345 
2346    const uint32_t desc = brw_inst_send_desc(devinfo, inst);
2347 
2348    switch (brw_inst_sfid(devinfo, inst)) {
2349    case BRW_SFID_URB:
2350       if (devinfo->ver < 20)
2351          break;
2352       FALLTHROUGH;
2353    case GFX12_SFID_TGM:
2354    case GFX12_SFID_SLM:
2355    case GFX12_SFID_UGM:
2356       ERROR_IF(!devinfo->has_lsc, "Platform does not support LSC");
2357 
2358       ERROR_IF(lsc_opcode_has_transpose(lsc_msg_desc_opcode(devinfo, desc)) &&
2359                lsc_msg_desc_transpose(devinfo, desc) &&
2360                brw_inst_exec_size(devinfo, inst) != BRW_EXECUTE_1,
2361                "Transposed vectors are restricted to Exec_Mask = 1.");
2362       break;
2363 
2364    default:
2365       break;
2366    }
2367 
2368    if (brw_inst_sfid(devinfo, inst) == BRW_SFID_URB && devinfo->ver < 20) {
2369       ERROR_IF(!brw_inst_header_present(devinfo, inst),
2370                "Header must be present for all URB messages.");
2371 
2372       switch (brw_inst_urb_opcode(devinfo, inst)) {
2373       case GFX7_URB_OPCODE_ATOMIC_INC:
2374       case GFX7_URB_OPCODE_ATOMIC_MOV:
2375       case GFX8_URB_OPCODE_ATOMIC_ADD:
2376       case GFX8_URB_OPCODE_SIMD8_WRITE:
2377          break;
2378 
2379       case GFX8_URB_OPCODE_SIMD8_READ:
2380          ERROR_IF(brw_inst_rlen(devinfo, inst) == 0,
2381                   "URB SIMD8 read message must read some data.");
2382          break;
2383 
2384       case GFX125_URB_OPCODE_FENCE:
2385          ERROR_IF(devinfo->verx10 < 125,
2386                   "URB fence message only valid on gfx >= 12.5");
2387          break;
2388 
2389       default:
2390          ERROR_IF(true, "Invalid URB message");
2391          break;
2392       }
2393    }
2394 
2395    return error_msg;
2396 }
2397 
2398 bool
brw_validate_instruction(const struct brw_isa_info * isa,const brw_inst * inst,int offset,unsigned inst_size,struct disasm_info * disasm)2399 brw_validate_instruction(const struct brw_isa_info *isa,
2400                          const brw_inst *inst, int offset,
2401                          unsigned inst_size,
2402                          struct disasm_info *disasm)
2403 {
2404    struct string error_msg = { .str = NULL, .len = 0 };
2405 
2406    if (is_unsupported_inst(isa, inst)) {
2407       ERROR("Instruction not supported on this Gen");
2408    } else {
2409       CHECK(invalid_values);
2410 
2411       if (error_msg.str == NULL) {
2412          CHECK(sources_not_null);
2413          CHECK(send_restrictions);
2414          CHECK(alignment_supported);
2415          CHECK(general_restrictions_based_on_operand_types);
2416          CHECK(general_restrictions_on_region_parameters);
2417          CHECK(special_restrictions_for_mixed_float_mode);
2418          CHECK(region_alignment_rules);
2419          CHECK(vector_immediate_restrictions);
2420          CHECK(special_requirements_for_handling_double_precision_data_types);
2421          CHECK(instruction_restrictions);
2422          CHECK(send_descriptor_restrictions);
2423       }
2424    }
2425 
2426    if (error_msg.str && disasm) {
2427       disasm_insert_error(disasm, offset, inst_size, error_msg.str);
2428    }
2429    free(error_msg.str);
2430 
2431    return error_msg.len == 0;
2432 }
2433 
2434 bool
brw_validate_instructions(const struct brw_isa_info * isa,const void * assembly,int start_offset,int end_offset,struct disasm_info * disasm)2435 brw_validate_instructions(const struct brw_isa_info *isa,
2436                           const void *assembly, int start_offset, int end_offset,
2437                           struct disasm_info *disasm)
2438 {
2439    const struct intel_device_info *devinfo = isa->devinfo;
2440    bool valid = true;
2441 
2442    for (int src_offset = start_offset; src_offset < end_offset;) {
2443       const brw_inst *inst = assembly + src_offset;
2444       bool is_compact = brw_inst_cmpt_control(devinfo, inst);
2445       unsigned inst_size = is_compact ? sizeof(brw_compact_inst)
2446                                       : sizeof(brw_inst);
2447       brw_inst uncompacted;
2448 
2449       if (is_compact) {
2450          brw_compact_inst *compacted = (void *)inst;
2451          brw_uncompact_instruction(isa, &uncompacted, compacted);
2452          inst = &uncompacted;
2453       }
2454 
2455       bool v = brw_validate_instruction(isa, inst, src_offset,
2456                                         inst_size, disasm);
2457       valid = valid && v;
2458 
2459       src_offset += inst_size;
2460    }
2461 
2462    return valid;
2463 }
2464