xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/brw_eu.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics to
4  develop this 3D driver.
5 
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13 
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <[email protected]>
30   */
31 
32 #include <sys/stat.h>
33 #include <fcntl.h>
34 
35 #include "brw_disasm.h"
36 #include "brw_eu_defines.h"
37 #include "brw_eu.h"
38 #include "brw_private.h"
39 #include "intel_gfx_ver_enum.h"
40 #include "dev/intel_debug.h"
41 
42 #include "util/u_debug.h"
43 #include "util/ralloc.h"
44 
45 /* Returns a conditional modifier that negates the condition. */
46 enum brw_conditional_mod
brw_negate_cmod(enum brw_conditional_mod cmod)47 brw_negate_cmod(enum brw_conditional_mod cmod)
48 {
49    switch (cmod) {
50    case BRW_CONDITIONAL_Z:
51       return BRW_CONDITIONAL_NZ;
52    case BRW_CONDITIONAL_NZ:
53       return BRW_CONDITIONAL_Z;
54    case BRW_CONDITIONAL_G:
55       return BRW_CONDITIONAL_LE;
56    case BRW_CONDITIONAL_GE:
57       return BRW_CONDITIONAL_L;
58    case BRW_CONDITIONAL_L:
59       return BRW_CONDITIONAL_GE;
60    case BRW_CONDITIONAL_LE:
61       return BRW_CONDITIONAL_G;
62    default:
63       unreachable("Can't negate this cmod");
64    }
65 }
66 
67 /* Returns the corresponding conditional mod for swapping src0 and
68  * src1 in e.g. CMP.
69  */
70 enum brw_conditional_mod
brw_swap_cmod(enum brw_conditional_mod cmod)71 brw_swap_cmod(enum brw_conditional_mod cmod)
72 {
73    switch (cmod) {
74    case BRW_CONDITIONAL_Z:
75    case BRW_CONDITIONAL_NZ:
76       return cmod;
77    case BRW_CONDITIONAL_G:
78       return BRW_CONDITIONAL_L;
79    case BRW_CONDITIONAL_GE:
80       return BRW_CONDITIONAL_LE;
81    case BRW_CONDITIONAL_L:
82       return BRW_CONDITIONAL_G;
83    case BRW_CONDITIONAL_LE:
84       return BRW_CONDITIONAL_GE;
85    default:
86       return BRW_CONDITIONAL_NONE;
87    }
88 }
89 
90 /**
91  * Get the least significant bit offset of the i+1-th component of immediate
92  * type \p type.  For \p i equal to the two's complement of j, return the
93  * offset of the j-th component starting from the end of the vector.  For
94  * scalar register types return zero.
95  */
96 static unsigned
imm_shift(enum brw_reg_type type,unsigned i)97 imm_shift(enum brw_reg_type type, unsigned i)
98 {
99    assert(type != BRW_TYPE_UV && type != BRW_TYPE_V &&
100           "Not implemented.");
101 
102    if (type == BRW_TYPE_VF)
103       return 8 * (i & 3);
104    else
105       return 0;
106 }
107 
108 /**
109  * Swizzle an arbitrary immediate \p x of the given type according to the
110  * permutation specified as \p swz.
111  */
112 uint32_t
brw_swizzle_immediate(enum brw_reg_type type,uint32_t x,unsigned swz)113 brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz)
114 {
115    if (imm_shift(type, 1)) {
116       const unsigned n = 32 / imm_shift(type, 1);
117       uint32_t y = 0;
118 
119       for (unsigned i = 0; i < n; i++) {
120          /* Shift the specified component all the way to the right and left to
121           * discard any undesired L/MSBs, then shift it right into component i.
122           */
123          y |= x >> imm_shift(type, (i & ~3) + BRW_GET_SWZ(swz, i & 3))
124                 << imm_shift(type, ~0u)
125                 >> imm_shift(type, ~0u - i);
126       }
127 
128       return y;
129    } else {
130       return x;
131    }
132 }
133 
134 unsigned
brw_get_default_exec_size(struct brw_codegen * p)135 brw_get_default_exec_size(struct brw_codegen *p)
136 {
137    return p->current->exec_size;
138 }
139 
140 unsigned
brw_get_default_group(struct brw_codegen * p)141 brw_get_default_group(struct brw_codegen *p)
142 {
143    return p->current->group;
144 }
145 
146 unsigned
brw_get_default_access_mode(struct brw_codegen * p)147 brw_get_default_access_mode(struct brw_codegen *p)
148 {
149    return p->current->access_mode;
150 }
151 
152 struct tgl_swsb
brw_get_default_swsb(struct brw_codegen * p)153 brw_get_default_swsb(struct brw_codegen *p)
154 {
155    return p->current->swsb;
156 }
157 
158 void
brw_set_default_exec_size(struct brw_codegen * p,unsigned value)159 brw_set_default_exec_size(struct brw_codegen *p, unsigned value)
160 {
161    p->current->exec_size = value;
162 }
163 
brw_set_default_predicate_control(struct brw_codegen * p,enum brw_predicate pc)164 void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc)
165 {
166    p->current->predicate = pc;
167 }
168 
brw_set_default_predicate_inverse(struct brw_codegen * p,bool predicate_inverse)169 void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse)
170 {
171    p->current->pred_inv = predicate_inverse;
172 }
173 
brw_set_default_flag_reg(struct brw_codegen * p,int reg,int subreg)174 void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg)
175 {
176    assert(subreg < 2);
177    p->current->flag_subreg = reg * 2 + subreg;
178 }
179 
brw_set_default_access_mode(struct brw_codegen * p,unsigned access_mode)180 void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode )
181 {
182    p->current->access_mode = access_mode;
183 }
184 
185 /**
186  * Apply the range of channel enable signals given by
187  * [group, group + exec_size) to the instruction passed as argument.
188  */
189 void
brw_inst_set_group(const struct intel_device_info * devinfo,brw_inst * inst,unsigned group)190 brw_inst_set_group(const struct intel_device_info *devinfo,
191                    brw_inst *inst, unsigned group)
192 {
193    if (devinfo->ver >= 20) {
194       assert(group % 8 == 0 && group < 32);
195       brw_inst_set_qtr_control(devinfo, inst, group / 8);
196 
197    } else {
198       assert(group % 4 == 0 && group < 32);
199       brw_inst_set_qtr_control(devinfo, inst, group / 8);
200       brw_inst_set_nib_control(devinfo, inst, (group / 4) % 2);
201 
202    }
203 }
204 
205 void
brw_set_default_group(struct brw_codegen * p,unsigned group)206 brw_set_default_group(struct brw_codegen *p, unsigned group)
207 {
208    p->current->group = group;
209 }
210 
brw_set_default_mask_control(struct brw_codegen * p,unsigned value)211 void brw_set_default_mask_control( struct brw_codegen *p, unsigned value )
212 {
213    p->current->mask_control = value;
214 }
215 
brw_set_default_saturate(struct brw_codegen * p,bool enable)216 void brw_set_default_saturate( struct brw_codegen *p, bool enable )
217 {
218    p->current->saturate = enable;
219 }
220 
brw_set_default_acc_write_control(struct brw_codegen * p,unsigned value)221 void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value)
222 {
223    p->current->acc_wr_control = value;
224 }
225 
brw_set_default_swsb(struct brw_codegen * p,struct tgl_swsb value)226 void brw_set_default_swsb(struct brw_codegen *p, struct tgl_swsb value)
227 {
228    p->current->swsb = value;
229 }
230 
brw_push_insn_state(struct brw_codegen * p)231 void brw_push_insn_state( struct brw_codegen *p )
232 {
233    assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
234    *(p->current + 1) = *p->current;
235    p->current++;
236 }
237 
brw_pop_insn_state(struct brw_codegen * p)238 void brw_pop_insn_state( struct brw_codegen *p )
239 {
240    assert(p->current != p->stack);
241    p->current--;
242 }
243 
244 
245 /***********************************************************************
246  */
247 void
brw_init_codegen(const struct brw_isa_info * isa,struct brw_codegen * p,void * mem_ctx)248 brw_init_codegen(const struct brw_isa_info *isa,
249                  struct brw_codegen *p, void *mem_ctx)
250 {
251    memset(p, 0, sizeof(*p));
252 
253    p->isa = isa;
254    p->devinfo = isa->devinfo;
255    /*
256     * Set the initial instruction store array size to 1024, if found that
257     * isn't enough, then it will double the store size at brw_next_insn()
258     * until out of memory.
259     */
260    p->store_size = 1024;
261    p->store = rzalloc_array(mem_ctx, brw_inst, p->store_size);
262    p->nr_insn = 0;
263    p->current = p->stack;
264    memset(p->current, 0, sizeof(p->current[0]));
265 
266    p->mem_ctx = mem_ctx;
267 
268    /* Some defaults?
269     */
270    brw_set_default_exec_size(p, BRW_EXECUTE_8);
271    brw_set_default_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
272    brw_set_default_saturate(p, 0);
273 
274    /* Set up control flow stack */
275    p->if_stack_depth = 0;
276    p->if_stack_array_size = 16;
277    p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size);
278 
279    p->loop_stack_depth = 0;
280    p->loop_stack_array_size = 16;
281    p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
282 }
283 
284 
brw_get_program(struct brw_codegen * p,unsigned * sz)285 const unsigned *brw_get_program( struct brw_codegen *p,
286 			       unsigned *sz )
287 {
288    *sz = p->next_insn_offset;
289    return (const unsigned *)p->store;
290 }
291 
292 const struct brw_shader_reloc *
brw_get_shader_relocs(struct brw_codegen * p,unsigned * num_relocs)293 brw_get_shader_relocs(struct brw_codegen *p, unsigned *num_relocs)
294 {
295    *num_relocs = p->num_relocs;
296    return p->relocs;
297 }
298 
299 DEBUG_GET_ONCE_OPTION(shader_bin_dump_path, "INTEL_SHADER_BIN_DUMP_PATH", NULL);
300 
brw_should_dump_shader_bin(void)301 bool brw_should_dump_shader_bin(void)
302 {
303    return debug_get_option_shader_bin_dump_path() != NULL;
304 }
305 
/**
 * Write the bytes [start_offset, end_offset) of \p assembly to
 * "<INTEL_SHADER_BIN_DUMP_PATH>/<identifier>.bin".
 *
 * The file is created or truncated.  Dumping is best effort: the function
 * returns silently if no dump path is configured, if the file cannot be
 * opened, if it is not a regular file, or if a write fails.  An empty or
 * inverted range still creates the file but writes nothing.
 */
void
brw_dump_shader_bin(void *assembly, int start_offset, int end_offset,
                    const char *identifier)
{
   const char *dump_dir = debug_get_option_shader_bin_dump_path();

   /* Passing NULL to "%s" is undefined, so bail out before formatting. */
   if (dump_dir == NULL)
      return;

   char *name = ralloc_asprintf(NULL, "%s/%s.bin", dump_dir, identifier);
   if (name == NULL)
      return;

   int fd = open(name, O_CREAT | O_WRONLY | O_TRUNC, 0644);
   ralloc_free(name);

   if (fd < 0)
      return;

   struct stat sb;
   if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
      close(fd);
      return;
   }

   /* Byte arithmetic goes through a char pointer (arithmetic on void * is
    * a compiler extension), and an inverted range is clamped to zero rather
    * than wrapping to a huge unsigned length.
    */
   const char *cursor = (const char *)assembly + start_offset;
   size_t remaining =
      end_offset > start_offset ? (size_t)(end_offset - start_offset) : 0;

   while (remaining > 0) {
      ssize_t written = write(fd, cursor, remaining);

      if (written <= 0)
         break;

      remaining -= written;
      cursor += written;
   }

   close(fd);
}
342 
brw_try_override_assembly(struct brw_codegen * p,int start_offset,const char * identifier)343 bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,
344                                const char *identifier)
345 {
346    const char *read_path = getenv("INTEL_SHADER_ASM_READ_PATH");
347    if (!read_path) {
348       return false;
349    }
350 
351    char *name = ralloc_asprintf(NULL, "%s/%s.bin", read_path, identifier);
352 
353    int fd = open(name, O_RDONLY);
354    ralloc_free(name);
355 
356    if (fd == -1) {
357       return false;
358    }
359 
360    struct stat sb;
361    if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
362       close(fd);
363       return false;
364    }
365 
366    p->nr_insn -= (p->next_insn_offset - start_offset) / sizeof(brw_inst);
367    p->nr_insn += sb.st_size / sizeof(brw_inst);
368 
369    p->next_insn_offset = start_offset + sb.st_size;
370    p->store_size = (start_offset + sb.st_size) / sizeof(brw_inst);
371    p->store = (brw_inst *)reralloc_size(p->mem_ctx, p->store, p->next_insn_offset);
372    assert(p->store);
373 
374    ssize_t ret = read(fd, (char *)p->store + start_offset, sb.st_size);
375    close(fd);
376    if (ret != sb.st_size) {
377       return false;
378    }
379 
380    ASSERTED bool valid =
381       brw_validate_instructions(p->isa, p->store,
382                                 start_offset, p->next_insn_offset,
383                                 NULL);
384    assert(valid);
385 
386    return true;
387 }
388 
389 const struct brw_label *
brw_find_label(const struct brw_label * root,int offset)390 brw_find_label(const struct brw_label *root, int offset)
391 {
392    const struct brw_label *curr = root;
393 
394    if (curr != NULL)
395    {
396       do {
397          if (curr->offset == offset)
398             return curr;
399 
400          curr = curr->next;
401       } while (curr != NULL);
402    }
403 
404    return curr;
405 }
406 
407 void
brw_create_label(struct brw_label ** labels,int offset,void * mem_ctx)408 brw_create_label(struct brw_label **labels, int offset, void *mem_ctx)
409 {
410    if (*labels != NULL) {
411       struct brw_label *curr = *labels;
412       struct brw_label *prev;
413 
414       do {
415          prev = curr;
416 
417          if (curr->offset == offset)
418             return;
419 
420          curr = curr->next;
421       } while (curr != NULL);
422 
423       curr = ralloc(mem_ctx, struct brw_label);
424       curr->offset = offset;
425       curr->number = prev->number + 1;
426       curr->next = NULL;
427       prev->next = curr;
428    } else {
429       struct brw_label *root = ralloc(mem_ctx, struct brw_label);
430       root->number = 0;
431       root->offset = offset;
432       root->next = NULL;
433       *labels = root;
434    }
435 }
436 
437 const struct brw_label *
brw_label_assembly(const struct brw_isa_info * isa,const void * assembly,int start,int end,void * mem_ctx)438 brw_label_assembly(const struct brw_isa_info *isa,
439                    const void *assembly, int start, int end, void *mem_ctx)
440 {
441    const struct intel_device_info *const devinfo = isa->devinfo;
442 
443    struct brw_label *root_label = NULL;
444 
445    int to_bytes_scale = sizeof(brw_inst) / brw_jump_scale(devinfo);
446 
447    for (int offset = start; offset < end;) {
448       const brw_inst *inst = (const brw_inst *) ((const char *) assembly + offset);
449       brw_inst uncompacted;
450 
451       bool is_compact = brw_inst_cmpt_control(devinfo, inst);
452 
453       if (is_compact) {
454          brw_compact_inst *compacted = (brw_compact_inst *)inst;
455          brw_uncompact_instruction(isa, &uncompacted, compacted);
456          inst = &uncompacted;
457       }
458 
459       if (brw_has_uip(devinfo, brw_inst_opcode(isa, inst))) {
460          /* Instructions that have UIP also have JIP. */
461          brw_create_label(&root_label,
462             offset + brw_inst_uip(devinfo, inst) * to_bytes_scale, mem_ctx);
463          brw_create_label(&root_label,
464             offset + brw_inst_jip(devinfo, inst) * to_bytes_scale, mem_ctx);
465       } else if (brw_has_jip(devinfo, brw_inst_opcode(isa, inst))) {
466          int jip = brw_inst_jip(devinfo, inst);
467 
468          brw_create_label(&root_label, offset + jip * to_bytes_scale, mem_ctx);
469       }
470 
471       if (is_compact) {
472          offset += sizeof(brw_compact_inst);
473       } else {
474          offset += sizeof(brw_inst);
475       }
476    }
477 
478    return root_label;
479 }
480 
/**
 * Disassemble the byte range [start, end) of \p assembly to \p out, with
 * jump targets marked by labels.
 *
 * The labels live in a temporary ralloc context that is freed before
 * returning.
 */
void
brw_disassemble_with_labels(const struct brw_isa_info *isa,
                            const void *assembly, int start, int end, FILE *out)
{
   void *label_ctx = ralloc_context(NULL);

   const struct brw_label *labels =
      brw_label_assembly(isa, assembly, start, end, label_ctx);
   brw_disassemble(isa, assembly, start, end, labels, out);

   ralloc_free(label_ctx);
}
493 
494 void
brw_disassemble(const struct brw_isa_info * isa,const void * assembly,int start,int end,const struct brw_label * root_label,FILE * out)495 brw_disassemble(const struct brw_isa_info *isa,
496                 const void *assembly, int start, int end,
497                 const struct brw_label *root_label, FILE *out)
498 {
499    const struct intel_device_info *devinfo = isa->devinfo;
500 
501    bool dump_hex = INTEL_DEBUG(DEBUG_HEX);
502 
503    for (int offset = start; offset < end;) {
504       const brw_inst *insn = (const brw_inst *)((char *)assembly + offset);
505       brw_inst uncompacted;
506 
507       if (root_label != NULL) {
508         const struct brw_label *label = brw_find_label(root_label, offset);
509         if (label != NULL) {
510            fprintf(out, "\nLABEL%d:\n", label->number);
511         }
512       }
513 
514       bool compacted = brw_inst_cmpt_control(devinfo, insn);
515       if (0)
516          fprintf(out, "0x%08x: ", offset);
517 
518       if (compacted) {
519          brw_compact_inst *compacted = (brw_compact_inst *)insn;
520          if (dump_hex) {
521             unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
522             const unsigned int blank_spaces = 24;
523             for (int i = 0 ; i < 8; i = i + 4) {
524                fprintf(out, "%02x %02x %02x %02x ",
525                        insn_ptr[i],
526                        insn_ptr[i + 1],
527                        insn_ptr[i + 2],
528                        insn_ptr[i + 3]);
529             }
530             /* Make compacted instructions hex value output vertically aligned
531              * with uncompacted instructions hex value
532              */
533             fprintf(out, "%*c", blank_spaces, ' ');
534          }
535 
536          brw_uncompact_instruction(isa, &uncompacted, compacted);
537          insn = &uncompacted;
538       } else {
539          if (dump_hex) {
540             unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
541             for (int i = 0 ; i < 16; i = i + 4) {
542                fprintf(out, "%02x %02x %02x %02x ",
543                        insn_ptr[i],
544                        insn_ptr[i + 1],
545                        insn_ptr[i + 2],
546                        insn_ptr[i + 3]);
547             }
548          }
549       }
550 
551       brw_disassemble_inst(out, isa, insn, compacted, offset, root_label);
552 
553       if (compacted) {
554          offset += sizeof(brw_compact_inst);
555       } else {
556          offset += sizeof(brw_inst);
557       }
558    }
559 }
560 
561 static const struct opcode_desc opcode_descs[] = {
562    /* IR,                 HW,  name,      nsrc, ndst, gfx_vers assuming Gfx9+ */
563    { BRW_OPCODE_ILLEGAL,  0,   "illegal", 0,    0,    GFX_ALL },
564    { BRW_OPCODE_SYNC,     1,   "sync",    1,    0,    GFX_GE(GFX12) },
565    { BRW_OPCODE_MOV,      1,   "mov",     1,    1,    GFX_LT(GFX12) },
566    { BRW_OPCODE_MOV,      97,  "mov",     1,    1,    GFX_GE(GFX12) },
567    { BRW_OPCODE_SEL,      2,   "sel",     2,    1,    GFX_LT(GFX12) },
568    { BRW_OPCODE_SEL,      98,  "sel",     2,    1,    GFX_GE(GFX12) },
569    { BRW_OPCODE_MOVI,     3,   "movi",    2,    1,    GFX_LT(GFX12) },
570    { BRW_OPCODE_MOVI,     99,  "movi",    2,    1,    GFX_GE(GFX12) },
571    { BRW_OPCODE_NOT,      4,   "not",     1,    1,    GFX_LT(GFX12) },
572    { BRW_OPCODE_NOT,      100, "not",     1,    1,    GFX_GE(GFX12) },
573    { BRW_OPCODE_AND,      5,   "and",     2,    1,    GFX_LT(GFX12) },
574    { BRW_OPCODE_AND,      101, "and",     2,    1,    GFX_GE(GFX12) },
575    { BRW_OPCODE_OR,       6,   "or",      2,    1,    GFX_LT(GFX12) },
576    { BRW_OPCODE_OR,       102, "or",      2,    1,    GFX_GE(GFX12) },
577    { BRW_OPCODE_XOR,      7,   "xor",     2,    1,    GFX_LT(GFX12) },
578    { BRW_OPCODE_XOR,      103, "xor",     2,    1,    GFX_GE(GFX12) },
579    { BRW_OPCODE_SHR,      8,   "shr",     2,    1,    GFX_LT(GFX12) },
580    { BRW_OPCODE_SHR,      104, "shr",     2,    1,    GFX_GE(GFX12) },
581    { BRW_OPCODE_SHL,      9,   "shl",     2,    1,    GFX_LT(GFX12) },
582    { BRW_OPCODE_SHL,      105, "shl",     2,    1,    GFX_GE(GFX12) },
583    { BRW_OPCODE_SMOV,     10,  "smov",    0,    0,    GFX_LT(GFX12) },
584    { BRW_OPCODE_SMOV,     106, "smov",    0,    0,    GFX_GE(GFX12) },
585    { BRW_OPCODE_ASR,      12,  "asr",     2,    1,    GFX_LT(GFX12) },
586    { BRW_OPCODE_ASR,      108, "asr",     2,    1,    GFX_GE(GFX12) },
587    { BRW_OPCODE_ROR,      14,  "ror",     2,    1,    GFX11 },
588    { BRW_OPCODE_ROR,      110, "ror",     2,    1,    GFX_GE(GFX12) },
589    { BRW_OPCODE_ROL,      15,  "rol",     2,    1,    GFX11 },
590    { BRW_OPCODE_ROL,      111, "rol",     2,    1,    GFX_GE(GFX12) },
591    { BRW_OPCODE_CMP,      16,  "cmp",     2,    1,    GFX_LT(GFX12) },
592    { BRW_OPCODE_CMP,      112, "cmp",     2,    1,    GFX_GE(GFX12) },
593    { BRW_OPCODE_CMPN,     17,  "cmpn",    2,    1,    GFX_LT(GFX12) },
594    { BRW_OPCODE_CMPN,     113, "cmpn",    2,    1,    GFX_GE(GFX12) },
595    { BRW_OPCODE_CSEL,     18,  "csel",    3,    1,    GFX_LT(GFX12) },
596    { BRW_OPCODE_CSEL,     114, "csel",    3,    1,    GFX_GE(GFX12) },
597    { BRW_OPCODE_BFREV,    23,  "bfrev",   1,    1,    GFX_LT(GFX12) },
598    { BRW_OPCODE_BFREV,    119, "bfrev",   1,    1,    GFX_GE(GFX12) },
599    { BRW_OPCODE_BFE,      24,  "bfe",     3,    1,    GFX_LT(GFX12) },
600    { BRW_OPCODE_BFE,      120, "bfe",     3,    1,    GFX_GE(GFX12) },
601    { BRW_OPCODE_BFI1,     25,  "bfi1",    2,    1,    GFX_LT(GFX12) },
602    { BRW_OPCODE_BFI1,     121, "bfi1",    2,    1,    GFX_GE(GFX12) },
603    { BRW_OPCODE_BFI2,     26,  "bfi2",    3,    1,    GFX_LT(GFX12) },
604    { BRW_OPCODE_BFI2,     122, "bfi2",    3,    1,    GFX_GE(GFX12) },
605    { BRW_OPCODE_JMPI,     32,  "jmpi",    0,    0,    GFX_ALL },
606    { BRW_OPCODE_BRD,      33,  "brd",     0,    0,    GFX_ALL },
607    { BRW_OPCODE_IF,       34,  "if",      0,    0,    GFX_ALL },
608    { BRW_OPCODE_BRC,      35,  "brc",     0,    0,    GFX_ALL },
609    { BRW_OPCODE_ELSE,     36,  "else",    0,    0,    GFX_ALL },
610    { BRW_OPCODE_ENDIF,    37,  "endif",   0,    0,    GFX_ALL },
611    { BRW_OPCODE_DO,       38,  "do",      0,    0,    0 }, /* Pseudo opcode. */
612    { BRW_OPCODE_WHILE,    39,  "while",   0,    0,    GFX_ALL },
613    { BRW_OPCODE_BREAK,    40,  "break",   0,    0,    GFX_ALL },
614    { BRW_OPCODE_CONTINUE, 41,  "cont",    0,    0,    GFX_ALL },
615    { BRW_OPCODE_HALT,     42,  "halt",    0,    0,    GFX_ALL },
616    { BRW_OPCODE_CALLA,    43,  "calla",   0,    0,    GFX_ALL },
617    { BRW_OPCODE_CALL,     44,  "call",    0,    0,    GFX_ALL },
618    { BRW_OPCODE_RET,      45,  "ret",     0,    0,    GFX_ALL },
619    { BRW_OPCODE_GOTO,     46,  "goto",    0,    0,    GFX_ALL },
620    { BRW_OPCODE_WAIT,     48,  "wait",    0,    1,    GFX_LT(GFX12) },
621    { BRW_OPCODE_SEND,     49,  "send",    1,    1,    GFX_LT(GFX12) },
622    { BRW_OPCODE_SENDC,    50,  "sendc",   1,    1,    GFX_LT(GFX12) },
623    { BRW_OPCODE_SEND,     49,  "send",    2,    1,    GFX_GE(GFX12) },
624    { BRW_OPCODE_SENDC,    50,  "sendc",   2,    1,    GFX_GE(GFX12) },
625    { BRW_OPCODE_SENDS,    51,  "sends",   2,    1,    GFX_LT(GFX12) },
626    { BRW_OPCODE_SENDSC,   52,  "sendsc",  2,    1,    GFX_LT(GFX12) },
627    { BRW_OPCODE_MATH,     56,  "math",    2,    1,    GFX_ALL },
628    { BRW_OPCODE_ADD,      64,  "add",     2,    1,    GFX_ALL },
629    { BRW_OPCODE_MUL,      65,  "mul",     2,    1,    GFX_ALL },
630    { BRW_OPCODE_AVG,      66,  "avg",     2,    1,    GFX_ALL },
631    { BRW_OPCODE_FRC,      67,  "frc",     1,    1,    GFX_ALL },
632    { BRW_OPCODE_RNDU,     68,  "rndu",    1,    1,    GFX_ALL },
633    { BRW_OPCODE_RNDD,     69,  "rndd",    1,    1,    GFX_ALL },
634    { BRW_OPCODE_RNDE,     70,  "rnde",    1,    1,    GFX_ALL },
635    { BRW_OPCODE_RNDZ,     71,  "rndz",    1,    1,    GFX_ALL },
636    { BRW_OPCODE_MAC,      72,  "mac",     2,    1,    GFX_ALL },
637    { BRW_OPCODE_MACH,     73,  "mach",    2,    1,    GFX_ALL },
638    { BRW_OPCODE_LZD,      74,  "lzd",     1,    1,    GFX_ALL },
639    { BRW_OPCODE_FBH,      75,  "fbh",     1,    1,    GFX_ALL },
640    { BRW_OPCODE_FBL,      76,  "fbl",     1,    1,    GFX_ALL },
641    { BRW_OPCODE_CBIT,     77,  "cbit",    1,    1,    GFX_ALL },
642    { BRW_OPCODE_ADDC,     78,  "addc",    2,    1,    GFX_ALL },
643    { BRW_OPCODE_SUBB,     79,  "subb",    2,    1,    GFX_ALL },
644    { BRW_OPCODE_ADD3,     82,  "add3",    3,    1,    GFX_GE(GFX125) },
645    { BRW_OPCODE_DP4,      84,  "dp4",     2,    1,    GFX_LT(GFX11) },
646    { BRW_OPCODE_DPH,      85,  "dph",     2,    1,    GFX_LT(GFX11) },
647    { BRW_OPCODE_DP3,      86,  "dp3",     2,    1,    GFX_LT(GFX11) },
648    { BRW_OPCODE_DP2,      87,  "dp2",     2,    1,    GFX_LT(GFX11) },
649    { BRW_OPCODE_DP4A,     88,  "dp4a",    3,    1,    GFX_GE(GFX12) },
650    { BRW_OPCODE_LINE,     89,  "line",    2,    1,    GFX9 },
651    { BRW_OPCODE_DPAS,     89,  "dpas",    3,    1,    GFX_GE(GFX125) },
652    { BRW_OPCODE_PLN,      90,  "pln",     2,    1,    GFX9 },
653    { BRW_OPCODE_MAD,      91,  "mad",     3,    1,    GFX_ALL },
654    { BRW_OPCODE_LRP,      92,  "lrp",     3,    1,    GFX9 },
655    { BRW_OPCODE_MADM,     93,  "madm",    3,    1,    GFX_ALL },
656    { BRW_OPCODE_NOP,      126, "nop",     0,    0,    GFX_LT(GFX12) },
657    { BRW_OPCODE_NOP,      96,  "nop",     0,    0,    GFX_GE(GFX12) }
658 };
659 
660 void
brw_init_isa_info(struct brw_isa_info * isa,const struct intel_device_info * devinfo)661 brw_init_isa_info(struct brw_isa_info *isa,
662                   const struct intel_device_info *devinfo)
663 {
664    assert(devinfo->ver >= 9);
665 
666    isa->devinfo = devinfo;
667 
668    enum gfx_ver ver = gfx_ver_from_devinfo(devinfo);
669 
670    memset(isa->ir_to_descs, 0, sizeof(isa->ir_to_descs));
671    memset(isa->hw_to_descs, 0, sizeof(isa->hw_to_descs));
672 
673    for (unsigned i = 0; i < ARRAY_SIZE(opcode_descs); i++) {
674       if (opcode_descs[i].gfx_vers & ver) {
675          const unsigned e = opcode_descs[i].ir;
676          const unsigned h = opcode_descs[i].hw;
677          assert(e < ARRAY_SIZE(isa->ir_to_descs) && !isa->ir_to_descs[e]);
678          assert(h < ARRAY_SIZE(isa->hw_to_descs) && !isa->hw_to_descs[h]);
679          isa->ir_to_descs[e] = &opcode_descs[i];
680          isa->hw_to_descs[h] = &opcode_descs[i];
681       }
682    }
683 }
684 
685 /**
686  * Return the matching opcode_desc for the specified IR opcode and hardware
687  * generation, or NULL if the opcode is not supported by the device.
688  */
689 const struct opcode_desc *
brw_opcode_desc(const struct brw_isa_info * isa,enum opcode op)690 brw_opcode_desc(const struct brw_isa_info *isa, enum opcode op)
691 {
692    return op < ARRAY_SIZE(isa->ir_to_descs) ? isa->ir_to_descs[op] : NULL;
693 }
694 
695 /**
696  * Return the matching opcode_desc for the specified HW opcode and hardware
697  * generation, or NULL if the opcode is not supported by the device.
698  */
699 const struct opcode_desc *
brw_opcode_desc_from_hw(const struct brw_isa_info * isa,unsigned hw)700 brw_opcode_desc_from_hw(const struct brw_isa_info *isa, unsigned hw)
701 {
702    return hw < ARRAY_SIZE(isa->hw_to_descs) ? isa->hw_to_descs[hw] : NULL;
703 }
704 
705 unsigned
brw_num_sources_from_inst(const struct brw_isa_info * isa,const brw_inst * inst)706 brw_num_sources_from_inst(const struct brw_isa_info *isa,
707                           const brw_inst *inst)
708 {
709    const struct intel_device_info *devinfo = isa->devinfo;
710    const struct opcode_desc *desc =
711       brw_opcode_desc(isa, brw_inst_opcode(isa, inst));
712    unsigned math_function;
713 
714    if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) {
715       math_function = brw_inst_math_function(devinfo, inst);
716    } else {
717       assert(desc->nsrc < 4);
718       return desc->nsrc;
719    }
720 
721    switch (math_function) {
722    case BRW_MATH_FUNCTION_INV:
723    case BRW_MATH_FUNCTION_LOG:
724    case BRW_MATH_FUNCTION_EXP:
725    case BRW_MATH_FUNCTION_SQRT:
726    case BRW_MATH_FUNCTION_RSQ:
727    case BRW_MATH_FUNCTION_SIN:
728    case BRW_MATH_FUNCTION_COS:
729    case GFX8_MATH_FUNCTION_INVM:
730    case GFX8_MATH_FUNCTION_RSQRTM:
731       return 1;
732    case BRW_MATH_FUNCTION_FDIV:
733    case BRW_MATH_FUNCTION_POW:
734    case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
735    case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
736    case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
737       return 2;
738    default:
739       unreachable("not reached");
740    }
741 }
742