1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <[email protected]>
30 */
31
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>

#include "brw_disasm.h"
#include "brw_eu_defines.h"
#include "brw_eu.h"
#include "brw_private.h"
#include "intel_gfx_ver_enum.h"
#include "dev/intel_debug.h"

#include "util/u_debug.h"
#include "util/ralloc.h"
44
45 /* Returns a conditional modifier that negates the condition. */
46 enum brw_conditional_mod
brw_negate_cmod(enum brw_conditional_mod cmod)47 brw_negate_cmod(enum brw_conditional_mod cmod)
48 {
49 switch (cmod) {
50 case BRW_CONDITIONAL_Z:
51 return BRW_CONDITIONAL_NZ;
52 case BRW_CONDITIONAL_NZ:
53 return BRW_CONDITIONAL_Z;
54 case BRW_CONDITIONAL_G:
55 return BRW_CONDITIONAL_LE;
56 case BRW_CONDITIONAL_GE:
57 return BRW_CONDITIONAL_L;
58 case BRW_CONDITIONAL_L:
59 return BRW_CONDITIONAL_GE;
60 case BRW_CONDITIONAL_LE:
61 return BRW_CONDITIONAL_G;
62 default:
63 unreachable("Can't negate this cmod");
64 }
65 }
66
67 /* Returns the corresponding conditional mod for swapping src0 and
68 * src1 in e.g. CMP.
69 */
70 enum brw_conditional_mod
brw_swap_cmod(enum brw_conditional_mod cmod)71 brw_swap_cmod(enum brw_conditional_mod cmod)
72 {
73 switch (cmod) {
74 case BRW_CONDITIONAL_Z:
75 case BRW_CONDITIONAL_NZ:
76 return cmod;
77 case BRW_CONDITIONAL_G:
78 return BRW_CONDITIONAL_L;
79 case BRW_CONDITIONAL_GE:
80 return BRW_CONDITIONAL_LE;
81 case BRW_CONDITIONAL_L:
82 return BRW_CONDITIONAL_G;
83 case BRW_CONDITIONAL_LE:
84 return BRW_CONDITIONAL_GE;
85 default:
86 return BRW_CONDITIONAL_NONE;
87 }
88 }
89
90 /**
91 * Get the least significant bit offset of the i+1-th component of immediate
92 * type \p type. For \p i equal to the two's complement of j, return the
93 * offset of the j-th component starting from the end of the vector. For
94 * scalar register types return zero.
95 */
96 static unsigned
imm_shift(enum brw_reg_type type,unsigned i)97 imm_shift(enum brw_reg_type type, unsigned i)
98 {
99 assert(type != BRW_TYPE_UV && type != BRW_TYPE_V &&
100 "Not implemented.");
101
102 if (type == BRW_TYPE_VF)
103 return 8 * (i & 3);
104 else
105 return 0;
106 }
107
108 /**
109 * Swizzle an arbitrary immediate \p x of the given type according to the
110 * permutation specified as \p swz.
111 */
112 uint32_t
brw_swizzle_immediate(enum brw_reg_type type,uint32_t x,unsigned swz)113 brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz)
114 {
115 if (imm_shift(type, 1)) {
116 const unsigned n = 32 / imm_shift(type, 1);
117 uint32_t y = 0;
118
119 for (unsigned i = 0; i < n; i++) {
120 /* Shift the specified component all the way to the right and left to
121 * discard any undesired L/MSBs, then shift it right into component i.
122 */
123 y |= x >> imm_shift(type, (i & ~3) + BRW_GET_SWZ(swz, i & 3))
124 << imm_shift(type, ~0u)
125 >> imm_shift(type, ~0u - i);
126 }
127
128 return y;
129 } else {
130 return x;
131 }
132 }
133
134 unsigned
brw_get_default_exec_size(struct brw_codegen * p)135 brw_get_default_exec_size(struct brw_codegen *p)
136 {
137 return p->current->exec_size;
138 }
139
140 unsigned
brw_get_default_group(struct brw_codegen * p)141 brw_get_default_group(struct brw_codegen *p)
142 {
143 return p->current->group;
144 }
145
146 unsigned
brw_get_default_access_mode(struct brw_codegen * p)147 brw_get_default_access_mode(struct brw_codegen *p)
148 {
149 return p->current->access_mode;
150 }
151
152 struct tgl_swsb
brw_get_default_swsb(struct brw_codegen * p)153 brw_get_default_swsb(struct brw_codegen *p)
154 {
155 return p->current->swsb;
156 }
157
158 void
brw_set_default_exec_size(struct brw_codegen * p,unsigned value)159 brw_set_default_exec_size(struct brw_codegen *p, unsigned value)
160 {
161 p->current->exec_size = value;
162 }
163
brw_set_default_predicate_control(struct brw_codegen * p,enum brw_predicate pc)164 void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc)
165 {
166 p->current->predicate = pc;
167 }
168
brw_set_default_predicate_inverse(struct brw_codegen * p,bool predicate_inverse)169 void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse)
170 {
171 p->current->pred_inv = predicate_inverse;
172 }
173
brw_set_default_flag_reg(struct brw_codegen * p,int reg,int subreg)174 void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg)
175 {
176 assert(subreg < 2);
177 p->current->flag_subreg = reg * 2 + subreg;
178 }
179
brw_set_default_access_mode(struct brw_codegen * p,unsigned access_mode)180 void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode )
181 {
182 p->current->access_mode = access_mode;
183 }
184
185 /**
186 * Apply the range of channel enable signals given by
187 * [group, group + exec_size) to the instruction passed as argument.
188 */
189 void
brw_inst_set_group(const struct intel_device_info * devinfo,brw_inst * inst,unsigned group)190 brw_inst_set_group(const struct intel_device_info *devinfo,
191 brw_inst *inst, unsigned group)
192 {
193 if (devinfo->ver >= 20) {
194 assert(group % 8 == 0 && group < 32);
195 brw_inst_set_qtr_control(devinfo, inst, group / 8);
196
197 } else {
198 assert(group % 4 == 0 && group < 32);
199 brw_inst_set_qtr_control(devinfo, inst, group / 8);
200 brw_inst_set_nib_control(devinfo, inst, (group / 4) % 2);
201
202 }
203 }
204
205 void
brw_set_default_group(struct brw_codegen * p,unsigned group)206 brw_set_default_group(struct brw_codegen *p, unsigned group)
207 {
208 p->current->group = group;
209 }
210
brw_set_default_mask_control(struct brw_codegen * p,unsigned value)211 void brw_set_default_mask_control( struct brw_codegen *p, unsigned value )
212 {
213 p->current->mask_control = value;
214 }
215
brw_set_default_saturate(struct brw_codegen * p,bool enable)216 void brw_set_default_saturate( struct brw_codegen *p, bool enable )
217 {
218 p->current->saturate = enable;
219 }
220
brw_set_default_acc_write_control(struct brw_codegen * p,unsigned value)221 void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value)
222 {
223 p->current->acc_wr_control = value;
224 }
225
brw_set_default_swsb(struct brw_codegen * p,struct tgl_swsb value)226 void brw_set_default_swsb(struct brw_codegen *p, struct tgl_swsb value)
227 {
228 p->current->swsb = value;
229 }
230
brw_push_insn_state(struct brw_codegen * p)231 void brw_push_insn_state( struct brw_codegen *p )
232 {
233 assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
234 *(p->current + 1) = *p->current;
235 p->current++;
236 }
237
brw_pop_insn_state(struct brw_codegen * p)238 void brw_pop_insn_state( struct brw_codegen *p )
239 {
240 assert(p->current != p->stack);
241 p->current--;
242 }
243
244
245 /***********************************************************************
246 */
247 void
brw_init_codegen(const struct brw_isa_info * isa,struct brw_codegen * p,void * mem_ctx)248 brw_init_codegen(const struct brw_isa_info *isa,
249 struct brw_codegen *p, void *mem_ctx)
250 {
251 memset(p, 0, sizeof(*p));
252
253 p->isa = isa;
254 p->devinfo = isa->devinfo;
255 /*
256 * Set the initial instruction store array size to 1024, if found that
257 * isn't enough, then it will double the store size at brw_next_insn()
258 * until out of memory.
259 */
260 p->store_size = 1024;
261 p->store = rzalloc_array(mem_ctx, brw_inst, p->store_size);
262 p->nr_insn = 0;
263 p->current = p->stack;
264 memset(p->current, 0, sizeof(p->current[0]));
265
266 p->mem_ctx = mem_ctx;
267
268 /* Some defaults?
269 */
270 brw_set_default_exec_size(p, BRW_EXECUTE_8);
271 brw_set_default_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */
272 brw_set_default_saturate(p, 0);
273
274 /* Set up control flow stack */
275 p->if_stack_depth = 0;
276 p->if_stack_array_size = 16;
277 p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size);
278
279 p->loop_stack_depth = 0;
280 p->loop_stack_array_size = 16;
281 p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
282 }
283
284
brw_get_program(struct brw_codegen * p,unsigned * sz)285 const unsigned *brw_get_program( struct brw_codegen *p,
286 unsigned *sz )
287 {
288 *sz = p->next_insn_offset;
289 return (const unsigned *)p->store;
290 }
291
292 const struct brw_shader_reloc *
brw_get_shader_relocs(struct brw_codegen * p,unsigned * num_relocs)293 brw_get_shader_relocs(struct brw_codegen *p, unsigned *num_relocs)
294 {
295 *num_relocs = p->num_relocs;
296 return p->relocs;
297 }
298
299 DEBUG_GET_ONCE_OPTION(shader_bin_dump_path, "INTEL_SHADER_BIN_DUMP_PATH", NULL);
300
brw_should_dump_shader_bin(void)301 bool brw_should_dump_shader_bin(void)
302 {
303 return debug_get_option_shader_bin_dump_path() != NULL;
304 }
305
/**
 * Write the bytes [start_offset, end_offset) of \p assembly to the file
 * "<dump path>/<identifier>.bin", where the dump path is the value of the
 * INTEL_SHADER_BIN_DUMP_PATH option.
 *
 * The file is created or truncated.  This is a best-effort debugging aid:
 * the function returns silently if no dump path is configured, if the range
 * is inverted, if the file cannot be opened or is not a regular file, or if
 * a write fails part way through.
 */
void
brw_dump_shader_bin(void *assembly, int start_offset, int end_offset,
                    const char *identifier)
{
   const char *dump_path = debug_get_option_shader_bin_dump_path();

   /* An inverted range would wrap to a huge size_t below and make write()
    * read far beyond the assembly buffer.
    */
   if (dump_path == NULL || end_offset < start_offset)
      return;

   char *name = ralloc_asprintf(NULL, "%s/%s.bin", dump_path, identifier);
   if (name == NULL)
      return;

   int fd = open(name, O_CREAT | O_WRONLY | O_TRUNC, 0644);
   ralloc_free(name);

   if (fd < 0)
      return;

   struct stat sb;
   if (fstat(fd, &sb) != 0 || !S_ISREG(sb.st_mode))
      goto out;

   /* Byte-wise cursor: arithmetic on 'void *' is a GNU extension. */
   const char *cursor = (const char *)assembly + start_offset;
   size_t remaining = (size_t)(end_offset - start_offset);

   /* write() may accept fewer bytes than requested, so keep going until
    * everything is out or an error is reported.
    */
   while (remaining > 0) {
      ssize_t written = write(fd, cursor, remaining);

      if (written <= 0)
         break;

      cursor += written;
      remaining -= (size_t)written;
   }

out:
   close(fd);
}
342
brw_try_override_assembly(struct brw_codegen * p,int start_offset,const char * identifier)343 bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,
344 const char *identifier)
345 {
346 const char *read_path = getenv("INTEL_SHADER_ASM_READ_PATH");
347 if (!read_path) {
348 return false;
349 }
350
351 char *name = ralloc_asprintf(NULL, "%s/%s.bin", read_path, identifier);
352
353 int fd = open(name, O_RDONLY);
354 ralloc_free(name);
355
356 if (fd == -1) {
357 return false;
358 }
359
360 struct stat sb;
361 if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
362 close(fd);
363 return false;
364 }
365
366 p->nr_insn -= (p->next_insn_offset - start_offset) / sizeof(brw_inst);
367 p->nr_insn += sb.st_size / sizeof(brw_inst);
368
369 p->next_insn_offset = start_offset + sb.st_size;
370 p->store_size = (start_offset + sb.st_size) / sizeof(brw_inst);
371 p->store = (brw_inst *)reralloc_size(p->mem_ctx, p->store, p->next_insn_offset);
372 assert(p->store);
373
374 ssize_t ret = read(fd, (char *)p->store + start_offset, sb.st_size);
375 close(fd);
376 if (ret != sb.st_size) {
377 return false;
378 }
379
380 ASSERTED bool valid =
381 brw_validate_instructions(p->isa, p->store,
382 start_offset, p->next_insn_offset,
383 NULL);
384 assert(valid);
385
386 return true;
387 }
388
389 const struct brw_label *
brw_find_label(const struct brw_label * root,int offset)390 brw_find_label(const struct brw_label *root, int offset)
391 {
392 const struct brw_label *curr = root;
393
394 if (curr != NULL)
395 {
396 do {
397 if (curr->offset == offset)
398 return curr;
399
400 curr = curr->next;
401 } while (curr != NULL);
402 }
403
404 return curr;
405 }
406
407 void
brw_create_label(struct brw_label ** labels,int offset,void * mem_ctx)408 brw_create_label(struct brw_label **labels, int offset, void *mem_ctx)
409 {
410 if (*labels != NULL) {
411 struct brw_label *curr = *labels;
412 struct brw_label *prev;
413
414 do {
415 prev = curr;
416
417 if (curr->offset == offset)
418 return;
419
420 curr = curr->next;
421 } while (curr != NULL);
422
423 curr = ralloc(mem_ctx, struct brw_label);
424 curr->offset = offset;
425 curr->number = prev->number + 1;
426 curr->next = NULL;
427 prev->next = curr;
428 } else {
429 struct brw_label *root = ralloc(mem_ctx, struct brw_label);
430 root->number = 0;
431 root->offset = offset;
432 root->next = NULL;
433 *labels = root;
434 }
435 }
436
437 const struct brw_label *
brw_label_assembly(const struct brw_isa_info * isa,const void * assembly,int start,int end,void * mem_ctx)438 brw_label_assembly(const struct brw_isa_info *isa,
439 const void *assembly, int start, int end, void *mem_ctx)
440 {
441 const struct intel_device_info *const devinfo = isa->devinfo;
442
443 struct brw_label *root_label = NULL;
444
445 int to_bytes_scale = sizeof(brw_inst) / brw_jump_scale(devinfo);
446
447 for (int offset = start; offset < end;) {
448 const brw_inst *inst = (const brw_inst *) ((const char *) assembly + offset);
449 brw_inst uncompacted;
450
451 bool is_compact = brw_inst_cmpt_control(devinfo, inst);
452
453 if (is_compact) {
454 brw_compact_inst *compacted = (brw_compact_inst *)inst;
455 brw_uncompact_instruction(isa, &uncompacted, compacted);
456 inst = &uncompacted;
457 }
458
459 if (brw_has_uip(devinfo, brw_inst_opcode(isa, inst))) {
460 /* Instructions that have UIP also have JIP. */
461 brw_create_label(&root_label,
462 offset + brw_inst_uip(devinfo, inst) * to_bytes_scale, mem_ctx);
463 brw_create_label(&root_label,
464 offset + brw_inst_jip(devinfo, inst) * to_bytes_scale, mem_ctx);
465 } else if (brw_has_jip(devinfo, brw_inst_opcode(isa, inst))) {
466 int jip = brw_inst_jip(devinfo, inst);
467
468 brw_create_label(&root_label, offset + jip * to_bytes_scale, mem_ctx);
469 }
470
471 if (is_compact) {
472 offset += sizeof(brw_compact_inst);
473 } else {
474 offset += sizeof(brw_inst);
475 }
476 }
477
478 return root_label;
479 }
480
/**
 * Disassemble bytes [start, end) of \p assembly to \p out, computing the
 * jump-target labels first.
 *
 * The labels live in a temporary ralloc context that is released before
 * returning.
 */
void
brw_disassemble_with_labels(const struct brw_isa_info *isa,
                            const void *assembly, int start, int end, FILE *out)
{
   void *label_ctx = ralloc_context(NULL);

   const struct brw_label *labels =
      brw_label_assembly(isa, assembly, start, end, label_ctx);
   brw_disassemble(isa, assembly, start, end, labels, out);

   ralloc_free(label_ctx);
}
493
494 void
brw_disassemble(const struct brw_isa_info * isa,const void * assembly,int start,int end,const struct brw_label * root_label,FILE * out)495 brw_disassemble(const struct brw_isa_info *isa,
496 const void *assembly, int start, int end,
497 const struct brw_label *root_label, FILE *out)
498 {
499 const struct intel_device_info *devinfo = isa->devinfo;
500
501 bool dump_hex = INTEL_DEBUG(DEBUG_HEX);
502
503 for (int offset = start; offset < end;) {
504 const brw_inst *insn = (const brw_inst *)((char *)assembly + offset);
505 brw_inst uncompacted;
506
507 if (root_label != NULL) {
508 const struct brw_label *label = brw_find_label(root_label, offset);
509 if (label != NULL) {
510 fprintf(out, "\nLABEL%d:\n", label->number);
511 }
512 }
513
514 bool compacted = brw_inst_cmpt_control(devinfo, insn);
515 if (0)
516 fprintf(out, "0x%08x: ", offset);
517
518 if (compacted) {
519 brw_compact_inst *compacted = (brw_compact_inst *)insn;
520 if (dump_hex) {
521 unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
522 const unsigned int blank_spaces = 24;
523 for (int i = 0 ; i < 8; i = i + 4) {
524 fprintf(out, "%02x %02x %02x %02x ",
525 insn_ptr[i],
526 insn_ptr[i + 1],
527 insn_ptr[i + 2],
528 insn_ptr[i + 3]);
529 }
530 /* Make compacted instructions hex value output vertically aligned
531 * with uncompacted instructions hex value
532 */
533 fprintf(out, "%*c", blank_spaces, ' ');
534 }
535
536 brw_uncompact_instruction(isa, &uncompacted, compacted);
537 insn = &uncompacted;
538 } else {
539 if (dump_hex) {
540 unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
541 for (int i = 0 ; i < 16; i = i + 4) {
542 fprintf(out, "%02x %02x %02x %02x ",
543 insn_ptr[i],
544 insn_ptr[i + 1],
545 insn_ptr[i + 2],
546 insn_ptr[i + 3]);
547 }
548 }
549 }
550
551 brw_disassemble_inst(out, isa, insn, compacted, offset, root_label);
552
553 if (compacted) {
554 offset += sizeof(brw_compact_inst);
555 } else {
556 offset += sizeof(brw_inst);
557 }
558 }
559 }
560
/**
 * Master table of opcode descriptions.
 *
 * Each row gives, in order: the IR opcode, the hardware opcode value, the
 * mnemonic, the number of sources, the number of destinations, and a mask
 * of the hardware generations the row applies to.
 *
 * brw_init_isa_info() selects the rows whose generation mask matches the
 * device and asserts that no IR opcode and no hardware opcode is selected
 * twice.  Rows that share an IR opcode or a hardware opcode value must
 * therefore have disjoint generation masks.  A row with a mask of 0 is
 * never selected for any device.
 */
static const struct opcode_desc opcode_descs[] = {
   /* IR,                 HW,  name,      nsrc, ndst, gfx_vers assuming Gfx9+ */
   { BRW_OPCODE_ILLEGAL,  0,   "illegal", 0,    0,    GFX_ALL },
   { BRW_OPCODE_SYNC,     1,   "sync",    1,    0,    GFX_GE(GFX12) },
   { BRW_OPCODE_MOV,      1,   "mov",     1,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_MOV,      97,  "mov",     1,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_SEL,      2,   "sel",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SEL,      98,  "sel",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_MOVI,     3,   "movi",    2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_MOVI,     99,  "movi",    2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_NOT,      4,   "not",     1,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_NOT,      100, "not",     1,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_AND,      5,   "and",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_AND,      101, "and",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_OR,       6,   "or",      2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_OR,       102, "or",      2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_XOR,      7,   "xor",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_XOR,      103, "xor",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_SHR,      8,   "shr",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SHR,      104, "shr",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_SHL,      9,   "shl",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SHL,      105, "shl",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_SMOV,     10,  "smov",    0,    0,    GFX_LT(GFX12) },
   { BRW_OPCODE_SMOV,     106, "smov",    0,    0,    GFX_GE(GFX12) },
   { BRW_OPCODE_ASR,      12,  "asr",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_ASR,      108, "asr",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_ROR,      14,  "ror",     2,    1,    GFX11 },
   { BRW_OPCODE_ROR,      110, "ror",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_ROL,      15,  "rol",     2,    1,    GFX11 },
   { BRW_OPCODE_ROL,      111, "rol",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_CMP,      16,  "cmp",     2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_CMP,      112, "cmp",     2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_CMPN,     17,  "cmpn",    2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_CMPN,     113, "cmpn",    2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_CSEL,     18,  "csel",    3,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_CSEL,     114, "csel",    3,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_BFREV,    23,  "bfrev",   1,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_BFREV,    119, "bfrev",   1,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_BFE,      24,  "bfe",     3,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_BFE,      120, "bfe",     3,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_BFI1,     25,  "bfi1",    2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_BFI1,     121, "bfi1",    2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_BFI2,     26,  "bfi2",    3,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_BFI2,     122, "bfi2",    3,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_JMPI,     32,  "jmpi",    0,    0,    GFX_ALL },
   { BRW_OPCODE_BRD,      33,  "brd",     0,    0,    GFX_ALL },
   { BRW_OPCODE_IF,       34,  "if",      0,    0,    GFX_ALL },
   { BRW_OPCODE_BRC,      35,  "brc",     0,    0,    GFX_ALL },
   { BRW_OPCODE_ELSE,     36,  "else",    0,    0,    GFX_ALL },
   { BRW_OPCODE_ENDIF,    37,  "endif",   0,    0,    GFX_ALL },
   { BRW_OPCODE_DO,       38,  "do",      0,    0,    0 }, /* Pseudo opcode. */
   { BRW_OPCODE_WHILE,    39,  "while",   0,    0,    GFX_ALL },
   { BRW_OPCODE_BREAK,    40,  "break",   0,    0,    GFX_ALL },
   { BRW_OPCODE_CONTINUE, 41,  "cont",    0,    0,    GFX_ALL },
   { BRW_OPCODE_HALT,     42,  "halt",    0,    0,    GFX_ALL },
   { BRW_OPCODE_CALLA,    43,  "calla",   0,    0,    GFX_ALL },
   { BRW_OPCODE_CALL,     44,  "call",    0,    0,    GFX_ALL },
   { BRW_OPCODE_RET,      45,  "ret",     0,    0,    GFX_ALL },
   { BRW_OPCODE_GOTO,     46,  "goto",    0,    0,    GFX_ALL },
   { BRW_OPCODE_WAIT,     48,  "wait",    0,    1,    GFX_LT(GFX12) },
   /* SEND and SENDC keep their hardware opcode across generations but take
    * one source before Gfx12 and two from Gfx12 on.
    */
   { BRW_OPCODE_SEND,     49,  "send",    1,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SENDC,    50,  "sendc",   1,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SEND,     49,  "send",    2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_SENDC,    50,  "sendc",   2,    1,    GFX_GE(GFX12) },
   { BRW_OPCODE_SENDS,    51,  "sends",   2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_SENDSC,   52,  "sendsc",  2,    1,    GFX_LT(GFX12) },
   { BRW_OPCODE_MATH,     56,  "math",    2,    1,    GFX_ALL },
   { BRW_OPCODE_ADD,      64,  "add",     2,    1,    GFX_ALL },
   { BRW_OPCODE_MUL,      65,  "mul",     2,    1,    GFX_ALL },
   { BRW_OPCODE_AVG,      66,  "avg",     2,    1,    GFX_ALL },
   { BRW_OPCODE_FRC,      67,  "frc",     1,    1,    GFX_ALL },
   { BRW_OPCODE_RNDU,     68,  "rndu",    1,    1,    GFX_ALL },
   { BRW_OPCODE_RNDD,     69,  "rndd",    1,    1,    GFX_ALL },
   { BRW_OPCODE_RNDE,     70,  "rnde",    1,    1,    GFX_ALL },
   { BRW_OPCODE_RNDZ,     71,  "rndz",    1,    1,    GFX_ALL },
   { BRW_OPCODE_MAC,      72,  "mac",     2,    1,    GFX_ALL },
   { BRW_OPCODE_MACH,     73,  "mach",    2,    1,    GFX_ALL },
   { BRW_OPCODE_LZD,      74,  "lzd",     1,    1,    GFX_ALL },
   { BRW_OPCODE_FBH,      75,  "fbh",     1,    1,    GFX_ALL },
   { BRW_OPCODE_FBL,      76,  "fbl",     1,    1,    GFX_ALL },
   { BRW_OPCODE_CBIT,     77,  "cbit",    1,    1,    GFX_ALL },
   { BRW_OPCODE_ADDC,     78,  "addc",    2,    1,    GFX_ALL },
   { BRW_OPCODE_SUBB,     79,  "subb",    2,    1,    GFX_ALL },
   { BRW_OPCODE_ADD3,     82,  "add3",    3,    1,    GFX_GE(GFX125) },
   { BRW_OPCODE_DP4,      84,  "dp4",     2,    1,    GFX_LT(GFX11) },
   { BRW_OPCODE_DPH,      85,  "dph",     2,    1,    GFX_LT(GFX11) },
   { BRW_OPCODE_DP3,      86,  "dp3",     2,    1,    GFX_LT(GFX11) },
   { BRW_OPCODE_DP2,      87,  "dp2",     2,    1,    GFX_LT(GFX11) },
   { BRW_OPCODE_DP4A,     88,  "dp4a",    3,    1,    GFX_GE(GFX12) },
   /* Hardware opcode 89 is LINE on Gfx9 and DPAS from Gfx12.5 on. */
   { BRW_OPCODE_LINE,     89,  "line",    2,    1,    GFX9 },
   { BRW_OPCODE_DPAS,     89,  "dpas",    3,    1,    GFX_GE(GFX125) },
   { BRW_OPCODE_PLN,      90,  "pln",     2,    1,    GFX9 },
   { BRW_OPCODE_MAD,      91,  "mad",     3,    1,    GFX_ALL },
   { BRW_OPCODE_LRP,      92,  "lrp",     3,    1,    GFX9 },
   { BRW_OPCODE_MADM,     93,  "madm",    3,    1,    GFX_ALL },
   { BRW_OPCODE_NOP,      126, "nop",     0,    0,    GFX_LT(GFX12) },
   { BRW_OPCODE_NOP,      96,  "nop",     0,    0,    GFX_GE(GFX12) }
};
659
660 void
brw_init_isa_info(struct brw_isa_info * isa,const struct intel_device_info * devinfo)661 brw_init_isa_info(struct brw_isa_info *isa,
662 const struct intel_device_info *devinfo)
663 {
664 assert(devinfo->ver >= 9);
665
666 isa->devinfo = devinfo;
667
668 enum gfx_ver ver = gfx_ver_from_devinfo(devinfo);
669
670 memset(isa->ir_to_descs, 0, sizeof(isa->ir_to_descs));
671 memset(isa->hw_to_descs, 0, sizeof(isa->hw_to_descs));
672
673 for (unsigned i = 0; i < ARRAY_SIZE(opcode_descs); i++) {
674 if (opcode_descs[i].gfx_vers & ver) {
675 const unsigned e = opcode_descs[i].ir;
676 const unsigned h = opcode_descs[i].hw;
677 assert(e < ARRAY_SIZE(isa->ir_to_descs) && !isa->ir_to_descs[e]);
678 assert(h < ARRAY_SIZE(isa->hw_to_descs) && !isa->hw_to_descs[h]);
679 isa->ir_to_descs[e] = &opcode_descs[i];
680 isa->hw_to_descs[h] = &opcode_descs[i];
681 }
682 }
683 }
684
685 /**
686 * Return the matching opcode_desc for the specified IR opcode and hardware
687 * generation, or NULL if the opcode is not supported by the device.
688 */
689 const struct opcode_desc *
brw_opcode_desc(const struct brw_isa_info * isa,enum opcode op)690 brw_opcode_desc(const struct brw_isa_info *isa, enum opcode op)
691 {
692 return op < ARRAY_SIZE(isa->ir_to_descs) ? isa->ir_to_descs[op] : NULL;
693 }
694
695 /**
696 * Return the matching opcode_desc for the specified HW opcode and hardware
697 * generation, or NULL if the opcode is not supported by the device.
698 */
699 const struct opcode_desc *
brw_opcode_desc_from_hw(const struct brw_isa_info * isa,unsigned hw)700 brw_opcode_desc_from_hw(const struct brw_isa_info *isa, unsigned hw)
701 {
702 return hw < ARRAY_SIZE(isa->hw_to_descs) ? isa->hw_to_descs[hw] : NULL;
703 }
704
705 unsigned
brw_num_sources_from_inst(const struct brw_isa_info * isa,const brw_inst * inst)706 brw_num_sources_from_inst(const struct brw_isa_info *isa,
707 const brw_inst *inst)
708 {
709 const struct intel_device_info *devinfo = isa->devinfo;
710 const struct opcode_desc *desc =
711 brw_opcode_desc(isa, brw_inst_opcode(isa, inst));
712 unsigned math_function;
713
714 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) {
715 math_function = brw_inst_math_function(devinfo, inst);
716 } else {
717 assert(desc->nsrc < 4);
718 return desc->nsrc;
719 }
720
721 switch (math_function) {
722 case BRW_MATH_FUNCTION_INV:
723 case BRW_MATH_FUNCTION_LOG:
724 case BRW_MATH_FUNCTION_EXP:
725 case BRW_MATH_FUNCTION_SQRT:
726 case BRW_MATH_FUNCTION_RSQ:
727 case BRW_MATH_FUNCTION_SIN:
728 case BRW_MATH_FUNCTION_COS:
729 case GFX8_MATH_FUNCTION_INVM:
730 case GFX8_MATH_FUNCTION_RSQRTM:
731 return 1;
732 case BRW_MATH_FUNCTION_FDIV:
733 case BRW_MATH_FUNCTION_POW:
734 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
735 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
736 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
737 return 2;
738 default:
739 unreachable("not reached");
740 }
741 }
742