xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/brw_shader.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "brw_cfg.h"
25 #include "brw_fs.h"
26 #include "util/macros.h"
27 
28 bool
brw_reg_saturate_immediate(brw_reg * reg)29 brw_reg_saturate_immediate(brw_reg *reg)
30 {
31    union {
32       unsigned ud;
33       int d;
34       float f;
35       double df;
36    } imm, sat_imm = { 0 };
37 
38    const unsigned size = brw_type_size_bytes(reg->type);
39 
40    /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise
41     * irrelevant, so just check the size of the type and copy from/to an
42     * appropriately sized field.
43     */
44    if (size < 8)
45       imm.ud = reg->ud;
46    else
47       imm.df = reg->df;
48 
49    switch (reg->type) {
50    case BRW_TYPE_UD:
51    case BRW_TYPE_D:
52    case BRW_TYPE_UW:
53    case BRW_TYPE_W:
54    case BRW_TYPE_UQ:
55    case BRW_TYPE_Q:
56       /* Nothing to do. */
57       return false;
58    case BRW_TYPE_F:
59       sat_imm.f = SATURATE(imm.f);
60       break;
61    case BRW_TYPE_DF:
62       sat_imm.df = SATURATE(imm.df);
63       break;
64    case BRW_TYPE_UB:
65    case BRW_TYPE_B:
66       unreachable("no UB/B immediates");
67    case BRW_TYPE_V:
68    case BRW_TYPE_UV:
69    case BRW_TYPE_VF:
70       unreachable("unimplemented: saturate vector immediate");
71    case BRW_TYPE_HF:
72       unreachable("unimplemented: saturate HF immediate");
73    default:
74       unreachable("invalid type");
75    }
76 
77    if (size < 8) {
78       if (imm.ud != sat_imm.ud) {
79          reg->ud = sat_imm.ud;
80          return true;
81       }
82    } else {
83       if (imm.df != sat_imm.df) {
84          reg->df = sat_imm.df;
85          return true;
86       }
87    }
88    return false;
89 }
90 
91 bool
brw_reg_negate_immediate(brw_reg * reg)92 brw_reg_negate_immediate(brw_reg *reg)
93 {
94    switch (reg->type) {
95    case BRW_TYPE_D:
96    case BRW_TYPE_UD:
97       reg->d = -reg->d;
98       return true;
99    case BRW_TYPE_W:
100    case BRW_TYPE_UW: {
101       uint16_t value = -(int16_t)reg->ud;
102       reg->ud = value | (uint32_t)value << 16;
103       return true;
104    }
105    case BRW_TYPE_F:
106       reg->f = -reg->f;
107       return true;
108    case BRW_TYPE_VF:
109       reg->ud ^= 0x80808080;
110       return true;
111    case BRW_TYPE_DF:
112       reg->df = -reg->df;
113       return true;
114    case BRW_TYPE_UQ:
115    case BRW_TYPE_Q:
116       reg->d64 = -reg->d64;
117       return true;
118    case BRW_TYPE_UB:
119    case BRW_TYPE_B:
120       unreachable("no UB/B immediates");
121    case BRW_TYPE_UV:
122    case BRW_TYPE_V:
123       assert(!"unimplemented: negate UV/V immediate");
124    case BRW_TYPE_HF:
125       reg->ud ^= 0x80008000;
126       return true;
127    default:
128       unreachable("invalid type");
129    }
130 
131    return false;
132 }
133 
134 bool
brw_reg_abs_immediate(brw_reg * reg)135 brw_reg_abs_immediate(brw_reg *reg)
136 {
137    switch (reg->type) {
138    case BRW_TYPE_D:
139       reg->d = abs(reg->d);
140       return true;
141    case BRW_TYPE_W: {
142       uint16_t value = abs((int16_t)reg->ud);
143       reg->ud = value | (uint32_t)value << 16;
144       return true;
145    }
146    case BRW_TYPE_F:
147       reg->f = fabsf(reg->f);
148       return true;
149    case BRW_TYPE_DF:
150       reg->df = fabs(reg->df);
151       return true;
152    case BRW_TYPE_VF:
153       reg->ud &= ~0x80808080;
154       return true;
155    case BRW_TYPE_Q:
156       reg->d64 = imaxabs(reg->d64);
157       return true;
158    case BRW_TYPE_UB:
159    case BRW_TYPE_B:
160       unreachable("no UB/B immediates");
161    case BRW_TYPE_UQ:
162    case BRW_TYPE_UD:
163    case BRW_TYPE_UW:
164    case BRW_TYPE_UV:
165       /* Presumably the absolute value modifier on an unsigned source is a
166        * nop, but it would be nice to confirm.
167        */
168       assert(!"unimplemented: abs unsigned immediate");
169    case BRW_TYPE_V:
170       assert(!"unimplemented: abs V immediate");
171    case BRW_TYPE_HF:
172       reg->ud &= ~0x80008000;
173       return true;
174    default:
175       unreachable("invalid type");
176    }
177 
178    return false;
179 }
180 
181 bool
is_zero() const182 brw_reg::is_zero() const
183 {
184    if (file != IMM)
185       return false;
186 
187    assert(brw_type_size_bytes(type) > 1);
188 
189    switch (type) {
190    case BRW_TYPE_HF:
191       assert((d & 0xffff) == ((d >> 16) & 0xffff));
192       return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000;
193    case BRW_TYPE_F:
194       return f == 0;
195    case BRW_TYPE_DF:
196       return df == 0;
197    case BRW_TYPE_W:
198    case BRW_TYPE_UW:
199       assert((d & 0xffff) == ((d >> 16) & 0xffff));
200       return (d & 0xffff) == 0;
201    case BRW_TYPE_D:
202    case BRW_TYPE_UD:
203       return d == 0;
204    case BRW_TYPE_UQ:
205    case BRW_TYPE_Q:
206       return u64 == 0;
207    default:
208       return false;
209    }
210 }
211 
212 bool
is_one() const213 brw_reg::is_one() const
214 {
215    if (file != IMM)
216       return false;
217 
218    assert(brw_type_size_bytes(type) > 1);
219 
220    switch (type) {
221    case BRW_TYPE_HF:
222       assert((d & 0xffff) == ((d >> 16) & 0xffff));
223       return (d & 0xffff) == 0x3c00;
224    case BRW_TYPE_F:
225       return f == 1.0f;
226    case BRW_TYPE_DF:
227       return df == 1.0;
228    case BRW_TYPE_W:
229    case BRW_TYPE_UW:
230       assert((d & 0xffff) == ((d >> 16) & 0xffff));
231       return (d & 0xffff) == 1;
232    case BRW_TYPE_D:
233    case BRW_TYPE_UD:
234       return d == 1;
235    case BRW_TYPE_UQ:
236    case BRW_TYPE_Q:
237       return u64 == 1;
238    default:
239       return false;
240    }
241 }
242 
243 bool
is_negative_one() const244 brw_reg::is_negative_one() const
245 {
246    if (file != IMM)
247       return false;
248 
249    assert(brw_type_size_bytes(type) > 1);
250 
251    switch (type) {
252    case BRW_TYPE_HF:
253       assert((d & 0xffff) == ((d >> 16) & 0xffff));
254       return (d & 0xffff) == 0xbc00;
255    case BRW_TYPE_F:
256       return f == -1.0;
257    case BRW_TYPE_DF:
258       return df == -1.0;
259    case BRW_TYPE_W:
260       assert((d & 0xffff) == ((d >> 16) & 0xffff));
261       return (d & 0xffff) == 0xffff;
262    case BRW_TYPE_D:
263       return d == -1;
264    case BRW_TYPE_Q:
265       return d64 == -1;
266    default:
267       return false;
268    }
269 }
270 
271 bool
is_null() const272 brw_reg::is_null() const
273 {
274    return file == ARF && nr == BRW_ARF_NULL;
275 }
276 
277 
278 bool
is_accumulator() const279 brw_reg::is_accumulator() const
280 {
281    return file == ARF && (nr & 0xF0) == BRW_ARF_ACCUMULATOR;
282 }
283 
284 bool
is_commutative() const285 fs_inst::is_commutative() const
286 {
287    switch (opcode) {
288    case BRW_OPCODE_AND:
289    case BRW_OPCODE_OR:
290    case BRW_OPCODE_XOR:
291    case BRW_OPCODE_ADD:
292    case BRW_OPCODE_ADD3:
293    case SHADER_OPCODE_MULH:
294       return true;
295 
296    case BRW_OPCODE_MUL:
297       /* Integer multiplication of dword and word sources is not actually
298        * commutative. The DW source must be first.
299        */
300       return !brw_type_is_int(src[0].type) ||
301              brw_type_size_bits(src[0].type) == brw_type_size_bits(src[1].type);
302 
303    case BRW_OPCODE_SEL:
304       /* MIN and MAX are commutative. */
305       if (conditional_mod == BRW_CONDITIONAL_GE ||
306           conditional_mod == BRW_CONDITIONAL_L) {
307          return true;
308       }
309       FALLTHROUGH;
310    default:
311       return false;
312    }
313 }
314 
315 bool
is_3src(const struct brw_compiler * compiler) const316 fs_inst::is_3src(const struct brw_compiler *compiler) const
317 {
318    return ::is_3src(&compiler->isa, opcode);
319 }
320 
321 bool
is_math() const322 fs_inst::is_math() const
323 {
324    return (opcode == SHADER_OPCODE_RCP ||
325            opcode == SHADER_OPCODE_RSQ ||
326            opcode == SHADER_OPCODE_SQRT ||
327            opcode == SHADER_OPCODE_EXP2 ||
328            opcode == SHADER_OPCODE_LOG2 ||
329            opcode == SHADER_OPCODE_SIN ||
330            opcode == SHADER_OPCODE_COS ||
331            opcode == SHADER_OPCODE_INT_QUOTIENT ||
332            opcode == SHADER_OPCODE_INT_REMAINDER ||
333            opcode == SHADER_OPCODE_POW);
334 }
335 
336 bool
is_control_flow_begin() const337 fs_inst::is_control_flow_begin() const
338 {
339    switch (opcode) {
340    case BRW_OPCODE_DO:
341    case BRW_OPCODE_IF:
342    case BRW_OPCODE_ELSE:
343       return true;
344    default:
345       return false;
346    }
347 }
348 
349 bool
is_control_flow_end() const350 fs_inst::is_control_flow_end() const
351 {
352    switch (opcode) {
353    case BRW_OPCODE_ELSE:
354    case BRW_OPCODE_WHILE:
355    case BRW_OPCODE_ENDIF:
356       return true;
357    default:
358       return false;
359    }
360 }
361 
362 bool
is_control_flow() const363 fs_inst::is_control_flow() const
364 {
365    switch (opcode) {
366    case BRW_OPCODE_DO:
367    case BRW_OPCODE_WHILE:
368    case BRW_OPCODE_IF:
369    case BRW_OPCODE_ELSE:
370    case BRW_OPCODE_ENDIF:
371    case BRW_OPCODE_BREAK:
372    case BRW_OPCODE_CONTINUE:
373       return true;
374    default:
375       return false;
376    }
377 }
378 
379 bool
uses_indirect_addressing() const380 fs_inst::uses_indirect_addressing() const
381 {
382    switch (opcode) {
383    case SHADER_OPCODE_BROADCAST:
384    case SHADER_OPCODE_CLUSTER_BROADCAST:
385    case SHADER_OPCODE_MOV_INDIRECT:
386       return true;
387    default:
388       return false;
389    }
390 }
391 
392 bool
can_do_saturate() const393 fs_inst::can_do_saturate() const
394 {
395    switch (opcode) {
396    case BRW_OPCODE_ADD:
397    case BRW_OPCODE_ADD3:
398    case BRW_OPCODE_ASR:
399    case BRW_OPCODE_AVG:
400    case BRW_OPCODE_CSEL:
401    case BRW_OPCODE_DP2:
402    case BRW_OPCODE_DP3:
403    case BRW_OPCODE_DP4:
404    case BRW_OPCODE_DPH:
405    case BRW_OPCODE_DP4A:
406    case BRW_OPCODE_LINE:
407    case BRW_OPCODE_LRP:
408    case BRW_OPCODE_MAC:
409    case BRW_OPCODE_MAD:
410    case BRW_OPCODE_MATH:
411    case BRW_OPCODE_MOV:
412    case BRW_OPCODE_MUL:
413    case SHADER_OPCODE_MULH:
414    case BRW_OPCODE_PLN:
415    case BRW_OPCODE_RNDD:
416    case BRW_OPCODE_RNDE:
417    case BRW_OPCODE_RNDU:
418    case BRW_OPCODE_RNDZ:
419    case BRW_OPCODE_SEL:
420    case BRW_OPCODE_SHL:
421    case BRW_OPCODE_SHR:
422    case SHADER_OPCODE_COS:
423    case SHADER_OPCODE_EXP2:
424    case SHADER_OPCODE_LOG2:
425    case SHADER_OPCODE_POW:
426    case SHADER_OPCODE_RCP:
427    case SHADER_OPCODE_RSQ:
428    case SHADER_OPCODE_SIN:
429    case SHADER_OPCODE_SQRT:
430       return true;
431    default:
432       return false;
433    }
434 }
435 
436 bool
reads_accumulator_implicitly() const437 fs_inst::reads_accumulator_implicitly() const
438 {
439    switch (opcode) {
440    case BRW_OPCODE_MAC:
441    case BRW_OPCODE_MACH:
442       return true;
443    default:
444       return false;
445    }
446 }
447 
448 bool
writes_accumulator_implicitly(const struct intel_device_info * devinfo) const449 fs_inst::writes_accumulator_implicitly(const struct intel_device_info *devinfo) const
450 {
451    return writes_accumulator ||
452           (eot && intel_needs_workaround(devinfo, 14010017096));
453 }
454 
455 bool
has_side_effects() const456 fs_inst::has_side_effects() const
457 {
458    switch (opcode) {
459    case SHADER_OPCODE_SEND:
460       return send_has_side_effects;
461 
462    case BRW_OPCODE_SYNC:
463    case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
464    case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
465    case SHADER_OPCODE_MEMORY_FENCE:
466    case SHADER_OPCODE_INTERLOCK:
467    case SHADER_OPCODE_URB_WRITE_LOGICAL:
468    case FS_OPCODE_FB_WRITE_LOGICAL:
469    case SHADER_OPCODE_BARRIER:
470    case SHADER_OPCODE_RND_MODE:
471    case SHADER_OPCODE_FLOAT_CONTROL_MODE:
472    case FS_OPCODE_SCHEDULING_FENCE:
473    case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
474    case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
475    case RT_OPCODE_TRACE_RAY_LOGICAL:
476       return true;
477    default:
478       return eot;
479    }
480 }
481 
482 bool
is_volatile() const483 fs_inst::is_volatile() const
484 {
485    return opcode == SHADER_OPCODE_MEMORY_LOAD_LOGICAL ||
486           (opcode == SHADER_OPCODE_SEND && send_is_volatile);
487 }
488 
489 #ifndef NDEBUG
490 static bool
inst_is_in_block(const bblock_t * block,const fs_inst * inst)491 inst_is_in_block(const bblock_t *block, const fs_inst *inst)
492 {
493    const exec_node *n = inst;
494 
495    /* Find the tail sentinel. If the tail sentinel is the sentinel from the
496     * list header in the bblock_t, then this instruction is in that basic
497     * block.
498     */
499    while (!n->is_tail_sentinel())
500       n = n->get_next();
501 
502    return n == &block->instructions.tail_sentinel;
503 }
504 #endif
505 
506 static void
adjust_later_block_ips(bblock_t * start_block,int ip_adjustment)507 adjust_later_block_ips(bblock_t *start_block, int ip_adjustment)
508 {
509    for (bblock_t *block_iter = start_block->next();
510         block_iter;
511         block_iter = block_iter->next()) {
512       block_iter->start_ip += ip_adjustment;
513       block_iter->end_ip += ip_adjustment;
514    }
515 }
516 
517 void
insert_after(bblock_t * block,fs_inst * inst)518 fs_inst::insert_after(bblock_t *block, fs_inst *inst)
519 {
520    assert(this != inst);
521    assert(block->end_ip_delta == 0);
522 
523    if (!this->is_head_sentinel())
524       assert(inst_is_in_block(block, this) || !"Instruction not in block");
525 
526    block->end_ip++;
527 
528    adjust_later_block_ips(block, 1);
529 
530    exec_node::insert_after(inst);
531 }
532 
533 void
insert_before(bblock_t * block,fs_inst * inst)534 fs_inst::insert_before(bblock_t *block, fs_inst *inst)
535 {
536    assert(this != inst);
537    assert(block->end_ip_delta == 0);
538 
539    if (!this->is_tail_sentinel())
540       assert(inst_is_in_block(block, this) || !"Instruction not in block");
541 
542    block->end_ip++;
543 
544    adjust_later_block_ips(block, 1);
545 
546    exec_node::insert_before(inst);
547 }
548 
549 void
remove(bblock_t * block,bool defer_later_block_ip_updates)550 fs_inst::remove(bblock_t *block, bool defer_later_block_ip_updates)
551 {
552    assert(inst_is_in_block(block, this) || !"Instruction not in block");
553 
554    if (exec_list_is_singular(&block->instructions)) {
555       this->opcode = BRW_OPCODE_NOP;
556       this->resize_sources(0);
557       this->dst = brw_reg();
558       this->size_written = 0;
559       return;
560    }
561 
562    if (defer_later_block_ip_updates) {
563       block->end_ip_delta--;
564    } else {
565       assert(block->end_ip_delta == 0);
566       adjust_later_block_ips(block, -1);
567    }
568 
569    if (block->start_ip == block->end_ip) {
570       if (block->end_ip_delta != 0) {
571          adjust_later_block_ips(block, block->end_ip_delta);
572          block->end_ip_delta = 0;
573       }
574 
575       block->cfg->remove_block(block);
576    } else {
577       block->end_ip--;
578    }
579 
580    exec_node::remove();
581 }
582 
583