xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/brw_fs_opt_algebraic.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2010 Intel Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "brw_fs.h"
7 #include "brw_fs_builder.h"
8 
9 using namespace brw;
10 
11 static uint64_t
src_as_uint(const brw_reg & src)12 src_as_uint(const brw_reg &src)
13 {
14    assert(src.file == IMM);
15 
16    switch (src.type) {
17    case BRW_TYPE_W:
18       return (uint64_t)(int16_t)(src.ud & 0xffff);
19 
20    case BRW_TYPE_UW:
21       return (uint64_t)(uint16_t)(src.ud & 0xffff);
22 
23    case BRW_TYPE_D:
24       return (uint64_t)src.d;
25 
26    case BRW_TYPE_UD:
27       return (uint64_t)src.ud;
28 
29    case BRW_TYPE_Q:
30       return src.d64;
31 
32    case BRW_TYPE_UQ:
33       return src.u64;
34 
35    default:
36       unreachable("Invalid integer type.");
37    }
38 }
39 
40 static brw_reg
brw_imm_for_type(uint64_t value,enum brw_reg_type type)41 brw_imm_for_type(uint64_t value, enum brw_reg_type type)
42 {
43    switch (type) {
44    case BRW_TYPE_W:
45       return brw_imm_w(value);
46 
47    case BRW_TYPE_UW:
48       return brw_imm_uw(value);
49 
50    case BRW_TYPE_D:
51       return brw_imm_d(value);
52 
53    case BRW_TYPE_UD:
54       return brw_imm_ud(value);
55 
56    case BRW_TYPE_Q:
57       return brw_imm_d(value);
58 
59    case BRW_TYPE_UQ:
60       return brw_imm_uq(value);
61 
62    default:
63       unreachable("Invalid integer type.");
64    }
65 }
66 
67 bool
brw_fs_opt_algebraic(fs_visitor & s)68 brw_fs_opt_algebraic(fs_visitor &s)
69 {
70    const intel_device_info *devinfo = s.devinfo;
71    bool progress = false;
72 
73    foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
74       switch (inst->opcode) {
75       case BRW_OPCODE_MOV:
76          if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
77               inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
78              inst->dst.is_null() &&
79              (inst->src[0].abs || inst->src[0].negate)) {
80             inst->src[0].abs = false;
81             inst->src[0].negate = false;
82             progress = true;
83             break;
84          }
85 
86          if (inst->src[0].file != IMM)
87             break;
88 
89          if (inst->saturate) {
90             /* Full mixed-type saturates don't happen.  However, we can end up
91              * with things like:
92              *
93              *    mov.sat(8) g21<1>DF       -1F
94              *
95              * Other mixed-size-but-same-base-type cases may also be possible.
96              */
97             if (inst->dst.type != inst->src[0].type &&
98                 inst->dst.type != BRW_TYPE_DF &&
99                 inst->src[0].type != BRW_TYPE_F)
100                assert(!"unimplemented: saturate mixed types");
101 
102             if (brw_reg_saturate_immediate(&inst->src[0])) {
103                inst->saturate = false;
104                progress = true;
105             }
106          }
107          break;
108 
109       case BRW_OPCODE_MUL:
110          if (inst->src[0].file != IMM && inst->src[1].file != IMM)
111             continue;
112 
113          if (brw_type_is_float(inst->src[1].type))
114             break;
115 
116          /* From the BDW PRM, Vol 2a, "mul - Multiply":
117           *
118           *    "When multiplying integer datatypes, if src0 is DW and src1
119           *    is W, irrespective of the destination datatype, the
120           *    accumulator maintains full 48-bit precision."
121           *    ...
122           *    "When multiplying integer data types, if one of the sources
123           *    is a DW, the resulting full precision data is stored in
124           *    the accumulator."
125           *
126           * There are also similar notes in earlier PRMs.
127           *
128           * The MOV instruction can copy the bits of the source, but it
129           * does not clear the higher bits of the accumulator. So, because
130           * we might use the full accumulator in the MUL/MACH macro, we
131           * shouldn't replace such MULs with MOVs.
132           */
133          if ((brw_type_size_bytes(inst->src[0].type) == 4 ||
134               brw_type_size_bytes(inst->src[1].type) == 4) &&
135              (inst->dst.is_accumulator() ||
136               inst->writes_accumulator_implicitly(devinfo)))
137             break;
138 
139          if (inst->src[0].is_zero() || inst->src[1].is_zero()) {
140             inst->opcode = BRW_OPCODE_MOV;
141             inst->src[0] = brw_imm_d(0);
142             inst->resize_sources(1);
143             progress = true;
144             break;
145          }
146 
147          /* a * 1.0 = a */
148          if (inst->src[1].is_one()) {
149             inst->opcode = BRW_OPCODE_MOV;
150             inst->resize_sources(1);
151             progress = true;
152             break;
153          }
154 
155          /* a * -1.0 = -a */
156          if (inst->src[0].is_negative_one()) {
157             inst->opcode = BRW_OPCODE_MOV;
158             inst->src[0] = inst->src[1];
159             inst->src[0].negate = !inst->src[0].negate;
160             inst->resize_sources(1);
161             progress = true;
162             break;
163          }
164 
165          if (inst->src[1].is_negative_one()) {
166             inst->opcode = BRW_OPCODE_MOV;
167             inst->src[0].negate = !inst->src[0].negate;
168             inst->resize_sources(1);
169             progress = true;
170             break;
171          }
172 
173          break;
174       case BRW_OPCODE_ADD:
175          if (inst->src[1].file != IMM)
176             continue;
177 
178          if (brw_type_is_int(inst->src[1].type) &&
179              inst->src[1].is_zero()) {
180             inst->opcode = BRW_OPCODE_MOV;
181             inst->resize_sources(1);
182             progress = true;
183             break;
184          }
185 
186          if (inst->src[0].file == IMM) {
187             assert(inst->src[0].type == BRW_TYPE_F);
188             inst->opcode = BRW_OPCODE_MOV;
189             inst->src[0].f += inst->src[1].f;
190             inst->resize_sources(1);
191             progress = true;
192             break;
193          }
194          break;
195 
196       case BRW_OPCODE_AND:
197          if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
198             const uint64_t src0 = src_as_uint(inst->src[0]);
199             const uint64_t src1 = src_as_uint(inst->src[1]);
200 
201             inst->opcode = BRW_OPCODE_MOV;
202             inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type);
203             inst->resize_sources(1);
204             progress = true;
205             break;
206          }
207 
208          break;
209 
210       case BRW_OPCODE_OR:
211          if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
212             const uint64_t src0 = src_as_uint(inst->src[0]);
213             const uint64_t src1 = src_as_uint(inst->src[1]);
214 
215             inst->opcode = BRW_OPCODE_MOV;
216             inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type);
217             inst->resize_sources(1);
218             progress = true;
219             break;
220          }
221 
222          if (inst->src[0].equals(inst->src[1]) ||
223              inst->src[1].is_zero()) {
224             /* On Gfx8+, the OR instruction can have a source modifier that
225              * performs logical not on the operand.  Cases of 'OR r0, ~r1, 0'
226              * or 'OR r0, ~r1, ~r1' should become a NOT instead of a MOV.
227              */
228             if (inst->src[0].negate) {
229                inst->opcode = BRW_OPCODE_NOT;
230                inst->src[0].negate = false;
231             } else {
232                inst->opcode = BRW_OPCODE_MOV;
233             }
234             inst->resize_sources(1);
235             progress = true;
236             break;
237          }
238          break;
239       case BRW_OPCODE_CMP:
240          if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
241               inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
242              inst->src[1].is_zero() &&
243              (inst->src[0].abs || inst->src[0].negate)) {
244             inst->src[0].abs = false;
245             inst->src[0].negate = false;
246             progress = true;
247             break;
248          }
249          break;
250       case BRW_OPCODE_SEL:
251          if (inst->src[0].equals(inst->src[1])) {
252             inst->opcode = BRW_OPCODE_MOV;
253             inst->predicate = BRW_PREDICATE_NONE;
254             inst->predicate_inverse = false;
255             inst->resize_sources(1);
256             progress = true;
257          } else if (inst->saturate && inst->src[1].file == IMM) {
258             switch (inst->conditional_mod) {
259             case BRW_CONDITIONAL_LE:
260             case BRW_CONDITIONAL_L:
261                switch (inst->src[1].type) {
262                case BRW_TYPE_F:
263                   if (inst->src[1].f >= 1.0f) {
264                      inst->opcode = BRW_OPCODE_MOV;
265                      inst->conditional_mod = BRW_CONDITIONAL_NONE;
266                      inst->resize_sources(1);
267                      progress = true;
268                   }
269                   break;
270                default:
271                   break;
272                }
273                break;
274             case BRW_CONDITIONAL_GE:
275             case BRW_CONDITIONAL_G:
276                switch (inst->src[1].type) {
277                case BRW_TYPE_F:
278                   if (inst->src[1].f <= 0.0f) {
279                      inst->opcode = BRW_OPCODE_MOV;
280                      inst->conditional_mod = BRW_CONDITIONAL_NONE;
281                      inst->resize_sources(1);
282                      progress = true;
283                   }
284                   break;
285                default:
286                   break;
287                }
288             default:
289                break;
290             }
291          }
292          break;
293       case BRW_OPCODE_CSEL:
294          if (brw_type_is_float(inst->dst.type)) {
295             /* This transformation can both clean up spurious modifiers
296              * (making assembly dumps easier to read) and convert GE with -abs
297              * to LE with abs. See abs handling below.
298              */
299             if (inst->src[2].negate) {
300                inst->conditional_mod = brw_swap_cmod(inst->conditional_mod);
301                inst->src[2].negate = false;
302                progress = true;
303             }
304 
305             if (inst->src[2].abs) {
306                switch (inst->conditional_mod) {
307                case BRW_CONDITIONAL_Z:
308                case BRW_CONDITIONAL_NZ:
309                   inst->src[2].abs = false;
310                   progress = true;
311                   break;
312 
313                case BRW_CONDITIONAL_LE:
314                   /* Converting to Z can help constant propagation into src0
315                    * and src1.
316                    */
317                   inst->conditional_mod = BRW_CONDITIONAL_Z;
318                   inst->src[2].abs = false;
319                   progress = true;
320                   break;
321 
322                default:
323                   /* GE or L conditions with absolute value could be used to
324                    * implement isnan(x) in CSEL. Transforming G with absolute
325                    * value to NZ is **not** NaN safe.
326                    */
327                   break;
328                }
329             }
330          } else if (brw_type_is_sint(inst->src[2].type)) {
331             /* Integer transformations are more challenging than floating
332              * point transformations due to INT_MIN == -(INT_MIN) ==
333              * abs(INT_MIN).
334              */
335             if (inst->src[2].negate && inst->src[2].abs) {
336                switch (inst->conditional_mod) {
337                case BRW_CONDITIONAL_GE:
338                   inst->src[2].negate = false;
339                   inst->src[2].abs = false;
340                   inst->conditional_mod = BRW_CONDITIONAL_Z;
341                   progress = true;
342                   break;
343                case BRW_CONDITIONAL_L:
344                   inst->src[2].negate = false;
345                   inst->src[2].abs = false;
346                   inst->conditional_mod = BRW_CONDITIONAL_NZ;
347                   progress = true;
348                   break;
349                case BRW_CONDITIONAL_G:
350                   /* This is a contradtion. -abs(x) cannot be > 0. */
351                   inst->opcode = BRW_OPCODE_MOV;
352                   inst->src[0] = inst->src[1];
353                   inst->resize_sources(1);
354                   progress = true;
355                   break;
356                case BRW_CONDITIONAL_LE:
357                   /* This is a tautology. -abs(x) must be <= 0. */
358                   inst->opcode = BRW_OPCODE_MOV;
359                   inst->resize_sources(1);
360                   progress = true;
361                   break;
362                case BRW_CONDITIONAL_Z:
363                case BRW_CONDITIONAL_NZ:
364                   inst->src[2].negate = false;
365                   inst->src[2].abs = false;
366                   progress = true;
367                   break;
368                default:
369                   unreachable("Impossible icsel condition.");
370                }
371             }
372          }
373          break;
374       case BRW_OPCODE_MAD:
375          if (inst->src[0].type != BRW_TYPE_F ||
376              inst->src[1].type != BRW_TYPE_F ||
377              inst->src[2].type != BRW_TYPE_F)
378             break;
379          if (inst->src[1].is_one()) {
380             inst->opcode = BRW_OPCODE_ADD;
381             inst->src[1] = inst->src[2];
382             inst->resize_sources(2);
383             progress = true;
384          } else if (inst->src[2].is_one()) {
385             inst->opcode = BRW_OPCODE_ADD;
386             inst->resize_sources(2);
387             progress = true;
388          }
389          break;
390       case BRW_OPCODE_SHL:
391          if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
392             /* It's not currently possible to generate this, and this constant
393              * folding does not handle it.
394              */
395             assert(!inst->saturate);
396 
397             brw_reg result;
398 
399             switch (brw_type_size_bytes(inst->src[0].type)) {
400             case 2:
401                result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f)));
402                break;
403             case 4:
404                result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f));
405                break;
406             case 8:
407                result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f));
408                break;
409             default:
410                /* Just in case a future platform re-enables B or UB types. */
411                unreachable("Invalid source size.");
412             }
413 
414             inst->opcode = BRW_OPCODE_MOV;
415             inst->src[0] = retype(result, inst->dst.type);
416             inst->resize_sources(1);
417 
418             progress = true;
419          }
420          break;
421 
422       case SHADER_OPCODE_BROADCAST:
423          if (is_uniform(inst->src[0])) {
424             inst->opcode = BRW_OPCODE_MOV;
425             inst->force_writemask_all = true;
426             inst->resize_sources(1);
427             progress = true;
428          } else if (inst->src[1].file == IMM) {
429             inst->opcode = BRW_OPCODE_MOV;
430             /* It's possible that the selected component will be too large and
431              * overflow the register.  This can happen if someone does a
432              * readInvocation() from GLSL or SPIR-V and provides an OOB
433              * invocationIndex.  If this happens and we some how manage
434              * to constant fold it in and get here, then component() may cause
435              * us to start reading outside of the VGRF which will lead to an
436              * assert later.  Instead, just let it wrap around if it goes over
437              * exec_size.
438              */
439             const unsigned comp = inst->src[1].ud & (inst->exec_size - 1);
440             inst->src[0] = component(inst->src[0], comp);
441             inst->force_writemask_all = true;
442             inst->resize_sources(1);
443             progress = true;
444          }
445          break;
446 
447       case SHADER_OPCODE_SHUFFLE:
448          if (is_uniform(inst->src[0])) {
449             inst->opcode = BRW_OPCODE_MOV;
450             inst->resize_sources(1);
451             progress = true;
452          } else if (inst->src[1].file == IMM) {
453             inst->opcode = BRW_OPCODE_MOV;
454             inst->src[0] = component(inst->src[0],
455                                      inst->src[1].ud);
456             inst->resize_sources(1);
457             progress = true;
458          }
459          break;
460 
461       default:
462 	 break;
463       }
464 
465       /* Ensure that the correct source has the immediate value. 2-source
466        * instructions must have the immediate in src[1]. On Gfx12 and later,
467        * some 3-source instructions can have the immediate in src[0] or
468        * src[2]. It's complicated, so don't mess with 3-source instructions
469        * here.
470        */
471       if (progress && inst->sources == 2 && inst->is_commutative()) {
472          if (inst->src[0].file == IMM) {
473             brw_reg tmp = inst->src[1];
474             inst->src[1] = inst->src[0];
475             inst->src[0] = tmp;
476          }
477       }
478    }
479 
480    if (progress)
481       s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
482                             DEPENDENCY_INSTRUCTION_DETAIL);
483 
484    return progress;
485 }
486