1 /*
2 * Copyright © 2010 Intel Corporation
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "brw_fs.h"
7 #include "brw_fs_builder.h"
8
9 using namespace brw;
10
11 static uint64_t
src_as_uint(const brw_reg & src)12 src_as_uint(const brw_reg &src)
13 {
14 assert(src.file == IMM);
15
16 switch (src.type) {
17 case BRW_TYPE_W:
18 return (uint64_t)(int16_t)(src.ud & 0xffff);
19
20 case BRW_TYPE_UW:
21 return (uint64_t)(uint16_t)(src.ud & 0xffff);
22
23 case BRW_TYPE_D:
24 return (uint64_t)src.d;
25
26 case BRW_TYPE_UD:
27 return (uint64_t)src.ud;
28
29 case BRW_TYPE_Q:
30 return src.d64;
31
32 case BRW_TYPE_UQ:
33 return src.u64;
34
35 default:
36 unreachable("Invalid integer type.");
37 }
38 }
39
40 static brw_reg
brw_imm_for_type(uint64_t value,enum brw_reg_type type)41 brw_imm_for_type(uint64_t value, enum brw_reg_type type)
42 {
43 switch (type) {
44 case BRW_TYPE_W:
45 return brw_imm_w(value);
46
47 case BRW_TYPE_UW:
48 return brw_imm_uw(value);
49
50 case BRW_TYPE_D:
51 return brw_imm_d(value);
52
53 case BRW_TYPE_UD:
54 return brw_imm_ud(value);
55
56 case BRW_TYPE_Q:
57 return brw_imm_d(value);
58
59 case BRW_TYPE_UQ:
60 return brw_imm_uq(value);
61
62 default:
63 unreachable("Invalid integer type.");
64 }
65 }
66
67 bool
brw_fs_opt_algebraic(fs_visitor & s)68 brw_fs_opt_algebraic(fs_visitor &s)
69 {
70 const intel_device_info *devinfo = s.devinfo;
71 bool progress = false;
72
73 foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
74 switch (inst->opcode) {
75 case BRW_OPCODE_MOV:
76 if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
77 inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
78 inst->dst.is_null() &&
79 (inst->src[0].abs || inst->src[0].negate)) {
80 inst->src[0].abs = false;
81 inst->src[0].negate = false;
82 progress = true;
83 break;
84 }
85
86 if (inst->src[0].file != IMM)
87 break;
88
89 if (inst->saturate) {
90 /* Full mixed-type saturates don't happen. However, we can end up
91 * with things like:
92 *
93 * mov.sat(8) g21<1>DF -1F
94 *
95 * Other mixed-size-but-same-base-type cases may also be possible.
96 */
97 if (inst->dst.type != inst->src[0].type &&
98 inst->dst.type != BRW_TYPE_DF &&
99 inst->src[0].type != BRW_TYPE_F)
100 assert(!"unimplemented: saturate mixed types");
101
102 if (brw_reg_saturate_immediate(&inst->src[0])) {
103 inst->saturate = false;
104 progress = true;
105 }
106 }
107 break;
108
109 case BRW_OPCODE_MUL:
110 if (inst->src[0].file != IMM && inst->src[1].file != IMM)
111 continue;
112
113 if (brw_type_is_float(inst->src[1].type))
114 break;
115
116 /* From the BDW PRM, Vol 2a, "mul - Multiply":
117 *
118 * "When multiplying integer datatypes, if src0 is DW and src1
119 * is W, irrespective of the destination datatype, the
120 * accumulator maintains full 48-bit precision."
121 * ...
122 * "When multiplying integer data types, if one of the sources
123 * is a DW, the resulting full precision data is stored in
124 * the accumulator."
125 *
126 * There are also similar notes in earlier PRMs.
127 *
128 * The MOV instruction can copy the bits of the source, but it
129 * does not clear the higher bits of the accumulator. So, because
130 * we might use the full accumulator in the MUL/MACH macro, we
131 * shouldn't replace such MULs with MOVs.
132 */
133 if ((brw_type_size_bytes(inst->src[0].type) == 4 ||
134 brw_type_size_bytes(inst->src[1].type) == 4) &&
135 (inst->dst.is_accumulator() ||
136 inst->writes_accumulator_implicitly(devinfo)))
137 break;
138
139 if (inst->src[0].is_zero() || inst->src[1].is_zero()) {
140 inst->opcode = BRW_OPCODE_MOV;
141 inst->src[0] = brw_imm_d(0);
142 inst->resize_sources(1);
143 progress = true;
144 break;
145 }
146
147 /* a * 1.0 = a */
148 if (inst->src[1].is_one()) {
149 inst->opcode = BRW_OPCODE_MOV;
150 inst->resize_sources(1);
151 progress = true;
152 break;
153 }
154
155 /* a * -1.0 = -a */
156 if (inst->src[0].is_negative_one()) {
157 inst->opcode = BRW_OPCODE_MOV;
158 inst->src[0] = inst->src[1];
159 inst->src[0].negate = !inst->src[0].negate;
160 inst->resize_sources(1);
161 progress = true;
162 break;
163 }
164
165 if (inst->src[1].is_negative_one()) {
166 inst->opcode = BRW_OPCODE_MOV;
167 inst->src[0].negate = !inst->src[0].negate;
168 inst->resize_sources(1);
169 progress = true;
170 break;
171 }
172
173 break;
174 case BRW_OPCODE_ADD:
175 if (inst->src[1].file != IMM)
176 continue;
177
178 if (brw_type_is_int(inst->src[1].type) &&
179 inst->src[1].is_zero()) {
180 inst->opcode = BRW_OPCODE_MOV;
181 inst->resize_sources(1);
182 progress = true;
183 break;
184 }
185
186 if (inst->src[0].file == IMM) {
187 assert(inst->src[0].type == BRW_TYPE_F);
188 inst->opcode = BRW_OPCODE_MOV;
189 inst->src[0].f += inst->src[1].f;
190 inst->resize_sources(1);
191 progress = true;
192 break;
193 }
194 break;
195
196 case BRW_OPCODE_AND:
197 if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
198 const uint64_t src0 = src_as_uint(inst->src[0]);
199 const uint64_t src1 = src_as_uint(inst->src[1]);
200
201 inst->opcode = BRW_OPCODE_MOV;
202 inst->src[0] = brw_imm_for_type(src0 & src1, inst->dst.type);
203 inst->resize_sources(1);
204 progress = true;
205 break;
206 }
207
208 break;
209
210 case BRW_OPCODE_OR:
211 if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
212 const uint64_t src0 = src_as_uint(inst->src[0]);
213 const uint64_t src1 = src_as_uint(inst->src[1]);
214
215 inst->opcode = BRW_OPCODE_MOV;
216 inst->src[0] = brw_imm_for_type(src0 | src1, inst->dst.type);
217 inst->resize_sources(1);
218 progress = true;
219 break;
220 }
221
222 if (inst->src[0].equals(inst->src[1]) ||
223 inst->src[1].is_zero()) {
224 /* On Gfx8+, the OR instruction can have a source modifier that
225 * performs logical not on the operand. Cases of 'OR r0, ~r1, 0'
226 * or 'OR r0, ~r1, ~r1' should become a NOT instead of a MOV.
227 */
228 if (inst->src[0].negate) {
229 inst->opcode = BRW_OPCODE_NOT;
230 inst->src[0].negate = false;
231 } else {
232 inst->opcode = BRW_OPCODE_MOV;
233 }
234 inst->resize_sources(1);
235 progress = true;
236 break;
237 }
238 break;
239 case BRW_OPCODE_CMP:
240 if ((inst->conditional_mod == BRW_CONDITIONAL_Z ||
241 inst->conditional_mod == BRW_CONDITIONAL_NZ) &&
242 inst->src[1].is_zero() &&
243 (inst->src[0].abs || inst->src[0].negate)) {
244 inst->src[0].abs = false;
245 inst->src[0].negate = false;
246 progress = true;
247 break;
248 }
249 break;
250 case BRW_OPCODE_SEL:
251 if (inst->src[0].equals(inst->src[1])) {
252 inst->opcode = BRW_OPCODE_MOV;
253 inst->predicate = BRW_PREDICATE_NONE;
254 inst->predicate_inverse = false;
255 inst->resize_sources(1);
256 progress = true;
257 } else if (inst->saturate && inst->src[1].file == IMM) {
258 switch (inst->conditional_mod) {
259 case BRW_CONDITIONAL_LE:
260 case BRW_CONDITIONAL_L:
261 switch (inst->src[1].type) {
262 case BRW_TYPE_F:
263 if (inst->src[1].f >= 1.0f) {
264 inst->opcode = BRW_OPCODE_MOV;
265 inst->conditional_mod = BRW_CONDITIONAL_NONE;
266 inst->resize_sources(1);
267 progress = true;
268 }
269 break;
270 default:
271 break;
272 }
273 break;
274 case BRW_CONDITIONAL_GE:
275 case BRW_CONDITIONAL_G:
276 switch (inst->src[1].type) {
277 case BRW_TYPE_F:
278 if (inst->src[1].f <= 0.0f) {
279 inst->opcode = BRW_OPCODE_MOV;
280 inst->conditional_mod = BRW_CONDITIONAL_NONE;
281 inst->resize_sources(1);
282 progress = true;
283 }
284 break;
285 default:
286 break;
287 }
288 default:
289 break;
290 }
291 }
292 break;
293 case BRW_OPCODE_CSEL:
294 if (brw_type_is_float(inst->dst.type)) {
295 /* This transformation can both clean up spurious modifiers
296 * (making assembly dumps easier to read) and convert GE with -abs
297 * to LE with abs. See abs handling below.
298 */
299 if (inst->src[2].negate) {
300 inst->conditional_mod = brw_swap_cmod(inst->conditional_mod);
301 inst->src[2].negate = false;
302 progress = true;
303 }
304
305 if (inst->src[2].abs) {
306 switch (inst->conditional_mod) {
307 case BRW_CONDITIONAL_Z:
308 case BRW_CONDITIONAL_NZ:
309 inst->src[2].abs = false;
310 progress = true;
311 break;
312
313 case BRW_CONDITIONAL_LE:
314 /* Converting to Z can help constant propagation into src0
315 * and src1.
316 */
317 inst->conditional_mod = BRW_CONDITIONAL_Z;
318 inst->src[2].abs = false;
319 progress = true;
320 break;
321
322 default:
323 /* GE or L conditions with absolute value could be used to
324 * implement isnan(x) in CSEL. Transforming G with absolute
325 * value to NZ is **not** NaN safe.
326 */
327 break;
328 }
329 }
330 } else if (brw_type_is_sint(inst->src[2].type)) {
331 /* Integer transformations are more challenging than floating
332 * point transformations due to INT_MIN == -(INT_MIN) ==
333 * abs(INT_MIN).
334 */
335 if (inst->src[2].negate && inst->src[2].abs) {
336 switch (inst->conditional_mod) {
337 case BRW_CONDITIONAL_GE:
338 inst->src[2].negate = false;
339 inst->src[2].abs = false;
340 inst->conditional_mod = BRW_CONDITIONAL_Z;
341 progress = true;
342 break;
343 case BRW_CONDITIONAL_L:
344 inst->src[2].negate = false;
345 inst->src[2].abs = false;
346 inst->conditional_mod = BRW_CONDITIONAL_NZ;
347 progress = true;
348 break;
349 case BRW_CONDITIONAL_G:
350 /* This is a contradtion. -abs(x) cannot be > 0. */
351 inst->opcode = BRW_OPCODE_MOV;
352 inst->src[0] = inst->src[1];
353 inst->resize_sources(1);
354 progress = true;
355 break;
356 case BRW_CONDITIONAL_LE:
357 /* This is a tautology. -abs(x) must be <= 0. */
358 inst->opcode = BRW_OPCODE_MOV;
359 inst->resize_sources(1);
360 progress = true;
361 break;
362 case BRW_CONDITIONAL_Z:
363 case BRW_CONDITIONAL_NZ:
364 inst->src[2].negate = false;
365 inst->src[2].abs = false;
366 progress = true;
367 break;
368 default:
369 unreachable("Impossible icsel condition.");
370 }
371 }
372 }
373 break;
374 case BRW_OPCODE_MAD:
375 if (inst->src[0].type != BRW_TYPE_F ||
376 inst->src[1].type != BRW_TYPE_F ||
377 inst->src[2].type != BRW_TYPE_F)
378 break;
379 if (inst->src[1].is_one()) {
380 inst->opcode = BRW_OPCODE_ADD;
381 inst->src[1] = inst->src[2];
382 inst->resize_sources(2);
383 progress = true;
384 } else if (inst->src[2].is_one()) {
385 inst->opcode = BRW_OPCODE_ADD;
386 inst->resize_sources(2);
387 progress = true;
388 }
389 break;
390 case BRW_OPCODE_SHL:
391 if (inst->src[0].file == IMM && inst->src[1].file == IMM) {
392 /* It's not currently possible to generate this, and this constant
393 * folding does not handle it.
394 */
395 assert(!inst->saturate);
396
397 brw_reg result;
398
399 switch (brw_type_size_bytes(inst->src[0].type)) {
400 case 2:
401 result = brw_imm_uw(0x0ffff & (inst->src[0].ud << (inst->src[1].ud & 0x1f)));
402 break;
403 case 4:
404 result = brw_imm_ud(inst->src[0].ud << (inst->src[1].ud & 0x1f));
405 break;
406 case 8:
407 result = brw_imm_uq(inst->src[0].u64 << (inst->src[1].ud & 0x3f));
408 break;
409 default:
410 /* Just in case a future platform re-enables B or UB types. */
411 unreachable("Invalid source size.");
412 }
413
414 inst->opcode = BRW_OPCODE_MOV;
415 inst->src[0] = retype(result, inst->dst.type);
416 inst->resize_sources(1);
417
418 progress = true;
419 }
420 break;
421
422 case SHADER_OPCODE_BROADCAST:
423 if (is_uniform(inst->src[0])) {
424 inst->opcode = BRW_OPCODE_MOV;
425 inst->force_writemask_all = true;
426 inst->resize_sources(1);
427 progress = true;
428 } else if (inst->src[1].file == IMM) {
429 inst->opcode = BRW_OPCODE_MOV;
430 /* It's possible that the selected component will be too large and
431 * overflow the register. This can happen if someone does a
432 * readInvocation() from GLSL or SPIR-V and provides an OOB
433 * invocationIndex. If this happens and we some how manage
434 * to constant fold it in and get here, then component() may cause
435 * us to start reading outside of the VGRF which will lead to an
436 * assert later. Instead, just let it wrap around if it goes over
437 * exec_size.
438 */
439 const unsigned comp = inst->src[1].ud & (inst->exec_size - 1);
440 inst->src[0] = component(inst->src[0], comp);
441 inst->force_writemask_all = true;
442 inst->resize_sources(1);
443 progress = true;
444 }
445 break;
446
447 case SHADER_OPCODE_SHUFFLE:
448 if (is_uniform(inst->src[0])) {
449 inst->opcode = BRW_OPCODE_MOV;
450 inst->resize_sources(1);
451 progress = true;
452 } else if (inst->src[1].file == IMM) {
453 inst->opcode = BRW_OPCODE_MOV;
454 inst->src[0] = component(inst->src[0],
455 inst->src[1].ud);
456 inst->resize_sources(1);
457 progress = true;
458 }
459 break;
460
461 default:
462 break;
463 }
464
465 /* Ensure that the correct source has the immediate value. 2-source
466 * instructions must have the immediate in src[1]. On Gfx12 and later,
467 * some 3-source instructions can have the immediate in src[0] or
468 * src[2]. It's complicated, so don't mess with 3-source instructions
469 * here.
470 */
471 if (progress && inst->sources == 2 && inst->is_commutative()) {
472 if (inst->src[0].file == IMM) {
473 brw_reg tmp = inst->src[1];
474 inst->src[1] = inst->src[0];
475 inst->src[0] = tmp;
476 }
477 }
478 }
479
480 if (progress)
481 s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
482 DEPENDENCY_INSTRUCTION_DETAIL);
483
484 return progress;
485 }
486