1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_cfg.h"
25 #include "brw_fs.h"
26 #include "util/macros.h"
27
28 bool
brw_reg_saturate_immediate(brw_reg * reg)29 brw_reg_saturate_immediate(brw_reg *reg)
30 {
31 union {
32 unsigned ud;
33 int d;
34 float f;
35 double df;
36 } imm, sat_imm = { 0 };
37
38 const unsigned size = brw_type_size_bytes(reg->type);
39
40 /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise
41 * irrelevant, so just check the size of the type and copy from/to an
42 * appropriately sized field.
43 */
44 if (size < 8)
45 imm.ud = reg->ud;
46 else
47 imm.df = reg->df;
48
49 switch (reg->type) {
50 case BRW_TYPE_UD:
51 case BRW_TYPE_D:
52 case BRW_TYPE_UW:
53 case BRW_TYPE_W:
54 case BRW_TYPE_UQ:
55 case BRW_TYPE_Q:
56 /* Nothing to do. */
57 return false;
58 case BRW_TYPE_F:
59 sat_imm.f = SATURATE(imm.f);
60 break;
61 case BRW_TYPE_DF:
62 sat_imm.df = SATURATE(imm.df);
63 break;
64 case BRW_TYPE_UB:
65 case BRW_TYPE_B:
66 unreachable("no UB/B immediates");
67 case BRW_TYPE_V:
68 case BRW_TYPE_UV:
69 case BRW_TYPE_VF:
70 unreachable("unimplemented: saturate vector immediate");
71 case BRW_TYPE_HF:
72 unreachable("unimplemented: saturate HF immediate");
73 default:
74 unreachable("invalid type");
75 }
76
77 if (size < 8) {
78 if (imm.ud != sat_imm.ud) {
79 reg->ud = sat_imm.ud;
80 return true;
81 }
82 } else {
83 if (imm.df != sat_imm.df) {
84 reg->df = sat_imm.df;
85 return true;
86 }
87 }
88 return false;
89 }
90
91 bool
brw_reg_negate_immediate(brw_reg * reg)92 brw_reg_negate_immediate(brw_reg *reg)
93 {
94 switch (reg->type) {
95 case BRW_TYPE_D:
96 case BRW_TYPE_UD:
97 reg->d = -reg->d;
98 return true;
99 case BRW_TYPE_W:
100 case BRW_TYPE_UW: {
101 uint16_t value = -(int16_t)reg->ud;
102 reg->ud = value | (uint32_t)value << 16;
103 return true;
104 }
105 case BRW_TYPE_F:
106 reg->f = -reg->f;
107 return true;
108 case BRW_TYPE_VF:
109 reg->ud ^= 0x80808080;
110 return true;
111 case BRW_TYPE_DF:
112 reg->df = -reg->df;
113 return true;
114 case BRW_TYPE_UQ:
115 case BRW_TYPE_Q:
116 reg->d64 = -reg->d64;
117 return true;
118 case BRW_TYPE_UB:
119 case BRW_TYPE_B:
120 unreachable("no UB/B immediates");
121 case BRW_TYPE_UV:
122 case BRW_TYPE_V:
123 assert(!"unimplemented: negate UV/V immediate");
124 case BRW_TYPE_HF:
125 reg->ud ^= 0x80008000;
126 return true;
127 default:
128 unreachable("invalid type");
129 }
130
131 return false;
132 }
133
134 bool
brw_reg_abs_immediate(brw_reg * reg)135 brw_reg_abs_immediate(brw_reg *reg)
136 {
137 switch (reg->type) {
138 case BRW_TYPE_D:
139 reg->d = abs(reg->d);
140 return true;
141 case BRW_TYPE_W: {
142 uint16_t value = abs((int16_t)reg->ud);
143 reg->ud = value | (uint32_t)value << 16;
144 return true;
145 }
146 case BRW_TYPE_F:
147 reg->f = fabsf(reg->f);
148 return true;
149 case BRW_TYPE_DF:
150 reg->df = fabs(reg->df);
151 return true;
152 case BRW_TYPE_VF:
153 reg->ud &= ~0x80808080;
154 return true;
155 case BRW_TYPE_Q:
156 reg->d64 = imaxabs(reg->d64);
157 return true;
158 case BRW_TYPE_UB:
159 case BRW_TYPE_B:
160 unreachable("no UB/B immediates");
161 case BRW_TYPE_UQ:
162 case BRW_TYPE_UD:
163 case BRW_TYPE_UW:
164 case BRW_TYPE_UV:
165 /* Presumably the absolute value modifier on an unsigned source is a
166 * nop, but it would be nice to confirm.
167 */
168 assert(!"unimplemented: abs unsigned immediate");
169 case BRW_TYPE_V:
170 assert(!"unimplemented: abs V immediate");
171 case BRW_TYPE_HF:
172 reg->ud &= ~0x80008000;
173 return true;
174 default:
175 unreachable("invalid type");
176 }
177
178 return false;
179 }
180
181 bool
is_zero() const182 brw_reg::is_zero() const
183 {
184 if (file != IMM)
185 return false;
186
187 assert(brw_type_size_bytes(type) > 1);
188
189 switch (type) {
190 case BRW_TYPE_HF:
191 assert((d & 0xffff) == ((d >> 16) & 0xffff));
192 return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000;
193 case BRW_TYPE_F:
194 return f == 0;
195 case BRW_TYPE_DF:
196 return df == 0;
197 case BRW_TYPE_W:
198 case BRW_TYPE_UW:
199 assert((d & 0xffff) == ((d >> 16) & 0xffff));
200 return (d & 0xffff) == 0;
201 case BRW_TYPE_D:
202 case BRW_TYPE_UD:
203 return d == 0;
204 case BRW_TYPE_UQ:
205 case BRW_TYPE_Q:
206 return u64 == 0;
207 default:
208 return false;
209 }
210 }
211
212 bool
is_one() const213 brw_reg::is_one() const
214 {
215 if (file != IMM)
216 return false;
217
218 assert(brw_type_size_bytes(type) > 1);
219
220 switch (type) {
221 case BRW_TYPE_HF:
222 assert((d & 0xffff) == ((d >> 16) & 0xffff));
223 return (d & 0xffff) == 0x3c00;
224 case BRW_TYPE_F:
225 return f == 1.0f;
226 case BRW_TYPE_DF:
227 return df == 1.0;
228 case BRW_TYPE_W:
229 case BRW_TYPE_UW:
230 assert((d & 0xffff) == ((d >> 16) & 0xffff));
231 return (d & 0xffff) == 1;
232 case BRW_TYPE_D:
233 case BRW_TYPE_UD:
234 return d == 1;
235 case BRW_TYPE_UQ:
236 case BRW_TYPE_Q:
237 return u64 == 1;
238 default:
239 return false;
240 }
241 }
242
243 bool
is_negative_one() const244 brw_reg::is_negative_one() const
245 {
246 if (file != IMM)
247 return false;
248
249 assert(brw_type_size_bytes(type) > 1);
250
251 switch (type) {
252 case BRW_TYPE_HF:
253 assert((d & 0xffff) == ((d >> 16) & 0xffff));
254 return (d & 0xffff) == 0xbc00;
255 case BRW_TYPE_F:
256 return f == -1.0;
257 case BRW_TYPE_DF:
258 return df == -1.0;
259 case BRW_TYPE_W:
260 assert((d & 0xffff) == ((d >> 16) & 0xffff));
261 return (d & 0xffff) == 0xffff;
262 case BRW_TYPE_D:
263 return d == -1;
264 case BRW_TYPE_Q:
265 return d64 == -1;
266 default:
267 return false;
268 }
269 }
270
271 bool
is_null() const272 brw_reg::is_null() const
273 {
274 return file == ARF && nr == BRW_ARF_NULL;
275 }
276
277
278 bool
is_accumulator() const279 brw_reg::is_accumulator() const
280 {
281 return file == ARF && (nr & 0xF0) == BRW_ARF_ACCUMULATOR;
282 }
283
284 bool
is_commutative() const285 fs_inst::is_commutative() const
286 {
287 switch (opcode) {
288 case BRW_OPCODE_AND:
289 case BRW_OPCODE_OR:
290 case BRW_OPCODE_XOR:
291 case BRW_OPCODE_ADD:
292 case BRW_OPCODE_ADD3:
293 case SHADER_OPCODE_MULH:
294 return true;
295
296 case BRW_OPCODE_MUL:
297 /* Integer multiplication of dword and word sources is not actually
298 * commutative. The DW source must be first.
299 */
300 return !brw_type_is_int(src[0].type) ||
301 brw_type_size_bits(src[0].type) == brw_type_size_bits(src[1].type);
302
303 case BRW_OPCODE_SEL:
304 /* MIN and MAX are commutative. */
305 if (conditional_mod == BRW_CONDITIONAL_GE ||
306 conditional_mod == BRW_CONDITIONAL_L) {
307 return true;
308 }
309 FALLTHROUGH;
310 default:
311 return false;
312 }
313 }
314
315 bool
is_3src(const struct brw_compiler * compiler) const316 fs_inst::is_3src(const struct brw_compiler *compiler) const
317 {
318 return ::is_3src(&compiler->isa, opcode);
319 }
320
321 bool
is_math() const322 fs_inst::is_math() const
323 {
324 return (opcode == SHADER_OPCODE_RCP ||
325 opcode == SHADER_OPCODE_RSQ ||
326 opcode == SHADER_OPCODE_SQRT ||
327 opcode == SHADER_OPCODE_EXP2 ||
328 opcode == SHADER_OPCODE_LOG2 ||
329 opcode == SHADER_OPCODE_SIN ||
330 opcode == SHADER_OPCODE_COS ||
331 opcode == SHADER_OPCODE_INT_QUOTIENT ||
332 opcode == SHADER_OPCODE_INT_REMAINDER ||
333 opcode == SHADER_OPCODE_POW);
334 }
335
336 bool
is_control_flow_begin() const337 fs_inst::is_control_flow_begin() const
338 {
339 switch (opcode) {
340 case BRW_OPCODE_DO:
341 case BRW_OPCODE_IF:
342 case BRW_OPCODE_ELSE:
343 return true;
344 default:
345 return false;
346 }
347 }
348
349 bool
is_control_flow_end() const350 fs_inst::is_control_flow_end() const
351 {
352 switch (opcode) {
353 case BRW_OPCODE_ELSE:
354 case BRW_OPCODE_WHILE:
355 case BRW_OPCODE_ENDIF:
356 return true;
357 default:
358 return false;
359 }
360 }
361
362 bool
is_control_flow() const363 fs_inst::is_control_flow() const
364 {
365 switch (opcode) {
366 case BRW_OPCODE_DO:
367 case BRW_OPCODE_WHILE:
368 case BRW_OPCODE_IF:
369 case BRW_OPCODE_ELSE:
370 case BRW_OPCODE_ENDIF:
371 case BRW_OPCODE_BREAK:
372 case BRW_OPCODE_CONTINUE:
373 return true;
374 default:
375 return false;
376 }
377 }
378
379 bool
uses_indirect_addressing() const380 fs_inst::uses_indirect_addressing() const
381 {
382 switch (opcode) {
383 case SHADER_OPCODE_BROADCAST:
384 case SHADER_OPCODE_CLUSTER_BROADCAST:
385 case SHADER_OPCODE_MOV_INDIRECT:
386 return true;
387 default:
388 return false;
389 }
390 }
391
392 bool
can_do_saturate() const393 fs_inst::can_do_saturate() const
394 {
395 switch (opcode) {
396 case BRW_OPCODE_ADD:
397 case BRW_OPCODE_ADD3:
398 case BRW_OPCODE_ASR:
399 case BRW_OPCODE_AVG:
400 case BRW_OPCODE_CSEL:
401 case BRW_OPCODE_DP2:
402 case BRW_OPCODE_DP3:
403 case BRW_OPCODE_DP4:
404 case BRW_OPCODE_DPH:
405 case BRW_OPCODE_DP4A:
406 case BRW_OPCODE_LINE:
407 case BRW_OPCODE_LRP:
408 case BRW_OPCODE_MAC:
409 case BRW_OPCODE_MAD:
410 case BRW_OPCODE_MATH:
411 case BRW_OPCODE_MOV:
412 case BRW_OPCODE_MUL:
413 case SHADER_OPCODE_MULH:
414 case BRW_OPCODE_PLN:
415 case BRW_OPCODE_RNDD:
416 case BRW_OPCODE_RNDE:
417 case BRW_OPCODE_RNDU:
418 case BRW_OPCODE_RNDZ:
419 case BRW_OPCODE_SEL:
420 case BRW_OPCODE_SHL:
421 case BRW_OPCODE_SHR:
422 case SHADER_OPCODE_COS:
423 case SHADER_OPCODE_EXP2:
424 case SHADER_OPCODE_LOG2:
425 case SHADER_OPCODE_POW:
426 case SHADER_OPCODE_RCP:
427 case SHADER_OPCODE_RSQ:
428 case SHADER_OPCODE_SIN:
429 case SHADER_OPCODE_SQRT:
430 return true;
431 default:
432 return false;
433 }
434 }
435
436 bool
reads_accumulator_implicitly() const437 fs_inst::reads_accumulator_implicitly() const
438 {
439 switch (opcode) {
440 case BRW_OPCODE_MAC:
441 case BRW_OPCODE_MACH:
442 return true;
443 default:
444 return false;
445 }
446 }
447
448 bool
writes_accumulator_implicitly(const struct intel_device_info * devinfo) const449 fs_inst::writes_accumulator_implicitly(const struct intel_device_info *devinfo) const
450 {
451 return writes_accumulator ||
452 (eot && intel_needs_workaround(devinfo, 14010017096));
453 }
454
455 bool
has_side_effects() const456 fs_inst::has_side_effects() const
457 {
458 switch (opcode) {
459 case SHADER_OPCODE_SEND:
460 return send_has_side_effects;
461
462 case BRW_OPCODE_SYNC:
463 case SHADER_OPCODE_MEMORY_STORE_LOGICAL:
464 case SHADER_OPCODE_MEMORY_ATOMIC_LOGICAL:
465 case SHADER_OPCODE_MEMORY_FENCE:
466 case SHADER_OPCODE_INTERLOCK:
467 case SHADER_OPCODE_URB_WRITE_LOGICAL:
468 case FS_OPCODE_FB_WRITE_LOGICAL:
469 case SHADER_OPCODE_BARRIER:
470 case SHADER_OPCODE_RND_MODE:
471 case SHADER_OPCODE_FLOAT_CONTROL_MODE:
472 case FS_OPCODE_SCHEDULING_FENCE:
473 case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
474 case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
475 case RT_OPCODE_TRACE_RAY_LOGICAL:
476 return true;
477 default:
478 return eot;
479 }
480 }
481
482 bool
is_volatile() const483 fs_inst::is_volatile() const
484 {
485 return opcode == SHADER_OPCODE_MEMORY_LOAD_LOGICAL ||
486 (opcode == SHADER_OPCODE_SEND && send_is_volatile);
487 }
488
489 #ifndef NDEBUG
490 static bool
inst_is_in_block(const bblock_t * block,const fs_inst * inst)491 inst_is_in_block(const bblock_t *block, const fs_inst *inst)
492 {
493 const exec_node *n = inst;
494
495 /* Find the tail sentinel. If the tail sentinel is the sentinel from the
496 * list header in the bblock_t, then this instruction is in that basic
497 * block.
498 */
499 while (!n->is_tail_sentinel())
500 n = n->get_next();
501
502 return n == &block->instructions.tail_sentinel;
503 }
504 #endif
505
506 static void
adjust_later_block_ips(bblock_t * start_block,int ip_adjustment)507 adjust_later_block_ips(bblock_t *start_block, int ip_adjustment)
508 {
509 for (bblock_t *block_iter = start_block->next();
510 block_iter;
511 block_iter = block_iter->next()) {
512 block_iter->start_ip += ip_adjustment;
513 block_iter->end_ip += ip_adjustment;
514 }
515 }
516
517 void
insert_after(bblock_t * block,fs_inst * inst)518 fs_inst::insert_after(bblock_t *block, fs_inst *inst)
519 {
520 assert(this != inst);
521 assert(block->end_ip_delta == 0);
522
523 if (!this->is_head_sentinel())
524 assert(inst_is_in_block(block, this) || !"Instruction not in block");
525
526 block->end_ip++;
527
528 adjust_later_block_ips(block, 1);
529
530 exec_node::insert_after(inst);
531 }
532
533 void
insert_before(bblock_t * block,fs_inst * inst)534 fs_inst::insert_before(bblock_t *block, fs_inst *inst)
535 {
536 assert(this != inst);
537 assert(block->end_ip_delta == 0);
538
539 if (!this->is_tail_sentinel())
540 assert(inst_is_in_block(block, this) || !"Instruction not in block");
541
542 block->end_ip++;
543
544 adjust_later_block_ips(block, 1);
545
546 exec_node::insert_before(inst);
547 }
548
549 void
remove(bblock_t * block,bool defer_later_block_ip_updates)550 fs_inst::remove(bblock_t *block, bool defer_later_block_ip_updates)
551 {
552 assert(inst_is_in_block(block, this) || !"Instruction not in block");
553
554 if (exec_list_is_singular(&block->instructions)) {
555 this->opcode = BRW_OPCODE_NOP;
556 this->resize_sources(0);
557 this->dst = brw_reg();
558 this->size_written = 0;
559 return;
560 }
561
562 if (defer_later_block_ip_updates) {
563 block->end_ip_delta--;
564 } else {
565 assert(block->end_ip_delta == 0);
566 adjust_later_block_ips(block, -1);
567 }
568
569 if (block->start_ip == block->end_ip) {
570 if (block->end_ip_delta != 0) {
571 adjust_later_block_ips(block, block->end_ip_delta);
572 block->end_ip_delta = 0;
573 }
574
575 block->cfg->remove_block(block);
576 } else {
577 block->end_ip--;
578 }
579
580 exec_node::remove();
581 }
582
583