/*
 * Copyright (C) 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *    Alyssa Rosenzweig <[email protected]>
 */

#ifndef __BIFROST_COMPILER_H
#define __BIFROST_COMPILER_H

#include "compiler/nir/nir.h"
#include "panfrost/util/pan_ir.h"
#include "util/half_float.h"
#include "util/u_math.h"
#include "util/u_worklist.h"
#include "bi_opcodes.h"
#include "bifrost.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Swizzles across bytes in a 32-bit word. Expresses swz in the XML directly.
 * To express widen, use the corresponding replicated form, i.e. H01 = identity
 * for widen = none, H00 for widen = h0, B1111 for widen = b1. For lane, also
 * use the replicated form (interpretation is governed by the opcode). For
 * 8-bit lanes with two channels, use the replicated forms (TODO: what about
 * others?). For 8-bit lanes with four channels, use the matching form (TODO:
 * what about others?).
 */

enum bi_swizzle {
   /* 16-bit swizzle ordering deliberate for fast compute */
   BI_SWIZZLE_H00 = 0, /* = B0101 */
   BI_SWIZZLE_H01 = 1, /* = B0123 = W0 */
   BI_SWIZZLE_H10 = 2, /* = B2301 */
   BI_SWIZZLE_H11 = 3, /* = B2323 */

   /* replication order should be maintained for fast compute */
   BI_SWIZZLE_B0000 = 4, /* single channel (replicate) */
   BI_SWIZZLE_B1111 = 5,
   BI_SWIZZLE_B2222 = 6,
   BI_SWIZZLE_B3333 = 7,

   /* totally special for explicit pattern matching */
   BI_SWIZZLE_B0011 = 8,  /* +SWZ.v4i8 */
   BI_SWIZZLE_B2233 = 9,  /* +SWZ.v4i8 */
   BI_SWIZZLE_B1032 = 10, /* +SWZ.v4i8 */
   BI_SWIZZLE_B3210 = 11, /* +SWZ.v4i8 */

   BI_SWIZZLE_B0022 = 12, /* for b02 lanes */
};

/* Given a packed i16vec2/i8vec4 constant, apply a swizzle. Useful for constant
 * folding and Valhall constant optimization. */

static inline uint32_t
bi_apply_swizzle(uint32_t value, enum bi_swizzle swz)
{
   const uint16_t *h = (const uint16_t *)&value;
   const uint8_t *b = (const uint8_t *)&value;

#define H(h0, h1) (h[h0] | ((uint32_t)h[h1] << 16))
#define B(b0, b1, b2, b3)                                                      \
   (b[b0] | ((uint32_t)b[b1] << 8) | ((uint32_t)b[b2] << 16) |                 \
    ((uint32_t)b[b3] << 24))

   switch (swz) {
   case BI_SWIZZLE_H00:
      return H(0, 0);
   case BI_SWIZZLE_H01:
      return H(0, 1);
   case BI_SWIZZLE_H10:
      return H(1, 0);
   case BI_SWIZZLE_H11:
      return H(1, 1);
   case BI_SWIZZLE_B0000:
      return B(0, 0, 0, 0);
   case BI_SWIZZLE_B1111:
      return B(1, 1, 1, 1);
   case BI_SWIZZLE_B2222:
      return B(2, 2, 2, 2);
   case BI_SWIZZLE_B3333:
      return B(3, 3, 3, 3);
   case BI_SWIZZLE_B0011:
      return B(0, 0, 1, 1);
   case BI_SWIZZLE_B2233:
      return B(2, 2, 3, 3);
   case BI_SWIZZLE_B1032:
      return B(1, 0, 3, 2);
   case BI_SWIZZLE_B3210:
      return B(3, 2, 1, 0);
   case BI_SWIZZLE_B0022:
      return B(0, 0, 2, 2);
   }

#undef H
#undef B

   unreachable("Invalid swizzle");
}
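
/* Worked examples on a little-endian host (which the pointer casts above
 * assume), using value = 0x44332211 so that b[0] = 0x11 .. b[3] = 0x44:
 *
 *    bi_apply_swizzle(0x44332211, BI_SWIZZLE_H10)   == 0x22114433
 *    bi_apply_swizzle(0x44332211, BI_SWIZZLE_B0000) == 0x11111111
 *    bi_apply_swizzle(0x44332211, BI_SWIZZLE_B3210) == 0x11223344
 */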

enum bi_index_type {
   BI_INDEX_NULL = 0,
   BI_INDEX_NORMAL = 1,
   BI_INDEX_REGISTER = 2,
   BI_INDEX_CONSTANT = 3,
   BI_INDEX_PASS = 4,
   BI_INDEX_FAU = 5
};

typedef struct {
   uint32_t value;

   /* modifiers, should only be set if applicable for a given instruction.
    * For *IDP.v4i8, abs plays the role of sign. For bitwise ops where
    * applicable, neg plays the role of not */
   bool abs : 1;
   bool neg : 1;

   /* The last use of a value, which should be purged from the register
    * cache. Set by liveness analysis. */
   bool discard : 1;

   /* For a source, the swizzle. For a destination, acts a bit like a
    * write mask. Identity for the full 32-bit, H00 for only caring about
    * the lower half, other values unused. */
   enum bi_swizzle swizzle : 4;
   uint32_t offset : 3;
   enum bi_index_type type : 3;

   /* Must be zeroed so we can hash the whole 64-bits at a time */
   unsigned padding : (32 - 13);
} bi_index;

static inline bi_index
bi_get_index(unsigned value)
{
   return (bi_index){
      .value = value,
      .swizzle = BI_SWIZZLE_H01,
      .type = BI_INDEX_NORMAL,
   };
}

static inline bi_index
bi_register(unsigned reg)
{
   assert(reg < 64);

   return (bi_index){
      .value = reg,
      .swizzle = BI_SWIZZLE_H01,
      .type = BI_INDEX_REGISTER,
   };
}

static inline bi_index
bi_imm_u32(uint32_t imm)
{
   return (bi_index){
      .value = imm,
      .swizzle = BI_SWIZZLE_H01,
      .type = BI_INDEX_CONSTANT,
   };
}

static inline bi_index
bi_imm_f32(float imm)
{
   return bi_imm_u32(fui(imm));
}

static inline bi_index
bi_null()
{
   return (bi_index){.type = BI_INDEX_NULL};
}

static inline bi_index
bi_zero()
{
   return bi_imm_u32(0);
}

static inline bi_index
bi_passthrough(enum bifrost_packed_src value)
{
   return (bi_index){
      .value = value,
      .swizzle = BI_SWIZZLE_H01,
      .type = BI_INDEX_PASS,
   };
}

/* Helps construct swizzles */
static inline bi_index
bi_swz_16(bi_index idx, bool x, bool y)
{
   assert(idx.swizzle == BI_SWIZZLE_H01);
   idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_H00 | (x << 1) | y);
   return idx;
}

static inline bi_index
bi_half(bi_index idx, bool upper)
{
   return bi_swz_16(idx, upper, upper);
}

static inline bi_index
bi_byte(bi_index idx, unsigned lane)
{
   assert(idx.swizzle == BI_SWIZZLE_H01);
   assert(lane < 4);
   idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_B0000 + lane);
   return idx;
}

static inline bi_index
bi_abs(bi_index idx)
{
   idx.abs = true;
   return idx;
}

static inline bi_index
bi_neg(bi_index idx)
{
   idx.neg ^= true;
   return idx;
}

static inline bi_index
bi_discard(bi_index idx)
{
   idx.discard = true;
   return idx;
}

/* Additive identity in IEEE 754 arithmetic */
static inline bi_index
bi_negzero()
{
   return bi_neg(bi_zero());
}

/* Replaces an index, preserving any modifiers */

static inline bi_index
bi_replace_index(bi_index old, bi_index replacement)
{
   replacement.abs = old.abs;
   replacement.neg = old.neg;
   replacement.swizzle = old.swizzle;
   replacement.discard = false; /* needs liveness analysis to set */
   return replacement;
}

/* Remove any modifiers. This has the property:
 *
 *    replace_index(x, strip_index(x)) = x
 *
 * This ensures it is suitable to use when lowering sources to moves */

static inline bi_index
bi_strip_index(bi_index index)
{
   index.abs = index.neg = false;
   index.swizzle = BI_SWIZZLE_H01;
   return index;
}
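
/* For instance, when lowering a source to a move, the stripped index is what
 * the move should read, while the original modifiers stay on the use (a
 * sketch, assuming a builder b and the generated helper bi_mov_i32_to from
 * bi_builder.h):
 *
 *    bi_index tmp = bi_temp(b->shader);
 *    bi_mov_i32_to(b, tmp, bi_strip_index(I->src[s]));
 *    bi_replace_src(I, s, tmp);
 */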

/* For bitwise instructions */
#define bi_not(x) bi_neg(x)

static inline bi_index
bi_imm_u8(uint8_t imm)
{
   return bi_byte(bi_imm_u32(imm), 0);
}

static inline bi_index
bi_imm_u16(uint16_t imm)
{
   return bi_half(bi_imm_u32(imm), false);
}

static inline bi_index
bi_imm_uintN(uint32_t imm, unsigned sz)
{
   assert(sz == 8 || sz == 16 || sz == 32);
   return (sz == 8)    ? bi_imm_u8(imm)
          : (sz == 16) ? bi_imm_u16(imm)
                       : bi_imm_u32(imm);
}

static inline bi_index
bi_imm_f16(float imm)
{
   return bi_imm_u16(_mesa_float_to_half(imm));
}
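
/* e.g. bi_imm_f16(1.0f) is the 32-bit constant 0x00003C00 carrying swizzle
 * H00, so both fp16 halves read back as 1.0 */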

static inline bool
bi_is_null(bi_index idx)
{
   return idx.type == BI_INDEX_NULL;
}

static inline bool
bi_is_ssa(bi_index idx)
{
   return idx.type == BI_INDEX_NORMAL;
}

/* Compares equivalence as references. Does not compare offsets, swizzles, or
 * modifiers. In other words, this forms bi_index equivalence classes by
 * partitioning memory. E.g. -abs(foo[1].yx) == foo.xy but foo != bar */

static inline bool
bi_is_equiv(bi_index left, bi_index right)
{
   return (left.type == right.type) && (left.value == right.value);
}

/* A stronger equivalence relation that requires the indices to access the
 * same offset, useful for RA/scheduling to see what registers will
 * correspond to */

static inline bool
bi_is_word_equiv(bi_index left, bi_index right)
{
   return bi_is_equiv(left, right) && left.offset == right.offset;
}

/* An even stronger equivalence that checks if indices correspond to the
 * same value when evaluated
 */
static inline bool
bi_is_value_equiv(bi_index left, bi_index right)
{
   if (left.type == BI_INDEX_CONSTANT && right.type == BI_INDEX_CONSTANT) {
      return (bi_apply_swizzle(left.value, left.swizzle) ==
              bi_apply_swizzle(right.value, right.swizzle)) &&
             (left.abs == right.abs) && (left.neg == right.neg);
   } else {
      return (left.value == right.value) && (left.abs == right.abs) &&
             (left.neg == right.neg) && (left.swizzle == right.swizzle) &&
             (left.offset == right.offset) && (left.type == right.type);
   }
}
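
/* e.g. bi_imm_u16(1) and bi_imm_u32(0x00010001) are value-equivalent: both
 * evaluate to 0x00010001 once their swizzles are applied */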

#define BI_MAX_VEC   8
#define BI_MAX_DESTS 4
#define BI_MAX_SRCS  6

typedef struct {
   /* Must be first */
   struct list_head link;
   bi_index *dest;
   bi_index *src;

   enum bi_opcode op;
   uint8_t nr_srcs;
   uint8_t nr_dests;

   union {
      /* For a branch */
      struct bi_block *branch_target;

      /* For a phi node that hasn't been translated yet. This is only
       * used during NIR->BIR
       */
      nir_phi_instr *phi;
   };

   /* These don't fit neatly with anything else... */
   enum bi_register_format register_format;
   enum bi_vecsize vecsize;

   /* Flow control associated with a Valhall instruction */
   uint8_t flow;

   /* Slot associated with a message-passing instruction */
   uint8_t slot;

   /* Can we spill the value written here? Used to prevent
    * useless double fills */
   bool no_spill;

   /* On Bifrost: A value of bi_table to override the table, inducing a
    * DTSEL_IMM pair if nonzero.
    *
    * On Valhall: the table index to use for resource instructions.
    *
    * These two interpretations are equivalent if you squint a bit.
    */
   unsigned table;

   /* Everything after this MUST NOT be accessed directly, since
    * interpretation depends on opcodes */

   /* Destination modifiers */
   union {
      enum bi_clamp clamp;
      bool saturate;
      bool not_result;
      unsigned dest_mod;
   };

   /* Immediates. All seen alone in an instruction, except for varying/texture
    * which are specified jointly for VARTEX */
   union {
      uint32_t shift;
      uint32_t fill;
      uint32_t index;
      uint32_t attribute_index;

      struct {
         uint32_t varying_index;
         uint32_t sampler_index;
         uint32_t texture_index;
      };

      /* TEXC, ATOM_CX: # of staging registers used */
      struct {
         uint32_t sr_count;
         uint32_t sr_count_2;

         union {
            /* Atomics effectively require all three */
            int32_t byte_offset;

            /* BLEND requires all three */
            int32_t branch_offset;
         };
      };
   };

   /* Modifiers specific to particular instructions are thrown in a union */
   union {
      enum bi_adj adj;           /* FEXP_TABLE.u4 */
      enum bi_atom_opc atom_opc; /* atomics */
      enum bi_func func;         /* FPOW_SC_DET */
      enum bi_function function; /* LD_VAR_FLAT */
      enum bi_mux mux;           /* MUX */
      enum bi_sem sem;           /* FMAX, FMIN */
      enum bi_source source;     /* LD_GCLK */
      bool scale;                /* VN_ASST2, FSINCOS_OFFSET */
      bool offset;               /* FSIN_TABLE, FOCS_TABLE */
      bool mask;                 /* CLZ */
      bool threads;              /* IMULD, IMOV_FMA */
      bool combine;              /* BRANCHC */
      bool format;               /* LEA_TEX */

      struct {
         enum bi_special special; /* FADD_RSCALE, FMA_RSCALE */
         enum bi_round round;     /* FMA, converts, FADD, _RSCALE, etc */
         bool ftz;                /* Flush-to-zero for F16_TO_F32 */
      };

      struct {
         enum bi_result_type result_type; /* FCMP, ICMP */
         enum bi_cmpf cmpf;               /* CSEL, FCMP, ICMP, BRANCH */
      };

      struct {
         enum bi_stack_mode stack_mode; /* JUMP_EX */
         bool test_mode;
      };

      struct {
         enum bi_seg seg;       /* LOAD, STORE, SEG_ADD, SEG_SUB */
         bool preserve_null;    /* SEG_ADD, SEG_SUB */
         enum bi_extend extend; /* LOAD, IMUL */
      };

      struct {
         enum bi_sample sample;               /* VAR_TEX, LD_VAR */
         enum bi_update update;               /* VAR_TEX, LD_VAR */
         enum bi_varying_name varying_name;   /* LD_VAR_SPECIAL */
         bool skip;                           /* VAR_TEX, TEXS, TEXC */
         bool lod_mode;                       /* VAR_TEX, TEXS, implicitly for TEXC */
         enum bi_source_format source_format; /* LD_VAR_BUF */

         /* Used for valhall texturing */
         bool shadow;
         bool wide_indices;
         bool texel_offset;
         bool array_enable;
         bool integer_coordinates;
         enum bi_fetch_component fetch_component;
         enum bi_va_lod_mode va_lod_mode;
         enum bi_dimension dimension;
         enum bi_write_mask write_mask;
      };

      /* Maximum size, for hashing */
      unsigned flags[14];

      struct {
         enum bi_subgroup subgroup;               /* WMASK, CLPER */
         enum bi_inactive_result inactive_result; /* CLPER */
         enum bi_lane_op lane_op;                 /* CLPER */
      };

      struct {
         bool z;       /* ZS_EMIT */
         bool stencil; /* ZS_EMIT */
      };

      struct {
         bool h; /* VN_ASST1.f16 */
         bool l; /* VN_ASST1.f16 */
      };

      struct {
         bool bytes2; /* RROT_DOUBLE, FRSHIFT_DOUBLE */
         bool result_word;
         bool arithmetic; /* ARSHIFT_OR */
      };

      struct {
         bool sqrt; /* FREXPM */
         bool log;  /* FREXPM */
      };

      struct {
         enum bi_mode mode;           /* FLOG_TABLE */
         enum bi_precision precision; /* FLOG_TABLE */
         bool divzero;                /* FRSQ_APPROX, FRSQ */
      };
   };
} bi_instr;

static inline bool
bi_is_staging_src(const bi_instr *I, unsigned s)
{
   return (s == 0 || s == 4) && bi_opcode_props[I->op].sr_read;
}

/*
 * Safe helpers to remove destinations/sources at the end of the
 * destination/source array when changing opcodes. Unlike adding
 * sources/destinations, this does not require reallocation.
 */
static inline void
bi_drop_dests(bi_instr *I, unsigned new_count)
{
   assert(new_count < I->nr_dests);

   for (unsigned i = new_count; i < I->nr_dests; ++i)
      I->dest[i] = bi_null();

   I->nr_dests = new_count;
}

static inline void
bi_drop_srcs(bi_instr *I, unsigned new_count)
{
   assert(new_count < I->nr_srcs);

   for (unsigned i = new_count; i < I->nr_srcs; ++i)
      I->src[i] = bi_null();

   I->nr_srcs = new_count;
}

static inline void
bi_replace_src(bi_instr *I, unsigned src_index, bi_index replacement)
{
   I->src[src_index] = bi_replace_index(I->src[src_index], replacement);
}
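
/* For example, rewriting an instruction into a single-source MOV would set
 * the opcode and then drop the now-unused trailing sources (a sketch; the
 * opcode name comes from the generated bi_opcodes.h):
 *
 *    I->op = BI_OPCODE_MOV_I32;
 *    bi_drop_srcs(I, 1);
 */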

/* Represents the assignment of slots for a given bi_tuple */

typedef struct {
   /* Register to assign to each slot */
   unsigned slot[4];

   /* Read slots can be disabled */
   bool enabled[2];

   /* Configuration for slots 2/3 */
   struct bifrost_reg_ctrl_23 slot23;

   /* Fast-Access-Uniform RAM index */
   uint8_t fau_idx;

   /* Whether writes are actually for the last instruction */
   bool first_instruction;
} bi_registers;

/* A bi_tuple contains two paired instruction pointers. If a slot is unfilled,
 * leave it NULL; the emitter will fill in a nop. Instructions reference
 * registers via slots which are assigned per tuple.
 */

typedef struct {
   uint8_t fau_idx;
   bi_registers regs;
   bi_instr *fma;
   bi_instr *add;
} bi_tuple;

struct bi_block;

typedef struct {
   struct list_head link;

   /* Link back up for branch calculations */
   struct bi_block *block;

   /* Architectural limit of 8 tuples/clause */
   unsigned tuple_count;
   bi_tuple tuples[8];

   /* For scoreboarding -- the clause ID (this is not globally unique!)
    * and its dependencies in terms of other clauses, computed during
    * scheduling and used when emitting code. Dependencies expressed as a
    * bitfield matching the hardware, except shifted by a clause (the
    * shift back to the ISA's off-by-one encoding is worked out when
    * emitting clauses) */
   unsigned scoreboard_id;
   uint8_t dependencies;

   /* See ISA header for description */
   enum bifrost_flow flow_control;

   /* Can we prefetch the next clause? Usually it makes sense, except for
    * clauses ending in unconditional branches */
   bool next_clause_prefetch;

   /* Assigned data register */
   unsigned staging_register;

   /* Corresponds to the usual bit but shifted by a clause */
   bool staging_barrier;

   /* Constants read by this clause. ISA limit. Must satisfy:
    *
    *    constant_count + tuple_count <= 13
    *
    * Also implicitly constant_count <= tuple_count since a tuple only
    * reads a single constant.
    */
   uint64_t constants[8];
   unsigned constant_count;

   /* Index of a constant to be PC-relative */
   unsigned pcrel_idx;

   /* Branches encode a constant offset relative to the program counter
    * with some magic flags. By convention, if there is a branch, its
    * constant will be last. Set this flag to indicate this is required.
    */
   bool branch_constant;

   /* Unique in a clause */
   enum bifrost_message_type message_type;
   bi_instr *message;

   /* Discard helper threads */
   bool td;

   /* Should flush-to-zero mode be enabled for this clause? */
   bool ftz;
} bi_clause;

#define BI_NUM_SLOTS 8

/* A model for the state of the scoreboard */
struct bi_scoreboard_state {
   /** Bitmap of registers read/written by a slot */
   uint64_t read[BI_NUM_SLOTS];
   uint64_t write[BI_NUM_SLOTS];

   /* Nonregister dependencies present by a slot */
   uint8_t varying : BI_NUM_SLOTS;
   uint8_t memory : BI_NUM_SLOTS;
};

typedef struct bi_block {
   /* Link to next block. Must be first for mir_get_block */
   struct list_head link;

   /* List of instructions emitted for the current block */
   struct list_head instructions;

   /* Index of the block in source order */
   unsigned index;

   /* Control flow graph */
   struct bi_block *successors[2];
   struct util_dynarray predecessors;
   bool unconditional_jumps;
   bool loop_header;

   /* Per 32-bit word live masks for the block indexed by node */
   uint8_t *live_in;
   uint8_t *live_out;

   /* Scalar liveness indexed by SSA index */
   BITSET_WORD *ssa_live_in;
   BITSET_WORD *ssa_live_out;

   /* If true, uses clauses; if false, uses instructions */
   bool scheduled;
   struct list_head clauses; /* list of bi_clause */

   /* Post-RA liveness */
   uint64_t reg_live_in, reg_live_out;

   /* Scoreboard state at the start/end of block */
   struct bi_scoreboard_state scoreboard_in, scoreboard_out;

   /* On Valhall, indicates we need a terminal NOP to implement jumps to
    * the end of the shader.
    */
   bool needs_nop;

   /* Flags available for pass-internal use */
   uint8_t pass_flags;
} bi_block;

static inline unsigned
bi_num_successors(bi_block *block)
{
   STATIC_ASSERT(ARRAY_SIZE(block->successors) == 2);
   assert(block->successors[0] || !block->successors[1]);

   if (block->successors[1])
      return 2;
   else if (block->successors[0])
      return 1;
   else
      return 0;
}

static inline unsigned
bi_num_predecessors(bi_block *block)
{
   return util_dynarray_num_elements(&block->predecessors, bi_block *);
}

static inline bi_block *
bi_start_block(struct list_head *blocks)
{
   bi_block *first = list_first_entry(blocks, bi_block, link);
   assert(bi_num_predecessors(first) == 0);
   return first;
}

static inline bi_block *
bi_exit_block(struct list_head *blocks)
{
   bi_block *last = list_last_entry(blocks, bi_block, link);
   assert(bi_num_successors(last) == 0);
   return last;
}

static inline void
bi_block_add_successor(bi_block *block, bi_block *successor)
{
   assert(block != NULL && successor != NULL);

   /* Cull impossible edges */
   if (block->unconditional_jumps)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
      if (block->successors[i]) {
         if (block->successors[i] == successor)
            return;
         else
            continue;
      }

      block->successors[i] = successor;
      util_dynarray_append(&successor->predecessors, bi_block *, block);
      return;
   }

   unreachable("Too many successors");
}
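
/* e.g. wiring up the CFG for an if/else during NIR->BIR amounts to two calls
 * (a sketch, with hypothetical then_block/else_block):
 *
 *    bi_block_add_successor(before_block, then_block);
 *    bi_block_add_successor(before_block, else_block);
 */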

/* Subset of pan_shader_info needed per-variant, in order to support IDVS */
struct bi_shader_info {
   struct panfrost_ubo_push *push;
   struct bifrost_shader_info *bifrost;
   unsigned tls_size;
   unsigned work_reg_count;
   unsigned push_offset;
};

/* State of index-driven vertex shading for current shader */
enum bi_idvs_mode {
   /* IDVS not in use */
   BI_IDVS_NONE = 0,

   /* IDVS in use. Compiling a position shader */
   BI_IDVS_POSITION = 1,

   /* IDVS in use. Compiling a varying shader */
   BI_IDVS_VARYING = 2,
};

typedef struct {
   const struct panfrost_compile_inputs *inputs;
   nir_shader *nir;
   struct bi_shader_info info;
   gl_shader_stage stage;
   struct list_head blocks; /* list of bi_block */
   uint32_t quirks;
   unsigned arch;
   enum bi_idvs_mode idvs;
   unsigned num_blocks;

   /* In any graphics shader, whether the "IDVS with memory
    * allocation" flow is used. This affects how varyings are loaded and
    * stored. Ignore for compute.
    */
   bool malloc_idvs;

   /* During NIR->BIR */
   bi_block *current_block;
   bi_block *after_block;
   bi_block *break_block;
   bi_block *continue_block;
   bi_block **indexed_nir_blocks;
   bool emitted_atest;

   /* During NIR->BIR, the coverage bitmap. If this is NULL, the default
    * coverage bitmap should be sourced from preloaded register r60. This is
    * written by ATEST and ZS_EMIT
    */
   bi_index coverage;

   /* During NIR->BIR, table of preloaded registers, or NULL if never
    * preloaded.
    */
   bi_index preloaded[64];

   /* For creating temporaries */
   unsigned ssa_alloc;
   unsigned reg_alloc;

   /* Mask of UBOs that need to be uploaded */
   uint32_t ubo_mask;

   /* During instruction selection, map from vector bi_index to its scalar
    * components, populated by a split.
    */
   struct hash_table_u64 *allocated_vec;

   /* Stats for shader-db */
   unsigned loop_count;
   unsigned spills;
   unsigned fills;
} bi_context;

static inline void
bi_remove_instruction(bi_instr *ins)
{
   list_del(&ins->link);
}

enum bir_fau {
   BIR_FAU_ZERO = 0,
   BIR_FAU_LANE_ID = 1,
   BIR_FAU_WARP_ID = 2,
   BIR_FAU_CORE_ID = 3,
   BIR_FAU_FB_EXTENT = 4,
   BIR_FAU_ATEST_PARAM = 5,
   BIR_FAU_SAMPLE_POS_ARRAY = 6,
   BIR_FAU_BLEND_0 = 8,
   /* blend descs 1 - 7 */
   BIR_FAU_TYPE_MASK = 15,

   /* Valhall only */
   BIR_FAU_TLS_PTR = 16,
   BIR_FAU_WLS_PTR = 17,
   BIR_FAU_PROGRAM_COUNTER = 18,

   BIR_FAU_UNIFORM = (1 << 7),
   /* Look up table on Valhall */
   BIR_FAU_IMMEDIATE = (1 << 8),
};

static inline bi_index
bi_fau(enum bir_fau value, bool hi)
{
   return (bi_index){
      .value = value,
      .swizzle = BI_SWIZZLE_H01,
      .offset = hi ? 1u : 0u,
      .type = BI_INDEX_FAU,
   };
}

/*
 * Builder for Valhall LUT entries. Generally, constants are modeled with
 * BI_INDEX_IMMEDIATE in the intermediate representation. This helper is only
 * necessary for passes running after lowering constants, as well as when
 * lowering constants.
 */
static inline bi_index
va_lut(unsigned index)
{
   return bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | (index >> 1)), index & 1);
}
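
/* e.g. va_lut(3) reads the high 32-bit half of LUT entry 1, i.e. it is
 * bi_fau(BIR_FAU_IMMEDIATE | 1, true) */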

/*
 * va_lut_zero is like bi_zero but only works on Valhall. It is intended for
 * use by late passes that run after constants are lowered, specifically
 * register allocation. bi_zero() is preferred where possible.
 */
static inline bi_index
va_zero_lut()
{
   return va_lut(0);
}

static inline bi_index
bi_temp(bi_context *ctx)
{
   return bi_get_index(ctx->ssa_alloc++);
}

static inline bi_index
bi_def_index(nir_def *def)
{
   return bi_get_index(def->index);
}

/* Inline constants automatically, will be lowered out by bi_lower_fau where a
 * constant is not allowed. load_const_to_scalar guarantees that this makes
 * sense */

static inline bi_index
bi_src_index(nir_src *src)
{
   if (nir_src_is_const(*src) && nir_src_bit_size(*src) <= 32) {
      return bi_imm_u32(nir_src_as_uint(*src));
   } else {
      return bi_def_index(src->ssa);
   }
}

/* Iterators for Bifrost IR */

#define bi_foreach_block(ctx, v)                                               \
   list_for_each_entry(bi_block, v, &ctx->blocks, link)

#define bi_foreach_block_rev(ctx, v)                                           \
   list_for_each_entry_rev(bi_block, v, &ctx->blocks, link)

#define bi_foreach_block_from(ctx, from, v)                                    \
   list_for_each_entry_from(bi_block, v, from, &ctx->blocks, link)

#define bi_foreach_block_from_rev(ctx, from, v)                                \
   list_for_each_entry_from_rev(bi_block, v, from, &ctx->blocks, link)

#define bi_foreach_instr_in_block(block, v)                                    \
   list_for_each_entry(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_rev(block, v)                                \
   list_for_each_entry_rev(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_safe(block, v)                               \
   list_for_each_entry_safe(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_safe_rev(block, v)                           \
   list_for_each_entry_safe_rev(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_from(block, v, from)                         \
   list_for_each_entry_from(bi_instr, v, from, &(block)->instructions, link)

#define bi_foreach_instr_in_block_from_rev(block, v, from)                     \
   list_for_each_entry_from_rev(bi_instr, v, from, &(block)->instructions, link)

#define bi_foreach_clause_in_block(block, v)                                   \
   list_for_each_entry(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_rev(block, v)                               \
   list_for_each_entry_rev(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_safe(block, v)                              \
   list_for_each_entry_safe(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from(block, v, from)                        \
   list_for_each_entry_from(bi_clause, v, from, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from_rev(block, v, from)                    \
   list_for_each_entry_from_rev(bi_clause, v, from, &(block)->clauses, link)

#define bi_foreach_instr_global(ctx, v)                                        \
   bi_foreach_block(ctx, v_block)                                              \
      bi_foreach_instr_in_block(v_block, v)

#define bi_foreach_instr_global_rev(ctx, v)                                    \
   bi_foreach_block_rev(ctx, v_block)                                          \
      bi_foreach_instr_in_block_rev(v_block, v)

#define bi_foreach_instr_global_safe(ctx, v)                                   \
   bi_foreach_block(ctx, v_block)                                              \
      bi_foreach_instr_in_block_safe(v_block, v)

#define bi_foreach_instr_global_rev_safe(ctx, v)                               \
   bi_foreach_block_rev(ctx, v_block)                                          \
      bi_foreach_instr_in_block_safe_rev(v_block, v)

#define bi_foreach_instr_in_tuple(tuple, v)                                    \
   for (bi_instr *v = (tuple)->fma ?: (tuple)->add; v != NULL;                 \
        v = (v == (tuple)->add) ? NULL : (tuple)->add)

#define bi_foreach_successor(blk, v)                                           \
   bi_block *v;                                                                \
   bi_block **_v;                                                              \
   for (_v = &blk->successors[0], v = *_v;                                     \
        v != NULL && _v < &blk->successors[2]; _v++, v = *_v)

#define bi_foreach_predecessor(blk, v)                                         \
   util_dynarray_foreach(&(blk)->predecessors, bi_block *, v)

#define bi_foreach_src(ins, v) for (unsigned v = 0; v < ins->nr_srcs; ++v)

#define bi_foreach_dest(ins, v) for (unsigned v = 0; v < ins->nr_dests; ++v)

#define bi_foreach_ssa_src(ins, v)                                             \
   bi_foreach_src(ins, v)                                                      \
      if (ins->src[v].type == BI_INDEX_NORMAL)

#define bi_foreach_ssa_dest(ins, v)                                            \
   bi_foreach_dest(ins, v)                                                     \
      if (ins->dest[v].type == BI_INDEX_NORMAL)

#define bi_foreach_instr_and_src_in_tuple(tuple, ins, s)                       \
   bi_foreach_instr_in_tuple(tuple, ins)                                       \
      bi_foreach_src(ins, s)
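
/* Typical usage, e.g. counting SSA uses across the shader (a sketch, with a
 * hypothetical uses array indexed by SSA value):
 *
 *    bi_foreach_instr_global(ctx, I) {
 *       bi_foreach_ssa_src(I, s)
 *          uses[I->src[s].value]++;
 *    }
 */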

/*
 * Find the index of a predecessor, used as the implicit order of phi sources.
 */
static inline unsigned
bi_predecessor_index(bi_block *succ, bi_block *pred)
{
   unsigned index = 0;

   bi_foreach_predecessor(succ, x) {
      if (*x == pred)
         return index;

      index++;
   }

   unreachable("Invalid predecessor");
}
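
/* The phi source flowing in along the edge pred -> succ is therefore
 * phi->src[bi_predecessor_index(succ, pred)]. */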

static inline bi_instr *
bi_prev_op(bi_instr *ins)
{
   return list_last_entry(&(ins->link), bi_instr, link);
}

static inline bi_instr *
bi_next_op(bi_instr *ins)
{
   return list_first_entry(&(ins->link), bi_instr, link);
}

static inline bi_block *
bi_next_block(bi_block *block)
{
   return list_first_entry(&(block->link), bi_block, link);
}

static inline bi_block *
bi_entry_block(bi_context *ctx)
{
   return list_first_entry(&ctx->blocks, bi_block, link);
}

/* BIR manipulation */

bool bi_has_arg(const bi_instr *ins, bi_index arg);
unsigned bi_count_read_registers(const bi_instr *ins, unsigned src);
unsigned bi_count_write_registers(const bi_instr *ins, unsigned dest);
bool bi_is_regfmt_16(enum bi_register_format fmt);
unsigned bi_writemask(const bi_instr *ins, unsigned dest);
bi_clause *bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause);
bool bi_side_effects(const bi_instr *I);
bool bi_reconverge_branches(bi_block *block);

bool bi_can_replace_with_csel(bi_instr *I);

void bi_print_instr(const bi_instr *I, FILE *fp);
void bi_print_slots(bi_registers *regs, FILE *fp);
void bi_print_tuple(bi_tuple *tuple, FILE *fp);
void bi_print_clause(bi_clause *clause, FILE *fp);
void bi_print_block(bi_block *block, FILE *fp);
void bi_print_shader(bi_context *ctx, FILE *fp);

/* BIR passes */

bool bi_instr_uses_helpers(bi_instr *I);
bool bi_block_terminates_helpers(bi_block *block);
void bi_analyze_helper_terminate(bi_context *ctx);
void bi_mark_clauses_td(bi_context *ctx);

void bi_analyze_helper_requirements(bi_context *ctx);
void bi_opt_copy_prop(bi_context *ctx);
void bi_opt_dce(bi_context *ctx, bool partial);
void bi_opt_cse(bi_context *ctx);
void bi_opt_mod_prop_forward(bi_context *ctx);
void bi_opt_mod_prop_backward(bi_context *ctx);
void bi_opt_fuse_dual_texture(bi_context *ctx);
void bi_opt_dce_post_ra(bi_context *ctx);
void bi_opt_message_preload(bi_context *ctx);
void bi_opt_push_ubo(bi_context *ctx);
void bi_opt_reorder_push(bi_context *ctx);
void bi_lower_swizzle(bi_context *ctx);
void bi_lower_fau(bi_context *ctx);
void bi_assign_scoreboard(bi_context *ctx);
void bi_register_allocate(bi_context *ctx);
void va_optimize(bi_context *ctx);
void va_lower_split_64bit(bi_context *ctx);

void bi_lower_opt_instructions(bi_context *ctx);

void bi_pressure_schedule(bi_context *ctx);
void bi_schedule(bi_context *ctx);
bool bi_can_fma(bi_instr *ins);
bool bi_can_add(bi_instr *ins);
bool bi_must_message(bi_instr *ins);
bool bi_reads_zero(bi_instr *ins);
bool bi_reads_temps(bi_instr *ins, unsigned src);
bool bi_reads_t(bi_instr *ins, unsigned src);

#ifndef NDEBUG
bool bi_validate_initialization(bi_context *ctx);
void bi_validate(bi_context *ctx, const char *after_str);
#else
static inline bool
bi_validate_initialization(UNUSED bi_context *ctx)
{
   return true;
}
static inline void
bi_validate(UNUSED bi_context *ctx, UNUSED const char *after_str)
{
   return;
}
#endif

uint32_t bi_fold_constant(bi_instr *I, bool *unsupported);
bool bi_opt_constant_fold(bi_context *ctx);

/* Liveness */

void bi_compute_liveness_ssa(bi_context *ctx);
void bi_liveness_ins_update_ssa(BITSET_WORD *live, const bi_instr *ins);

void bi_postra_liveness(bi_context *ctx);
uint64_t MUST_CHECK bi_postra_liveness_ins(uint64_t live, bi_instr *ins);

/* Layout */

signed bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target);
bool bi_ec0_packed(unsigned tuple_count);

/* Check if there are no more instructions starting with a given block; this
 * needs to recurse in case a shader ends with multiple empty blocks */

static inline bool
bi_is_terminal_block(bi_block *block)
{
   return (block == NULL) || (list_is_empty(&block->instructions) &&
                              bi_is_terminal_block(block->successors[0]) &&
                              bi_is_terminal_block(block->successors[1]));
}

/* Code emit */

/* Returns the size of the final clause */
unsigned bi_pack(bi_context *ctx, struct util_dynarray *emission);
void bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission);

struct bi_packed_tuple {
   uint64_t lo;
   uint64_t hi;
};

uint8_t bi_pack_literal(enum bi_clause_subword literal);

uint8_t bi_pack_upper(enum bi_clause_subword upper,
                      struct bi_packed_tuple *tuples,
                      ASSERTED unsigned tuple_count);
uint64_t bi_pack_tuple_bits(enum bi_clause_subword idx,
                            struct bi_packed_tuple *tuples,
                            ASSERTED unsigned tuple_count, unsigned offset,
                            unsigned nbits);

uint8_t bi_pack_sync(enum bi_clause_subword t1, enum bi_clause_subword t2,
                     enum bi_clause_subword t3, struct bi_packed_tuple *tuples,
                     ASSERTED unsigned tuple_count, bool z);

void bi_pack_format(struct util_dynarray *emission, unsigned index,
                    struct bi_packed_tuple *tuples,
                    ASSERTED unsigned tuple_count, uint64_t header,
                    uint64_t ec0, unsigned m0, bool z);

unsigned bi_pack_fma(bi_instr *I, enum bifrost_packed_src src0,
                     enum bifrost_packed_src src1, enum bifrost_packed_src src2,
                     enum bifrost_packed_src src3);
unsigned bi_pack_add(bi_instr *I, enum bifrost_packed_src src0,
                     enum bifrost_packed_src src1, enum bifrost_packed_src src2,
                     enum bifrost_packed_src src3);

/* Like in NIR, for use with the builder */

enum bi_cursor_option {
   bi_cursor_after_block,
   bi_cursor_before_instr,
   bi_cursor_after_instr
};

typedef struct {
   enum bi_cursor_option option;

   union {
      bi_block *block;
      bi_instr *instr;
   };
} bi_cursor;

static inline bi_cursor
bi_after_block(bi_block *block)
{
   return (bi_cursor){.option = bi_cursor_after_block, .block = block};
}

static inline bi_cursor
bi_before_instr(bi_instr *instr)
{
   return (bi_cursor){.option = bi_cursor_before_instr, .instr = instr};
}

static inline bi_cursor
bi_after_instr(bi_instr *instr)
{
   return (bi_cursor){.option = bi_cursor_after_instr, .instr = instr};
}

static inline bi_cursor
bi_after_block_logical(bi_block *block)
{
   if (list_is_empty(&block->instructions))
      return bi_after_block(block);

   bi_instr *last = list_last_entry(&block->instructions, bi_instr, link);
   assert(last != NULL);

   if (last->branch_target)
      return bi_before_instr(last);
   else
      return bi_after_block(block);
}

static inline bi_cursor
bi_before_nonempty_block(bi_block *block)
{
   bi_instr *I = list_first_entry(&block->instructions, bi_instr, link);
   assert(I != NULL);

   return bi_before_instr(I);
}

static inline bi_cursor
bi_before_block(bi_block *block)
{
   if (list_is_empty(&block->instructions))
      return bi_after_block(block);
   else
      return bi_before_nonempty_block(block);
}

/* Invariant: a tuple must be nonempty UNLESS it is the last tuple of a clause,
 * in which case there must exist a nonempty penultimate tuple */

ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_first_instr_in_tuple(bi_tuple *tuple)
{
   bi_instr *instr = tuple->fma ?: tuple->add;
   assert(instr != NULL);
   return instr;
}

ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_first_instr_in_clause(bi_clause *clause)
{
   return bi_first_instr_in_tuple(&clause->tuples[0]);
}

ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_last_instr_in_clause(bi_clause *clause)
{
   bi_tuple tuple = clause->tuples[clause->tuple_count - 1];
   bi_instr *instr = tuple.add ?: tuple.fma;

   if (!instr) {
      assert(clause->tuple_count >= 2);
      tuple = clause->tuples[clause->tuple_count - 2];
      instr = tuple.add ?: tuple.fma;
   }

   assert(instr != NULL);
   return instr;
}

/* Implemented by expanding bi_foreach_instr_in_block_from(_rev) with the start
 * (end) of the clause and adding a condition for the clause boundary */

#define bi_foreach_instr_in_clause(block, clause, pos)                         \
   for (bi_instr *pos =                                                        \
           list_entry(bi_first_instr_in_clause(clause), bi_instr, link);       \
        (&pos->link != &(block)->instructions) &&                              \
        (pos != bi_next_op(bi_last_instr_in_clause(clause)));                  \
        pos = list_entry(pos->link.next, bi_instr, link))

#define bi_foreach_instr_in_clause_rev(block, clause, pos)                     \
   for (bi_instr *pos =                                                        \
           list_entry(bi_last_instr_in_clause(clause), bi_instr, link);        \
        (&pos->link != &(block)->instructions) &&                              \
        pos != bi_prev_op(bi_first_instr_in_clause(clause));                   \
        pos = list_entry(pos->link.prev, bi_instr, link))

static inline bi_cursor
bi_before_clause(bi_clause *clause)
{
   return bi_before_instr(bi_first_instr_in_clause(clause));
}

static inline bi_cursor
bi_before_tuple(bi_tuple *tuple)
{
   return bi_before_instr(bi_first_instr_in_tuple(tuple));
}

static inline bi_cursor
bi_after_clause(bi_clause *clause)
{
   return bi_after_instr(bi_last_instr_in_clause(clause));
}

/* IR builder in terms of cursor infrastructure */

typedef struct {
   bi_context *shader;
   bi_cursor cursor;
} bi_builder;

static inline bi_builder
bi_init_builder(bi_context *ctx, bi_cursor cursor)
{
   return (bi_builder){.shader = ctx, .cursor = cursor};
}

/* Insert an instruction at the cursor and move the cursor */

static inline void
bi_builder_insert(bi_cursor *cursor, bi_instr *I)
{
   switch (cursor->option) {
   case bi_cursor_after_instr:
      list_add(&I->link, &cursor->instr->link);
      cursor->instr = I;
      return;

   case bi_cursor_after_block:
      list_addtail(&I->link, &cursor->block->instructions);
      cursor->option = bi_cursor_after_instr;
      cursor->instr = I;
      return;

   case bi_cursor_before_instr:
      list_addtail(&I->link, &cursor->instr->link);
      cursor->option = bi_cursor_after_instr;
      cursor->instr = I;
      return;
   }

   unreachable("Invalid cursor option");
}
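
/* For example, to insert new instructions immediately before an existing
 * instruction I (a sketch, assuming the generated builder helper
 * bi_mov_i32_to from bi_builder.h):
 *
 *    bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
 *    bi_mov_i32_to(&b, dst, src);
 */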

bi_instr *bi_csel_from_mux(bi_builder *b, const bi_instr *I, bool must_sign);

/* Read back power-efficient garbage, TODO maybe merge with null? */
static inline bi_index
bi_dontcare(bi_builder *b)
{
   if (b->shader->arch >= 9)
      return bi_zero();
   else
      return bi_passthrough(BIFROST_SRC_FAU_HI);
}

#define bi_worklist_init(ctx, w)        u_worklist_init(w, ctx->num_blocks, ctx)
#define bi_worklist_push_head(w, block) u_worklist_push_head(w, block, index)
#define bi_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index)
#define bi_worklist_peek_head(w)        u_worklist_peek_head(w, bi_block, index)
#define bi_worklist_pop_head(w)         u_worklist_pop_head(w, bi_block, index)
#define bi_worklist_peek_tail(w)        u_worklist_peek_tail(w, bi_block, index)
#define bi_worklist_pop_tail(w)         u_worklist_pop_tail(w, bi_block, index)
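
/* Typical fixed-point iteration over blocks, e.g. for backward dataflow (a
 * sketch, using u_worklist from util/u_worklist.h):
 *
 *    u_worklist worklist;
 *    bi_worklist_init(ctx, &worklist);
 *
 *    bi_foreach_block(ctx, block)
 *       bi_worklist_push_head(&worklist, block);
 *
 *    while (!u_worklist_is_empty(&worklist)) {
 *       bi_block *blk = bi_worklist_pop_tail(&worklist);
 *       ...process blk, re-pushing any predecessors that changed...
 *    }
 *
 *    u_worklist_fini(&worklist);
 */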

/* NIR passes */

bool bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes);

#ifdef __cplusplus
} /* extern C */
#endif

#endif