/*
 * Copyright © 2021 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

#include "util/ralloc.h"
#include "ir3_ra.h"
#include "ir3_shader.h"

/* This file implements a validation pass for register allocation. We check
 * that the assignment of SSA values to registers is "valid", in the sense
 * that each original definition reaches all of its uses without being
 * clobbered by something else.
 *
 * The validation is a forward dataflow analysis. The state at each point
 * consists of, for each physical register, the SSA value occupying it, or one
 * of a few special values:
 *
 * - "unknown" is set initially, before the dataflow analysis assigns it a
 *   value. This is the lattice bottom.
 * - Registers at the start of the shader get "undef", which acts like a
 *   special SSA value indicating that the register is never written.
 * - "overdefined" registers hold more than one value, depending on which
 *   control-flow path was taken to reach this point. This is the lattice top.
 *
 * We have to distinguish overdefined registers because in some programs they
 * are perfectly normal and allowed, as in this simple example:
 *
 *    if (...) {
 *       mov.u32u32 ssa_1(r1.x), ...
 *       ...
 *    } else {
 *       mov.u32u32 ssa_2(r1.x), ...
 *       ...
 *    }
 *    // r1.x is overdefined here!
 *
 * However, if an SSA value live after the if were accidentally assigned to
 * r1.x, we need to remember that r1.x is invalid in order to catch the
 * mistake. Overdef also has to be distinguished from undef so that the state
 * forms a valid lattice, which guarantees that the analysis always
 * terminates. We could avoid relying on overdef by using liveness analysis,
 * but not relying on liveness has the benefit that we can catch bugs in the
 * liveness analysis too.
 *
 * One tricky thing we have to handle is the coalescing of splits/collects,
 * which means that multiple SSA values can occupy a register at the same
 * time. While we could use the same merge set indices that RA uses, that
 * would again rely on the merge set calculation being correct, which we don't
 * want to assume. Instead we treat splits/collects as transfer instructions,
 * similar to the parallelcopy instructions inserted by RA, and have them copy
 * their sources to their destinations. This means that each physreg must
 * carry the SSA def assigned to it plus an offset into that definition, and
 * when validating sources we must look through splits/collects to find the
 * "original" source for each subregister.
 */

#define UNKNOWN ((struct ir3_register *)NULL)
#define UNDEF   ((struct ir3_register *)(uintptr_t)1)
#define OVERDEF ((struct ir3_register *)(uintptr_t)2)

struct reg_state {
   struct ir3_register *def;
   unsigned offset;
};

struct file_state {
   struct reg_state regs[RA_MAX_FILE_SIZE];
};

struct reaching_state {
   struct file_state half, full, shared, predicate;
};

struct ra_val_ctx {
   struct ir3_instruction *current_instr;

   /* The current state of the dataflow analysis for the instruction we're
    * processing.
    */
   struct reaching_state reaching;

   /* The state at the start of each basic block. */
   struct reaching_state *block_reaching;
   unsigned block_count;

   /* When validating shared RA, we have to take spill/reload instructions
    * into account. This saves an array of reg_state for the source of each
    * spill instruction, to be restored at the corresponding reload(s).
    */
   struct hash_table *spill_reaching;

   unsigned full_size, half_size, predicate_size;

   bool merged_regs;
   bool shared_ra;

   bool failed;
};

static void
validate_error(struct ra_val_ctx *ctx, const char *condstr)
{
   fprintf(stderr, "ra validation fail: %s\n", condstr);
   fprintf(stderr, " -> for instruction: ");
   ir3_print_instr(ctx->current_instr);
   abort();
}

#define validate_assert(ctx, cond)                                            \
   do {                                                                       \
      if (!(cond)) {                                                          \
         validate_error(ctx, #cond);                                          \
      }                                                                       \
   } while (0)

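/* Returns the size of the register file that "reg" is allocated in. Note that
 * with merged registers, half registers share the full register file.
 */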
static unsigned
get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg)
{
   if (reg->flags & IR3_REG_SHARED) {
      if (reg->flags & IR3_REG_HALF)
         return RA_SHARED_HALF_SIZE;
      else
         return RA_SHARED_SIZE;
   } else if (reg->flags & IR3_REG_PREDICATE) {
      return ctx->predicate_size;
   } else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF)) {
      return ctx->full_size;
   } else {
      return ctx->half_size;
   }
}

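/* Helpers for the saved spill state, keyed by the destination register of the
 * spill (or demoted) instruction that produced it.
 */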
static struct reg_state *
get_spill_state(struct ra_val_ctx *ctx, struct ir3_register *dst)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->spill_reaching, dst);
   if (entry)
      return entry->data;
   else
      return NULL;
}

static struct reg_state *
get_or_create_spill_state(struct ra_val_ctx *ctx, struct ir3_register *dst)
{
   struct reg_state *state = get_spill_state(ctx, dst);
   if (state)
      return state;

   state = rzalloc_array(ctx, struct reg_state, reg_size(dst));
   _mesa_hash_table_insert(ctx->spill_reaching, dst, state);
   return state;
}

static bool
validate_reg_is_src(const struct ir3_register *reg)
{
   return ra_reg_is_src(reg) || ra_reg_is_predicate(reg);
}

static bool
validate_reg_is_dst(const struct ir3_register *reg)
{
   return ra_reg_is_dst(reg) || ra_reg_is_predicate(reg);
}

/* Validate simple things, like the registers being in-bounds. This way we
 * don't have to worry about out-of-bounds accesses later.
 */

static void
validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   ctx->current_instr = instr;
   foreach_dst_if (dst, instr, validate_reg_is_dst) {
      if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED))
         continue;
      validate_assert(ctx, ra_reg_get_num(dst) != INVALID_REG);
      unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst);
      validate_assert(ctx, dst_max <= get_file_size(ctx, dst));
      if (dst->tied)
         validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied));
   }

   foreach_src_if (src, instr, validate_reg_is_src) {
      if (ctx->shared_ra && !(src->flags & IR3_REG_SHARED))
         continue;
      validate_assert(ctx, ra_reg_get_num(src) != INVALID_REG);
      unsigned src_max = ra_reg_get_physreg(src) + reg_size(src);
      validate_assert(ctx, src_max <= get_file_size(ctx, src));
   }
}

/* This is the lattice operator. */
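/* Merge rules, with UNKNOWN as the lattice bottom and OVERDEF as the top:
 *
 *    merge(UNKNOWN, x) = x
 *    merge(OVERDEF, x) = OVERDEF
 *    merge(a, b)       = a if a and b are the same def at the same offset,
 *                        OVERDEF otherwise
 *
 * Returns true if dst changed, which drives the fixed-point iteration in
 * check_reaching_defs().
 */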
static bool
merge_reg(struct reg_state *dst, const struct reg_state *src)
{
   if (dst->def == UNKNOWN) {
      *dst = *src;
      return src->def != UNKNOWN;
   } else if (dst->def == OVERDEF) {
      return false;
   } else {
      if (src->def == UNKNOWN)
         return false;
      else if (src->def == OVERDEF) {
         *dst = *src;
         return true;
      } else {
         if (dst->def != src->def || dst->offset != src->offset) {
            dst->def = OVERDEF;
            dst->offset = 0;
            return true;
         } else {
            return false;
         }
      }
   }
}

static bool
merge_file(struct file_state *dst, const struct file_state *src, unsigned size)
{
   bool progress = false;
   for (unsigned i = 0; i < size; i++)
      progress |= merge_reg(&dst->regs[i], &src->regs[i]);
   return progress;
}

static bool
merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst,
            const struct reaching_state *src)
{
   bool progress = false;
   progress |= merge_file(&dst->full, &src->full, ctx->full_size);
   progress |= merge_file(&dst->half, &src->half, ctx->half_size);
   progress |=
      merge_file(&dst->predicate, &src->predicate, ctx->predicate_size);
   return progress;
}

static bool
merge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst,
                     const struct reaching_state *src)
{
   return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
}

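/* Returns the file_state that tracks "reg" in the current reaching state,
 * mirroring the file selection in get_file_size().
 */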
static struct file_state *
ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg)
{
   if (reg->flags & IR3_REG_SHARED)
      return &ctx->reaching.shared;
   else if (reg->flags & IR3_REG_PREDICATE)
      return &ctx->reaching.predicate;
   else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
      return &ctx->reaching.full;
   else
      return &ctx->reaching.half;
}

/* Predicate RA implements spilling by cloning the instruction that produces a
 * def. In that case, we might end up with two different defs legitimately
 * reaching a source. To support validation, the RA will store the original
 * def in the instruction's data field.
 */
static struct ir3_register *
get_original_def(struct ir3_register *def)
{
   if (def == UNKNOWN || def == UNDEF || def == OVERDEF)
      return def;
   if (def->flags & IR3_REG_PREDICATE)
      return def->instr->data;
   return def;
}

static void
propagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   foreach_dst_if (dst, instr, validate_reg_is_dst) {
      /* Process destinations from scalar ALU instructions that were demoted
       * to normal ALU instructions. For these we must treat the instruction
       * as a spill of itself and set the propagate state to itself. See
       * try_demote_instructions().
       */
      if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
         if (instr->flags & IR3_INSTR_SHARED_SPILL) {
            struct reg_state *state = get_or_create_spill_state(ctx, dst);
            for (unsigned i = 0; i < reg_size(dst); i++) {
               state[i] = (struct reg_state){
                  .def = dst,
                  .offset = i,
               };
            }
         }
         continue;
      }

      struct file_state *file = ra_val_get_file(ctx, dst);
      physreg_t physreg = ra_reg_get_physreg(dst);

      for (unsigned i = 0; i < reg_size(dst); i++) {
         file->regs[physreg + i] = (struct reg_state){
            .def = get_original_def(dst),
            .offset = i,
         };
      }
   }
}

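/* Transfer function for split: the destination takes the reaching state of
 * the source subregister range selected by split.off. E.g. (illustrative
 * syntax) for "split ssa_2(r0.y), ssa_1(r2.z), off=1" the state of element 1
 * of ssa_1, i.e. r2.w, is copied into r0.y. For non-shared destinations
 * during shared RA, the saved spill state is forwarded instead.
 */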
static void
propagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split)
{
   struct ir3_register *dst = split->dsts[0];
   struct ir3_register *src = split->srcs[0];
   physreg_t dst_physreg = ra_reg_get_physreg(dst);
   physreg_t src_physreg = ra_reg_get_physreg(src);
   struct file_state *file = ra_val_get_file(ctx, dst);

   if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
      struct reg_state *src_state = get_spill_state(ctx, src->def);
      if (src_state) {
         struct reg_state *dst_state = get_or_create_spill_state(ctx, dst);
         memcpy(dst_state, &src_state[split->split.off * reg_elem_size(src)],
                reg_size(dst) * sizeof(struct reg_state));
      }
      return;
   }

   unsigned offset = split->split.off * reg_elem_size(src);
   for (unsigned i = 0; i < reg_elem_size(src); i++) {
      file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i];
   }
}

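/* Transfer function for collect: gather the reaching state of each source
 * element into the destination's range. The state is gathered into a
 * temporary first since sources may overlap the destination. Immediate/const
 * sources have no reaching def, so the collect itself acts as their
 * definition.
 */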
static void
propagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect)
{
   struct ir3_register *dst = collect->dsts[0];
   unsigned size = reg_size(dst);

   if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
      struct reg_state *dst_state = NULL;

      for (unsigned i = 0; i < collect->srcs_count; i++) {
         struct ir3_register *src = collect->srcs[i];
         unsigned dst_offset = i * reg_elem_size(dst);

         if (ra_reg_is_src(src)) {
            struct reg_state *src_state = get_spill_state(ctx, src->def);
            if (src_state) {
               if (!dst_state)
                  dst_state = get_or_create_spill_state(ctx, dst);
               memcpy(&dst_state[dst_offset], src_state,
                      reg_size(src) * sizeof(struct reg_state));
            }
         }
      }
   } else {
      struct file_state *file = ra_val_get_file(ctx, dst);
      physreg_t dst_physreg = ra_reg_get_physreg(dst);
      struct reg_state srcs[size];

      for (unsigned i = 0; i < collect->srcs_count; i++) {
         struct ir3_register *src = collect->srcs[i];
         unsigned dst_offset = i * reg_elem_size(dst);

         for (unsigned j = 0; j < reg_elem_size(dst); j++) {
            if (!ra_reg_is_src(src)) {
               srcs[dst_offset + j] = (struct reg_state){
                  .def = dst,
                  .offset = dst_offset + j,
               };
            } else {
               physreg_t src_physreg = ra_reg_get_physreg(src);
               srcs[dst_offset + j] = file->regs[src_physreg + j];
            }
         }
      }

      for (unsigned i = 0; i < size; i++)
         file->regs[dst_physreg + i] = srcs[i];
   }
}

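/* Transfer function for parallelcopy: like collect, read the reaching state
 * of all sources into a temporary before writing any destination, so that
 * the copies behave as if they all happen simultaneously.
 */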
static void
propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)
{
   unsigned size = 0;
   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
      size += reg_size(pcopy->srcs[i]);
   }

   struct reg_state srcs[size];

   unsigned offset = 0;
   for (unsigned i = 0; i < pcopy->srcs_count; i++) {
      struct ir3_register *dst = pcopy->dsts[i];
      struct ir3_register *src = pcopy->srcs[i];
      struct file_state *file = ra_val_get_file(ctx, dst);

      if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
         if (ra_reg_is_src(src)) {
            struct reg_state *src_state = get_spill_state(ctx, src->def);
            if (src_state) {
               struct reg_state *dst_state =
                  get_or_create_spill_state(ctx, dst);
               memcpy(dst_state, src_state,
                      reg_size(dst) * sizeof(struct reg_state));
            }
         }
      } else {
         for (unsigned j = 0; j < reg_size(dst); j++) {
            if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {
               srcs[offset + j] = (struct reg_state){
                  .def = dst,
                  .offset = j,
               };
            } else {
               physreg_t src_physreg = ra_reg_get_physreg(src);
               srcs[offset + j] = file->regs[src_physreg + j];
            }
         }
      }

      offset += reg_size(dst);
   }
   assert(offset == size);

   offset = 0;
   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
      struct ir3_register *dst = pcopy->dsts[i];

      if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
         offset += reg_size(dst);
         continue;
      }

      physreg_t dst_physreg = ra_reg_get_physreg(dst);
      struct file_state *file = ra_val_get_file(ctx, dst);

      for (unsigned j = 0; j < reg_size(dst); j++)
         file->regs[dst_physreg + j] = srcs[offset + j];

      offset += reg_size(dst);
   }
   assert(offset == size);
}

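/* Transfer function for shared spills/reloads: a spill (shared source) saves
 * the reaching state of its source in the spill_reaching table, and a reload
 * (shared destination) restores the saved state at the destination.
 */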
static void
propagate_spill(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   if (instr->srcs[0]->flags & IR3_REG_SHARED) { /* spill */
      struct reg_state *state = get_or_create_spill_state(ctx, instr->dsts[0]);
      physreg_t src_physreg = ra_reg_get_physreg(instr->srcs[0]);
      memcpy(state, &ctx->reaching.shared.regs[src_physreg],
             reg_size(instr->srcs[0]) * sizeof(struct reg_state));
   } else { /* reload */
      struct reg_state *state = get_spill_state(ctx, instr->srcs[0]->def);
      assert(state);
      physreg_t dst_physreg = ra_reg_get_physreg(instr->dsts[0]);
      memcpy(&ctx->reaching.shared.regs[dst_physreg], state,
             reg_size(instr->dsts[0]) * sizeof(struct reg_state));
   }
}

static void
propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_SPLIT)
      propagate_split(ctx, instr);
   else if (instr->opc == OPC_META_COLLECT)
      propagate_collect(ctx, instr);
   else if (instr->opc == OPC_META_PARALLEL_COPY)
      propagate_parallelcopy(ctx, instr);
   else if (ctx->shared_ra && instr->opc == OPC_MOV &&
            /* Moves from immed/const with IR3_INSTR_SHARED_SPILL were
             * demoted from scalar ALU, see try_demote_instruction().
             */
            !(instr->srcs[0]->flags & (IR3_REG_IMMED | IR3_REG_CONST)) &&
            (instr->flags & IR3_INSTR_SHARED_SPILL))
      propagate_spill(ctx, instr);
   else
      propagate_normal_instr(ctx, instr);
}

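/* Propagate the reaching state through one block: start from the state at
 * the block's entry, apply each instruction's transfer function, then merge
 * the result into all logical and physical successors. Returns true if any
 * successor's entry state changed.
 */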
static bool
propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
{
   ctx->reaching = ctx->block_reaching[block->index];

   foreach_instr (instr, &block->instr_list) {
      propagate_instr(ctx, instr);
   }

   bool progress = false;
   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->successors[i];
      if (!succ)
         continue;
      progress |=
         merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching);
   }
   for (unsigned i = 0; i < block->physical_successors_count; i++) {
      struct ir3_block *succ = block->physical_successors[i];
      progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index],
                                       &ctx->reaching);
   }
   return progress;
}

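/* Because splits/collects/parallelcopies are treated as transfer
 * instructions, we have to look through them here, mirroring the propagate_*
 * functions, to find the "original" def and offset expected for each
 * subregister. Stops at immed/const sources, which have no def.
 */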
static void
chase_definition(struct reg_state *state)
{
   while (true) {
      struct ir3_instruction *instr = state->def->instr;
      switch (instr->opc) {
      case OPC_META_SPLIT: {
         struct ir3_register *new_def = instr->srcs[0]->def;
         unsigned offset = instr->split.off * reg_elem_size(new_def);
         *state = (struct reg_state){
            .def = new_def,
            .offset = state->offset + offset,
         };
         break;
      }
      case OPC_META_COLLECT: {
         unsigned src_idx = state->offset / reg_elem_size(state->def);
         unsigned src_offset = state->offset % reg_elem_size(state->def);
         struct ir3_register *new_def = instr->srcs[src_idx]->def;
         if (new_def) {
            *state = (struct reg_state){
               .def = new_def,
               .offset = src_offset,
            };
         } else {
            /* Bail on immed/const */
            return;
         }
         break;
      }
      case OPC_META_PARALLEL_COPY: {
         unsigned dst_idx = ~0;
         for (unsigned i = 0; i < instr->dsts_count; i++) {
            if (instr->dsts[i] == state->def) {
               dst_idx = i;
               break;
            }
         }
         assert(dst_idx != ~0);

         struct ir3_register *new_def = instr->srcs[dst_idx]->def;
         if (new_def) {
            state->def = new_def;
         } else {
            /* Bail on immed/const */
            return;
         }
         break;
      }
      default:
         return;
      }
   }
}

static void
dump_reg_state(struct reg_state *state)
{
   if (state->def == UNDEF) {
      fprintf(stderr, "no reaching definition");
   } else if (state->def == OVERDEF) {
      fprintf(stderr,
              "more than one reaching definition or partial definition");
   } else {
      /* The analysis should always remove UNKNOWN eventually. */
      assert(state->def != UNKNOWN);

      const char *prefix = "r";
      unsigned num = state->def->num / 4;
      if (state->def->flags & IR3_REG_PREDICATE) {
         prefix = "p";
         num = 0;
      }

      fprintf(stderr, "ssa_%u:%u(%s%s%u.%c) + %u", state->def->instr->serialno,
              state->def->name, (state->def->flags & IR3_REG_HALF) ? "h" : "",
              prefix, num, "xyzw"[state->def->num % 4], state->offset);
   }
}

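/* Check that the expected def (after chasing through transfer instructions)
 * actually reaches every subregister of "src". Unlike validate_simple(), a
 * failure here doesn't abort immediately, so that all bad sources are
 * reported before the failing shader is dumped.
 */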
static void
check_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr,
                   struct ir3_register *src)
{
   if (ctx->shared_ra && !(src->flags & IR3_REG_SHARED))
      return;
   struct file_state *file = ra_val_get_file(ctx, src);
   physreg_t physreg = ra_reg_get_physreg(src);
   for (unsigned i = 0; i < reg_size(src); i++) {
      struct reg_state expected = (struct reg_state){
         .def = get_original_def(src->def),
         .offset = i,
      };
      chase_definition(&expected);

      struct reg_state actual = file->regs[physreg + i];

      if (expected.def != actual.def || expected.offset != actual.offset) {
         fprintf(
            stderr,
            "ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n",
            src->def->instr->serialno, src->def->name, i);
         fprintf(stderr, "expected: ");
         dump_reg_state(&expected);
         fprintf(stderr, "\n");
         fprintf(stderr, "actual: ");
         dump_reg_state(&actual);
         fprintf(stderr, "\n");
         fprintf(stderr, "-> for instruction: ");
         ir3_print_instr(instr);
         ctx->failed = true;
      }
   }
}

static void
check_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_SPLIT || instr->opc == OPC_META_COLLECT ||
       instr->opc == OPC_META_PARALLEL_COPY || instr->opc == OPC_META_PHI) {
      return;
   }

   foreach_src_if (src, instr, validate_reg_is_src) {
      check_reaching_src(ctx, instr, src);
   }
}

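/* Check all sources in a block. Phi sources are checked at the end of the
 * corresponding predecessor rather than at the phi itself, since that is
 * where the value has to reach.
 */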
static void
check_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block)
{
   ctx->reaching = ctx->block_reaching[block->index];

   foreach_instr (instr, &block->instr_list) {
      check_reaching_instr(ctx, instr);
      propagate_instr(ctx, instr);
   }

   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->successors[i];
      if (!succ)
         continue;

      unsigned pred_idx = ir3_block_get_pred_index(succ, block);
      foreach_instr (instr, &succ->instr_list) {
         if (instr->opc != OPC_META_PHI)
            break;
         if (instr->srcs[pred_idx]->def)
            check_reaching_src(ctx, instr, instr->srcs[pred_idx]);
      }
   }
}

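/* Seed the start block with UNDEF for every register, run the dataflow
 * analysis to a fixed point, and then make a final pass checking each source
 * against its reaching definition.
 */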
static void
check_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir)
{
   ctx->block_reaching =
      rzalloc_array(ctx, struct reaching_state, ctx->block_count);

   struct reaching_state *start = &ctx->block_reaching[0];
   for (unsigned i = 0; i < ctx->full_size; i++)
      start->full.regs[i].def = UNDEF;
   for (unsigned i = 0; i < ctx->half_size; i++)
      start->half.regs[i].def = UNDEF;
   for (unsigned i = 0; i < RA_SHARED_SIZE; i++)
      start->shared.regs[i].def = UNDEF;
   for (unsigned i = 0; i < ctx->predicate_size; i++)
      start->predicate.regs[i].def = UNDEF;

   bool progress;
   do {
      progress = false;
      foreach_block (block, &ir->block_list) {
         progress |= propagate_block(ctx, block);
      }
   } while (progress);

   foreach_block (block, &ir->block_list) {
      check_reaching_block(ctx, block);
   }

   if (ctx->failed) {
      fprintf(stderr, "failing shader:\n");
      ir3_print(ir);
      abort();
   }
}

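/* Entry point: validates the RA result for "v", first checking that all
 * registers are in-bounds and then that each SSA def reaches all of its
 * uses. Compiled out in release (NDEBUG) builds.
 */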
void
ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
                unsigned half_size, unsigned block_count, bool shared_ra)
{
#ifdef NDEBUG
#define VALIDATE 0
#else
#define VALIDATE 1
#endif

   if (!VALIDATE)
      return;

   struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx);
   ctx->merged_regs = v->mergedregs;
   ctx->full_size = full_size;
   ctx->half_size = half_size;
   ctx->predicate_size = v->compiler->num_predicates * 2;
   ctx->block_count = block_count;
   ctx->shared_ra = shared_ra;
   if (ctx->shared_ra)
      ctx->spill_reaching = _mesa_pointer_hash_table_create(ctx);

   foreach_block (block, &v->ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         validate_simple(ctx, instr);
      }
   }

   check_reaching_defs(ctx, v->ir);

   ralloc_free(ctx);
}