1 /*
2 * Copyright 2009 Nicolai Haehnle.
3 * Copyright 2011 Tom Stellard <[email protected]>
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "radeon_program_pair.h"
8
9 #include <stdio.h>
10
11 #include "util/glheader.h"
12 #include "util/register_allocate.h"
13 #include "util/u_memory.h"
14 #include "util/ralloc.h"
15
16 #include "r300_fragprog_swizzle.h"
17 #include "radeon_compiler.h"
18 #include "radeon_compiler_util.h"
19 #include "radeon_dataflow.h"
20 #include "radeon_list.h"
21 #include "radeon_regalloc.h"
22 #include "radeon_variable.h"
23
/**
 * Read callback used while scanning the program for input register usage.
 *
 * Reads of any file other than RC_FILE_INPUT are ignored.  For an input
 * read, the input is flagged as used and every channel selected by \p mask
 * is marked live from instruction 0 up to the later of the current
 * instruction's IP and the recorded loop end (s->LoopEnd).
 *
 * @param data  The struct regalloc_state being filled in.
 * @param inst  Instruction performing the read.
 * @param file  Register file of the source being read.
 * @param index Register index inside \p file.
 * @param mask  Bitmask of the channels being read (bit 0..3).
 */
static void scan_read_callback(void *data, struct rc_instruction *inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct regalloc_state *state = data;
	struct register_info *input;
	unsigned int chan;

	if (file != RC_FILE_INPUT)
		return;

	input = &state->Input[index];
	input->Used = 1;

	for (chan = 0; chan < 4; chan++) {
		if ((mask >> chan) & 0x1) {
			input->Live[chan].Used = 1;
			input->Live[chan].Start = 0;
			/* A read inside a loop keeps the input alive until the
			 * loop ends, not just until this instruction. */
			input->Live[chan].End =
				state->LoopEnd > inst->IP ? state->LoopEnd : inst->IP;
		}
	}
}
47
/**
 * Remap callback: replace a register index with its allocated index.
 *
 * Inputs are always looked up in s->Input.  Temporaries are only looked up
 * (in s->Temporary) when the simple allocator is active (s->Simple); every
 * other register is left untouched.  Only \p index is rewritten, and only
 * if the register has been marked Allocated.
 *
 * @param data  The struct regalloc_state holding the allocation tables.
 * @param inst  Instruction being remapped (unused here).
 * @param file  Register file of the operand; read but never modified.
 * @param index In/out register index.
 */
static void remap_register(void *data, struct rc_instruction *inst,
		rc_register_file *file, unsigned int *index)
{
	struct regalloc_state *state = data;
	const struct register_info *info;

	if (*file == RC_FILE_INPUT) {
		info = &state->Input[*index];
	} else if (*file == RC_FILE_TEMPORARY && state->Simple) {
		info = &state->Temporary[*index];
	} else {
		return;
	}

	if (!info->Allocated)
		return;

	*index = info->Index;
}
65
alloc_input_simple(void * data,unsigned int input,unsigned int hwreg)66 static void alloc_input_simple(void * data, unsigned int input,
67 unsigned int hwreg)
68 {
69 struct regalloc_state * s = data;
70
71 if (input >= s->NumInputs)
72 return;
73
74 s->Input[input].Allocated = 1;
75 s->Input[input].File = RC_FILE_TEMPORARY;
76 s->Input[input].Index = hwreg;
77 }
78
79 /* This functions offsets the temporary register indices by the number
80 * of input registers, because input registers are actually temporaries and
81 * should not occupy the same space.
82 *
83 * This pass is supposed to be used to maintain correct allocation of inputs
84 * if the standard register allocation is disabled. */
do_regalloc_inputs_only(struct regalloc_state * s)85 static void do_regalloc_inputs_only(struct regalloc_state * s)
86 {
87 for (unsigned i = 0; i < s->NumTemporaries; i++) {
88 s->Temporary[i].Allocated = 1;
89 s->Temporary[i].File = RC_FILE_TEMPORARY;
90 s->Temporary[i].Index = i + s->NumInputs;
91 }
92 }
93
/**
 * @return 1 if \p op is one of the derivative opcodes (DDX or DDY),
 * 0 otherwise.
 */
static unsigned int is_derivative(rc_opcode op)
{
	switch (op) {
	case RC_OPCODE_DDX:
	case RC_OPCODE_DDY:
		return 1;
	default:
		return 0;
	}
}
98
/* State handed to variable_get_class_read_cb() through its userdata
 * pointer. */
struct variable_get_class_cb_data {
	/* Flag owned by the caller; the callback clears it to 0 when a
	 * source swizzle would stop being native. */
	unsigned int *can_change_writemask;
	/* Swizzle used to convert source swizzles to the candidate
	 * writemask. */
	unsigned int conversion_swizzle;
	/* Compiler whose SwizzleCaps are consulted by the callback. */
	struct radeon_compiler *c;
};
104
variable_get_class_read_cb(void * userdata,struct rc_instruction * inst,struct rc_pair_instruction_arg * arg,struct rc_pair_instruction_source * src)105 static void variable_get_class_read_cb(
106 void * userdata,
107 struct rc_instruction * inst,
108 struct rc_pair_instruction_arg * arg,
109 struct rc_pair_instruction_source * src)
110 {
111 struct variable_get_class_cb_data * d = userdata;
112 unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
113 d->conversion_swizzle);
114 /* We can't just call r300_swizzle_is_native basic here, because it ignores the
115 * extra requirements for presubtract. However, after pair translation we no longer
116 * have the rc_src_register required for the native swizzle, so we have to
117 * reconstruct it. */
118 struct rc_src_register reg = {};
119 reg.Swizzle = new_swizzle;
120 reg.File = src->File;
121
122 assert(inst->Type == RC_INSTRUCTION_PAIR);
123 /* The opcode is unimportant, we can't have TEX here. */
124 if (!d->c->SwizzleCaps->IsNative(RC_OPCODE_MAD, reg)) {
125 *d->can_change_writemask = 0;
126 }
127 }
128
variable_get_class(struct rc_variable * variable,const struct rc_class * classes)129 static unsigned variable_get_class(
130 struct rc_variable * variable,
131 const struct rc_class * classes)
132 {
133 unsigned int i;
134 unsigned int can_change_writemask= 1;
135 unsigned int writemask = rc_variable_writemask_sum(variable);
136 struct rc_list * readers = rc_variable_readers_union(variable);
137 int class_index;
138
139 if (!variable->C->is_r500) {
140 struct rc_class c;
141 struct rc_variable * var_ptr;
142 /* The assumption here is that if an instruction has type
143 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
144 * r300 and r400 can't swizzle the result of a TEX lookup. */
145 for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
146 if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
147 writemask = RC_MASK_XYZW;
148 }
149 }
150
151 /* Check if it is possible to do swizzle packing for r300/r400
152 * without creating non-native swizzles. */
153 class_index = rc_find_class(classes, writemask, 3);
154 if (class_index < 0) {
155 goto error;
156 }
157 c = classes[class_index];
158 if (c.WritemaskCount == 1) {
159 goto done;
160 }
161 for (i = 0; i < c.WritemaskCount; i++) {
162 struct rc_variable * var_ptr;
163 for (var_ptr = variable; var_ptr;
164 var_ptr = var_ptr->Friend) {
165 int j;
166 unsigned int conversion_swizzle =
167 rc_make_conversion_swizzle(
168 writemask, c.Writemasks[i]);
169 struct variable_get_class_cb_data d;
170 d.can_change_writemask = &can_change_writemask;
171 d.conversion_swizzle = conversion_swizzle;
172 d.c = variable->C;
173 /* If we get this far var_ptr->Inst has to
174 * be a pair instruction. If variable or any
175 * of its friends are normal instructions,
176 * then the writemask will be set to RC_MASK_XYZW
177 * and the function will return before it gets
178 * here. */
179 rc_pair_for_all_reads_arg(var_ptr->Inst,
180 variable_get_class_read_cb, &d);
181
182 for (j = 0; j < var_ptr->ReaderCount; j++) {
183 unsigned int old_swizzle;
184 unsigned int new_swizzle;
185 struct rc_reader r = var_ptr->Readers[j];
186 if (r.Inst->Type ==
187 RC_INSTRUCTION_PAIR ) {
188 old_swizzle = r.U.P.Arg->Swizzle;
189 } else {
190 /* Source operands of TEX
191 * instructions can't be
192 * swizzle on r300/r400 GPUs.
193 */
194 can_change_writemask = 0;
195 break;
196 }
197 new_swizzle = rc_rewrite_swizzle(
198 old_swizzle, conversion_swizzle);
199 if (!r300_swizzle_is_native_basic(
200 new_swizzle)) {
201 can_change_writemask = 0;
202 break;
203 }
204 }
205 if (!can_change_writemask) {
206 break;
207 }
208 }
209 if (!can_change_writemask) {
210 break;
211 }
212 }
213 }
214
215 if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
216 /* DDX/DDY seem to always fail when their writemasks are
217 * changed.*/
218 if (is_derivative(variable->Inst->U.P.RGB.Opcode)
219 || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
220 can_change_writemask = 0;
221 }
222 }
223 for ( ; readers; readers = readers->Next) {
224 struct rc_reader * r = readers->Item;
225 if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
226 if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
227 can_change_writemask = 0;
228 break;
229 }
230 /* DDX/DDY also fail when their swizzles are changed. */
231 if (is_derivative(r->Inst->U.P.RGB.Opcode)
232 || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
233 can_change_writemask = 0;
234 break;
235 }
236 }
237 }
238
239 class_index = rc_find_class(classes, writemask,
240 can_change_writemask ? 3 : 1);
241 done:
242 if (class_index > -1) {
243 return classes[class_index].ID;
244 } else {
245 error:
246 rc_error(variable->C,
247 "Could not find class for index=%u mask=%u\n",
248 variable->Dst.Index, writemask);
249 return 0;
250 }
251 }
252
do_advanced_regalloc(struct regalloc_state * s)253 static void do_advanced_regalloc(struct regalloc_state * s)
254 {
255
256 unsigned int i, input_node, node_count, node_index;
257 struct ra_class ** node_classes;
258 struct rc_instruction * inst;
259 struct rc_list * var_ptr;
260 struct rc_list * variables;
261 struct ra_graph * graph;
262 const struct rc_regalloc_state *ra_state = s->C->regalloc_state;
263
264 /* Get list of program variables */
265 variables = rc_get_variables(s->C);
266 node_count = rc_list_count(variables);
267 node_classes = memory_pool_malloc(&s->C->Pool,
268 node_count * sizeof(struct ra_class *));
269
270 for (var_ptr = variables, node_index = 0; var_ptr;
271 var_ptr = var_ptr->Next, node_index++) {
272 unsigned int class_index;
273 /* Compute the live intervals */
274 rc_variable_compute_live_intervals(var_ptr->Item);
275
276 class_index = variable_get_class(var_ptr->Item, ra_state->class_list);
277 node_classes[node_index] = ra_state->classes[class_index];
278 }
279
280
281 /* Calculate live intervals for input registers */
282 for (inst = s->C->Program.Instructions.Next;
283 inst != &s->C->Program.Instructions;
284 inst = inst->Next) {
285 rc_opcode op = rc_get_flow_control_inst(inst);
286 if (op == RC_OPCODE_BGNLOOP) {
287 struct rc_instruction * endloop =
288 rc_match_bgnloop(inst);
289 if (endloop->IP > s->LoopEnd) {
290 s->LoopEnd = endloop->IP;
291 }
292 }
293 rc_for_all_reads_mask(inst, scan_read_callback, s);
294 }
295
296 /* Compute the writemask for inputs. */
297 for (i = 0; i < s->NumInputs; i++) {
298 unsigned int chan, writemask = 0;
299 for (chan = 0; chan < 4; chan++) {
300 if (s->Input[i].Live[chan].Used) {
301 writemask |= (1 << chan);
302 }
303 }
304 s->Input[i].Writemask = writemask;
305 }
306
307 graph = ra_alloc_interference_graph(ra_state->regs,
308 node_count + s->NumInputs);
309
310 for (node_index = 0; node_index < node_count; node_index++) {
311 ra_set_node_class(graph, node_index, node_classes[node_index]);
312 }
313
314 rc_build_interference_graph(graph, variables);
315
316 /* Add input registers to the interference graph */
317 for (i = 0, input_node = 0; i< s->NumInputs; i++) {
318 if (!s->Input[i].Writemask) {
319 continue;
320 }
321 for (var_ptr = variables, node_index = 0;
322 var_ptr; var_ptr = var_ptr->Next, node_index++) {
323 struct rc_variable * var = var_ptr->Item;
324 if (rc_overlap_live_intervals_array(s->Input[i].Live,
325 var->Live)) {
326 ra_add_node_interference(graph, node_index,
327 node_count + input_node);
328 }
329 }
330 /* Manually allocate a register for this input */
331 ra_set_node_reg(graph, node_count + input_node, get_reg_id(
332 s->Input[i].Index, s->Input[i].Writemask));
333 input_node++;
334 }
335
336 if (!ra_allocate(graph)) {
337 rc_error(s->C, "Ran out of hardware temporaries\n");
338 ralloc_free(graph);
339 return;
340 }
341
342 /* Rewrite the registers */
343 for (var_ptr = variables, node_index = 0; var_ptr;
344 var_ptr = var_ptr->Next, node_index++) {
345 int reg = ra_get_node_reg(graph, node_index);
346 unsigned int writemask = reg_get_writemask(reg);
347 unsigned int index = reg_get_index(reg);
348 struct rc_variable * var = var_ptr->Item;
349
350 if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
351 writemask = rc_variable_writemask_sum(var);
352 }
353
354 if (var->Dst.File == RC_FILE_INPUT) {
355 continue;
356 }
357 rc_variable_change_dst(var, index, writemask);
358 }
359
360 ralloc_free(graph);
361 }
362
363 /**
364 * @param user This parameter should be a pointer to an integer value. If this
365 * integer value is zero, then a simple register allocator will be used that
366 * only allocates space for input registers (\sa do_regalloc_inputs_only). If
367 * user is non-zero, then the regular register allocator will be used
368 * (\sa do_regalloc).
369 */
rc_pair_regalloc(struct radeon_compiler * cc,void * user)370 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
371 {
372 struct r300_fragment_program_compiler *c =
373 (struct r300_fragment_program_compiler*)cc;
374 struct regalloc_state s;
375 int * do_full_regalloc = (int*)user;
376
377 memset(&s, 0, sizeof(s));
378 s.C = cc;
379 s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
380 s.Input = memory_pool_malloc(&cc->Pool,
381 s.NumInputs * sizeof(struct register_info));
382 memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
383
384 s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
385 s.Temporary = memory_pool_malloc(&cc->Pool,
386 s.NumTemporaries * sizeof(struct register_info));
387 memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
388
389 rc_recompute_ips(s.C);
390
391 c->AllocateHwInputs(c, &alloc_input_simple, &s);
392 if (*do_full_regalloc) {
393 do_advanced_regalloc(&s);
394 } else {
395 s.Simple = 1;
396 do_regalloc_inputs_only(&s);
397 }
398
399 /* Rewrite inputs and if we are doing the simple allocation, rewrite
400 * temporaries too. */
401 for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
402 inst != &s.C->Program.Instructions;
403 inst = inst->Next) {
404 rc_remap_registers(inst, &remap_register, &s);
405 }
406 }
407