xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2009 Nicolai Haehnle.
3  * Copyright 2011 Tom Stellard <[email protected]>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "radeon_program_pair.h"
8 
9 #include <stdio.h>
10 
11 #include "util/glheader.h"
12 #include "util/register_allocate.h"
13 #include "util/u_memory.h"
14 #include "util/ralloc.h"
15 
16 #include "r300_fragprog_swizzle.h"
17 #include "radeon_compiler.h"
18 #include "radeon_compiler_util.h"
19 #include "radeon_dataflow.h"
20 #include "radeon_list.h"
21 #include "radeon_regalloc.h"
22 #include "radeon_variable.h"
23 
/**
 * Per-read callback (passed to rc_for_all_reads_mask) that records reads of
 * input registers.
 *
 * Reads from any file other than RC_FILE_INPUT are ignored.  For an input
 * read, the register is flagged as used and every channel selected by
 * \p mask gets a live interval that starts at 0 and ends at whichever is
 * larger: the recorded loop end (s->LoopEnd) or the IP of \p inst.
 *
 * \param data  Pointer to the struct regalloc_state being filled in.
 * \param inst  Instruction performing the read.
 * \param file  Register file being read.
 * \param index Register index within \p file.
 * \param mask  Channel mask of the read; only bits 0..3 are examined.
 */
static void scan_read_callback(void * data, struct rc_instruction * inst,
		rc_register_file file, unsigned int index, unsigned int mask)
{
	struct regalloc_state *state = data;
	struct register_info *input;
	unsigned int chan;

	if (file != RC_FILE_INPUT)
		return;

	input = &state->Input[index];
	input->Used = 1;

	for (chan = 0; chan < 4; chan++) {
		if ((mask >> chan) & 0x1) {
			input->Live[chan].Used = 1;
			input->Live[chan].Start = 0;
			/* Never let the interval end before the recorded loop end. */
			input->Live[chan].End =
				state->LoopEnd > inst->IP ? state->LoopEnd : inst->IP;
		}
	}
}
47 
/**
 * Register remapping callback (passed to rc_remap_registers).
 *
 * Input registers are always looked up in s->Input; temporaries are looked
 * up in s->Temporary only when the simple allocator is active (s->Simple).
 * Registers of any other file are left alone.  When the matching
 * register_info is marked Allocated, \p index is overwritten with the
 * allocated index; \p file is never modified here.
 *
 * \param data  Pointer to the struct regalloc_state holding the mapping.
 * \param inst  Instruction being rewritten (unused).
 * \param file  In: register file of the operand.
 * \param index In/out: register index of the operand.
 */
static void remap_register(void * data, struct rc_instruction * inst,
		rc_register_file * file, unsigned int * index)
{
	struct regalloc_state *state = data;
	const struct register_info *info;

	switch (*file) {
	case RC_FILE_TEMPORARY:
		/* Temporaries are only remapped by the simple allocator. */
		if (!state->Simple)
			return;
		info = &state->Temporary[*index];
		break;
	case RC_FILE_INPUT:
		info = &state->Input[*index];
		break;
	default:
		return;
	}

	if (info->Allocated)
		*index = info->Index;
}
65 
alloc_input_simple(void * data,unsigned int input,unsigned int hwreg)66 static void alloc_input_simple(void * data, unsigned int input,
67 							unsigned int hwreg)
68 {
69 	struct regalloc_state * s = data;
70 
71 	if (input >= s->NumInputs)
72 		return;
73 
74 	s->Input[input].Allocated = 1;
75 	s->Input[input].File = RC_FILE_TEMPORARY;
76 	s->Input[input].Index = hwreg;
77 }
78 
79 /* This functions offsets the temporary register indices by the number
80  * of input registers, because input registers are actually temporaries and
81  * should not occupy the same space.
82  *
83  * This pass is supposed to be used to maintain correct allocation of inputs
84  * if the standard register allocation is disabled. */
do_regalloc_inputs_only(struct regalloc_state * s)85 static void do_regalloc_inputs_only(struct regalloc_state * s)
86 {
87 	for (unsigned i = 0; i < s->NumTemporaries; i++) {
88 		s->Temporary[i].Allocated = 1;
89 		s->Temporary[i].File = RC_FILE_TEMPORARY;
90 		s->Temporary[i].Index = i + s->NumInputs;
91 	}
92 }
93 
/**
 * \return 1 if \p op is RC_OPCODE_DDX or RC_OPCODE_DDY, 0 for any other
 * opcode.
 */
static unsigned int is_derivative(rc_opcode op)
{
	switch (op) {
	case RC_OPCODE_DDX:
	case RC_OPCODE_DDY:
		return 1;
	default:
		return 0;
	}
}
98 
/**
 * User data handed to variable_get_class_read_cb().
 */
struct variable_get_class_cb_data {
	/** Out flag; the callback clears it when a swizzle would not be native. */
	unsigned int *can_change_writemask;

	/** Conversion swizzle applied to each argument swizzle before testing. */
	unsigned int conversion_swizzle;

	/** Compiler whose SwizzleCaps are consulted. */
	struct radeon_compiler *c;
};
104 
variable_get_class_read_cb(void * userdata,struct rc_instruction * inst,struct rc_pair_instruction_arg * arg,struct rc_pair_instruction_source * src)105 static void variable_get_class_read_cb(
106 	void * userdata,
107 	struct rc_instruction * inst,
108 	struct rc_pair_instruction_arg * arg,
109 	struct rc_pair_instruction_source * src)
110 {
111 	struct variable_get_class_cb_data * d = userdata;
112 	unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
113 							d->conversion_swizzle);
114 	/* We can't just call r300_swizzle_is_native basic here, because it ignores the
115 	 * extra requirements for presubtract. However, after pair translation we no longer
116 	 * have the rc_src_register required for the native swizzle, so we have to
117 	 * reconstruct it. */
118 	struct rc_src_register reg = {};
119 	reg.Swizzle = new_swizzle;
120 	reg.File = src->File;
121 
122 	assert(inst->Type == RC_INSTRUCTION_PAIR);
123 	/* The opcode is unimportant, we can't have TEX here. */
124 	if (!d->c->SwizzleCaps->IsNative(RC_OPCODE_MAD, reg)) {
125 		*d->can_change_writemask = 0;
126 	}
127 }
128 
variable_get_class(struct rc_variable * variable,const struct rc_class * classes)129 static unsigned variable_get_class(
130 	struct rc_variable * variable,
131 	const struct rc_class * classes)
132 {
133 	unsigned int i;
134 	unsigned int can_change_writemask= 1;
135 	unsigned int writemask = rc_variable_writemask_sum(variable);
136 	struct rc_list * readers = rc_variable_readers_union(variable);
137 	int class_index;
138 
139 	if (!variable->C->is_r500) {
140 		struct rc_class c;
141 		struct rc_variable * var_ptr;
142 		/* The assumption here is that if an instruction has type
143 		 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
144 		 * r300 and r400 can't swizzle the result of a TEX lookup. */
145 		for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
146 			if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
147 				writemask = RC_MASK_XYZW;
148 			}
149 		}
150 
151 		/* Check if it is possible to do swizzle packing for r300/r400
152 		 * without creating non-native swizzles. */
153 		class_index = rc_find_class(classes, writemask, 3);
154 		if (class_index < 0) {
155 			goto error;
156 		}
157 		c = classes[class_index];
158 		if (c.WritemaskCount == 1) {
159 			goto done;
160 		}
161 		for (i = 0; i < c.WritemaskCount; i++) {
162 			struct rc_variable * var_ptr;
163 			for (var_ptr = variable; var_ptr;
164 						var_ptr = var_ptr->Friend) {
165 				int j;
166 				unsigned int conversion_swizzle =
167 						rc_make_conversion_swizzle(
168 						writemask, c.Writemasks[i]);
169 				struct variable_get_class_cb_data d;
170 				d.can_change_writemask = &can_change_writemask;
171 				d.conversion_swizzle = conversion_swizzle;
172 				d.c = variable->C;
173 				/* If we get this far var_ptr->Inst has to
174 				 * be a pair instruction.  If variable or any
175 				 * of its friends are normal instructions,
176 				 * then the writemask will be set to RC_MASK_XYZW
177 				 * and the function will return before it gets
178 				 * here. */
179 				rc_pair_for_all_reads_arg(var_ptr->Inst,
180 					variable_get_class_read_cb, &d);
181 
182 				for (j = 0; j < var_ptr->ReaderCount; j++) {
183 					unsigned int old_swizzle;
184 					unsigned int new_swizzle;
185 					struct rc_reader r = var_ptr->Readers[j];
186 					if (r.Inst->Type ==
187 							RC_INSTRUCTION_PAIR ) {
188 						old_swizzle = r.U.P.Arg->Swizzle;
189 					} else {
190 						/* Source operands of TEX
191 						 * instructions can't be
192 						 * swizzle on r300/r400 GPUs.
193 						 */
194 						can_change_writemask = 0;
195 						break;
196 					}
197 					new_swizzle = rc_rewrite_swizzle(
198 						old_swizzle, conversion_swizzle);
199 					if (!r300_swizzle_is_native_basic(
200 								new_swizzle)) {
201 						can_change_writemask = 0;
202 						break;
203 					}
204 				}
205 				if (!can_change_writemask) {
206 					break;
207 				}
208 			}
209 			if (!can_change_writemask) {
210 				break;
211 			}
212 		}
213 	}
214 
215 	if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
216 		/* DDX/DDY seem to always fail when their writemasks are
217 		 * changed.*/
218 		if (is_derivative(variable->Inst->U.P.RGB.Opcode)
219 		    || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
220 			can_change_writemask = 0;
221 		}
222 	}
223 	for ( ; readers; readers = readers->Next) {
224 		struct rc_reader * r = readers->Item;
225 		if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
226 			if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
227 				can_change_writemask = 0;
228 				break;
229 			}
230 			/* DDX/DDY also fail when their swizzles are changed. */
231 			if (is_derivative(r->Inst->U.P.RGB.Opcode)
232 			    || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
233 				can_change_writemask = 0;
234 				break;
235 			}
236 		}
237 	}
238 
239 	class_index = rc_find_class(classes, writemask,
240 						can_change_writemask ? 3 : 1);
241 done:
242 	if (class_index > -1) {
243 		return classes[class_index].ID;
244 	} else {
245 error:
246 		rc_error(variable->C,
247 				"Could not find class for index=%u mask=%u\n",
248 				variable->Dst.Index, writemask);
249 		return 0;
250 	}
251 }
252 
do_advanced_regalloc(struct regalloc_state * s)253 static void do_advanced_regalloc(struct regalloc_state * s)
254 {
255 
256 	unsigned int i, input_node, node_count, node_index;
257 	struct ra_class ** node_classes;
258 	struct rc_instruction * inst;
259 	struct rc_list * var_ptr;
260 	struct rc_list * variables;
261 	struct ra_graph * graph;
262 	const struct rc_regalloc_state *ra_state = s->C->regalloc_state;
263 
264 	/* Get list of program variables */
265 	variables = rc_get_variables(s->C);
266 	node_count = rc_list_count(variables);
267 	node_classes = memory_pool_malloc(&s->C->Pool,
268 			node_count * sizeof(struct ra_class *));
269 
270 	for (var_ptr = variables, node_index = 0; var_ptr;
271 					var_ptr = var_ptr->Next, node_index++) {
272 		unsigned int class_index;
273 		/* Compute the live intervals */
274 		rc_variable_compute_live_intervals(var_ptr->Item);
275 
276 		class_index = variable_get_class(var_ptr->Item, ra_state->class_list);
277 		node_classes[node_index] = ra_state->classes[class_index];
278 	}
279 
280 
281 	/* Calculate live intervals for input registers */
282 	for (inst = s->C->Program.Instructions.Next;
283 					inst != &s->C->Program.Instructions;
284 					inst = inst->Next) {
285 		rc_opcode op = rc_get_flow_control_inst(inst);
286 		if (op == RC_OPCODE_BGNLOOP) {
287 			struct rc_instruction * endloop =
288 							rc_match_bgnloop(inst);
289 			if (endloop->IP > s->LoopEnd) {
290 				s->LoopEnd = endloop->IP;
291 			}
292 		}
293 		rc_for_all_reads_mask(inst, scan_read_callback, s);
294 	}
295 
296 	/* Compute the writemask for inputs. */
297 	for (i = 0; i < s->NumInputs; i++) {
298 		unsigned int chan, writemask = 0;
299 		for (chan = 0; chan < 4; chan++) {
300 			if (s->Input[i].Live[chan].Used) {
301 				writemask |= (1 << chan);
302 			}
303 		}
304 		s->Input[i].Writemask = writemask;
305 	}
306 
307 	graph = ra_alloc_interference_graph(ra_state->regs,
308 						node_count + s->NumInputs);
309 
310 	for (node_index = 0; node_index < node_count; node_index++) {
311 		ra_set_node_class(graph, node_index, node_classes[node_index]);
312 	}
313 
314 	rc_build_interference_graph(graph, variables);
315 
316 	/* Add input registers to the interference graph */
317 	for (i = 0, input_node = 0; i< s->NumInputs; i++) {
318 		if (!s->Input[i].Writemask) {
319 			continue;
320 		}
321 		for (var_ptr = variables, node_index = 0;
322 				var_ptr; var_ptr = var_ptr->Next, node_index++) {
323 			struct rc_variable * var = var_ptr->Item;
324 			if (rc_overlap_live_intervals_array(s->Input[i].Live,
325 								var->Live)) {
326 				ra_add_node_interference(graph, node_index,
327 						node_count + input_node);
328 			}
329 		}
330 		/* Manually allocate a register for this input */
331 		ra_set_node_reg(graph, node_count + input_node, get_reg_id(
332 				s->Input[i].Index, s->Input[i].Writemask));
333 		input_node++;
334 	}
335 
336 	if (!ra_allocate(graph)) {
337 		rc_error(s->C, "Ran out of hardware temporaries\n");
338                 ralloc_free(graph);
339 		return;
340 	}
341 
342 	/* Rewrite the registers */
343 	for (var_ptr = variables, node_index = 0; var_ptr;
344 				var_ptr = var_ptr->Next, node_index++) {
345 		int reg = ra_get_node_reg(graph, node_index);
346 		unsigned int writemask = reg_get_writemask(reg);
347 		unsigned int index = reg_get_index(reg);
348 		struct rc_variable * var = var_ptr->Item;
349 
350 		if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
351 			writemask = rc_variable_writemask_sum(var);
352 		}
353 
354 		if (var->Dst.File == RC_FILE_INPUT) {
355 			continue;
356 		}
357 		rc_variable_change_dst(var, index, writemask);
358 	}
359 
360 	ralloc_free(graph);
361 }
362 
363 /**
364  * @param user This parameter should be a pointer to an integer value.  If this
365  * integer value is zero, then a simple register allocator will be used that
366  * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
367  * user is non-zero, then the regular register allocator will be used
368  * (\sa do_regalloc).
369   */
rc_pair_regalloc(struct radeon_compiler * cc,void * user)370 void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
371 {
372 	struct r300_fragment_program_compiler *c =
373 				(struct r300_fragment_program_compiler*)cc;
374 	struct regalloc_state s;
375 	int * do_full_regalloc = (int*)user;
376 
377 	memset(&s, 0, sizeof(s));
378 	s.C = cc;
379 	s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
380 	s.Input = memory_pool_malloc(&cc->Pool,
381 			s.NumInputs * sizeof(struct register_info));
382 	memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
383 
384 	s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
385 	s.Temporary = memory_pool_malloc(&cc->Pool,
386 			s.NumTemporaries * sizeof(struct register_info));
387 	memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
388 
389 	rc_recompute_ips(s.C);
390 
391 	c->AllocateHwInputs(c, &alloc_input_simple, &s);
392 	if (*do_full_regalloc) {
393 		do_advanced_regalloc(&s);
394 	} else {
395 		s.Simple = 1;
396 		do_regalloc_inputs_only(&s);
397 	}
398 
399 	/* Rewrite inputs and if we are doing the simple allocation, rewrite
400 	 * temporaries too. */
401 	for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
402 					inst != &s.C->Program.Instructions;
403 					inst = inst->Next) {
404 		rc_remap_registers(inst, &remap_register, &s);
405 	}
406 }
407