xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r300/compiler/radeon_vert_fc.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  * Author: Tom Stellard <[email protected]>
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "radeon_compiler.h"
8 #include "radeon_compiler_util.h"
9 #include "radeon_dataflow.h"
10 #include "radeon_program.h"
11 #include "radeon_program_constants.h"
12 
13 struct vert_fc_state {
14 	struct radeon_compiler *C;
15 	unsigned BranchDepth;
16 	unsigned LoopDepth;
17 	unsigned LoopsReserved;
18 	int PredStack[R500_PVS_MAX_LOOP_DEPTH];
19 	int PredicateReg;
20 };
21 
build_pred_src(struct rc_src_register * src,struct vert_fc_state * fc_state)22 static void build_pred_src(
23 	struct rc_src_register * src,
24 	struct vert_fc_state * fc_state)
25 {
26 	src->Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED,
27 					RC_SWIZZLE_UNUSED, RC_SWIZZLE_W);
28 	src->File = RC_FILE_TEMPORARY;
29 	src->Index = fc_state->PredicateReg;
30 }
31 
build_pred_dst(struct rc_dst_register * dst,struct vert_fc_state * fc_state)32 static void build_pred_dst(
33 	struct rc_dst_register * dst,
34 	struct vert_fc_state * fc_state)
35 {
36 	dst->WriteMask = RC_MASK_W;
37 	dst->File = RC_FILE_TEMPORARY;
38 	dst->Index = fc_state->PredicateReg;
39 }
40 
/*
 * Write callback handed to rc_for_all_writes_mask().
 *
 * userdata is an array of per-register write masks.  Writes to
 * temporaries with an index below R300_VS_MAX_TEMPS are OR-ed into the
 * matching entry; every other write is ignored.  inst is not used.
 */
static void mark_write(void * userdata,	struct rc_instruction * inst,
		rc_register_file file,	unsigned int index, unsigned int mask)
{
	unsigned int * masks = userdata;

	if (file == RC_FILE_TEMPORARY && index < R300_VS_MAX_TEMPS)
		masks[index] |= mask;
}
54 
reserve_predicate_reg(struct vert_fc_state * fc_state)55 static int reserve_predicate_reg(struct vert_fc_state * fc_state)
56 {
57 	int i;
58 	unsigned int writemasks[RC_REGISTER_MAX_INDEX];
59 	struct rc_instruction * inst;
60 	memset(writemasks, 0, sizeof(writemasks));
61 	for(inst = fc_state->C->Program.Instructions.Next;
62 				inst != &fc_state->C->Program.Instructions;
63 				inst = inst->Next) {
64 		rc_for_all_writes_mask(inst, mark_write, writemasks);
65 	}
66 
67 	for(i = 0; i < fc_state->C->max_temp_regs; i++) {
68 		/* Most of the control flow instructions only write the
69 		 * W component of the Predicate Register, but
70 		 * the docs say that ME_PRED_SET_CLR and
71 		 * ME_PRED_SET_RESTORE write all components of the
72 		 * register, so we must reserve a register that has
73 		 * all its components free. */
74 		if (!writemasks[i]) {
75 			fc_state->PredicateReg = i;
76 			break;
77 		}
78 	}
79 	if (i == fc_state->C->max_temp_regs) {
80 		rc_error(fc_state->C, "No free temporary to use for"
81 				" predicate stack counter.\n");
82 		return -1;
83 	}
84 	return 1;
85 }
86 
lower_bgnloop(struct rc_instruction * inst,struct vert_fc_state * fc_state)87 static void lower_bgnloop(
88 	struct rc_instruction * inst,
89 	struct vert_fc_state * fc_state)
90 {
91 	struct rc_instruction * new_inst =
92 			rc_insert_new_instruction(fc_state->C, inst->Prev);
93 
94 	if ((!fc_state->C->is_r500
95 		&& fc_state->LoopsReserved >= R300_VS_MAX_LOOP_DEPTH)
96 	     || fc_state->LoopsReserved >= R500_PVS_MAX_LOOP_DEPTH) {
97 		rc_error(fc_state->C, "Loops are nested too deep.");
98 		return;
99 	}
100 
101 	if (fc_state->LoopDepth == 0 && fc_state->BranchDepth == 0) {
102 		if (fc_state->PredicateReg == -1) {
103 			if (reserve_predicate_reg(fc_state) == -1) {
104 				return;
105 			}
106 		}
107 
108 		/* Initialize the predicate bit to true. */
109 		new_inst->U.I.Opcode = RC_ME_PRED_SEQ;
110 		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
111 		new_inst->U.I.SrcReg[0].Index = 0;
112 		new_inst->U.I.SrcReg[0].File = RC_FILE_NONE;
113 		new_inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
114 	} else {
115 		fc_state->PredStack[fc_state->LoopDepth] =
116 						fc_state->PredicateReg;
117 		/* Copy the current predicate value to this loop's
118 		 * predicate register */
119 
120 		/* Use the old predicate value for src0 */
121 		build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
122 
123 		/* Reserve this loop's predicate register */
124 		if (reserve_predicate_reg(fc_state) == -1) {
125 			return;
126 		}
127 
128 		/* Copy the old predicate value to the new register */
129 		new_inst->U.I.Opcode = RC_OPCODE_ADD;
130 		build_pred_dst(&new_inst->U.I.DstReg, fc_state);
131 		new_inst->U.I.SrcReg[1].Index = 0;
132 		new_inst->U.I.SrcReg[1].File = RC_FILE_NONE;
133 		new_inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_0000;
134 	}
135 
136 }
137 
lower_brk(struct rc_instruction * inst,struct vert_fc_state * fc_state)138 static void lower_brk(
139 	struct rc_instruction * inst,
140 	struct vert_fc_state * fc_state)
141 {
142 	if (fc_state->LoopDepth == 1) {
143 		inst->U.I.Opcode = RC_OPCODE_RCP;
144 		inst->U.I.DstReg.Pred = RC_PRED_SET;
145 		inst->U.I.SrcReg[0].Index = 0;
146 		inst->U.I.SrcReg[0].File = RC_FILE_NONE;
147 		inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
148 	} else {
149 		inst->U.I.Opcode = RC_ME_PRED_SET_CLR;
150 		inst->U.I.DstReg.Pred = RC_PRED_SET;
151 	}
152 
153 	build_pred_dst(&inst->U.I.DstReg, fc_state);
154 }
155 
lower_endloop(struct rc_instruction * inst,struct vert_fc_state * fc_state)156 static void lower_endloop(
157 	struct rc_instruction * inst,
158 	struct vert_fc_state * fc_state)
159 {
160 	struct rc_instruction * new_inst =
161 			rc_insert_new_instruction(fc_state->C, inst);
162 
163 	new_inst->U.I.Opcode = RC_ME_PRED_SET_RESTORE;
164 	build_pred_dst(&new_inst->U.I.DstReg, fc_state);
165 	/* Restore the previous predicate register. */
166 	fc_state->PredicateReg = fc_state->PredStack[fc_state->LoopDepth - 1];
167 	build_pred_src(&new_inst->U.I.SrcReg[0], fc_state);
168 }
169 
lower_if(struct rc_instruction * inst,struct vert_fc_state * fc_state)170 static void lower_if(
171 	struct rc_instruction * inst,
172 	struct vert_fc_state * fc_state)
173 {
174 	/* Reserve a temporary to use as our predicate stack counter, if we
175 	 * don't already have one. */
176 	if (fc_state->PredicateReg == -1) {
177 		/* If we are inside a loop, the Predicate Register should
178 		 * have already been defined. */
179 		assert(fc_state->LoopDepth == 0);
180 
181 		if (reserve_predicate_reg(fc_state) == -1) {
182 			return;
183 		}
184 	}
185 
186 	if (fc_state->BranchDepth == 0 && fc_state->LoopDepth == 0) {
187 		inst->U.I.Opcode = RC_ME_PRED_SNEQ;
188 	} else {
189 		unsigned swz;
190 		inst->U.I.Opcode = RC_VE_PRED_SNEQ_PUSH;
191 		memcpy(&inst->U.I.SrcReg[1], &inst->U.I.SrcReg[0],
192 						sizeof(inst->U.I.SrcReg[1]));
193 		swz = rc_get_scalar_src_swz(inst->U.I.SrcReg[1].Swizzle);
194 		/* VE_PRED_SNEQ_PUSH needs to the branch condition to be in the
195 		 * w component */
196 		inst->U.I.SrcReg[1].Swizzle = RC_MAKE_SWIZZLE(RC_SWIZZLE_UNUSED,
197 				RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, swz);
198 		build_pred_src(&inst->U.I.SrcReg[0], fc_state);
199 	}
200 	build_pred_dst(&inst->U.I.DstReg, fc_state);
201 }
202 
rc_vert_fc(struct radeon_compiler * c,void * user)203 void rc_vert_fc(struct radeon_compiler *c, void *user)
204 {
205 	struct rc_instruction * inst;
206 	struct vert_fc_state fc_state;
207 
208 	memset(&fc_state, 0, sizeof(fc_state));
209 	fc_state.PredicateReg = -1;
210 	fc_state.C = c;
211 
212 	for(inst = c->Program.Instructions.Next;
213 					inst != &c->Program.Instructions;
214 					inst = inst->Next) {
215 
216 		switch (inst->U.I.Opcode) {
217 
218 		case RC_OPCODE_BGNLOOP:
219 			lower_bgnloop(inst, &fc_state);
220 			fc_state.LoopDepth++;
221 			break;
222 
223 		case RC_OPCODE_BRK:
224 			lower_brk(inst, &fc_state);
225 			break;
226 
227 		case RC_OPCODE_ENDLOOP:
228 			if (fc_state.BranchDepth != 0
229 					|| fc_state.LoopDepth != 1) {
230 				lower_endloop(inst, &fc_state);
231 				/* Skip the new PRED_RESTORE */
232 				inst = inst->Next;
233 			}
234 			fc_state.LoopDepth--;
235 			break;
236 		case RC_OPCODE_IF:
237 			lower_if(inst, &fc_state);
238 			fc_state.BranchDepth++;
239 			break;
240 
241 		case RC_OPCODE_ELSE:
242 			inst->U.I.Opcode = RC_ME_PRED_SET_INV;
243 			build_pred_dst(&inst->U.I.DstReg, &fc_state);
244 			build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
245 			break;
246 
247 		case RC_OPCODE_ENDIF:
248 			/* TODO: If LoopDepth == 1 and there is only a single break
249 			 * we can optimize out the endif just after the break. However
250 			 * previous attempts were buggy, so keep it simple for now.
251 			 */
252 			inst->U.I.Opcode = RC_ME_PRED_SET_POP;
253 			build_pred_dst(&inst->U.I.DstReg, &fc_state);
254 			build_pred_src(&inst->U.I.SrcReg[0], &fc_state);
255 			fc_state.BranchDepth--;
256 			break;
257 
258 		default:
259 			if (fc_state.BranchDepth || fc_state.LoopDepth) {
260 				inst->U.I.DstReg.Pred = RC_PRED_SET;
261 			}
262 			break;
263 		}
264 
265 		if (c->Error) {
266 			return;
267 		}
268 	}
269 }
270