xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/r300/compiler/radeon_pair_translate.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2009 Nicolai Haehnle.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "radeon_program_pair.h"
7 
8 #include "radeon_compiler.h"
9 #include "radeon_compiler_util.h"
10 
11 #include "util/compiler.h"
12 
13 
14 /**
15  * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
16  * and reverse the order of arguments for CMP.
17  */
final_rewrite(struct rc_sub_instruction * inst)18 static void final_rewrite(struct rc_sub_instruction *inst)
19 {
20 	struct rc_src_register tmp;
21 
22 	switch(inst->Opcode) {
23 	case RC_OPCODE_ADD:
24 		inst->SrcReg[2] = inst->SrcReg[1];
25 		inst->SrcReg[1].File = RC_FILE_NONE;
26 		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
27 		inst->SrcReg[1].Negate = RC_MASK_NONE;
28 		inst->Opcode = RC_OPCODE_MAD;
29 		break;
30 	case RC_OPCODE_CMP:
31 		tmp = inst->SrcReg[2];
32 		inst->SrcReg[2] = inst->SrcReg[0];
33 		inst->SrcReg[0] = tmp;
34 		break;
35 	case RC_OPCODE_MOV:
36 		/* AMD say we should use CMP.
37 		 * However, when we transform
38 		 *  KIL -r0;
39 		 * into
40 		 *  CMP tmp, -r0, -r0, 0;
41 		 *  KIL tmp;
42 		 * we get incorrect behaviour on R500 when r0 == 0.0.
43 		 * It appears that the R500 KIL hardware treats -0.0 as less
44 		 * than zero.
45 		 */
46 		inst->SrcReg[1].File = RC_FILE_NONE;
47 		inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
48 		inst->SrcReg[2].File = RC_FILE_NONE;
49 		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
50 		inst->Opcode = RC_OPCODE_MAD;
51 		break;
52 	case RC_OPCODE_MUL:
53 		inst->SrcReg[2].File = RC_FILE_NONE;
54 		inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
55 		inst->Opcode = RC_OPCODE_MAD;
56 		break;
57 	default:
58 		/* nothing to do */
59 		break;
60 	}
61 }
62 
63 
64 /**
65  * Classify an instruction according to which ALUs etc. it needs
66  */
classify_instruction(struct rc_sub_instruction * inst,int * needrgb,int * needalpha,int * istranscendent)67 static void classify_instruction(struct rc_sub_instruction * inst,
68 	int * needrgb, int * needalpha, int * istranscendent)
69 {
70 	*needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
71 	*needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
72 	*istranscendent = 0;
73 
74 	if (inst->WriteALUResult == RC_ALURESULT_X)
75 		*needrgb = 1;
76 	else if (inst->WriteALUResult == RC_ALURESULT_W)
77 		*needalpha = 1;
78 
79 	switch(inst->Opcode) {
80 	case RC_OPCODE_ADD:
81 	case RC_OPCODE_CMP:
82 	case RC_OPCODE_CND:
83 	case RC_OPCODE_DDX:
84 	case RC_OPCODE_DDY:
85 	case RC_OPCODE_FRC:
86 	case RC_OPCODE_MAD:
87 	case RC_OPCODE_MAX:
88 	case RC_OPCODE_MIN:
89 	case RC_OPCODE_MOV:
90 	case RC_OPCODE_MUL:
91 		break;
92 	case RC_OPCODE_COS:
93 	case RC_OPCODE_EX2:
94 	case RC_OPCODE_LG2:
95 	case RC_OPCODE_RCP:
96 	case RC_OPCODE_RSQ:
97 	case RC_OPCODE_SIN:
98 		*istranscendent = 1;
99 		*needalpha = 1;
100 		break;
101 	case RC_OPCODE_DP4:
102 		*needalpha = 1;
103 		FALLTHROUGH;
104 	case RC_OPCODE_DP3:
105 		*needrgb = 1;
106 		break;
107 	default:
108 		break;
109 	}
110 }
111 
/**
 * Report which channel groups the swizzle of a source register selects.
 *
 * *rgb is set to 1 if any of the four swizzle slots selects value 0, 1
 * or 2; *alpha is set to 1 if any slot selects value 3.  Neither output
 * is ever cleared here, so the caller must initialise both.
 */
static void src_uses(struct rc_src_register src, unsigned int * rgb,
							unsigned int * alpha)
{
	int slot;

	for (slot = 0; slot < 4; slot++) {
		const unsigned int sel = GET_SWZ(src.Swizzle, slot);

		switch (sel) {
		case 0:
		case 1:
		case 2:
			*rgb = 1;
			break;
		case 3:
			*alpha = 1;
			break;
		default:
			/* Any other selector reads neither group. */
			break;
		}
	}
}
124 
125 /**
126  * Fill the given ALU instruction's opcodes and source operands into the given pair,
127  * if possible.
128  */
set_pair_instruction(struct r300_fragment_program_compiler * c,struct rc_pair_instruction * pair,struct rc_sub_instruction * inst)129 static void set_pair_instruction(struct r300_fragment_program_compiler *c,
130 	struct rc_pair_instruction * pair,
131 	struct rc_sub_instruction * inst)
132 {
133 	int needrgb, needalpha, istranscendent;
134 	const struct rc_opcode_info * opcode;
135 	int i;
136 
137 	memset(pair, 0, sizeof(struct rc_pair_instruction));
138 
139 	classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
140 
141 	if (needrgb) {
142 		if (istranscendent)
143 			pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
144 		else
145 			pair->RGB.Opcode = inst->Opcode;
146 		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
147 			pair->RGB.Saturate = 1;
148 	}
149 	if (needalpha) {
150 		pair->Alpha.Opcode = inst->Opcode;
151 		if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
152 			pair->Alpha.Saturate = 1;
153 	}
154 
155 	opcode = rc_get_opcode_info(inst->Opcode);
156 
157 	/* Presubtract handling:
158 	 * We need to make sure that the values used by the presubtract
159 	 * operation end up in src0 or src1. */
160 	if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
161 		/* rc_pair_alloc_source() will fill in data for
162 		 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
163 		int j;
164 		for(j = 0; j < 3; j++) {
165 			int src_regs;
166 			if(inst->SrcReg[j].File != RC_FILE_PRESUB)
167 				continue;
168 
169 			src_regs = rc_presubtract_src_reg_count(
170 							inst->PreSub.Opcode);
171 			for(i = 0; i < src_regs; i++) {
172 				unsigned int rgb = 0;
173 				unsigned int alpha = 0;
174 				src_uses(inst->SrcReg[j], &rgb, &alpha);
175 				if(rgb) {
176 					pair->RGB.Src[i].File =
177 						inst->PreSub.SrcReg[i].File;
178 					pair->RGB.Src[i].Index =
179 						inst->PreSub.SrcReg[i].Index;
180 					pair->RGB.Src[i].Used = 1;
181 				}
182 				if(alpha) {
183 					pair->Alpha.Src[i].File =
184 						inst->PreSub.SrcReg[i].File;
185 					pair->Alpha.Src[i].Index =
186 						inst->PreSub.SrcReg[i].Index;
187 					pair->Alpha.Src[i].Used = 1;
188 				}
189 			}
190 		}
191 	}
192 
193 	for(i = 0; i < opcode->NumSrcRegs; ++i) {
194 		int source;
195 		if (needrgb && !istranscendent) {
196 			unsigned int srcrgb = 0;
197 			unsigned int srcalpha = 0;
198 			unsigned int srcmask = 0;
199 			int j;
200 			/* We don't care about the alpha channel here.  We only
201 			 * want the part of the swizzle that writes to rgb,
202 			 * since we are creating an rgb instruction. */
203 			for(j = 0; j < 3; ++j) {
204 				unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
205 
206 				if (swz < RC_SWIZZLE_W)
207 					srcrgb = 1;
208 				else if (swz == RC_SWIZZLE_W)
209 					srcalpha = 1;
210 
211 				/* We check for ZERO here as well because otherwise the zero
212 				 * sign (which doesn't matter and we already ignore it previously
213 				 * when checking for valid swizzle) could mess up the final negate sign.
214 				 * Example problematic pattern where this would be produced is:
215 				 *   CONST[1] FLT32 {   0.0000,     0.0000,    -4.0000,     0.0000}
216 				 *   ADD temp[0].xyz, const[0].xyz_, -const[1].z00_;
217 				 *
218 				 * after inline literals would become:
219 				 *   ADD temp[0].xyz, const[0].xyz_, 4.000000 (0x48).w-0-0-_;
220 				 *
221 				 * and after pair translate:
222 				 *   src0.xyz = const[0], src0.w = 4.000000 (0x48)
223 				 *   MAD temp[0].xyz, src0.xyz, src0.111, src0.w00
224 				 *
225 				 * Without the zero check there would be -src0.w00.
226 				 */
227 				if (swz < RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
228 					srcmask |= 1 << j;
229 			}
230 			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
231 							inst->SrcReg[i].File, inst->SrcReg[i].Index);
232 			if (source < 0) {
233 				rc_error(&c->Base, "Failed to translate "
234 							"rgb instruction.\n");
235 				return;
236 			}
237 			pair->RGB.Arg[i].Source = source;
238 			pair->RGB.Arg[i].Swizzle =
239 				rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
240 			pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
241 			pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
242 		}
243 		if (needalpha) {
244 			unsigned int srcrgb = 0;
245 			unsigned int srcalpha = 0;
246 			unsigned int swz;
247 			if (istranscendent) {
248 				swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
249 			} else {
250 				swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
251 			}
252 
253 			if (swz < 3)
254 				srcrgb = 1;
255 			else if (swz < 4)
256 				srcalpha = 1;
257 			source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
258 							inst->SrcReg[i].File, inst->SrcReg[i].Index);
259 			if (source < 0) {
260 				rc_error(&c->Base, "Failed to translate "
261 							"alpha instruction.\n");
262 				return;
263 			}
264 			pair->Alpha.Arg[i].Source = source;
265 			pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
266 			pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
267 
268 			if (istranscendent) {
269 				pair->Alpha.Arg[i].Negate =
270 					!!(inst->SrcReg[i].Negate &
271 							inst->DstReg.WriteMask);
272 			} else {
273 				pair->Alpha.Arg[i].Negate =
274 					!!(inst->SrcReg[i].Negate & RC_MASK_W);
275 			}
276 		}
277 	}
278 
279 	/* Destination handling */
280 	if (inst->DstReg.File == RC_FILE_OUTPUT) {
281         if (inst->DstReg.Index == c->OutputDepth) {
282             pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
283         } else {
284             for (i = 0; i < 4; i++) {
285                 if (inst->DstReg.Index == c->OutputColor[i]) {
286                     pair->RGB.Target = i;
287                     pair->Alpha.Target = i;
288                     pair->RGB.OutputWriteMask |=
289                         inst->DstReg.WriteMask & RC_MASK_XYZ;
290                     pair->Alpha.OutputWriteMask |=
291                         GET_BIT(inst->DstReg.WriteMask, 3);
292                     break;
293                 }
294             }
295         }
296 	} else {
297 		if (needrgb) {
298 			pair->RGB.DestIndex = inst->DstReg.Index;
299 			pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
300 		}
301 
302 		if (needalpha) {
303 			pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
304 			if (pair->Alpha.WriteMask) {
305 				pair->Alpha.DestIndex = inst->DstReg.Index;
306 			}
307 		}
308 	}
309 
310 	if (needrgb) {
311 		pair->RGB.Omod = inst->Omod;
312 	}
313 	if (needalpha) {
314 		pair->Alpha.Omod = inst->Omod;
315 	}
316 
317 	if (inst->WriteALUResult) {
318 		pair->WriteALUResult = inst->WriteALUResult;
319 		pair->ALUResultCompare = inst->ALUResultCompare;
320 	}
321 }
322 
323 
check_opcode_support(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)324 static void check_opcode_support(struct r300_fragment_program_compiler *c,
325 				 struct rc_sub_instruction *inst)
326 {
327 	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
328 
329 	if (opcode->HasDstReg) {
330 		if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
331 			rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
332 			return;
333 		}
334 	}
335 
336 	for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
337 		if (inst->SrcReg[i].RelAddr) {
338 			rc_error(&c->Base, "Fragment program does not support relative addressing "
339 				 " of source operands.\n");
340 			return;
341 		}
342 	}
343 }
344 
345 
346 /**
347  * Translate all ALU instructions into corresponding pair instructions,
348  * performing no other changes.
349  */
rc_pair_translate(struct radeon_compiler * cc,void * user)350 void rc_pair_translate(struct radeon_compiler *cc, void *user)
351 {
352 	struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
353 
354 	for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
355 	    inst != &c->Base.Program.Instructions;
356 	    inst = inst->Next) {
357 		const struct rc_opcode_info * opcode;
358 		struct rc_sub_instruction copy;
359 
360 		if (inst->Type != RC_INSTRUCTION_NORMAL)
361 			continue;
362 
363 		opcode = rc_get_opcode_info(inst->U.I.Opcode);
364 
365 		if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
366 			continue;
367 
368 		copy = inst->U.I;
369 
370 		check_opcode_support(c, &copy);
371 
372 		final_rewrite(&copy);
373 		inst->Type = RC_INSTRUCTION_PAIR;
374 		set_pair_instruction(c, &inst->U.P, &copy);
375 	}
376 }
377