1 /*
2 * Copyright 2009 Nicolai Haehnle.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "radeon_program_pair.h"
7
8 #include "radeon_compiler.h"
9 #include "radeon_compiler_util.h"
10
11 #include "util/compiler.h"
12
13
14 /**
15 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
16 * and reverse the order of arguments for CMP.
17 */
final_rewrite(struct rc_sub_instruction * inst)18 static void final_rewrite(struct rc_sub_instruction *inst)
19 {
20 struct rc_src_register tmp;
21
22 switch(inst->Opcode) {
23 case RC_OPCODE_ADD:
24 inst->SrcReg[2] = inst->SrcReg[1];
25 inst->SrcReg[1].File = RC_FILE_NONE;
26 inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
27 inst->SrcReg[1].Negate = RC_MASK_NONE;
28 inst->Opcode = RC_OPCODE_MAD;
29 break;
30 case RC_OPCODE_CMP:
31 tmp = inst->SrcReg[2];
32 inst->SrcReg[2] = inst->SrcReg[0];
33 inst->SrcReg[0] = tmp;
34 break;
35 case RC_OPCODE_MOV:
36 /* AMD say we should use CMP.
37 * However, when we transform
38 * KIL -r0;
39 * into
40 * CMP tmp, -r0, -r0, 0;
41 * KIL tmp;
42 * we get incorrect behaviour on R500 when r0 == 0.0.
43 * It appears that the R500 KIL hardware treats -0.0 as less
44 * than zero.
45 */
46 inst->SrcReg[1].File = RC_FILE_NONE;
47 inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
48 inst->SrcReg[2].File = RC_FILE_NONE;
49 inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
50 inst->Opcode = RC_OPCODE_MAD;
51 break;
52 case RC_OPCODE_MUL:
53 inst->SrcReg[2].File = RC_FILE_NONE;
54 inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
55 inst->Opcode = RC_OPCODE_MAD;
56 break;
57 default:
58 /* nothing to do */
59 break;
60 }
61 }
62
63
64 /**
65 * Classify an instruction according to which ALUs etc. it needs
66 */
classify_instruction(struct rc_sub_instruction * inst,int * needrgb,int * needalpha,int * istranscendent)67 static void classify_instruction(struct rc_sub_instruction * inst,
68 int * needrgb, int * needalpha, int * istranscendent)
69 {
70 *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
71 *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
72 *istranscendent = 0;
73
74 if (inst->WriteALUResult == RC_ALURESULT_X)
75 *needrgb = 1;
76 else if (inst->WriteALUResult == RC_ALURESULT_W)
77 *needalpha = 1;
78
79 switch(inst->Opcode) {
80 case RC_OPCODE_ADD:
81 case RC_OPCODE_CMP:
82 case RC_OPCODE_CND:
83 case RC_OPCODE_DDX:
84 case RC_OPCODE_DDY:
85 case RC_OPCODE_FRC:
86 case RC_OPCODE_MAD:
87 case RC_OPCODE_MAX:
88 case RC_OPCODE_MIN:
89 case RC_OPCODE_MOV:
90 case RC_OPCODE_MUL:
91 break;
92 case RC_OPCODE_COS:
93 case RC_OPCODE_EX2:
94 case RC_OPCODE_LG2:
95 case RC_OPCODE_RCP:
96 case RC_OPCODE_RSQ:
97 case RC_OPCODE_SIN:
98 *istranscendent = 1;
99 *needalpha = 1;
100 break;
101 case RC_OPCODE_DP4:
102 *needalpha = 1;
103 FALLTHROUGH;
104 case RC_OPCODE_DP3:
105 *needrgb = 1;
106 break;
107 default:
108 break;
109 }
110 }
111
/**
 * Report which kinds of channels the swizzle of \p src selects.
 *
 * For each of the four swizzle slots, a selector below 3 sets \p *rgb to 1
 * and a selector equal to 3 sets \p *alpha to 1. Selectors of 4 and above
 * set neither. The outputs are never cleared here, so the caller must
 * initialise them.
 */
static void src_uses(struct rc_src_register src, unsigned int * rgb,
	unsigned int * alpha)
{
	int slot;

	for (slot = 0; slot < 4; slot++) {
		const unsigned int sel = GET_SWZ(src.Swizzle, slot);

		if (sel >= 4)
			continue;

		if (sel < 3)
			*rgb = 1;
		else
			*alpha = 1;
	}
}
124
125 /**
126 * Fill the given ALU instruction's opcodes and source operands into the given pair,
127 * if possible.
128 */
set_pair_instruction(struct r300_fragment_program_compiler * c,struct rc_pair_instruction * pair,struct rc_sub_instruction * inst)129 static void set_pair_instruction(struct r300_fragment_program_compiler *c,
130 struct rc_pair_instruction * pair,
131 struct rc_sub_instruction * inst)
132 {
133 int needrgb, needalpha, istranscendent;
134 const struct rc_opcode_info * opcode;
135 int i;
136
137 memset(pair, 0, sizeof(struct rc_pair_instruction));
138
139 classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
140
141 if (needrgb) {
142 if (istranscendent)
143 pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
144 else
145 pair->RGB.Opcode = inst->Opcode;
146 if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
147 pair->RGB.Saturate = 1;
148 }
149 if (needalpha) {
150 pair->Alpha.Opcode = inst->Opcode;
151 if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
152 pair->Alpha.Saturate = 1;
153 }
154
155 opcode = rc_get_opcode_info(inst->Opcode);
156
157 /* Presubtract handling:
158 * We need to make sure that the values used by the presubtract
159 * operation end up in src0 or src1. */
160 if(inst->PreSub.Opcode != RC_PRESUB_NONE) {
161 /* rc_pair_alloc_source() will fill in data for
162 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */
163 int j;
164 for(j = 0; j < 3; j++) {
165 int src_regs;
166 if(inst->SrcReg[j].File != RC_FILE_PRESUB)
167 continue;
168
169 src_regs = rc_presubtract_src_reg_count(
170 inst->PreSub.Opcode);
171 for(i = 0; i < src_regs; i++) {
172 unsigned int rgb = 0;
173 unsigned int alpha = 0;
174 src_uses(inst->SrcReg[j], &rgb, &alpha);
175 if(rgb) {
176 pair->RGB.Src[i].File =
177 inst->PreSub.SrcReg[i].File;
178 pair->RGB.Src[i].Index =
179 inst->PreSub.SrcReg[i].Index;
180 pair->RGB.Src[i].Used = 1;
181 }
182 if(alpha) {
183 pair->Alpha.Src[i].File =
184 inst->PreSub.SrcReg[i].File;
185 pair->Alpha.Src[i].Index =
186 inst->PreSub.SrcReg[i].Index;
187 pair->Alpha.Src[i].Used = 1;
188 }
189 }
190 }
191 }
192
193 for(i = 0; i < opcode->NumSrcRegs; ++i) {
194 int source;
195 if (needrgb && !istranscendent) {
196 unsigned int srcrgb = 0;
197 unsigned int srcalpha = 0;
198 unsigned int srcmask = 0;
199 int j;
200 /* We don't care about the alpha channel here. We only
201 * want the part of the swizzle that writes to rgb,
202 * since we are creating an rgb instruction. */
203 for(j = 0; j < 3; ++j) {
204 unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
205
206 if (swz < RC_SWIZZLE_W)
207 srcrgb = 1;
208 else if (swz == RC_SWIZZLE_W)
209 srcalpha = 1;
210
211 /* We check for ZERO here as well because otherwise the zero
212 * sign (which doesn't matter and we already ignore it previously
213 * when checking for valid swizzle) could mess up the final negate sign.
214 * Example problematic pattern where this would be produced is:
215 * CONST[1] FLT32 { 0.0000, 0.0000, -4.0000, 0.0000}
216 * ADD temp[0].xyz, const[0].xyz_, -const[1].z00_;
217 *
218 * after inline literals would become:
219 * ADD temp[0].xyz, const[0].xyz_, 4.000000 (0x48).w-0-0-_;
220 *
221 * and after pair translate:
222 * src0.xyz = const[0], src0.w = 4.000000 (0x48)
223 * MAD temp[0].xyz, src0.xyz, src0.111, src0.w00
224 *
225 * Without the zero check there would be -src0.w00.
226 */
227 if (swz < RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
228 srcmask |= 1 << j;
229 }
230 source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
231 inst->SrcReg[i].File, inst->SrcReg[i].Index);
232 if (source < 0) {
233 rc_error(&c->Base, "Failed to translate "
234 "rgb instruction.\n");
235 return;
236 }
237 pair->RGB.Arg[i].Source = source;
238 pair->RGB.Arg[i].Swizzle =
239 rc_init_swizzle(inst->SrcReg[i].Swizzle, 3);
240 pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
241 pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
242 }
243 if (needalpha) {
244 unsigned int srcrgb = 0;
245 unsigned int srcalpha = 0;
246 unsigned int swz;
247 if (istranscendent) {
248 swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle);
249 } else {
250 swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3);
251 }
252
253 if (swz < 3)
254 srcrgb = 1;
255 else if (swz < 4)
256 srcalpha = 1;
257 source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
258 inst->SrcReg[i].File, inst->SrcReg[i].Index);
259 if (source < 0) {
260 rc_error(&c->Base, "Failed to translate "
261 "alpha instruction.\n");
262 return;
263 }
264 pair->Alpha.Arg[i].Source = source;
265 pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1);
266 pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
267
268 if (istranscendent) {
269 pair->Alpha.Arg[i].Negate =
270 !!(inst->SrcReg[i].Negate &
271 inst->DstReg.WriteMask);
272 } else {
273 pair->Alpha.Arg[i].Negate =
274 !!(inst->SrcReg[i].Negate & RC_MASK_W);
275 }
276 }
277 }
278
279 /* Destination handling */
280 if (inst->DstReg.File == RC_FILE_OUTPUT) {
281 if (inst->DstReg.Index == c->OutputDepth) {
282 pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
283 } else {
284 for (i = 0; i < 4; i++) {
285 if (inst->DstReg.Index == c->OutputColor[i]) {
286 pair->RGB.Target = i;
287 pair->Alpha.Target = i;
288 pair->RGB.OutputWriteMask |=
289 inst->DstReg.WriteMask & RC_MASK_XYZ;
290 pair->Alpha.OutputWriteMask |=
291 GET_BIT(inst->DstReg.WriteMask, 3);
292 break;
293 }
294 }
295 }
296 } else {
297 if (needrgb) {
298 pair->RGB.DestIndex = inst->DstReg.Index;
299 pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
300 }
301
302 if (needalpha) {
303 pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3);
304 if (pair->Alpha.WriteMask) {
305 pair->Alpha.DestIndex = inst->DstReg.Index;
306 }
307 }
308 }
309
310 if (needrgb) {
311 pair->RGB.Omod = inst->Omod;
312 }
313 if (needalpha) {
314 pair->Alpha.Omod = inst->Omod;
315 }
316
317 if (inst->WriteALUResult) {
318 pair->WriteALUResult = inst->WriteALUResult;
319 pair->ALUResultCompare = inst->ALUResultCompare;
320 }
321 }
322
323
check_opcode_support(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)324 static void check_opcode_support(struct r300_fragment_program_compiler *c,
325 struct rc_sub_instruction *inst)
326 {
327 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
328
329 if (opcode->HasDstReg) {
330 if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
331 rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
332 return;
333 }
334 }
335
336 for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
337 if (inst->SrcReg[i].RelAddr) {
338 rc_error(&c->Base, "Fragment program does not support relative addressing "
339 " of source operands.\n");
340 return;
341 }
342 }
343 }
344
345
346 /**
347 * Translate all ALU instructions into corresponding pair instructions,
348 * performing no other changes.
349 */
rc_pair_translate(struct radeon_compiler * cc,void * user)350 void rc_pair_translate(struct radeon_compiler *cc, void *user)
351 {
352 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc;
353
354 for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
355 inst != &c->Base.Program.Instructions;
356 inst = inst->Next) {
357 const struct rc_opcode_info * opcode;
358 struct rc_sub_instruction copy;
359
360 if (inst->Type != RC_INSTRUCTION_NORMAL)
361 continue;
362
363 opcode = rc_get_opcode_info(inst->U.I.Opcode);
364
365 if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
366 continue;
367
368 copy = inst->U.I;
369
370 check_opcode_support(c, ©);
371
372 final_rewrite(©);
373 inst->Type = RC_INSTRUCTION_PAIR;
374 set_pair_instruction(c, &inst->U.P, ©);
375 }
376 }
377