1 /*
2 * Copyright (C) 2014 Rob Clark <[email protected]>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <[email protected]>
25 */
26
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "tgsi/tgsi_dump.h"
30
31 #include "util/compiler.h"
32 #include "util/u_debug.h"
33 #include "util/u_math.h"
34
35 #include "tgsi_lowering.h"
36
37 struct tgsi_lowering_context {
38 struct tgsi_transform_context base;
39 const struct tgsi_lowering_config *config;
40 struct tgsi_shader_info *info;
41 unsigned two_side_colors;
42 unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
43 unsigned color_base; /* base register for chosen COLOR/BCOLOR's */
44 int face_idx;
45 unsigned numtmp;
46 struct {
47 struct tgsi_full_src_register src;
48 struct tgsi_full_dst_register dst;
49 } tmp[2];
50 #define A 0
51 #define B 1
52 struct tgsi_full_src_register imm;
53 int emitted_decls;
54 unsigned saturate;
55 };
56
/* Downcast from the embedded base transform context to the lowering
 * context (valid because 'base' is the first member of the struct).
 */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   struct tgsi_lowering_context *ctx = (struct tgsi_lowering_context *)tctx;

   return ctx;
}
62
63 /*
64 * Utility helpers:
65 */
66
67 static void
reg_dst(struct tgsi_full_dst_register * dst,const struct tgsi_full_dst_register * orig_dst,unsigned wrmask)68 reg_dst(struct tgsi_full_dst_register *dst,
69 const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
70 {
71 *dst = *orig_dst;
72 dst->Register.WriteMask &= wrmask;
73 assert(dst->Register.WriteMask);
74 }
75
76 static inline void
get_swiz(unsigned * swiz,const struct tgsi_src_register * src)77 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
78 {
79 swiz[0] = src->SwizzleX;
80 swiz[1] = src->SwizzleY;
81 swiz[2] = src->SwizzleZ;
82 swiz[3] = src->SwizzleW;
83 }
84
85 static void
reg_src(struct tgsi_full_src_register * src,const struct tgsi_full_src_register * orig_src,unsigned sx,unsigned sy,unsigned sz,unsigned sw)86 reg_src(struct tgsi_full_src_register *src,
87 const struct tgsi_full_src_register *orig_src,
88 unsigned sx, unsigned sy, unsigned sz, unsigned sw)
89 {
90 unsigned swiz[4];
91 get_swiz(swiz, &orig_src->Register);
92 *src = *orig_src;
93 src->Register.SwizzleX = swiz[sx];
94 src->Register.SwizzleY = swiz[sy];
95 src->Register.SwizzleZ = swiz[sz];
96 src->Register.SwizzleW = swiz[sw];
97 }
98
/* SWIZ(x,y,z,w) expands to four comma-separated TGSI_SWIZZLE_* values, for
 * use as the trailing sx/sy/sz/sw arguments of reg_src().  A component
 * written as '_' is a don't-care and is mapped to TGSI_SWIZZLE_X.
 */
#define TGSI_SWIZZLE__      TGSI_SWIZZLE_X  /* don't-care value! */
#define SWIZ(x,y,z,w)       \
   TGSI_SWIZZLE_ ## x,      \
   TGSI_SWIZZLE_ ## y,      \
   TGSI_SWIZZLE_ ## z,      \
   TGSI_SWIZZLE_ ## w
102
103 /*
104 * if (dst.x aliases src.x) {
105 * MOV tmpA.x, src.x
106 * src = tmpA
107 * }
108 * COS dst.x, src.x
109 * SIN dst.y, src.x
110 * MOV dst.zw, imm{0.0, 1.0}
111 */
112 static bool
aliases(const struct tgsi_full_dst_register * dst,unsigned dst_mask,const struct tgsi_full_src_register * src,unsigned src_mask)113 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
114 const struct tgsi_full_src_register *src, unsigned src_mask)
115 {
116 if ((dst->Register.File == src->Register.File) &&
117 (dst->Register.Index == src->Register.Index)) {
118 unsigned i, actual_mask = 0;
119 unsigned swiz[4];
120 get_swiz(swiz, &src->Register);
121 for (i = 0; i < 4; i++)
122 if (src_mask & (1 << i))
123 actual_mask |= (1 << swiz[i]);
124 if (actual_mask & dst_mask)
125 return true;
126 }
127 return false;
128 }
129
130 static void
create_mov(struct tgsi_transform_context * tctx,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src,unsigned mask,unsigned saturate)131 create_mov(struct tgsi_transform_context *tctx,
132 const struct tgsi_full_dst_register *dst,
133 const struct tgsi_full_src_register *src,
134 unsigned mask, unsigned saturate)
135 {
136 struct tgsi_full_instruction new_inst;
137
138 new_inst = tgsi_default_full_instruction();
139 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
140 new_inst.Instruction.Saturate = saturate;
141 new_inst.Instruction.NumDstRegs = 1;
142 reg_dst(&new_inst.Dst[0], dst, mask);
143 new_inst.Instruction.NumSrcRegs = 1;
144 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
145 tctx->emit_instruction(tctx, &new_inst);
146 }
147
/* Token accounting used to size the output of a lowering.  We assume the
 * worst case: the instruction being removed is as small as possible (no
 * ADDR[] or anything else that adds tokens per src/dst), while every
 * instruction inserted in its place is as large as allowed for here.
 *
 * OINST(nargs) - tokens freed by the old instruction:
 *   1 for the instruction itself, 1 for dst, 1 per source
 *
 * NINST(nargs) - tokens needed by a new instruction:
 *   1 for the instruction itself, 2 for dst, 2 per source
 */

#define OINST(nargs)  (2 + (nargs))
#define NINST(nargs)  (3 + 2 * (nargs))
166
167 /*
168 * Lowering Translators:
169 */
170
171 /* DST - Distance Vector
172 * dst.x = 1.0
173 * dst.y = src0.y \times src1.y
174 * dst.z = src0.z
175 * dst.w = src1.w
176 *
177 * ; note: could be more clever and use just a single temp
178 * ; if I was clever enough to re-write the swizzles.
179 * ; needs: 2 tmp, imm{1.0}
180 * if (dst.y aliases src0.z) {
181 * MOV tmpA.yz, src0.yz
182 * src0 = tmpA
183 * }
184 * if (dst.yz aliases src1.w) {
185 * MOV tmpB.yw, src1.yw
186 * src1 = tmpB
187 * }
188 * MUL dst.y, src0.y, src1.y
189 * MOV dst.z, src0.z
190 * MOV dst.w, src1.w
191 * MOV dst.x, imm{1.0}
192 */
193 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
194 NINST(1) + NINST(1) - OINST(2))
195 #define DST_TMP 2
196 static void
transform_dst(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)197 transform_dst(struct tgsi_transform_context *tctx,
198 struct tgsi_full_instruction *inst)
199 {
200 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
201 struct tgsi_full_dst_register *dst = &inst->Dst[0];
202 struct tgsi_full_src_register *src0 = &inst->Src[0];
203 struct tgsi_full_src_register *src1 = &inst->Src[1];
204 struct tgsi_full_instruction new_inst;
205
206 if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
207 create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
208 src0 = &ctx->tmp[A].src;
209 }
210
211 if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
212 create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
213 src1 = &ctx->tmp[B].src;
214 }
215
216 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
217 /* MUL dst.y, src0.y, src1.y */
218 new_inst = tgsi_default_full_instruction();
219 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
220 new_inst.Instruction.NumDstRegs = 1;
221 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
222 new_inst.Instruction.NumSrcRegs = 2;
223 reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
224 reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
225 tctx->emit_instruction(tctx, &new_inst);
226 }
227
228 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
229 /* MOV dst.z, src0.z */
230 new_inst = tgsi_default_full_instruction();
231 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
232 new_inst.Instruction.NumDstRegs = 1;
233 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
234 new_inst.Instruction.NumSrcRegs = 1;
235 reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
236 tctx->emit_instruction(tctx, &new_inst);
237 }
238
239 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
240 /* MOV dst.w, src1.w */
241 new_inst = tgsi_default_full_instruction();
242 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
243 new_inst.Instruction.NumDstRegs = 1;
244 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
245 new_inst.Instruction.NumSrcRegs = 1;
246 reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
247 tctx->emit_instruction(tctx, &new_inst);
248 }
249
250 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
251 /* MOV dst.x, imm{1.0} */
252 new_inst = tgsi_default_full_instruction();
253 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
254 new_inst.Instruction.NumDstRegs = 1;
255 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
256 new_inst.Instruction.NumSrcRegs = 1;
257 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
258 tctx->emit_instruction(tctx, &new_inst);
259 }
260 }
261
262 /* LRP - Linear Interpolate
263 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
264 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
265 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
266 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
267 *
268 * This becomes: src0 \times src1 + src2 - src0 \times src2, which
269 * can then become: src0 \times src1 - (src0 \times src2 - src2)
270 *
271 * ; needs: 1 tmp
272 * MAD tmpA, src0, src2, -src2
273 * MAD dst, src0, src1, -tmpA
274 */
275 #define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
276 #define LRP_TMP 1
277 static void
transform_lrp(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)278 transform_lrp(struct tgsi_transform_context *tctx,
279 struct tgsi_full_instruction *inst)
280 {
281 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
282 struct tgsi_full_dst_register *dst = &inst->Dst[0];
283 struct tgsi_full_src_register *src0 = &inst->Src[0];
284 struct tgsi_full_src_register *src1 = &inst->Src[1];
285 struct tgsi_full_src_register *src2 = &inst->Src[2];
286 struct tgsi_full_instruction new_inst;
287
288 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
289 /* MAD tmpA, src0, src2, -src2 */
290 new_inst = tgsi_default_full_instruction();
291 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
292 new_inst.Instruction.NumDstRegs = 1;
293 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
294 new_inst.Instruction.NumSrcRegs = 3;
295 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
296 reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
297 reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
298 new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
299 tctx->emit_instruction(tctx, &new_inst);
300
301 /* MAD dst, src0, src1, -tmpA */
302 new_inst = tgsi_default_full_instruction();
303 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
304 new_inst.Instruction.NumDstRegs = 1;
305 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
306 new_inst.Instruction.NumSrcRegs = 3;
307 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
308 reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
309 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
310 new_inst.Src[2].Register.Negate = true;
311 tctx->emit_instruction(tctx, &new_inst);
312 }
313 }
314
315 /* FRC - Fraction
316 * dst.x = src.x - \lfloor src.x\rfloor
317 * dst.y = src.y - \lfloor src.y\rfloor
318 * dst.z = src.z - \lfloor src.z\rfloor
319 * dst.w = src.w - \lfloor src.w\rfloor
320 *
321 * ; needs: 1 tmp
322 * FLR tmpA, src
323 * SUB dst, src, tmpA
324 */
325 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
326 #define FRC_TMP 1
327 static void
transform_frc(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)328 transform_frc(struct tgsi_transform_context *tctx,
329 struct tgsi_full_instruction *inst)
330 {
331 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
332 struct tgsi_full_dst_register *dst = &inst->Dst[0];
333 struct tgsi_full_src_register *src = &inst->Src[0];
334 struct tgsi_full_instruction new_inst;
335
336 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
337 /* FLR tmpA, src */
338 new_inst = tgsi_default_full_instruction();
339 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
340 new_inst.Instruction.NumDstRegs = 1;
341 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
342 new_inst.Instruction.NumSrcRegs = 1;
343 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
344 tctx->emit_instruction(tctx, &new_inst);
345
346 /* SUB dst, src, tmpA */
347 new_inst = tgsi_default_full_instruction();
348 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
349 new_inst.Instruction.NumDstRegs = 1;
350 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
351 new_inst.Instruction.NumSrcRegs = 2;
352 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
353 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
354 new_inst.Src[1].Register.Negate = 1;
355 tctx->emit_instruction(tctx, &new_inst);
356 }
357 }
358
359 /* POW - Power
360 * dst.x = src0.x^{src1.x}
361 * dst.y = src0.x^{src1.x}
362 * dst.z = src0.x^{src1.x}
363 * dst.w = src0.x^{src1.x}
364 *
365 * ; needs: 1 tmp
366 * LG2 tmpA.x, src0.x
367 * MUL tmpA.x, src1.x, tmpA.x
368 * EX2 dst, tmpA.x
369 */
370 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
371 #define POW_TMP 1
372 static void
transform_pow(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)373 transform_pow(struct tgsi_transform_context *tctx,
374 struct tgsi_full_instruction *inst)
375 {
376 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
377 struct tgsi_full_dst_register *dst = &inst->Dst[0];
378 struct tgsi_full_src_register *src0 = &inst->Src[0];
379 struct tgsi_full_src_register *src1 = &inst->Src[1];
380 struct tgsi_full_instruction new_inst;
381
382 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
383 /* LG2 tmpA.x, src0.x */
384 new_inst = tgsi_default_full_instruction();
385 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
386 new_inst.Instruction.NumDstRegs = 1;
387 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
388 new_inst.Instruction.NumSrcRegs = 1;
389 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
390 tctx->emit_instruction(tctx, &new_inst);
391
392 /* MUL tmpA.x, src1.x, tmpA.x */
393 new_inst = tgsi_default_full_instruction();
394 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
395 new_inst.Instruction.NumDstRegs = 1;
396 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
397 new_inst.Instruction.NumSrcRegs = 2;
398 reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
399 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
400 tctx->emit_instruction(tctx, &new_inst);
401
402 /* EX2 dst, tmpA.x */
403 new_inst = tgsi_default_full_instruction();
404 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
405 new_inst.Instruction.NumDstRegs = 1;
406 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
407 new_inst.Instruction.NumSrcRegs = 1;
408 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
409 tctx->emit_instruction(tctx, &new_inst);
410 }
411 }
412
413 /* LIT - Light Coefficients
414 * dst.x = 1.0
415 * dst.y = max(src.x, 0.0)
416 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
417 * dst.w = 1.0
418 *
419 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
420 * MAX tmpA.xy, src.xy, imm{0.0}
421 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
422 * LG2 tmpA.y, tmpA.y
423 * MUL tmpA.y, tmpA.z, tmpA.y
424 * EX2 tmpA.y, tmpA.y
425 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
426 * MOV dst.yz, tmpA.xy
427 * MOV dst.xw, imm{1.0}
428 */
429 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
430 NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
431 #define LIT_TMP 1
432 static void
transform_lit(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)433 transform_lit(struct tgsi_transform_context *tctx,
434 struct tgsi_full_instruction *inst)
435 {
436 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
437 struct tgsi_full_dst_register *dst = &inst->Dst[0];
438 struct tgsi_full_src_register *src = &inst->Src[0];
439 struct tgsi_full_instruction new_inst;
440
441 if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
442 /* MAX tmpA.xy, src.xy, imm{0.0} */
443 new_inst = tgsi_default_full_instruction();
444 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
445 new_inst.Instruction.NumDstRegs = 1;
446 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
447 new_inst.Instruction.NumSrcRegs = 2;
448 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
449 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
450 tctx->emit_instruction(tctx, &new_inst);
451
452 /* MIN tmpA.z, src.w, imm{128.0} */
453 new_inst = tgsi_default_full_instruction();
454 new_inst.Instruction.Opcode = TGSI_OPCODE_MIN;
455 new_inst.Instruction.NumDstRegs = 1;
456 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
457 new_inst.Instruction.NumSrcRegs = 2;
458 reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
459 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
460 tctx->emit_instruction(tctx, &new_inst);
461
462 /* MAX tmpA.z, tmpA.z, -imm{128.0} */
463 new_inst = tgsi_default_full_instruction();
464 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
465 new_inst.Instruction.NumDstRegs = 1;
466 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
467 new_inst.Instruction.NumSrcRegs = 2;
468 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _));
469 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
470 new_inst.Src[1].Register.Negate = true;
471 tctx->emit_instruction(tctx, &new_inst);
472
473 /* LG2 tmpA.y, tmpA.y */
474 new_inst = tgsi_default_full_instruction();
475 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
476 new_inst.Instruction.NumDstRegs = 1;
477 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
478 new_inst.Instruction.NumSrcRegs = 1;
479 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
480 tctx->emit_instruction(tctx, &new_inst);
481
482 /* MUL tmpA.y, tmpA.z, tmpA.y */
483 new_inst = tgsi_default_full_instruction();
484 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
485 new_inst.Instruction.NumDstRegs = 1;
486 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
487 new_inst.Instruction.NumSrcRegs = 2;
488 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
489 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
490 tctx->emit_instruction(tctx, &new_inst);
491
492 /* EX2 tmpA.y, tmpA.y */
493 new_inst = tgsi_default_full_instruction();
494 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
495 new_inst.Instruction.NumDstRegs = 1;
496 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
497 new_inst.Instruction.NumSrcRegs = 1;
498 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
499 tctx->emit_instruction(tctx, &new_inst);
500
501 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
502 new_inst = tgsi_default_full_instruction();
503 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
504 new_inst.Instruction.NumDstRegs = 1;
505 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
506 new_inst.Instruction.NumSrcRegs = 3;
507 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
508 new_inst.Src[0].Register.Negate = true;
509 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
510 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
511 tctx->emit_instruction(tctx, &new_inst);
512
513 /* MOV dst.yz, tmpA.xy */
514 new_inst = tgsi_default_full_instruction();
515 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
516 new_inst.Instruction.NumDstRegs = 1;
517 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
518 new_inst.Instruction.NumSrcRegs = 1;
519 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
520 tctx->emit_instruction(tctx, &new_inst);
521 }
522
523 if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
524 /* MOV dst.xw, imm{1.0} */
525 new_inst = tgsi_default_full_instruction();
526 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
527 new_inst.Instruction.NumDstRegs = 1;
528 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
529 new_inst.Instruction.NumSrcRegs = 1;
530 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
531 tctx->emit_instruction(tctx, &new_inst);
532 }
533 }
534
535 /* EXP - Approximate Exponential Base 2
536 * dst.x = 2^{\lfloor src.x\rfloor}
537 * dst.y = src.x - \lfloor src.x\rfloor
538 * dst.z = 2^{src.x}
539 * dst.w = 1.0
540 *
541 * ; needs: 1 tmp, imm{1.0}
542 * if (lowering FLR) {
543 * FRC tmpA.x, src.x
544 * SUB tmpA.x, src.x, tmpA.x
545 * } else {
546 * FLR tmpA.x, src.x
547 * }
548 * EX2 tmpA.y, src.x
549 * SUB dst.y, src.x, tmpA.x
550 * EX2 dst.x, tmpA.x
551 * MOV dst.z, tmpA.y
552 * MOV dst.w, imm{1.0}
553 */
554 #define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
555 NINST(1)+ NINST(1) - OINST(1))
556 #define EXP_TMP 1
557 static void
transform_exp(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)558 transform_exp(struct tgsi_transform_context *tctx,
559 struct tgsi_full_instruction *inst)
560 {
561 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
562 struct tgsi_full_dst_register *dst = &inst->Dst[0];
563 struct tgsi_full_src_register *src = &inst->Src[0];
564 struct tgsi_full_instruction new_inst;
565
566 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
567 if (ctx->config->lower_FLR) {
568 /* FRC tmpA.x, src.x */
569 new_inst = tgsi_default_full_instruction();
570 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
571 new_inst.Instruction.NumDstRegs = 1;
572 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
573 new_inst.Instruction.NumSrcRegs = 1;
574 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
575 tctx->emit_instruction(tctx, &new_inst);
576
577 /* SUB tmpA.x, src.x, tmpA.x */
578 new_inst = tgsi_default_full_instruction();
579 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
580 new_inst.Instruction.NumDstRegs = 1;
581 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
582 new_inst.Instruction.NumSrcRegs = 2;
583 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
584 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
585 new_inst.Src[1].Register.Negate = 1;
586 tctx->emit_instruction(tctx, &new_inst);
587 } else {
588 /* FLR tmpA.x, src.x */
589 new_inst = tgsi_default_full_instruction();
590 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
591 new_inst.Instruction.NumDstRegs = 1;
592 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
593 new_inst.Instruction.NumSrcRegs = 1;
594 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
595 tctx->emit_instruction(tctx, &new_inst);
596 }
597 }
598
599 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
600 /* EX2 tmpA.y, src.x */
601 new_inst = tgsi_default_full_instruction();
602 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
603 new_inst.Instruction.NumDstRegs = 1;
604 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
605 new_inst.Instruction.NumSrcRegs = 1;
606 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
607 tctx->emit_instruction(tctx, &new_inst);
608 }
609
610 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
611 /* SUB dst.y, src.x, tmpA.x */
612 new_inst = tgsi_default_full_instruction();
613 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
614 new_inst.Instruction.NumDstRegs = 1;
615 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
616 new_inst.Instruction.NumSrcRegs = 2;
617 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
618 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
619 new_inst.Src[1].Register.Negate = 1;
620 tctx->emit_instruction(tctx, &new_inst);
621 }
622
623 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
624 /* EX2 dst.x, tmpA.x */
625 new_inst = tgsi_default_full_instruction();
626 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
627 new_inst.Instruction.NumDstRegs = 1;
628 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
629 new_inst.Instruction.NumSrcRegs = 1;
630 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
631 tctx->emit_instruction(tctx, &new_inst);
632 }
633
634 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
635 /* MOV dst.z, tmpA.y */
636 new_inst = tgsi_default_full_instruction();
637 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
638 new_inst.Instruction.NumDstRegs = 1;
639 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
640 new_inst.Instruction.NumSrcRegs = 1;
641 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
642 tctx->emit_instruction(tctx, &new_inst);
643 }
644
645 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
646 /* MOV dst.w, imm{1.0} */
647 new_inst = tgsi_default_full_instruction();
648 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
649 new_inst.Instruction.NumDstRegs = 1;
650 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
651 new_inst.Instruction.NumSrcRegs = 1;
652 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
653 tctx->emit_instruction(tctx, &new_inst);
654 }
655 }
656
657 /* LOG - Approximate Logarithm Base 2
658 * dst.x = \lfloor\log_2{|src.x|}\rfloor
659 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
660 * dst.z = \log_2{|src.x|}
661 * dst.w = 1.0
662 *
663 * ; needs: 1 tmp, imm{1.0}
664 * LG2 tmpA.x, |src.x|
665 * if (lowering FLR) {
666 * FRC tmpA.y, tmpA.x
667 * SUB tmpA.y, tmpA.x, tmpA.y
668 * } else {
669 * FLR tmpA.y, tmpA.x
670 * }
671 * EX2 tmpA.z, tmpA.y
672 * RCP tmpA.z, tmpA.z
673 * MUL dst.y, |src.x|, tmpA.z
674 * MOV dst.xz, tmpA.yx
675 * MOV dst.w, imm{1.0}
676 */
677 #define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
678 NINST(2) + NINST(1) + NINST(1) - OINST(1))
679 #define LOG_TMP 1
680 static void
transform_log(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)681 transform_log(struct tgsi_transform_context *tctx,
682 struct tgsi_full_instruction *inst)
683 {
684 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
685 struct tgsi_full_dst_register *dst = &inst->Dst[0];
686 struct tgsi_full_src_register *src = &inst->Src[0];
687 struct tgsi_full_instruction new_inst;
688
689 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
690 /* LG2 tmpA.x, |src.x| */
691 new_inst = tgsi_default_full_instruction();
692 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
693 new_inst.Instruction.NumDstRegs = 1;
694 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
695 new_inst.Instruction.NumSrcRegs = 1;
696 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
697 new_inst.Src[0].Register.Absolute = true;
698 tctx->emit_instruction(tctx, &new_inst);
699 }
700
701 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
702 if (ctx->config->lower_FLR) {
703 /* FRC tmpA.y, tmpA.x */
704 new_inst = tgsi_default_full_instruction();
705 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
706 new_inst.Instruction.NumDstRegs = 1;
707 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
708 new_inst.Instruction.NumSrcRegs = 1;
709 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
710 tctx->emit_instruction(tctx, &new_inst);
711
712 /* SUB tmpA.y, tmpA.x, tmpA.y */
713 new_inst = tgsi_default_full_instruction();
714 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
715 new_inst.Instruction.NumDstRegs = 1;
716 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
717 new_inst.Instruction.NumSrcRegs = 2;
718 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
719 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
720 new_inst.Src[1].Register.Negate = 1;
721 tctx->emit_instruction(tctx, &new_inst);
722 } else {
723 /* FLR tmpA.y, tmpA.x */
724 new_inst = tgsi_default_full_instruction();
725 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
726 new_inst.Instruction.NumDstRegs = 1;
727 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
728 new_inst.Instruction.NumSrcRegs = 1;
729 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
730 tctx->emit_instruction(tctx, &new_inst);
731 }
732 }
733
734 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
735 /* EX2 tmpA.z, tmpA.y */
736 new_inst = tgsi_default_full_instruction();
737 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
738 new_inst.Instruction.NumDstRegs = 1;
739 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
740 new_inst.Instruction.NumSrcRegs = 1;
741 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
742 tctx->emit_instruction(tctx, &new_inst);
743
744 /* RCP tmpA.z, tmpA.z */
745 new_inst = tgsi_default_full_instruction();
746 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
747 new_inst.Instruction.NumDstRegs = 1;
748 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
749 new_inst.Instruction.NumSrcRegs = 1;
750 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
751 tctx->emit_instruction(tctx, &new_inst);
752
753 /* MUL dst.y, |src.x|, tmpA.z */
754 new_inst = tgsi_default_full_instruction();
755 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
756 new_inst.Instruction.NumDstRegs = 1;
757 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
758 new_inst.Instruction.NumSrcRegs = 2;
759 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
760 new_inst.Src[0].Register.Absolute = true;
761 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
762 tctx->emit_instruction(tctx, &new_inst);
763 }
764
765 if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
766 /* MOV dst.xz, tmpA.yx */
767 new_inst = tgsi_default_full_instruction();
768 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
769 new_inst.Instruction.NumDstRegs = 1;
770 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
771 new_inst.Instruction.NumSrcRegs = 1;
772 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
773 tctx->emit_instruction(tctx, &new_inst);
774 }
775
776 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
777 /* MOV dst.w, imm{1.0} */
778 new_inst = tgsi_default_full_instruction();
779 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
780 new_inst.Instruction.NumDstRegs = 1;
781 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
782 new_inst.Instruction.NumSrcRegs = 1;
783 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
784 tctx->emit_instruction(tctx, &new_inst);
785 }
786 }
787
788 /* DP4 - 4-component Dot Product
789 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
790 *
791 * DP3 - 3-component Dot Product
792 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
793 *
794 * DP2 - 2-component Dot Product
795 * dst = src0.x \times src1.x + src0.y \times src1.y
796 *
797 * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
798 * operations, which is what you'd prefer for a ISA that is natively
799 * scalar. Probably a native vector ISA would at least already have
800 * DP4/DP3 instructions, but perhaps there is room for an alternative
801 * translation for DP2 using vector instructions.
802 *
803 * ; needs: 1 tmp
804 * MUL tmpA.x, src0.x, src1.x
805 * MAD tmpA.x, src0.y, src1.y, tmpA.x
806 * if (DP3 || DP4) {
807 * MAD tmpA.x, src0.z, src1.z, tmpA.x
808 * if (DP4) {
809 * MAD tmpA.x, src0.w, src1.w, tmpA.x
810 * }
811 * }
812 * ; fixup last instruction to replicate into dst
813 */
814 #define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
815 #define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))
816 #define DP2_GROW (NINST(2) + NINST(3) - OINST(2))
817 #define DOTP_TMP 1
818 static void
transform_dotp(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)819 transform_dotp(struct tgsi_transform_context *tctx,
820 struct tgsi_full_instruction *inst)
821 {
822 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
823 struct tgsi_full_dst_register *dst = &inst->Dst[0];
824 struct tgsi_full_src_register *src0 = &inst->Src[0];
825 struct tgsi_full_src_register *src1 = &inst->Src[1];
826 struct tgsi_full_instruction new_inst;
827 enum tgsi_opcode opcode = inst->Instruction.Opcode;
828
829 /* NOTE: any potential last instruction must replicate src on all
830 * components (since it could be re-written to write to final dst)
831 */
832
833 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
834 /* MUL tmpA.x, src0.x, src1.x */
835 new_inst = tgsi_default_full_instruction();
836 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
837 new_inst.Instruction.NumDstRegs = 1;
838 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
839 new_inst.Instruction.NumSrcRegs = 2;
840 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
841 reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
842 tctx->emit_instruction(tctx, &new_inst);
843
844 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
845 new_inst = tgsi_default_full_instruction();
846 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
847 new_inst.Instruction.NumDstRegs = 1;
848 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
849 new_inst.Instruction.NumSrcRegs = 3;
850 reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
851 reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
852 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
853
854 if ((opcode == TGSI_OPCODE_DP3) ||
855 (opcode == TGSI_OPCODE_DP4)) {
856 tctx->emit_instruction(tctx, &new_inst);
857
858 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
859 new_inst = tgsi_default_full_instruction();
860 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
861 new_inst.Instruction.NumDstRegs = 1;
862 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
863 new_inst.Instruction.NumSrcRegs = 3;
864 reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
865 reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
866 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
867
868 if (opcode == TGSI_OPCODE_DP4) {
869 tctx->emit_instruction(tctx, &new_inst);
870
871 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
872 new_inst = tgsi_default_full_instruction();
873 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
874 new_inst.Instruction.NumDstRegs = 1;
875 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
876 new_inst.Instruction.NumSrcRegs = 3;
877 reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
878 reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
879 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
880 }
881 }
882
883 /* fixup last instruction to write to dst: */
884 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
885
886 tctx->emit_instruction(tctx, &new_inst);
887 }
888 }
889
890 /* FLR - floor, CEIL - ceil
891 * ; needs: 1 tmp
892 * if (CEIL) {
893 * FRC tmpA, -src
894 * ADD dst, src, tmpA
895 * } else {
896 * FRC tmpA, src
897 * SUB dst, src, tmpA
898 * }
899 */
900 #define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
901 #define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
902 #define FLR_TMP 1
903 #define CEIL_TMP 1
904 static void
transform_flr_ceil(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)905 transform_flr_ceil(struct tgsi_transform_context *tctx,
906 struct tgsi_full_instruction *inst)
907 {
908 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
909 struct tgsi_full_dst_register *dst = &inst->Dst[0];
910 struct tgsi_full_src_register *src0 = &inst->Src[0];
911 struct tgsi_full_instruction new_inst;
912 enum tgsi_opcode opcode = inst->Instruction.Opcode;
913
914 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
915 /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */
916 new_inst = tgsi_default_full_instruction();
917 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
918 new_inst.Instruction.NumDstRegs = 1;
919 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
920 new_inst.Instruction.NumSrcRegs = 1;
921 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
922
923 if (opcode == TGSI_OPCODE_CEIL)
924 new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
925 tctx->emit_instruction(tctx, &new_inst);
926
927 /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */
928 new_inst = tgsi_default_full_instruction();
929 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
930 new_inst.Instruction.NumDstRegs = 1;
931 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
932 new_inst.Instruction.NumSrcRegs = 2;
933 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
934 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
935 if (opcode == TGSI_OPCODE_FLR)
936 new_inst.Src[1].Register.Negate = 1;
937 tctx->emit_instruction(tctx, &new_inst);
938 }
939 }
940
941 /* TRUNC - truncate off fractional part
942 * dst.x = trunc(src.x)
943 * dst.y = trunc(src.y)
944 * dst.z = trunc(src.z)
945 * dst.w = trunc(src.w)
946 *
947 * ; needs: 1 tmp
948 * if (lower FLR) {
949 * FRC tmpA, |src|
950 * SUB tmpA, |src|, tmpA
951 * } else {
952 * FLR tmpA, |src|
953 * }
954 * CMP dst, src, -tmpA, tmpA
955 */
956 #define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
957 #define TRUNC_TMP 1
958 static void
transform_trunc(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)959 transform_trunc(struct tgsi_transform_context *tctx,
960 struct tgsi_full_instruction *inst)
961 {
962 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
963 struct tgsi_full_dst_register *dst = &inst->Dst[0];
964 struct tgsi_full_src_register *src0 = &inst->Src[0];
965 struct tgsi_full_instruction new_inst;
966
967 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
968 if (ctx->config->lower_FLR) {
969 new_inst = tgsi_default_full_instruction();
970 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
971 new_inst.Instruction.NumDstRegs = 1;
972 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
973 new_inst.Instruction.NumSrcRegs = 1;
974 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
975 new_inst.Src[0].Register.Absolute = true;
976 new_inst.Src[0].Register.Negate = false;
977 tctx->emit_instruction(tctx, &new_inst);
978
979 new_inst = tgsi_default_full_instruction();
980 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
981 new_inst.Instruction.NumDstRegs = 1;
982 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
983 new_inst.Instruction.NumSrcRegs = 2;
984 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
985 new_inst.Src[0].Register.Absolute = true;
986 new_inst.Src[0].Register.Negate = false;
987 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
988 new_inst.Src[1].Register.Negate = 1;
989 tctx->emit_instruction(tctx, &new_inst);
990 } else {
991 new_inst = tgsi_default_full_instruction();
992 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
993 new_inst.Instruction.NumDstRegs = 1;
994 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
995 new_inst.Instruction.NumSrcRegs = 1;
996 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
997 new_inst.Src[0].Register.Absolute = true;
998 new_inst.Src[0].Register.Negate = false;
999 tctx->emit_instruction(tctx, &new_inst);
1000 }
1001
1002 new_inst = tgsi_default_full_instruction();
1003 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1004 new_inst.Instruction.NumDstRegs = 1;
1005 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1006 new_inst.Instruction.NumSrcRegs = 3;
1007 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1008 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1009 new_inst.Src[1].Register.Negate = true;
1010 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1011 tctx->emit_instruction(tctx, &new_inst);
1012 }
1013 }
1014
/* Inserts a MOV_SAT for the needed components of tex coord.  Note that
 * in the case of TXP, the clamping must happen *after* projection, so
 * we need to lower TXP to TEX.
 *
 *   MOV tmpA, src0
 *   if (opc == TXP) {
 *     ; do perspective division manually before clamping:
 *     RCP tmpB.x, tmpA.w
 *     MUL tmpA.<pmask>, tmpA, tmpB.xxxx   ; <pmask> is all s/t/r coords
 *     opc = TEX;
 *   }
 *   MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords
 *   <opc> dst, tmpA, ...
 */
1029 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1030 #define SAMP_TMP 2
1031 static int
transform_samp(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)1032 transform_samp(struct tgsi_transform_context *tctx,
1033 struct tgsi_full_instruction *inst)
1034 {
1035 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1036 struct tgsi_full_src_register *coord = &inst->Src[0];
1037 struct tgsi_full_src_register *samp;
1038 struct tgsi_full_instruction new_inst;
1039 /* mask is clamped coords, pmask is all coords (for projection): */
1040 unsigned mask = 0, pmask = 0, smask;
1041 unsigned tex = inst->Texture.Texture;
1042 enum tgsi_opcode opcode = inst->Instruction.Opcode;
1043 bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
1044 (ctx->config->lower_TXP & (1 << tex));
1045
1046 if (opcode == TGSI_OPCODE_TXB2) {
1047 samp = &inst->Src[2];
1048 } else {
1049 samp = &inst->Src[1];
1050 }
1051
1052 /* convert sampler # to bitmask to test: */
1053 smask = 1 << samp->Register.Index;
1054
1055 /* check if we actually need to lower this one: */
1056 if (!(ctx->saturate & smask) && !lower_txp)
1057 return -1;
1058
1059 /* figure out which coordinates need saturating:
1060 * - RECT textures should not get saturated
1061 * - array index coords should not get saturated
1062 */
1063 switch (tex) {
1064 case TGSI_TEXTURE_3D:
1065 case TGSI_TEXTURE_CUBE:
1066 case TGSI_TEXTURE_CUBE_ARRAY:
1067 case TGSI_TEXTURE_SHADOWCUBE:
1068 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1069 if (ctx->config->saturate_r & smask)
1070 mask |= TGSI_WRITEMASK_Z;
1071 pmask |= TGSI_WRITEMASK_Z;
1072 FALLTHROUGH;
1073
1074 case TGSI_TEXTURE_2D:
1075 case TGSI_TEXTURE_2D_ARRAY:
1076 case TGSI_TEXTURE_SHADOW2D:
1077 case TGSI_TEXTURE_SHADOW2D_ARRAY:
1078 case TGSI_TEXTURE_2D_MSAA:
1079 case TGSI_TEXTURE_2D_ARRAY_MSAA:
1080 if (ctx->config->saturate_t & smask)
1081 mask |= TGSI_WRITEMASK_Y;
1082 pmask |= TGSI_WRITEMASK_Y;
1083 FALLTHROUGH;
1084
1085 case TGSI_TEXTURE_1D:
1086 case TGSI_TEXTURE_1D_ARRAY:
1087 case TGSI_TEXTURE_SHADOW1D:
1088 case TGSI_TEXTURE_SHADOW1D_ARRAY:
1089 if (ctx->config->saturate_s & smask)
1090 mask |= TGSI_WRITEMASK_X;
1091 pmask |= TGSI_WRITEMASK_X;
1092 break;
1093
1094 case TGSI_TEXTURE_RECT:
1095 case TGSI_TEXTURE_SHADOWRECT:
1096 /* we don't saturate, but in case of lower_txp we
1097 * still need to do the perspective divide:
1098 */
1099 pmask = TGSI_WRITEMASK_XY;
1100 break;
1101 }
1102
1103 /* sanity check.. driver could be asking to saturate a non-
1104 * existent coordinate component:
1105 */
1106 if (!mask && !lower_txp)
1107 return -1;
1108
1109 /* MOV tmpA, src0 */
1110 create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1111
1112 /* This is a bit sad.. we need to clamp *after* the coords
1113 * are projected, which means lowering TXP to TEX and doing
1114 * the projection ourself. But since I haven't figured out
1115 * how to make the lowering code deliver an electric shock
1116 * to anyone using GL_CLAMP, we must do this instead:
1117 */
1118 if (opcode == TGSI_OPCODE_TXP) {
1119 /* RCP tmpB.x tmpA.w */
1120 new_inst = tgsi_default_full_instruction();
1121 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1122 new_inst.Instruction.NumDstRegs = 1;
1123 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1124 new_inst.Instruction.NumSrcRegs = 1;
1125 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
1126 tctx->emit_instruction(tctx, &new_inst);
1127
1128 /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1129 new_inst = tgsi_default_full_instruction();
1130 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1131 new_inst.Instruction.NumDstRegs = 1;
1132 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1133 new_inst.Instruction.NumSrcRegs = 2;
1134 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1135 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
1136 tctx->emit_instruction(tctx, &new_inst);
1137
1138 opcode = TGSI_OPCODE_TEX;
1139 }
1140
1141 /* MOV_SAT tmpA.<mask>, tmpA */
1142 if (mask) {
1143 create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
1144 }
1145
1146 /* modify the texture samp instruction to take fixed up coord: */
1147 new_inst = *inst;
1148 new_inst.Instruction.Opcode = opcode;
1149 new_inst.Src[0] = ctx->tmp[A].src;
1150 tctx->emit_instruction(tctx, &new_inst);
1151
1152 return 0;
1153 }
1154
1155 /* Two-sided color emulation:
1156 * For each COLOR input, create a corresponding BCOLOR input, plus
1157 * CMP instruction to select front or back color based on FACE
1158 */
1159 #define TWOSIDE_GROW(n) ( \
1160 2 + /* FACE */ \
1161 ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
1162 ((n) * 1) + /* TEMP[] */ \
1163 ((n) * NINST(3)) /* CMP instr */ \
1164 )
1165
1166 static void
emit_twoside(struct tgsi_transform_context * tctx)1167 emit_twoside(struct tgsi_transform_context *tctx)
1168 {
1169 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1170 struct tgsi_shader_info *info = ctx->info;
1171 struct tgsi_full_declaration decl;
1172 struct tgsi_full_instruction new_inst;
1173 unsigned inbase, tmpbase;
1174 unsigned i;
1175
1176 inbase = info->file_max[TGSI_FILE_INPUT] + 1;
1177 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1178
1179 /* additional inputs for BCOLOR's */
1180 for (i = 0; i < ctx->two_side_colors; i++) {
1181 unsigned in_idx = ctx->two_side_idx[i];
1182 decl = tgsi_default_full_declaration();
1183 decl.Declaration.File = TGSI_FILE_INPUT;
1184 decl.Declaration.Semantic = true;
1185 decl.Range.First = decl.Range.Last = inbase + i;
1186 decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1187 decl.Semantic.Index = info->input_semantic_index[in_idx];
1188 decl.Declaration.Interpolate = true;
1189 decl.Interp.Interpolate = info->input_interpolate[in_idx];
1190 decl.Interp.Location = info->input_interpolate_loc[in_idx];
1191 tctx->emit_declaration(tctx, &decl);
1192 }
1193
1194 /* additional input for FACE */
1195 if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1196 decl = tgsi_default_full_declaration();
1197 decl.Declaration.File = TGSI_FILE_INPUT;
1198 decl.Declaration.Semantic = true;
1199 decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1200 decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1201 decl.Semantic.Index = 0;
1202 tctx->emit_declaration(tctx, &decl);
1203
1204 ctx->face_idx = decl.Range.First;
1205 }
1206
1207 /* additional temps for COLOR/BCOLOR selection: */
1208 for (i = 0; i < ctx->two_side_colors; i++) {
1209 decl = tgsi_default_full_declaration();
1210 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1211 decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1212 tctx->emit_declaration(tctx, &decl);
1213 }
1214
1215 /* and finally additional instructions to select COLOR/BCOLOR: */
1216 for (i = 0; i < ctx->two_side_colors; i++) {
1217 new_inst = tgsi_default_full_instruction();
1218 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1219
1220 new_inst.Instruction.NumDstRegs = 1;
1221 new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
1222 new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1223 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1224
1225 new_inst.Instruction.NumSrcRegs = 3;
1226 new_inst.Src[0].Register.File = TGSI_FILE_INPUT;
1227 new_inst.Src[0].Register.Index = ctx->face_idx;
1228 new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1229 new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1230 new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1231 new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1232 new_inst.Src[1].Register.File = TGSI_FILE_INPUT;
1233 new_inst.Src[1].Register.Index = inbase + i;
1234 new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1235 new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1236 new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1237 new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1238 new_inst.Src[2].Register.File = TGSI_FILE_INPUT;
1239 new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1240 new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1241 new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1242 new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1243 new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1244
1245 tctx->emit_instruction(tctx, &new_inst);
1246 }
1247 }
1248
1249 static void
emit_decls(struct tgsi_transform_context * tctx)1250 emit_decls(struct tgsi_transform_context *tctx)
1251 {
1252 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1253 struct tgsi_shader_info *info = ctx->info;
1254 struct tgsi_full_declaration decl;
1255 struct tgsi_full_immediate immed;
1256 unsigned tmpbase;
1257 unsigned i;
1258
1259 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1260
1261 ctx->color_base = tmpbase + ctx->numtmp;
1262
1263 /* declare immediate: */
1264 immed = tgsi_default_full_immediate();
1265 immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1266 immed.u[0].Float = 0.0;
1267 immed.u[1].Float = 1.0;
1268 immed.u[2].Float = 128.0;
1269 immed.u[3].Float = 0.0;
1270 tctx->emit_immediate(tctx, &immed);
1271
1272 ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1273 ctx->imm.Register.Index = info->immediate_count;
1274 ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1275 ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1276 ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1277 ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1278
1279 /* declare temp regs: */
1280 for (i = 0; i < ctx->numtmp; i++) {
1281 decl = tgsi_default_full_declaration();
1282 decl.Declaration.File = TGSI_FILE_TEMPORARY;
1283 decl.Range.First = decl.Range.Last = tmpbase + i;
1284 tctx->emit_declaration(tctx, &decl);
1285
1286 ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;
1287 ctx->tmp[i].src.Register.Index = tmpbase + i;
1288 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1289 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1290 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1291 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1292
1293 ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;
1294 ctx->tmp[i].dst.Register.Index = tmpbase + i;
1295 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1296 }
1297
1298 if (ctx->two_side_colors)
1299 emit_twoside(tctx);
1300 }
1301
1302 static void
rename_color_inputs(struct tgsi_lowering_context * ctx,struct tgsi_full_instruction * inst)1303 rename_color_inputs(struct tgsi_lowering_context *ctx,
1304 struct tgsi_full_instruction *inst)
1305 {
1306 unsigned i, j;
1307 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1308 struct tgsi_src_register *src = &inst->Src[i].Register;
1309 if (src->File == TGSI_FILE_INPUT) {
1310 for (j = 0; j < ctx->two_side_colors; j++) {
1311 if (src->Index == (int)ctx->two_side_idx[j]) {
1312 src->File = TGSI_FILE_TEMPORARY;
1313 src->Index = ctx->color_base + j;
1314 break;
1315 }
1316 }
1317 }
1318 }
1319
1320 }
1321
1322 static void
transform_instr(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)1323 transform_instr(struct tgsi_transform_context *tctx,
1324 struct tgsi_full_instruction *inst)
1325 {
1326 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1327
1328 if (!ctx->emitted_decls) {
1329 emit_decls(tctx);
1330 ctx->emitted_decls = 1;
1331 }
1332
1333 /* if emulating two-sided-color, we need to re-write some
1334 * src registers:
1335 */
1336 if (ctx->two_side_colors)
1337 rename_color_inputs(ctx, inst);
1338
1339 switch (inst->Instruction.Opcode) {
1340 case TGSI_OPCODE_DST:
1341 if (!ctx->config->lower_DST)
1342 goto skip;
1343 transform_dst(tctx, inst);
1344 break;
1345 case TGSI_OPCODE_LRP:
1346 if (!ctx->config->lower_LRP)
1347 goto skip;
1348 transform_lrp(tctx, inst);
1349 break;
1350 case TGSI_OPCODE_FRC:
1351 if (!ctx->config->lower_FRC)
1352 goto skip;
1353 transform_frc(tctx, inst);
1354 break;
1355 case TGSI_OPCODE_POW:
1356 if (!ctx->config->lower_POW)
1357 goto skip;
1358 transform_pow(tctx, inst);
1359 break;
1360 case TGSI_OPCODE_LIT:
1361 if (!ctx->config->lower_LIT)
1362 goto skip;
1363 transform_lit(tctx, inst);
1364 break;
1365 case TGSI_OPCODE_EXP:
1366 if (!ctx->config->lower_EXP)
1367 goto skip;
1368 transform_exp(tctx, inst);
1369 break;
1370 case TGSI_OPCODE_LOG:
1371 if (!ctx->config->lower_LOG)
1372 goto skip;
1373 transform_log(tctx, inst);
1374 break;
1375 case TGSI_OPCODE_DP4:
1376 if (!ctx->config->lower_DP4)
1377 goto skip;
1378 transform_dotp(tctx, inst);
1379 break;
1380 case TGSI_OPCODE_DP3:
1381 if (!ctx->config->lower_DP3)
1382 goto skip;
1383 transform_dotp(tctx, inst);
1384 break;
1385 case TGSI_OPCODE_DP2:
1386 if (!ctx->config->lower_DP2)
1387 goto skip;
1388 transform_dotp(tctx, inst);
1389 break;
1390 case TGSI_OPCODE_FLR:
1391 if (!ctx->config->lower_FLR)
1392 goto skip;
1393 transform_flr_ceil(tctx, inst);
1394 break;
1395 case TGSI_OPCODE_CEIL:
1396 if (!ctx->config->lower_CEIL)
1397 goto skip;
1398 transform_flr_ceil(tctx, inst);
1399 break;
1400 case TGSI_OPCODE_TRUNC:
1401 if (!ctx->config->lower_TRUNC)
1402 goto skip;
1403 transform_trunc(tctx, inst);
1404 break;
1405 case TGSI_OPCODE_TEX:
1406 case TGSI_OPCODE_TXP:
1407 case TGSI_OPCODE_TXB:
1408 case TGSI_OPCODE_TXB2:
1409 case TGSI_OPCODE_TXL:
1410 if (transform_samp(tctx, inst))
1411 goto skip;
1412 break;
1413 default:
1414 skip:
1415 tctx->emit_instruction(tctx, inst);
1416 break;
1417 }
1418 }
1419
1420 /* returns NULL if no lowering required, else returns the new
1421 * tokens (which caller is required to free()). In either case
1422 * returns the current info.
1423 */
1424 const struct tgsi_token *
tgsi_transform_lowering(const struct tgsi_lowering_config * config,const struct tgsi_token * tokens,struct tgsi_shader_info * info)1425 tgsi_transform_lowering(const struct tgsi_lowering_config *config,
1426 const struct tgsi_token *tokens,
1427 struct tgsi_shader_info *info)
1428 {
1429 struct tgsi_lowering_context ctx;
1430 struct tgsi_token *newtoks;
1431 int newlen, numtmp;
1432
1433 /* sanity check in case limit is ever increased: */
1434 STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1435
1436 /* sanity check the lowering */
1437 assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
1438 assert(!(config->lower_FRC && config->lower_TRUNC));
1439
1440 memset(&ctx, 0, sizeof(ctx));
1441 ctx.base.transform_instruction = transform_instr;
1442 ctx.info = info;
1443 ctx.config = config;
1444
1445 tgsi_scan_shader(tokens, info);
1446
1447 /* if we are adding fragment shader support to emulate two-sided
1448 * color, then figure out the number of additional inputs we need
1449 * to create for BCOLOR's..
1450 */
1451 if ((info->processor == PIPE_SHADER_FRAGMENT) &&
1452 config->color_two_side) {
1453 int i;
1454 ctx.face_idx = -1;
1455 for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1456 if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1457 ctx.two_side_idx[ctx.two_side_colors++] = i;
1458 if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1459 ctx.face_idx = i;
1460 }
1461 }
1462
1463 ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1464
1465 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1466 /* if there are no instructions to lower, then we are done: */
1467 if (!(OPCS(DST) ||
1468 OPCS(LRP) ||
1469 OPCS(FRC) ||
1470 OPCS(POW) ||
1471 OPCS(LIT) ||
1472 OPCS(EXP) ||
1473 OPCS(LOG) ||
1474 OPCS(DP4) ||
1475 OPCS(DP3) ||
1476 OPCS(DP2) ||
1477 OPCS(FLR) ||
1478 OPCS(CEIL) ||
1479 OPCS(TRUNC) ||
1480 OPCS(TXP) ||
1481 ctx.two_side_colors ||
1482 ctx.saturate))
1483 return NULL;
1484
1485 #if 0 /* debug */
1486 _debug_printf("BEFORE:");
1487 tgsi_dump(tokens, 0);
1488 #endif
1489
1490 numtmp = 0;
1491 newlen = tgsi_num_tokens(tokens);
1492 if (OPCS(DST)) {
1493 newlen += DST_GROW * OPCS(DST);
1494 numtmp = MAX2(numtmp, DST_TMP);
1495 }
1496 if (OPCS(LRP)) {
1497 newlen += LRP_GROW * OPCS(LRP);
1498 numtmp = MAX2(numtmp, LRP_TMP);
1499 }
1500 if (OPCS(FRC)) {
1501 newlen += FRC_GROW * OPCS(FRC);
1502 numtmp = MAX2(numtmp, FRC_TMP);
1503 }
1504 if (OPCS(POW)) {
1505 newlen += POW_GROW * OPCS(POW);
1506 numtmp = MAX2(numtmp, POW_TMP);
1507 }
1508 if (OPCS(LIT)) {
1509 newlen += LIT_GROW * OPCS(LIT);
1510 numtmp = MAX2(numtmp, LIT_TMP);
1511 }
1512 if (OPCS(EXP)) {
1513 newlen += EXP_GROW * OPCS(EXP);
1514 numtmp = MAX2(numtmp, EXP_TMP);
1515 }
1516 if (OPCS(LOG)) {
1517 newlen += LOG_GROW * OPCS(LOG);
1518 numtmp = MAX2(numtmp, LOG_TMP);
1519 }
1520 if (OPCS(DP4)) {
1521 newlen += DP4_GROW * OPCS(DP4);
1522 numtmp = MAX2(numtmp, DOTP_TMP);
1523 }
1524 if (OPCS(DP3)) {
1525 newlen += DP3_GROW * OPCS(DP3);
1526 numtmp = MAX2(numtmp, DOTP_TMP);
1527 }
1528 if (OPCS(DP2)) {
1529 newlen += DP2_GROW * OPCS(DP2);
1530 numtmp = MAX2(numtmp, DOTP_TMP);
1531 }
1532 if (OPCS(FLR)) {
1533 newlen += FLR_GROW * OPCS(FLR);
1534 numtmp = MAX2(numtmp, FLR_TMP);
1535 }
1536 if (OPCS(CEIL)) {
1537 newlen += CEIL_GROW * OPCS(CEIL);
1538 numtmp = MAX2(numtmp, CEIL_TMP);
1539 }
1540 if (OPCS(TRUNC)) {
1541 newlen += TRUNC_GROW * OPCS(TRUNC);
1542 numtmp = MAX2(numtmp, TRUNC_TMP);
1543 }
1544 if (ctx.saturate || config->lower_TXP) {
1545 int n = 0;
1546
1547 if (ctx.saturate) {
1548 n = info->opcode_count[TGSI_OPCODE_TEX] +
1549 info->opcode_count[TGSI_OPCODE_TXP] +
1550 info->opcode_count[TGSI_OPCODE_TXB] +
1551 info->opcode_count[TGSI_OPCODE_TXB2] +
1552 info->opcode_count[TGSI_OPCODE_TXL];
1553 } else if (config->lower_TXP) {
1554 n = info->opcode_count[TGSI_OPCODE_TXP];
1555 }
1556
1557 newlen += SAMP_GROW * n;
1558 numtmp = MAX2(numtmp, SAMP_TMP);
1559 }
1560
1561 /* specifically don't include two_side_colors temps in the count: */
1562 ctx.numtmp = numtmp;
1563
1564 if (ctx.two_side_colors) {
1565 newlen += TWOSIDE_GROW(ctx.two_side_colors);
1566 /* note: we permanently consume temp regs, re-writing references
1567 * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1568 * instruction that selects which varying to use):
1569 */
1570 numtmp += ctx.two_side_colors;
1571 }
1572
1573 newlen += 2 * numtmp;
1574 newlen += 5; /* immediate */
1575
1576 newtoks = tgsi_transform_shader(tokens, newlen, &ctx.base);
1577 if (!newtoks)
1578 return NULL;
1579
1580 tgsi_scan_shader(newtoks, info);
1581
1582 #if 0 /* debug */
1583 _debug_printf("AFTER:");
1584 tgsi_dump(newtoks, 0);
1585 #endif
1586
1587 return newtoks;
1588 }
1589