xref: /aosp_15_r20/external/mesa3d/src/gallium/auxiliary/tgsi/tgsi_lowering.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2014 Rob Clark <[email protected]>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark <[email protected]>
25  */
26 
27 #include "tgsi/tgsi_transform.h"
28 #include "tgsi/tgsi_scan.h"
29 #include "tgsi/tgsi_dump.h"
30 
31 #include "util/compiler.h"
32 #include "util/u_debug.h"
33 #include "util/u_math.h"
34 
35 #include "tgsi_lowering.h"
36 
37 struct tgsi_lowering_context {
38    struct tgsi_transform_context base;
39    const struct tgsi_lowering_config *config;
40    struct tgsi_shader_info *info;
41    unsigned two_side_colors;
42    unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
43    unsigned color_base;  /* base register for chosen COLOR/BCOLOR's */
44    int face_idx;
45    unsigned numtmp;
46    struct {
47       struct tgsi_full_src_register src;
48       struct tgsi_full_dst_register dst;
49    } tmp[2];
50 #define A 0
51 #define B 1
52    struct tgsi_full_src_register imm;
53    int emitted_decls;
54    unsigned saturate;
55 };
56 
/*
 * Downcast from the generic transform context to the lowering context.
 * Valid because 'base' is the first member of struct tgsi_lowering_context.
 */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   struct tgsi_lowering_context *lowering =
      (struct tgsi_lowering_context *)tctx;

   return lowering;
}
62 
63 /*
64  * Utility helpers:
65  */
66 
67 static void
reg_dst(struct tgsi_full_dst_register * dst,const struct tgsi_full_dst_register * orig_dst,unsigned wrmask)68 reg_dst(struct tgsi_full_dst_register *dst,
69 	const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
70 {
71    *dst = *orig_dst;
72    dst->Register.WriteMask &= wrmask;
73    assert(dst->Register.WriteMask);
74 }
75 
76 static inline void
get_swiz(unsigned * swiz,const struct tgsi_src_register * src)77 get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
78 {
79    swiz[0] = src->SwizzleX;
80    swiz[1] = src->SwizzleY;
81    swiz[2] = src->SwizzleZ;
82    swiz[3] = src->SwizzleW;
83 }
84 
85 static void
reg_src(struct tgsi_full_src_register * src,const struct tgsi_full_src_register * orig_src,unsigned sx,unsigned sy,unsigned sz,unsigned sw)86 reg_src(struct tgsi_full_src_register *src,
87 	const struct tgsi_full_src_register *orig_src,
88 	unsigned sx, unsigned sy, unsigned sz, unsigned sw)
89 {
90    unsigned swiz[4];
91    get_swiz(swiz, &orig_src->Register);
92    *src = *orig_src;
93    src->Register.SwizzleX = swiz[sx];
94    src->Register.SwizzleY = swiz[sy];
95    src->Register.SwizzleZ = swiz[sz];
96    src->Register.SwizzleW = swiz[sw];
97 }
98 
/* SWIZ(a,b,c,d) expands to the four TGSI_SWIZZLE_* arguments expected by
 * reg_src().  An underscore may be passed for a channel whose value does
 * not matter; it resolves to X.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X  /* don't-care value! */
#define SWIZ(sx, sy, sz, sw)  \
      TGSI_SWIZZLE_ ## sx,    \
      TGSI_SWIZZLE_ ## sy,    \
      TGSI_SWIZZLE_ ## sz,    \
      TGSI_SWIZZLE_ ## sw
102 
103 /*
104  * if (dst.x aliases src.x) {
105  *   MOV tmpA.x, src.x
106  *   src = tmpA
107  * }
108  * COS dst.x, src.x
109  * SIN dst.y, src.x
110  * MOV dst.zw, imm{0.0, 1.0}
111  */
112 static bool
aliases(const struct tgsi_full_dst_register * dst,unsigned dst_mask,const struct tgsi_full_src_register * src,unsigned src_mask)113 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
114 	const struct tgsi_full_src_register *src, unsigned src_mask)
115 {
116    if ((dst->Register.File == src->Register.File) &&
117        (dst->Register.Index == src->Register.Index)) {
118       unsigned i, actual_mask = 0;
119       unsigned swiz[4];
120       get_swiz(swiz, &src->Register);
121       for (i = 0; i < 4; i++)
122          if (src_mask & (1 << i))
123             actual_mask |= (1 << swiz[i]);
124       if (actual_mask & dst_mask)
125          return true;
126    }
127    return false;
128 }
129 
130 static void
create_mov(struct tgsi_transform_context * tctx,const struct tgsi_full_dst_register * dst,const struct tgsi_full_src_register * src,unsigned mask,unsigned saturate)131 create_mov(struct tgsi_transform_context *tctx,
132            const struct tgsi_full_dst_register *dst,
133            const struct tgsi_full_src_register *src,
134            unsigned mask, unsigned saturate)
135 {
136    struct tgsi_full_instruction new_inst;
137 
138    new_inst = tgsi_default_full_instruction();
139    new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
140    new_inst.Instruction.Saturate = saturate;
141    new_inst.Instruction.NumDstRegs = 1;
142    reg_dst(&new_inst.Dst[0], dst, mask);
143    new_inst.Instruction.NumSrcRegs = 1;
144    reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
145    tctx->emit_instruction(tctx, &new_inst);
146 }
147 
/* Token accounting used to estimate how much a lowering grows the shader.
 * The estimate is deliberately pessimistic: the instruction being removed
 * is assumed to be as small as possible (no ADDR[] or anything else that
 * adds tokens to a src/dst), while every inserted instruction is assumed
 * to need the larger operand encoding.
 *
 * OINST(nsrc) - old (removed) instruction
 *    1        : instruction itself
 *    1        : dst
 *    1 * nsrc : srcN
 *
 * NINST(nsrc) - new (inserted) instruction
 *    1        : instruction itself
 *    2        : dst
 *    2 * nsrc : srcN
 */

#define OINST(nsrc)  (1 + 1 + 1 * (nsrc))
#define NINST(nsrc)  (1 + 2 + 2 * (nsrc))
166 
167 /*
168  * Lowering Translators:
169  */
170 
171 /* DST - Distance Vector
172  *   dst.x = 1.0
173  *   dst.y = src0.y \times src1.y
174  *   dst.z = src0.z
175  *   dst.w = src1.w
176  *
177  * ; note: could be more clever and use just a single temp
178  * ;       if I was clever enough to re-write the swizzles.
179  * ; needs: 2 tmp, imm{1.0}
180  * if (dst.y aliases src0.z) {
181  *   MOV tmpA.yz, src0.yz
182  *   src0 = tmpA
183  * }
184  * if (dst.yz aliases src1.w) {
185  *   MOV tmpB.yw, src1.yw
186  *   src1 = tmpB
187  * }
188  * MUL dst.y, src0.y, src1.y
189  * MOV dst.z, src0.z
190  * MOV dst.w, src1.w
191  * MOV dst.x, imm{1.0}
192  */
193 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
194 		NINST(1) + NINST(1) - OINST(2))
195 #define DST_TMP  2
196 static void
transform_dst(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)197 transform_dst(struct tgsi_transform_context *tctx,
198               struct tgsi_full_instruction *inst)
199 {
200    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
201    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
202    struct tgsi_full_src_register *src0 = &inst->Src[0];
203    struct tgsi_full_src_register *src1 = &inst->Src[1];
204    struct tgsi_full_instruction new_inst;
205 
206    if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
207       create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
208       src0 = &ctx->tmp[A].src;
209    }
210 
211    if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
212       create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
213       src1 = &ctx->tmp[B].src;
214    }
215 
216    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
217       /* MUL dst.y, src0.y, src1.y */
218       new_inst = tgsi_default_full_instruction();
219       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
220       new_inst.Instruction.NumDstRegs = 1;
221       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
222       new_inst.Instruction.NumSrcRegs = 2;
223       reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
224       reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
225       tctx->emit_instruction(tctx, &new_inst);
226    }
227 
228    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
229       /* MOV dst.z, src0.z */
230       new_inst = tgsi_default_full_instruction();
231       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
232       new_inst.Instruction.NumDstRegs = 1;
233       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
234       new_inst.Instruction.NumSrcRegs = 1;
235       reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
236       tctx->emit_instruction(tctx, &new_inst);
237    }
238 
239    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
240       /* MOV dst.w, src1.w */
241       new_inst = tgsi_default_full_instruction();
242       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
243       new_inst.Instruction.NumDstRegs = 1;
244       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
245       new_inst.Instruction.NumSrcRegs = 1;
246       reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
247       tctx->emit_instruction(tctx, &new_inst);
248    }
249 
250    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
251       /* MOV dst.x, imm{1.0} */
252       new_inst = tgsi_default_full_instruction();
253       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
254       new_inst.Instruction.NumDstRegs = 1;
255       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
256       new_inst.Instruction.NumSrcRegs = 1;
257       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
258       tctx->emit_instruction(tctx, &new_inst);
259    }
260 }
261 
262 /* LRP - Linear Interpolate
263  *  dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
264  *  dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
265  *  dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
266  *  dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
267  *
268  * This becomes: src0 \times src1 + src2 - src0 \times src2, which
269  * can then become: src0 \times src1 - (src0 \times src2 - src2)
270  *
271  * ; needs: 1 tmp
272  * MAD tmpA, src0, src2, -src2
273  * MAD dst, src0, src1, -tmpA
274  */
275 #define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
276 #define LRP_TMP  1
277 static void
transform_lrp(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)278 transform_lrp(struct tgsi_transform_context *tctx,
279               struct tgsi_full_instruction *inst)
280 {
281    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
282    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
283    struct tgsi_full_src_register *src0 = &inst->Src[0];
284    struct tgsi_full_src_register *src1 = &inst->Src[1];
285    struct tgsi_full_src_register *src2 = &inst->Src[2];
286    struct tgsi_full_instruction new_inst;
287 
288    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
289       /* MAD tmpA, src0, src2, -src2 */
290       new_inst = tgsi_default_full_instruction();
291       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
292       new_inst.Instruction.NumDstRegs = 1;
293       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
294       new_inst.Instruction.NumSrcRegs = 3;
295       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
296       reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
297       reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
298       new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
299       tctx->emit_instruction(tctx, &new_inst);
300 
301       /* MAD dst, src0, src1, -tmpA */
302       new_inst = tgsi_default_full_instruction();
303       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
304       new_inst.Instruction.NumDstRegs = 1;
305       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
306       new_inst.Instruction.NumSrcRegs = 3;
307       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
308       reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
309       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
310       new_inst.Src[2].Register.Negate = true;
311       tctx->emit_instruction(tctx, &new_inst);
312    }
313 }
314 
315 /* FRC - Fraction
316  *  dst.x = src.x - \lfloor src.x\rfloor
317  *  dst.y = src.y - \lfloor src.y\rfloor
318  *  dst.z = src.z - \lfloor src.z\rfloor
319  *  dst.w = src.w - \lfloor src.w\rfloor
320  *
321  * ; needs: 1 tmp
322  * FLR tmpA, src
323  * SUB dst, src, tmpA
324  */
325 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
326 #define FRC_TMP  1
327 static void
transform_frc(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)328 transform_frc(struct tgsi_transform_context *tctx,
329               struct tgsi_full_instruction *inst)
330 {
331    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
332    struct tgsi_full_dst_register *dst = &inst->Dst[0];
333    struct tgsi_full_src_register *src = &inst->Src[0];
334    struct tgsi_full_instruction new_inst;
335 
336    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
337       /* FLR tmpA, src */
338       new_inst = tgsi_default_full_instruction();
339       new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
340       new_inst.Instruction.NumDstRegs = 1;
341       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
342       new_inst.Instruction.NumSrcRegs = 1;
343       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
344       tctx->emit_instruction(tctx, &new_inst);
345 
346       /* SUB dst, src, tmpA */
347       new_inst = tgsi_default_full_instruction();
348       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
349       new_inst.Instruction.NumDstRegs = 1;
350       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
351       new_inst.Instruction.NumSrcRegs = 2;
352       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
353       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
354       new_inst.Src[1].Register.Negate = 1;
355       tctx->emit_instruction(tctx, &new_inst);
356    }
357 }
358 
359 /* POW - Power
360  *  dst.x = src0.x^{src1.x}
361  *  dst.y = src0.x^{src1.x}
362  *  dst.z = src0.x^{src1.x}
363  *  dst.w = src0.x^{src1.x}
364  *
365  * ; needs: 1 tmp
366  * LG2 tmpA.x, src0.x
367  * MUL tmpA.x, src1.x, tmpA.x
368  * EX2 dst, tmpA.x
369  */
370 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
371 #define POW_TMP  1
372 static void
transform_pow(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)373 transform_pow(struct tgsi_transform_context *tctx,
374               struct tgsi_full_instruction *inst)
375 {
376    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
377    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
378    struct tgsi_full_src_register *src0 = &inst->Src[0];
379    struct tgsi_full_src_register *src1 = &inst->Src[1];
380    struct tgsi_full_instruction new_inst;
381 
382    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
383       /* LG2 tmpA.x, src0.x */
384       new_inst = tgsi_default_full_instruction();
385       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
386       new_inst.Instruction.NumDstRegs = 1;
387       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
388       new_inst.Instruction.NumSrcRegs = 1;
389       reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
390       tctx->emit_instruction(tctx, &new_inst);
391 
392       /* MUL tmpA.x, src1.x, tmpA.x */
393       new_inst = tgsi_default_full_instruction();
394       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
395       new_inst.Instruction.NumDstRegs = 1;
396       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
397       new_inst.Instruction.NumSrcRegs = 2;
398       reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
399       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
400       tctx->emit_instruction(tctx, &new_inst);
401 
402       /* EX2 dst, tmpA.x */
403       new_inst = tgsi_default_full_instruction();
404       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
405       new_inst.Instruction.NumDstRegs = 1;
406       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
407       new_inst.Instruction.NumSrcRegs = 1;
408       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
409       tctx->emit_instruction(tctx, &new_inst);
410    }
411 }
412 
413 /* LIT - Light Coefficients
414  *  dst.x = 1.0
415  *  dst.y = max(src.x, 0.0)
416  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
417  *  dst.w = 1.0
418  *
419  * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
420  * MAX tmpA.xy, src.xy, imm{0.0}
421  * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
422  * LG2 tmpA.y, tmpA.y
423  * MUL tmpA.y, tmpA.z, tmpA.y
424  * EX2 tmpA.y, tmpA.y
425  * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
426  * MOV dst.yz, tmpA.xy
427  * MOV dst.xw, imm{1.0}
428  */
429 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
430 		NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
431 #define LIT_TMP  1
432 static void
transform_lit(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)433 transform_lit(struct tgsi_transform_context *tctx,
434               struct tgsi_full_instruction *inst)
435 {
436    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
437    struct tgsi_full_dst_register *dst = &inst->Dst[0];
438    struct tgsi_full_src_register *src = &inst->Src[0];
439    struct tgsi_full_instruction new_inst;
440 
441    if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
442       /* MAX tmpA.xy, src.xy, imm{0.0} */
443       new_inst = tgsi_default_full_instruction();
444       new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
445       new_inst.Instruction.NumDstRegs = 1;
446       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
447       new_inst.Instruction.NumSrcRegs = 2;
448       reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
449       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
450       tctx->emit_instruction(tctx, &new_inst);
451 
452       /* MIN tmpA.z, src.w, imm{128.0} */
453       new_inst = tgsi_default_full_instruction();
454       new_inst.Instruction.Opcode = TGSI_OPCODE_MIN;
455       new_inst.Instruction.NumDstRegs = 1;
456       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
457       new_inst.Instruction.NumSrcRegs = 2;
458       reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
459       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
460       tctx->emit_instruction(tctx, &new_inst);
461 
462       /* MAX tmpA.z, tmpA.z, -imm{128.0} */
463       new_inst = tgsi_default_full_instruction();
464       new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
465       new_inst.Instruction.NumDstRegs = 1;
466       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
467       new_inst.Instruction.NumSrcRegs = 2;
468       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _));
469       reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
470       new_inst.Src[1].Register.Negate = true;
471       tctx->emit_instruction(tctx, &new_inst);
472 
473       /* LG2 tmpA.y, tmpA.y */
474       new_inst = tgsi_default_full_instruction();
475       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
476       new_inst.Instruction.NumDstRegs = 1;
477       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
478       new_inst.Instruction.NumSrcRegs = 1;
479       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
480       tctx->emit_instruction(tctx, &new_inst);
481 
482       /* MUL tmpA.y, tmpA.z, tmpA.y */
483       new_inst = tgsi_default_full_instruction();
484       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
485       new_inst.Instruction.NumDstRegs = 1;
486       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
487       new_inst.Instruction.NumSrcRegs = 2;
488       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
489       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
490       tctx->emit_instruction(tctx, &new_inst);
491 
492       /* EX2 tmpA.y, tmpA.y */
493       new_inst = tgsi_default_full_instruction();
494       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
495       new_inst.Instruction.NumDstRegs = 1;
496       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
497       new_inst.Instruction.NumSrcRegs = 1;
498       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
499       tctx->emit_instruction(tctx, &new_inst);
500 
501       /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
502       new_inst = tgsi_default_full_instruction();
503       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
504       new_inst.Instruction.NumDstRegs = 1;
505       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
506       new_inst.Instruction.NumSrcRegs = 3;
507       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
508       new_inst.Src[0].Register.Negate = true;
509       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
510       reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
511       tctx->emit_instruction(tctx, &new_inst);
512 
513       /* MOV dst.yz, tmpA.xy */
514       new_inst = tgsi_default_full_instruction();
515       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
516       new_inst.Instruction.NumDstRegs = 1;
517       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
518       new_inst.Instruction.NumSrcRegs = 1;
519       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
520       tctx->emit_instruction(tctx, &new_inst);
521    }
522 
523    if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
524       /* MOV dst.xw, imm{1.0} */
525       new_inst = tgsi_default_full_instruction();
526       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
527       new_inst.Instruction.NumDstRegs = 1;
528       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
529       new_inst.Instruction.NumSrcRegs = 1;
530       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
531       tctx->emit_instruction(tctx, &new_inst);
532    }
533 }
534 
535 /* EXP - Approximate Exponential Base 2
536  *  dst.x = 2^{\lfloor src.x\rfloor}
537  *  dst.y = src.x - \lfloor src.x\rfloor
538  *  dst.z = 2^{src.x}
539  *  dst.w = 1.0
540  *
541  * ; needs: 1 tmp, imm{1.0}
542  * if (lowering FLR) {
543  *   FRC tmpA.x, src.x
544  *   SUB tmpA.x, src.x, tmpA.x
545  * } else {
546  *   FLR tmpA.x, src.x
547  * }
548  * EX2 tmpA.y, src.x
549  * SUB dst.y, src.x, tmpA.x
550  * EX2 dst.x, tmpA.x
551  * MOV dst.z, tmpA.y
552  * MOV dst.w, imm{1.0}
553  */
554 #define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
555 		NINST(1)+ NINST(1) - OINST(1))
556 #define EXP_TMP  1
557 static void
transform_exp(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)558 transform_exp(struct tgsi_transform_context *tctx,
559               struct tgsi_full_instruction *inst)
560 {
561    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
562    struct tgsi_full_dst_register *dst = &inst->Dst[0];
563    struct tgsi_full_src_register *src = &inst->Src[0];
564    struct tgsi_full_instruction new_inst;
565 
566    if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
567       if (ctx->config->lower_FLR) {
568          /* FRC tmpA.x, src.x */
569          new_inst = tgsi_default_full_instruction();
570          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
571          new_inst.Instruction.NumDstRegs = 1;
572          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
573          new_inst.Instruction.NumSrcRegs = 1;
574          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
575          tctx->emit_instruction(tctx, &new_inst);
576 
577          /* SUB tmpA.x, src.x, tmpA.x */
578          new_inst = tgsi_default_full_instruction();
579          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
580          new_inst.Instruction.NumDstRegs = 1;
581          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
582          new_inst.Instruction.NumSrcRegs = 2;
583          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
584          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
585          new_inst.Src[1].Register.Negate = 1;
586          tctx->emit_instruction(tctx, &new_inst);
587      } else {
588          /* FLR tmpA.x, src.x */
589          new_inst = tgsi_default_full_instruction();
590          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
591          new_inst.Instruction.NumDstRegs = 1;
592          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
593          new_inst.Instruction.NumSrcRegs = 1;
594          reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
595          tctx->emit_instruction(tctx, &new_inst);
596       }
597    }
598 
599    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
600       /* EX2 tmpA.y, src.x */
601       new_inst = tgsi_default_full_instruction();
602       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
603       new_inst.Instruction.NumDstRegs = 1;
604       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
605       new_inst.Instruction.NumSrcRegs = 1;
606       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
607       tctx->emit_instruction(tctx, &new_inst);
608    }
609 
610    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
611       /* SUB dst.y, src.x, tmpA.x */
612       new_inst = tgsi_default_full_instruction();
613       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
614       new_inst.Instruction.NumDstRegs = 1;
615       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
616       new_inst.Instruction.NumSrcRegs = 2;
617       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
618       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
619       new_inst.Src[1].Register.Negate = 1;
620       tctx->emit_instruction(tctx, &new_inst);
621    }
622 
623    if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
624       /* EX2 dst.x, tmpA.x */
625       new_inst = tgsi_default_full_instruction();
626       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
627       new_inst.Instruction.NumDstRegs = 1;
628       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
629       new_inst.Instruction.NumSrcRegs = 1;
630       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
631       tctx->emit_instruction(tctx, &new_inst);
632    }
633 
634    if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
635       /* MOV dst.z, tmpA.y */
636       new_inst = tgsi_default_full_instruction();
637       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
638       new_inst.Instruction.NumDstRegs = 1;
639       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
640       new_inst.Instruction.NumSrcRegs = 1;
641       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
642       tctx->emit_instruction(tctx, &new_inst);
643    }
644 
645    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
646       /* MOV dst.w, imm{1.0} */
647       new_inst = tgsi_default_full_instruction();
648       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
649       new_inst.Instruction.NumDstRegs = 1;
650       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
651       new_inst.Instruction.NumSrcRegs = 1;
652       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
653       tctx->emit_instruction(tctx, &new_inst);
654    }
655 }
656 
657 /* LOG - Approximate Logarithm Base 2
658  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
659  *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
660  *  dst.z = \log_2{|src.x|}
661  *  dst.w = 1.0
662  *
663  * ; needs: 1 tmp, imm{1.0}
664  * LG2 tmpA.x, |src.x|
665  * if (lowering FLR) {
666  *   FRC tmpA.y, tmpA.x
667  *   SUB tmpA.y, tmpA.x, tmpA.y
668  * } else {
669  *   FLR tmpA.y, tmpA.x
670  * }
671  * EX2 tmpA.z, tmpA.y
672  * RCP tmpA.z, tmpA.z
673  * MUL dst.y, |src.x|, tmpA.z
674  * MOV dst.xz, tmpA.yx
675  * MOV dst.w, imm{1.0}
676  */
677 #define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
678 		NINST(2) + NINST(1) + NINST(1) - OINST(1))
679 #define LOG_TMP  1
680 static void
transform_log(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)681 transform_log(struct tgsi_transform_context *tctx,
682               struct tgsi_full_instruction *inst)
683 {
684    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
685    struct tgsi_full_dst_register *dst = &inst->Dst[0];
686    struct tgsi_full_src_register *src = &inst->Src[0];
687    struct tgsi_full_instruction new_inst;
688 
689    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
690       /* LG2 tmpA.x, |src.x| */
691       new_inst = tgsi_default_full_instruction();
692       new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
693       new_inst.Instruction.NumDstRegs = 1;
694       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
695       new_inst.Instruction.NumSrcRegs = 1;
696       reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
697       new_inst.Src[0].Register.Absolute = true;
698       tctx->emit_instruction(tctx, &new_inst);
699    }
700 
701    if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
702       if (ctx->config->lower_FLR) {
703          /* FRC tmpA.y, tmpA.x */
704          new_inst = tgsi_default_full_instruction();
705          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
706          new_inst.Instruction.NumDstRegs = 1;
707          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
708          new_inst.Instruction.NumSrcRegs = 1;
709          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
710          tctx->emit_instruction(tctx, &new_inst);
711 
712          /* SUB tmpA.y, tmpA.x, tmpA.y */
713          new_inst = tgsi_default_full_instruction();
714          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
715          new_inst.Instruction.NumDstRegs = 1;
716          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
717          new_inst.Instruction.NumSrcRegs = 2;
718          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
719          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
720          new_inst.Src[1].Register.Negate = 1;
721          tctx->emit_instruction(tctx, &new_inst);
722       } else {
723          /* FLR tmpA.y, tmpA.x */
724          new_inst = tgsi_default_full_instruction();
725          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
726          new_inst.Instruction.NumDstRegs = 1;
727          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
728          new_inst.Instruction.NumSrcRegs = 1;
729          reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
730          tctx->emit_instruction(tctx, &new_inst);
731       }
732    }
733 
734    if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
735       /* EX2 tmpA.z, tmpA.y */
736       new_inst = tgsi_default_full_instruction();
737       new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
738       new_inst.Instruction.NumDstRegs = 1;
739       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
740       new_inst.Instruction.NumSrcRegs = 1;
741       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
742       tctx->emit_instruction(tctx, &new_inst);
743 
744       /* RCP tmpA.z, tmpA.z */
745       new_inst = tgsi_default_full_instruction();
746       new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
747       new_inst.Instruction.NumDstRegs = 1;
748       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
749       new_inst.Instruction.NumSrcRegs = 1;
750       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
751       tctx->emit_instruction(tctx, &new_inst);
752 
753       /* MUL dst.y, |src.x|, tmpA.z */
754       new_inst = tgsi_default_full_instruction();
755       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
756       new_inst.Instruction.NumDstRegs = 1;
757       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
758       new_inst.Instruction.NumSrcRegs = 2;
759       reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
760       new_inst.Src[0].Register.Absolute = true;
761       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
762       tctx->emit_instruction(tctx, &new_inst);
763    }
764 
765    if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
766       /* MOV dst.xz, tmpA.yx */
767       new_inst = tgsi_default_full_instruction();
768       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
769       new_inst.Instruction.NumDstRegs = 1;
770       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
771       new_inst.Instruction.NumSrcRegs = 1;
772       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
773       tctx->emit_instruction(tctx, &new_inst);
774    }
775 
776    if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
777       /* MOV dst.w, imm{1.0} */
778       new_inst = tgsi_default_full_instruction();
779       new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
780       new_inst.Instruction.NumDstRegs = 1;
781       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
782       new_inst.Instruction.NumSrcRegs = 1;
783       reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
784       tctx->emit_instruction(tctx, &new_inst);
785    }
786 }
787 
788 /* DP4 - 4-component Dot Product
789  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
790  *
791  * DP3 - 3-component Dot Product
792  *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
793  *
794  * DP2 - 2-component Dot Product
795  *   dst = src0.x \times src1.x + src0.y \times src1.y
796  *
797  * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
798  * operations, which is what you'd prefer for a ISA that is natively
799  * scalar.  Probably a native vector ISA would at least already have
800  * DP4/DP3 instructions, but perhaps there is room for an alternative
801  * translation for DP2 using vector instructions.
802  *
803  * ; needs: 1 tmp
804  * MUL tmpA.x, src0.x, src1.x
805  * MAD tmpA.x, src0.y, src1.y, tmpA.x
806  * if (DP3 || DP4) {
807  *   MAD tmpA.x, src0.z, src1.z, tmpA.x
808  *   if (DP4) {
809  *     MAD tmpA.x, src0.w, src1.w, tmpA.x
810  *   }
811  * }
812  * ; fixup last instruction to replicate into dst
813  */
814 #define DP4_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
815 #define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
816 #define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
817 #define DOTP_TMP  1
818 static void
transform_dotp(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)819 transform_dotp(struct tgsi_transform_context *tctx,
820                struct tgsi_full_instruction *inst)
821 {
822    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
823    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
824    struct tgsi_full_src_register *src0 = &inst->Src[0];
825    struct tgsi_full_src_register *src1 = &inst->Src[1];
826    struct tgsi_full_instruction new_inst;
827    enum tgsi_opcode opcode = inst->Instruction.Opcode;
828 
829    /* NOTE: any potential last instruction must replicate src on all
830     * components (since it could be re-written to write to final dst)
831     */
832 
833    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
834       /* MUL tmpA.x, src0.x, src1.x */
835       new_inst = tgsi_default_full_instruction();
836       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
837       new_inst.Instruction.NumDstRegs = 1;
838       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
839       new_inst.Instruction.NumSrcRegs = 2;
840       reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
841       reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
842       tctx->emit_instruction(tctx, &new_inst);
843 
844       /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
845       new_inst = tgsi_default_full_instruction();
846       new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
847       new_inst.Instruction.NumDstRegs = 1;
848       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
849       new_inst.Instruction.NumSrcRegs = 3;
850       reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
851       reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
852       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
853 
854       if ((opcode == TGSI_OPCODE_DP3) ||
855           (opcode == TGSI_OPCODE_DP4)) {
856          tctx->emit_instruction(tctx, &new_inst);
857 
858          /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
859          new_inst = tgsi_default_full_instruction();
860          new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
861          new_inst.Instruction.NumDstRegs = 1;
862          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
863          new_inst.Instruction.NumSrcRegs = 3;
864          reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
865          reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
866          reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
867 
868          if (opcode == TGSI_OPCODE_DP4) {
869             tctx->emit_instruction(tctx, &new_inst);
870 
871             /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
872             new_inst = tgsi_default_full_instruction();
873             new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
874             new_inst.Instruction.NumDstRegs = 1;
875             reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
876             new_inst.Instruction.NumSrcRegs = 3;
877             reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
878             reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
879             reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
880          }
881       }
882 
883       /* fixup last instruction to write to dst: */
884       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
885 
886       tctx->emit_instruction(tctx, &new_inst);
887    }
888 }
889 
890 /* FLR - floor, CEIL - ceil
891  * ; needs: 1 tmp
892  * if (CEIL) {
893  *   FRC tmpA, -src
894  *   ADD dst, src, tmpA
895  * } else {
896  *   FRC tmpA, src
897  *   SUB dst, src, tmpA
898  * }
899  */
900 #define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
901 #define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
902 #define FLR_TMP 1
903 #define CEIL_TMP 1
904 static void
transform_flr_ceil(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)905 transform_flr_ceil(struct tgsi_transform_context *tctx,
906                    struct tgsi_full_instruction *inst)
907 {
908    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
909    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
910    struct tgsi_full_src_register *src0 = &inst->Src[0];
911    struct tgsi_full_instruction new_inst;
912    enum tgsi_opcode opcode = inst->Instruction.Opcode;
913 
914    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
915       /* FLR: FRC tmpA, src  CEIL: FRC tmpA, -src */
916       new_inst = tgsi_default_full_instruction();
917       new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
918       new_inst.Instruction.NumDstRegs = 1;
919       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
920       new_inst.Instruction.NumSrcRegs = 1;
921       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
922 
923       if (opcode == TGSI_OPCODE_CEIL)
924          new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
925       tctx->emit_instruction(tctx, &new_inst);
926 
927       /* FLR: SUB dst, src, tmpA  CEIL: ADD dst, src, tmpA */
928       new_inst = tgsi_default_full_instruction();
929       new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
930       new_inst.Instruction.NumDstRegs = 1;
931       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
932       new_inst.Instruction.NumSrcRegs = 2;
933       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
934       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
935       if (opcode == TGSI_OPCODE_FLR)
936          new_inst.Src[1].Register.Negate = 1;
937       tctx->emit_instruction(tctx, &new_inst);
938    }
939 }
940 
941 /* TRUNC - truncate off fractional part
942  *  dst.x = trunc(src.x)
943  *  dst.y = trunc(src.y)
944  *  dst.z = trunc(src.z)
945  *  dst.w = trunc(src.w)
946  *
947  * ; needs: 1 tmp
948  * if (lower FLR) {
949  *   FRC tmpA, |src|
950  *   SUB tmpA, |src|, tmpA
951  * } else {
952  *   FLR tmpA, |src|
953  * }
954  * CMP dst, src, -tmpA, tmpA
955  */
956 #define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
957 #define TRUNC_TMP 1
958 static void
transform_trunc(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)959 transform_trunc(struct tgsi_transform_context *tctx,
960                 struct tgsi_full_instruction *inst)
961 {
962    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
963    struct tgsi_full_dst_register *dst  = &inst->Dst[0];
964    struct tgsi_full_src_register *src0 = &inst->Src[0];
965    struct tgsi_full_instruction new_inst;
966 
967    if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
968       if (ctx->config->lower_FLR) {
969          new_inst = tgsi_default_full_instruction();
970          new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
971          new_inst.Instruction.NumDstRegs = 1;
972          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
973          new_inst.Instruction.NumSrcRegs = 1;
974          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
975          new_inst.Src[0].Register.Absolute = true;
976          new_inst.Src[0].Register.Negate = false;
977          tctx->emit_instruction(tctx, &new_inst);
978 
979          new_inst = tgsi_default_full_instruction();
980          new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
981          new_inst.Instruction.NumDstRegs = 1;
982          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
983          new_inst.Instruction.NumSrcRegs = 2;
984          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
985          new_inst.Src[0].Register.Absolute = true;
986          new_inst.Src[0].Register.Negate = false;
987          reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
988          new_inst.Src[1].Register.Negate = 1;
989          tctx->emit_instruction(tctx, &new_inst);
990       } else {
991          new_inst = tgsi_default_full_instruction();
992          new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
993          new_inst.Instruction.NumDstRegs = 1;
994          reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
995          new_inst.Instruction.NumSrcRegs = 1;
996          reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
997          new_inst.Src[0].Register.Absolute = true;
998          new_inst.Src[0].Register.Negate = false;
999          tctx->emit_instruction(tctx, &new_inst);
1000       }
1001 
1002       new_inst = tgsi_default_full_instruction();
1003       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1004       new_inst.Instruction.NumDstRegs = 1;
1005       reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1006       new_inst.Instruction.NumSrcRegs = 3;
1007       reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1008       reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1009       new_inst.Src[1].Register.Negate = true;
1010       reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1011       tctx->emit_instruction(tctx, &new_inst);
1012    }
1013 }
1014 
1015 /* Inserts a MOV_SAT for the needed components of tex coord.  Note that
1016  * in the case of TXP, the clamping must happen *after* projection, so
1017  * we need to lower TXP to TEX.
1018  *
1019  *   MOV tmpA, src0
1020  *   if (opc == TXP) {
1021  *     ; do perspective division manually before clamping:
1022  *     RCP tmpB, tmpA.w
1023  *     MUL tmpB.<pmask>, tmpA, tmpB.xxxx
1024  *     opc = TEX;
1025  *   }
1026  *   MOV_SAT tmpA.<mask>, tmpA  ; <mask> is the clamped s/t/r coords
1027  *   <opc> dst, tmpA, ...
1028  */
1029 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1030 #define SAMP_TMP  2
1031 static int
transform_samp(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)1032 transform_samp(struct tgsi_transform_context *tctx,
1033                struct tgsi_full_instruction *inst)
1034 {
1035    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1036    struct tgsi_full_src_register *coord = &inst->Src[0];
1037    struct tgsi_full_src_register *samp;
1038    struct tgsi_full_instruction new_inst;
1039    /* mask is clamped coords, pmask is all coords (for projection): */
1040    unsigned mask = 0, pmask = 0, smask;
1041    unsigned tex = inst->Texture.Texture;
1042    enum tgsi_opcode opcode = inst->Instruction.Opcode;
1043    bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
1044 		   (ctx->config->lower_TXP & (1 << tex));
1045 
1046    if (opcode == TGSI_OPCODE_TXB2) {
1047       samp = &inst->Src[2];
1048    } else {
1049       samp = &inst->Src[1];
1050    }
1051 
1052    /* convert sampler # to bitmask to test: */
1053    smask = 1 << samp->Register.Index;
1054 
1055    /* check if we actually need to lower this one: */
1056    if (!(ctx->saturate & smask) && !lower_txp)
1057       return -1;
1058 
1059    /* figure out which coordinates need saturating:
1060     *   - RECT textures should not get saturated
1061     *   - array index coords should not get saturated
1062     */
1063    switch (tex) {
1064    case TGSI_TEXTURE_3D:
1065    case TGSI_TEXTURE_CUBE:
1066    case TGSI_TEXTURE_CUBE_ARRAY:
1067    case TGSI_TEXTURE_SHADOWCUBE:
1068    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1069       if (ctx->config->saturate_r & smask)
1070          mask |= TGSI_WRITEMASK_Z;
1071       pmask |= TGSI_WRITEMASK_Z;
1072       FALLTHROUGH;
1073 
1074    case TGSI_TEXTURE_2D:
1075    case TGSI_TEXTURE_2D_ARRAY:
1076    case TGSI_TEXTURE_SHADOW2D:
1077    case TGSI_TEXTURE_SHADOW2D_ARRAY:
1078    case TGSI_TEXTURE_2D_MSAA:
1079    case TGSI_TEXTURE_2D_ARRAY_MSAA:
1080       if (ctx->config->saturate_t & smask)
1081          mask |= TGSI_WRITEMASK_Y;
1082       pmask |= TGSI_WRITEMASK_Y;
1083       FALLTHROUGH;
1084 
1085    case TGSI_TEXTURE_1D:
1086    case TGSI_TEXTURE_1D_ARRAY:
1087    case TGSI_TEXTURE_SHADOW1D:
1088    case TGSI_TEXTURE_SHADOW1D_ARRAY:
1089       if (ctx->config->saturate_s & smask)
1090          mask |= TGSI_WRITEMASK_X;
1091       pmask |= TGSI_WRITEMASK_X;
1092       break;
1093 
1094    case TGSI_TEXTURE_RECT:
1095    case TGSI_TEXTURE_SHADOWRECT:
1096       /* we don't saturate, but in case of lower_txp we
1097        * still need to do the perspective divide:
1098        */
1099        pmask = TGSI_WRITEMASK_XY;
1100        break;
1101    }
1102 
1103    /* sanity check.. driver could be asking to saturate a non-
1104     * existent coordinate component:
1105     */
1106    if (!mask && !lower_txp)
1107       return -1;
1108 
1109    /* MOV tmpA, src0 */
1110    create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1111 
1112    /* This is a bit sad.. we need to clamp *after* the coords
1113     * are projected, which means lowering TXP to TEX and doing
1114     * the projection ourself.  But since I haven't figured out
1115     * how to make the lowering code deliver an electric shock
1116     * to anyone using GL_CLAMP, we must do this instead:
1117     */
1118    if (opcode == TGSI_OPCODE_TXP) {
1119       /* RCP tmpB.x tmpA.w */
1120       new_inst = tgsi_default_full_instruction();
1121       new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1122       new_inst.Instruction.NumDstRegs = 1;
1123       reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1124       new_inst.Instruction.NumSrcRegs = 1;
1125       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
1126       tctx->emit_instruction(tctx, &new_inst);
1127 
1128       /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1129       new_inst = tgsi_default_full_instruction();
1130       new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1131       new_inst.Instruction.NumDstRegs = 1;
1132       reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1133       new_inst.Instruction.NumSrcRegs = 2;
1134       reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1135       reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
1136       tctx->emit_instruction(tctx, &new_inst);
1137 
1138       opcode = TGSI_OPCODE_TEX;
1139    }
1140 
1141    /* MOV_SAT tmpA.<mask>, tmpA */
1142    if (mask) {
1143       create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
1144    }
1145 
1146    /* modify the texture samp instruction to take fixed up coord: */
1147    new_inst = *inst;
1148    new_inst.Instruction.Opcode = opcode;
1149    new_inst.Src[0] = ctx->tmp[A].src;
1150    tctx->emit_instruction(tctx, &new_inst);
1151 
1152    return 0;
1153 }
1154 
1155 /* Two-sided color emulation:
1156  * For each COLOR input, create a corresponding BCOLOR input, plus
1157  * CMP instruction to select front or back color based on FACE
1158  */
1159 #define TWOSIDE_GROW(n)  (                      \
1160       2 +         /* FACE */                    \
1161       ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
1162       ((n) * 1) + /* TEMP[] */                  \
1163       ((n) * NINST(3))   /* CMP instr */        \
1164       )
1165 
1166 static void
emit_twoside(struct tgsi_transform_context * tctx)1167 emit_twoside(struct tgsi_transform_context *tctx)
1168 {
1169    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1170    struct tgsi_shader_info *info = ctx->info;
1171    struct tgsi_full_declaration decl;
1172    struct tgsi_full_instruction new_inst;
1173    unsigned inbase, tmpbase;
1174    unsigned i;
1175 
1176    inbase  = info->file_max[TGSI_FILE_INPUT] + 1;
1177    tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1178 
1179    /* additional inputs for BCOLOR's */
1180    for (i = 0; i < ctx->two_side_colors; i++) {
1181       unsigned in_idx = ctx->two_side_idx[i];
1182       decl = tgsi_default_full_declaration();
1183       decl.Declaration.File = TGSI_FILE_INPUT;
1184       decl.Declaration.Semantic = true;
1185       decl.Range.First = decl.Range.Last = inbase + i;
1186       decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1187       decl.Semantic.Index = info->input_semantic_index[in_idx];
1188       decl.Declaration.Interpolate = true;
1189       decl.Interp.Interpolate = info->input_interpolate[in_idx];
1190       decl.Interp.Location = info->input_interpolate_loc[in_idx];
1191       tctx->emit_declaration(tctx, &decl);
1192    }
1193 
1194    /* additional input for FACE */
1195    if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1196       decl = tgsi_default_full_declaration();
1197       decl.Declaration.File = TGSI_FILE_INPUT;
1198       decl.Declaration.Semantic = true;
1199       decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1200       decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1201       decl.Semantic.Index = 0;
1202       tctx->emit_declaration(tctx, &decl);
1203 
1204       ctx->face_idx = decl.Range.First;
1205    }
1206 
1207    /* additional temps for COLOR/BCOLOR selection: */
1208    for (i = 0; i < ctx->two_side_colors; i++) {
1209       decl = tgsi_default_full_declaration();
1210       decl.Declaration.File = TGSI_FILE_TEMPORARY;
1211       decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1212       tctx->emit_declaration(tctx, &decl);
1213    }
1214 
1215    /* and finally additional instructions to select COLOR/BCOLOR: */
1216    for (i = 0; i < ctx->two_side_colors; i++) {
1217       new_inst = tgsi_default_full_instruction();
1218       new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1219 
1220       new_inst.Instruction.NumDstRegs = 1;
1221       new_inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
1222       new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1223       new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1224 
1225       new_inst.Instruction.NumSrcRegs = 3;
1226       new_inst.Src[0].Register.File  = TGSI_FILE_INPUT;
1227       new_inst.Src[0].Register.Index = ctx->face_idx;
1228       new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1229       new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1230       new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1231       new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1232       new_inst.Src[1].Register.File  = TGSI_FILE_INPUT;
1233       new_inst.Src[1].Register.Index = inbase + i;
1234       new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1235       new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1236       new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1237       new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1238       new_inst.Src[2].Register.File  = TGSI_FILE_INPUT;
1239       new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1240       new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1241       new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1242       new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1243       new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1244 
1245       tctx->emit_instruction(tctx, &new_inst);
1246    }
1247 }
1248 
1249 static void
emit_decls(struct tgsi_transform_context * tctx)1250 emit_decls(struct tgsi_transform_context *tctx)
1251 {
1252    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1253    struct tgsi_shader_info *info = ctx->info;
1254    struct tgsi_full_declaration decl;
1255    struct tgsi_full_immediate immed;
1256    unsigned tmpbase;
1257    unsigned i;
1258 
1259    tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1260 
1261    ctx->color_base = tmpbase + ctx->numtmp;
1262 
1263    /* declare immediate: */
1264    immed = tgsi_default_full_immediate();
1265    immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1266    immed.u[0].Float = 0.0;
1267    immed.u[1].Float = 1.0;
1268    immed.u[2].Float = 128.0;
1269    immed.u[3].Float = 0.0;
1270    tctx->emit_immediate(tctx, &immed);
1271 
1272    ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1273    ctx->imm.Register.Index = info->immediate_count;
1274    ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1275    ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1276    ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1277    ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1278 
1279    /* declare temp regs: */
1280    for (i = 0; i < ctx->numtmp; i++) {
1281       decl = tgsi_default_full_declaration();
1282       decl.Declaration.File = TGSI_FILE_TEMPORARY;
1283       decl.Range.First = decl.Range.Last = tmpbase + i;
1284       tctx->emit_declaration(tctx, &decl);
1285 
1286       ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
1287       ctx->tmp[i].src.Register.Index = tmpbase + i;
1288       ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1289       ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1290       ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1291       ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1292 
1293       ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
1294       ctx->tmp[i].dst.Register.Index = tmpbase + i;
1295       ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1296    }
1297 
1298    if (ctx->two_side_colors)
1299       emit_twoside(tctx);
1300 }
1301 
1302 static void
rename_color_inputs(struct tgsi_lowering_context * ctx,struct tgsi_full_instruction * inst)1303 rename_color_inputs(struct tgsi_lowering_context *ctx,
1304                     struct tgsi_full_instruction *inst)
1305 {
1306    unsigned i, j;
1307    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1308       struct tgsi_src_register *src = &inst->Src[i].Register;
1309       if (src->File == TGSI_FILE_INPUT) {
1310          for (j = 0; j < ctx->two_side_colors; j++) {
1311 	    if (src->Index == (int)ctx->two_side_idx[j]) {
1312                src->File = TGSI_FILE_TEMPORARY;
1313                src->Index = ctx->color_base + j;
1314                break;
1315             }
1316          }
1317       }
1318    }
1319 
1320 }
1321 
1322 static void
transform_instr(struct tgsi_transform_context * tctx,struct tgsi_full_instruction * inst)1323 transform_instr(struct tgsi_transform_context *tctx,
1324 		struct tgsi_full_instruction *inst)
1325 {
1326    struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1327 
1328    if (!ctx->emitted_decls) {
1329       emit_decls(tctx);
1330       ctx->emitted_decls = 1;
1331    }
1332 
1333    /* if emulating two-sided-color, we need to re-write some
1334     * src registers:
1335     */
1336    if (ctx->two_side_colors)
1337       rename_color_inputs(ctx, inst);
1338 
1339    switch (inst->Instruction.Opcode) {
1340    case TGSI_OPCODE_DST:
1341       if (!ctx->config->lower_DST)
1342          goto skip;
1343       transform_dst(tctx, inst);
1344       break;
1345    case TGSI_OPCODE_LRP:
1346       if (!ctx->config->lower_LRP)
1347          goto skip;
1348       transform_lrp(tctx, inst);
1349       break;
1350    case TGSI_OPCODE_FRC:
1351       if (!ctx->config->lower_FRC)
1352          goto skip;
1353       transform_frc(tctx, inst);
1354       break;
1355    case TGSI_OPCODE_POW:
1356       if (!ctx->config->lower_POW)
1357          goto skip;
1358       transform_pow(tctx, inst);
1359       break;
1360    case TGSI_OPCODE_LIT:
1361       if (!ctx->config->lower_LIT)
1362          goto skip;
1363       transform_lit(tctx, inst);
1364       break;
1365    case TGSI_OPCODE_EXP:
1366       if (!ctx->config->lower_EXP)
1367          goto skip;
1368       transform_exp(tctx, inst);
1369       break;
1370    case TGSI_OPCODE_LOG:
1371       if (!ctx->config->lower_LOG)
1372          goto skip;
1373       transform_log(tctx, inst);
1374       break;
1375    case TGSI_OPCODE_DP4:
1376       if (!ctx->config->lower_DP4)
1377          goto skip;
1378       transform_dotp(tctx, inst);
1379       break;
1380    case TGSI_OPCODE_DP3:
1381       if (!ctx->config->lower_DP3)
1382          goto skip;
1383       transform_dotp(tctx, inst);
1384       break;
1385    case TGSI_OPCODE_DP2:
1386       if (!ctx->config->lower_DP2)
1387          goto skip;
1388       transform_dotp(tctx, inst);
1389       break;
1390    case TGSI_OPCODE_FLR:
1391       if (!ctx->config->lower_FLR)
1392          goto skip;
1393       transform_flr_ceil(tctx, inst);
1394       break;
1395    case TGSI_OPCODE_CEIL:
1396       if (!ctx->config->lower_CEIL)
1397          goto skip;
1398       transform_flr_ceil(tctx, inst);
1399       break;
1400    case TGSI_OPCODE_TRUNC:
1401       if (!ctx->config->lower_TRUNC)
1402          goto skip;
1403       transform_trunc(tctx, inst);
1404       break;
1405    case TGSI_OPCODE_TEX:
1406    case TGSI_OPCODE_TXP:
1407    case TGSI_OPCODE_TXB:
1408    case TGSI_OPCODE_TXB2:
1409    case TGSI_OPCODE_TXL:
1410       if (transform_samp(tctx, inst))
1411          goto skip;
1412       break;
1413    default:
1414    skip:
1415       tctx->emit_instruction(tctx, inst);
1416       break;
1417    }
1418 }
1419 
1420 /* returns NULL if no lowering required, else returns the new
1421  * tokens (which caller is required to free()).  In either case
1422  * returns the current info.
1423  */
1424 const struct tgsi_token *
tgsi_transform_lowering(const struct tgsi_lowering_config * config,const struct tgsi_token * tokens,struct tgsi_shader_info * info)1425 tgsi_transform_lowering(const struct tgsi_lowering_config *config,
1426                         const struct tgsi_token *tokens,
1427                         struct tgsi_shader_info *info)
1428 {
1429    struct tgsi_lowering_context ctx;
1430    struct tgsi_token *newtoks;
1431    int newlen, numtmp;
1432 
1433    /* sanity check in case limit is ever increased: */
1434    STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1435 
1436    /* sanity check the lowering */
1437    assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
1438    assert(!(config->lower_FRC && config->lower_TRUNC));
1439 
1440    memset(&ctx, 0, sizeof(ctx));
1441    ctx.base.transform_instruction = transform_instr;
1442    ctx.info = info;
1443    ctx.config = config;
1444 
1445    tgsi_scan_shader(tokens, info);
1446 
1447    /* if we are adding fragment shader support to emulate two-sided
1448     * color, then figure out the number of additional inputs we need
1449     * to create for BCOLOR's..
1450     */
1451    if ((info->processor == PIPE_SHADER_FRAGMENT) &&
1452        config->color_two_side) {
1453       int i;
1454       ctx.face_idx = -1;
1455       for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1456          if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1457             ctx.two_side_idx[ctx.two_side_colors++] = i;
1458          if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1459             ctx.face_idx = i;
1460       }
1461    }
1462 
1463    ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1464 
1465 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1466    /* if there are no instructions to lower, then we are done: */
1467    if (!(OPCS(DST) ||
1468          OPCS(LRP) ||
1469          OPCS(FRC) ||
1470          OPCS(POW) ||
1471          OPCS(LIT) ||
1472          OPCS(EXP) ||
1473          OPCS(LOG) ||
1474          OPCS(DP4) ||
1475          OPCS(DP3) ||
1476          OPCS(DP2) ||
1477          OPCS(FLR) ||
1478          OPCS(CEIL) ||
1479          OPCS(TRUNC) ||
1480          OPCS(TXP) ||
1481          ctx.two_side_colors ||
1482          ctx.saturate))
1483       return NULL;
1484 
1485 #if 0  /* debug */
1486    _debug_printf("BEFORE:");
1487    tgsi_dump(tokens, 0);
1488 #endif
1489 
1490    numtmp = 0;
1491    newlen = tgsi_num_tokens(tokens);
1492    if (OPCS(DST)) {
1493       newlen += DST_GROW * OPCS(DST);
1494       numtmp = MAX2(numtmp, DST_TMP);
1495    }
1496    if (OPCS(LRP)) {
1497       newlen += LRP_GROW * OPCS(LRP);
1498       numtmp = MAX2(numtmp, LRP_TMP);
1499    }
1500    if (OPCS(FRC)) {
1501       newlen += FRC_GROW * OPCS(FRC);
1502       numtmp = MAX2(numtmp, FRC_TMP);
1503    }
1504    if (OPCS(POW)) {
1505       newlen += POW_GROW * OPCS(POW);
1506       numtmp = MAX2(numtmp, POW_TMP);
1507    }
1508    if (OPCS(LIT)) {
1509       newlen += LIT_GROW * OPCS(LIT);
1510       numtmp = MAX2(numtmp, LIT_TMP);
1511    }
1512    if (OPCS(EXP)) {
1513       newlen += EXP_GROW * OPCS(EXP);
1514       numtmp = MAX2(numtmp, EXP_TMP);
1515    }
1516    if (OPCS(LOG)) {
1517       newlen += LOG_GROW * OPCS(LOG);
1518       numtmp = MAX2(numtmp, LOG_TMP);
1519    }
1520    if (OPCS(DP4)) {
1521       newlen += DP4_GROW * OPCS(DP4);
1522       numtmp = MAX2(numtmp, DOTP_TMP);
1523    }
1524    if (OPCS(DP3)) {
1525       newlen += DP3_GROW * OPCS(DP3);
1526       numtmp = MAX2(numtmp, DOTP_TMP);
1527    }
1528    if (OPCS(DP2)) {
1529       newlen += DP2_GROW * OPCS(DP2);
1530       numtmp = MAX2(numtmp, DOTP_TMP);
1531    }
1532    if (OPCS(FLR)) {
1533       newlen += FLR_GROW * OPCS(FLR);
1534       numtmp = MAX2(numtmp, FLR_TMP);
1535    }
1536    if (OPCS(CEIL)) {
1537       newlen += CEIL_GROW * OPCS(CEIL);
1538       numtmp = MAX2(numtmp, CEIL_TMP);
1539    }
1540    if (OPCS(TRUNC)) {
1541       newlen += TRUNC_GROW * OPCS(TRUNC);
1542       numtmp = MAX2(numtmp, TRUNC_TMP);
1543    }
1544    if (ctx.saturate || config->lower_TXP) {
1545       int n = 0;
1546 
1547       if (ctx.saturate) {
1548          n = info->opcode_count[TGSI_OPCODE_TEX] +
1549             info->opcode_count[TGSI_OPCODE_TXP] +
1550             info->opcode_count[TGSI_OPCODE_TXB] +
1551             info->opcode_count[TGSI_OPCODE_TXB2] +
1552             info->opcode_count[TGSI_OPCODE_TXL];
1553       } else if (config->lower_TXP) {
1554           n = info->opcode_count[TGSI_OPCODE_TXP];
1555       }
1556 
1557       newlen += SAMP_GROW * n;
1558       numtmp = MAX2(numtmp, SAMP_TMP);
1559    }
1560 
1561    /* specifically don't include two_side_colors temps in the count: */
1562    ctx.numtmp = numtmp;
1563 
1564    if (ctx.two_side_colors) {
1565       newlen += TWOSIDE_GROW(ctx.two_side_colors);
1566       /* note: we permanently consume temp regs, re-writing references
1567        * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1568        * instruction that selects which varying to use):
1569        */
1570       numtmp += ctx.two_side_colors;
1571    }
1572 
1573    newlen += 2 * numtmp;
1574    newlen += 5;        /* immediate */
1575 
1576    newtoks = tgsi_transform_shader(tokens, newlen, &ctx.base);
1577    if (!newtoks)
1578       return NULL;
1579 
1580    tgsi_scan_shader(newtoks, info);
1581 
1582 #if 0  /* debug */
1583    _debug_printf("AFTER:");
1584    tgsi_dump(newtoks, 0);
1585 #endif
1586 
1587    return newtoks;
1588 }
1589