xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/svga/svga_tgsi_insn.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (c) 2008-2024 Broadcom. All Rights Reserved.
3  * The term “Broadcom” refers to Broadcom Inc.
4  * and/or its subsidiaries.
5  * SPDX-License-Identifier: MIT
6  */
7 
8 
9 #include "pipe/p_shader_tokens.h"
10 #include "tgsi/tgsi_dump.h"
11 #include "tgsi/tgsi_parse.h"
12 #include "util/u_memory.h"
13 #include "util/u_math.h"
14 #include "util/u_pstipple.h"
15 
16 #include "svga_tgsi_emit.h"
17 #include "svga_context.h"
18 
19 
20 static bool emit_vs_postamble( struct svga_shader_emitter *emit );
21 static bool emit_ps_postamble( struct svga_shader_emitter *emit );
22 
23 
24 static SVGA3dShaderOpCodeType
translate_opcode(enum tgsi_opcode opcode)25 translate_opcode(enum tgsi_opcode opcode)
26 {
27    switch (opcode) {
28    case TGSI_OPCODE_ADD:        return SVGA3DOP_ADD;
29    case TGSI_OPCODE_DP3:        return SVGA3DOP_DP3;
30    case TGSI_OPCODE_DP4:        return SVGA3DOP_DP4;
31    case TGSI_OPCODE_FRC:        return SVGA3DOP_FRC;
32    case TGSI_OPCODE_MAD:        return SVGA3DOP_MAD;
33    case TGSI_OPCODE_MAX:        return SVGA3DOP_MAX;
34    case TGSI_OPCODE_MIN:        return SVGA3DOP_MIN;
35    case TGSI_OPCODE_MOV:        return SVGA3DOP_MOV;
36    case TGSI_OPCODE_MUL:        return SVGA3DOP_MUL;
37    case TGSI_OPCODE_NOP:        return SVGA3DOP_NOP;
38    default:
39       assert(!"svga: unexpected opcode in translate_opcode()");
40       return SVGA3DOP_LAST_INST;
41    }
42 }
43 
44 
45 static SVGA3dShaderRegType
translate_file(enum tgsi_file_type file)46 translate_file(enum tgsi_file_type file)
47 {
48    switch (file) {
49    case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
50    case TGSI_FILE_INPUT:     return SVGA3DREG_INPUT;
51    case TGSI_FILE_OUTPUT:    return SVGA3DREG_OUTPUT; /* VS3.0+ only */
52    case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
53    case TGSI_FILE_CONSTANT:  return SVGA3DREG_CONST;
54    case TGSI_FILE_SAMPLER:   return SVGA3DREG_SAMPLER;
55    case TGSI_FILE_ADDRESS:   return SVGA3DREG_ADDR;
56    default:
57       assert(!"svga: unexpected register file in translate_file()");
58       return SVGA3DREG_TEMP;
59    }
60 }
61 
62 
63 /**
64  * Translate a TGSI destination register to an SVGA3DShaderDestToken.
65  * \param insn  the TGSI instruction
66  * \param idx  which TGSI dest register to translate (usually (always?) zero)
67  */
68 static SVGA3dShaderDestToken
translate_dst_register(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn,unsigned idx)69 translate_dst_register( struct svga_shader_emitter *emit,
70                         const struct tgsi_full_instruction *insn,
71                         unsigned idx )
72 {
73    const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
74    SVGA3dShaderDestToken dest;
75 
76    switch (reg->Register.File) {
77    case TGSI_FILE_OUTPUT:
78       /* Output registers encode semantic information in their name.
79        * Need to lookup a table built at decl time:
80        */
81       dest = emit->output_map[reg->Register.Index];
82       emit->num_output_writes++;
83       break;
84 
85    default:
86       {
87          unsigned index = reg->Register.Index;
88          assert(index < SVGA3D_TEMPREG_MAX);
89          index = MIN2(index, SVGA3D_TEMPREG_MAX - 1);
90          dest = dst_register(translate_file(reg->Register.File), index);
91       }
92       break;
93    }
94 
95    if (reg->Register.Indirect) {
96       debug_warning("Indirect indexing of dest registers is not supported!\n");
97    }
98 
99    dest.mask = reg->Register.WriteMask;
100    assert(dest.mask);
101 
102    if (insn->Instruction.Saturate)
103       dest.dstMod = SVGA3DDSTMOD_SATURATE;
104 
105    return dest;
106 }
107 
108 
109 /**
110  * Apply a swizzle to a src_register, returning a new src_register
111  * Ex: swizzle(SRC.ZZYY, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_X, SWIZZLE_Y)
112  * would return SRC.YYZZ
113  */
114 static struct src_register
swizzle(struct src_register src,unsigned x,unsigned y,unsigned z,unsigned w)115 swizzle(struct src_register src,
116         unsigned x, unsigned y, unsigned z, unsigned w)
117 {
118    assert(x < 4);
119    assert(y < 4);
120    assert(z < 4);
121    assert(w < 4);
122    x = (src.base.swizzle >> (x * 2)) & 0x3;
123    y = (src.base.swizzle >> (y * 2)) & 0x3;
124    z = (src.base.swizzle >> (z * 2)) & 0x3;
125    w = (src.base.swizzle >> (w * 2)) & 0x3;
126 
127    src.base.swizzle = TRANSLATE_SWIZZLE(x, y, z, w);
128 
129    return src;
130 }
131 
132 
133 /**
134  * Apply a "scalar" swizzle to a src_register returning a new
135  * src_register where all the swizzle terms are the same.
136  * Ex: scalar(SRC.WZYX, SWIZZLE_Y) would return SRC.ZZZZ
137  */
138 static struct src_register
scalar(struct src_register src,unsigned comp)139 scalar(struct src_register src, unsigned comp)
140 {
141    assert(comp < 4);
142    return swizzle( src, comp, comp, comp, comp );
143 }
144 
145 
146 static bool
svga_arl_needs_adjustment(const struct svga_shader_emitter * emit)147 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
148 {
149    unsigned i;
150 
151    for (i = 0; i < emit->num_arl_consts; ++i) {
152       if (emit->arl_consts[i].arl_num == emit->current_arl)
153          return true;
154    }
155    return false;
156 }
157 
158 
159 static int
svga_arl_adjustment(const struct svga_shader_emitter * emit)160 svga_arl_adjustment( const struct svga_shader_emitter *emit )
161 {
162    unsigned i;
163 
164    for (i = 0; i < emit->num_arl_consts; ++i) {
165       if (emit->arl_consts[i].arl_num == emit->current_arl)
166          return emit->arl_consts[i].number;
167    }
168    return 0;
169 }
170 
171 
172 /**
173  * Translate a TGSI src register to a src_register.
174  */
175 static struct src_register
translate_src_register(const struct svga_shader_emitter * emit,const struct tgsi_full_src_register * reg)176 translate_src_register( const struct svga_shader_emitter *emit,
177                         const struct tgsi_full_src_register *reg )
178 {
179    struct src_register src;
180 
181    switch (reg->Register.File) {
182    case TGSI_FILE_INPUT:
183       /* Input registers are referred to by their semantic name rather
184        * than by index.  Use the mapping build up from the decls:
185        */
186       src = emit->input_map[reg->Register.Index];
187       break;
188 
189    case TGSI_FILE_IMMEDIATE:
190       /* Immediates are appended after TGSI constants in the D3D
191        * constant buffer.
192        */
193       src = src_register( translate_file( reg->Register.File ),
194                           reg->Register.Index + emit->imm_start );
195       break;
196 
197    default:
198       src = src_register( translate_file( reg->Register.File ),
199                           reg->Register.Index );
200       break;
201    }
202 
203    /* Indirect addressing.
204     */
205    if (reg->Register.Indirect) {
206       if (emit->unit == PIPE_SHADER_FRAGMENT) {
207          /* Pixel shaders have only loop registers for relative
208           * addressing into inputs. Ignore the redundant address
209           * register, the contents of aL should be in sync with it.
210           */
211          if (reg->Register.File == TGSI_FILE_INPUT) {
212             src.base.relAddr = 1;
213             src.indirect = src_token(SVGA3DREG_LOOP, 0);
214          }
215       }
216       else {
217          /* Constant buffers only.
218           */
219          if (reg->Register.File == TGSI_FILE_CONSTANT) {
220             /* we shift the offset towards the minimum */
221             if (svga_arl_needs_adjustment( emit )) {
222                src.base.num -= svga_arl_adjustment( emit );
223             }
224             src.base.relAddr = 1;
225 
226             /* Not really sure what should go in the second token:
227              */
228             src.indirect = src_token( SVGA3DREG_ADDR,
229                                       reg->Indirect.Index );
230 
231             src.indirect.swizzle = SWIZZLE_XXXX;
232          }
233       }
234    }
235 
236    src = swizzle( src,
237                   reg->Register.SwizzleX,
238                   reg->Register.SwizzleY,
239                   reg->Register.SwizzleZ,
240                   reg->Register.SwizzleW );
241 
242    /* src.mod isn't a bitfield, unfortunately */
243    if (reg->Register.Absolute) {
244       if (reg->Register.Negate)
245          src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
246       else
247          src.base.srcMod = SVGA3DSRCMOD_ABS;
248    }
249    else {
250       if (reg->Register.Negate)
251          src.base.srcMod = SVGA3DSRCMOD_NEG;
252       else
253          src.base.srcMod = SVGA3DSRCMOD_NONE;
254    }
255 
256    return src;
257 }
258 
259 
260 /*
261  * Get a temporary register.
262  * Note: if we exceed the temporary register limit we just use
263  * register SVGA3D_TEMPREG_MAX - 1.
264  */
265 static SVGA3dShaderDestToken
get_temp(struct svga_shader_emitter * emit)266 get_temp( struct svga_shader_emitter *emit )
267 {
268    int i = emit->nr_hw_temp + emit->internal_temp_count++;
269    if (i >= SVGA3D_TEMPREG_MAX) {
270       debug_warn_once("svga: Too many temporary registers used in shader\n");
271       i = SVGA3D_TEMPREG_MAX - 1;
272    }
273    return dst_register( SVGA3DREG_TEMP, i );
274 }
275 
276 
277 /**
278  * Release a single temp.  Currently only effective if it was the last
279  * allocated temp, otherwise release will be delayed until the next
280  * call to reset_temp_regs().
281  */
282 static void
release_temp(struct svga_shader_emitter * emit,SVGA3dShaderDestToken temp)283 release_temp( struct svga_shader_emitter *emit,
284               SVGA3dShaderDestToken temp )
285 {
286    if (temp.num == emit->internal_temp_count - 1)
287       emit->internal_temp_count--;
288 }
289 
290 
291 /**
292  * Release all temps.
293  */
294 static void
reset_temp_regs(struct svga_shader_emitter * emit)295 reset_temp_regs(struct svga_shader_emitter *emit)
296 {
297    emit->internal_temp_count = 0;
298 }
299 
300 
301 /** Emit bytecode for a src_register */
302 static bool
emit_src(struct svga_shader_emitter * emit,const struct src_register src)303 emit_src(struct svga_shader_emitter *emit, const struct src_register src)
304 {
305    if (src.base.relAddr) {
306       assert(src.base.reserved0);
307       assert(src.indirect.reserved0);
308       return (svga_shader_emit_dword( emit, src.base.value ) &&
309               svga_shader_emit_dword( emit, src.indirect.value ));
310    }
311    else {
312       assert(src.base.reserved0);
313       return svga_shader_emit_dword( emit, src.base.value );
314    }
315 }
316 
317 
318 /** Emit bytecode for a dst_register */
319 static bool
emit_dst(struct svga_shader_emitter * emit,SVGA3dShaderDestToken dest)320 emit_dst(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dest)
321 {
322    assert(dest.reserved0);
323    assert(dest.mask);
324    return svga_shader_emit_dword( emit, dest.value );
325 }
326 
327 
328 /** Emit bytecode for a 1-operand instruction */
329 static bool
emit_op1(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest,struct src_register src0)330 emit_op1(struct svga_shader_emitter *emit,
331          SVGA3dShaderInstToken inst,
332          SVGA3dShaderDestToken dest,
333          struct src_register src0)
334 {
335    return (emit_instruction(emit, inst) &&
336            emit_dst(emit, dest) &&
337            emit_src(emit, src0));
338 }
339 
340 
341 /** Emit bytecode for a 2-operand instruction */
342 static bool
emit_op2(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest,struct src_register src0,struct src_register src1)343 emit_op2(struct svga_shader_emitter *emit,
344          SVGA3dShaderInstToken inst,
345          SVGA3dShaderDestToken dest,
346          struct src_register src0,
347          struct src_register src1)
348 {
349    return (emit_instruction(emit, inst) &&
350            emit_dst(emit, dest) &&
351            emit_src(emit, src0) &&
352            emit_src(emit, src1));
353 }
354 
355 
356 /** Emit bytecode for a 3-operand instruction */
357 static bool
emit_op3(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest,struct src_register src0,struct src_register src1,struct src_register src2)358 emit_op3(struct svga_shader_emitter *emit,
359          SVGA3dShaderInstToken inst,
360          SVGA3dShaderDestToken dest,
361          struct src_register src0,
362          struct src_register src1,
363          struct src_register src2)
364 {
365    return (emit_instruction(emit, inst) &&
366            emit_dst(emit, dest) &&
367            emit_src(emit, src0) &&
368            emit_src(emit, src1) &&
369            emit_src(emit, src2));
370 }
371 
372 
373 /** Emit bytecode for a 4-operand instruction */
374 static bool
emit_op4(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest,struct src_register src0,struct src_register src1,struct src_register src2,struct src_register src3)375 emit_op4(struct svga_shader_emitter *emit,
376          SVGA3dShaderInstToken inst,
377          SVGA3dShaderDestToken dest,
378          struct src_register src0,
379          struct src_register src1,
380          struct src_register src2,
381          struct src_register src3)
382 {
383    return (emit_instruction(emit, inst) &&
384            emit_dst(emit, dest) &&
385            emit_src(emit, src0) &&
386            emit_src(emit, src1) &&
387            emit_src(emit, src2) &&
388            emit_src(emit, src3));
389 }
390 
391 
392 /**
393  * Apply the absolute value modifier to the given src_register, returning
394  * a new src_register.
395  */
396 static struct src_register
absolute(struct src_register src)397 absolute(struct src_register src)
398 {
399    src.base.srcMod = SVGA3DSRCMOD_ABS;
400    return src;
401 }
402 
403 
404 /**
405  * Apply the negation modifier to the given src_register, returning
406  * a new src_register.
407  */
408 static struct src_register
negate(struct src_register src)409 negate(struct src_register src)
410 {
411    switch (src.base.srcMod) {
412    case SVGA3DSRCMOD_ABS:
413       src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
414       break;
415    case SVGA3DSRCMOD_ABSNEG:
416       src.base.srcMod = SVGA3DSRCMOD_ABS;
417       break;
418    case SVGA3DSRCMOD_NEG:
419       src.base.srcMod = SVGA3DSRCMOD_NONE;
420       break;
421    case SVGA3DSRCMOD_NONE:
422       src.base.srcMod = SVGA3DSRCMOD_NEG;
423       break;
424    }
425    return src;
426 }
427 
428 
429 
430 /* Replace the src with the temporary specified in the dst, but copying
431  * only the necessary channels, and preserving the original swizzle (which is
432  * important given that several opcodes have constraints in the allowed
433  * swizzles).
434  */
435 static bool
emit_repl(struct svga_shader_emitter * emit,SVGA3dShaderDestToken dst,struct src_register * src0)436 emit_repl(struct svga_shader_emitter *emit,
437           SVGA3dShaderDestToken dst,
438           struct src_register *src0)
439 {
440    unsigned src0_swizzle;
441    unsigned chan;
442 
443    assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP);
444 
445    src0_swizzle = src0->base.swizzle;
446 
447    dst.mask = 0;
448    for (chan = 0; chan < 4; ++chan) {
449       unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3;
450       dst.mask |= 1 << swizzle;
451    }
452    assert(dst.mask);
453 
454    src0->base.swizzle = SVGA3DSWIZZLE_NONE;
455 
456    if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 ))
457       return false;
458 
459    *src0 = src( dst );
460    src0->base.swizzle = src0_swizzle;
461 
462    return true;
463 }
464 
465 
466 /**
467  * Submit/emit an instruction with zero operands.
468  */
469 static bool
submit_op0(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest)470 submit_op0(struct svga_shader_emitter *emit,
471            SVGA3dShaderInstToken inst,
472            SVGA3dShaderDestToken dest)
473 {
474    return (emit_instruction( emit, inst ) &&
475            emit_dst( emit, dest ));
476 }
477 
478 
479 /**
480  * Submit/emit an instruction with one operand.
481  */
482 static bool
submit_op1(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest,struct src_register src0)483 submit_op1(struct svga_shader_emitter *emit,
484            SVGA3dShaderInstToken inst,
485            SVGA3dShaderDestToken dest,
486            struct src_register src0)
487 {
488    return emit_op1( emit, inst, dest, src0 );
489 }
490 
491 
492 /**
493  * Submit/emit an instruction with two operands.
494  *
495  * SVGA shaders may not refer to >1 constant register in a single
496  * instruction.  This function checks for that usage and inserts a
497  * move to temporary if detected.
498  *
499  * The same applies to input registers -- at most a single input
500  * register may be read by any instruction.
501  */
502 static bool
submit_op2(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest,struct src_register src0,struct src_register src1)503 submit_op2(struct svga_shader_emitter *emit,
504            SVGA3dShaderInstToken inst,
505            SVGA3dShaderDestToken dest,
506            struct src_register src0,
507            struct src_register src1)
508 {
509    SVGA3dShaderDestToken temp;
510    SVGA3dShaderRegType type0, type1;
511    bool need_temp = false;
512 
513    temp.value = 0;
514    type0 = SVGA3dShaderGetRegType( src0.base.value );
515    type1 = SVGA3dShaderGetRegType( src1.base.value );
516 
517    if (type0 == SVGA3DREG_CONST &&
518        type1 == SVGA3DREG_CONST &&
519        src0.base.num != src1.base.num)
520       need_temp = true;
521 
522    if (type0 == SVGA3DREG_INPUT &&
523        type1 == SVGA3DREG_INPUT &&
524        src0.base.num != src1.base.num)
525       need_temp = true;
526 
527    if (need_temp) {
528       temp = get_temp( emit );
529 
530       if (!emit_repl( emit, temp, &src0 ))
531          return false;
532    }
533 
534    if (!emit_op2( emit, inst, dest, src0, src1 ))
535       return false;
536 
537    if (need_temp)
538       release_temp( emit, temp );
539 
540    return true;
541 }
542 
543 
544 /**
545  * Submit/emit an instruction with three operands.
546  *
547  * SVGA shaders may not refer to >1 constant register in a single
548  * instruction.  This function checks for that usage and inserts a
549  * move to temporary if detected.
550  */
551 static bool
submit_op3(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest,struct src_register src0,struct src_register src1,struct src_register src2)552 submit_op3(struct svga_shader_emitter *emit,
553            SVGA3dShaderInstToken inst,
554            SVGA3dShaderDestToken dest,
555            struct src_register src0,
556            struct src_register src1,
557            struct src_register src2)
558 {
559    SVGA3dShaderDestToken temp0;
560    SVGA3dShaderDestToken temp1;
561    bool need_temp0 = false;
562    bool need_temp1 = false;
563    SVGA3dShaderRegType type0, type1, type2;
564 
565    temp0.value = 0;
566    temp1.value = 0;
567    type0 = SVGA3dShaderGetRegType( src0.base.value );
568    type1 = SVGA3dShaderGetRegType( src1.base.value );
569    type2 = SVGA3dShaderGetRegType( src2.base.value );
570 
571    if (inst.op != SVGA3DOP_SINCOS) {
572       if (type0 == SVGA3DREG_CONST &&
573           ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
574            (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
575          need_temp0 = true;
576 
577       if (type1 == SVGA3DREG_CONST &&
578           (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
579          need_temp1 = true;
580    }
581 
582    if (type0 == SVGA3DREG_INPUT &&
583        ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
584         (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
585       need_temp0 = true;
586 
587    if (type1 == SVGA3DREG_INPUT &&
588        (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
589       need_temp1 = true;
590 
591    if (need_temp0) {
592       temp0 = get_temp( emit );
593 
594       if (!emit_repl( emit, temp0, &src0 ))
595          return false;
596    }
597 
598    if (need_temp1) {
599       temp1 = get_temp( emit );
600 
601       if (!emit_repl( emit, temp1, &src1 ))
602          return false;
603    }
604 
605    if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
606       return false;
607 
608    if (need_temp1)
609       release_temp( emit, temp1 );
610    if (need_temp0)
611       release_temp( emit, temp0 );
612    return true;
613 }
614 
615 
616 /**
617  * Submit/emit an instruction with four operands.
618  *
619  * SVGA shaders may not refer to >1 constant register in a single
620  * instruction.  This function checks for that usage and inserts a
621  * move to temporary if detected.
622  */
623 static bool
submit_op4(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest,struct src_register src0,struct src_register src1,struct src_register src2,struct src_register src3)624 submit_op4(struct svga_shader_emitter *emit,
625            SVGA3dShaderInstToken inst,
626            SVGA3dShaderDestToken dest,
627            struct src_register src0,
628            struct src_register src1,
629            struct src_register src2,
630            struct src_register src3)
631 {
632    SVGA3dShaderDestToken temp0;
633    SVGA3dShaderDestToken temp3;
634    bool need_temp0 = false;
635    bool need_temp3 = false;
636    SVGA3dShaderRegType type0, type1, type2, type3;
637 
638    temp0.value = 0;
639    temp3.value = 0;
640    type0 = SVGA3dShaderGetRegType( src0.base.value );
641    type1 = SVGA3dShaderGetRegType( src1.base.value );
642    type2 = SVGA3dShaderGetRegType( src2.base.value );
643    type3 = SVGA3dShaderGetRegType( src2.base.value );
644 
645    /* Make life a little easier - this is only used by the TXD
646     * instruction which is guaranteed not to have a constant/input reg
647     * in one slot at least:
648     */
649    assert(type1 == SVGA3DREG_SAMPLER);
650    (void) type1;
651 
652    if (type0 == SVGA3DREG_CONST &&
653        ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) ||
654         (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
655       need_temp0 = true;
656 
657    if (type3 == SVGA3DREG_CONST &&
658        (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num))
659       need_temp3 = true;
660 
661    if (type0 == SVGA3DREG_INPUT &&
662        ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) ||
663         (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
664       need_temp0 = true;
665 
666    if (type3 == SVGA3DREG_INPUT &&
667        (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
668       need_temp3 = true;
669 
670    if (need_temp0) {
671       temp0 = get_temp( emit );
672 
673       if (!emit_repl( emit, temp0, &src0 ))
674          return false;
675    }
676 
677    if (need_temp3) {
678       temp3 = get_temp( emit );
679 
680       if (!emit_repl( emit, temp3, &src3 ))
681          return false;
682    }
683 
684    if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
685       return false;
686 
687    if (need_temp3)
688       release_temp( emit, temp3 );
689    if (need_temp0)
690       release_temp( emit, temp0 );
691    return true;
692 }
693 
694 
695 /**
696  * Do the src and dest registers refer to the same register?
697  */
698 static bool
alias_src_dst(struct src_register src,SVGA3dShaderDestToken dst)699 alias_src_dst(struct src_register src,
700               SVGA3dShaderDestToken dst)
701 {
702    if (src.base.num != dst.num)
703       return false;
704 
705    if (SVGA3dShaderGetRegType(dst.value) !=
706        SVGA3dShaderGetRegType(src.base.value))
707       return false;
708 
709    return true;
710 }
711 
712 
713 /**
714  * Helper for emitting SVGA immediate values using the SVGA3DOP_DEF[I]
715  * instructions.
716  */
717 static bool
emit_def_const(struct svga_shader_emitter * emit,SVGA3dShaderConstType type,unsigned idx,float a,float b,float c,float d)718 emit_def_const(struct svga_shader_emitter *emit,
719                SVGA3dShaderConstType type,
720                unsigned idx, float a, float b, float c, float d)
721 {
722    SVGA3DOpDefArgs def;
723    SVGA3dShaderInstToken opcode;
724 
725    switch (type) {
726    case SVGA3D_CONST_TYPE_FLOAT:
727       opcode = inst_token( SVGA3DOP_DEF );
728       def.dst = dst_register( SVGA3DREG_CONST, idx );
729       def.constValues[0] = a;
730       def.constValues[1] = b;
731       def.constValues[2] = c;
732       def.constValues[3] = d;
733       break;
734    case SVGA3D_CONST_TYPE_INT:
735       opcode = inst_token( SVGA3DOP_DEFI );
736       def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
737       def.constIValues[0] = (int)a;
738       def.constIValues[1] = (int)b;
739       def.constIValues[2] = (int)c;
740       def.constIValues[3] = (int)d;
741       break;
742    default:
743       assert(0);
744       opcode = inst_token( SVGA3DOP_NOP );
745       break;
746    }
747 
748    if (!emit_instruction(emit, opcode) ||
749        !svga_shader_emit_dwords( emit, def.values, ARRAY_SIZE(def.values)))
750       return false;
751 
752    return true;
753 }
754 
755 
756 static bool
create_loop_const(struct svga_shader_emitter * emit)757 create_loop_const( struct svga_shader_emitter *emit )
758 {
759    unsigned idx = emit->nr_hw_int_const++;
760 
761    if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx,
762                         255, /* iteration count */
763                         0, /* initial value */
764                         1, /* step size */
765                         0 /* not used, must be 0 */))
766       return false;
767 
768    emit->loop_const_idx = idx;
769    emit->created_loop_const = true;
770 
771    return true;
772 }
773 
774 static bool
create_arl_consts(struct svga_shader_emitter * emit)775 create_arl_consts( struct svga_shader_emitter *emit )
776 {
777    int i;
778 
779    for (i = 0; i < emit->num_arl_consts; i += 4) {
780       int j;
781       unsigned idx = emit->nr_hw_float_const++;
782       float vals[4];
783       for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
784          vals[j] = (float) emit->arl_consts[i + j].number;
785          emit->arl_consts[i + j].idx = idx;
786          switch (j) {
787          case 0:
788             emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X;
789             break;
790          case 1:
791             emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y;
792             break;
793          case 2:
794             emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z;
795             break;
796          case 3:
797             emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W;
798             break;
799          }
800       }
801       while (j < 4)
802          vals[j++] = 0;
803 
804       if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
805                            vals[0], vals[1],
806                            vals[2], vals[3]))
807          return false;
808    }
809 
810    return true;
811 }
812 
813 
814 /**
815  * Return the register which holds the pixel shaders front/back-
816  * facing value.
817  */
818 static struct src_register
get_vface(struct svga_shader_emitter * emit)819 get_vface( struct svga_shader_emitter *emit )
820 {
821    assert(emit->emitted_vface);
822    return src_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE);
823 }
824 
825 
826 /**
827  * Create/emit a "common" constant with values {0, 0.5, -1, 1}.
828  * We can swizzle this to produce other useful constants such as
829  * {0, 0, 0, 0}, {1, 1, 1, 1}, etc.
830  */
831 static bool
create_common_immediate(struct svga_shader_emitter * emit)832 create_common_immediate( struct svga_shader_emitter *emit )
833 {
834    unsigned idx = emit->nr_hw_float_const++;
835 
836    /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate
837     * other useful vectors.
838     */
839    if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
840                         idx, 0.0f, 0.5f, -1.0f, 1.0f ))
841       return false;
842    emit->common_immediate_idx[0] = idx;
843    idx++;
844 
845    /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */
846    if (emit->key.vs.adjust_attrib_range) {
847       if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
848                            idx, 2.0f, 0.0f, 0.0f, 0.0f ))
849          return false;
850       emit->common_immediate_idx[1] = idx;
851    }
852    else {
853       emit->common_immediate_idx[1] = -1;
854    }
855 
856    emit->created_common_immediate = true;
857 
858    return true;
859 }
860 
861 
862 /**
863  * Return swizzle/position for the given value in the "common" immediate.
864  */
865 static inline unsigned
common_immediate_swizzle(float value)866 common_immediate_swizzle(float value)
867 {
868    if (value == 0.0f)
869       return TGSI_SWIZZLE_X;
870    else if (value == 0.5f)
871       return TGSI_SWIZZLE_Y;
872    else if (value == -1.0f)
873       return TGSI_SWIZZLE_Z;
874    else if (value == 1.0f)
875       return TGSI_SWIZZLE_W;
876    else {
877       assert(!"illegal value in common_immediate_swizzle");
878       return TGSI_SWIZZLE_X;
879    }
880 }
881 
882 
883 /**
884  * Returns an immediate reg where all the terms are either 0, 1, 2 or 0.5
885  */
886 static struct src_register
get_immediate(struct svga_shader_emitter * emit,float x,float y,float z,float w)887 get_immediate(struct svga_shader_emitter *emit,
888               float x, float y, float z, float w)
889 {
890    unsigned sx = common_immediate_swizzle(x);
891    unsigned sy = common_immediate_swizzle(y);
892    unsigned sz = common_immediate_swizzle(z);
893    unsigned sw = common_immediate_swizzle(w);
894    assert(emit->created_common_immediate);
895    assert(emit->common_immediate_idx[0] >= 0);
896    return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
897                   sx, sy, sz, sw);
898 }
899 
900 
901 /**
902  * returns {0, 0, 0, 0} immediate
903  */
904 static struct src_register
get_zero_immediate(struct svga_shader_emitter * emit)905 get_zero_immediate( struct svga_shader_emitter *emit )
906 {
907    assert(emit->created_common_immediate);
908    assert(emit->common_immediate_idx[0] >= 0);
909    return swizzle(src_register( SVGA3DREG_CONST,
910                                 emit->common_immediate_idx[0]),
911                   0, 0, 0, 0);
912 }
913 
914 
915 /**
916  * returns {1, 1, 1, 1} immediate
917  */
918 static struct src_register
get_one_immediate(struct svga_shader_emitter * emit)919 get_one_immediate( struct svga_shader_emitter *emit )
920 {
921    assert(emit->created_common_immediate);
922    assert(emit->common_immediate_idx[0] >= 0);
923    return swizzle(src_register( SVGA3DREG_CONST,
924                                 emit->common_immediate_idx[0]),
925                   3, 3, 3, 3);
926 }
927 
928 
929 /**
930  * returns {0.5, 0.5, 0.5, 0.5} immediate
931  */
932 static struct src_register
get_half_immediate(struct svga_shader_emitter * emit)933 get_half_immediate( struct svga_shader_emitter *emit )
934 {
935    assert(emit->created_common_immediate);
936    assert(emit->common_immediate_idx[0] >= 0);
937    return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
938                   1, 1, 1, 1);
939 }
940 
941 
942 /**
943  * returns {2, 2, 2, 2} immediate
944  */
945 static struct src_register
get_two_immediate(struct svga_shader_emitter * emit)946 get_two_immediate( struct svga_shader_emitter *emit )
947 {
948    /* Note we use the second common immediate here */
949    assert(emit->created_common_immediate);
950    assert(emit->common_immediate_idx[1] >= 0);
951    return swizzle(src_register( SVGA3DREG_CONST,
952                                 emit->common_immediate_idx[1]),
953                   0, 0, 0, 0);
954 }
955 
956 
957 /**
958  * returns the loop const
959  */
960 static struct src_register
get_loop_const(struct svga_shader_emitter * emit)961 get_loop_const( struct svga_shader_emitter *emit )
962 {
963    assert(emit->created_loop_const);
964    assert(emit->loop_const_idx >= 0);
965    return src_register( SVGA3DREG_CONSTINT,
966                         emit->loop_const_idx );
967 }
968 
969 
970 static struct src_register
get_fake_arl_const(struct svga_shader_emitter * emit)971 get_fake_arl_const( struct svga_shader_emitter *emit )
972 {
973    struct src_register reg;
974    int idx = 0, swizzle = 0, i;
975 
976    for (i = 0; i < emit->num_arl_consts; ++ i) {
977       if (emit->arl_consts[i].arl_num == emit->current_arl) {
978          idx = emit->arl_consts[i].idx;
979          swizzle = emit->arl_consts[i].swizzle;
980       }
981    }
982 
983    reg = src_register( SVGA3DREG_CONST, idx );
984    return scalar(reg, swizzle);
985 }
986 
987 
988 /**
989  * Return a register which holds the width and height of the texture
990  * currently bound to the given sampler.
991  */
992 static struct src_register
get_tex_dimensions(struct svga_shader_emitter * emit,int sampler_num)993 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
994 {
995    int idx;
996    struct src_register reg;
997 
998    /* the width/height indexes start right after constants */
999    idx = emit->key.tex[sampler_num].width_height_idx +
1000          emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
1001 
1002    reg = src_register( SVGA3DREG_CONST, idx );
1003    return reg;
1004 }
1005 
1006 
1007 static bool
emit_fake_arl(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1008 emit_fake_arl(struct svga_shader_emitter *emit,
1009               const struct tgsi_full_instruction *insn)
1010 {
1011    const struct src_register src0 =
1012       translate_src_register(emit, &insn->Src[0] );
1013    struct src_register src1 = get_fake_arl_const( emit );
1014    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1015    SVGA3dShaderDestToken tmp = get_temp( emit );
1016 
1017    if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
1018       return false;
1019 
1020    if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
1021                     src1))
1022       return false;
1023 
1024    /* replicate the original swizzle */
1025    src1 = src(tmp);
1026    src1.base.swizzle = src0.base.swizzle;
1027 
1028    return submit_op1( emit, inst_token( SVGA3DOP_MOVA ),
1029                       dst, src1 );
1030 }
1031 
1032 
1033 static bool
emit_if(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1034 emit_if(struct svga_shader_emitter *emit,
1035         const struct tgsi_full_instruction *insn)
1036 {
1037    struct src_register src0 =
1038       translate_src_register(emit, &insn->Src[0]);
1039    struct src_register zero = get_zero_immediate(emit);
1040    SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
1041 
1042    if_token.control = SVGA3DOPCOMPC_NE;
1043 
1044    if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) {
1045       /*
1046        * Max different constant registers readable per IFC instruction is 1.
1047        */
1048       SVGA3dShaderDestToken tmp = get_temp( emit );
1049 
1050       if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
1051          return false;
1052 
1053       src0 = scalar(src( tmp ), TGSI_SWIZZLE_X);
1054    }
1055 
1056    emit->dynamic_branching_level++;
1057 
1058    return (emit_instruction( emit, if_token ) &&
1059            emit_src( emit, src0 ) &&
1060            emit_src( emit, zero ) );
1061 }
1062 
1063 
1064 static bool
emit_else(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1065 emit_else(struct svga_shader_emitter *emit,
1066           const struct tgsi_full_instruction *insn)
1067 {
1068    return emit_instruction(emit, inst_token(SVGA3DOP_ELSE));
1069 }
1070 
1071 
1072 static bool
emit_endif(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1073 emit_endif(struct svga_shader_emitter *emit,
1074            const struct tgsi_full_instruction *insn)
1075 {
1076    emit->dynamic_branching_level--;
1077 
1078    return emit_instruction(emit, inst_token(SVGA3DOP_ENDIF));
1079 }
1080 
1081 
1082 /**
1083  * Translate the following TGSI FLR instruction.
1084  *    FLR  DST, SRC
1085  * To the following SVGA3D instruction sequence.
1086  *    FRC  TMP, SRC
1087  *    SUB  DST, SRC, TMP
1088  */
1089 static bool
emit_floor(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1090 emit_floor(struct svga_shader_emitter *emit,
1091            const struct tgsi_full_instruction *insn )
1092 {
1093    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1094    const struct src_register src0 =
1095       translate_src_register(emit, &insn->Src[0] );
1096    SVGA3dShaderDestToken temp = get_temp( emit );
1097 
1098    /* FRC  TMP, SRC */
1099    if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 ))
1100       return false;
1101 
1102    /* SUB  DST, SRC, TMP */
1103    if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0,
1104                     negate( src( temp ) ) ))
1105       return false;
1106 
1107    return true;
1108 }
1109 
1110 
1111 /**
1112  * Translate the following TGSI CEIL instruction.
1113  *    CEIL  DST, SRC
1114  * To the following SVGA3D instruction sequence.
1115  *    FRC  TMP, -SRC
1116  *    ADD  DST, SRC, TMP
1117  */
1118 static bool
emit_ceil(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1119 emit_ceil(struct svga_shader_emitter *emit,
1120           const struct tgsi_full_instruction *insn)
1121 {
1122    SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
1123    const struct src_register src0 =
1124       translate_src_register(emit, &insn->Src[0]);
1125    SVGA3dShaderDestToken temp = get_temp(emit);
1126 
1127    /* FRC  TMP, -SRC */
1128    if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), temp, negate(src0)))
1129       return false;
1130 
1131    /* ADD DST, SRC, TMP */
1132    if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), dst, src0, src(temp)))
1133       return false;
1134 
1135    return true;
1136 }
1137 
1138 
1139 /**
1140  * Translate the following TGSI DIV instruction.
1141  *    DIV  DST.xy, SRC0, SRC1
1142  * To the following SVGA3D instruction sequence.
1143  *    RCP  TMP.x, SRC1.xxxx
1144  *    RCP  TMP.y, SRC1.yyyy
1145  *    MUL  DST.xy, SRC0, TMP
1146  */
1147 static bool
emit_div(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1148 emit_div(struct svga_shader_emitter *emit,
1149          const struct tgsi_full_instruction *insn )
1150 {
1151    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1152    const struct src_register src0 =
1153       translate_src_register(emit, &insn->Src[0] );
1154    const struct src_register src1 =
1155       translate_src_register(emit, &insn->Src[1] );
1156    SVGA3dShaderDestToken temp = get_temp( emit );
1157    unsigned i;
1158 
1159    /* For each enabled element, perform a RCP instruction.  Note that
1160     * RCP is scalar in SVGA3D:
1161     */
1162    for (i = 0; i < 4; i++) {
1163       unsigned channel = 1 << i;
1164       if (dst.mask & channel) {
1165          /* RCP  TMP.?, SRC1.???? */
1166          if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1167                           writemask(temp, channel),
1168                           scalar(src1, i) ))
1169             return false;
1170       }
1171    }
1172 
1173    /* Vector mul:
1174     * MUL  DST, SRC0, TMP
1175     */
1176    if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
1177                     src( temp ) ))
1178       return false;
1179 
1180    return true;
1181 }
1182 
1183 
1184 /**
1185  * Translate the following TGSI DP2 instruction.
1186  *    DP2  DST, SRC1, SRC2
1187  * To the following SVGA3D instruction sequence.
1188  *    MUL  TMP, SRC1, SRC2
1189  *    ADD  DST, TMP.xxxx, TMP.yyyy
1190  */
1191 static bool
emit_dp2(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1192 emit_dp2(struct svga_shader_emitter *emit,
1193          const struct tgsi_full_instruction *insn )
1194 {
1195    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1196    const struct src_register src0 =
1197       translate_src_register(emit, &insn->Src[0]);
1198    const struct src_register src1 =
1199       translate_src_register(emit, &insn->Src[1]);
1200    SVGA3dShaderDestToken temp = get_temp( emit );
1201    struct src_register temp_src0, temp_src1;
1202 
1203    /* MUL  TMP, SRC1, SRC2 */
1204    if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
1205       return false;
1206 
1207    temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1208    temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1209 
1210    /* ADD  DST, TMP.xxxx, TMP.yyyy */
1211    if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1212                     temp_src0, temp_src1 ))
1213       return false;
1214 
1215    return true;
1216 }
1217 
1218 
1219 /**
1220  * Sine / Cosine helper function.
1221  */
1222 static bool
do_emit_sincos(struct svga_shader_emitter * emit,SVGA3dShaderDestToken dst,struct src_register src0)1223 do_emit_sincos(struct svga_shader_emitter *emit,
1224                SVGA3dShaderDestToken dst,
1225                struct src_register src0)
1226 {
1227    src0 = scalar(src0, TGSI_SWIZZLE_X);
1228    return submit_op1(emit, inst_token(SVGA3DOP_SINCOS), dst, src0);
1229 }
1230 
1231 
1232 /**
1233  * Translate TGSI SIN instruction into:
1234  * SCS TMP SRC
1235  * MOV DST TMP.yyyy
1236  */
1237 static bool
emit_sin(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1238 emit_sin(struct svga_shader_emitter *emit,
1239          const struct tgsi_full_instruction *insn )
1240 {
1241    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1242    struct src_register src0 =
1243       translate_src_register(emit, &insn->Src[0] );
1244    SVGA3dShaderDestToken temp = get_temp( emit );
1245 
1246    /* SCS TMP SRC */
1247    if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0))
1248       return false;
1249 
1250    src0 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1251 
1252    /* MOV DST TMP.yyyy */
1253    if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1254       return false;
1255 
1256    return true;
1257 }
1258 
1259 
1260 /*
1261  * Translate TGSI COS instruction into:
1262  * SCS TMP SRC
1263  * MOV DST TMP.xxxx
1264  */
1265 static bool
emit_cos(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1266 emit_cos(struct svga_shader_emitter *emit,
1267          const struct tgsi_full_instruction *insn)
1268 {
1269    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1270    struct src_register src0 =
1271       translate_src_register(emit, &insn->Src[0] );
1272    SVGA3dShaderDestToken temp = get_temp( emit );
1273 
1274    /* SCS TMP SRC */
1275    if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 ))
1276       return false;
1277 
1278    src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1279 
1280    /* MOV DST TMP.xxxx */
1281    if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1282       return false;
1283 
1284    return true;
1285 }
1286 
1287 
1288 /**
1289  * Translate/emit TGSI SSG (Set Sign: -1, 0, +1) instruction.
1290  */
1291 static bool
emit_ssg(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1292 emit_ssg(struct svga_shader_emitter *emit,
1293          const struct tgsi_full_instruction *insn)
1294 {
1295    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1296    struct src_register src0 =
1297       translate_src_register(emit, &insn->Src[0] );
1298    SVGA3dShaderDestToken temp0 = get_temp( emit );
1299    SVGA3dShaderDestToken temp1 = get_temp( emit );
1300    struct src_register zero, one;
1301 
1302    if (emit->unit == PIPE_SHADER_VERTEX) {
1303       /* SGN  DST, SRC0, TMP0, TMP1 */
1304       return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0,
1305                          src( temp0 ), src( temp1 ) );
1306    }
1307 
1308    one = get_one_immediate(emit);
1309    zero = get_zero_immediate(emit);
1310 
1311    /* CMP  TMP0, SRC0, one, zero */
1312    if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1313                     writemask( temp0, dst.mask ), src0, one, zero ))
1314       return false;
1315 
1316    /* CMP  TMP1, negate(SRC0), negate(one), zero */
1317    if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1318                     writemask( temp1, dst.mask ), negate( src0 ), negate( one ),
1319                     zero ))
1320       return false;
1321 
1322    /* ADD  DST, TMP0, TMP1 */
1323    return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ),
1324                       src( temp1 ) );
1325 }
1326 
1327 
1328 /**
1329  * Translate/emit the conditional discard instruction (discard if
1330  * any of X,Y,Z,W are negative).
1331  */
1332 static bool
emit_cond_discard(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1333 emit_cond_discard(struct svga_shader_emitter *emit,
1334                   const struct tgsi_full_instruction *insn)
1335 {
1336    const struct tgsi_full_src_register *reg = &insn->Src[0];
1337    struct src_register src0, srcIn;
1338    const bool special = (reg->Register.Absolute ||
1339                          reg->Register.Negate ||
1340                          reg->Register.Indirect ||
1341                          reg->Register.SwizzleX != 0 ||
1342                          reg->Register.SwizzleY != 1 ||
1343                          reg->Register.SwizzleZ != 2 ||
1344                          reg->Register.File != TGSI_FILE_TEMPORARY);
1345    SVGA3dShaderDestToken temp;
1346 
1347    src0 = srcIn = translate_src_register( emit, reg );
1348 
1349    if (special) {
1350       /* need a temp reg */
1351       temp = get_temp( emit );
1352    }
1353 
1354    if (special) {
1355       /* move the source into a temp register */
1356       submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, src0);
1357 
1358       src0 = src( temp );
1359    }
1360 
1361    /* Do the discard by checking if any of the XYZW components are < 0.
1362     * Note that ps_2_0 and later take XYZW in consideration, while ps_1_x
1363     * only used XYZ.  The MSDN documentation about this is incorrect.
1364     */
1365    if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), dst(src0) ))
1366       return false;
1367 
1368    return true;
1369 }
1370 
1371 
1372 /**
1373  * Translate/emit the unconditional discard instruction (usually found inside
1374  * an IF/ELSE/ENDIF block).
1375  */
1376 static bool
emit_discard(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1377 emit_discard(struct svga_shader_emitter *emit,
1378              const struct tgsi_full_instruction *insn)
1379 {
1380    SVGA3dShaderDestToken temp;
1381    struct src_register one = get_one_immediate(emit);
1382    SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_TEXKILL );
1383 
1384    /* texkill doesn't allow negation on the operand so lets move
1385     * negation of {1} to a temp register */
1386    temp = get_temp( emit );
1387    if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
1388                     negate( one ) ))
1389       return false;
1390 
1391    return submit_op0( emit, inst, temp );
1392 }
1393 
1394 
1395 /**
1396  * Test if r1 and r2 are the same register.
1397  */
1398 static bool
same_register(struct src_register r1,struct src_register r2)1399 same_register(struct src_register r1, struct src_register r2)
1400 {
1401    return (r1.base.num == r2.base.num &&
1402            r1.base.type_upper == r2.base.type_upper &&
1403            r1.base.type_lower == r2.base.type_lower);
1404 }
1405 
1406 
1407 
1408 /**
1409  * Implement conditionals by initializing destination reg to 'fail',
1410  * then set predicate reg with UFOP_SETP, then move 'pass' to dest
1411  * based on predicate reg.
1412  *
1413  * SETP src0, cmp, src1  -- do this first to avoid aliasing problems.
1414  * MOV dst, fail
1415  * MOV dst, pass, p0
1416  */
1417 static bool
emit_conditional(struct svga_shader_emitter * emit,enum pipe_compare_func compare_func,SVGA3dShaderDestToken dst,struct src_register src0,struct src_register src1,struct src_register pass,struct src_register fail)1418 emit_conditional(struct svga_shader_emitter *emit,
1419                  enum pipe_compare_func compare_func,
1420                  SVGA3dShaderDestToken dst,
1421                  struct src_register src0,
1422                  struct src_register src1,
1423                  struct src_register pass,
1424                  struct src_register fail)
1425 {
1426    SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1427    SVGA3dShaderInstToken setp_token;
1428 
1429    switch (compare_func) {
1430    case PIPE_FUNC_NEVER:
1431       return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1432                          dst, fail );
1433       break;
1434    case PIPE_FUNC_LESS:
1435       setp_token = inst_token_setp(SVGA3DOPCOMP_LT);
1436       break;
1437    case PIPE_FUNC_EQUAL:
1438       setp_token = inst_token_setp(SVGA3DOPCOMP_EQ);
1439       break;
1440    case PIPE_FUNC_LEQUAL:
1441       setp_token = inst_token_setp(SVGA3DOPCOMP_LE);
1442       break;
1443    case PIPE_FUNC_GREATER:
1444       setp_token = inst_token_setp(SVGA3DOPCOMP_GT);
1445       break;
1446    case PIPE_FUNC_NOTEQUAL:
1447       setp_token = inst_token_setp(SVGA3DOPCOMPC_NE);
1448       break;
1449    case PIPE_FUNC_GEQUAL:
1450       setp_token = inst_token_setp(SVGA3DOPCOMP_GE);
1451       break;
1452    case PIPE_FUNC_ALWAYS:
1453       return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1454                          dst, pass );
1455       break;
1456    }
1457 
1458    if (same_register(src(dst), pass)) {
1459       /* We'll get bad results if the dst and pass registers are the same
1460        * so use a temp register containing pass.
1461        */
1462       SVGA3dShaderDestToken temp = get_temp(emit);
1463       if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, pass))
1464          return false;
1465       pass = src(temp);
1466    }
1467 
1468    /* SETP src0, COMPOP, src1 */
1469    if (!submit_op2( emit, setp_token, pred_reg,
1470                     src0, src1 ))
1471       return false;
1472 
1473    /* MOV dst, fail */
1474    if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), dst, fail))
1475       return false;
1476 
1477    /* MOV dst, pass (predicated)
1478     *
1479     * Note that the predicate reg (and possible modifiers) is passed
1480     * as the first source argument.
1481     */
1482    if (!submit_op2(emit,
1483                    inst_token_predicated(SVGA3DOP_MOV), dst,
1484                    src(pred_reg), pass))
1485       return false;
1486 
1487    return true;
1488 }
1489 
1490 
1491 /**
1492  * Helper for emiting 'selection' commands.  Basically:
1493  * if (src0 OP src1)
1494  *    dst = 1.0;
1495  * else
1496  *    dst = 0.0;
1497  */
1498 static bool
emit_select(struct svga_shader_emitter * emit,enum pipe_compare_func compare_func,SVGA3dShaderDestToken dst,struct src_register src0,struct src_register src1)1499 emit_select(struct svga_shader_emitter *emit,
1500             enum pipe_compare_func compare_func,
1501             SVGA3dShaderDestToken dst,
1502             struct src_register src0,
1503             struct src_register src1 )
1504 {
1505    /* There are some SVGA instructions which implement some selects
1506     * directly, but they are only available in the vertex shader.
1507     */
1508    if (emit->unit == PIPE_SHADER_VERTEX) {
1509       switch (compare_func) {
1510       case PIPE_FUNC_GEQUAL:
1511          return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
1512       case PIPE_FUNC_LEQUAL:
1513          return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
1514       case PIPE_FUNC_GREATER:
1515          return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
1516       case PIPE_FUNC_LESS:
1517          return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
1518       default:
1519          break;
1520       }
1521    }
1522 
1523    /* Otherwise, need to use the setp approach:
1524     */
1525    {
1526       struct src_register one, zero;
1527       /* zero immediate is 0,0,0,1 */
1528       zero = get_zero_immediate(emit);
1529       one = get_one_immediate(emit);
1530 
1531       return emit_conditional(emit, compare_func, dst, src0, src1, one, zero);
1532    }
1533 }
1534 
1535 
1536 /**
1537  * Translate/emit a TGSI SEQ, SNE, SLT, SGE, etc. instruction.
1538  */
1539 static bool
emit_select_op(struct svga_shader_emitter * emit,unsigned compare,const struct tgsi_full_instruction * insn)1540 emit_select_op(struct svga_shader_emitter *emit,
1541                unsigned compare,
1542                const struct tgsi_full_instruction *insn)
1543 {
1544    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1545    struct src_register src0 = translate_src_register(
1546       emit, &insn->Src[0] );
1547    struct src_register src1 = translate_src_register(
1548       emit, &insn->Src[1] );
1549 
1550    return emit_select( emit, compare, dst, src0, src1 );
1551 }
1552 
1553 
1554 /**
1555  * Translate TGSI CMP instruction.  Component-wise:
1556  * dst = (src0 < 0.0) ? src1 : src2
1557  */
1558 static bool
emit_cmp(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1559 emit_cmp(struct svga_shader_emitter *emit,
1560          const struct tgsi_full_instruction *insn)
1561 {
1562    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1563    const struct src_register src0 =
1564       translate_src_register(emit, &insn->Src[0] );
1565    const struct src_register src1 =
1566       translate_src_register(emit, &insn->Src[1] );
1567    const struct src_register src2 =
1568       translate_src_register(emit, &insn->Src[2] );
1569 
1570    if (emit->unit == PIPE_SHADER_VERTEX) {
1571       struct src_register zero = get_zero_immediate(emit);
1572       /* We used to simulate CMP with SLT+LRP.  But that didn't work when
1573        * src1 or src2 was Inf/NaN.  In particular, GLSL sqrt(0) failed
1574        * because it involves a CMP to handle the 0 case.
1575        * Use a conditional expression instead.
1576        */
1577       return emit_conditional(emit, PIPE_FUNC_LESS, dst,
1578                               src0, zero, src1, src2);
1579    }
1580    else {
1581       assert(emit->unit == PIPE_SHADER_FRAGMENT);
1582 
1583       /* CMP  DST, SRC0, SRC2, SRC1 */
1584       return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst,
1585                          src0, src2, src1);
1586    }
1587 }
1588 
1589 
1590 /**
1591  * Translate/emit 2-operand (coord, sampler) texture instructions.
1592  */
1593 static bool
emit_tex2(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn,SVGA3dShaderDestToken dst)1594 emit_tex2(struct svga_shader_emitter *emit,
1595           const struct tgsi_full_instruction *insn,
1596           SVGA3dShaderDestToken dst)
1597 {
1598    SVGA3dShaderInstToken inst;
1599    struct src_register texcoord;
1600    struct src_register sampler;
1601    SVGA3dShaderDestToken tmp;
1602 
1603    inst.value = 0;
1604 
1605    switch (insn->Instruction.Opcode) {
1606    case TGSI_OPCODE_TEX:
1607       inst.op = SVGA3DOP_TEX;
1608       break;
1609    case TGSI_OPCODE_TXP:
1610       inst.op = SVGA3DOP_TEX;
1611       inst.control = SVGA3DOPCONT_PROJECT;
1612       break;
1613    case TGSI_OPCODE_TXB:
1614       inst.op = SVGA3DOP_TEX;
1615       inst.control = SVGA3DOPCONT_BIAS;
1616       break;
1617    case TGSI_OPCODE_TXL:
1618       inst.op = SVGA3DOP_TEXLDL;
1619       break;
1620    default:
1621       assert(0);
1622       return false;
1623    }
1624 
1625    texcoord = translate_src_register( emit, &insn->Src[0] );
1626    sampler = translate_src_register( emit, &insn->Src[1] );
1627 
1628    if (emit->key.tex[sampler.base.num].unnormalized ||
1629        emit->dynamic_branching_level > 0)
1630       tmp = get_temp( emit );
1631 
1632    /* Can't do mipmapping inside dynamic branch constructs.  Force LOD
1633     * zero in that case.
1634     */
1635    if (emit->dynamic_branching_level > 0 &&
1636        inst.op == SVGA3DOP_TEX &&
1637        SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
1638       struct src_register zero = get_zero_immediate(emit);
1639 
1640       /* MOV  tmp, texcoord */
1641       if (!submit_op1( emit,
1642                        inst_token( SVGA3DOP_MOV ),
1643                        tmp,
1644                        texcoord ))
1645          return false;
1646 
1647       /* MOV  tmp.w, zero */
1648       if (!submit_op1( emit,
1649                        inst_token( SVGA3DOP_MOV ),
1650                        writemask( tmp, TGSI_WRITEMASK_W ),
1651                        zero ))
1652          return false;
1653 
1654       texcoord = src( tmp );
1655       inst.op = SVGA3DOP_TEXLDL;
1656    }
1657 
1658    /* Explicit normalization of texcoords:
1659     */
1660    if (emit->key.tex[sampler.base.num].unnormalized) {
1661       struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
1662 
1663       /* MUL  tmp, SRC0, WH */
1664       if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1665                        tmp, texcoord, wh ))
1666          return false;
1667 
1668       texcoord = src( tmp );
1669    }
1670 
1671    return submit_op2( emit, inst, dst, texcoord, sampler );
1672 }
1673 
1674 
1675 /**
1676  * Translate/emit 4-operand (coord, ddx, ddy, sampler) texture instructions.
1677  */
1678 static bool
emit_tex4(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn,SVGA3dShaderDestToken dst)1679 emit_tex4(struct svga_shader_emitter *emit,
1680           const struct tgsi_full_instruction *insn,
1681           SVGA3dShaderDestToken dst )
1682 {
1683    SVGA3dShaderInstToken inst;
1684    struct src_register texcoord;
1685    struct src_register ddx;
1686    struct src_register ddy;
1687    struct src_register sampler;
1688 
1689    texcoord = translate_src_register( emit, &insn->Src[0] );
1690    ddx      = translate_src_register( emit, &insn->Src[1] );
1691    ddy      = translate_src_register( emit, &insn->Src[2] );
1692    sampler  = translate_src_register( emit, &insn->Src[3] );
1693 
1694    inst.value = 0;
1695 
1696    switch (insn->Instruction.Opcode) {
1697    case TGSI_OPCODE_TXD:
1698       inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
1699       break;
1700    default:
1701       assert(0);
1702       return false;
1703    }
1704 
1705    return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
1706 }
1707 
1708 
1709 /**
1710  * Emit texture swizzle code.  We do this here since SVGA samplers don't
1711  * directly support swizzles.
1712  */
1713 static bool
emit_tex_swizzle(struct svga_shader_emitter * emit,SVGA3dShaderDestToken dst,struct src_register src,unsigned swizzle_x,unsigned swizzle_y,unsigned swizzle_z,unsigned swizzle_w)1714 emit_tex_swizzle(struct svga_shader_emitter *emit,
1715                  SVGA3dShaderDestToken dst,
1716                  struct src_register src,
1717                  unsigned swizzle_x,
1718                  unsigned swizzle_y,
1719                  unsigned swizzle_z,
1720                  unsigned swizzle_w)
1721 {
1722    const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
1723    unsigned srcSwizzle[4];
1724    unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0;
1725    unsigned i;
1726 
1727    /* build writemasks and srcSwizzle terms */
1728    for (i = 0; i < 4; i++) {
1729       if (swizzleIn[i] == PIPE_SWIZZLE_0) {
1730          srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1731          zeroWritemask |= (1 << i);
1732       }
1733       else if (swizzleIn[i] == PIPE_SWIZZLE_1) {
1734          srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1735          oneWritemask |= (1 << i);
1736       }
1737       else {
1738          srcSwizzle[i] = swizzleIn[i];
1739          srcWritemask |= (1 << i);
1740       }
1741    }
1742 
1743    /* write x/y/z/w comps */
1744    if (dst.mask & srcWritemask) {
1745       if (!submit_op1(emit,
1746                       inst_token(SVGA3DOP_MOV),
1747                       writemask(dst, srcWritemask),
1748                       swizzle(src,
1749                               srcSwizzle[0],
1750                               srcSwizzle[1],
1751                               srcSwizzle[2],
1752                               srcSwizzle[3])))
1753          return false;
1754    }
1755 
1756    /* write 0 comps */
1757    if (dst.mask & zeroWritemask) {
1758       if (!submit_op1(emit,
1759                       inst_token(SVGA3DOP_MOV),
1760                       writemask(dst, zeroWritemask),
1761                       get_zero_immediate(emit)))
1762          return false;
1763    }
1764 
1765    /* write 1 comps */
1766    if (dst.mask & oneWritemask) {
1767       if (!submit_op1(emit,
1768                       inst_token(SVGA3DOP_MOV),
1769                       writemask(dst, oneWritemask),
1770                       get_one_immediate(emit)))
1771          return false;
1772    }
1773 
1774    return true;
1775 }
1776 
1777 
1778 /**
1779  * Translate/emit a TGSI texture sample instruction.
1780  */
1781 static bool
emit_tex(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1782 emit_tex(struct svga_shader_emitter *emit,
1783          const struct tgsi_full_instruction *insn)
1784 {
1785    SVGA3dShaderDestToken dst =
1786       translate_dst_register( emit, insn, 0 );
1787    struct src_register src0 =
1788       translate_src_register( emit, &insn->Src[0] );
1789    struct src_register src1 =
1790       translate_src_register( emit, &insn->Src[1] );
1791 
1792    SVGA3dShaderDestToken tex_result;
1793    const unsigned unit = src1.base.num;
1794 
1795    /* check for shadow samplers */
1796    bool compare = (emit->key.tex[unit].compare_mode ==
1797                    PIPE_TEX_COMPARE_R_TO_TEXTURE);
1798 
1799    /* texture swizzle */
1800    bool swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_X ||
1801                    emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_Y ||
1802                    emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_Z ||
1803                    emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_W);
1804 
1805    bool saturate = insn->Instruction.Saturate;
1806 
1807    /* If doing compare processing or tex swizzle or saturation, we need to put
1808     * the fetched color into a temporary so it can be used as a source later on.
1809     */
1810    if (compare || swizzle || saturate) {
1811       tex_result = get_temp( emit );
1812    }
1813    else {
1814       tex_result = dst;
1815    }
1816 
1817    switch(insn->Instruction.Opcode) {
1818    case TGSI_OPCODE_TEX:
1819    case TGSI_OPCODE_TXB:
1820    case TGSI_OPCODE_TXP:
1821    case TGSI_OPCODE_TXL:
1822       if (!emit_tex2( emit, insn, tex_result ))
1823          return false;
1824       break;
1825    case TGSI_OPCODE_TXD:
1826       if (!emit_tex4( emit, insn, tex_result ))
1827          return false;
1828       break;
1829    default:
1830       assert(0);
1831    }
1832 
1833    if (compare) {
1834       SVGA3dShaderDestToken dst2;
1835 
1836       if (swizzle || saturate)
1837          dst2 = tex_result;
1838       else
1839          dst2 = dst;
1840 
1841       if (dst.mask & TGSI_WRITEMASK_XYZ) {
1842          SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
1843          /* When sampling a depth texture, the result of the comparison is in
1844           * the Y component.
1845           */
1846          struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
1847          struct src_register r_coord;
1848 
1849          if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) {
1850             /* Divide texcoord R by Q */
1851             if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1852                              writemask(src0_zdivw, TGSI_WRITEMASK_X),
1853                              scalar(src0, TGSI_SWIZZLE_W) ))
1854                return false;
1855 
1856             if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1857                              writemask(src0_zdivw, TGSI_WRITEMASK_X),
1858                              scalar(src0, TGSI_SWIZZLE_Z),
1859                              scalar(src(src0_zdivw), TGSI_SWIZZLE_X) ))
1860                return false;
1861 
1862             r_coord = scalar(src(src0_zdivw), TGSI_SWIZZLE_X);
1863          }
1864          else {
1865             r_coord = scalar(src0, TGSI_SWIZZLE_Z);
1866          }
1867 
1868          /* Compare texture sample value against R component of texcoord */
1869          if (!emit_select(emit,
1870                           emit->key.tex[unit].compare_func,
1871                           writemask( dst2, TGSI_WRITEMASK_XYZ ),
1872                           r_coord,
1873                           tex_src_x))
1874             return false;
1875       }
1876 
1877       if (dst.mask & TGSI_WRITEMASK_W) {
1878          struct src_register one = get_one_immediate(emit);
1879 
1880         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1881                          writemask( dst2, TGSI_WRITEMASK_W ),
1882                          one ))
1883            return false;
1884       }
1885    }
1886 
1887    if (saturate && !swizzle) {
1888       /* MOV_SAT real_dst, dst */
1889       if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
1890          return false;
1891    }
1892    else if (swizzle) {
1893       /* swizzle from tex_result to dst (handles saturation too, if any) */
1894       emit_tex_swizzle(emit,
1895                        dst, src(tex_result),
1896                        emit->key.tex[unit].swizzle_r,
1897                        emit->key.tex[unit].swizzle_g,
1898                        emit->key.tex[unit].swizzle_b,
1899                        emit->key.tex[unit].swizzle_a);
1900    }
1901 
1902    return true;
1903 }
1904 
1905 
1906 static bool
emit_bgnloop(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1907 emit_bgnloop(struct svga_shader_emitter *emit,
1908              const struct tgsi_full_instruction *insn)
1909 {
1910    SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
1911    struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
1912    struct src_register const_int = get_loop_const( emit );
1913 
1914    emit->dynamic_branching_level++;
1915 
1916    return (emit_instruction( emit, inst ) &&
1917            emit_src( emit, loop_reg ) &&
1918            emit_src( emit, const_int ) );
1919 }
1920 
1921 
1922 static bool
emit_endloop(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1923 emit_endloop(struct svga_shader_emitter *emit,
1924              const struct tgsi_full_instruction *insn)
1925 {
1926    SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
1927 
1928    emit->dynamic_branching_level--;
1929 
1930    return emit_instruction( emit, inst );
1931 }
1932 
1933 
1934 /**
1935  * Translate/emit TGSI BREAK (out of loop) instruction.
1936  */
1937 static bool
emit_brk(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1938 emit_brk(struct svga_shader_emitter *emit,
1939          const struct tgsi_full_instruction *insn)
1940 {
1941    SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
1942    return emit_instruction( emit, inst );
1943 }
1944 
1945 
1946 /**
1947  * Emit simple instruction which operates on one scalar value (not
1948  * a vector).  Ex: LG2, RCP, RSQ.
1949  */
1950 static bool
emit_scalar_op1(struct svga_shader_emitter * emit,SVGA3dShaderOpCodeType opcode,const struct tgsi_full_instruction * insn)1951 emit_scalar_op1(struct svga_shader_emitter *emit,
1952                 SVGA3dShaderOpCodeType opcode,
1953                 const struct tgsi_full_instruction *insn)
1954 {
1955    SVGA3dShaderInstToken inst;
1956    SVGA3dShaderDestToken dst;
1957    struct src_register src;
1958 
1959    inst = inst_token( opcode );
1960    dst = translate_dst_register( emit, insn, 0 );
1961    src = translate_src_register( emit, &insn->Src[0] );
1962    src = scalar( src, TGSI_SWIZZLE_X );
1963 
1964    return submit_op1( emit, inst, dst, src );
1965 }
1966 
1967 
1968 /**
1969  * Translate/emit a simple instruction (one which has no special-case
1970  * code) such as ADD, MUL, MIN, MAX.
1971  */
1972 static bool
emit_simple_instruction(struct svga_shader_emitter * emit,SVGA3dShaderOpCodeType opcode,const struct tgsi_full_instruction * insn)1973 emit_simple_instruction(struct svga_shader_emitter *emit,
1974                         SVGA3dShaderOpCodeType opcode,
1975                         const struct tgsi_full_instruction *insn)
1976 {
1977    const struct tgsi_full_src_register *src = insn->Src;
1978    SVGA3dShaderInstToken inst;
1979    SVGA3dShaderDestToken dst;
1980 
1981    inst = inst_token( opcode );
1982    dst = translate_dst_register( emit, insn, 0 );
1983 
1984    switch (insn->Instruction.NumSrcRegs) {
1985    case 0:
1986       return submit_op0( emit, inst, dst );
1987    case 1:
1988       return submit_op1( emit, inst, dst,
1989                          translate_src_register( emit, &src[0] ));
1990    case 2:
1991       return submit_op2( emit, inst, dst,
1992                          translate_src_register( emit, &src[0] ),
1993                          translate_src_register( emit, &src[1] ) );
1994    case 3:
1995       return submit_op3( emit, inst, dst,
1996                          translate_src_register( emit, &src[0] ),
1997                          translate_src_register( emit, &src[1] ),
1998                          translate_src_register( emit, &src[2] ) );
1999    default:
2000       assert(0);
2001       return false;
2002    }
2003 }
2004 
2005 
2006 /**
2007  * TGSI_OPCODE_MOVE is only special-cased here to detect the
2008  * svga_fragment_shader::constant_color_output case.
2009  */
2010 static bool
emit_mov(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2011 emit_mov(struct svga_shader_emitter *emit,
2012          const struct tgsi_full_instruction *insn)
2013 {
2014    const struct tgsi_full_src_register *src = &insn->Src[0];
2015    const struct tgsi_full_dst_register *dst = &insn->Dst[0];
2016 
2017    if (emit->unit == PIPE_SHADER_FRAGMENT &&
2018        dst->Register.File == TGSI_FILE_OUTPUT &&
2019        dst->Register.Index == 0 &&
2020        src->Register.File == TGSI_FILE_CONSTANT &&
2021        !src->Register.Indirect) {
2022       emit->constant_color_output = true;
2023    }
2024 
2025    return emit_simple_instruction(emit, SVGA3DOP_MOV, insn);
2026 }
2027 
2028 
2029 /**
2030  * Translate TGSI SQRT instruction
2031  * if src1 == 0
2032  *    mov dst, src1
2033  * else
2034  *    rsq temp, src1
2035  *    rcp dst, temp
2036  * endif
2037  */
2038 static bool
emit_sqrt(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2039 emit_sqrt(struct svga_shader_emitter *emit,
2040          const struct tgsi_full_instruction *insn)
2041 {
2042    const struct src_register src1 = translate_src_register(emit, &insn->Src[0]);
2043    const struct src_register zero = get_zero_immediate(emit);
2044    SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
2045    SVGA3dShaderDestToken temp = get_temp(emit);
2046    SVGA3dShaderInstToken if_token = inst_token(SVGA3DOP_IFC);
2047    bool ret = true;
2048 
2049    if_token.control = SVGA3DOPCOMP_EQ;
2050 
2051    if (!(emit_instruction(emit, if_token) &&
2052          emit_src(emit, src1) &&
2053          emit_src(emit, zero))) {
2054       ret = false;
2055       goto cleanup;
2056    }
2057 
2058    if (!submit_op1(emit,
2059               inst_token(SVGA3DOP_MOV),
2060               dst, src1)) {
2061       ret = false;
2062       goto cleanup;
2063    }
2064 
2065    if (!emit_instruction(emit, inst_token(SVGA3DOP_ELSE))) {
2066       ret = false;
2067       goto cleanup;
2068    }
2069 
2070    if (!submit_op1(emit,
2071               inst_token(SVGA3DOP_RSQ),
2072               temp, src1)) {
2073       ret = false;
2074       goto cleanup;
2075    }
2076 
2077    if (!submit_op1(emit,
2078               inst_token(SVGA3DOP_RCP),
2079               dst, src(temp))) {
2080       ret = false;
2081       goto cleanup;
2082    }
2083 
2084    if (!emit_instruction(emit, inst_token(SVGA3DOP_ENDIF))) {
2085       ret = false;
2086       goto cleanup;
2087    }
2088 
2089 cleanup:
2090    release_temp(emit, temp);
2091 
2092    return ret;
2093 }
2094 
2095 
2096 /**
2097  * Translate/emit TGSI DDX, DDY instructions.
2098  */
2099 static bool
emit_deriv(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2100 emit_deriv(struct svga_shader_emitter *emit,
2101            const struct tgsi_full_instruction *insn )
2102 {
2103    if (emit->dynamic_branching_level > 0 &&
2104        insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
2105    {
2106       SVGA3dShaderDestToken dst =
2107          translate_dst_register( emit, insn, 0 );
2108 
2109       /* Deriv opcodes not valid inside dynamic branching, workaround
2110        * by zeroing out the destination.
2111        */
2112       if (!submit_op1(emit,
2113                       inst_token( SVGA3DOP_MOV ),
2114                       dst,
2115                       get_zero_immediate(emit)))
2116          return false;
2117 
2118       return true;
2119    }
2120    else {
2121       SVGA3dShaderOpCodeType opcode;
2122       const struct tgsi_full_src_register *reg = &insn->Src[0];
2123       SVGA3dShaderInstToken inst;
2124       SVGA3dShaderDestToken dst;
2125       struct src_register src0;
2126 
2127       switch (insn->Instruction.Opcode) {
2128       case TGSI_OPCODE_DDX:
2129          opcode = SVGA3DOP_DSX;
2130          break;
2131       case TGSI_OPCODE_DDY:
2132          opcode = SVGA3DOP_DSY;
2133          break;
2134       default:
2135          return false;
2136       }
2137 
2138       inst = inst_token( opcode );
2139       dst = translate_dst_register( emit, insn, 0 );
2140       src0 = translate_src_register( emit, reg );
2141 
2142       /* We cannot use negate or abs on source to dsx/dsy instruction.
2143        */
2144       if (reg->Register.Absolute ||
2145           reg->Register.Negate) {
2146          SVGA3dShaderDestToken temp = get_temp( emit );
2147 
2148          if (!emit_repl( emit, temp, &src0 ))
2149             return false;
2150       }
2151 
2152       return submit_op1( emit, inst, dst, src0 );
2153    }
2154 }
2155 
2156 
2157 /**
2158  * Translate/emit ARL (Address Register Load) instruction.  Used to
2159  * move a value into the special 'address' register.  Used to implement
2160  * indirect/variable indexing into arrays.
2161  */
2162 static bool
emit_arl(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2163 emit_arl(struct svga_shader_emitter *emit,
2164          const struct tgsi_full_instruction *insn)
2165 {
2166    ++emit->current_arl;
2167    if (emit->unit == PIPE_SHADER_FRAGMENT) {
2168       /* MOVA not present in pixel shader instruction set.
2169        * Ignore this instruction altogether since it is
2170        * only used for loop counters -- and for that
2171        * we reference aL directly.
2172        */
2173       return true;
2174    }
2175    if (svga_arl_needs_adjustment( emit )) {
2176       return emit_fake_arl( emit, insn );
2177    } else {
2178       /* no need to adjust, just emit straight arl */
2179       return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
2180    }
2181 }
2182 
2183 
2184 static bool
emit_pow(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2185 emit_pow(struct svga_shader_emitter *emit,
2186          const struct tgsi_full_instruction *insn)
2187 {
2188    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2189    struct src_register src0 = translate_src_register(
2190       emit, &insn->Src[0] );
2191    struct src_register src1 = translate_src_register(
2192       emit, &insn->Src[1] );
2193    bool need_tmp = false;
2194 
2195    /* POW can only output to a temporary */
2196    if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
2197       need_tmp = true;
2198 
2199    /* POW src1 must not be the same register as dst */
2200    if (alias_src_dst( src1, dst ))
2201       need_tmp = true;
2202 
2203    /* it's a scalar op */
2204    src0 = scalar( src0, TGSI_SWIZZLE_X );
2205    src1 = scalar( src1, TGSI_SWIZZLE_X );
2206 
2207    if (need_tmp) {
2208       SVGA3dShaderDestToken tmp =
2209          writemask(get_temp( emit ), TGSI_WRITEMASK_X );
2210 
2211       if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
2212          return false;
2213 
2214       return submit_op1(emit, inst_token( SVGA3DOP_MOV ),
2215                         dst, scalar(src(tmp), 0) );
2216    }
2217    else {
2218       return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
2219    }
2220 }
2221 
2222 
2223 /**
2224  * Emit a LRP (linear interpolation) instruction.
2225  */
2226 static bool
submit_lrp(struct svga_shader_emitter * emit,SVGA3dShaderDestToken dst,struct src_register src0,struct src_register src1,struct src_register src2)2227 submit_lrp(struct svga_shader_emitter *emit,
2228            SVGA3dShaderDestToken dst,
2229            struct src_register src0,
2230            struct src_register src1,
2231            struct src_register src2)
2232 {
2233    SVGA3dShaderDestToken tmp;
2234    bool need_dst_tmp = false;
2235 
2236    /* The dst reg must be a temporary, and not be the same as src0 or src2 */
2237    if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
2238        alias_src_dst(src0, dst) ||
2239        alias_src_dst(src2, dst))
2240       need_dst_tmp = true;
2241 
2242    if (need_dst_tmp) {
2243       tmp = get_temp( emit );
2244       tmp.mask = dst.mask;
2245    }
2246    else {
2247       tmp = dst;
2248    }
2249 
2250    if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
2251       return false;
2252 
2253    if (need_dst_tmp) {
2254       if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
2255          return false;
2256    }
2257 
2258    return true;
2259 }
2260 
2261 
2262 /**
2263  * Translate/emit LRP (Linear Interpolation) instruction.
2264  */
2265 static bool
emit_lrp(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2266 emit_lrp(struct svga_shader_emitter *emit,
2267          const struct tgsi_full_instruction *insn)
2268 {
2269    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2270    const struct src_register src0 = translate_src_register(
2271       emit, &insn->Src[0] );
2272    const struct src_register src1 = translate_src_register(
2273       emit, &insn->Src[1] );
2274    const struct src_register src2 = translate_src_register(
2275       emit, &insn->Src[2] );
2276 
2277    return submit_lrp(emit, dst, src0, src1, src2);
2278 }
2279 
2280 /**
2281  * Translate/emit DST (Distance function) instruction.
2282  */
2283 static bool
emit_dst_insn(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2284 emit_dst_insn(struct svga_shader_emitter *emit,
2285               const struct tgsi_full_instruction *insn)
2286 {
2287    if (emit->unit == PIPE_SHADER_VERTEX) {
2288       /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
2289        */
2290       return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
2291    }
2292    else {
2293       /* result[0] = 1    * 1;
2294        * result[1] = a[1] * b[1];
2295        * result[2] = a[2] * 1;
2296        * result[3] = 1    * b[3];
2297        */
2298       SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2299       SVGA3dShaderDestToken tmp;
2300       const struct src_register src0 = translate_src_register(
2301          emit, &insn->Src[0] );
2302       const struct src_register src1 = translate_src_register(
2303          emit, &insn->Src[1] );
2304       bool need_tmp = false;
2305 
2306       if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
2307           alias_src_dst(src0, dst) ||
2308           alias_src_dst(src1, dst))
2309          need_tmp = true;
2310 
2311       if (need_tmp) {
2312          tmp = get_temp( emit );
2313       }
2314       else {
2315          tmp = dst;
2316       }
2317 
2318       /* tmp.xw = 1.0
2319        */
2320       if (tmp.mask & TGSI_WRITEMASK_XW) {
2321          if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2322                           writemask(tmp, TGSI_WRITEMASK_XW ),
2323                           get_one_immediate(emit)))
2324             return false;
2325       }
2326 
2327       /* tmp.yz = src0
2328        */
2329       if (tmp.mask & TGSI_WRITEMASK_YZ) {
2330          if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2331                           writemask(tmp, TGSI_WRITEMASK_YZ ),
2332                           src0))
2333             return false;
2334       }
2335 
2336       /* tmp.yw = tmp * src1
2337        */
2338       if (tmp.mask & TGSI_WRITEMASK_YW) {
2339          if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2340                           writemask(tmp, TGSI_WRITEMASK_YW ),
2341                           src(tmp),
2342                           src1))
2343             return false;
2344       }
2345 
2346       /* dst = tmp
2347        */
2348       if (need_tmp) {
2349          if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2350                           dst,
2351                           src(tmp)))
2352             return false;
2353       }
2354    }
2355 
2356    return true;
2357 }
2358 
2359 
2360 static bool
emit_exp(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2361 emit_exp(struct svga_shader_emitter *emit,
2362          const struct tgsi_full_instruction *insn)
2363 {
2364    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2365    struct src_register src0 =
2366       translate_src_register( emit, &insn->Src[0] );
2367    SVGA3dShaderDestToken fraction;
2368 
2369    if (dst.mask & TGSI_WRITEMASK_Y)
2370       fraction = dst;
2371    else if (dst.mask & TGSI_WRITEMASK_X)
2372       fraction = get_temp( emit );
2373    else
2374       fraction.value = 0;
2375 
2376    /* If y is being written, fill it with src0 - floor(src0).
2377     */
2378    if (dst.mask & TGSI_WRITEMASK_XY) {
2379       if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2380                        writemask( fraction, TGSI_WRITEMASK_Y ),
2381                        src0 ))
2382          return false;
2383    }
2384 
2385    /* If x is being written, fill it with 2 ^ floor(src0).
2386     */
2387    if (dst.mask & TGSI_WRITEMASK_X) {
2388       if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2389                        writemask( dst, TGSI_WRITEMASK_X ),
2390                        src0,
2391                        scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
2392          return false;
2393 
2394       if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2395                        writemask( dst, TGSI_WRITEMASK_X ),
2396                        scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
2397          return false;
2398 
2399       if (!(dst.mask & TGSI_WRITEMASK_Y))
2400          release_temp( emit, fraction );
2401    }
2402 
2403    /* If z is being written, fill it with 2 ^ src0 (partial precision).
2404     */
2405    if (dst.mask & TGSI_WRITEMASK_Z) {
2406       if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
2407                        writemask( dst, TGSI_WRITEMASK_Z ),
2408                        src0 ) )
2409          return false;
2410    }
2411 
2412    /* If w is being written, fill it with one.
2413     */
2414    if (dst.mask & TGSI_WRITEMASK_W) {
2415       if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2416                        writemask(dst, TGSI_WRITEMASK_W),
2417                        get_one_immediate(emit)))
2418          return false;
2419    }
2420 
2421    return true;
2422 }
2423 
2424 
2425 /**
2426  * Translate/emit LIT (Lighting helper) instruction.
2427  */
2428 static bool
emit_lit(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2429 emit_lit(struct svga_shader_emitter *emit,
2430          const struct tgsi_full_instruction *insn)
2431 {
2432    if (emit->unit == PIPE_SHADER_VERTEX) {
2433       /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
2434        */
2435       return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
2436    }
2437    else {
2438       /* D3D vs. GL semantics can be fairly easily accommodated by
2439        * variations on this sequence.
2440        *
2441        * GL:
2442        *   tmp.y = src.x
2443        *   tmp.z = pow(src.y,src.w)
2444        *   p0 = src0.xxxx > 0
2445        *   result = zero.wxxw
2446        *   (p0) result.yz = tmp
2447        *
2448        * D3D:
2449        *   tmp.y = src.x
2450        *   tmp.z = pow(src.y,src.w)
2451        *   p0 = src0.xxyy > 0
2452        *   result = zero.wxxw
2453        *   (p0) result.yz = tmp
2454        *
2455        * Will implement the GL version for now.
2456        */
2457       SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2458       SVGA3dShaderDestToken tmp = get_temp( emit );
2459       const struct src_register src0 = translate_src_register(
2460          emit, &insn->Src[0] );
2461 
2462       /* tmp = pow(src.y, src.w)
2463        */
2464       if (dst.mask & TGSI_WRITEMASK_Z) {
2465          if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
2466                          tmp,
2467                          scalar(src0, 1),
2468                          scalar(src0, 3)))
2469             return false;
2470       }
2471 
2472       /* tmp.y = src.x
2473        */
2474       if (dst.mask & TGSI_WRITEMASK_Y) {
2475          if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2476                           writemask(tmp, TGSI_WRITEMASK_Y ),
2477                           scalar(src0, 0)))
2478             return false;
2479       }
2480 
2481       /* Can't quite do this with emit conditional due to the extra
2482        * writemask on the predicated mov:
2483        */
2484       {
2485          SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
2486          struct src_register predsrc;
2487 
2488          /* D3D vs GL semantics:
2489           */
2490          if (0)
2491             predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
2492          else
2493             predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */
2494 
2495          /* SETP src0.xxyy, GT, {0}.x */
2496          if (!submit_op2( emit,
2497                           inst_token_setp(SVGA3DOPCOMP_GT),
2498                           pred_reg,
2499                           predsrc,
2500                           get_zero_immediate(emit)))
2501             return false;
2502 
2503          /* MOV dst, fail */
2504          if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
2505                           get_immediate(emit, 1.0f, 0.0f, 0.0f, 1.0f)))
2506              return false;
2507 
2508          /* MOV dst.yz, tmp (predicated)
2509           *
2510           * Note that the predicate reg (and possible modifiers) is passed
2511           * as the first source argument.
2512           */
2513          if (dst.mask & TGSI_WRITEMASK_YZ) {
2514             if (!submit_op2( emit,
2515                              inst_token_predicated(SVGA3DOP_MOV),
2516                              writemask(dst, TGSI_WRITEMASK_YZ),
2517                              src( pred_reg ), src( tmp ) ))
2518                return false;
2519          }
2520       }
2521    }
2522 
2523    return true;
2524 }
2525 
2526 
2527 static bool
emit_ex2(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2528 emit_ex2(struct svga_shader_emitter *emit,
2529          const struct tgsi_full_instruction *insn)
2530 {
2531    SVGA3dShaderInstToken inst;
2532    SVGA3dShaderDestToken dst;
2533    struct src_register src0;
2534 
2535    inst = inst_token( SVGA3DOP_EXP );
2536    dst = translate_dst_register( emit, insn, 0 );
2537    src0 = translate_src_register( emit, &insn->Src[0] );
2538    src0 = scalar( src0, TGSI_SWIZZLE_X );
2539 
2540    if (dst.mask != TGSI_WRITEMASK_XYZW) {
2541       SVGA3dShaderDestToken tmp = get_temp( emit );
2542 
2543       if (!submit_op1( emit, inst, tmp, src0 ))
2544          return false;
2545 
2546       return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2547                          dst,
2548                          scalar( src( tmp ), TGSI_SWIZZLE_X ) );
2549    }
2550 
2551    return submit_op1( emit, inst, dst, src0 );
2552 }
2553 
2554 
2555 static bool
emit_log(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2556 emit_log(struct svga_shader_emitter *emit,
2557          const struct tgsi_full_instruction *insn)
2558 {
2559    SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2560    struct src_register src0 =
2561       translate_src_register( emit, &insn->Src[0] );
2562    SVGA3dShaderDestToken abs_tmp;
2563    struct src_register abs_src0;
2564    SVGA3dShaderDestToken log2_abs;
2565 
2566    abs_tmp.value = 0;
2567 
2568    if (dst.mask & TGSI_WRITEMASK_Z)
2569       log2_abs = dst;
2570    else if (dst.mask & TGSI_WRITEMASK_XY)
2571       log2_abs = get_temp( emit );
2572    else
2573       log2_abs.value = 0;
2574 
2575    /* If z is being written, fill it with log2( abs( src0 ) ).
2576     */
2577    if (dst.mask & TGSI_WRITEMASK_XYZ) {
2578       if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
2579          abs_src0 = src0;
2580       else {
2581          abs_tmp = get_temp( emit );
2582 
2583          if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2584                           abs_tmp,
2585                           src0 ) )
2586             return false;
2587 
2588          abs_src0 = src( abs_tmp );
2589       }
2590 
2591       abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
2592 
2593       if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
2594                        writemask( log2_abs, TGSI_WRITEMASK_Z ),
2595                        abs_src0 ) )
2596          return false;
2597    }
2598 
2599    if (dst.mask & TGSI_WRITEMASK_XY) {
2600       SVGA3dShaderDestToken floor_log2;
2601 
2602       if (dst.mask & TGSI_WRITEMASK_X)
2603          floor_log2 = dst;
2604       else
2605          floor_log2 = get_temp( emit );
2606 
2607       /* If x is being written, fill it with floor( log2( abs( src0 ) ) ).
2608        */
2609       if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2610                        writemask( floor_log2, TGSI_WRITEMASK_X ),
2611                        scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
2612          return false;
2613 
2614       if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2615                        writemask( floor_log2, TGSI_WRITEMASK_X ),
2616                        scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
2617                        negate( src( floor_log2 ) ) ) )
2618          return false;
2619 
2620       /* If y is being written, fill it with
2621        * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
2622        */
2623       if (dst.mask & TGSI_WRITEMASK_Y) {
2624          if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2625                           writemask( dst, TGSI_WRITEMASK_Y ),
2626                           negate( scalar( src( floor_log2 ),
2627                                           TGSI_SWIZZLE_X ) ) ) )
2628             return false;
2629 
2630          if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2631                           writemask( dst, TGSI_WRITEMASK_Y ),
2632                           src( dst ),
2633                           abs_src0 ) )
2634             return false;
2635       }
2636 
2637       if (!(dst.mask & TGSI_WRITEMASK_X))
2638          release_temp( emit, floor_log2 );
2639 
2640       if (!(dst.mask & TGSI_WRITEMASK_Z))
2641          release_temp( emit, log2_abs );
2642    }
2643 
2644    if (dst.mask & TGSI_WRITEMASK_XYZ && src0.base.srcMod &&
2645        src0.base.srcMod != SVGA3DSRCMOD_ABS)
2646       release_temp( emit, abs_tmp );
2647 
2648    /* If w is being written, fill it with one.
2649     */
2650    if (dst.mask & TGSI_WRITEMASK_W) {
2651       if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2652                        writemask(dst, TGSI_WRITEMASK_W),
2653                        get_one_immediate(emit)))
2654          return false;
2655    }
2656 
2657    return true;
2658 }
2659 
2660 
2661 /**
2662  * Translate TGSI TRUNC or ROUND instruction.
2663  * We need to truncate toward zero. Ex: trunc(-1.9) = -1
2664  * Different approaches are needed for VS versus PS.
2665  */
2666 static bool
emit_trunc_round(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn,bool round)2667 emit_trunc_round(struct svga_shader_emitter *emit,
2668                  const struct tgsi_full_instruction *insn,
2669                  bool round)
2670 {
2671    SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
2672    const struct src_register src0 =
2673       translate_src_register(emit, &insn->Src[0] );
2674    SVGA3dShaderDestToken t1 = get_temp(emit);
2675 
2676    if (round) {
2677       SVGA3dShaderDestToken t0 = get_temp(emit);
2678       struct src_register half = get_half_immediate(emit);
2679 
2680       /* t0 = abs(src0) + 0.5 */
2681       if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t0,
2682                       absolute(src0), half))
2683          return false;
2684 
2685       /* t1 = fract(t0) */
2686       if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, src(t0)))
2687          return false;
2688 
2689       /* t1 = t0 - t1 */
2690       if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, src(t0),
2691                       negate(src(t1))))
2692          return false;
2693    }
2694    else {
2695       /* trunc */
2696 
2697       /* t1 = fract(abs(src0)) */
2698       if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, absolute(src0)))
2699          return false;
2700 
2701       /* t1 = abs(src0) - t1 */
2702       if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, absolute(src0),
2703                       negate(src(t1))))
2704          return false;
2705    }
2706 
2707    /*
2708     * Now we need to multiply t1 by the sign of the original value.
2709    */
2710    if (emit->unit == PIPE_SHADER_VERTEX) {
2711       /* For VS: use SGN instruction */
2712       /* Need two extra/dummy registers: */
2713       SVGA3dShaderDestToken t2 = get_temp(emit), t3 = get_temp(emit),
2714          t4 = get_temp(emit);
2715 
2716       /* t2 = sign(src0) */
2717       if (!submit_op3(emit, inst_token(SVGA3DOP_SGN), t2, src0,
2718                       src(t3), src(t4)))
2719          return false;
2720 
2721       /* dst = t1 * t2 */
2722       if (!submit_op2(emit, inst_token(SVGA3DOP_MUL), dst, src(t1), src(t2)))
2723          return false;
2724    }
2725    else {
2726       /* For FS: Use CMP instruction */
2727       return submit_op3(emit, inst_token( SVGA3DOP_CMP ), dst,
2728                         src0, src(t1), negate(src(t1)));
2729    }
2730 
2731    return true;
2732 }
2733 
2734 
2735 /**
2736  * Translate/emit "begin subroutine" instruction/marker/label.
2737  */
2738 static bool
emit_bgnsub(struct svga_shader_emitter * emit,unsigned position,const struct tgsi_full_instruction * insn)2739 emit_bgnsub(struct svga_shader_emitter *emit,
2740             unsigned position,
2741             const struct tgsi_full_instruction *insn)
2742 {
2743    unsigned i;
2744 
2745    /* Note that we've finished the main function and are now emitting
2746     * subroutines.  This affects how we terminate the generated
2747     * shader.
2748     */
2749    emit->in_main_func = false;
2750 
2751    for (i = 0; i < emit->nr_labels; i++) {
2752       if (emit->label[i] == position) {
2753          return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
2754                  emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
2755                  emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2756       }
2757    }
2758 
2759    assert(0);
2760    return true;
2761 }
2762 
2763 
2764 /**
2765  * Translate/emit subroutine call instruction.
2766  */
2767 static bool
emit_call(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2768 emit_call(struct svga_shader_emitter *emit,
2769           const struct tgsi_full_instruction *insn)
2770 {
2771    unsigned position = insn->Label.Label;
2772    unsigned i;
2773 
2774    for (i = 0; i < emit->nr_labels; i++) {
2775       if (emit->label[i] == position)
2776          break;
2777    }
2778 
2779    if (emit->nr_labels == ARRAY_SIZE(emit->label))
2780       return false;
2781 
2782    if (i == emit->nr_labels) {
2783       emit->label[i] = position;
2784       emit->nr_labels++;
2785    }
2786 
2787    return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
2788            emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2789 }
2790 
2791 
2792 /**
2793  * Called at the end of the shader.  Actually, emit special "fix-up"
2794  * code for the vertex/fragment shader.
2795  */
2796 static bool
emit_end(struct svga_shader_emitter * emit)2797 emit_end(struct svga_shader_emitter *emit)
2798 {
2799    if (emit->unit == PIPE_SHADER_VERTEX) {
2800       return emit_vs_postamble( emit );
2801    }
2802    else {
2803       return emit_ps_postamble( emit );
2804    }
2805 }
2806 
2807 
2808 /**
2809  * Translate any TGSI instruction to SVGA.
2810  */
2811 static bool
svga_emit_instruction(struct svga_shader_emitter * emit,unsigned position,const struct tgsi_full_instruction * insn)2812 svga_emit_instruction(struct svga_shader_emitter *emit,
2813                       unsigned position,
2814                       const struct tgsi_full_instruction *insn)
2815 {
2816    switch (insn->Instruction.Opcode) {
2817 
2818    case TGSI_OPCODE_ARL:
2819       return emit_arl( emit, insn );
2820 
2821    case TGSI_OPCODE_TEX:
2822    case TGSI_OPCODE_TXB:
2823    case TGSI_OPCODE_TXP:
2824    case TGSI_OPCODE_TXL:
2825    case TGSI_OPCODE_TXD:
2826       return emit_tex( emit, insn );
2827 
2828    case TGSI_OPCODE_DDX:
2829    case TGSI_OPCODE_DDY:
2830       return emit_deriv( emit, insn );
2831 
2832    case TGSI_OPCODE_BGNSUB:
2833       return emit_bgnsub( emit, position, insn );
2834 
2835    case TGSI_OPCODE_ENDSUB:
2836       return true;
2837 
2838    case TGSI_OPCODE_CAL:
2839       return emit_call( emit, insn );
2840 
2841    case TGSI_OPCODE_FLR:
2842       return emit_floor( emit, insn );
2843 
2844    case TGSI_OPCODE_TRUNC:
2845       return emit_trunc_round( emit, insn, false );
2846 
2847    case TGSI_OPCODE_ROUND:
2848       return emit_trunc_round( emit, insn, true );
2849 
2850    case TGSI_OPCODE_CEIL:
2851       return emit_ceil( emit, insn );
2852 
2853    case TGSI_OPCODE_CMP:
2854       return emit_cmp( emit, insn );
2855 
2856    case TGSI_OPCODE_DIV:
2857       return emit_div( emit, insn );
2858 
2859    case TGSI_OPCODE_DP2:
2860       return emit_dp2( emit, insn );
2861 
2862    case TGSI_OPCODE_COS:
2863       return emit_cos( emit, insn );
2864 
2865    case TGSI_OPCODE_SIN:
2866       return emit_sin( emit, insn );
2867 
2868    case TGSI_OPCODE_END:
2869       /* TGSI always finishes the main func with an END */
2870       return emit_end( emit );
2871 
2872    case TGSI_OPCODE_KILL_IF:
2873       return emit_cond_discard( emit, insn );
2874 
2875       /* Selection opcodes.  The underlying language is fairly
2876        * non-orthogonal about these.
2877        */
2878    case TGSI_OPCODE_SEQ:
2879       return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
2880 
2881    case TGSI_OPCODE_SNE:
2882       return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
2883 
2884    case TGSI_OPCODE_SGT:
2885       return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
2886 
2887    case TGSI_OPCODE_SGE:
2888       return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
2889 
2890    case TGSI_OPCODE_SLT:
2891       return emit_select_op( emit, PIPE_FUNC_LESS, insn );
2892 
2893    case TGSI_OPCODE_SLE:
2894       return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
2895 
2896    case TGSI_OPCODE_POW:
2897       return emit_pow( emit, insn );
2898 
2899    case TGSI_OPCODE_EX2:
2900       return emit_ex2( emit, insn );
2901 
2902    case TGSI_OPCODE_EXP:
2903       return emit_exp( emit, insn );
2904 
2905    case TGSI_OPCODE_LOG:
2906       return emit_log( emit, insn );
2907 
2908    case TGSI_OPCODE_LG2:
2909       return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
2910 
2911    case TGSI_OPCODE_RSQ:
2912       return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
2913 
2914    case TGSI_OPCODE_RCP:
2915       return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
2916 
2917    case TGSI_OPCODE_CONT:
2918       /* not expected (we return PIPE_SHADER_CAP_CONT_SUPPORTED = 0) */
2919       return false;
2920 
2921    case TGSI_OPCODE_RET:
2922       /* This is a noop -- we tell mesa that we can't support RET
2923        * within a function (early return), so this will always be
2924        * followed by an ENDSUB.
2925        */
2926       return true;
2927 
2928       /* These aren't actually used by any of the frontends we care
2929        * about:
2930        */
2931    case TGSI_OPCODE_AND:
2932    case TGSI_OPCODE_OR:
2933    case TGSI_OPCODE_I2F:
2934    case TGSI_OPCODE_NOT:
2935    case TGSI_OPCODE_SHL:
2936    case TGSI_OPCODE_ISHR:
2937    case TGSI_OPCODE_XOR:
2938       return false;
2939 
2940    case TGSI_OPCODE_IF:
2941       return emit_if( emit, insn );
2942    case TGSI_OPCODE_ELSE:
2943       return emit_else( emit, insn );
2944    case TGSI_OPCODE_ENDIF:
2945       return emit_endif( emit, insn );
2946 
2947    case TGSI_OPCODE_BGNLOOP:
2948       return emit_bgnloop( emit, insn );
2949    case TGSI_OPCODE_ENDLOOP:
2950       return emit_endloop( emit, insn );
2951    case TGSI_OPCODE_BRK:
2952       return emit_brk( emit, insn );
2953 
2954    case TGSI_OPCODE_KILL:
2955       return emit_discard( emit, insn );
2956 
2957    case TGSI_OPCODE_DST:
2958       return emit_dst_insn( emit, insn );
2959 
2960    case TGSI_OPCODE_LIT:
2961       return emit_lit( emit, insn );
2962 
2963    case TGSI_OPCODE_LRP:
2964       return emit_lrp( emit, insn );
2965 
2966    case TGSI_OPCODE_SSG:
2967       return emit_ssg( emit, insn );
2968 
2969    case TGSI_OPCODE_MOV:
2970       return emit_mov( emit, insn );
2971 
2972    case TGSI_OPCODE_SQRT:
2973       return emit_sqrt( emit, insn );
2974 
2975    default:
2976       {
2977          SVGA3dShaderOpCodeType opcode =
2978             translate_opcode(insn->Instruction.Opcode);
2979 
2980          if (opcode == SVGA3DOP_LAST_INST)
2981             return false;
2982 
2983          if (!emit_simple_instruction( emit, opcode, insn ))
2984             return false;
2985       }
2986    }
2987 
2988    return true;
2989 }
2990 
2991 
2992 /**
2993  * Translate/emit a TGSI IMMEDIATE declaration.
2994  * An immediate vector is a constant that's hard-coded into the shader.
2995  */
2996 static bool
svga_emit_immediate(struct svga_shader_emitter * emit,const struct tgsi_full_immediate * imm)2997 svga_emit_immediate(struct svga_shader_emitter *emit,
2998                     const struct tgsi_full_immediate *imm)
2999 {
3000    static const float id[4] = {0,0,0,1};
3001    float value[4];
3002    unsigned i;
3003 
3004    assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
3005    for (i = 0; i < 4 && i < imm->Immediate.NrTokens - 1; i++) {
3006       float f = imm->u[i].Float;
3007       value[i] = util_is_inf_or_nan(f) ? 0.0f : f;
3008    }
3009 
3010    /* If the immediate has less than four values, fill in the remaining
3011     * positions from id={0,0,0,1}.
3012     */
3013    for ( ; i < 4; i++ )
3014       value[i] = id[i];
3015 
3016    return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
3017                           emit->imm_start + emit->internal_imm_count++,
3018                           value[0], value[1], value[2], value[3]);
3019 }
3020 
3021 
3022 static bool
make_immediate(struct svga_shader_emitter * emit,float a,float b,float c,float d,struct src_register * out)3023 make_immediate(struct svga_shader_emitter *emit,
3024                float a, float b, float c, float d,
3025                struct src_register *out )
3026 {
3027    unsigned idx = emit->nr_hw_float_const++;
3028 
3029    if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
3030                         idx, a, b, c, d ))
3031       return false;
3032 
3033    *out = src_register( SVGA3DREG_CONST, idx );
3034 
3035    return true;
3036 }
3037 
3038 
3039 /**
3040  * Emit special VS instructions at top of shader.
3041  */
3042 static bool
emit_vs_preamble(struct svga_shader_emitter * emit)3043 emit_vs_preamble(struct svga_shader_emitter *emit)
3044 {
3045    if (!emit->key.vs.need_prescale) {
3046       if (!make_immediate( emit, 0, 0, .5, .5,
3047                            &emit->imm_0055))
3048          return false;
3049    }
3050 
3051    return true;
3052 }
3053 
3054 
3055 /**
3056  * Emit special PS instructions at top of shader.
3057  */
3058 static bool
emit_ps_preamble(struct svga_shader_emitter * emit)3059 emit_ps_preamble(struct svga_shader_emitter *emit)
3060 {
3061    if (emit->ps_reads_pos && emit->info.reads_z) {
3062       /*
3063        * Assemble the position from various bits of inputs. Depth and W are
3064        * passed in a texcoord this is due to D3D's vPos not hold Z or W.
3065        * Also fixup the perspective interpolation.
3066        *
3067        * temp_pos.xy = vPos.xy
3068        * temp_pos.w = rcp(texcoord1.w);
3069        * temp_pos.z = texcoord1.z * temp_pos.w;
3070        */
3071       if (!submit_op1( emit,
3072                        inst_token(SVGA3DOP_MOV),
3073                        writemask( emit->ps_temp_pos, TGSI_WRITEMASK_XY ),
3074                        emit->ps_true_pos ))
3075          return false;
3076 
3077       if (!submit_op1( emit,
3078                        inst_token(SVGA3DOP_RCP),
3079                        writemask( emit->ps_temp_pos, TGSI_WRITEMASK_W ),
3080                        scalar( emit->ps_depth_pos, TGSI_SWIZZLE_W ) ))
3081          return false;
3082 
3083       if (!submit_op2( emit,
3084                        inst_token(SVGA3DOP_MUL),
3085                        writemask( emit->ps_temp_pos, TGSI_WRITEMASK_Z ),
3086                        scalar( emit->ps_depth_pos, TGSI_SWIZZLE_Z ),
3087                        scalar( src(emit->ps_temp_pos), TGSI_SWIZZLE_W ) ))
3088          return false;
3089    }
3090 
3091    return true;
3092 }
3093 
3094 
3095 /**
3096  * Emit special PS instructions at end of shader.
3097  */
3098 static bool
emit_ps_postamble(struct svga_shader_emitter * emit)3099 emit_ps_postamble(struct svga_shader_emitter *emit)
3100 {
3101    unsigned i;
3102 
3103    /* PS oDepth is incredibly fragile and it's very hard to catch the
3104     * types of usage that break it during shader emit.  Easier just to
3105     * redirect the main program to a temporary and then only touch
3106     * oDepth with a hand-crafted MOV below.
3107     */
3108    if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
3109       if (!submit_op1( emit,
3110                        inst_token(SVGA3DOP_MOV),
3111                        emit->true_pos,
3112                        scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
3113          return false;
3114    }
3115 
3116    for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
3117       if (SVGA3dShaderGetRegType(emit->true_color_output[i].value) != 0) {
3118          /* Potentially override output colors with white for XOR
3119           * logicop workaround.
3120           */
3121          if (emit->unit == PIPE_SHADER_FRAGMENT &&
3122              emit->key.fs.white_fragments) {
3123             struct src_register one = get_one_immediate(emit);
3124 
3125             if (!submit_op1( emit,
3126                              inst_token(SVGA3DOP_MOV),
3127                              emit->true_color_output[i],
3128                              one ))
3129                return false;
3130          }
3131          else if (emit->unit == PIPE_SHADER_FRAGMENT &&
3132                   i < emit->key.fs.write_color0_to_n_cbufs) {
3133             /* Write temp color output [0] to true output [i] */
3134             if (!submit_op1(emit, inst_token(SVGA3DOP_MOV),
3135                             emit->true_color_output[i],
3136                             src(emit->temp_color_output[0]))) {
3137                return false;
3138             }
3139          }
3140          else {
3141             if (!submit_op1( emit,
3142                              inst_token(SVGA3DOP_MOV),
3143                              emit->true_color_output[i],
3144                              src(emit->temp_color_output[i]) ))
3145                return false;
3146          }
3147       }
3148    }
3149 
3150    return true;
3151 }
3152 
3153 
3154 /**
3155  * Emit special VS instructions at end of shader.
3156  */
3157 static bool
emit_vs_postamble(struct svga_shader_emitter * emit)3158 emit_vs_postamble(struct svga_shader_emitter *emit)
3159 {
3160    /* PSIZ output is incredibly fragile and it's very hard to catch
3161     * the types of usage that break it during shader emit.  Easier
3162     * just to redirect the main program to a temporary and then only
3163     * touch PSIZ with a hand-crafted MOV below.
3164     */
3165    if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
3166       if (!submit_op1( emit,
3167                        inst_token(SVGA3DOP_MOV),
3168                        emit->true_psiz,
3169                        scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
3170          return false;
3171    }
3172 
3173    /* Need to perform various manipulations on vertex position to cope
3174     * with the different GL and D3D clip spaces.
3175     */
3176    if (emit->key.vs.need_prescale) {
3177       SVGA3dShaderDestToken temp_pos = emit->temp_pos;
3178       SVGA3dShaderDestToken depth = emit->depth_pos;
3179       SVGA3dShaderDestToken pos = emit->true_pos;
3180       unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
3181       struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
3182                                                          offset + 0 );
3183       struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
3184                                                          offset + 1 );
3185 
3186       if (!submit_op1( emit,
3187                        inst_token(SVGA3DOP_MOV),
3188                        writemask(depth, TGSI_WRITEMASK_W),
3189                        scalar(src(temp_pos), TGSI_SWIZZLE_W) ))
3190          return false;
3191 
3192       /* MUL temp_pos.xyz,    temp_pos,      prescale.scale
3193        * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
3194        *   --> Note that prescale.trans.w == 0
3195        */
3196       if (!submit_op2( emit,
3197                        inst_token(SVGA3DOP_MUL),
3198                        writemask(temp_pos, TGSI_WRITEMASK_XYZ),
3199                        src(temp_pos),
3200                        prescale_scale ))
3201          return false;
3202 
3203       if (!submit_op3( emit,
3204                        inst_token(SVGA3DOP_MAD),
3205                        pos,
3206                        swizzle(src(temp_pos), 3, 3, 3, 3),
3207                        prescale_trans,
3208                        src(temp_pos)))
3209          return false;
3210 
3211       /* Also write to depth value */
3212       if (!submit_op3( emit,
3213                        inst_token(SVGA3DOP_MAD),
3214                        writemask(depth, TGSI_WRITEMASK_Z),
3215                        swizzle(src(temp_pos), 3, 3, 3, 3),
3216                        prescale_trans,
3217                        src(temp_pos) ))
3218          return false;
3219    }
3220    else {
3221       SVGA3dShaderDestToken temp_pos = emit->temp_pos;
3222       SVGA3dShaderDestToken depth = emit->depth_pos;
3223       SVGA3dShaderDestToken pos = emit->true_pos;
3224       struct src_register imm_0055 = emit->imm_0055;
3225 
3226       /* Adjust GL clipping coordinate space to hardware (D3D-style):
3227        *
3228        * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
3229        * MOV result.position, temp_pos
3230        */
3231       if (!submit_op2( emit,
3232                        inst_token(SVGA3DOP_DP4),
3233                        writemask(temp_pos, TGSI_WRITEMASK_Z),
3234                        imm_0055,
3235                        src(temp_pos) ))
3236          return false;
3237 
3238       if (!submit_op1( emit,
3239                        inst_token(SVGA3DOP_MOV),
3240                        pos,
3241                        src(temp_pos) ))
3242          return false;
3243 
3244       /* Move the manipulated depth into the extra texcoord reg */
3245       if (!submit_op1( emit,
3246                        inst_token(SVGA3DOP_MOV),
3247                        writemask(depth, TGSI_WRITEMASK_ZW),
3248                        src(temp_pos) ))
3249          return false;
3250    }
3251 
3252    return true;
3253 }
3254 
3255 
3256 /**
3257  * For the pixel shader: emit the code which chooses the front
3258  * or back face color depending on triangle orientation.
3259  * This happens at the top of the fragment shader.
3260  *
3261  *  0: IF VFACE :4
3262  *  1:   COLOR = FrontColor;
3263  *  2: ELSE
3264  *  3:   COLOR = BackColor;
3265  *  4: ENDIF
3266  */
3267 static bool
emit_light_twoside(struct svga_shader_emitter * emit)3268 emit_light_twoside(struct svga_shader_emitter *emit)
3269 {
3270    struct src_register vface, zero;
3271    struct src_register front[2];
3272    struct src_register back[2];
3273    SVGA3dShaderDestToken color[2];
3274    int count = emit->internal_color_count;
3275    unsigned i;
3276    SVGA3dShaderInstToken if_token;
3277 
3278    if (count == 0)
3279       return true;
3280 
3281    vface = get_vface( emit );
3282    zero = get_zero_immediate(emit);
3283 
3284    /* Can't use get_temp() to allocate the color reg as such
3285     * temporaries will be reclaimed after each instruction by the call
3286     * to reset_temp_regs().
3287     */
3288    for (i = 0; i < count; i++) {
3289       color[i] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ );
3290       front[i] = emit->input_map[emit->internal_color_idx[i]];
3291 
3292       /* Back is always the next input:
3293        */
3294       back[i] = front[i];
3295       back[i].base.num = front[i].base.num + 1;
3296 
3297       /* Reassign the input_map to the actual front-face color:
3298        */
3299       emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
3300    }
3301 
3302    if_token = inst_token( SVGA3DOP_IFC );
3303 
3304    if (emit->key.fs.front_ccw)
3305       if_token.control = SVGA3DOPCOMP_LT;
3306    else
3307       if_token.control = SVGA3DOPCOMP_GT;
3308 
3309    if (!(emit_instruction( emit, if_token ) &&
3310          emit_src( emit, vface ) &&
3311          emit_src( emit, zero ) ))
3312       return false;
3313 
3314    for (i = 0; i < count; i++) {
3315       if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
3316          return false;
3317    }
3318 
3319    if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
3320       return false;
3321 
3322    for (i = 0; i < count; i++) {
3323       if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
3324          return false;
3325    }
3326 
3327    if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
3328       return false;
3329 
3330    return true;
3331 }
3332 
3333 
3334 /**
3335  * Emit special setup code for the front/back face register in the FS.
3336  *  0: SETP_GT TEMP, VFACE, 0
3337  *  where TEMP is a fake frontface register
3338  */
3339 static bool
emit_frontface(struct svga_shader_emitter * emit)3340 emit_frontface(struct svga_shader_emitter *emit)
3341 {
3342    struct src_register vface;
3343    SVGA3dShaderDestToken temp;
3344    struct src_register pass, fail;
3345 
3346    vface = get_vface( emit );
3347 
3348    /* Can't use get_temp() to allocate the fake frontface reg as such
3349     * temporaries will be reclaimed after each instruction by the call
3350     * to reset_temp_regs().
3351     */
3352    temp = dst_register( SVGA3DREG_TEMP,
3353                         emit->nr_hw_temp++ );
3354 
3355    if (emit->key.fs.front_ccw) {
3356       pass = get_zero_immediate(emit);
3357       fail = get_one_immediate(emit);
3358    } else {
3359       pass = get_one_immediate(emit);
3360       fail = get_zero_immediate(emit);
3361    }
3362 
3363    if (!emit_conditional(emit, PIPE_FUNC_GREATER,
3364                          temp, vface, get_zero_immediate(emit),
3365                          pass, fail))
3366       return false;
3367 
3368    /* Reassign the input_map to the actual front-face color:
3369     */
3370    emit->input_map[emit->internal_frontface_idx] = src(temp);
3371 
3372    return true;
3373 }
3374 
3375 
3376 /**
3377  * Emit code to invert the T component of the incoming texture coordinate.
3378  * This is used for drawing point sprites when
3379  * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT.
3380  */
3381 static bool
emit_inverted_texcoords(struct svga_shader_emitter * emit)3382 emit_inverted_texcoords(struct svga_shader_emitter *emit)
3383 {
3384    unsigned inverted_texcoords = emit->inverted_texcoords;
3385 
3386    while (inverted_texcoords) {
3387       const unsigned unit = ffs(inverted_texcoords) - 1;
3388 
3389       assert(emit->inverted_texcoords & (1 << unit));
3390 
3391       assert(unit < ARRAY_SIZE(emit->ps_true_texcoord));
3392 
3393       assert(unit < ARRAY_SIZE(emit->ps_inverted_texcoord_input));
3394 
3395       assert(emit->ps_inverted_texcoord_input[unit]
3396              < ARRAY_SIZE(emit->input_map));
3397 
3398       /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */
3399       if (!submit_op3(emit,
3400                       inst_token(SVGA3DOP_MAD),
3401                       dst(emit->ps_inverted_texcoord[unit]),
3402                       emit->ps_true_texcoord[unit],
3403                       get_immediate(emit, 1.0f, -1.0f, 1.0f, 1.0f),
3404                       get_immediate(emit, 0.0f, 1.0f, 0.0f, 0.0f)))
3405          return false;
3406 
3407       /* Reassign the input_map entry to the new texcoord register */
3408       emit->input_map[emit->ps_inverted_texcoord_input[unit]] =
3409          emit->ps_inverted_texcoord[unit];
3410 
3411       inverted_texcoords &= ~(1 << unit);
3412    }
3413 
3414    return true;
3415 }
3416 
3417 
3418 /**
3419  * Emit code to adjust vertex shader inputs/attributes:
3420  * - Change range from [0,1] to [-1,1] (for normalized byte/short attribs).
3421  * - Set attrib W component = 1.
3422  */
3423 static bool
emit_adjusted_vertex_attribs(struct svga_shader_emitter * emit)3424 emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
3425 {
3426    unsigned adjust_mask = (emit->key.vs.adjust_attrib_range |
3427                            emit->key.vs.adjust_attrib_w_1);
3428 
3429    while (adjust_mask) {
3430       /* Adjust vertex attrib range and/or set W component = 1 */
3431       const unsigned index = u_bit_scan(&adjust_mask);
3432       struct src_register tmp;
3433 
3434       /* allocate a temp reg */
3435       tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
3436       emit->nr_hw_temp++;
3437 
3438       if (emit->key.vs.adjust_attrib_range & (1 << index)) {
3439          /* The vertex input/attribute is supposed to be a signed value in
3440           * the range [-1,1] but we actually fetched/converted it to the
3441           * range [0,1].  This most likely happens when the app specifies a
3442           * signed byte attribute but we interpreted it as unsigned bytes.
3443           * See also svga_translate_vertex_format().
3444           *
3445           * Here, we emit some extra instructions to adjust
3446           * the attribute values from [0,1] to [-1,1].
3447           *
3448           * The adjustment we implement is:
3449           *   new_attrib = attrib * 2.0;
3450           *   if (attrib >= 0.5)
3451           *      new_attrib = new_attrib - 2.0;
3452           * This isn't exactly right (it's off by a bit or so) but close enough.
3453           */
3454          SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0);
3455 
3456          /* tmp = attrib * 2.0 */
3457          if (!submit_op2(emit,
3458                          inst_token(SVGA3DOP_MUL),
3459                          dst(tmp),
3460                          emit->input_map[index],
3461                          get_two_immediate(emit)))
3462             return false;
3463 
3464          /* pred = (attrib >= 0.5) */
3465          if (!submit_op2(emit,
3466                          inst_token_setp(SVGA3DOPCOMP_GE),
3467                          pred_reg,
3468                          emit->input_map[index],  /* vert attrib */
3469                          get_half_immediate(emit)))  /* 0.5 */
3470             return false;
3471 
3472          /* sub(pred) tmp, tmp, 2.0 */
3473          if (!submit_op3(emit,
3474                          inst_token_predicated(SVGA3DOP_SUB),
3475                          dst(tmp),
3476                          src(pred_reg),
3477                          tmp,
3478                          get_two_immediate(emit)))
3479             return false;
3480       }
3481       else {
3482          /* just copy the vertex input attrib to the temp register */
3483          if (!submit_op1(emit,
3484                          inst_token(SVGA3DOP_MOV),
3485                          dst(tmp),
3486                          emit->input_map[index]))
3487             return false;
3488       }
3489 
3490       if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) {
3491          /* move 1 into W position of tmp */
3492          if (!submit_op1(emit,
3493                          inst_token(SVGA3DOP_MOV),
3494                          writemask(dst(tmp), TGSI_WRITEMASK_W),
3495                          get_one_immediate(emit)))
3496             return false;
3497       }
3498 
3499       /* Reassign the input_map entry to the new tmp register */
3500       emit->input_map[index] = tmp;
3501    }
3502 
3503    return true;
3504 }
3505 
3506 
3507 /**
3508  * Determine if we need to create the "common" immediate value which is
3509  * used for generating useful vector constants such as {0,0,0,0} and
3510  * {1,1,1,1}.
3511  * We could just do this all the time except that we want to conserve
3512  * registers whenever possible.
3513  */
3514 static bool
needs_to_create_common_immediate(const struct svga_shader_emitter * emit)3515 needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
3516 {
3517    unsigned i;
3518 
3519    if (emit->unit == PIPE_SHADER_FRAGMENT) {
3520       if (emit->key.fs.light_twoside)
3521          return true;
3522 
3523       if (emit->key.fs.white_fragments)
3524          return true;
3525 
3526       if (emit->emit_frontface)
3527          return true;
3528 
3529       if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
3530           emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 ||
3531           emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
3532          return true;
3533 
3534       if (emit->inverted_texcoords)
3535          return true;
3536 
3537       /* look for any PIPE_SWIZZLE_0/ONE terms */
3538       for (i = 0; i < emit->key.num_textures; i++) {
3539          if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_W ||
3540              emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_W ||
3541              emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_W ||
3542              emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_W)
3543             return true;
3544       }
3545 
3546       for (i = 0; i < emit->key.num_textures; i++) {
3547          if (emit->key.tex[i].compare_mode
3548              == PIPE_TEX_COMPARE_R_TO_TEXTURE)
3549             return true;
3550       }
3551    }
3552    else if (emit->unit == PIPE_SHADER_VERTEX) {
3553       if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
3554          return true;
3555       if (emit->key.vs.adjust_attrib_range ||
3556           emit->key.vs.adjust_attrib_w_1)
3557          return true;
3558    }
3559 
3560    if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
3561        emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
3562        emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
3563        emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
3564        emit->info.opcode_count[TGSI_OPCODE_ROUND] >= 1 ||
3565        emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
3566        emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
3567        emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
3568        emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
3569        emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
3570        emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
3571        emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
3572        emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
3573        emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1 ||
3574        emit->info.opcode_count[TGSI_OPCODE_SQRT] >= 1)
3575       return true;
3576 
3577    return false;
3578 }
3579 
3580 
3581 /**
3582  * Do we need to create a looping constant?
3583  */
3584 static bool
needs_to_create_loop_const(const struct svga_shader_emitter * emit)3585 needs_to_create_loop_const(const struct svga_shader_emitter *emit)
3586 {
3587    return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
3588 }
3589 
3590 
3591 static bool
needs_to_create_arl_consts(const struct svga_shader_emitter * emit)3592 needs_to_create_arl_consts(const struct svga_shader_emitter *emit)
3593 {
3594    return (emit->num_arl_consts > 0);
3595 }
3596 
3597 
3598 static bool
pre_parse_add_indirect(struct svga_shader_emitter * emit,int num,int current_arl)3599 pre_parse_add_indirect( struct svga_shader_emitter *emit,
3600                         int num, int current_arl)
3601 {
3602    unsigned i;
3603    assert(num < 0);
3604 
3605    for (i = 0; i < emit->num_arl_consts; ++i) {
3606       if (emit->arl_consts[i].arl_num == current_arl)
3607          break;
3608    }
3609    /* new entry */
3610    if (emit->num_arl_consts == i) {
3611       ++emit->num_arl_consts;
3612    }
3613    emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ?
3614                                 num :
3615                                 emit->arl_consts[i].number;
3616    emit->arl_consts[i].arl_num = current_arl;
3617    return true;
3618 }
3619 
3620 
3621 static bool
pre_parse_instruction(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn,int current_arl)3622 pre_parse_instruction( struct svga_shader_emitter *emit,
3623                        const struct tgsi_full_instruction *insn,
3624                        int current_arl)
3625 {
3626    if (insn->Src[0].Register.Indirect &&
3627        insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) {
3628       const struct tgsi_full_src_register *reg = &insn->Src[0];
3629       if (reg->Register.Index < 0) {
3630          pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3631       }
3632    }
3633 
3634    if (insn->Src[1].Register.Indirect &&
3635        insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) {
3636       const struct tgsi_full_src_register *reg = &insn->Src[1];
3637       if (reg->Register.Index < 0) {
3638          pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3639       }
3640    }
3641 
3642    if (insn->Src[2].Register.Indirect &&
3643        insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) {
3644       const struct tgsi_full_src_register *reg = &insn->Src[2];
3645       if (reg->Register.Index < 0) {
3646          pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3647       }
3648    }
3649 
3650    return true;
3651 }
3652 
3653 
3654 static bool
pre_parse_tokens(struct svga_shader_emitter * emit,const struct tgsi_token * tokens)3655 pre_parse_tokens( struct svga_shader_emitter *emit,
3656                   const struct tgsi_token *tokens )
3657 {
3658    struct tgsi_parse_context parse;
3659    int current_arl = 0;
3660 
3661    tgsi_parse_init( &parse, tokens );
3662 
3663    while (!tgsi_parse_end_of_tokens( &parse )) {
3664       tgsi_parse_token( &parse );
3665       switch (parse.FullToken.Token.Type) {
3666       case TGSI_TOKEN_TYPE_IMMEDIATE:
3667       case TGSI_TOKEN_TYPE_DECLARATION:
3668          break;
3669       case TGSI_TOKEN_TYPE_INSTRUCTION:
3670          if (parse.FullToken.FullInstruction.Instruction.Opcode ==
3671              TGSI_OPCODE_ARL) {
3672             ++current_arl;
3673          }
3674          if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
3675                                      current_arl ))
3676             return false;
3677          break;
3678       default:
3679          break;
3680       }
3681 
3682    }
3683    return true;
3684 }
3685 
3686 
3687 static bool
svga_shader_emit_helpers(struct svga_shader_emitter * emit)3688 svga_shader_emit_helpers(struct svga_shader_emitter *emit)
3689 {
3690    if (needs_to_create_common_immediate( emit )) {
3691       create_common_immediate( emit );
3692    }
3693    if (needs_to_create_loop_const( emit )) {
3694       create_loop_const( emit );
3695    }
3696    if (needs_to_create_arl_consts( emit )) {
3697       create_arl_consts( emit );
3698    }
3699 
3700    if (emit->unit == PIPE_SHADER_FRAGMENT) {
3701       if (!svga_shader_emit_samplers_decl( emit ))
3702          return false;
3703 
3704       if (!emit_ps_preamble( emit ))
3705          return false;
3706 
3707       if (emit->key.fs.light_twoside) {
3708          if (!emit_light_twoside( emit ))
3709             return false;
3710       }
3711       if (emit->emit_frontface) {
3712          if (!emit_frontface( emit ))
3713             return false;
3714       }
3715       if (emit->inverted_texcoords) {
3716          if (!emit_inverted_texcoords( emit ))
3717             return false;
3718       }
3719    }
3720    else {
3721       assert(emit->unit == PIPE_SHADER_VERTEX);
3722       if (emit->key.vs.adjust_attrib_range) {
3723          if (!emit_adjusted_vertex_attribs(emit) ||
3724              emit->key.vs.adjust_attrib_w_1) {
3725             return false;
3726          }
3727       }
3728    }
3729 
3730    return true;
3731 }
3732 
3733 
3734 /**
3735  * This is the main entrypoint into the TGSI instruction translater.
3736  * Translate TGSI shader tokens into an SVGA shader.
3737  */
3738 bool
svga_shader_emit_instructions(struct svga_shader_emitter * emit,const struct tgsi_token * tokens)3739 svga_shader_emit_instructions(struct svga_shader_emitter *emit,
3740                               const struct tgsi_token *tokens)
3741 {
3742    struct tgsi_parse_context parse;
3743    const struct tgsi_token *new_tokens = NULL;
3744    bool ret = true;
3745    bool helpers_emitted = false;
3746    unsigned line_nr = 0;
3747 
3748    if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) {
3749       unsigned unit;
3750 
3751       new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
3752                                                         TGSI_FILE_INPUT);
3753 
3754       if (new_tokens) {
3755          /* Setup texture state for stipple */
3756          emit->sampler_target[unit] = TGSI_TEXTURE_2D;
3757          emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
3758          emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
3759          emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
3760          emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
3761 
3762          emit->pstipple_sampler_unit = unit;
3763 
3764          tokens = new_tokens;
3765       }
3766    }
3767 
3768    tgsi_parse_init( &parse, tokens );
3769    emit->internal_imm_count = 0;
3770 
3771    if (emit->unit == PIPE_SHADER_VERTEX) {
3772       ret = emit_vs_preamble( emit );
3773       if (!ret)
3774          goto done;
3775    }
3776 
3777    pre_parse_tokens(emit, tokens);
3778 
3779    while (!tgsi_parse_end_of_tokens( &parse )) {
3780       tgsi_parse_token( &parse );
3781 
3782       switch (parse.FullToken.Token.Type) {
3783       case TGSI_TOKEN_TYPE_IMMEDIATE:
3784          ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
3785          if (!ret)
3786             goto done;
3787          break;
3788 
3789       case TGSI_TOKEN_TYPE_DECLARATION:
3790          ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
3791          if (!ret)
3792             goto done;
3793          break;
3794 
3795       case TGSI_TOKEN_TYPE_INSTRUCTION:
3796          if (!helpers_emitted) {
3797             if (!svga_shader_emit_helpers( emit ))
3798                goto done;
3799             helpers_emitted = true;
3800          }
3801          ret = svga_emit_instruction( emit,
3802                                       line_nr++,
3803                                       &parse.FullToken.FullInstruction );
3804          if (!ret)
3805             goto done;
3806          break;
3807       default:
3808          break;
3809       }
3810 
3811       reset_temp_regs( emit );
3812    }
3813 
3814    /* Need to terminate the current subroutine.  Note that the
3815     * hardware doesn't tolerate shaders without sub-routines
3816     * terminating with RET+END.
3817     */
3818    if (!emit->in_main_func) {
3819       ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
3820       if (!ret)
3821          goto done;
3822    }
3823 
3824    assert(emit->dynamic_branching_level == 0);
3825 
3826    /* Need to terminate the whole shader:
3827     */
3828    ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
3829    if (!ret)
3830       goto done;
3831 
3832 done:
3833    tgsi_parse_free( &parse );
3834    if (new_tokens) {
3835       tgsi_free_tokens(new_tokens);
3836    }
3837 
3838    return ret;
3839 }
3840