1 /*
2 * Copyright (c) 2008-2024 Broadcom. All Rights Reserved.
3 * The term “Broadcom” refers to Broadcom Inc.
4 * and/or its subsidiaries.
5 * SPDX-License-Identifier: MIT
6 */
7
8
9 #include "pipe/p_shader_tokens.h"
10 #include "tgsi/tgsi_dump.h"
11 #include "tgsi/tgsi_parse.h"
12 #include "util/u_memory.h"
13 #include "util/u_math.h"
14 #include "util/u_pstipple.h"
15
16 #include "svga_tgsi_emit.h"
17 #include "svga_context.h"
18
19
/* Forward declarations of static helpers defined elsewhere in this
 * translation unit.
 */
static bool
emit_vs_postamble(struct svga_shader_emitter *emit);

static bool
emit_ps_postamble(struct svga_shader_emitter *emit);
22
23
24 static SVGA3dShaderOpCodeType
translate_opcode(enum tgsi_opcode opcode)25 translate_opcode(enum tgsi_opcode opcode)
26 {
27 switch (opcode) {
28 case TGSI_OPCODE_ADD: return SVGA3DOP_ADD;
29 case TGSI_OPCODE_DP3: return SVGA3DOP_DP3;
30 case TGSI_OPCODE_DP4: return SVGA3DOP_DP4;
31 case TGSI_OPCODE_FRC: return SVGA3DOP_FRC;
32 case TGSI_OPCODE_MAD: return SVGA3DOP_MAD;
33 case TGSI_OPCODE_MAX: return SVGA3DOP_MAX;
34 case TGSI_OPCODE_MIN: return SVGA3DOP_MIN;
35 case TGSI_OPCODE_MOV: return SVGA3DOP_MOV;
36 case TGSI_OPCODE_MUL: return SVGA3DOP_MUL;
37 case TGSI_OPCODE_NOP: return SVGA3DOP_NOP;
38 default:
39 assert(!"svga: unexpected opcode in translate_opcode()");
40 return SVGA3DOP_LAST_INST;
41 }
42 }
43
44
45 static SVGA3dShaderRegType
translate_file(enum tgsi_file_type file)46 translate_file(enum tgsi_file_type file)
47 {
48 switch (file) {
49 case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
50 case TGSI_FILE_INPUT: return SVGA3DREG_INPUT;
51 case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */
52 case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
53 case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST;
54 case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER;
55 case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR;
56 default:
57 assert(!"svga: unexpected register file in translate_file()");
58 return SVGA3DREG_TEMP;
59 }
60 }
61
62
63 /**
64 * Translate a TGSI destination register to an SVGA3DShaderDestToken.
65 * \param insn the TGSI instruction
66 * \param idx which TGSI dest register to translate (usually (always?) zero)
67 */
68 static SVGA3dShaderDestToken
translate_dst_register(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn,unsigned idx)69 translate_dst_register( struct svga_shader_emitter *emit,
70 const struct tgsi_full_instruction *insn,
71 unsigned idx )
72 {
73 const struct tgsi_full_dst_register *reg = &insn->Dst[idx];
74 SVGA3dShaderDestToken dest;
75
76 switch (reg->Register.File) {
77 case TGSI_FILE_OUTPUT:
78 /* Output registers encode semantic information in their name.
79 * Need to lookup a table built at decl time:
80 */
81 dest = emit->output_map[reg->Register.Index];
82 emit->num_output_writes++;
83 break;
84
85 default:
86 {
87 unsigned index = reg->Register.Index;
88 assert(index < SVGA3D_TEMPREG_MAX);
89 index = MIN2(index, SVGA3D_TEMPREG_MAX - 1);
90 dest = dst_register(translate_file(reg->Register.File), index);
91 }
92 break;
93 }
94
95 if (reg->Register.Indirect) {
96 debug_warning("Indirect indexing of dest registers is not supported!\n");
97 }
98
99 dest.mask = reg->Register.WriteMask;
100 assert(dest.mask);
101
102 if (insn->Instruction.Saturate)
103 dest.dstMod = SVGA3DDSTMOD_SATURATE;
104
105 return dest;
106 }
107
108
109 /**
110 * Apply a swizzle to a src_register, returning a new src_register
111 * Ex: swizzle(SRC.ZZYY, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_X, SWIZZLE_Y)
112 * would return SRC.YYZZ
113 */
114 static struct src_register
swizzle(struct src_register src,unsigned x,unsigned y,unsigned z,unsigned w)115 swizzle(struct src_register src,
116 unsigned x, unsigned y, unsigned z, unsigned w)
117 {
118 assert(x < 4);
119 assert(y < 4);
120 assert(z < 4);
121 assert(w < 4);
122 x = (src.base.swizzle >> (x * 2)) & 0x3;
123 y = (src.base.swizzle >> (y * 2)) & 0x3;
124 z = (src.base.swizzle >> (z * 2)) & 0x3;
125 w = (src.base.swizzle >> (w * 2)) & 0x3;
126
127 src.base.swizzle = TRANSLATE_SWIZZLE(x, y, z, w);
128
129 return src;
130 }
131
132
133 /**
134 * Apply a "scalar" swizzle to a src_register returning a new
135 * src_register where all the swizzle terms are the same.
136 * Ex: scalar(SRC.WZYX, SWIZZLE_Y) would return SRC.ZZZZ
137 */
138 static struct src_register
scalar(struct src_register src,unsigned comp)139 scalar(struct src_register src, unsigned comp)
140 {
141 assert(comp < 4);
142 return swizzle( src, comp, comp, comp, comp );
143 }
144
145
146 static bool
svga_arl_needs_adjustment(const struct svga_shader_emitter * emit)147 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
148 {
149 unsigned i;
150
151 for (i = 0; i < emit->num_arl_consts; ++i) {
152 if (emit->arl_consts[i].arl_num == emit->current_arl)
153 return true;
154 }
155 return false;
156 }
157
158
159 static int
svga_arl_adjustment(const struct svga_shader_emitter * emit)160 svga_arl_adjustment( const struct svga_shader_emitter *emit )
161 {
162 unsigned i;
163
164 for (i = 0; i < emit->num_arl_consts; ++i) {
165 if (emit->arl_consts[i].arl_num == emit->current_arl)
166 return emit->arl_consts[i].number;
167 }
168 return 0;
169 }
170
171
172 /**
173 * Translate a TGSI src register to a src_register.
174 */
175 static struct src_register
translate_src_register(const struct svga_shader_emitter * emit,const struct tgsi_full_src_register * reg)176 translate_src_register( const struct svga_shader_emitter *emit,
177 const struct tgsi_full_src_register *reg )
178 {
179 struct src_register src;
180
181 switch (reg->Register.File) {
182 case TGSI_FILE_INPUT:
183 /* Input registers are referred to by their semantic name rather
184 * than by index. Use the mapping build up from the decls:
185 */
186 src = emit->input_map[reg->Register.Index];
187 break;
188
189 case TGSI_FILE_IMMEDIATE:
190 /* Immediates are appended after TGSI constants in the D3D
191 * constant buffer.
192 */
193 src = src_register( translate_file( reg->Register.File ),
194 reg->Register.Index + emit->imm_start );
195 break;
196
197 default:
198 src = src_register( translate_file( reg->Register.File ),
199 reg->Register.Index );
200 break;
201 }
202
203 /* Indirect addressing.
204 */
205 if (reg->Register.Indirect) {
206 if (emit->unit == PIPE_SHADER_FRAGMENT) {
207 /* Pixel shaders have only loop registers for relative
208 * addressing into inputs. Ignore the redundant address
209 * register, the contents of aL should be in sync with it.
210 */
211 if (reg->Register.File == TGSI_FILE_INPUT) {
212 src.base.relAddr = 1;
213 src.indirect = src_token(SVGA3DREG_LOOP, 0);
214 }
215 }
216 else {
217 /* Constant buffers only.
218 */
219 if (reg->Register.File == TGSI_FILE_CONSTANT) {
220 /* we shift the offset towards the minimum */
221 if (svga_arl_needs_adjustment( emit )) {
222 src.base.num -= svga_arl_adjustment( emit );
223 }
224 src.base.relAddr = 1;
225
226 /* Not really sure what should go in the second token:
227 */
228 src.indirect = src_token( SVGA3DREG_ADDR,
229 reg->Indirect.Index );
230
231 src.indirect.swizzle = SWIZZLE_XXXX;
232 }
233 }
234 }
235
236 src = swizzle( src,
237 reg->Register.SwizzleX,
238 reg->Register.SwizzleY,
239 reg->Register.SwizzleZ,
240 reg->Register.SwizzleW );
241
242 /* src.mod isn't a bitfield, unfortunately */
243 if (reg->Register.Absolute) {
244 if (reg->Register.Negate)
245 src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
246 else
247 src.base.srcMod = SVGA3DSRCMOD_ABS;
248 }
249 else {
250 if (reg->Register.Negate)
251 src.base.srcMod = SVGA3DSRCMOD_NEG;
252 else
253 src.base.srcMod = SVGA3DSRCMOD_NONE;
254 }
255
256 return src;
257 }
258
259
260 /*
261 * Get a temporary register.
262 * Note: if we exceed the temporary register limit we just use
263 * register SVGA3D_TEMPREG_MAX - 1.
264 */
265 static SVGA3dShaderDestToken
get_temp(struct svga_shader_emitter * emit)266 get_temp( struct svga_shader_emitter *emit )
267 {
268 int i = emit->nr_hw_temp + emit->internal_temp_count++;
269 if (i >= SVGA3D_TEMPREG_MAX) {
270 debug_warn_once("svga: Too many temporary registers used in shader\n");
271 i = SVGA3D_TEMPREG_MAX - 1;
272 }
273 return dst_register( SVGA3DREG_TEMP, i );
274 }
275
276
277 /**
278 * Release a single temp. Currently only effective if it was the last
279 * allocated temp, otherwise release will be delayed until the next
280 * call to reset_temp_regs().
281 */
282 static void
release_temp(struct svga_shader_emitter * emit,SVGA3dShaderDestToken temp)283 release_temp( struct svga_shader_emitter *emit,
284 SVGA3dShaderDestToken temp )
285 {
286 if (temp.num == emit->internal_temp_count - 1)
287 emit->internal_temp_count--;
288 }
289
290
291 /**
292 * Release all temps.
293 */
294 static void
reset_temp_regs(struct svga_shader_emitter * emit)295 reset_temp_regs(struct svga_shader_emitter *emit)
296 {
297 emit->internal_temp_count = 0;
298 }
299
300
301 /** Emit bytecode for a src_register */
302 static bool
emit_src(struct svga_shader_emitter * emit,const struct src_register src)303 emit_src(struct svga_shader_emitter *emit, const struct src_register src)
304 {
305 if (src.base.relAddr) {
306 assert(src.base.reserved0);
307 assert(src.indirect.reserved0);
308 return (svga_shader_emit_dword( emit, src.base.value ) &&
309 svga_shader_emit_dword( emit, src.indirect.value ));
310 }
311 else {
312 assert(src.base.reserved0);
313 return svga_shader_emit_dword( emit, src.base.value );
314 }
315 }
316
317
318 /** Emit bytecode for a dst_register */
319 static bool
emit_dst(struct svga_shader_emitter * emit,SVGA3dShaderDestToken dest)320 emit_dst(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dest)
321 {
322 assert(dest.reserved0);
323 assert(dest.mask);
324 return svga_shader_emit_dword( emit, dest.value );
325 }
326
327
328 /** Emit bytecode for a 1-operand instruction */
329 static bool
emit_op1(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest,struct src_register src0)330 emit_op1(struct svga_shader_emitter *emit,
331 SVGA3dShaderInstToken inst,
332 SVGA3dShaderDestToken dest,
333 struct src_register src0)
334 {
335 return (emit_instruction(emit, inst) &&
336 emit_dst(emit, dest) &&
337 emit_src(emit, src0));
338 }
339
340
341 /** Emit bytecode for a 2-operand instruction */
342 static bool
emit_op2(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest,struct src_register src0,struct src_register src1)343 emit_op2(struct svga_shader_emitter *emit,
344 SVGA3dShaderInstToken inst,
345 SVGA3dShaderDestToken dest,
346 struct src_register src0,
347 struct src_register src1)
348 {
349 return (emit_instruction(emit, inst) &&
350 emit_dst(emit, dest) &&
351 emit_src(emit, src0) &&
352 emit_src(emit, src1));
353 }
354
355
356 /** Emit bytecode for a 3-operand instruction */
357 static bool
emit_op3(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest,struct src_register src0,struct src_register src1,struct src_register src2)358 emit_op3(struct svga_shader_emitter *emit,
359 SVGA3dShaderInstToken inst,
360 SVGA3dShaderDestToken dest,
361 struct src_register src0,
362 struct src_register src1,
363 struct src_register src2)
364 {
365 return (emit_instruction(emit, inst) &&
366 emit_dst(emit, dest) &&
367 emit_src(emit, src0) &&
368 emit_src(emit, src1) &&
369 emit_src(emit, src2));
370 }
371
372
373 /** Emit bytecode for a 4-operand instruction */
374 static bool
emit_op4(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest,struct src_register src0,struct src_register src1,struct src_register src2,struct src_register src3)375 emit_op4(struct svga_shader_emitter *emit,
376 SVGA3dShaderInstToken inst,
377 SVGA3dShaderDestToken dest,
378 struct src_register src0,
379 struct src_register src1,
380 struct src_register src2,
381 struct src_register src3)
382 {
383 return (emit_instruction(emit, inst) &&
384 emit_dst(emit, dest) &&
385 emit_src(emit, src0) &&
386 emit_src(emit, src1) &&
387 emit_src(emit, src2) &&
388 emit_src(emit, src3));
389 }
390
391
392 /**
393 * Apply the absolute value modifier to the given src_register, returning
394 * a new src_register.
395 */
396 static struct src_register
absolute(struct src_register src)397 absolute(struct src_register src)
398 {
399 src.base.srcMod = SVGA3DSRCMOD_ABS;
400 return src;
401 }
402
403
404 /**
405 * Apply the negation modifier to the given src_register, returning
406 * a new src_register.
407 */
408 static struct src_register
negate(struct src_register src)409 negate(struct src_register src)
410 {
411 switch (src.base.srcMod) {
412 case SVGA3DSRCMOD_ABS:
413 src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
414 break;
415 case SVGA3DSRCMOD_ABSNEG:
416 src.base.srcMod = SVGA3DSRCMOD_ABS;
417 break;
418 case SVGA3DSRCMOD_NEG:
419 src.base.srcMod = SVGA3DSRCMOD_NONE;
420 break;
421 case SVGA3DSRCMOD_NONE:
422 src.base.srcMod = SVGA3DSRCMOD_NEG;
423 break;
424 }
425 return src;
426 }
427
428
429
430 /* Replace the src with the temporary specified in the dst, but copying
431 * only the necessary channels, and preserving the original swizzle (which is
432 * important given that several opcodes have constraints in the allowed
433 * swizzles).
434 */
435 static bool
emit_repl(struct svga_shader_emitter * emit,SVGA3dShaderDestToken dst,struct src_register * src0)436 emit_repl(struct svga_shader_emitter *emit,
437 SVGA3dShaderDestToken dst,
438 struct src_register *src0)
439 {
440 unsigned src0_swizzle;
441 unsigned chan;
442
443 assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP);
444
445 src0_swizzle = src0->base.swizzle;
446
447 dst.mask = 0;
448 for (chan = 0; chan < 4; ++chan) {
449 unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3;
450 dst.mask |= 1 << swizzle;
451 }
452 assert(dst.mask);
453
454 src0->base.swizzle = SVGA3DSWIZZLE_NONE;
455
456 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 ))
457 return false;
458
459 *src0 = src( dst );
460 src0->base.swizzle = src0_swizzle;
461
462 return true;
463 }
464
465
466 /**
467 * Submit/emit an instruction with zero operands.
468 */
469 static bool
submit_op0(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest)470 submit_op0(struct svga_shader_emitter *emit,
471 SVGA3dShaderInstToken inst,
472 SVGA3dShaderDestToken dest)
473 {
474 return (emit_instruction( emit, inst ) &&
475 emit_dst( emit, dest ));
476 }
477
478
479 /**
480 * Submit/emit an instruction with one operand.
481 */
482 static bool
submit_op1(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest,struct src_register src0)483 submit_op1(struct svga_shader_emitter *emit,
484 SVGA3dShaderInstToken inst,
485 SVGA3dShaderDestToken dest,
486 struct src_register src0)
487 {
488 return emit_op1( emit, inst, dest, src0 );
489 }
490
491
492 /**
493 * Submit/emit an instruction with two operands.
494 *
495 * SVGA shaders may not refer to >1 constant register in a single
496 * instruction. This function checks for that usage and inserts a
497 * move to temporary if detected.
498 *
499 * The same applies to input registers -- at most a single input
500 * register may be read by any instruction.
501 */
502 static bool
submit_op2(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest,struct src_register src0,struct src_register src1)503 submit_op2(struct svga_shader_emitter *emit,
504 SVGA3dShaderInstToken inst,
505 SVGA3dShaderDestToken dest,
506 struct src_register src0,
507 struct src_register src1)
508 {
509 SVGA3dShaderDestToken temp;
510 SVGA3dShaderRegType type0, type1;
511 bool need_temp = false;
512
513 temp.value = 0;
514 type0 = SVGA3dShaderGetRegType( src0.base.value );
515 type1 = SVGA3dShaderGetRegType( src1.base.value );
516
517 if (type0 == SVGA3DREG_CONST &&
518 type1 == SVGA3DREG_CONST &&
519 src0.base.num != src1.base.num)
520 need_temp = true;
521
522 if (type0 == SVGA3DREG_INPUT &&
523 type1 == SVGA3DREG_INPUT &&
524 src0.base.num != src1.base.num)
525 need_temp = true;
526
527 if (need_temp) {
528 temp = get_temp( emit );
529
530 if (!emit_repl( emit, temp, &src0 ))
531 return false;
532 }
533
534 if (!emit_op2( emit, inst, dest, src0, src1 ))
535 return false;
536
537 if (need_temp)
538 release_temp( emit, temp );
539
540 return true;
541 }
542
543
544 /**
545 * Submit/emit an instruction with three operands.
546 *
547 * SVGA shaders may not refer to >1 constant register in a single
548 * instruction. This function checks for that usage and inserts a
549 * move to temporary if detected.
550 */
551 static bool
submit_op3(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest,struct src_register src0,struct src_register src1,struct src_register src2)552 submit_op3(struct svga_shader_emitter *emit,
553 SVGA3dShaderInstToken inst,
554 SVGA3dShaderDestToken dest,
555 struct src_register src0,
556 struct src_register src1,
557 struct src_register src2)
558 {
559 SVGA3dShaderDestToken temp0;
560 SVGA3dShaderDestToken temp1;
561 bool need_temp0 = false;
562 bool need_temp1 = false;
563 SVGA3dShaderRegType type0, type1, type2;
564
565 temp0.value = 0;
566 temp1.value = 0;
567 type0 = SVGA3dShaderGetRegType( src0.base.value );
568 type1 = SVGA3dShaderGetRegType( src1.base.value );
569 type2 = SVGA3dShaderGetRegType( src2.base.value );
570
571 if (inst.op != SVGA3DOP_SINCOS) {
572 if (type0 == SVGA3DREG_CONST &&
573 ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
574 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
575 need_temp0 = true;
576
577 if (type1 == SVGA3DREG_CONST &&
578 (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
579 need_temp1 = true;
580 }
581
582 if (type0 == SVGA3DREG_INPUT &&
583 ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
584 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
585 need_temp0 = true;
586
587 if (type1 == SVGA3DREG_INPUT &&
588 (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
589 need_temp1 = true;
590
591 if (need_temp0) {
592 temp0 = get_temp( emit );
593
594 if (!emit_repl( emit, temp0, &src0 ))
595 return false;
596 }
597
598 if (need_temp1) {
599 temp1 = get_temp( emit );
600
601 if (!emit_repl( emit, temp1, &src1 ))
602 return false;
603 }
604
605 if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
606 return false;
607
608 if (need_temp1)
609 release_temp( emit, temp1 );
610 if (need_temp0)
611 release_temp( emit, temp0 );
612 return true;
613 }
614
615
616 /**
617 * Submit/emit an instruction with four operands.
618 *
619 * SVGA shaders may not refer to >1 constant register in a single
620 * instruction. This function checks for that usage and inserts a
621 * move to temporary if detected.
622 */
623 static bool
submit_op4(struct svga_shader_emitter * emit,SVGA3dShaderInstToken inst,SVGA3dShaderDestToken dest,struct src_register src0,struct src_register src1,struct src_register src2,struct src_register src3)624 submit_op4(struct svga_shader_emitter *emit,
625 SVGA3dShaderInstToken inst,
626 SVGA3dShaderDestToken dest,
627 struct src_register src0,
628 struct src_register src1,
629 struct src_register src2,
630 struct src_register src3)
631 {
632 SVGA3dShaderDestToken temp0;
633 SVGA3dShaderDestToken temp3;
634 bool need_temp0 = false;
635 bool need_temp3 = false;
636 SVGA3dShaderRegType type0, type1, type2, type3;
637
638 temp0.value = 0;
639 temp3.value = 0;
640 type0 = SVGA3dShaderGetRegType( src0.base.value );
641 type1 = SVGA3dShaderGetRegType( src1.base.value );
642 type2 = SVGA3dShaderGetRegType( src2.base.value );
643 type3 = SVGA3dShaderGetRegType( src2.base.value );
644
645 /* Make life a little easier - this is only used by the TXD
646 * instruction which is guaranteed not to have a constant/input reg
647 * in one slot at least:
648 */
649 assert(type1 == SVGA3DREG_SAMPLER);
650 (void) type1;
651
652 if (type0 == SVGA3DREG_CONST &&
653 ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) ||
654 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
655 need_temp0 = true;
656
657 if (type3 == SVGA3DREG_CONST &&
658 (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num))
659 need_temp3 = true;
660
661 if (type0 == SVGA3DREG_INPUT &&
662 ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) ||
663 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
664 need_temp0 = true;
665
666 if (type3 == SVGA3DREG_INPUT &&
667 (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num))
668 need_temp3 = true;
669
670 if (need_temp0) {
671 temp0 = get_temp( emit );
672
673 if (!emit_repl( emit, temp0, &src0 ))
674 return false;
675 }
676
677 if (need_temp3) {
678 temp3 = get_temp( emit );
679
680 if (!emit_repl( emit, temp3, &src3 ))
681 return false;
682 }
683
684 if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 ))
685 return false;
686
687 if (need_temp3)
688 release_temp( emit, temp3 );
689 if (need_temp0)
690 release_temp( emit, temp0 );
691 return true;
692 }
693
694
695 /**
696 * Do the src and dest registers refer to the same register?
697 */
698 static bool
alias_src_dst(struct src_register src,SVGA3dShaderDestToken dst)699 alias_src_dst(struct src_register src,
700 SVGA3dShaderDestToken dst)
701 {
702 if (src.base.num != dst.num)
703 return false;
704
705 if (SVGA3dShaderGetRegType(dst.value) !=
706 SVGA3dShaderGetRegType(src.base.value))
707 return false;
708
709 return true;
710 }
711
712
713 /**
714 * Helper for emitting SVGA immediate values using the SVGA3DOP_DEF[I]
715 * instructions.
716 */
717 static bool
emit_def_const(struct svga_shader_emitter * emit,SVGA3dShaderConstType type,unsigned idx,float a,float b,float c,float d)718 emit_def_const(struct svga_shader_emitter *emit,
719 SVGA3dShaderConstType type,
720 unsigned idx, float a, float b, float c, float d)
721 {
722 SVGA3DOpDefArgs def;
723 SVGA3dShaderInstToken opcode;
724
725 switch (type) {
726 case SVGA3D_CONST_TYPE_FLOAT:
727 opcode = inst_token( SVGA3DOP_DEF );
728 def.dst = dst_register( SVGA3DREG_CONST, idx );
729 def.constValues[0] = a;
730 def.constValues[1] = b;
731 def.constValues[2] = c;
732 def.constValues[3] = d;
733 break;
734 case SVGA3D_CONST_TYPE_INT:
735 opcode = inst_token( SVGA3DOP_DEFI );
736 def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
737 def.constIValues[0] = (int)a;
738 def.constIValues[1] = (int)b;
739 def.constIValues[2] = (int)c;
740 def.constIValues[3] = (int)d;
741 break;
742 default:
743 assert(0);
744 opcode = inst_token( SVGA3DOP_NOP );
745 break;
746 }
747
748 if (!emit_instruction(emit, opcode) ||
749 !svga_shader_emit_dwords( emit, def.values, ARRAY_SIZE(def.values)))
750 return false;
751
752 return true;
753 }
754
755
756 static bool
create_loop_const(struct svga_shader_emitter * emit)757 create_loop_const( struct svga_shader_emitter *emit )
758 {
759 unsigned idx = emit->nr_hw_int_const++;
760
761 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx,
762 255, /* iteration count */
763 0, /* initial value */
764 1, /* step size */
765 0 /* not used, must be 0 */))
766 return false;
767
768 emit->loop_const_idx = idx;
769 emit->created_loop_const = true;
770
771 return true;
772 }
773
774 static bool
create_arl_consts(struct svga_shader_emitter * emit)775 create_arl_consts( struct svga_shader_emitter *emit )
776 {
777 int i;
778
779 for (i = 0; i < emit->num_arl_consts; i += 4) {
780 int j;
781 unsigned idx = emit->nr_hw_float_const++;
782 float vals[4];
783 for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
784 vals[j] = (float) emit->arl_consts[i + j].number;
785 emit->arl_consts[i + j].idx = idx;
786 switch (j) {
787 case 0:
788 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X;
789 break;
790 case 1:
791 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y;
792 break;
793 case 2:
794 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z;
795 break;
796 case 3:
797 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W;
798 break;
799 }
800 }
801 while (j < 4)
802 vals[j++] = 0;
803
804 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
805 vals[0], vals[1],
806 vals[2], vals[3]))
807 return false;
808 }
809
810 return true;
811 }
812
813
814 /**
815 * Return the register which holds the pixel shaders front/back-
816 * facing value.
817 */
818 static struct src_register
get_vface(struct svga_shader_emitter * emit)819 get_vface( struct svga_shader_emitter *emit )
820 {
821 assert(emit->emitted_vface);
822 return src_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE);
823 }
824
825
826 /**
827 * Create/emit a "common" constant with values {0, 0.5, -1, 1}.
828 * We can swizzle this to produce other useful constants such as
829 * {0, 0, 0, 0}, {1, 1, 1, 1}, etc.
830 */
831 static bool
create_common_immediate(struct svga_shader_emitter * emit)832 create_common_immediate( struct svga_shader_emitter *emit )
833 {
834 unsigned idx = emit->nr_hw_float_const++;
835
836 /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate
837 * other useful vectors.
838 */
839 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
840 idx, 0.0f, 0.5f, -1.0f, 1.0f ))
841 return false;
842 emit->common_immediate_idx[0] = idx;
843 idx++;
844
845 /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */
846 if (emit->key.vs.adjust_attrib_range) {
847 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
848 idx, 2.0f, 0.0f, 0.0f, 0.0f ))
849 return false;
850 emit->common_immediate_idx[1] = idx;
851 }
852 else {
853 emit->common_immediate_idx[1] = -1;
854 }
855
856 emit->created_common_immediate = true;
857
858 return true;
859 }
860
861
862 /**
863 * Return swizzle/position for the given value in the "common" immediate.
864 */
865 static inline unsigned
common_immediate_swizzle(float value)866 common_immediate_swizzle(float value)
867 {
868 if (value == 0.0f)
869 return TGSI_SWIZZLE_X;
870 else if (value == 0.5f)
871 return TGSI_SWIZZLE_Y;
872 else if (value == -1.0f)
873 return TGSI_SWIZZLE_Z;
874 else if (value == 1.0f)
875 return TGSI_SWIZZLE_W;
876 else {
877 assert(!"illegal value in common_immediate_swizzle");
878 return TGSI_SWIZZLE_X;
879 }
880 }
881
882
883 /**
884 * Returns an immediate reg where all the terms are either 0, 1, 2 or 0.5
885 */
886 static struct src_register
get_immediate(struct svga_shader_emitter * emit,float x,float y,float z,float w)887 get_immediate(struct svga_shader_emitter *emit,
888 float x, float y, float z, float w)
889 {
890 unsigned sx = common_immediate_swizzle(x);
891 unsigned sy = common_immediate_swizzle(y);
892 unsigned sz = common_immediate_swizzle(z);
893 unsigned sw = common_immediate_swizzle(w);
894 assert(emit->created_common_immediate);
895 assert(emit->common_immediate_idx[0] >= 0);
896 return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
897 sx, sy, sz, sw);
898 }
899
900
901 /**
902 * returns {0, 0, 0, 0} immediate
903 */
904 static struct src_register
get_zero_immediate(struct svga_shader_emitter * emit)905 get_zero_immediate( struct svga_shader_emitter *emit )
906 {
907 assert(emit->created_common_immediate);
908 assert(emit->common_immediate_idx[0] >= 0);
909 return swizzle(src_register( SVGA3DREG_CONST,
910 emit->common_immediate_idx[0]),
911 0, 0, 0, 0);
912 }
913
914
915 /**
916 * returns {1, 1, 1, 1} immediate
917 */
918 static struct src_register
get_one_immediate(struct svga_shader_emitter * emit)919 get_one_immediate( struct svga_shader_emitter *emit )
920 {
921 assert(emit->created_common_immediate);
922 assert(emit->common_immediate_idx[0] >= 0);
923 return swizzle(src_register( SVGA3DREG_CONST,
924 emit->common_immediate_idx[0]),
925 3, 3, 3, 3);
926 }
927
928
929 /**
930 * returns {0.5, 0.5, 0.5, 0.5} immediate
931 */
932 static struct src_register
get_half_immediate(struct svga_shader_emitter * emit)933 get_half_immediate( struct svga_shader_emitter *emit )
934 {
935 assert(emit->created_common_immediate);
936 assert(emit->common_immediate_idx[0] >= 0);
937 return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]),
938 1, 1, 1, 1);
939 }
940
941
942 /**
943 * returns {2, 2, 2, 2} immediate
944 */
945 static struct src_register
get_two_immediate(struct svga_shader_emitter * emit)946 get_two_immediate( struct svga_shader_emitter *emit )
947 {
948 /* Note we use the second common immediate here */
949 assert(emit->created_common_immediate);
950 assert(emit->common_immediate_idx[1] >= 0);
951 return swizzle(src_register( SVGA3DREG_CONST,
952 emit->common_immediate_idx[1]),
953 0, 0, 0, 0);
954 }
955
956
957 /**
958 * returns the loop const
959 */
960 static struct src_register
get_loop_const(struct svga_shader_emitter * emit)961 get_loop_const( struct svga_shader_emitter *emit )
962 {
963 assert(emit->created_loop_const);
964 assert(emit->loop_const_idx >= 0);
965 return src_register( SVGA3DREG_CONSTINT,
966 emit->loop_const_idx );
967 }
968
969
970 static struct src_register
get_fake_arl_const(struct svga_shader_emitter * emit)971 get_fake_arl_const( struct svga_shader_emitter *emit )
972 {
973 struct src_register reg;
974 int idx = 0, swizzle = 0, i;
975
976 for (i = 0; i < emit->num_arl_consts; ++ i) {
977 if (emit->arl_consts[i].arl_num == emit->current_arl) {
978 idx = emit->arl_consts[i].idx;
979 swizzle = emit->arl_consts[i].swizzle;
980 }
981 }
982
983 reg = src_register( SVGA3DREG_CONST, idx );
984 return scalar(reg, swizzle);
985 }
986
987
988 /**
989 * Return a register which holds the width and height of the texture
990 * currently bound to the given sampler.
991 */
992 static struct src_register
get_tex_dimensions(struct svga_shader_emitter * emit,int sampler_num)993 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
994 {
995 int idx;
996 struct src_register reg;
997
998 /* the width/height indexes start right after constants */
999 idx = emit->key.tex[sampler_num].width_height_idx +
1000 emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
1001
1002 reg = src_register( SVGA3DREG_CONST, idx );
1003 return reg;
1004 }
1005
1006
1007 static bool
emit_fake_arl(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1008 emit_fake_arl(struct svga_shader_emitter *emit,
1009 const struct tgsi_full_instruction *insn)
1010 {
1011 const struct src_register src0 =
1012 translate_src_register(emit, &insn->Src[0] );
1013 struct src_register src1 = get_fake_arl_const( emit );
1014 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1015 SVGA3dShaderDestToken tmp = get_temp( emit );
1016
1017 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
1018 return false;
1019
1020 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
1021 src1))
1022 return false;
1023
1024 /* replicate the original swizzle */
1025 src1 = src(tmp);
1026 src1.base.swizzle = src0.base.swizzle;
1027
1028 return submit_op1( emit, inst_token( SVGA3DOP_MOVA ),
1029 dst, src1 );
1030 }
1031
1032
1033 static bool
emit_if(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1034 emit_if(struct svga_shader_emitter *emit,
1035 const struct tgsi_full_instruction *insn)
1036 {
1037 struct src_register src0 =
1038 translate_src_register(emit, &insn->Src[0]);
1039 struct src_register zero = get_zero_immediate(emit);
1040 SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
1041
1042 if_token.control = SVGA3DOPCOMPC_NE;
1043
1044 if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) {
1045 /*
1046 * Max different constant registers readable per IFC instruction is 1.
1047 */
1048 SVGA3dShaderDestToken tmp = get_temp( emit );
1049
1050 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
1051 return false;
1052
1053 src0 = scalar(src( tmp ), TGSI_SWIZZLE_X);
1054 }
1055
1056 emit->dynamic_branching_level++;
1057
1058 return (emit_instruction( emit, if_token ) &&
1059 emit_src( emit, src0 ) &&
1060 emit_src( emit, zero ) );
1061 }
1062
1063
1064 static bool
emit_else(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1065 emit_else(struct svga_shader_emitter *emit,
1066 const struct tgsi_full_instruction *insn)
1067 {
1068 return emit_instruction(emit, inst_token(SVGA3DOP_ELSE));
1069 }
1070
1071
1072 static bool
emit_endif(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1073 emit_endif(struct svga_shader_emitter *emit,
1074 const struct tgsi_full_instruction *insn)
1075 {
1076 emit->dynamic_branching_level--;
1077
1078 return emit_instruction(emit, inst_token(SVGA3DOP_ENDIF));
1079 }
1080
1081
1082 /**
1083 * Translate the following TGSI FLR instruction.
1084 * FLR DST, SRC
1085 * To the following SVGA3D instruction sequence.
1086 * FRC TMP, SRC
1087 * SUB DST, SRC, TMP
1088 */
1089 static bool
emit_floor(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1090 emit_floor(struct svga_shader_emitter *emit,
1091 const struct tgsi_full_instruction *insn )
1092 {
1093 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1094 const struct src_register src0 =
1095 translate_src_register(emit, &insn->Src[0] );
1096 SVGA3dShaderDestToken temp = get_temp( emit );
1097
1098 /* FRC TMP, SRC */
1099 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 ))
1100 return false;
1101
1102 /* SUB DST, SRC, TMP */
1103 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0,
1104 negate( src( temp ) ) ))
1105 return false;
1106
1107 return true;
1108 }
1109
1110
1111 /**
1112 * Translate the following TGSI CEIL instruction.
1113 * CEIL DST, SRC
1114 * To the following SVGA3D instruction sequence.
1115 * FRC TMP, -SRC
1116 * ADD DST, SRC, TMP
1117 */
1118 static bool
emit_ceil(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1119 emit_ceil(struct svga_shader_emitter *emit,
1120 const struct tgsi_full_instruction *insn)
1121 {
1122 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
1123 const struct src_register src0 =
1124 translate_src_register(emit, &insn->Src[0]);
1125 SVGA3dShaderDestToken temp = get_temp(emit);
1126
1127 /* FRC TMP, -SRC */
1128 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), temp, negate(src0)))
1129 return false;
1130
1131 /* ADD DST, SRC, TMP */
1132 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), dst, src0, src(temp)))
1133 return false;
1134
1135 return true;
1136 }
1137
1138
1139 /**
1140 * Translate the following TGSI DIV instruction.
1141 * DIV DST.xy, SRC0, SRC1
1142 * To the following SVGA3D instruction sequence.
1143 * RCP TMP.x, SRC1.xxxx
1144 * RCP TMP.y, SRC1.yyyy
1145 * MUL DST.xy, SRC0, TMP
1146 */
1147 static bool
emit_div(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1148 emit_div(struct svga_shader_emitter *emit,
1149 const struct tgsi_full_instruction *insn )
1150 {
1151 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1152 const struct src_register src0 =
1153 translate_src_register(emit, &insn->Src[0] );
1154 const struct src_register src1 =
1155 translate_src_register(emit, &insn->Src[1] );
1156 SVGA3dShaderDestToken temp = get_temp( emit );
1157 unsigned i;
1158
1159 /* For each enabled element, perform a RCP instruction. Note that
1160 * RCP is scalar in SVGA3D:
1161 */
1162 for (i = 0; i < 4; i++) {
1163 unsigned channel = 1 << i;
1164 if (dst.mask & channel) {
1165 /* RCP TMP.?, SRC1.???? */
1166 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1167 writemask(temp, channel),
1168 scalar(src1, i) ))
1169 return false;
1170 }
1171 }
1172
1173 /* Vector mul:
1174 * MUL DST, SRC0, TMP
1175 */
1176 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
1177 src( temp ) ))
1178 return false;
1179
1180 return true;
1181 }
1182
1183
1184 /**
1185 * Translate the following TGSI DP2 instruction.
1186 * DP2 DST, SRC1, SRC2
1187 * To the following SVGA3D instruction sequence.
1188 * MUL TMP, SRC1, SRC2
1189 * ADD DST, TMP.xxxx, TMP.yyyy
1190 */
1191 static bool
emit_dp2(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1192 emit_dp2(struct svga_shader_emitter *emit,
1193 const struct tgsi_full_instruction *insn )
1194 {
1195 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1196 const struct src_register src0 =
1197 translate_src_register(emit, &insn->Src[0]);
1198 const struct src_register src1 =
1199 translate_src_register(emit, &insn->Src[1]);
1200 SVGA3dShaderDestToken temp = get_temp( emit );
1201 struct src_register temp_src0, temp_src1;
1202
1203 /* MUL TMP, SRC1, SRC2 */
1204 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
1205 return false;
1206
1207 temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1208 temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1209
1210 /* ADD DST, TMP.xxxx, TMP.yyyy */
1211 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
1212 temp_src0, temp_src1 ))
1213 return false;
1214
1215 return true;
1216 }
1217
1218
1219 /**
1220 * Sine / Cosine helper function.
1221 */
1222 static bool
do_emit_sincos(struct svga_shader_emitter * emit,SVGA3dShaderDestToken dst,struct src_register src0)1223 do_emit_sincos(struct svga_shader_emitter *emit,
1224 SVGA3dShaderDestToken dst,
1225 struct src_register src0)
1226 {
1227 src0 = scalar(src0, TGSI_SWIZZLE_X);
1228 return submit_op1(emit, inst_token(SVGA3DOP_SINCOS), dst, src0);
1229 }
1230
1231
1232 /**
1233 * Translate TGSI SIN instruction into:
1234 * SCS TMP SRC
1235 * MOV DST TMP.yyyy
1236 */
1237 static bool
emit_sin(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1238 emit_sin(struct svga_shader_emitter *emit,
1239 const struct tgsi_full_instruction *insn )
1240 {
1241 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1242 struct src_register src0 =
1243 translate_src_register(emit, &insn->Src[0] );
1244 SVGA3dShaderDestToken temp = get_temp( emit );
1245
1246 /* SCS TMP SRC */
1247 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0))
1248 return false;
1249
1250 src0 = scalar(src( temp ), TGSI_SWIZZLE_Y);
1251
1252 /* MOV DST TMP.yyyy */
1253 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1254 return false;
1255
1256 return true;
1257 }
1258
1259
1260 /*
1261 * Translate TGSI COS instruction into:
1262 * SCS TMP SRC
1263 * MOV DST TMP.xxxx
1264 */
1265 static bool
emit_cos(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1266 emit_cos(struct svga_shader_emitter *emit,
1267 const struct tgsi_full_instruction *insn)
1268 {
1269 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1270 struct src_register src0 =
1271 translate_src_register(emit, &insn->Src[0] );
1272 SVGA3dShaderDestToken temp = get_temp( emit );
1273
1274 /* SCS TMP SRC */
1275 if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 ))
1276 return false;
1277
1278 src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
1279
1280 /* MOV DST TMP.xxxx */
1281 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
1282 return false;
1283
1284 return true;
1285 }
1286
1287
1288 /**
1289 * Translate/emit TGSI SSG (Set Sign: -1, 0, +1) instruction.
1290 */
1291 static bool
emit_ssg(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1292 emit_ssg(struct svga_shader_emitter *emit,
1293 const struct tgsi_full_instruction *insn)
1294 {
1295 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1296 struct src_register src0 =
1297 translate_src_register(emit, &insn->Src[0] );
1298 SVGA3dShaderDestToken temp0 = get_temp( emit );
1299 SVGA3dShaderDestToken temp1 = get_temp( emit );
1300 struct src_register zero, one;
1301
1302 if (emit->unit == PIPE_SHADER_VERTEX) {
1303 /* SGN DST, SRC0, TMP0, TMP1 */
1304 return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0,
1305 src( temp0 ), src( temp1 ) );
1306 }
1307
1308 one = get_one_immediate(emit);
1309 zero = get_zero_immediate(emit);
1310
1311 /* CMP TMP0, SRC0, one, zero */
1312 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1313 writemask( temp0, dst.mask ), src0, one, zero ))
1314 return false;
1315
1316 /* CMP TMP1, negate(SRC0), negate(one), zero */
1317 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
1318 writemask( temp1, dst.mask ), negate( src0 ), negate( one ),
1319 zero ))
1320 return false;
1321
1322 /* ADD DST, TMP0, TMP1 */
1323 return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ),
1324 src( temp1 ) );
1325 }
1326
1327
1328 /**
1329 * Translate/emit the conditional discard instruction (discard if
1330 * any of X,Y,Z,W are negative).
1331 */
1332 static bool
emit_cond_discard(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1333 emit_cond_discard(struct svga_shader_emitter *emit,
1334 const struct tgsi_full_instruction *insn)
1335 {
1336 const struct tgsi_full_src_register *reg = &insn->Src[0];
1337 struct src_register src0, srcIn;
1338 const bool special = (reg->Register.Absolute ||
1339 reg->Register.Negate ||
1340 reg->Register.Indirect ||
1341 reg->Register.SwizzleX != 0 ||
1342 reg->Register.SwizzleY != 1 ||
1343 reg->Register.SwizzleZ != 2 ||
1344 reg->Register.File != TGSI_FILE_TEMPORARY);
1345 SVGA3dShaderDestToken temp;
1346
1347 src0 = srcIn = translate_src_register( emit, reg );
1348
1349 if (special) {
1350 /* need a temp reg */
1351 temp = get_temp( emit );
1352 }
1353
1354 if (special) {
1355 /* move the source into a temp register */
1356 submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, src0);
1357
1358 src0 = src( temp );
1359 }
1360
1361 /* Do the discard by checking if any of the XYZW components are < 0.
1362 * Note that ps_2_0 and later take XYZW in consideration, while ps_1_x
1363 * only used XYZ. The MSDN documentation about this is incorrect.
1364 */
1365 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), dst(src0) ))
1366 return false;
1367
1368 return true;
1369 }
1370
1371
1372 /**
1373 * Translate/emit the unconditional discard instruction (usually found inside
1374 * an IF/ELSE/ENDIF block).
1375 */
1376 static bool
emit_discard(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1377 emit_discard(struct svga_shader_emitter *emit,
1378 const struct tgsi_full_instruction *insn)
1379 {
1380 SVGA3dShaderDestToken temp;
1381 struct src_register one = get_one_immediate(emit);
1382 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_TEXKILL );
1383
1384 /* texkill doesn't allow negation on the operand so lets move
1385 * negation of {1} to a temp register */
1386 temp = get_temp( emit );
1387 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
1388 negate( one ) ))
1389 return false;
1390
1391 return submit_op0( emit, inst, temp );
1392 }
1393
1394
1395 /**
1396 * Test if r1 and r2 are the same register.
1397 */
1398 static bool
same_register(struct src_register r1,struct src_register r2)1399 same_register(struct src_register r1, struct src_register r2)
1400 {
1401 return (r1.base.num == r2.base.num &&
1402 r1.base.type_upper == r2.base.type_upper &&
1403 r1.base.type_lower == r2.base.type_lower);
1404 }
1405
1406
1407
1408 /**
1409 * Implement conditionals by initializing destination reg to 'fail',
1410 * then set predicate reg with UFOP_SETP, then move 'pass' to dest
1411 * based on predicate reg.
1412 *
1413 * SETP src0, cmp, src1 -- do this first to avoid aliasing problems.
1414 * MOV dst, fail
1415 * MOV dst, pass, p0
1416 */
1417 static bool
emit_conditional(struct svga_shader_emitter * emit,enum pipe_compare_func compare_func,SVGA3dShaderDestToken dst,struct src_register src0,struct src_register src1,struct src_register pass,struct src_register fail)1418 emit_conditional(struct svga_shader_emitter *emit,
1419 enum pipe_compare_func compare_func,
1420 SVGA3dShaderDestToken dst,
1421 struct src_register src0,
1422 struct src_register src1,
1423 struct src_register pass,
1424 struct src_register fail)
1425 {
1426 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
1427 SVGA3dShaderInstToken setp_token;
1428
1429 switch (compare_func) {
1430 case PIPE_FUNC_NEVER:
1431 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1432 dst, fail );
1433 break;
1434 case PIPE_FUNC_LESS:
1435 setp_token = inst_token_setp(SVGA3DOPCOMP_LT);
1436 break;
1437 case PIPE_FUNC_EQUAL:
1438 setp_token = inst_token_setp(SVGA3DOPCOMP_EQ);
1439 break;
1440 case PIPE_FUNC_LEQUAL:
1441 setp_token = inst_token_setp(SVGA3DOPCOMP_LE);
1442 break;
1443 case PIPE_FUNC_GREATER:
1444 setp_token = inst_token_setp(SVGA3DOPCOMP_GT);
1445 break;
1446 case PIPE_FUNC_NOTEQUAL:
1447 setp_token = inst_token_setp(SVGA3DOPCOMPC_NE);
1448 break;
1449 case PIPE_FUNC_GEQUAL:
1450 setp_token = inst_token_setp(SVGA3DOPCOMP_GE);
1451 break;
1452 case PIPE_FUNC_ALWAYS:
1453 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1454 dst, pass );
1455 break;
1456 }
1457
1458 if (same_register(src(dst), pass)) {
1459 /* We'll get bad results if the dst and pass registers are the same
1460 * so use a temp register containing pass.
1461 */
1462 SVGA3dShaderDestToken temp = get_temp(emit);
1463 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, pass))
1464 return false;
1465 pass = src(temp);
1466 }
1467
1468 /* SETP src0, COMPOP, src1 */
1469 if (!submit_op2( emit, setp_token, pred_reg,
1470 src0, src1 ))
1471 return false;
1472
1473 /* MOV dst, fail */
1474 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), dst, fail))
1475 return false;
1476
1477 /* MOV dst, pass (predicated)
1478 *
1479 * Note that the predicate reg (and possible modifiers) is passed
1480 * as the first source argument.
1481 */
1482 if (!submit_op2(emit,
1483 inst_token_predicated(SVGA3DOP_MOV), dst,
1484 src(pred_reg), pass))
1485 return false;
1486
1487 return true;
1488 }
1489
1490
1491 /**
1492 * Helper for emiting 'selection' commands. Basically:
1493 * if (src0 OP src1)
1494 * dst = 1.0;
1495 * else
1496 * dst = 0.0;
1497 */
1498 static bool
emit_select(struct svga_shader_emitter * emit,enum pipe_compare_func compare_func,SVGA3dShaderDestToken dst,struct src_register src0,struct src_register src1)1499 emit_select(struct svga_shader_emitter *emit,
1500 enum pipe_compare_func compare_func,
1501 SVGA3dShaderDestToken dst,
1502 struct src_register src0,
1503 struct src_register src1 )
1504 {
1505 /* There are some SVGA instructions which implement some selects
1506 * directly, but they are only available in the vertex shader.
1507 */
1508 if (emit->unit == PIPE_SHADER_VERTEX) {
1509 switch (compare_func) {
1510 case PIPE_FUNC_GEQUAL:
1511 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
1512 case PIPE_FUNC_LEQUAL:
1513 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
1514 case PIPE_FUNC_GREATER:
1515 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
1516 case PIPE_FUNC_LESS:
1517 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
1518 default:
1519 break;
1520 }
1521 }
1522
1523 /* Otherwise, need to use the setp approach:
1524 */
1525 {
1526 struct src_register one, zero;
1527 /* zero immediate is 0,0,0,1 */
1528 zero = get_zero_immediate(emit);
1529 one = get_one_immediate(emit);
1530
1531 return emit_conditional(emit, compare_func, dst, src0, src1, one, zero);
1532 }
1533 }
1534
1535
1536 /**
1537 * Translate/emit a TGSI SEQ, SNE, SLT, SGE, etc. instruction.
1538 */
1539 static bool
emit_select_op(struct svga_shader_emitter * emit,unsigned compare,const struct tgsi_full_instruction * insn)1540 emit_select_op(struct svga_shader_emitter *emit,
1541 unsigned compare,
1542 const struct tgsi_full_instruction *insn)
1543 {
1544 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1545 struct src_register src0 = translate_src_register(
1546 emit, &insn->Src[0] );
1547 struct src_register src1 = translate_src_register(
1548 emit, &insn->Src[1] );
1549
1550 return emit_select( emit, compare, dst, src0, src1 );
1551 }
1552
1553
1554 /**
1555 * Translate TGSI CMP instruction. Component-wise:
1556 * dst = (src0 < 0.0) ? src1 : src2
1557 */
1558 static bool
emit_cmp(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1559 emit_cmp(struct svga_shader_emitter *emit,
1560 const struct tgsi_full_instruction *insn)
1561 {
1562 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
1563 const struct src_register src0 =
1564 translate_src_register(emit, &insn->Src[0] );
1565 const struct src_register src1 =
1566 translate_src_register(emit, &insn->Src[1] );
1567 const struct src_register src2 =
1568 translate_src_register(emit, &insn->Src[2] );
1569
1570 if (emit->unit == PIPE_SHADER_VERTEX) {
1571 struct src_register zero = get_zero_immediate(emit);
1572 /* We used to simulate CMP with SLT+LRP. But that didn't work when
1573 * src1 or src2 was Inf/NaN. In particular, GLSL sqrt(0) failed
1574 * because it involves a CMP to handle the 0 case.
1575 * Use a conditional expression instead.
1576 */
1577 return emit_conditional(emit, PIPE_FUNC_LESS, dst,
1578 src0, zero, src1, src2);
1579 }
1580 else {
1581 assert(emit->unit == PIPE_SHADER_FRAGMENT);
1582
1583 /* CMP DST, SRC0, SRC2, SRC1 */
1584 return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst,
1585 src0, src2, src1);
1586 }
1587 }
1588
1589
1590 /**
1591 * Translate/emit 2-operand (coord, sampler) texture instructions.
1592 */
1593 static bool
emit_tex2(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn,SVGA3dShaderDestToken dst)1594 emit_tex2(struct svga_shader_emitter *emit,
1595 const struct tgsi_full_instruction *insn,
1596 SVGA3dShaderDestToken dst)
1597 {
1598 SVGA3dShaderInstToken inst;
1599 struct src_register texcoord;
1600 struct src_register sampler;
1601 SVGA3dShaderDestToken tmp;
1602
1603 inst.value = 0;
1604
1605 switch (insn->Instruction.Opcode) {
1606 case TGSI_OPCODE_TEX:
1607 inst.op = SVGA3DOP_TEX;
1608 break;
1609 case TGSI_OPCODE_TXP:
1610 inst.op = SVGA3DOP_TEX;
1611 inst.control = SVGA3DOPCONT_PROJECT;
1612 break;
1613 case TGSI_OPCODE_TXB:
1614 inst.op = SVGA3DOP_TEX;
1615 inst.control = SVGA3DOPCONT_BIAS;
1616 break;
1617 case TGSI_OPCODE_TXL:
1618 inst.op = SVGA3DOP_TEXLDL;
1619 break;
1620 default:
1621 assert(0);
1622 return false;
1623 }
1624
1625 texcoord = translate_src_register( emit, &insn->Src[0] );
1626 sampler = translate_src_register( emit, &insn->Src[1] );
1627
1628 if (emit->key.tex[sampler.base.num].unnormalized ||
1629 emit->dynamic_branching_level > 0)
1630 tmp = get_temp( emit );
1631
1632 /* Can't do mipmapping inside dynamic branch constructs. Force LOD
1633 * zero in that case.
1634 */
1635 if (emit->dynamic_branching_level > 0 &&
1636 inst.op == SVGA3DOP_TEX &&
1637 SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
1638 struct src_register zero = get_zero_immediate(emit);
1639
1640 /* MOV tmp, texcoord */
1641 if (!submit_op1( emit,
1642 inst_token( SVGA3DOP_MOV ),
1643 tmp,
1644 texcoord ))
1645 return false;
1646
1647 /* MOV tmp.w, zero */
1648 if (!submit_op1( emit,
1649 inst_token( SVGA3DOP_MOV ),
1650 writemask( tmp, TGSI_WRITEMASK_W ),
1651 zero ))
1652 return false;
1653
1654 texcoord = src( tmp );
1655 inst.op = SVGA3DOP_TEXLDL;
1656 }
1657
1658 /* Explicit normalization of texcoords:
1659 */
1660 if (emit->key.tex[sampler.base.num].unnormalized) {
1661 struct src_register wh = get_tex_dimensions( emit, sampler.base.num );
1662
1663 /* MUL tmp, SRC0, WH */
1664 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1665 tmp, texcoord, wh ))
1666 return false;
1667
1668 texcoord = src( tmp );
1669 }
1670
1671 return submit_op2( emit, inst, dst, texcoord, sampler );
1672 }
1673
1674
1675 /**
1676 * Translate/emit 4-operand (coord, ddx, ddy, sampler) texture instructions.
1677 */
1678 static bool
emit_tex4(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn,SVGA3dShaderDestToken dst)1679 emit_tex4(struct svga_shader_emitter *emit,
1680 const struct tgsi_full_instruction *insn,
1681 SVGA3dShaderDestToken dst )
1682 {
1683 SVGA3dShaderInstToken inst;
1684 struct src_register texcoord;
1685 struct src_register ddx;
1686 struct src_register ddy;
1687 struct src_register sampler;
1688
1689 texcoord = translate_src_register( emit, &insn->Src[0] );
1690 ddx = translate_src_register( emit, &insn->Src[1] );
1691 ddy = translate_src_register( emit, &insn->Src[2] );
1692 sampler = translate_src_register( emit, &insn->Src[3] );
1693
1694 inst.value = 0;
1695
1696 switch (insn->Instruction.Opcode) {
1697 case TGSI_OPCODE_TXD:
1698 inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
1699 break;
1700 default:
1701 assert(0);
1702 return false;
1703 }
1704
1705 return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
1706 }
1707
1708
1709 /**
1710 * Emit texture swizzle code. We do this here since SVGA samplers don't
1711 * directly support swizzles.
1712 */
1713 static bool
emit_tex_swizzle(struct svga_shader_emitter * emit,SVGA3dShaderDestToken dst,struct src_register src,unsigned swizzle_x,unsigned swizzle_y,unsigned swizzle_z,unsigned swizzle_w)1714 emit_tex_swizzle(struct svga_shader_emitter *emit,
1715 SVGA3dShaderDestToken dst,
1716 struct src_register src,
1717 unsigned swizzle_x,
1718 unsigned swizzle_y,
1719 unsigned swizzle_z,
1720 unsigned swizzle_w)
1721 {
1722 const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
1723 unsigned srcSwizzle[4];
1724 unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0;
1725 unsigned i;
1726
1727 /* build writemasks and srcSwizzle terms */
1728 for (i = 0; i < 4; i++) {
1729 if (swizzleIn[i] == PIPE_SWIZZLE_0) {
1730 srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1731 zeroWritemask |= (1 << i);
1732 }
1733 else if (swizzleIn[i] == PIPE_SWIZZLE_1) {
1734 srcSwizzle[i] = TGSI_SWIZZLE_X + i;
1735 oneWritemask |= (1 << i);
1736 }
1737 else {
1738 srcSwizzle[i] = swizzleIn[i];
1739 srcWritemask |= (1 << i);
1740 }
1741 }
1742
1743 /* write x/y/z/w comps */
1744 if (dst.mask & srcWritemask) {
1745 if (!submit_op1(emit,
1746 inst_token(SVGA3DOP_MOV),
1747 writemask(dst, srcWritemask),
1748 swizzle(src,
1749 srcSwizzle[0],
1750 srcSwizzle[1],
1751 srcSwizzle[2],
1752 srcSwizzle[3])))
1753 return false;
1754 }
1755
1756 /* write 0 comps */
1757 if (dst.mask & zeroWritemask) {
1758 if (!submit_op1(emit,
1759 inst_token(SVGA3DOP_MOV),
1760 writemask(dst, zeroWritemask),
1761 get_zero_immediate(emit)))
1762 return false;
1763 }
1764
1765 /* write 1 comps */
1766 if (dst.mask & oneWritemask) {
1767 if (!submit_op1(emit,
1768 inst_token(SVGA3DOP_MOV),
1769 writemask(dst, oneWritemask),
1770 get_one_immediate(emit)))
1771 return false;
1772 }
1773
1774 return true;
1775 }
1776
1777
1778 /**
1779 * Translate/emit a TGSI texture sample instruction.
1780 */
1781 static bool
emit_tex(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1782 emit_tex(struct svga_shader_emitter *emit,
1783 const struct tgsi_full_instruction *insn)
1784 {
1785 SVGA3dShaderDestToken dst =
1786 translate_dst_register( emit, insn, 0 );
1787 struct src_register src0 =
1788 translate_src_register( emit, &insn->Src[0] );
1789 struct src_register src1 =
1790 translate_src_register( emit, &insn->Src[1] );
1791
1792 SVGA3dShaderDestToken tex_result;
1793 const unsigned unit = src1.base.num;
1794
1795 /* check for shadow samplers */
1796 bool compare = (emit->key.tex[unit].compare_mode ==
1797 PIPE_TEX_COMPARE_R_TO_TEXTURE);
1798
1799 /* texture swizzle */
1800 bool swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_X ||
1801 emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_Y ||
1802 emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_Z ||
1803 emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_W);
1804
1805 bool saturate = insn->Instruction.Saturate;
1806
1807 /* If doing compare processing or tex swizzle or saturation, we need to put
1808 * the fetched color into a temporary so it can be used as a source later on.
1809 */
1810 if (compare || swizzle || saturate) {
1811 tex_result = get_temp( emit );
1812 }
1813 else {
1814 tex_result = dst;
1815 }
1816
1817 switch(insn->Instruction.Opcode) {
1818 case TGSI_OPCODE_TEX:
1819 case TGSI_OPCODE_TXB:
1820 case TGSI_OPCODE_TXP:
1821 case TGSI_OPCODE_TXL:
1822 if (!emit_tex2( emit, insn, tex_result ))
1823 return false;
1824 break;
1825 case TGSI_OPCODE_TXD:
1826 if (!emit_tex4( emit, insn, tex_result ))
1827 return false;
1828 break;
1829 default:
1830 assert(0);
1831 }
1832
1833 if (compare) {
1834 SVGA3dShaderDestToken dst2;
1835
1836 if (swizzle || saturate)
1837 dst2 = tex_result;
1838 else
1839 dst2 = dst;
1840
1841 if (dst.mask & TGSI_WRITEMASK_XYZ) {
1842 SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
1843 /* When sampling a depth texture, the result of the comparison is in
1844 * the Y component.
1845 */
1846 struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
1847 struct src_register r_coord;
1848
1849 if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) {
1850 /* Divide texcoord R by Q */
1851 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
1852 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1853 scalar(src0, TGSI_SWIZZLE_W) ))
1854 return false;
1855
1856 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
1857 writemask(src0_zdivw, TGSI_WRITEMASK_X),
1858 scalar(src0, TGSI_SWIZZLE_Z),
1859 scalar(src(src0_zdivw), TGSI_SWIZZLE_X) ))
1860 return false;
1861
1862 r_coord = scalar(src(src0_zdivw), TGSI_SWIZZLE_X);
1863 }
1864 else {
1865 r_coord = scalar(src0, TGSI_SWIZZLE_Z);
1866 }
1867
1868 /* Compare texture sample value against R component of texcoord */
1869 if (!emit_select(emit,
1870 emit->key.tex[unit].compare_func,
1871 writemask( dst2, TGSI_WRITEMASK_XYZ ),
1872 r_coord,
1873 tex_src_x))
1874 return false;
1875 }
1876
1877 if (dst.mask & TGSI_WRITEMASK_W) {
1878 struct src_register one = get_one_immediate(emit);
1879
1880 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
1881 writemask( dst2, TGSI_WRITEMASK_W ),
1882 one ))
1883 return false;
1884 }
1885 }
1886
1887 if (saturate && !swizzle) {
1888 /* MOV_SAT real_dst, dst */
1889 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
1890 return false;
1891 }
1892 else if (swizzle) {
1893 /* swizzle from tex_result to dst (handles saturation too, if any) */
1894 emit_tex_swizzle(emit,
1895 dst, src(tex_result),
1896 emit->key.tex[unit].swizzle_r,
1897 emit->key.tex[unit].swizzle_g,
1898 emit->key.tex[unit].swizzle_b,
1899 emit->key.tex[unit].swizzle_a);
1900 }
1901
1902 return true;
1903 }
1904
1905
1906 static bool
emit_bgnloop(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1907 emit_bgnloop(struct svga_shader_emitter *emit,
1908 const struct tgsi_full_instruction *insn)
1909 {
1910 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
1911 struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
1912 struct src_register const_int = get_loop_const( emit );
1913
1914 emit->dynamic_branching_level++;
1915
1916 return (emit_instruction( emit, inst ) &&
1917 emit_src( emit, loop_reg ) &&
1918 emit_src( emit, const_int ) );
1919 }
1920
1921
1922 static bool
emit_endloop(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1923 emit_endloop(struct svga_shader_emitter *emit,
1924 const struct tgsi_full_instruction *insn)
1925 {
1926 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
1927
1928 emit->dynamic_branching_level--;
1929
1930 return emit_instruction( emit, inst );
1931 }
1932
1933
1934 /**
1935 * Translate/emit TGSI BREAK (out of loop) instruction.
1936 */
1937 static bool
emit_brk(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)1938 emit_brk(struct svga_shader_emitter *emit,
1939 const struct tgsi_full_instruction *insn)
1940 {
1941 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
1942 return emit_instruction( emit, inst );
1943 }
1944
1945
1946 /**
1947 * Emit simple instruction which operates on one scalar value (not
1948 * a vector). Ex: LG2, RCP, RSQ.
1949 */
1950 static bool
emit_scalar_op1(struct svga_shader_emitter * emit,SVGA3dShaderOpCodeType opcode,const struct tgsi_full_instruction * insn)1951 emit_scalar_op1(struct svga_shader_emitter *emit,
1952 SVGA3dShaderOpCodeType opcode,
1953 const struct tgsi_full_instruction *insn)
1954 {
1955 SVGA3dShaderInstToken inst;
1956 SVGA3dShaderDestToken dst;
1957 struct src_register src;
1958
1959 inst = inst_token( opcode );
1960 dst = translate_dst_register( emit, insn, 0 );
1961 src = translate_src_register( emit, &insn->Src[0] );
1962 src = scalar( src, TGSI_SWIZZLE_X );
1963
1964 return submit_op1( emit, inst, dst, src );
1965 }
1966
1967
1968 /**
1969 * Translate/emit a simple instruction (one which has no special-case
1970 * code) such as ADD, MUL, MIN, MAX.
1971 */
1972 static bool
emit_simple_instruction(struct svga_shader_emitter * emit,SVGA3dShaderOpCodeType opcode,const struct tgsi_full_instruction * insn)1973 emit_simple_instruction(struct svga_shader_emitter *emit,
1974 SVGA3dShaderOpCodeType opcode,
1975 const struct tgsi_full_instruction *insn)
1976 {
1977 const struct tgsi_full_src_register *src = insn->Src;
1978 SVGA3dShaderInstToken inst;
1979 SVGA3dShaderDestToken dst;
1980
1981 inst = inst_token( opcode );
1982 dst = translate_dst_register( emit, insn, 0 );
1983
1984 switch (insn->Instruction.NumSrcRegs) {
1985 case 0:
1986 return submit_op0( emit, inst, dst );
1987 case 1:
1988 return submit_op1( emit, inst, dst,
1989 translate_src_register( emit, &src[0] ));
1990 case 2:
1991 return submit_op2( emit, inst, dst,
1992 translate_src_register( emit, &src[0] ),
1993 translate_src_register( emit, &src[1] ) );
1994 case 3:
1995 return submit_op3( emit, inst, dst,
1996 translate_src_register( emit, &src[0] ),
1997 translate_src_register( emit, &src[1] ),
1998 translate_src_register( emit, &src[2] ) );
1999 default:
2000 assert(0);
2001 return false;
2002 }
2003 }
2004
2005
2006 /**
2007 * TGSI_OPCODE_MOVE is only special-cased here to detect the
2008 * svga_fragment_shader::constant_color_output case.
2009 */
2010 static bool
emit_mov(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2011 emit_mov(struct svga_shader_emitter *emit,
2012 const struct tgsi_full_instruction *insn)
2013 {
2014 const struct tgsi_full_src_register *src = &insn->Src[0];
2015 const struct tgsi_full_dst_register *dst = &insn->Dst[0];
2016
2017 if (emit->unit == PIPE_SHADER_FRAGMENT &&
2018 dst->Register.File == TGSI_FILE_OUTPUT &&
2019 dst->Register.Index == 0 &&
2020 src->Register.File == TGSI_FILE_CONSTANT &&
2021 !src->Register.Indirect) {
2022 emit->constant_color_output = true;
2023 }
2024
2025 return emit_simple_instruction(emit, SVGA3DOP_MOV, insn);
2026 }
2027
2028
2029 /**
2030 * Translate TGSI SQRT instruction
2031 * if src1 == 0
2032 * mov dst, src1
2033 * else
2034 * rsq temp, src1
2035 * rcp dst, temp
2036 * endif
2037 */
2038 static bool
emit_sqrt(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2039 emit_sqrt(struct svga_shader_emitter *emit,
2040 const struct tgsi_full_instruction *insn)
2041 {
2042 const struct src_register src1 = translate_src_register(emit, &insn->Src[0]);
2043 const struct src_register zero = get_zero_immediate(emit);
2044 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
2045 SVGA3dShaderDestToken temp = get_temp(emit);
2046 SVGA3dShaderInstToken if_token = inst_token(SVGA3DOP_IFC);
2047 bool ret = true;
2048
2049 if_token.control = SVGA3DOPCOMP_EQ;
2050
2051 if (!(emit_instruction(emit, if_token) &&
2052 emit_src(emit, src1) &&
2053 emit_src(emit, zero))) {
2054 ret = false;
2055 goto cleanup;
2056 }
2057
2058 if (!submit_op1(emit,
2059 inst_token(SVGA3DOP_MOV),
2060 dst, src1)) {
2061 ret = false;
2062 goto cleanup;
2063 }
2064
2065 if (!emit_instruction(emit, inst_token(SVGA3DOP_ELSE))) {
2066 ret = false;
2067 goto cleanup;
2068 }
2069
2070 if (!submit_op1(emit,
2071 inst_token(SVGA3DOP_RSQ),
2072 temp, src1)) {
2073 ret = false;
2074 goto cleanup;
2075 }
2076
2077 if (!submit_op1(emit,
2078 inst_token(SVGA3DOP_RCP),
2079 dst, src(temp))) {
2080 ret = false;
2081 goto cleanup;
2082 }
2083
2084 if (!emit_instruction(emit, inst_token(SVGA3DOP_ENDIF))) {
2085 ret = false;
2086 goto cleanup;
2087 }
2088
2089 cleanup:
2090 release_temp(emit, temp);
2091
2092 return ret;
2093 }
2094
2095
2096 /**
2097 * Translate/emit TGSI DDX, DDY instructions.
2098 */
2099 static bool
emit_deriv(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2100 emit_deriv(struct svga_shader_emitter *emit,
2101 const struct tgsi_full_instruction *insn )
2102 {
2103 if (emit->dynamic_branching_level > 0 &&
2104 insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
2105 {
2106 SVGA3dShaderDestToken dst =
2107 translate_dst_register( emit, insn, 0 );
2108
2109 /* Deriv opcodes not valid inside dynamic branching, workaround
2110 * by zeroing out the destination.
2111 */
2112 if (!submit_op1(emit,
2113 inst_token( SVGA3DOP_MOV ),
2114 dst,
2115 get_zero_immediate(emit)))
2116 return false;
2117
2118 return true;
2119 }
2120 else {
2121 SVGA3dShaderOpCodeType opcode;
2122 const struct tgsi_full_src_register *reg = &insn->Src[0];
2123 SVGA3dShaderInstToken inst;
2124 SVGA3dShaderDestToken dst;
2125 struct src_register src0;
2126
2127 switch (insn->Instruction.Opcode) {
2128 case TGSI_OPCODE_DDX:
2129 opcode = SVGA3DOP_DSX;
2130 break;
2131 case TGSI_OPCODE_DDY:
2132 opcode = SVGA3DOP_DSY;
2133 break;
2134 default:
2135 return false;
2136 }
2137
2138 inst = inst_token( opcode );
2139 dst = translate_dst_register( emit, insn, 0 );
2140 src0 = translate_src_register( emit, reg );
2141
2142 /* We cannot use negate or abs on source to dsx/dsy instruction.
2143 */
2144 if (reg->Register.Absolute ||
2145 reg->Register.Negate) {
2146 SVGA3dShaderDestToken temp = get_temp( emit );
2147
2148 if (!emit_repl( emit, temp, &src0 ))
2149 return false;
2150 }
2151
2152 return submit_op1( emit, inst, dst, src0 );
2153 }
2154 }
2155
2156
2157 /**
2158 * Translate/emit ARL (Address Register Load) instruction. Used to
2159 * move a value into the special 'address' register. Used to implement
2160 * indirect/variable indexing into arrays.
2161 */
2162 static bool
emit_arl(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2163 emit_arl(struct svga_shader_emitter *emit,
2164 const struct tgsi_full_instruction *insn)
2165 {
2166 ++emit->current_arl;
2167 if (emit->unit == PIPE_SHADER_FRAGMENT) {
2168 /* MOVA not present in pixel shader instruction set.
2169 * Ignore this instruction altogether since it is
2170 * only used for loop counters -- and for that
2171 * we reference aL directly.
2172 */
2173 return true;
2174 }
2175 if (svga_arl_needs_adjustment( emit )) {
2176 return emit_fake_arl( emit, insn );
2177 } else {
2178 /* no need to adjust, just emit straight arl */
2179 return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
2180 }
2181 }
2182
2183
2184 static bool
emit_pow(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2185 emit_pow(struct svga_shader_emitter *emit,
2186 const struct tgsi_full_instruction *insn)
2187 {
2188 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2189 struct src_register src0 = translate_src_register(
2190 emit, &insn->Src[0] );
2191 struct src_register src1 = translate_src_register(
2192 emit, &insn->Src[1] );
2193 bool need_tmp = false;
2194
2195 /* POW can only output to a temporary */
2196 if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
2197 need_tmp = true;
2198
2199 /* POW src1 must not be the same register as dst */
2200 if (alias_src_dst( src1, dst ))
2201 need_tmp = true;
2202
2203 /* it's a scalar op */
2204 src0 = scalar( src0, TGSI_SWIZZLE_X );
2205 src1 = scalar( src1, TGSI_SWIZZLE_X );
2206
2207 if (need_tmp) {
2208 SVGA3dShaderDestToken tmp =
2209 writemask(get_temp( emit ), TGSI_WRITEMASK_X );
2210
2211 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
2212 return false;
2213
2214 return submit_op1(emit, inst_token( SVGA3DOP_MOV ),
2215 dst, scalar(src(tmp), 0) );
2216 }
2217 else {
2218 return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
2219 }
2220 }
2221
2222
2223 /**
2224 * Emit a LRP (linear interpolation) instruction.
2225 */
2226 static bool
submit_lrp(struct svga_shader_emitter * emit,SVGA3dShaderDestToken dst,struct src_register src0,struct src_register src1,struct src_register src2)2227 submit_lrp(struct svga_shader_emitter *emit,
2228 SVGA3dShaderDestToken dst,
2229 struct src_register src0,
2230 struct src_register src1,
2231 struct src_register src2)
2232 {
2233 SVGA3dShaderDestToken tmp;
2234 bool need_dst_tmp = false;
2235
2236 /* The dst reg must be a temporary, and not be the same as src0 or src2 */
2237 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
2238 alias_src_dst(src0, dst) ||
2239 alias_src_dst(src2, dst))
2240 need_dst_tmp = true;
2241
2242 if (need_dst_tmp) {
2243 tmp = get_temp( emit );
2244 tmp.mask = dst.mask;
2245 }
2246 else {
2247 tmp = dst;
2248 }
2249
2250 if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
2251 return false;
2252
2253 if (need_dst_tmp) {
2254 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
2255 return false;
2256 }
2257
2258 return true;
2259 }
2260
2261
2262 /**
2263 * Translate/emit LRP (Linear Interpolation) instruction.
2264 */
2265 static bool
emit_lrp(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2266 emit_lrp(struct svga_shader_emitter *emit,
2267 const struct tgsi_full_instruction *insn)
2268 {
2269 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2270 const struct src_register src0 = translate_src_register(
2271 emit, &insn->Src[0] );
2272 const struct src_register src1 = translate_src_register(
2273 emit, &insn->Src[1] );
2274 const struct src_register src2 = translate_src_register(
2275 emit, &insn->Src[2] );
2276
2277 return submit_lrp(emit, dst, src0, src1, src2);
2278 }
2279
2280 /**
2281 * Translate/emit DST (Distance function) instruction.
2282 */
2283 static bool
emit_dst_insn(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2284 emit_dst_insn(struct svga_shader_emitter *emit,
2285 const struct tgsi_full_instruction *insn)
2286 {
2287 if (emit->unit == PIPE_SHADER_VERTEX) {
2288 /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
2289 */
2290 return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
2291 }
2292 else {
2293 /* result[0] = 1 * 1;
2294 * result[1] = a[1] * b[1];
2295 * result[2] = a[2] * 1;
2296 * result[3] = 1 * b[3];
2297 */
2298 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2299 SVGA3dShaderDestToken tmp;
2300 const struct src_register src0 = translate_src_register(
2301 emit, &insn->Src[0] );
2302 const struct src_register src1 = translate_src_register(
2303 emit, &insn->Src[1] );
2304 bool need_tmp = false;
2305
2306 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
2307 alias_src_dst(src0, dst) ||
2308 alias_src_dst(src1, dst))
2309 need_tmp = true;
2310
2311 if (need_tmp) {
2312 tmp = get_temp( emit );
2313 }
2314 else {
2315 tmp = dst;
2316 }
2317
2318 /* tmp.xw = 1.0
2319 */
2320 if (tmp.mask & TGSI_WRITEMASK_XW) {
2321 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2322 writemask(tmp, TGSI_WRITEMASK_XW ),
2323 get_one_immediate(emit)))
2324 return false;
2325 }
2326
2327 /* tmp.yz = src0
2328 */
2329 if (tmp.mask & TGSI_WRITEMASK_YZ) {
2330 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2331 writemask(tmp, TGSI_WRITEMASK_YZ ),
2332 src0))
2333 return false;
2334 }
2335
2336 /* tmp.yw = tmp * src1
2337 */
2338 if (tmp.mask & TGSI_WRITEMASK_YW) {
2339 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2340 writemask(tmp, TGSI_WRITEMASK_YW ),
2341 src(tmp),
2342 src1))
2343 return false;
2344 }
2345
2346 /* dst = tmp
2347 */
2348 if (need_tmp) {
2349 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2350 dst,
2351 src(tmp)))
2352 return false;
2353 }
2354 }
2355
2356 return true;
2357 }
2358
2359
2360 static bool
emit_exp(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2361 emit_exp(struct svga_shader_emitter *emit,
2362 const struct tgsi_full_instruction *insn)
2363 {
2364 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2365 struct src_register src0 =
2366 translate_src_register( emit, &insn->Src[0] );
2367 SVGA3dShaderDestToken fraction;
2368
2369 if (dst.mask & TGSI_WRITEMASK_Y)
2370 fraction = dst;
2371 else if (dst.mask & TGSI_WRITEMASK_X)
2372 fraction = get_temp( emit );
2373 else
2374 fraction.value = 0;
2375
2376 /* If y is being written, fill it with src0 - floor(src0).
2377 */
2378 if (dst.mask & TGSI_WRITEMASK_XY) {
2379 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2380 writemask( fraction, TGSI_WRITEMASK_Y ),
2381 src0 ))
2382 return false;
2383 }
2384
2385 /* If x is being written, fill it with 2 ^ floor(src0).
2386 */
2387 if (dst.mask & TGSI_WRITEMASK_X) {
2388 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2389 writemask( dst, TGSI_WRITEMASK_X ),
2390 src0,
2391 scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
2392 return false;
2393
2394 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2395 writemask( dst, TGSI_WRITEMASK_X ),
2396 scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
2397 return false;
2398
2399 if (!(dst.mask & TGSI_WRITEMASK_Y))
2400 release_temp( emit, fraction );
2401 }
2402
2403 /* If z is being written, fill it with 2 ^ src0 (partial precision).
2404 */
2405 if (dst.mask & TGSI_WRITEMASK_Z) {
2406 if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
2407 writemask( dst, TGSI_WRITEMASK_Z ),
2408 src0 ) )
2409 return false;
2410 }
2411
2412 /* If w is being written, fill it with one.
2413 */
2414 if (dst.mask & TGSI_WRITEMASK_W) {
2415 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2416 writemask(dst, TGSI_WRITEMASK_W),
2417 get_one_immediate(emit)))
2418 return false;
2419 }
2420
2421 return true;
2422 }
2423
2424
2425 /**
2426 * Translate/emit LIT (Lighting helper) instruction.
2427 */
2428 static bool
emit_lit(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2429 emit_lit(struct svga_shader_emitter *emit,
2430 const struct tgsi_full_instruction *insn)
2431 {
2432 if (emit->unit == PIPE_SHADER_VERTEX) {
2433 /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
2434 */
2435 return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
2436 }
2437 else {
2438 /* D3D vs. GL semantics can be fairly easily accommodated by
2439 * variations on this sequence.
2440 *
2441 * GL:
2442 * tmp.y = src.x
2443 * tmp.z = pow(src.y,src.w)
2444 * p0 = src0.xxxx > 0
2445 * result = zero.wxxw
2446 * (p0) result.yz = tmp
2447 *
2448 * D3D:
2449 * tmp.y = src.x
2450 * tmp.z = pow(src.y,src.w)
2451 * p0 = src0.xxyy > 0
2452 * result = zero.wxxw
2453 * (p0) result.yz = tmp
2454 *
2455 * Will implement the GL version for now.
2456 */
2457 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2458 SVGA3dShaderDestToken tmp = get_temp( emit );
2459 const struct src_register src0 = translate_src_register(
2460 emit, &insn->Src[0] );
2461
2462 /* tmp = pow(src.y, src.w)
2463 */
2464 if (dst.mask & TGSI_WRITEMASK_Z) {
2465 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
2466 tmp,
2467 scalar(src0, 1),
2468 scalar(src0, 3)))
2469 return false;
2470 }
2471
2472 /* tmp.y = src.x
2473 */
2474 if (dst.mask & TGSI_WRITEMASK_Y) {
2475 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2476 writemask(tmp, TGSI_WRITEMASK_Y ),
2477 scalar(src0, 0)))
2478 return false;
2479 }
2480
2481 /* Can't quite do this with emit conditional due to the extra
2482 * writemask on the predicated mov:
2483 */
2484 {
2485 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
2486 struct src_register predsrc;
2487
2488 /* D3D vs GL semantics:
2489 */
2490 if (0)
2491 predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
2492 else
2493 predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */
2494
2495 /* SETP src0.xxyy, GT, {0}.x */
2496 if (!submit_op2( emit,
2497 inst_token_setp(SVGA3DOPCOMP_GT),
2498 pred_reg,
2499 predsrc,
2500 get_zero_immediate(emit)))
2501 return false;
2502
2503 /* MOV dst, fail */
2504 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
2505 get_immediate(emit, 1.0f, 0.0f, 0.0f, 1.0f)))
2506 return false;
2507
2508 /* MOV dst.yz, tmp (predicated)
2509 *
2510 * Note that the predicate reg (and possible modifiers) is passed
2511 * as the first source argument.
2512 */
2513 if (dst.mask & TGSI_WRITEMASK_YZ) {
2514 if (!submit_op2( emit,
2515 inst_token_predicated(SVGA3DOP_MOV),
2516 writemask(dst, TGSI_WRITEMASK_YZ),
2517 src( pred_reg ), src( tmp ) ))
2518 return false;
2519 }
2520 }
2521 }
2522
2523 return true;
2524 }
2525
2526
2527 static bool
emit_ex2(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2528 emit_ex2(struct svga_shader_emitter *emit,
2529 const struct tgsi_full_instruction *insn)
2530 {
2531 SVGA3dShaderInstToken inst;
2532 SVGA3dShaderDestToken dst;
2533 struct src_register src0;
2534
2535 inst = inst_token( SVGA3DOP_EXP );
2536 dst = translate_dst_register( emit, insn, 0 );
2537 src0 = translate_src_register( emit, &insn->Src[0] );
2538 src0 = scalar( src0, TGSI_SWIZZLE_X );
2539
2540 if (dst.mask != TGSI_WRITEMASK_XYZW) {
2541 SVGA3dShaderDestToken tmp = get_temp( emit );
2542
2543 if (!submit_op1( emit, inst, tmp, src0 ))
2544 return false;
2545
2546 return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2547 dst,
2548 scalar( src( tmp ), TGSI_SWIZZLE_X ) );
2549 }
2550
2551 return submit_op1( emit, inst, dst, src0 );
2552 }
2553
2554
2555 static bool
emit_log(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2556 emit_log(struct svga_shader_emitter *emit,
2557 const struct tgsi_full_instruction *insn)
2558 {
2559 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
2560 struct src_register src0 =
2561 translate_src_register( emit, &insn->Src[0] );
2562 SVGA3dShaderDestToken abs_tmp;
2563 struct src_register abs_src0;
2564 SVGA3dShaderDestToken log2_abs;
2565
2566 abs_tmp.value = 0;
2567
2568 if (dst.mask & TGSI_WRITEMASK_Z)
2569 log2_abs = dst;
2570 else if (dst.mask & TGSI_WRITEMASK_XY)
2571 log2_abs = get_temp( emit );
2572 else
2573 log2_abs.value = 0;
2574
2575 /* If z is being written, fill it with log2( abs( src0 ) ).
2576 */
2577 if (dst.mask & TGSI_WRITEMASK_XYZ) {
2578 if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
2579 abs_src0 = src0;
2580 else {
2581 abs_tmp = get_temp( emit );
2582
2583 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2584 abs_tmp,
2585 src0 ) )
2586 return false;
2587
2588 abs_src0 = src( abs_tmp );
2589 }
2590
2591 abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
2592
2593 if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
2594 writemask( log2_abs, TGSI_WRITEMASK_Z ),
2595 abs_src0 ) )
2596 return false;
2597 }
2598
2599 if (dst.mask & TGSI_WRITEMASK_XY) {
2600 SVGA3dShaderDestToken floor_log2;
2601
2602 if (dst.mask & TGSI_WRITEMASK_X)
2603 floor_log2 = dst;
2604 else
2605 floor_log2 = get_temp( emit );
2606
2607 /* If x is being written, fill it with floor( log2( abs( src0 ) ) ).
2608 */
2609 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
2610 writemask( floor_log2, TGSI_WRITEMASK_X ),
2611 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
2612 return false;
2613
2614 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
2615 writemask( floor_log2, TGSI_WRITEMASK_X ),
2616 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
2617 negate( src( floor_log2 ) ) ) )
2618 return false;
2619
2620 /* If y is being written, fill it with
2621 * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
2622 */
2623 if (dst.mask & TGSI_WRITEMASK_Y) {
2624 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
2625 writemask( dst, TGSI_WRITEMASK_Y ),
2626 negate( scalar( src( floor_log2 ),
2627 TGSI_SWIZZLE_X ) ) ) )
2628 return false;
2629
2630 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
2631 writemask( dst, TGSI_WRITEMASK_Y ),
2632 src( dst ),
2633 abs_src0 ) )
2634 return false;
2635 }
2636
2637 if (!(dst.mask & TGSI_WRITEMASK_X))
2638 release_temp( emit, floor_log2 );
2639
2640 if (!(dst.mask & TGSI_WRITEMASK_Z))
2641 release_temp( emit, log2_abs );
2642 }
2643
2644 if (dst.mask & TGSI_WRITEMASK_XYZ && src0.base.srcMod &&
2645 src0.base.srcMod != SVGA3DSRCMOD_ABS)
2646 release_temp( emit, abs_tmp );
2647
2648 /* If w is being written, fill it with one.
2649 */
2650 if (dst.mask & TGSI_WRITEMASK_W) {
2651 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
2652 writemask(dst, TGSI_WRITEMASK_W),
2653 get_one_immediate(emit)))
2654 return false;
2655 }
2656
2657 return true;
2658 }
2659
2660
2661 /**
2662 * Translate TGSI TRUNC or ROUND instruction.
2663 * We need to truncate toward zero. Ex: trunc(-1.9) = -1
2664 * Different approaches are needed for VS versus PS.
2665 */
2666 static bool
emit_trunc_round(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn,bool round)2667 emit_trunc_round(struct svga_shader_emitter *emit,
2668 const struct tgsi_full_instruction *insn,
2669 bool round)
2670 {
2671 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
2672 const struct src_register src0 =
2673 translate_src_register(emit, &insn->Src[0] );
2674 SVGA3dShaderDestToken t1 = get_temp(emit);
2675
2676 if (round) {
2677 SVGA3dShaderDestToken t0 = get_temp(emit);
2678 struct src_register half = get_half_immediate(emit);
2679
2680 /* t0 = abs(src0) + 0.5 */
2681 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t0,
2682 absolute(src0), half))
2683 return false;
2684
2685 /* t1 = fract(t0) */
2686 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, src(t0)))
2687 return false;
2688
2689 /* t1 = t0 - t1 */
2690 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, src(t0),
2691 negate(src(t1))))
2692 return false;
2693 }
2694 else {
2695 /* trunc */
2696
2697 /* t1 = fract(abs(src0)) */
2698 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, absolute(src0)))
2699 return false;
2700
2701 /* t1 = abs(src0) - t1 */
2702 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, absolute(src0),
2703 negate(src(t1))))
2704 return false;
2705 }
2706
2707 /*
2708 * Now we need to multiply t1 by the sign of the original value.
2709 */
2710 if (emit->unit == PIPE_SHADER_VERTEX) {
2711 /* For VS: use SGN instruction */
2712 /* Need two extra/dummy registers: */
2713 SVGA3dShaderDestToken t2 = get_temp(emit), t3 = get_temp(emit),
2714 t4 = get_temp(emit);
2715
2716 /* t2 = sign(src0) */
2717 if (!submit_op3(emit, inst_token(SVGA3DOP_SGN), t2, src0,
2718 src(t3), src(t4)))
2719 return false;
2720
2721 /* dst = t1 * t2 */
2722 if (!submit_op2(emit, inst_token(SVGA3DOP_MUL), dst, src(t1), src(t2)))
2723 return false;
2724 }
2725 else {
2726 /* For FS: Use CMP instruction */
2727 return submit_op3(emit, inst_token( SVGA3DOP_CMP ), dst,
2728 src0, src(t1), negate(src(t1)));
2729 }
2730
2731 return true;
2732 }
2733
2734
2735 /**
2736 * Translate/emit "begin subroutine" instruction/marker/label.
2737 */
2738 static bool
emit_bgnsub(struct svga_shader_emitter * emit,unsigned position,const struct tgsi_full_instruction * insn)2739 emit_bgnsub(struct svga_shader_emitter *emit,
2740 unsigned position,
2741 const struct tgsi_full_instruction *insn)
2742 {
2743 unsigned i;
2744
2745 /* Note that we've finished the main function and are now emitting
2746 * subroutines. This affects how we terminate the generated
2747 * shader.
2748 */
2749 emit->in_main_func = false;
2750
2751 for (i = 0; i < emit->nr_labels; i++) {
2752 if (emit->label[i] == position) {
2753 return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
2754 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
2755 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2756 }
2757 }
2758
2759 assert(0);
2760 return true;
2761 }
2762
2763
2764 /**
2765 * Translate/emit subroutine call instruction.
2766 */
2767 static bool
emit_call(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn)2768 emit_call(struct svga_shader_emitter *emit,
2769 const struct tgsi_full_instruction *insn)
2770 {
2771 unsigned position = insn->Label.Label;
2772 unsigned i;
2773
2774 for (i = 0; i < emit->nr_labels; i++) {
2775 if (emit->label[i] == position)
2776 break;
2777 }
2778
2779 if (emit->nr_labels == ARRAY_SIZE(emit->label))
2780 return false;
2781
2782 if (i == emit->nr_labels) {
2783 emit->label[i] = position;
2784 emit->nr_labels++;
2785 }
2786
2787 return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
2788 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
2789 }
2790
2791
2792 /**
2793 * Called at the end of the shader. Actually, emit special "fix-up"
2794 * code for the vertex/fragment shader.
2795 */
2796 static bool
emit_end(struct svga_shader_emitter * emit)2797 emit_end(struct svga_shader_emitter *emit)
2798 {
2799 if (emit->unit == PIPE_SHADER_VERTEX) {
2800 return emit_vs_postamble( emit );
2801 }
2802 else {
2803 return emit_ps_postamble( emit );
2804 }
2805 }
2806
2807
2808 /**
2809 * Translate any TGSI instruction to SVGA.
2810 */
2811 static bool
svga_emit_instruction(struct svga_shader_emitter * emit,unsigned position,const struct tgsi_full_instruction * insn)2812 svga_emit_instruction(struct svga_shader_emitter *emit,
2813 unsigned position,
2814 const struct tgsi_full_instruction *insn)
2815 {
2816 switch (insn->Instruction.Opcode) {
2817
2818 case TGSI_OPCODE_ARL:
2819 return emit_arl( emit, insn );
2820
2821 case TGSI_OPCODE_TEX:
2822 case TGSI_OPCODE_TXB:
2823 case TGSI_OPCODE_TXP:
2824 case TGSI_OPCODE_TXL:
2825 case TGSI_OPCODE_TXD:
2826 return emit_tex( emit, insn );
2827
2828 case TGSI_OPCODE_DDX:
2829 case TGSI_OPCODE_DDY:
2830 return emit_deriv( emit, insn );
2831
2832 case TGSI_OPCODE_BGNSUB:
2833 return emit_bgnsub( emit, position, insn );
2834
2835 case TGSI_OPCODE_ENDSUB:
2836 return true;
2837
2838 case TGSI_OPCODE_CAL:
2839 return emit_call( emit, insn );
2840
2841 case TGSI_OPCODE_FLR:
2842 return emit_floor( emit, insn );
2843
2844 case TGSI_OPCODE_TRUNC:
2845 return emit_trunc_round( emit, insn, false );
2846
2847 case TGSI_OPCODE_ROUND:
2848 return emit_trunc_round( emit, insn, true );
2849
2850 case TGSI_OPCODE_CEIL:
2851 return emit_ceil( emit, insn );
2852
2853 case TGSI_OPCODE_CMP:
2854 return emit_cmp( emit, insn );
2855
2856 case TGSI_OPCODE_DIV:
2857 return emit_div( emit, insn );
2858
2859 case TGSI_OPCODE_DP2:
2860 return emit_dp2( emit, insn );
2861
2862 case TGSI_OPCODE_COS:
2863 return emit_cos( emit, insn );
2864
2865 case TGSI_OPCODE_SIN:
2866 return emit_sin( emit, insn );
2867
2868 case TGSI_OPCODE_END:
2869 /* TGSI always finishes the main func with an END */
2870 return emit_end( emit );
2871
2872 case TGSI_OPCODE_KILL_IF:
2873 return emit_cond_discard( emit, insn );
2874
2875 /* Selection opcodes. The underlying language is fairly
2876 * non-orthogonal about these.
2877 */
2878 case TGSI_OPCODE_SEQ:
2879 return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
2880
2881 case TGSI_OPCODE_SNE:
2882 return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
2883
2884 case TGSI_OPCODE_SGT:
2885 return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
2886
2887 case TGSI_OPCODE_SGE:
2888 return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
2889
2890 case TGSI_OPCODE_SLT:
2891 return emit_select_op( emit, PIPE_FUNC_LESS, insn );
2892
2893 case TGSI_OPCODE_SLE:
2894 return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
2895
2896 case TGSI_OPCODE_POW:
2897 return emit_pow( emit, insn );
2898
2899 case TGSI_OPCODE_EX2:
2900 return emit_ex2( emit, insn );
2901
2902 case TGSI_OPCODE_EXP:
2903 return emit_exp( emit, insn );
2904
2905 case TGSI_OPCODE_LOG:
2906 return emit_log( emit, insn );
2907
2908 case TGSI_OPCODE_LG2:
2909 return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
2910
2911 case TGSI_OPCODE_RSQ:
2912 return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
2913
2914 case TGSI_OPCODE_RCP:
2915 return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
2916
2917 case TGSI_OPCODE_CONT:
2918 /* not expected (we return PIPE_SHADER_CAP_CONT_SUPPORTED = 0) */
2919 return false;
2920
2921 case TGSI_OPCODE_RET:
2922 /* This is a noop -- we tell mesa that we can't support RET
2923 * within a function (early return), so this will always be
2924 * followed by an ENDSUB.
2925 */
2926 return true;
2927
2928 /* These aren't actually used by any of the frontends we care
2929 * about:
2930 */
2931 case TGSI_OPCODE_AND:
2932 case TGSI_OPCODE_OR:
2933 case TGSI_OPCODE_I2F:
2934 case TGSI_OPCODE_NOT:
2935 case TGSI_OPCODE_SHL:
2936 case TGSI_OPCODE_ISHR:
2937 case TGSI_OPCODE_XOR:
2938 return false;
2939
2940 case TGSI_OPCODE_IF:
2941 return emit_if( emit, insn );
2942 case TGSI_OPCODE_ELSE:
2943 return emit_else( emit, insn );
2944 case TGSI_OPCODE_ENDIF:
2945 return emit_endif( emit, insn );
2946
2947 case TGSI_OPCODE_BGNLOOP:
2948 return emit_bgnloop( emit, insn );
2949 case TGSI_OPCODE_ENDLOOP:
2950 return emit_endloop( emit, insn );
2951 case TGSI_OPCODE_BRK:
2952 return emit_brk( emit, insn );
2953
2954 case TGSI_OPCODE_KILL:
2955 return emit_discard( emit, insn );
2956
2957 case TGSI_OPCODE_DST:
2958 return emit_dst_insn( emit, insn );
2959
2960 case TGSI_OPCODE_LIT:
2961 return emit_lit( emit, insn );
2962
2963 case TGSI_OPCODE_LRP:
2964 return emit_lrp( emit, insn );
2965
2966 case TGSI_OPCODE_SSG:
2967 return emit_ssg( emit, insn );
2968
2969 case TGSI_OPCODE_MOV:
2970 return emit_mov( emit, insn );
2971
2972 case TGSI_OPCODE_SQRT:
2973 return emit_sqrt( emit, insn );
2974
2975 default:
2976 {
2977 SVGA3dShaderOpCodeType opcode =
2978 translate_opcode(insn->Instruction.Opcode);
2979
2980 if (opcode == SVGA3DOP_LAST_INST)
2981 return false;
2982
2983 if (!emit_simple_instruction( emit, opcode, insn ))
2984 return false;
2985 }
2986 }
2987
2988 return true;
2989 }
2990
2991
2992 /**
2993 * Translate/emit a TGSI IMMEDIATE declaration.
2994 * An immediate vector is a constant that's hard-coded into the shader.
2995 */
2996 static bool
svga_emit_immediate(struct svga_shader_emitter * emit,const struct tgsi_full_immediate * imm)2997 svga_emit_immediate(struct svga_shader_emitter *emit,
2998 const struct tgsi_full_immediate *imm)
2999 {
3000 static const float id[4] = {0,0,0,1};
3001 float value[4];
3002 unsigned i;
3003
3004 assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
3005 for (i = 0; i < 4 && i < imm->Immediate.NrTokens - 1; i++) {
3006 float f = imm->u[i].Float;
3007 value[i] = util_is_inf_or_nan(f) ? 0.0f : f;
3008 }
3009
3010 /* If the immediate has less than four values, fill in the remaining
3011 * positions from id={0,0,0,1}.
3012 */
3013 for ( ; i < 4; i++ )
3014 value[i] = id[i];
3015
3016 return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
3017 emit->imm_start + emit->internal_imm_count++,
3018 value[0], value[1], value[2], value[3]);
3019 }
3020
3021
3022 static bool
make_immediate(struct svga_shader_emitter * emit,float a,float b,float c,float d,struct src_register * out)3023 make_immediate(struct svga_shader_emitter *emit,
3024 float a, float b, float c, float d,
3025 struct src_register *out )
3026 {
3027 unsigned idx = emit->nr_hw_float_const++;
3028
3029 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
3030 idx, a, b, c, d ))
3031 return false;
3032
3033 *out = src_register( SVGA3DREG_CONST, idx );
3034
3035 return true;
3036 }
3037
3038
3039 /**
3040 * Emit special VS instructions at top of shader.
3041 */
3042 static bool
emit_vs_preamble(struct svga_shader_emitter * emit)3043 emit_vs_preamble(struct svga_shader_emitter *emit)
3044 {
3045 if (!emit->key.vs.need_prescale) {
3046 if (!make_immediate( emit, 0, 0, .5, .5,
3047 &emit->imm_0055))
3048 return false;
3049 }
3050
3051 return true;
3052 }
3053
3054
3055 /**
3056 * Emit special PS instructions at top of shader.
3057 */
3058 static bool
emit_ps_preamble(struct svga_shader_emitter * emit)3059 emit_ps_preamble(struct svga_shader_emitter *emit)
3060 {
3061 if (emit->ps_reads_pos && emit->info.reads_z) {
3062 /*
3063 * Assemble the position from various bits of inputs. Depth and W are
3064 * passed in a texcoord this is due to D3D's vPos not hold Z or W.
3065 * Also fixup the perspective interpolation.
3066 *
3067 * temp_pos.xy = vPos.xy
3068 * temp_pos.w = rcp(texcoord1.w);
3069 * temp_pos.z = texcoord1.z * temp_pos.w;
3070 */
3071 if (!submit_op1( emit,
3072 inst_token(SVGA3DOP_MOV),
3073 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_XY ),
3074 emit->ps_true_pos ))
3075 return false;
3076
3077 if (!submit_op1( emit,
3078 inst_token(SVGA3DOP_RCP),
3079 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_W ),
3080 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_W ) ))
3081 return false;
3082
3083 if (!submit_op2( emit,
3084 inst_token(SVGA3DOP_MUL),
3085 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_Z ),
3086 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_Z ),
3087 scalar( src(emit->ps_temp_pos), TGSI_SWIZZLE_W ) ))
3088 return false;
3089 }
3090
3091 return true;
3092 }
3093
3094
3095 /**
3096 * Emit special PS instructions at end of shader.
3097 */
3098 static bool
emit_ps_postamble(struct svga_shader_emitter * emit)3099 emit_ps_postamble(struct svga_shader_emitter *emit)
3100 {
3101 unsigned i;
3102
3103 /* PS oDepth is incredibly fragile and it's very hard to catch the
3104 * types of usage that break it during shader emit. Easier just to
3105 * redirect the main program to a temporary and then only touch
3106 * oDepth with a hand-crafted MOV below.
3107 */
3108 if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
3109 if (!submit_op1( emit,
3110 inst_token(SVGA3DOP_MOV),
3111 emit->true_pos,
3112 scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
3113 return false;
3114 }
3115
3116 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
3117 if (SVGA3dShaderGetRegType(emit->true_color_output[i].value) != 0) {
3118 /* Potentially override output colors with white for XOR
3119 * logicop workaround.
3120 */
3121 if (emit->unit == PIPE_SHADER_FRAGMENT &&
3122 emit->key.fs.white_fragments) {
3123 struct src_register one = get_one_immediate(emit);
3124
3125 if (!submit_op1( emit,
3126 inst_token(SVGA3DOP_MOV),
3127 emit->true_color_output[i],
3128 one ))
3129 return false;
3130 }
3131 else if (emit->unit == PIPE_SHADER_FRAGMENT &&
3132 i < emit->key.fs.write_color0_to_n_cbufs) {
3133 /* Write temp color output [0] to true output [i] */
3134 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV),
3135 emit->true_color_output[i],
3136 src(emit->temp_color_output[0]))) {
3137 return false;
3138 }
3139 }
3140 else {
3141 if (!submit_op1( emit,
3142 inst_token(SVGA3DOP_MOV),
3143 emit->true_color_output[i],
3144 src(emit->temp_color_output[i]) ))
3145 return false;
3146 }
3147 }
3148 }
3149
3150 return true;
3151 }
3152
3153
3154 /**
3155 * Emit special VS instructions at end of shader.
3156 */
3157 static bool
emit_vs_postamble(struct svga_shader_emitter * emit)3158 emit_vs_postamble(struct svga_shader_emitter *emit)
3159 {
3160 /* PSIZ output is incredibly fragile and it's very hard to catch
3161 * the types of usage that break it during shader emit. Easier
3162 * just to redirect the main program to a temporary and then only
3163 * touch PSIZ with a hand-crafted MOV below.
3164 */
3165 if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
3166 if (!submit_op1( emit,
3167 inst_token(SVGA3DOP_MOV),
3168 emit->true_psiz,
3169 scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
3170 return false;
3171 }
3172
3173 /* Need to perform various manipulations on vertex position to cope
3174 * with the different GL and D3D clip spaces.
3175 */
3176 if (emit->key.vs.need_prescale) {
3177 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
3178 SVGA3dShaderDestToken depth = emit->depth_pos;
3179 SVGA3dShaderDestToken pos = emit->true_pos;
3180 unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
3181 struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
3182 offset + 0 );
3183 struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
3184 offset + 1 );
3185
3186 if (!submit_op1( emit,
3187 inst_token(SVGA3DOP_MOV),
3188 writemask(depth, TGSI_WRITEMASK_W),
3189 scalar(src(temp_pos), TGSI_SWIZZLE_W) ))
3190 return false;
3191
3192 /* MUL temp_pos.xyz, temp_pos, prescale.scale
3193 * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
3194 * --> Note that prescale.trans.w == 0
3195 */
3196 if (!submit_op2( emit,
3197 inst_token(SVGA3DOP_MUL),
3198 writemask(temp_pos, TGSI_WRITEMASK_XYZ),
3199 src(temp_pos),
3200 prescale_scale ))
3201 return false;
3202
3203 if (!submit_op3( emit,
3204 inst_token(SVGA3DOP_MAD),
3205 pos,
3206 swizzle(src(temp_pos), 3, 3, 3, 3),
3207 prescale_trans,
3208 src(temp_pos)))
3209 return false;
3210
3211 /* Also write to depth value */
3212 if (!submit_op3( emit,
3213 inst_token(SVGA3DOP_MAD),
3214 writemask(depth, TGSI_WRITEMASK_Z),
3215 swizzle(src(temp_pos), 3, 3, 3, 3),
3216 prescale_trans,
3217 src(temp_pos) ))
3218 return false;
3219 }
3220 else {
3221 SVGA3dShaderDestToken temp_pos = emit->temp_pos;
3222 SVGA3dShaderDestToken depth = emit->depth_pos;
3223 SVGA3dShaderDestToken pos = emit->true_pos;
3224 struct src_register imm_0055 = emit->imm_0055;
3225
3226 /* Adjust GL clipping coordinate space to hardware (D3D-style):
3227 *
3228 * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
3229 * MOV result.position, temp_pos
3230 */
3231 if (!submit_op2( emit,
3232 inst_token(SVGA3DOP_DP4),
3233 writemask(temp_pos, TGSI_WRITEMASK_Z),
3234 imm_0055,
3235 src(temp_pos) ))
3236 return false;
3237
3238 if (!submit_op1( emit,
3239 inst_token(SVGA3DOP_MOV),
3240 pos,
3241 src(temp_pos) ))
3242 return false;
3243
3244 /* Move the manipulated depth into the extra texcoord reg */
3245 if (!submit_op1( emit,
3246 inst_token(SVGA3DOP_MOV),
3247 writemask(depth, TGSI_WRITEMASK_ZW),
3248 src(temp_pos) ))
3249 return false;
3250 }
3251
3252 return true;
3253 }
3254
3255
3256 /**
3257 * For the pixel shader: emit the code which chooses the front
3258 * or back face color depending on triangle orientation.
3259 * This happens at the top of the fragment shader.
3260 *
3261 * 0: IF VFACE :4
3262 * 1: COLOR = FrontColor;
3263 * 2: ELSE
3264 * 3: COLOR = BackColor;
3265 * 4: ENDIF
3266 */
3267 static bool
emit_light_twoside(struct svga_shader_emitter * emit)3268 emit_light_twoside(struct svga_shader_emitter *emit)
3269 {
3270 struct src_register vface, zero;
3271 struct src_register front[2];
3272 struct src_register back[2];
3273 SVGA3dShaderDestToken color[2];
3274 int count = emit->internal_color_count;
3275 unsigned i;
3276 SVGA3dShaderInstToken if_token;
3277
3278 if (count == 0)
3279 return true;
3280
3281 vface = get_vface( emit );
3282 zero = get_zero_immediate(emit);
3283
3284 /* Can't use get_temp() to allocate the color reg as such
3285 * temporaries will be reclaimed after each instruction by the call
3286 * to reset_temp_regs().
3287 */
3288 for (i = 0; i < count; i++) {
3289 color[i] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ );
3290 front[i] = emit->input_map[emit->internal_color_idx[i]];
3291
3292 /* Back is always the next input:
3293 */
3294 back[i] = front[i];
3295 back[i].base.num = front[i].base.num + 1;
3296
3297 /* Reassign the input_map to the actual front-face color:
3298 */
3299 emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
3300 }
3301
3302 if_token = inst_token( SVGA3DOP_IFC );
3303
3304 if (emit->key.fs.front_ccw)
3305 if_token.control = SVGA3DOPCOMP_LT;
3306 else
3307 if_token.control = SVGA3DOPCOMP_GT;
3308
3309 if (!(emit_instruction( emit, if_token ) &&
3310 emit_src( emit, vface ) &&
3311 emit_src( emit, zero ) ))
3312 return false;
3313
3314 for (i = 0; i < count; i++) {
3315 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
3316 return false;
3317 }
3318
3319 if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
3320 return false;
3321
3322 for (i = 0; i < count; i++) {
3323 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
3324 return false;
3325 }
3326
3327 if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
3328 return false;
3329
3330 return true;
3331 }
3332
3333
3334 /**
3335 * Emit special setup code for the front/back face register in the FS.
3336 * 0: SETP_GT TEMP, VFACE, 0
3337 * where TEMP is a fake frontface register
3338 */
3339 static bool
emit_frontface(struct svga_shader_emitter * emit)3340 emit_frontface(struct svga_shader_emitter *emit)
3341 {
3342 struct src_register vface;
3343 SVGA3dShaderDestToken temp;
3344 struct src_register pass, fail;
3345
3346 vface = get_vface( emit );
3347
3348 /* Can't use get_temp() to allocate the fake frontface reg as such
3349 * temporaries will be reclaimed after each instruction by the call
3350 * to reset_temp_regs().
3351 */
3352 temp = dst_register( SVGA3DREG_TEMP,
3353 emit->nr_hw_temp++ );
3354
3355 if (emit->key.fs.front_ccw) {
3356 pass = get_zero_immediate(emit);
3357 fail = get_one_immediate(emit);
3358 } else {
3359 pass = get_one_immediate(emit);
3360 fail = get_zero_immediate(emit);
3361 }
3362
3363 if (!emit_conditional(emit, PIPE_FUNC_GREATER,
3364 temp, vface, get_zero_immediate(emit),
3365 pass, fail))
3366 return false;
3367
3368 /* Reassign the input_map to the actual front-face color:
3369 */
3370 emit->input_map[emit->internal_frontface_idx] = src(temp);
3371
3372 return true;
3373 }
3374
3375
3376 /**
3377 * Emit code to invert the T component of the incoming texture coordinate.
3378 * This is used for drawing point sprites when
3379 * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT.
3380 */
3381 static bool
emit_inverted_texcoords(struct svga_shader_emitter * emit)3382 emit_inverted_texcoords(struct svga_shader_emitter *emit)
3383 {
3384 unsigned inverted_texcoords = emit->inverted_texcoords;
3385
3386 while (inverted_texcoords) {
3387 const unsigned unit = ffs(inverted_texcoords) - 1;
3388
3389 assert(emit->inverted_texcoords & (1 << unit));
3390
3391 assert(unit < ARRAY_SIZE(emit->ps_true_texcoord));
3392
3393 assert(unit < ARRAY_SIZE(emit->ps_inverted_texcoord_input));
3394
3395 assert(emit->ps_inverted_texcoord_input[unit]
3396 < ARRAY_SIZE(emit->input_map));
3397
3398 /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */
3399 if (!submit_op3(emit,
3400 inst_token(SVGA3DOP_MAD),
3401 dst(emit->ps_inverted_texcoord[unit]),
3402 emit->ps_true_texcoord[unit],
3403 get_immediate(emit, 1.0f, -1.0f, 1.0f, 1.0f),
3404 get_immediate(emit, 0.0f, 1.0f, 0.0f, 0.0f)))
3405 return false;
3406
3407 /* Reassign the input_map entry to the new texcoord register */
3408 emit->input_map[emit->ps_inverted_texcoord_input[unit]] =
3409 emit->ps_inverted_texcoord[unit];
3410
3411 inverted_texcoords &= ~(1 << unit);
3412 }
3413
3414 return true;
3415 }
3416
3417
3418 /**
3419 * Emit code to adjust vertex shader inputs/attributes:
3420 * - Change range from [0,1] to [-1,1] (for normalized byte/short attribs).
3421 * - Set attrib W component = 1.
3422 */
3423 static bool
emit_adjusted_vertex_attribs(struct svga_shader_emitter * emit)3424 emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
3425 {
3426 unsigned adjust_mask = (emit->key.vs.adjust_attrib_range |
3427 emit->key.vs.adjust_attrib_w_1);
3428
3429 while (adjust_mask) {
3430 /* Adjust vertex attrib range and/or set W component = 1 */
3431 const unsigned index = u_bit_scan(&adjust_mask);
3432 struct src_register tmp;
3433
3434 /* allocate a temp reg */
3435 tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
3436 emit->nr_hw_temp++;
3437
3438 if (emit->key.vs.adjust_attrib_range & (1 << index)) {
3439 /* The vertex input/attribute is supposed to be a signed value in
3440 * the range [-1,1] but we actually fetched/converted it to the
3441 * range [0,1]. This most likely happens when the app specifies a
3442 * signed byte attribute but we interpreted it as unsigned bytes.
3443 * See also svga_translate_vertex_format().
3444 *
3445 * Here, we emit some extra instructions to adjust
3446 * the attribute values from [0,1] to [-1,1].
3447 *
3448 * The adjustment we implement is:
3449 * new_attrib = attrib * 2.0;
3450 * if (attrib >= 0.5)
3451 * new_attrib = new_attrib - 2.0;
3452 * This isn't exactly right (it's off by a bit or so) but close enough.
3453 */
3454 SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0);
3455
3456 /* tmp = attrib * 2.0 */
3457 if (!submit_op2(emit,
3458 inst_token(SVGA3DOP_MUL),
3459 dst(tmp),
3460 emit->input_map[index],
3461 get_two_immediate(emit)))
3462 return false;
3463
3464 /* pred = (attrib >= 0.5) */
3465 if (!submit_op2(emit,
3466 inst_token_setp(SVGA3DOPCOMP_GE),
3467 pred_reg,
3468 emit->input_map[index], /* vert attrib */
3469 get_half_immediate(emit))) /* 0.5 */
3470 return false;
3471
3472 /* sub(pred) tmp, tmp, 2.0 */
3473 if (!submit_op3(emit,
3474 inst_token_predicated(SVGA3DOP_SUB),
3475 dst(tmp),
3476 src(pred_reg),
3477 tmp,
3478 get_two_immediate(emit)))
3479 return false;
3480 }
3481 else {
3482 /* just copy the vertex input attrib to the temp register */
3483 if (!submit_op1(emit,
3484 inst_token(SVGA3DOP_MOV),
3485 dst(tmp),
3486 emit->input_map[index]))
3487 return false;
3488 }
3489
3490 if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) {
3491 /* move 1 into W position of tmp */
3492 if (!submit_op1(emit,
3493 inst_token(SVGA3DOP_MOV),
3494 writemask(dst(tmp), TGSI_WRITEMASK_W),
3495 get_one_immediate(emit)))
3496 return false;
3497 }
3498
3499 /* Reassign the input_map entry to the new tmp register */
3500 emit->input_map[index] = tmp;
3501 }
3502
3503 return true;
3504 }
3505
3506
3507 /**
3508 * Determine if we need to create the "common" immediate value which is
3509 * used for generating useful vector constants such as {0,0,0,0} and
3510 * {1,1,1,1}.
3511 * We could just do this all the time except that we want to conserve
3512 * registers whenever possible.
3513 */
3514 static bool
needs_to_create_common_immediate(const struct svga_shader_emitter * emit)3515 needs_to_create_common_immediate(const struct svga_shader_emitter *emit)
3516 {
3517 unsigned i;
3518
3519 if (emit->unit == PIPE_SHADER_FRAGMENT) {
3520 if (emit->key.fs.light_twoside)
3521 return true;
3522
3523 if (emit->key.fs.white_fragments)
3524 return true;
3525
3526 if (emit->emit_frontface)
3527 return true;
3528
3529 if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
3530 emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 ||
3531 emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
3532 return true;
3533
3534 if (emit->inverted_texcoords)
3535 return true;
3536
3537 /* look for any PIPE_SWIZZLE_0/ONE terms */
3538 for (i = 0; i < emit->key.num_textures; i++) {
3539 if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_W ||
3540 emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_W ||
3541 emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_W ||
3542 emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_W)
3543 return true;
3544 }
3545
3546 for (i = 0; i < emit->key.num_textures; i++) {
3547 if (emit->key.tex[i].compare_mode
3548 == PIPE_TEX_COMPARE_R_TO_TEXTURE)
3549 return true;
3550 }
3551 }
3552 else if (emit->unit == PIPE_SHADER_VERTEX) {
3553 if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1)
3554 return true;
3555 if (emit->key.vs.adjust_attrib_range ||
3556 emit->key.vs.adjust_attrib_w_1)
3557 return true;
3558 }
3559
3560 if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
3561 emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 ||
3562 emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 ||
3563 emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 ||
3564 emit->info.opcode_count[TGSI_OPCODE_ROUND] >= 1 ||
3565 emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
3566 emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
3567 emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
3568 emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
3569 emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
3570 emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
3571 emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
3572 emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
3573 emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1 ||
3574 emit->info.opcode_count[TGSI_OPCODE_SQRT] >= 1)
3575 return true;
3576
3577 return false;
3578 }
3579
3580
3581 /**
3582 * Do we need to create a looping constant?
3583 */
3584 static bool
needs_to_create_loop_const(const struct svga_shader_emitter * emit)3585 needs_to_create_loop_const(const struct svga_shader_emitter *emit)
3586 {
3587 return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
3588 }
3589
3590
3591 static bool
needs_to_create_arl_consts(const struct svga_shader_emitter * emit)3592 needs_to_create_arl_consts(const struct svga_shader_emitter *emit)
3593 {
3594 return (emit->num_arl_consts > 0);
3595 }
3596
3597
3598 static bool
pre_parse_add_indirect(struct svga_shader_emitter * emit,int num,int current_arl)3599 pre_parse_add_indirect( struct svga_shader_emitter *emit,
3600 int num, int current_arl)
3601 {
3602 unsigned i;
3603 assert(num < 0);
3604
3605 for (i = 0; i < emit->num_arl_consts; ++i) {
3606 if (emit->arl_consts[i].arl_num == current_arl)
3607 break;
3608 }
3609 /* new entry */
3610 if (emit->num_arl_consts == i) {
3611 ++emit->num_arl_consts;
3612 }
3613 emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ?
3614 num :
3615 emit->arl_consts[i].number;
3616 emit->arl_consts[i].arl_num = current_arl;
3617 return true;
3618 }
3619
3620
3621 static bool
pre_parse_instruction(struct svga_shader_emitter * emit,const struct tgsi_full_instruction * insn,int current_arl)3622 pre_parse_instruction( struct svga_shader_emitter *emit,
3623 const struct tgsi_full_instruction *insn,
3624 int current_arl)
3625 {
3626 if (insn->Src[0].Register.Indirect &&
3627 insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) {
3628 const struct tgsi_full_src_register *reg = &insn->Src[0];
3629 if (reg->Register.Index < 0) {
3630 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3631 }
3632 }
3633
3634 if (insn->Src[1].Register.Indirect &&
3635 insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) {
3636 const struct tgsi_full_src_register *reg = &insn->Src[1];
3637 if (reg->Register.Index < 0) {
3638 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3639 }
3640 }
3641
3642 if (insn->Src[2].Register.Indirect &&
3643 insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) {
3644 const struct tgsi_full_src_register *reg = &insn->Src[2];
3645 if (reg->Register.Index < 0) {
3646 pre_parse_add_indirect(emit, reg->Register.Index, current_arl);
3647 }
3648 }
3649
3650 return true;
3651 }
3652
3653
3654 static bool
pre_parse_tokens(struct svga_shader_emitter * emit,const struct tgsi_token * tokens)3655 pre_parse_tokens( struct svga_shader_emitter *emit,
3656 const struct tgsi_token *tokens )
3657 {
3658 struct tgsi_parse_context parse;
3659 int current_arl = 0;
3660
3661 tgsi_parse_init( &parse, tokens );
3662
3663 while (!tgsi_parse_end_of_tokens( &parse )) {
3664 tgsi_parse_token( &parse );
3665 switch (parse.FullToken.Token.Type) {
3666 case TGSI_TOKEN_TYPE_IMMEDIATE:
3667 case TGSI_TOKEN_TYPE_DECLARATION:
3668 break;
3669 case TGSI_TOKEN_TYPE_INSTRUCTION:
3670 if (parse.FullToken.FullInstruction.Instruction.Opcode ==
3671 TGSI_OPCODE_ARL) {
3672 ++current_arl;
3673 }
3674 if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
3675 current_arl ))
3676 return false;
3677 break;
3678 default:
3679 break;
3680 }
3681
3682 }
3683 return true;
3684 }
3685
3686
3687 static bool
svga_shader_emit_helpers(struct svga_shader_emitter * emit)3688 svga_shader_emit_helpers(struct svga_shader_emitter *emit)
3689 {
3690 if (needs_to_create_common_immediate( emit )) {
3691 create_common_immediate( emit );
3692 }
3693 if (needs_to_create_loop_const( emit )) {
3694 create_loop_const( emit );
3695 }
3696 if (needs_to_create_arl_consts( emit )) {
3697 create_arl_consts( emit );
3698 }
3699
3700 if (emit->unit == PIPE_SHADER_FRAGMENT) {
3701 if (!svga_shader_emit_samplers_decl( emit ))
3702 return false;
3703
3704 if (!emit_ps_preamble( emit ))
3705 return false;
3706
3707 if (emit->key.fs.light_twoside) {
3708 if (!emit_light_twoside( emit ))
3709 return false;
3710 }
3711 if (emit->emit_frontface) {
3712 if (!emit_frontface( emit ))
3713 return false;
3714 }
3715 if (emit->inverted_texcoords) {
3716 if (!emit_inverted_texcoords( emit ))
3717 return false;
3718 }
3719 }
3720 else {
3721 assert(emit->unit == PIPE_SHADER_VERTEX);
3722 if (emit->key.vs.adjust_attrib_range) {
3723 if (!emit_adjusted_vertex_attribs(emit) ||
3724 emit->key.vs.adjust_attrib_w_1) {
3725 return false;
3726 }
3727 }
3728 }
3729
3730 return true;
3731 }
3732
3733
3734 /**
3735 * This is the main entrypoint into the TGSI instruction translater.
3736 * Translate TGSI shader tokens into an SVGA shader.
3737 */
3738 bool
svga_shader_emit_instructions(struct svga_shader_emitter * emit,const struct tgsi_token * tokens)3739 svga_shader_emit_instructions(struct svga_shader_emitter *emit,
3740 const struct tgsi_token *tokens)
3741 {
3742 struct tgsi_parse_context parse;
3743 const struct tgsi_token *new_tokens = NULL;
3744 bool ret = true;
3745 bool helpers_emitted = false;
3746 unsigned line_nr = 0;
3747
3748 if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) {
3749 unsigned unit;
3750
3751 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
3752 TGSI_FILE_INPUT);
3753
3754 if (new_tokens) {
3755 /* Setup texture state for stipple */
3756 emit->sampler_target[unit] = TGSI_TEXTURE_2D;
3757 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
3758 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
3759 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
3760 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
3761
3762 emit->pstipple_sampler_unit = unit;
3763
3764 tokens = new_tokens;
3765 }
3766 }
3767
3768 tgsi_parse_init( &parse, tokens );
3769 emit->internal_imm_count = 0;
3770
3771 if (emit->unit == PIPE_SHADER_VERTEX) {
3772 ret = emit_vs_preamble( emit );
3773 if (!ret)
3774 goto done;
3775 }
3776
3777 pre_parse_tokens(emit, tokens);
3778
3779 while (!tgsi_parse_end_of_tokens( &parse )) {
3780 tgsi_parse_token( &parse );
3781
3782 switch (parse.FullToken.Token.Type) {
3783 case TGSI_TOKEN_TYPE_IMMEDIATE:
3784 ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
3785 if (!ret)
3786 goto done;
3787 break;
3788
3789 case TGSI_TOKEN_TYPE_DECLARATION:
3790 ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
3791 if (!ret)
3792 goto done;
3793 break;
3794
3795 case TGSI_TOKEN_TYPE_INSTRUCTION:
3796 if (!helpers_emitted) {
3797 if (!svga_shader_emit_helpers( emit ))
3798 goto done;
3799 helpers_emitted = true;
3800 }
3801 ret = svga_emit_instruction( emit,
3802 line_nr++,
3803 &parse.FullToken.FullInstruction );
3804 if (!ret)
3805 goto done;
3806 break;
3807 default:
3808 break;
3809 }
3810
3811 reset_temp_regs( emit );
3812 }
3813
3814 /* Need to terminate the current subroutine. Note that the
3815 * hardware doesn't tolerate shaders without sub-routines
3816 * terminating with RET+END.
3817 */
3818 if (!emit->in_main_func) {
3819 ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
3820 if (!ret)
3821 goto done;
3822 }
3823
3824 assert(emit->dynamic_branching_level == 0);
3825
3826 /* Need to terminate the whole shader:
3827 */
3828 ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
3829 if (!ret)
3830 goto done;
3831
3832 done:
3833 tgsi_parse_free( &parse );
3834 if (new_tokens) {
3835 tgsi_free_tokens(new_tokens);
3836 }
3837
3838 return ret;
3839 }
3840