1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 #include <stdarg.h>
29
30 #include "i915_context.h"
31 #include "i915_debug.h"
32 #include "i915_debug_private.h"
33 #include "i915_fpc.h"
34 #include "i915_reg.h"
35
36 #include "nir/nir.h"
37 #include "pipe/p_shader_tokens.h"
38 #include "tgsi/tgsi_dump.h"
39 #include "tgsi/tgsi_info.h"
40 #include "tgsi/tgsi_parse.h"
41 #include "util/log.h"
42 #include "util/ralloc.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "util/u_string.h"
46
47 #include "draw/draw_vertex.h"
48
49 #ifndef M_PI
50 #define M_PI 3.14159265358979323846
51 #endif
52
53 /**
54 * Simple pass-through fragment shader to use when we don't have
55 * a real shader (or it fails to compile for some reason).
56 */
57 static unsigned passthrough_program[] = {
58 _3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1),
59 /* move to output color:
60 */
61 (A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL |
62 (REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)),
63 ((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) |
64 (SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) |
65 (SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) |
66 (SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)),
67 0};
68
69 /**
70 * component-wise negation of ureg
71 */
72 static inline int
negate(int reg,int x,int y,int z,int w)73 negate(int reg, int x, int y, int z, int w)
74 {
75 /* Another neat thing about the UREG representation */
76 return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
77 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
78 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
79 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
80 }
81
82 /**
83 * In the event of a translation failure, we'll generate a simple color
84 * pass-through program.
85 */
86 static void
i915_use_passthrough_shader(struct i915_fragment_shader * fs)87 i915_use_passthrough_shader(struct i915_fragment_shader *fs)
88 {
89 fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program));
90 if (fs->program) {
91 memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
92 fs->program_len = ARRAY_SIZE(passthrough_program);
93 }
94 fs->num_constants = 0;
95 }
96
97 void
i915_program_error(struct i915_fp_compile * p,const char * msg,...)98 i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
99 {
100 va_list args;
101 va_start(args, msg);
102 ralloc_vasprintf_append(&p->error, msg, args);
103 va_end(args);
104 }
105
106 static uint32_t
get_mapping(struct i915_fragment_shader * fs,enum tgsi_semantic semantic,int index)107 get_mapping(struct i915_fragment_shader *fs, enum tgsi_semantic semantic,
108 int index)
109 {
110 int i;
111 for (i = 0; i < I915_TEX_UNITS; i++) {
112 if (fs->texcoords[i].semantic == -1) {
113 fs->texcoords[i].semantic = semantic;
114 fs->texcoords[i].index = index;
115 return i;
116 }
117 if (fs->texcoords[i].semantic == semantic &&
118 fs->texcoords[i].index == index)
119 return i;
120 }
121 debug_printf("Exceeded max generics\n");
122 return 0;
123 }
124
125 /**
126 * Construct a ureg for the given source register. Will emit
127 * constants, apply swizzling and negation as needed.
128 */
129 static uint32_t
src_vector(struct i915_fp_compile * p,const struct i915_full_src_register * source,struct i915_fragment_shader * fs)130 src_vector(struct i915_fp_compile *p,
131 const struct i915_full_src_register *source,
132 struct i915_fragment_shader *fs)
133 {
134 uint32_t index = source->Register.Index;
135 uint32_t src = 0, sem_name, sem_ind;
136
137 switch (source->Register.File) {
138 case TGSI_FILE_TEMPORARY:
139 if (source->Register.Index >= I915_MAX_TEMPORARY) {
140 i915_program_error(p, "Exceeded max temporary reg");
141 return 0;
142 }
143 src = UREG(REG_TYPE_R, index);
144 break;
145 case TGSI_FILE_INPUT:
146 /* XXX: Packing COL1, FOGC into a single attribute works for
147 * texenv programs, but will fail for real fragment programs
148 * that use these attributes and expect them to be a full 4
149 * components wide. Could use a texcoord to pass these
150 * attributes if necessary, but that won't work in the general
151 * case.
152 *
153 * We also use a texture coordinate to pass wpos when possible.
154 */
155
156 sem_name = p->shader->info.input_semantic_name[index];
157 sem_ind = p->shader->info.input_semantic_index[index];
158
159 switch (sem_name) {
160 case TGSI_SEMANTIC_GENERIC:
161 case TGSI_SEMANTIC_TEXCOORD:
162 case TGSI_SEMANTIC_PCOORD:
163 case TGSI_SEMANTIC_POSITION: {
164 if (sem_name == TGSI_SEMANTIC_PCOORD)
165 fs->reads_pntc = true;
166
167 int real_tex_unit = get_mapping(fs, sem_name, sem_ind);
168 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit,
169 D0_CHANNEL_ALL);
170 break;
171 }
172 case TGSI_SEMANTIC_COLOR:
173 if (sem_ind == 0) {
174 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
175 } else {
176 /* secondary color */
177 assert(sem_ind == 1);
178 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
179 src = swizzle(src, X, Y, Z, ONE);
180 }
181 break;
182 case TGSI_SEMANTIC_FOG:
183 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
184 src = swizzle(src, W, W, W, W);
185 break;
186 case TGSI_SEMANTIC_FACE: {
187 /* for back/front faces */
188 int real_tex_unit = get_mapping(fs, sem_name, sem_ind);
189 src =
190 i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X);
191 break;
192 }
193 default:
194 i915_program_error(p, "Bad source->Index");
195 return 0;
196 }
197 break;
198
199 case TGSI_FILE_IMMEDIATE: {
200 assert(index < p->num_immediates);
201
202 uint8_t swiz[4] = {source->Register.SwizzleX, source->Register.SwizzleY,
203 source->Register.SwizzleZ, source->Register.SwizzleW};
204
205 uint8_t neg[4] = {source->Register.Negate, source->Register.Negate,
206 source->Register.Negate, source->Register.Negate};
207
208 unsigned i;
209
210 for (i = 0; i < 4; i++) {
211 if (swiz[i] == TGSI_SWIZZLE_ZERO || swiz[i] == TGSI_SWIZZLE_ONE) {
212 continue;
213 } else if (p->immediates[index][swiz[i]] == 0.0) {
214 swiz[i] = TGSI_SWIZZLE_ZERO;
215 } else if (p->immediates[index][swiz[i]] == 1.0) {
216 swiz[i] = TGSI_SWIZZLE_ONE;
217 } else if (p->immediates[index][swiz[i]] == -1.0) {
218 swiz[i] = TGSI_SWIZZLE_ONE;
219 neg[i] ^= 1;
220 } else {
221 break;
222 }
223 }
224
225 if (i == 4) {
226 return negate(
227 swizzle(UREG(REG_TYPE_R, 0), swiz[0], swiz[1], swiz[2], swiz[3]),
228 neg[0], neg[1], neg[2], neg[3]);
229 }
230
231 index = p->immediates_map[index];
232 FALLTHROUGH;
233 }
234
235 case TGSI_FILE_CONSTANT:
236 src = UREG(REG_TYPE_CONST, index);
237 break;
238
239 default:
240 i915_program_error(p, "Bad source->File");
241 return 0;
242 }
243
244 src = swizzle(src, source->Register.SwizzleX, source->Register.SwizzleY,
245 source->Register.SwizzleZ, source->Register.SwizzleW);
246
247 /* No HW abs flag, so we have to max with the negation. */
248 if (source->Register.Absolute) {
249 uint32_t tmp = i915_get_utemp(p);
250 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src,
251 negate(src, 1, 1, 1, 1), 0);
252 src = tmp;
253 }
254
255 /* There's both negate-all-components and per-component negation.
256 * Try to handle both here.
257 */
258 {
259 int n = source->Register.Negate;
260 src = negate(src, n, n, n, n);
261 }
262
263 return src;
264 }
265
266 /**
267 * Construct a ureg for a destination register.
268 */
269 static uint32_t
get_result_vector(struct i915_fp_compile * p,const struct i915_full_dst_register * dest)270 get_result_vector(struct i915_fp_compile *p,
271 const struct i915_full_dst_register *dest)
272 {
273 switch (dest->Register.File) {
274 case TGSI_FILE_OUTPUT: {
275 uint32_t sem_name =
276 p->shader->info.output_semantic_name[dest->Register.Index];
277 switch (sem_name) {
278 case TGSI_SEMANTIC_POSITION:
279 return UREG(REG_TYPE_OD, 0);
280 case TGSI_SEMANTIC_COLOR:
281 return UREG(REG_TYPE_OC, 0);
282 default:
283 i915_program_error(p, "Bad inst->DstReg.Index/semantics");
284 return 0;
285 }
286 }
287 case TGSI_FILE_TEMPORARY:
288 return UREG(REG_TYPE_R, dest->Register.Index);
289 default:
290 i915_program_error(p, "Bad inst->DstReg.File");
291 return 0;
292 }
293 }
294
295 /**
296 * Compute flags for saturation and writemask.
297 */
298 static uint32_t
get_result_flags(const struct i915_full_instruction * inst)299 get_result_flags(const struct i915_full_instruction *inst)
300 {
301 const uint32_t writeMask = inst->Dst[0].Register.WriteMask;
302 uint32_t flags = 0x0;
303
304 if (inst->Instruction.Saturate)
305 flags |= A0_DEST_SATURATE;
306
307 if (writeMask & TGSI_WRITEMASK_X)
308 flags |= A0_DEST_CHANNEL_X;
309 if (writeMask & TGSI_WRITEMASK_Y)
310 flags |= A0_DEST_CHANNEL_Y;
311 if (writeMask & TGSI_WRITEMASK_Z)
312 flags |= A0_DEST_CHANNEL_Z;
313 if (writeMask & TGSI_WRITEMASK_W)
314 flags |= A0_DEST_CHANNEL_W;
315
316 return flags;
317 }
318
319 /**
320 * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token
321 */
322 static uint32_t
translate_tex_src_target(struct i915_fp_compile * p,uint32_t tex)323 translate_tex_src_target(struct i915_fp_compile *p, uint32_t tex)
324 {
325 switch (tex) {
326 case TGSI_TEXTURE_SHADOW1D:
327 FALLTHROUGH;
328 case TGSI_TEXTURE_1D:
329 return D0_SAMPLE_TYPE_2D;
330
331 case TGSI_TEXTURE_SHADOW2D:
332 FALLTHROUGH;
333 case TGSI_TEXTURE_2D:
334 return D0_SAMPLE_TYPE_2D;
335
336 case TGSI_TEXTURE_SHADOWRECT:
337 FALLTHROUGH;
338 case TGSI_TEXTURE_RECT:
339 return D0_SAMPLE_TYPE_2D;
340
341 case TGSI_TEXTURE_3D:
342 return D0_SAMPLE_TYPE_VOLUME;
343
344 case TGSI_TEXTURE_CUBE:
345 return D0_SAMPLE_TYPE_CUBE;
346
347 default:
348 i915_program_error(p, "TexSrc type");
349 return 0;
350 }
351 }
352
353 /**
354 * Return the number of coords needed to access a given TGSI_TEXTURE_*
355 */
356 uint32_t
i915_coord_mask(enum tgsi_opcode opcode,enum tgsi_texture_type tex)357 i915_coord_mask(enum tgsi_opcode opcode, enum tgsi_texture_type tex)
358 {
359 uint32_t coord_mask = 0;
360
361 if (opcode == TGSI_OPCODE_TXP || opcode == TGSI_OPCODE_TXB)
362 coord_mask |= TGSI_WRITEMASK_W;
363
364 switch (tex) {
365 case TGSI_TEXTURE_1D: /* See the 1D coord swizzle below. */
366 case TGSI_TEXTURE_2D:
367 case TGSI_TEXTURE_RECT:
368 return coord_mask | TGSI_WRITEMASK_XY;
369
370 case TGSI_TEXTURE_SHADOW1D:
371 case TGSI_TEXTURE_SHADOW2D:
372 case TGSI_TEXTURE_SHADOWRECT:
373 case TGSI_TEXTURE_3D:
374 case TGSI_TEXTURE_CUBE:
375 return coord_mask | TGSI_WRITEMASK_XYZ;
376
377 default:
378 unreachable("bad texture target");
379 }
380 }
381
382 /**
383 * Generate texel lookup instruction.
384 */
385 static void
emit_tex(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,struct i915_fragment_shader * fs)386 emit_tex(struct i915_fp_compile *p, const struct i915_full_instruction *inst,
387 uint32_t opcode, struct i915_fragment_shader *fs)
388 {
389 uint32_t texture = inst->Texture.Texture;
390 uint32_t unit = inst->Src[1].Register.Index;
391 uint32_t tex = translate_tex_src_target(p, texture);
392 uint32_t sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
393 uint32_t coord = src_vector(p, &inst->Src[0], fs);
394
395 /* For 1D textures, set the Y coord to the same as X. Otherwise, we could
396 * select the wrong LOD based on the uninitialized Y coord when we sample our
397 * 1D textures as 2D.
398 */
399 if (texture == TGSI_TEXTURE_1D || texture == TGSI_TEXTURE_SHADOW1D)
400 coord = swizzle(coord, X, X, Z, W);
401
402 i915_emit_texld(p, get_result_vector(p, &inst->Dst[0]),
403 get_result_flags(inst), sampler, coord, opcode,
404 i915_coord_mask(inst->Instruction.Opcode, texture));
405 }
406
407 /**
408 * Generate a simple arithmetic instruction
409 * \param opcode the i915 opcode
410 * \param numArgs the number of input/src arguments
411 */
412 static void
emit_simple_arith(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,uint32_t numArgs,struct i915_fragment_shader * fs)413 emit_simple_arith(struct i915_fp_compile *p,
414 const struct i915_full_instruction *inst, uint32_t opcode,
415 uint32_t numArgs, struct i915_fragment_shader *fs)
416 {
417 uint32_t arg1, arg2, arg3;
418
419 assert(numArgs <= 3);
420
421 arg1 = (numArgs < 1) ? 0 : src_vector(p, &inst->Src[0], fs);
422 arg2 = (numArgs < 2) ? 0 : src_vector(p, &inst->Src[1], fs);
423 arg3 = (numArgs < 3) ? 0 : src_vector(p, &inst->Src[2], fs);
424
425 i915_emit_arith(p, opcode, get_result_vector(p, &inst->Dst[0]),
426 get_result_flags(inst), 0, arg1, arg2, arg3);
427 }
428
429 /** As above, but swap the first two src regs */
430 static void
emit_simple_arith_swap2(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,uint32_t numArgs,struct i915_fragment_shader * fs)431 emit_simple_arith_swap2(struct i915_fp_compile *p,
432 const struct i915_full_instruction *inst,
433 uint32_t opcode, uint32_t numArgs,
434 struct i915_fragment_shader *fs)
435 {
436 struct i915_full_instruction inst2;
437
438 assert(numArgs == 2);
439
440 /* transpose first two registers */
441 inst2 = *inst;
442 inst2.Src[0] = inst->Src[1];
443 inst2.Src[1] = inst->Src[0];
444
445 emit_simple_arith(p, &inst2, opcode, numArgs, fs);
446 }
447
448 /*
449 * Translate TGSI instruction to i915 instruction.
450 *
451 * Possible concerns:
452 *
453 * DDX, DDY -- return 0
454 * SIN, COS -- could use another taylor step?
455 * LIT -- results seem a little different to sw mesa
456 * LOG -- different to mesa on negative numbers, but this is conformant.
457 */
458 static void
i915_translate_instruction(struct i915_fp_compile * p,const struct i915_full_instruction * inst,struct i915_fragment_shader * fs)459 i915_translate_instruction(struct i915_fp_compile *p,
460 const struct i915_full_instruction *inst,
461 struct i915_fragment_shader *fs)
462 {
463 uint32_t src0, src1, src2, flags;
464 uint32_t tmp = 0;
465
466 switch (inst->Instruction.Opcode) {
467 case TGSI_OPCODE_ADD:
468 emit_simple_arith(p, inst, A0_ADD, 2, fs);
469 break;
470
471 case TGSI_OPCODE_CEIL:
472 src0 = src_vector(p, &inst->Src[0], fs);
473 tmp = i915_get_utemp(p);
474 flags = get_result_flags(inst);
475 i915_emit_arith(p, A0_FLR, tmp, flags & A0_DEST_CHANNEL_ALL, 0,
476 negate(src0, 1, 1, 1, 1), 0, 0);
477 i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]), flags, 0,
478 negate(tmp, 1, 1, 1, 1), 0, 0);
479 break;
480
481 case TGSI_OPCODE_CMP:
482 src0 = src_vector(p, &inst->Src[0], fs);
483 src1 = src_vector(p, &inst->Src[1], fs);
484 src2 = src_vector(p, &inst->Src[2], fs);
485 i915_emit_arith(p, A0_CMP, get_result_vector(p, &inst->Dst[0]),
486 get_result_flags(inst), 0, src0, src2,
487 src1); /* NOTE: order of src2, src1 */
488 break;
489
490 case TGSI_OPCODE_DDX:
491 case TGSI_OPCODE_DDY:
492 /* XXX We just output 0 here */
493 debug_printf("Punting DDX/DDY\n");
494 src0 = get_result_vector(p, &inst->Dst[0]);
495 i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]),
496 get_result_flags(inst), 0,
497 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0);
498 break;
499
500 case TGSI_OPCODE_DP2:
501 src0 = src_vector(p, &inst->Src[0], fs);
502 src1 = src_vector(p, &inst->Src[1], fs);
503
504 i915_emit_arith(p, A0_DP3, get_result_vector(p, &inst->Dst[0]),
505 get_result_flags(inst), 0,
506 swizzle(src0, X, Y, ZERO, ZERO), src1, 0);
507 break;
508
509 case TGSI_OPCODE_DP3:
510 emit_simple_arith(p, inst, A0_DP3, 2, fs);
511 break;
512
513 case TGSI_OPCODE_DP4:
514 emit_simple_arith(p, inst, A0_DP4, 2, fs);
515 break;
516
517 case TGSI_OPCODE_DST:
518 src0 = src_vector(p, &inst->Src[0], fs);
519 src1 = src_vector(p, &inst->Src[1], fs);
520
521 /* result[0] = 1 * 1;
522 * result[1] = a[1] * b[1];
523 * result[2] = a[2] * 1;
524 * result[3] = 1 * b[3];
525 */
526 i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
527 get_result_flags(inst), 0, swizzle(src0, ONE, Y, Z, ONE),
528 swizzle(src1, ONE, Y, ONE, W), 0);
529 break;
530
531 case TGSI_OPCODE_END:
532 /* no-op */
533 break;
534
535 case TGSI_OPCODE_EX2:
536 src0 = src_vector(p, &inst->Src[0], fs);
537
538 i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]),
539 get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
540 0);
541 break;
542
543 case TGSI_OPCODE_FLR:
544 emit_simple_arith(p, inst, A0_FLR, 1, fs);
545 break;
546
547 case TGSI_OPCODE_FRC:
548 emit_simple_arith(p, inst, A0_FRC, 1, fs);
549 break;
550
551 case TGSI_OPCODE_KILL_IF:
552 /* kill if src[0].x < 0 || src[0].y < 0 ... */
553 src0 = src_vector(p, &inst->Src[0], fs);
554 tmp = i915_get_utemp(p);
555
556 i915_emit_texld(p, tmp, /* dest reg: a dummy reg */
557 A0_DEST_CHANNEL_ALL, /* dest writemask */
558 0, /* sampler */
559 src0, /* coord*/
560 T0_TEXKILL, /* opcode */
561 TGSI_WRITEMASK_XYZW); /* coord_mask */
562 break;
563
564 case TGSI_OPCODE_KILL:
565 /* unconditional kill */
566 tmp = i915_get_utemp(p);
567
568 i915_emit_texld(p, tmp, /* dest reg: a dummy reg */
569 A0_DEST_CHANNEL_ALL, /* dest writemask */
570 0, /* sampler */
571 negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE),
572 1, 1, 1, 1), /* coord */
573 T0_TEXKILL, /* opcode */
574 TGSI_WRITEMASK_X); /* coord_mask */
575 break;
576
577 case TGSI_OPCODE_LG2:
578 src0 = src_vector(p, &inst->Src[0], fs);
579
580 i915_emit_arith(p, A0_LOG, get_result_vector(p, &inst->Dst[0]),
581 get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
582 0);
583 break;
584
585 case TGSI_OPCODE_LIT:
586 src0 = src_vector(p, &inst->Src[0], fs);
587 tmp = i915_get_utemp(p);
588
589 /* tmp = max( a.xyzw, a.00zw )
590 * XXX: Clamp tmp.w to -128..128
591 * tmp.y = log(tmp.y)
592 * tmp.y = tmp.w * tmp.y
593 * tmp.y = exp(tmp.y)
594 * result = cmp (a.11-x1, a.1x01, a.1xy1 )
595 */
596 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
597 swizzle(src0, ZERO, ZERO, Z, W), 0);
598
599 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
600 swizzle(tmp, Y, Y, Y, Y), 0, 0);
601
602 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
603 swizzle(tmp, ZERO, Y, ZERO, ZERO),
604 swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
605
606 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
607 swizzle(tmp, Y, Y, Y, Y), 0, 0);
608
609 i915_emit_arith(
610 p, A0_CMP, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst),
611 0, negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
612 swizzle(tmp, ONE, X, ZERO, ONE), swizzle(tmp, ONE, X, Y, ONE));
613
614 break;
615
616 case TGSI_OPCODE_LRP:
617 src0 = src_vector(p, &inst->Src[0], fs);
618 src1 = src_vector(p, &inst->Src[1], fs);
619 src2 = src_vector(p, &inst->Src[2], fs);
620 flags = get_result_flags(inst);
621 tmp = i915_get_utemp(p);
622
623 /* b*a + c*(1-a)
624 *
625 * b*a + c - ca
626 *
627 * tmp = b*a + c,
628 * result = (-c)*a + tmp
629 */
630 i915_emit_arith(p, A0_MAD, tmp, flags & A0_DEST_CHANNEL_ALL, 0, src1,
631 src0, src2);
632
633 i915_emit_arith(p, A0_MAD, get_result_vector(p, &inst->Dst[0]), flags, 0,
634 negate(src2, 1, 1, 1, 1), src0, tmp);
635 break;
636
637 case TGSI_OPCODE_MAD:
638 emit_simple_arith(p, inst, A0_MAD, 3, fs);
639 break;
640
641 case TGSI_OPCODE_MAX:
642 emit_simple_arith(p, inst, A0_MAX, 2, fs);
643 break;
644
645 case TGSI_OPCODE_MIN:
646 emit_simple_arith(p, inst, A0_MIN, 2, fs);
647 break;
648
649 case TGSI_OPCODE_MOV:
650 emit_simple_arith(p, inst, A0_MOV, 1, fs);
651 break;
652
653 case TGSI_OPCODE_MUL:
654 emit_simple_arith(p, inst, A0_MUL, 2, fs);
655 break;
656
657 case TGSI_OPCODE_NOP:
658 break;
659
660 case TGSI_OPCODE_POW:
661 src0 = src_vector(p, &inst->Src[0], fs);
662 src1 = src_vector(p, &inst->Src[1], fs);
663 tmp = i915_get_utemp(p);
664 flags = get_result_flags(inst);
665
666 /* XXX: masking on intermediate values, here and elsewhere.
667 */
668 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_X, 0,
669 swizzle(src0, X, X, X, X), 0, 0);
670
671 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
672
673 i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]), flags, 0,
674 swizzle(tmp, X, X, X, X), 0, 0);
675 break;
676
677 case TGSI_OPCODE_RET:
678 /* XXX: no-op? */
679 break;
680
681 case TGSI_OPCODE_RCP:
682 src0 = src_vector(p, &inst->Src[0], fs);
683
684 i915_emit_arith(p, A0_RCP, get_result_vector(p, &inst->Dst[0]),
685 get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
686 0);
687 break;
688
689 case TGSI_OPCODE_RSQ:
690 src0 = src_vector(p, &inst->Src[0], fs);
691
692 i915_emit_arith(p, A0_RSQ, get_result_vector(p, &inst->Dst[0]),
693 get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
694 0);
695 break;
696
697 case TGSI_OPCODE_SEQ: {
698 const uint32_t zero =
699 swizzle(UREG(REG_TYPE_R, 0), SRC_ZERO, SRC_ZERO, SRC_ZERO, SRC_ZERO);
700
701 /* if we're both >= and <= then we're == */
702 src0 = src_vector(p, &inst->Src[0], fs);
703 src1 = src_vector(p, &inst->Src[1], fs);
704 tmp = i915_get_utemp(p);
705
706 if (src0 == zero || src1 == zero) {
707 if (src0 == zero)
708 src0 = src1;
709
710 /* x == 0 is equivalent to -abs(x) >= 0, but the latter requires only
711 * two instructions instead of three.
712 */
713 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
714 negate(src0, 1, 1, 1, 1), 0);
715 i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),
716 get_result_flags(inst), 0, negate(tmp, 1, 1, 1, 1),
717 zero, 0);
718 } else {
719 i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);
720
721 i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),
722 get_result_flags(inst), 0, src1, src0, 0);
723
724 i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
725 get_result_flags(inst), 0,
726 get_result_vector(p, &inst->Dst[0]), tmp, 0);
727 }
728
729 break;
730 }
731
732 case TGSI_OPCODE_SGE:
733 emit_simple_arith(p, inst, A0_SGE, 2, fs);
734 break;
735
736 case TGSI_OPCODE_SLE:
737 /* like SGE, but swap reg0, reg1 */
738 emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs);
739 break;
740
741 case TGSI_OPCODE_SLT:
742 emit_simple_arith(p, inst, A0_SLT, 2, fs);
743 break;
744
745 case TGSI_OPCODE_SGT:
746 /* like SLT, but swap reg0, reg1 */
747 emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);
748 break;
749
750 case TGSI_OPCODE_SNE: {
751 const uint32_t zero =
752 swizzle(UREG(REG_TYPE_R, 0), SRC_ZERO, SRC_ZERO, SRC_ZERO, SRC_ZERO);
753
754 /* if we're < or > then we're != */
755 src0 = src_vector(p, &inst->Src[0], fs);
756 src1 = src_vector(p, &inst->Src[1], fs);
757 tmp = i915_get_utemp(p);
758
759 if (src0 == zero || src1 == zero) {
760 if (src0 == zero)
761 src0 = src1;
762
763 /* x != 0 is equivalent to -abs(x) < 0, but the latter requires only
764 * two instructions instead of three.
765 */
766 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
767 negate(src0, 1, 1, 1, 1), 0);
768 i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
769 get_result_flags(inst), 0, negate(tmp, 1, 1, 1, 1),
770 zero, 0);
771 } else {
772 i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);
773
774 i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
775 get_result_flags(inst), 0, src1, src0, 0);
776
777 i915_emit_arith(p, A0_ADD, get_result_vector(p, &inst->Dst[0]),
778 get_result_flags(inst), 0,
779 get_result_vector(p, &inst->Dst[0]), tmp, 0);
780 }
781 break;
782 }
783
784 case TGSI_OPCODE_SSG:
785 /* compute (src>0) - (src<0) */
786 src0 = src_vector(p, &inst->Src[0], fs);
787 tmp = i915_get_utemp(p);
788
789 i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
790 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0);
791
792 i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
793 get_result_flags(inst), 0,
794 swizzle(src0, ZERO, ZERO, ZERO, ZERO), src0, 0);
795
796 i915_emit_arith(
797 p, A0_ADD, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst),
798 0, get_result_vector(p, &inst->Dst[0]), negate(tmp, 1, 1, 1, 1), 0);
799 break;
800
801 case TGSI_OPCODE_TEX:
802 emit_tex(p, inst, T0_TEXLD, fs);
803 break;
804
805 case TGSI_OPCODE_TRUNC:
806 emit_simple_arith(p, inst, A0_TRC, 1, fs);
807 break;
808
809 case TGSI_OPCODE_TXB:
810 emit_tex(p, inst, T0_TEXLDB, fs);
811 break;
812
813 case TGSI_OPCODE_TXP:
814 emit_tex(p, inst, T0_TEXLDP, fs);
815 break;
816
817 default:
818 i915_program_error(p, "bad opcode %s (%d)",
819 tgsi_get_opcode_name(inst->Instruction.Opcode),
820 inst->Instruction.Opcode);
821 return;
822 }
823
824 i915_release_utemps(p);
825 }
826
827 static void
i915_translate_token(struct i915_fp_compile * p,const union i915_full_token * token,struct i915_fragment_shader * fs)828 i915_translate_token(struct i915_fp_compile *p,
829 const union i915_full_token *token,
830 struct i915_fragment_shader *fs)
831 {
832 struct i915_fragment_shader *ifs = p->shader;
833 switch (token->Token.Type) {
834 case TGSI_TOKEN_TYPE_PROPERTY:
835 /* Ignore properties where we only support one value. */
836 assert(token->FullProperty.Property.PropertyName ==
837 TGSI_PROPERTY_FS_COORD_ORIGIN ||
838 token->FullProperty.Property.PropertyName ==
839 TGSI_PROPERTY_FS_COORD_PIXEL_CENTER ||
840 token->FullProperty.Property.PropertyName ==
841 TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS ||
842 token->FullProperty.Property.PropertyName ==
843 TGSI_PROPERTY_SEPARABLE_PROGRAM);
844 break;
845
846 case TGSI_TOKEN_TYPE_DECLARATION:
847 if (token->FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) {
848 if (token->FullDeclaration.Range.Last >= I915_MAX_CONSTANT) {
849 i915_program_error(p, "Exceeded %d max uniforms",
850 I915_MAX_CONSTANT);
851 } else {
852 uint32_t i;
853 for (i = token->FullDeclaration.Range.First;
854 i <= token->FullDeclaration.Range.Last; i++) {
855 ifs->constant_flags[i] = I915_CONSTFLAG_USER;
856 ifs->num_constants = MAX2(ifs->num_constants, i + 1);
857 }
858 }
859 } else if (token->FullDeclaration.Declaration.File ==
860 TGSI_FILE_TEMPORARY) {
861 if (token->FullDeclaration.Range.Last >= I915_MAX_TEMPORARY) {
862 i915_program_error(p, "Exceeded max TGSI temps (%d/%d)",
863 token->FullDeclaration.Range.Last + 1, I915_MAX_TEMPORARY);
864 } else {
865 uint32_t i;
866 for (i = token->FullDeclaration.Range.First;
867 i <= token->FullDeclaration.Range.Last; i++) {
868 /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
869 p->temp_flag |= (1 << i); /* mark temp as used */
870 }
871 }
872 }
873 break;
874
875 case TGSI_TOKEN_TYPE_IMMEDIATE: {
876 const struct tgsi_full_immediate *imm = &token->FullImmediate;
877 const uint32_t pos = p->num_immediates++;
878 uint32_t j;
879 assert(imm->Immediate.NrTokens <= 4 + 1);
880 for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
881 p->immediates[pos][j] = imm->u[j].Float;
882 }
883 } break;
884
885 case TGSI_TOKEN_TYPE_INSTRUCTION:
886 if (p->first_instruction) {
887 /* resolve location of immediates */
888 uint32_t i, j;
889 for (i = 0; i < p->num_immediates; i++) {
890 /* find constant slot for this immediate */
891 for (j = 0; j < I915_MAX_CONSTANT; j++) {
892 if (ifs->constant_flags[j] == 0x0) {
893 memcpy(ifs->constants[j], p->immediates[i],
894 4 * sizeof(float));
895 /*printf("immediate %d maps to const %d\n", i, j);*/
896 ifs->constant_flags[j] = 0xf; /* all four comps used */
897 p->immediates_map[i] = j;
898 ifs->num_constants = MAX2(ifs->num_constants, j + 1);
899 break;
900 }
901 }
902 if (j == I915_MAX_CONSTANT) {
903 i915_program_error(p, "Exceeded %d max uniforms and immediates.",
904 I915_MAX_CONSTANT);
905 }
906 }
907
908 p->first_instruction = false;
909 }
910
911 i915_translate_instruction(p, &token->FullInstruction, fs);
912 break;
913
914 default:
915 assert(0);
916 }
917 }
918
919 /**
920 * Translate TGSI fragment shader into i915 hardware instructions.
921 * \param p the translation state
922 * \param tokens the TGSI token array
923 */
924 static void
i915_translate_instructions(struct i915_fp_compile * p,const struct i915_token_list * tokens,struct i915_fragment_shader * fs)925 i915_translate_instructions(struct i915_fp_compile *p,
926 const struct i915_token_list *tokens,
927 struct i915_fragment_shader *fs)
928 {
929 int i;
930 for (i = 0; i < tokens->NumTokens && !p->error[0]; i++) {
931 i915_translate_token(p, &tokens->Tokens[i], fs);
932 }
933 }
934
935 static struct i915_fp_compile *
i915_init_compile(struct i915_fragment_shader * ifs)936 i915_init_compile(struct i915_fragment_shader *ifs)
937 {
938 struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
939 int i;
940
941 p->shader = ifs;
942 p->error = ralloc_strdup(NULL, "");
943
944 /* Put new constants at end of const buffer, growing downward.
945 * The problem is we don't know how many user-defined constants might
946 * be specified with pipe->set_constant_buffer().
947 * Should pre-scan the user's program to determine the highest-numbered
948 * constant referenced.
949 */
950 ifs->num_constants = 0;
951 memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
952
953 memset(&p->register_phases, 0, sizeof(p->register_phases));
954
955 for (i = 0; i < I915_TEX_UNITS; i++)
956 ifs->texcoords[i].semantic = -1;
957
958 p->first_instruction = true;
959
960 p->nr_tex_indirect = 1; /* correct? */
961 p->nr_tex_insn = 0;
962 p->nr_alu_insn = 0;
963 p->nr_decl_insn = 0;
964
965 p->csr = p->program;
966 p->decl = p->declarations;
967 p->decl_s = 0;
968 p->decl_t = 0;
969 p->temp_flag = ~0x0U << I915_MAX_TEMPORARY;
970 p->utemp_flag = ~0x7;
971
972 /* initialize the first program word */
973 *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
974
975 return p;
976 }
977
978 /* Copy compile results to the fragment program struct and destroy the
979 * compilation context.
980 */
981 static void
i915_fini_compile(struct i915_context * i915,struct i915_fp_compile * p)982 i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
983 {
984 struct i915_fragment_shader *ifs = p->shader;
985 unsigned long program_size = (unsigned long)(p->csr - p->program);
986 unsigned long decl_size = (unsigned long)(p->decl - p->declarations);
987
988 if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) {
989 i915_program_error(p,
990 "Exceeded max nr indirect texture lookups (%d/%d)\n",
991 p->nr_tex_indirect, I915_MAX_TEX_INDIRECT);
992 }
993
994 if (p->nr_tex_insn > I915_MAX_TEX_INSN) {
995 i915_program_error(p, "Exceeded max TEX instructions (%d/%d)",
996 p->nr_tex_insn, I915_MAX_TEX_INSN);
997 }
998
999 if (p->nr_alu_insn > I915_MAX_ALU_INSN) {
1000 i915_program_error(p, "Exceeded max ALU instructions (%d/%d)",
1001 p->nr_alu_insn, I915_MAX_ALU_INSN);
1002 }
1003
1004 if (p->nr_decl_insn > I915_MAX_DECL_INSN) {
1005 i915_program_error(p, "Exceeded max DECL instructions (%d/%d)",
1006 p->nr_decl_insn, I915_MAX_DECL_INSN);
1007 }
1008
1009 /* hw doesn't seem to like empty frag programs (num_instructions == 1 is just
1010 * TGSI_END), even when the depth write fixup gets emitted below - maybe that
1011 * one is fishy, too?
1012 */
1013 if (ifs->info.num_instructions == 1)
1014 i915_program_error(p, "Empty fragment shader");
1015
1016 if (strlen(p->error) != 0) {
1017 p->NumNativeInstructions = 0;
1018 p->NumNativeAluInstructions = 0;
1019 p->NumNativeTexInstructions = 0;
1020 p->NumNativeTexIndirections = 0;
1021
1022 i915_use_passthrough_shader(ifs);
1023 } else {
1024 p->NumNativeInstructions =
1025 p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn;
1026 p->NumNativeAluInstructions = p->nr_alu_insn;
1027 p->NumNativeTexInstructions = p->nr_tex_insn;
1028 p->NumNativeTexIndirections = p->nr_tex_indirect;
1029
1030 /* patch in the program length */
1031 p->declarations[0] |= program_size + decl_size - 2;
1032
1033 /* Copy compilation results to fragment program struct:
1034 */
1035 assert(!ifs->program);
1036
1037 ifs->program_len = decl_size + program_size;
1038 ifs->program = (uint32_t *)MALLOC(ifs->program_len * sizeof(uint32_t));
1039 memcpy(ifs->program, p->declarations, decl_size * sizeof(uint32_t));
1040 memcpy(&ifs->program[decl_size], p->program,
1041 program_size * sizeof(uint32_t));
1042
1043 if (i915) {
1044 util_debug_message(
1045 &i915->debug, SHADER_INFO,
1046 "%s shader: %d inst, %d tex, %d tex_indirect, %d temps, %d const",
1047 _mesa_shader_stage_to_abbrev(MESA_SHADER_FRAGMENT),
1048 (int)program_size, p->nr_tex_insn, p->nr_tex_indirect,
1049 p->shader->info.file_max[TGSI_FILE_TEMPORARY] + 1,
1050 ifs->num_constants);
1051 }
1052 }
1053
1054 if (strlen(p->error) != 0)
1055 ifs->error = p->error;
1056 else
1057 ralloc_free(p->error);
1058
1059 /* Release the compilation struct:
1060 */
1061 FREE(p);
1062 }
1063
1064 /**
1065 * Rather than trying to intercept and jiggle depth writes during
1066 * emit, just move the value into its correct position at the end of
1067 * the program:
1068 */
1069 static void
i915_fixup_depth_write(struct i915_fp_compile * p)1070 i915_fixup_depth_write(struct i915_fp_compile *p)
1071 {
1072 for (int i = 0; i < p->shader->info.num_outputs; i++) {
1073 if (p->shader->info.output_semantic_name[i] != TGSI_SEMANTIC_POSITION)
1074 continue;
1075
1076 const uint32_t depth = UREG(REG_TYPE_OD, 0);
1077
1078 i915_emit_arith(p, A0_MOV, /* opcode */
1079 depth, /* dest reg */
1080 A0_DEST_CHANNEL_W, /* write mask */
1081 0, /* saturate? */
1082 swizzle(depth, X, Y, Z, Z), /* src0 */
1083 0, 0 /* src1, src2 */);
1084 }
1085 }
1086
1087 void
i915_translate_fragment_program(struct i915_context * i915,struct i915_fragment_shader * fs)1088 i915_translate_fragment_program(struct i915_context *i915,
1089 struct i915_fragment_shader *fs)
1090 {
1091 struct i915_fp_compile *p;
1092 const struct tgsi_token *tokens = fs->state.tokens;
1093 struct i915_token_list *i_tokens;
1094 bool debug =
1095 I915_DBG_ON(DBG_FS) && (!fs->internal || NIR_DEBUG(PRINT_INTERNAL));
1096
1097 if (debug) {
1098 mesa_logi("TGSI fragment shader:");
1099 tgsi_dump(tokens, 0);
1100 }
1101
1102 p = i915_init_compile(fs);
1103
1104 i_tokens = i915_optimize(tokens);
1105 i915_translate_instructions(p, i_tokens, fs);
1106 i915_fixup_depth_write(p);
1107
1108 i915_fini_compile(i915, p);
1109 i915_optimize_free(i_tokens);
1110
1111 if (debug) {
1112 if (fs->error)
1113 mesa_loge("%s", fs->error);
1114
1115 mesa_logi("i915 fragment shader with %d constants%s", fs->num_constants,
1116 fs->num_constants ? ":" : "");
1117
1118 for (int i = 0; i < I915_MAX_CONSTANT; i++) {
1119 if (fs->constant_flags[i] &&
1120 fs->constant_flags[i] != I915_CONSTFLAG_USER) {
1121 mesa_logi("\t\tC[%d] = { %f, %f, %f, %f }", i, fs->constants[i][0],
1122 fs->constants[i][1], fs->constants[i][2],
1123 fs->constants[i][3]);
1124 }
1125 }
1126 i915_disassemble_program(fs->program, fs->program_len);
1127 }
1128 }
1129