xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/i915/i915_fpc_translate.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /**************************************************************************
2  *
3  * Copyright 2007 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 #include <stdarg.h>
29 
30 #include "i915_context.h"
31 #include "i915_debug.h"
32 #include "i915_debug_private.h"
33 #include "i915_fpc.h"
34 #include "i915_reg.h"
35 
36 #include "nir/nir.h"
37 #include "pipe/p_shader_tokens.h"
38 #include "tgsi/tgsi_dump.h"
39 #include "tgsi/tgsi_info.h"
40 #include "tgsi/tgsi_parse.h"
41 #include "util/log.h"
42 #include "util/ralloc.h"
43 #include "util/u_math.h"
44 #include "util/u_memory.h"
45 #include "util/u_string.h"
46 
47 #include "draw/draw_vertex.h"
48 
49 #ifndef M_PI
50 #define M_PI 3.14159265358979323846
51 #endif
52 
53 /**
54  * Simple pass-through fragment shader to use when we don't have
55  * a real shader (or it fails to compile for some reason).
56  */
57 static unsigned passthrough_program[] = {
58    _3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1),
59    /* move to output color:
60     */
61    (A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL |
62     (REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)),
63    ((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) |
64     (SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) |
65     (SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) |
66     (SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)),
67    0};
68 
69 /**
70  * component-wise negation of ureg
71  */
72 static inline int
negate(int reg,int x,int y,int z,int w)73 negate(int reg, int x, int y, int z, int w)
74 {
75    /* Another neat thing about the UREG representation */
76    return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
77                  ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
78                  ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
79                  ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
80 }
81 
82 /**
83  * In the event of a translation failure, we'll generate a simple color
84  * pass-through program.
85  */
86 static void
i915_use_passthrough_shader(struct i915_fragment_shader * fs)87 i915_use_passthrough_shader(struct i915_fragment_shader *fs)
88 {
89    fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program));
90    if (fs->program) {
91       memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
92       fs->program_len = ARRAY_SIZE(passthrough_program);
93    }
94    fs->num_constants = 0;
95 }
96 
97 void
i915_program_error(struct i915_fp_compile * p,const char * msg,...)98 i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
99 {
100    va_list args;
101    va_start(args, msg);
102    ralloc_vasprintf_append(&p->error, msg, args);
103    va_end(args);
104 }
105 
106 static uint32_t
get_mapping(struct i915_fragment_shader * fs,enum tgsi_semantic semantic,int index)107 get_mapping(struct i915_fragment_shader *fs, enum tgsi_semantic semantic,
108             int index)
109 {
110    int i;
111    for (i = 0; i < I915_TEX_UNITS; i++) {
112       if (fs->texcoords[i].semantic == -1) {
113          fs->texcoords[i].semantic = semantic;
114          fs->texcoords[i].index = index;
115          return i;
116       }
117       if (fs->texcoords[i].semantic == semantic &&
118           fs->texcoords[i].index == index)
119          return i;
120    }
121    debug_printf("Exceeded max generics\n");
122    return 0;
123 }
124 
125 /**
126  * Construct a ureg for the given source register.  Will emit
127  * constants, apply swizzling and negation as needed.
128  */
129 static uint32_t
src_vector(struct i915_fp_compile * p,const struct i915_full_src_register * source,struct i915_fragment_shader * fs)130 src_vector(struct i915_fp_compile *p,
131            const struct i915_full_src_register *source,
132            struct i915_fragment_shader *fs)
133 {
134    uint32_t index = source->Register.Index;
135    uint32_t src = 0, sem_name, sem_ind;
136 
137    switch (source->Register.File) {
138    case TGSI_FILE_TEMPORARY:
139       if (source->Register.Index >= I915_MAX_TEMPORARY) {
140          i915_program_error(p, "Exceeded max temporary reg");
141          return 0;
142       }
143       src = UREG(REG_TYPE_R, index);
144       break;
145    case TGSI_FILE_INPUT:
146       /* XXX: Packing COL1, FOGC into a single attribute works for
147        * texenv programs, but will fail for real fragment programs
148        * that use these attributes and expect them to be a full 4
149        * components wide.  Could use a texcoord to pass these
150        * attributes if necessary, but that won't work in the general
151        * case.
152        *
153        * We also use a texture coordinate to pass wpos when possible.
154        */
155 
156       sem_name = p->shader->info.input_semantic_name[index];
157       sem_ind = p->shader->info.input_semantic_index[index];
158 
159       switch (sem_name) {
160       case TGSI_SEMANTIC_GENERIC:
161       case TGSI_SEMANTIC_TEXCOORD:
162       case TGSI_SEMANTIC_PCOORD:
163       case TGSI_SEMANTIC_POSITION: {
164          if (sem_name == TGSI_SEMANTIC_PCOORD)
165             fs->reads_pntc = true;
166 
167          int real_tex_unit = get_mapping(fs, sem_name, sem_ind);
168          src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit,
169                               D0_CHANNEL_ALL);
170          break;
171       }
172       case TGSI_SEMANTIC_COLOR:
173          if (sem_ind == 0) {
174             src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
175          } else {
176             /* secondary color */
177             assert(sem_ind == 1);
178             src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
179             src = swizzle(src, X, Y, Z, ONE);
180          }
181          break;
182       case TGSI_SEMANTIC_FOG:
183          src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
184          src = swizzle(src, W, W, W, W);
185          break;
186       case TGSI_SEMANTIC_FACE: {
187          /* for back/front faces */
188          int real_tex_unit = get_mapping(fs, sem_name, sem_ind);
189          src =
190             i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X);
191          break;
192       }
193       default:
194          i915_program_error(p, "Bad source->Index");
195          return 0;
196       }
197       break;
198 
199    case TGSI_FILE_IMMEDIATE: {
200       assert(index < p->num_immediates);
201 
202       uint8_t swiz[4] = {source->Register.SwizzleX, source->Register.SwizzleY,
203                          source->Register.SwizzleZ, source->Register.SwizzleW};
204 
205       uint8_t neg[4] = {source->Register.Negate, source->Register.Negate,
206                         source->Register.Negate, source->Register.Negate};
207 
208       unsigned i;
209 
210       for (i = 0; i < 4; i++) {
211          if (swiz[i] == TGSI_SWIZZLE_ZERO || swiz[i] == TGSI_SWIZZLE_ONE) {
212             continue;
213          } else if (p->immediates[index][swiz[i]] == 0.0) {
214             swiz[i] = TGSI_SWIZZLE_ZERO;
215          } else if (p->immediates[index][swiz[i]] == 1.0) {
216             swiz[i] = TGSI_SWIZZLE_ONE;
217          } else if (p->immediates[index][swiz[i]] == -1.0) {
218             swiz[i] = TGSI_SWIZZLE_ONE;
219             neg[i] ^= 1;
220          } else {
221             break;
222          }
223       }
224 
225       if (i == 4) {
226          return negate(
227             swizzle(UREG(REG_TYPE_R, 0), swiz[0], swiz[1], swiz[2], swiz[3]),
228             neg[0], neg[1], neg[2], neg[3]);
229       }
230 
231       index = p->immediates_map[index];
232       FALLTHROUGH;
233    }
234 
235    case TGSI_FILE_CONSTANT:
236       src = UREG(REG_TYPE_CONST, index);
237       break;
238 
239    default:
240       i915_program_error(p, "Bad source->File");
241       return 0;
242    }
243 
244    src = swizzle(src, source->Register.SwizzleX, source->Register.SwizzleY,
245                  source->Register.SwizzleZ, source->Register.SwizzleW);
246 
247    /* No HW abs flag, so we have to max with the negation. */
248    if (source->Register.Absolute) {
249       uint32_t tmp = i915_get_utemp(p);
250       i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src,
251                       negate(src, 1, 1, 1, 1), 0);
252       src = tmp;
253    }
254 
255    /* There's both negate-all-components and per-component negation.
256     * Try to handle both here.
257     */
258    {
259       int n = source->Register.Negate;
260       src = negate(src, n, n, n, n);
261    }
262 
263    return src;
264 }
265 
266 /**
267  * Construct a ureg for a destination register.
268  */
269 static uint32_t
get_result_vector(struct i915_fp_compile * p,const struct i915_full_dst_register * dest)270 get_result_vector(struct i915_fp_compile *p,
271                   const struct i915_full_dst_register *dest)
272 {
273    switch (dest->Register.File) {
274    case TGSI_FILE_OUTPUT: {
275       uint32_t sem_name =
276          p->shader->info.output_semantic_name[dest->Register.Index];
277       switch (sem_name) {
278       case TGSI_SEMANTIC_POSITION:
279          return UREG(REG_TYPE_OD, 0);
280       case TGSI_SEMANTIC_COLOR:
281          return UREG(REG_TYPE_OC, 0);
282       default:
283          i915_program_error(p, "Bad inst->DstReg.Index/semantics");
284          return 0;
285       }
286    }
287    case TGSI_FILE_TEMPORARY:
288       return UREG(REG_TYPE_R, dest->Register.Index);
289    default:
290       i915_program_error(p, "Bad inst->DstReg.File");
291       return 0;
292    }
293 }
294 
295 /**
296  * Compute flags for saturation and writemask.
297  */
298 static uint32_t
get_result_flags(const struct i915_full_instruction * inst)299 get_result_flags(const struct i915_full_instruction *inst)
300 {
301    const uint32_t writeMask = inst->Dst[0].Register.WriteMask;
302    uint32_t flags = 0x0;
303 
304    if (inst->Instruction.Saturate)
305       flags |= A0_DEST_SATURATE;
306 
307    if (writeMask & TGSI_WRITEMASK_X)
308       flags |= A0_DEST_CHANNEL_X;
309    if (writeMask & TGSI_WRITEMASK_Y)
310       flags |= A0_DEST_CHANNEL_Y;
311    if (writeMask & TGSI_WRITEMASK_Z)
312       flags |= A0_DEST_CHANNEL_Z;
313    if (writeMask & TGSI_WRITEMASK_W)
314       flags |= A0_DEST_CHANNEL_W;
315 
316    return flags;
317 }
318 
319 /**
320  * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token
321  */
322 static uint32_t
translate_tex_src_target(struct i915_fp_compile * p,uint32_t tex)323 translate_tex_src_target(struct i915_fp_compile *p, uint32_t tex)
324 {
325    switch (tex) {
326    case TGSI_TEXTURE_SHADOW1D:
327       FALLTHROUGH;
328    case TGSI_TEXTURE_1D:
329       return D0_SAMPLE_TYPE_2D;
330 
331    case TGSI_TEXTURE_SHADOW2D:
332       FALLTHROUGH;
333    case TGSI_TEXTURE_2D:
334       return D0_SAMPLE_TYPE_2D;
335 
336    case TGSI_TEXTURE_SHADOWRECT:
337       FALLTHROUGH;
338    case TGSI_TEXTURE_RECT:
339       return D0_SAMPLE_TYPE_2D;
340 
341    case TGSI_TEXTURE_3D:
342       return D0_SAMPLE_TYPE_VOLUME;
343 
344    case TGSI_TEXTURE_CUBE:
345       return D0_SAMPLE_TYPE_CUBE;
346 
347    default:
348       i915_program_error(p, "TexSrc type");
349       return 0;
350    }
351 }
352 
353 /**
354  * Return the number of coords needed to access a given TGSI_TEXTURE_*
355  */
356 uint32_t
i915_coord_mask(enum tgsi_opcode opcode,enum tgsi_texture_type tex)357 i915_coord_mask(enum tgsi_opcode opcode, enum tgsi_texture_type tex)
358 {
359    uint32_t coord_mask = 0;
360 
361    if (opcode == TGSI_OPCODE_TXP || opcode == TGSI_OPCODE_TXB)
362       coord_mask |= TGSI_WRITEMASK_W;
363 
364    switch (tex) {
365    case TGSI_TEXTURE_1D: /* See the 1D coord swizzle below. */
366    case TGSI_TEXTURE_2D:
367    case TGSI_TEXTURE_RECT:
368       return coord_mask | TGSI_WRITEMASK_XY;
369 
370    case TGSI_TEXTURE_SHADOW1D:
371    case TGSI_TEXTURE_SHADOW2D:
372    case TGSI_TEXTURE_SHADOWRECT:
373    case TGSI_TEXTURE_3D:
374    case TGSI_TEXTURE_CUBE:
375       return coord_mask | TGSI_WRITEMASK_XYZ;
376 
377    default:
378       unreachable("bad texture target");
379    }
380 }
381 
382 /**
383  * Generate texel lookup instruction.
384  */
385 static void
emit_tex(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,struct i915_fragment_shader * fs)386 emit_tex(struct i915_fp_compile *p, const struct i915_full_instruction *inst,
387          uint32_t opcode, struct i915_fragment_shader *fs)
388 {
389    uint32_t texture = inst->Texture.Texture;
390    uint32_t unit = inst->Src[1].Register.Index;
391    uint32_t tex = translate_tex_src_target(p, texture);
392    uint32_t sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
393    uint32_t coord = src_vector(p, &inst->Src[0], fs);
394 
395    /* For 1D textures, set the Y coord to the same as X.  Otherwise, we could
396     * select the wrong LOD based on the uninitialized Y coord when we sample our
397     * 1D textures as 2D.
398     */
399    if (texture == TGSI_TEXTURE_1D || texture == TGSI_TEXTURE_SHADOW1D)
400       coord = swizzle(coord, X, X, Z, W);
401 
402    i915_emit_texld(p, get_result_vector(p, &inst->Dst[0]),
403                    get_result_flags(inst), sampler, coord, opcode,
404                    i915_coord_mask(inst->Instruction.Opcode, texture));
405 }
406 
407 /**
408  * Generate a simple arithmetic instruction
409  * \param opcode  the i915 opcode
410  * \param numArgs  the number of input/src arguments
411  */
412 static void
emit_simple_arith(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,uint32_t numArgs,struct i915_fragment_shader * fs)413 emit_simple_arith(struct i915_fp_compile *p,
414                   const struct i915_full_instruction *inst, uint32_t opcode,
415                   uint32_t numArgs, struct i915_fragment_shader *fs)
416 {
417    uint32_t arg1, arg2, arg3;
418 
419    assert(numArgs <= 3);
420 
421    arg1 = (numArgs < 1) ? 0 : src_vector(p, &inst->Src[0], fs);
422    arg2 = (numArgs < 2) ? 0 : src_vector(p, &inst->Src[1], fs);
423    arg3 = (numArgs < 3) ? 0 : src_vector(p, &inst->Src[2], fs);
424 
425    i915_emit_arith(p, opcode, get_result_vector(p, &inst->Dst[0]),
426                    get_result_flags(inst), 0, arg1, arg2, arg3);
427 }
428 
429 /** As above, but swap the first two src regs */
430 static void
emit_simple_arith_swap2(struct i915_fp_compile * p,const struct i915_full_instruction * inst,uint32_t opcode,uint32_t numArgs,struct i915_fragment_shader * fs)431 emit_simple_arith_swap2(struct i915_fp_compile *p,
432                         const struct i915_full_instruction *inst,
433                         uint32_t opcode, uint32_t numArgs,
434                         struct i915_fragment_shader *fs)
435 {
436    struct i915_full_instruction inst2;
437 
438    assert(numArgs == 2);
439 
440    /* transpose first two registers */
441    inst2 = *inst;
442    inst2.Src[0] = inst->Src[1];
443    inst2.Src[1] = inst->Src[0];
444 
445    emit_simple_arith(p, &inst2, opcode, numArgs, fs);
446 }
447 
448 /*
449  * Translate TGSI instruction to i915 instruction.
450  *
451  * Possible concerns:
452  *
453  * DDX, DDY -- return 0
454  * SIN, COS -- could use another taylor step?
455  * LIT      -- results seem a little different to sw mesa
456  * LOG      -- different to mesa on negative numbers, but this is conformant.
457  */
458 static void
i915_translate_instruction(struct i915_fp_compile * p,const struct i915_full_instruction * inst,struct i915_fragment_shader * fs)459 i915_translate_instruction(struct i915_fp_compile *p,
460                            const struct i915_full_instruction *inst,
461                            struct i915_fragment_shader *fs)
462 {
463    uint32_t src0, src1, src2, flags;
464    uint32_t tmp = 0;
465 
466    switch (inst->Instruction.Opcode) {
467    case TGSI_OPCODE_ADD:
468       emit_simple_arith(p, inst, A0_ADD, 2, fs);
469       break;
470 
471    case TGSI_OPCODE_CEIL:
472       src0 = src_vector(p, &inst->Src[0], fs);
473       tmp = i915_get_utemp(p);
474       flags = get_result_flags(inst);
475       i915_emit_arith(p, A0_FLR, tmp, flags & A0_DEST_CHANNEL_ALL, 0,
476                       negate(src0, 1, 1, 1, 1), 0, 0);
477       i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]), flags, 0,
478                       negate(tmp, 1, 1, 1, 1), 0, 0);
479       break;
480 
481    case TGSI_OPCODE_CMP:
482       src0 = src_vector(p, &inst->Src[0], fs);
483       src1 = src_vector(p, &inst->Src[1], fs);
484       src2 = src_vector(p, &inst->Src[2], fs);
485       i915_emit_arith(p, A0_CMP, get_result_vector(p, &inst->Dst[0]),
486                       get_result_flags(inst), 0, src0, src2,
487                       src1); /* NOTE: order of src2, src1 */
488       break;
489 
490    case TGSI_OPCODE_DDX:
491    case TGSI_OPCODE_DDY:
492       /* XXX We just output 0 here */
493       debug_printf("Punting DDX/DDY\n");
494       src0 = get_result_vector(p, &inst->Dst[0]);
495       i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]),
496                       get_result_flags(inst), 0,
497                       swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0);
498       break;
499 
500    case TGSI_OPCODE_DP2:
501       src0 = src_vector(p, &inst->Src[0], fs);
502       src1 = src_vector(p, &inst->Src[1], fs);
503 
504       i915_emit_arith(p, A0_DP3, get_result_vector(p, &inst->Dst[0]),
505                       get_result_flags(inst), 0,
506                       swizzle(src0, X, Y, ZERO, ZERO), src1, 0);
507       break;
508 
509    case TGSI_OPCODE_DP3:
510       emit_simple_arith(p, inst, A0_DP3, 2, fs);
511       break;
512 
513    case TGSI_OPCODE_DP4:
514       emit_simple_arith(p, inst, A0_DP4, 2, fs);
515       break;
516 
517    case TGSI_OPCODE_DST:
518       src0 = src_vector(p, &inst->Src[0], fs);
519       src1 = src_vector(p, &inst->Src[1], fs);
520 
521       /* result[0] = 1    * 1;
522        * result[1] = a[1] * b[1];
523        * result[2] = a[2] * 1;
524        * result[3] = 1    * b[3];
525        */
526       i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
527                       get_result_flags(inst), 0, swizzle(src0, ONE, Y, Z, ONE),
528                       swizzle(src1, ONE, Y, ONE, W), 0);
529       break;
530 
531    case TGSI_OPCODE_END:
532       /* no-op */
533       break;
534 
535    case TGSI_OPCODE_EX2:
536       src0 = src_vector(p, &inst->Src[0], fs);
537 
538       i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]),
539                       get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
540                       0);
541       break;
542 
543    case TGSI_OPCODE_FLR:
544       emit_simple_arith(p, inst, A0_FLR, 1, fs);
545       break;
546 
547    case TGSI_OPCODE_FRC:
548       emit_simple_arith(p, inst, A0_FRC, 1, fs);
549       break;
550 
551    case TGSI_OPCODE_KILL_IF:
552       /* kill if src[0].x < 0 || src[0].y < 0 ... */
553       src0 = src_vector(p, &inst->Src[0], fs);
554       tmp = i915_get_utemp(p);
555 
556       i915_emit_texld(p, tmp,               /* dest reg: a dummy reg */
557                       A0_DEST_CHANNEL_ALL,  /* dest writemask */
558                       0,                    /* sampler */
559                       src0,                 /* coord*/
560                       T0_TEXKILL,           /* opcode */
561                       TGSI_WRITEMASK_XYZW); /* coord_mask */
562       break;
563 
564    case TGSI_OPCODE_KILL:
565       /* unconditional kill */
566       tmp = i915_get_utemp(p);
567 
568       i915_emit_texld(p, tmp,              /* dest reg: a dummy reg */
569                       A0_DEST_CHANNEL_ALL, /* dest writemask */
570                       0,                   /* sampler */
571                       negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE),
572                              1, 1, 1, 1), /* coord */
573                       T0_TEXKILL,         /* opcode */
574                       TGSI_WRITEMASK_X);  /* coord_mask */
575       break;
576 
577    case TGSI_OPCODE_LG2:
578       src0 = src_vector(p, &inst->Src[0], fs);
579 
580       i915_emit_arith(p, A0_LOG, get_result_vector(p, &inst->Dst[0]),
581                       get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
582                       0);
583       break;
584 
585    case TGSI_OPCODE_LIT:
586       src0 = src_vector(p, &inst->Src[0], fs);
587       tmp = i915_get_utemp(p);
588 
589       /* tmp = max( a.xyzw, a.00zw )
590        * XXX: Clamp tmp.w to -128..128
591        * tmp.y = log(tmp.y)
592        * tmp.y = tmp.w * tmp.y
593        * tmp.y = exp(tmp.y)
594        * result = cmp (a.11-x1, a.1x01, a.1xy1 )
595        */
596       i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
597                       swizzle(src0, ZERO, ZERO, Z, W), 0);
598 
599       i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
600                       swizzle(tmp, Y, Y, Y, Y), 0, 0);
601 
602       i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
603                       swizzle(tmp, ZERO, Y, ZERO, ZERO),
604                       swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
605 
606       i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
607                       swizzle(tmp, Y, Y, Y, Y), 0, 0);
608 
609       i915_emit_arith(
610          p, A0_CMP, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst),
611          0, negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
612          swizzle(tmp, ONE, X, ZERO, ONE), swizzle(tmp, ONE, X, Y, ONE));
613 
614       break;
615 
616    case TGSI_OPCODE_LRP:
617       src0 = src_vector(p, &inst->Src[0], fs);
618       src1 = src_vector(p, &inst->Src[1], fs);
619       src2 = src_vector(p, &inst->Src[2], fs);
620       flags = get_result_flags(inst);
621       tmp = i915_get_utemp(p);
622 
623       /* b*a + c*(1-a)
624        *
625        * b*a + c - ca
626        *
627        * tmp = b*a + c,
628        * result = (-c)*a + tmp
629        */
630       i915_emit_arith(p, A0_MAD, tmp, flags & A0_DEST_CHANNEL_ALL, 0, src1,
631                       src0, src2);
632 
633       i915_emit_arith(p, A0_MAD, get_result_vector(p, &inst->Dst[0]), flags, 0,
634                       negate(src2, 1, 1, 1, 1), src0, tmp);
635       break;
636 
637    case TGSI_OPCODE_MAD:
638       emit_simple_arith(p, inst, A0_MAD, 3, fs);
639       break;
640 
641    case TGSI_OPCODE_MAX:
642       emit_simple_arith(p, inst, A0_MAX, 2, fs);
643       break;
644 
645    case TGSI_OPCODE_MIN:
646       emit_simple_arith(p, inst, A0_MIN, 2, fs);
647       break;
648 
649    case TGSI_OPCODE_MOV:
650       emit_simple_arith(p, inst, A0_MOV, 1, fs);
651       break;
652 
653    case TGSI_OPCODE_MUL:
654       emit_simple_arith(p, inst, A0_MUL, 2, fs);
655       break;
656 
657    case TGSI_OPCODE_NOP:
658       break;
659 
660    case TGSI_OPCODE_POW:
661       src0 = src_vector(p, &inst->Src[0], fs);
662       src1 = src_vector(p, &inst->Src[1], fs);
663       tmp = i915_get_utemp(p);
664       flags = get_result_flags(inst);
665 
666       /* XXX: masking on intermediate values, here and elsewhere.
667        */
668       i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_X, 0,
669                       swizzle(src0, X, X, X, X), 0, 0);
670 
671       i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
672 
673       i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]), flags, 0,
674                       swizzle(tmp, X, X, X, X), 0, 0);
675       break;
676 
677    case TGSI_OPCODE_RET:
678       /* XXX: no-op? */
679       break;
680 
681    case TGSI_OPCODE_RCP:
682       src0 = src_vector(p, &inst->Src[0], fs);
683 
684       i915_emit_arith(p, A0_RCP, get_result_vector(p, &inst->Dst[0]),
685                       get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
686                       0);
687       break;
688 
689    case TGSI_OPCODE_RSQ:
690       src0 = src_vector(p, &inst->Src[0], fs);
691 
692       i915_emit_arith(p, A0_RSQ, get_result_vector(p, &inst->Dst[0]),
693                       get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
694                       0);
695       break;
696 
697    case TGSI_OPCODE_SEQ: {
698       const uint32_t zero =
699          swizzle(UREG(REG_TYPE_R, 0), SRC_ZERO, SRC_ZERO, SRC_ZERO, SRC_ZERO);
700 
701       /* if we're both >= and <= then we're == */
702       src0 = src_vector(p, &inst->Src[0], fs);
703       src1 = src_vector(p, &inst->Src[1], fs);
704       tmp = i915_get_utemp(p);
705 
706       if (src0 == zero || src1 == zero) {
707          if (src0 == zero)
708             src0 = src1;
709 
710          /* x == 0 is equivalent to -abs(x) >= 0, but the latter requires only
711           * two instructions instead of three.
712           */
713          i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
714                          negate(src0, 1, 1, 1, 1), 0);
715          i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),
716                          get_result_flags(inst), 0, negate(tmp, 1, 1, 1, 1),
717                          zero, 0);
718       } else {
719          i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);
720 
721          i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),
722                          get_result_flags(inst), 0, src1, src0, 0);
723 
724          i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
725                          get_result_flags(inst), 0,
726                          get_result_vector(p, &inst->Dst[0]), tmp, 0);
727       }
728 
729       break;
730    }
731 
732    case TGSI_OPCODE_SGE:
733       emit_simple_arith(p, inst, A0_SGE, 2, fs);
734       break;
735 
736    case TGSI_OPCODE_SLE:
737       /* like SGE, but swap reg0, reg1 */
738       emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs);
739       break;
740 
741    case TGSI_OPCODE_SLT:
742       emit_simple_arith(p, inst, A0_SLT, 2, fs);
743       break;
744 
745    case TGSI_OPCODE_SGT:
746       /* like SLT, but swap reg0, reg1 */
747       emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);
748       break;
749 
750    case TGSI_OPCODE_SNE: {
751       const uint32_t zero =
752          swizzle(UREG(REG_TYPE_R, 0), SRC_ZERO, SRC_ZERO, SRC_ZERO, SRC_ZERO);
753 
754       /* if we're < or > then we're != */
755       src0 = src_vector(p, &inst->Src[0], fs);
756       src1 = src_vector(p, &inst->Src[1], fs);
757       tmp = i915_get_utemp(p);
758 
759       if (src0 == zero || src1 == zero) {
760          if (src0 == zero)
761             src0 = src1;
762 
763          /* x != 0 is equivalent to -abs(x) < 0, but the latter requires only
764           * two instructions instead of three.
765           */
766          i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
767                          negate(src0, 1, 1, 1, 1), 0);
768          i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
769                          get_result_flags(inst), 0, negate(tmp, 1, 1, 1, 1),
770                          zero, 0);
771       } else {
772          i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);
773 
774          i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
775                          get_result_flags(inst), 0, src1, src0, 0);
776 
777          i915_emit_arith(p, A0_ADD, get_result_vector(p, &inst->Dst[0]),
778                          get_result_flags(inst), 0,
779                          get_result_vector(p, &inst->Dst[0]), tmp, 0);
780       }
781       break;
782    }
783 
784    case TGSI_OPCODE_SSG:
785       /* compute (src>0) - (src<0) */
786       src0 = src_vector(p, &inst->Src[0], fs);
787       tmp = i915_get_utemp(p);
788 
789       i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
790                       swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0);
791 
792       i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
793                       get_result_flags(inst), 0,
794                       swizzle(src0, ZERO, ZERO, ZERO, ZERO), src0, 0);
795 
796       i915_emit_arith(
797          p, A0_ADD, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst),
798          0, get_result_vector(p, &inst->Dst[0]), negate(tmp, 1, 1, 1, 1), 0);
799       break;
800 
801    case TGSI_OPCODE_TEX:
802       emit_tex(p, inst, T0_TEXLD, fs);
803       break;
804 
805    case TGSI_OPCODE_TRUNC:
806       emit_simple_arith(p, inst, A0_TRC, 1, fs);
807       break;
808 
809    case TGSI_OPCODE_TXB:
810       emit_tex(p, inst, T0_TEXLDB, fs);
811       break;
812 
813    case TGSI_OPCODE_TXP:
814       emit_tex(p, inst, T0_TEXLDP, fs);
815       break;
816 
817    default:
818       i915_program_error(p, "bad opcode %s (%d)",
819                          tgsi_get_opcode_name(inst->Instruction.Opcode),
820                          inst->Instruction.Opcode);
821       return;
822    }
823 
824    i915_release_utemps(p);
825 }
826 
827 static void
i915_translate_token(struct i915_fp_compile * p,const union i915_full_token * token,struct i915_fragment_shader * fs)828 i915_translate_token(struct i915_fp_compile *p,
829                      const union i915_full_token *token,
830                      struct i915_fragment_shader *fs)
831 {
832    struct i915_fragment_shader *ifs = p->shader;
833    switch (token->Token.Type) {
834    case TGSI_TOKEN_TYPE_PROPERTY:
835       /* Ignore properties where we only support one value. */
836       assert(token->FullProperty.Property.PropertyName ==
837                 TGSI_PROPERTY_FS_COORD_ORIGIN ||
838              token->FullProperty.Property.PropertyName ==
839                 TGSI_PROPERTY_FS_COORD_PIXEL_CENTER ||
840              token->FullProperty.Property.PropertyName ==
841                 TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS ||
842              token->FullProperty.Property.PropertyName ==
843                 TGSI_PROPERTY_SEPARABLE_PROGRAM);
844       break;
845 
846    case TGSI_TOKEN_TYPE_DECLARATION:
847       if (token->FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) {
848          if (token->FullDeclaration.Range.Last >= I915_MAX_CONSTANT) {
849             i915_program_error(p, "Exceeded %d max uniforms",
850                                I915_MAX_CONSTANT);
851          } else {
852             uint32_t i;
853             for (i = token->FullDeclaration.Range.First;
854                  i <= token->FullDeclaration.Range.Last; i++) {
855                ifs->constant_flags[i] = I915_CONSTFLAG_USER;
856                ifs->num_constants = MAX2(ifs->num_constants, i + 1);
857             }
858          }
859       } else if (token->FullDeclaration.Declaration.File ==
860                  TGSI_FILE_TEMPORARY) {
861          if (token->FullDeclaration.Range.Last >= I915_MAX_TEMPORARY) {
862             i915_program_error(p, "Exceeded max TGSI temps (%d/%d)",
863                                token->FullDeclaration.Range.Last + 1, I915_MAX_TEMPORARY);
864          } else {
865             uint32_t i;
866             for (i = token->FullDeclaration.Range.First;
867                  i <= token->FullDeclaration.Range.Last; i++) {
868                /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
869                p->temp_flag |= (1 << i); /* mark temp as used */
870             }
871          }
872       }
873       break;
874 
875    case TGSI_TOKEN_TYPE_IMMEDIATE: {
876       const struct tgsi_full_immediate *imm = &token->FullImmediate;
877       const uint32_t pos = p->num_immediates++;
878       uint32_t j;
879       assert(imm->Immediate.NrTokens <= 4 + 1);
880       for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
881          p->immediates[pos][j] = imm->u[j].Float;
882       }
883    } break;
884 
885    case TGSI_TOKEN_TYPE_INSTRUCTION:
886       if (p->first_instruction) {
887          /* resolve location of immediates */
888          uint32_t i, j;
889          for (i = 0; i < p->num_immediates; i++) {
890             /* find constant slot for this immediate */
891             for (j = 0; j < I915_MAX_CONSTANT; j++) {
892                if (ifs->constant_flags[j] == 0x0) {
893                   memcpy(ifs->constants[j], p->immediates[i],
894                          4 * sizeof(float));
895                   /*printf("immediate %d maps to const %d\n", i, j);*/
896                   ifs->constant_flags[j] = 0xf; /* all four comps used */
897                   p->immediates_map[i] = j;
898                   ifs->num_constants = MAX2(ifs->num_constants, j + 1);
899                   break;
900                }
901             }
902             if (j == I915_MAX_CONSTANT) {
903                i915_program_error(p, "Exceeded %d max uniforms and immediates.",
904                                   I915_MAX_CONSTANT);
905             }
906          }
907 
908          p->first_instruction = false;
909       }
910 
911       i915_translate_instruction(p, &token->FullInstruction, fs);
912       break;
913 
914    default:
915       assert(0);
916    }
917 }
918 
919 /**
920  * Translate TGSI fragment shader into i915 hardware instructions.
921  * \param p  the translation state
922  * \param tokens  the TGSI token array
923  */
924 static void
i915_translate_instructions(struct i915_fp_compile * p,const struct i915_token_list * tokens,struct i915_fragment_shader * fs)925 i915_translate_instructions(struct i915_fp_compile *p,
926                             const struct i915_token_list *tokens,
927                             struct i915_fragment_shader *fs)
928 {
929    int i;
930    for (i = 0; i < tokens->NumTokens && !p->error[0]; i++) {
931       i915_translate_token(p, &tokens->Tokens[i], fs);
932    }
933 }
934 
935 static struct i915_fp_compile *
i915_init_compile(struct i915_fragment_shader * ifs)936 i915_init_compile(struct i915_fragment_shader *ifs)
937 {
938    struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
939    int i;
940 
941    p->shader = ifs;
942    p->error = ralloc_strdup(NULL, "");
943 
944    /* Put new constants at end of const buffer, growing downward.
945     * The problem is we don't know how many user-defined constants might
946     * be specified with pipe->set_constant_buffer().
947     * Should pre-scan the user's program to determine the highest-numbered
948     * constant referenced.
949     */
950    ifs->num_constants = 0;
951    memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
952 
953    memset(&p->register_phases, 0, sizeof(p->register_phases));
954 
955    for (i = 0; i < I915_TEX_UNITS; i++)
956       ifs->texcoords[i].semantic = -1;
957 
958    p->first_instruction = true;
959 
960    p->nr_tex_indirect = 1; /* correct? */
961    p->nr_tex_insn = 0;
962    p->nr_alu_insn = 0;
963    p->nr_decl_insn = 0;
964 
965    p->csr = p->program;
966    p->decl = p->declarations;
967    p->decl_s = 0;
968    p->decl_t = 0;
969    p->temp_flag = ~0x0U << I915_MAX_TEMPORARY;
970    p->utemp_flag = ~0x7;
971 
972    /* initialize the first program word */
973    *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
974 
975    return p;
976 }
977 
978 /* Copy compile results to the fragment program struct and destroy the
979  * compilation context.
980  */
981 static void
i915_fini_compile(struct i915_context * i915,struct i915_fp_compile * p)982 i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
983 {
984    struct i915_fragment_shader *ifs = p->shader;
985    unsigned long program_size = (unsigned long)(p->csr - p->program);
986    unsigned long decl_size = (unsigned long)(p->decl - p->declarations);
987 
988    if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) {
989       i915_program_error(p,
990                          "Exceeded max nr indirect texture lookups (%d/%d)\n",
991                          p->nr_tex_indirect, I915_MAX_TEX_INDIRECT);
992    }
993 
994    if (p->nr_tex_insn > I915_MAX_TEX_INSN) {
995       i915_program_error(p, "Exceeded max TEX instructions (%d/%d)",
996                          p->nr_tex_insn, I915_MAX_TEX_INSN);
997    }
998 
999    if (p->nr_alu_insn > I915_MAX_ALU_INSN) {
1000       i915_program_error(p, "Exceeded max ALU instructions (%d/%d)",
1001                          p->nr_alu_insn, I915_MAX_ALU_INSN);
1002    }
1003 
1004    if (p->nr_decl_insn > I915_MAX_DECL_INSN) {
1005       i915_program_error(p, "Exceeded max DECL instructions (%d/%d)",
1006                          p->nr_decl_insn, I915_MAX_DECL_INSN);
1007    }
1008 
1009    /* hw doesn't seem to like empty frag programs (num_instructions == 1 is just
1010     * TGSI_END), even when the depth write fixup gets emitted below - maybe that
1011     * one is fishy, too?
1012     */
1013    if (ifs->info.num_instructions == 1)
1014       i915_program_error(p, "Empty fragment shader");
1015 
1016    if (strlen(p->error) != 0) {
1017       p->NumNativeInstructions = 0;
1018       p->NumNativeAluInstructions = 0;
1019       p->NumNativeTexInstructions = 0;
1020       p->NumNativeTexIndirections = 0;
1021 
1022       i915_use_passthrough_shader(ifs);
1023    } else {
1024       p->NumNativeInstructions =
1025          p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn;
1026       p->NumNativeAluInstructions = p->nr_alu_insn;
1027       p->NumNativeTexInstructions = p->nr_tex_insn;
1028       p->NumNativeTexIndirections = p->nr_tex_indirect;
1029 
1030       /* patch in the program length */
1031       p->declarations[0] |= program_size + decl_size - 2;
1032 
1033       /* Copy compilation results to fragment program struct:
1034        */
1035       assert(!ifs->program);
1036 
1037       ifs->program_len = decl_size + program_size;
1038       ifs->program = (uint32_t *)MALLOC(ifs->program_len * sizeof(uint32_t));
1039       memcpy(ifs->program, p->declarations, decl_size * sizeof(uint32_t));
1040       memcpy(&ifs->program[decl_size], p->program,
1041              program_size * sizeof(uint32_t));
1042 
1043       if (i915) {
1044          util_debug_message(
1045             &i915->debug, SHADER_INFO,
1046             "%s shader: %d inst, %d tex, %d tex_indirect, %d temps, %d const",
1047             _mesa_shader_stage_to_abbrev(MESA_SHADER_FRAGMENT),
1048             (int)program_size, p->nr_tex_insn, p->nr_tex_indirect,
1049             p->shader->info.file_max[TGSI_FILE_TEMPORARY] + 1,
1050             ifs->num_constants);
1051       }
1052    }
1053 
1054    if (strlen(p->error) != 0)
1055       ifs->error = p->error;
1056    else
1057       ralloc_free(p->error);
1058 
1059    /* Release the compilation struct:
1060     */
1061    FREE(p);
1062 }
1063 
1064 /**
1065  * Rather than trying to intercept and jiggle depth writes during
1066  * emit, just move the value into its correct position at the end of
1067  * the program:
1068  */
1069 static void
i915_fixup_depth_write(struct i915_fp_compile * p)1070 i915_fixup_depth_write(struct i915_fp_compile *p)
1071 {
1072    for (int i = 0; i < p->shader->info.num_outputs; i++) {
1073       if (p->shader->info.output_semantic_name[i] != TGSI_SEMANTIC_POSITION)
1074          continue;
1075 
1076       const uint32_t depth = UREG(REG_TYPE_OD, 0);
1077 
1078       i915_emit_arith(p, A0_MOV,                  /* opcode */
1079                       depth,                      /* dest reg */
1080                       A0_DEST_CHANNEL_W,          /* write mask */
1081                       0,                          /* saturate? */
1082                       swizzle(depth, X, Y, Z, Z), /* src0 */
1083                       0, 0 /* src1, src2 */);
1084    }
1085 }
1086 
1087 void
i915_translate_fragment_program(struct i915_context * i915,struct i915_fragment_shader * fs)1088 i915_translate_fragment_program(struct i915_context *i915,
1089                                 struct i915_fragment_shader *fs)
1090 {
1091    struct i915_fp_compile *p;
1092    const struct tgsi_token *tokens = fs->state.tokens;
1093    struct i915_token_list *i_tokens;
1094    bool debug =
1095       I915_DBG_ON(DBG_FS) && (!fs->internal || NIR_DEBUG(PRINT_INTERNAL));
1096 
1097    if (debug) {
1098       mesa_logi("TGSI fragment shader:");
1099       tgsi_dump(tokens, 0);
1100    }
1101 
1102    p = i915_init_compile(fs);
1103 
1104    i_tokens = i915_optimize(tokens);
1105    i915_translate_instructions(p, i_tokens, fs);
1106    i915_fixup_depth_write(p);
1107 
1108    i915_fini_compile(i915, p);
1109    i915_optimize_free(i_tokens);
1110 
1111    if (debug) {
1112       if (fs->error)
1113          mesa_loge("%s", fs->error);
1114 
1115       mesa_logi("i915 fragment shader with %d constants%s", fs->num_constants,
1116                 fs->num_constants ? ":" : "");
1117 
1118       for (int i = 0; i < I915_MAX_CONSTANT; i++) {
1119          if (fs->constant_flags[i] &&
1120              fs->constant_flags[i] != I915_CONSTFLAG_USER) {
1121             mesa_logi("\t\tC[%d] = { %f, %f, %f, %f }", i, fs->constants[i][0],
1122                       fs->constants[i][1], fs->constants[i][2],
1123                       fs->constants[i][3]);
1124          }
1125       }
1126       i915_disassemble_program(fs->program, fs->program_len);
1127    }
1128 }
1129