xref: /aosp_15_r20/external/mesa3d/src/mesa/state_tracker/st_atifs_to_nir.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2016 Miklós Máté
3  * Copyright (C) 2020 Google LLC
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "main/mtypes.h"
25 #include "main/atifragshader.h"
26 #include "main/errors.h"
27 #include "program/prog_parameter.h"
28 #include "program/prog_instruction.h"
29 #include "program/prog_to_nir.h"
30 
31 #include "st_program.h"
32 #include "st_atifs_to_nir.h"
33 #include "compiler/nir/nir_builder.h"
34 
35 /**
36  * Intermediate state used during shader translation.
37  */
38 struct st_translate {
39    nir_builder *b;
40    struct ati_fragment_shader *atifs;
41 
42    nir_def *temps[MAX_PROGRAM_TEMPS];
43 
44    nir_variable *fragcolor;
45    nir_variable *constants;
46    nir_variable *samplers[MAX_TEXTURE_UNITS];
47 
48    nir_def *inputs[VARYING_SLOT_MAX];
49 
50    unsigned current_pass;
51 
52    bool regs_written[MAX_NUM_PASSES_ATI][MAX_NUM_FRAGMENT_REGISTERS_ATI];
53 
54    bool error;
55 };
56 
57 static nir_def *
nir_channel_vec4(nir_builder * b,nir_def * src,unsigned channel)58 nir_channel_vec4(nir_builder *b, nir_def *src, unsigned channel)
59 {
60    unsigned swizzle[4] = { channel, channel, channel, channel };
61    return nir_swizzle(b, src, swizzle, 4);
62 }
63 
64 static nir_def *
nir_imm_vec4_float(nir_builder * b,float f)65 nir_imm_vec4_float(nir_builder *b, float f)
66 {
67    return nir_imm_vec4(b, f, f, f, f);
68 }
69 
70 static nir_def *
get_temp(struct st_translate * t,unsigned index)71 get_temp(struct st_translate *t, unsigned index)
72 {
73    if (!t->temps[index])
74       t->temps[index] = nir_undef(t->b, 4, 32);
75    return t->temps[index];
76 }
77 
78 static nir_def *
apply_swizzle(struct st_translate * t,struct nir_def * src,GLuint swizzle)79 apply_swizzle(struct st_translate *t,
80               struct nir_def *src, GLuint swizzle)
81 {
82    /* From the ATI_fs spec:
83     *
84     *     "Table 3.20 shows the <swizzle> modes:
85     *
86     *                           Coordinates Used for 1D or      Coordinates Used for
87     *      Swizzle              2D SampleMap and PassTexCoord   3D or cubemap SampleMap
88     *      -------              -----------------------------   -----------------------
89     *      SWIZZLE_STR_ATI      (s, t, r, undefined)            (s, t, r, undefined)
90     *      SWIZZLE_STQ_ATI      (s, t, q, undefined)            (s, t, q, undefined)
91     *      SWIZZLE_STR_DR_ATI   (s/r, t/r, 1/r, undefined)      (undefined)
92     *      SWIZZLE_STQ_DQ_ATI   (s/q, t/q, 1/q, undefined)      (undefined)
93     */
94    if (swizzle == GL_SWIZZLE_STR_ATI) {
95       return src;
96    } else if (swizzle == GL_SWIZZLE_STQ_ATI) {
97       static unsigned xywz[4] = { 0, 1, 3, 2 };
98       return nir_swizzle(t->b, src, xywz, 4);
99    } else {
100       nir_def *rcp = nir_frcp(t->b, nir_channel(t->b, src,
101                                                     swizzle == GL_SWIZZLE_STR_DR_ATI ? 2 : 3));
102 
103       nir_def *st_mul = nir_fmul(t->b, nir_trim_vector(t->b, src, 2), rcp);
104 
105       return nir_vec4(t->b,
106                       nir_channel(t->b, st_mul, 0),
107                       nir_channel(t->b, st_mul, 1),
108                       rcp,
109                       rcp);
110    }
111 }
112 
113 static nir_def *
load_input(struct st_translate * t,gl_varying_slot slot)114 load_input(struct st_translate *t, gl_varying_slot slot)
115 {
116    if (!t->inputs[slot]) {
117       nir_variable *var = nir_create_variable_with_location(t->b->shader, nir_var_shader_in, slot,
118                                                             glsl_vec4_type());
119       var->data.interpolation = INTERP_MODE_NONE;
120 
121       t->inputs[slot] = nir_load_var(t->b, var);
122    }
123 
124    return t->inputs[slot];
125 }
126 
127 static nir_def *
atifs_load_uniform(struct st_translate * t,int index)128 atifs_load_uniform(struct st_translate *t, int index)
129 {
130    nir_deref_instr *deref = nir_build_deref_array(t->b,
131                                                   nir_build_deref_var(t->b, t->constants),
132                                                   nir_imm_int(t->b, index));
133    return nir_load_deref(t->b, deref);
134 }
135 
136 static struct nir_def *
get_source(struct st_translate * t,GLenum src_type)137 get_source(struct st_translate *t, GLenum src_type)
138 {
139    if (src_type >= GL_REG_0_ATI && src_type <= GL_REG_5_ATI) {
140       if (t->regs_written[t->current_pass][src_type - GL_REG_0_ATI]) {
141          return get_temp(t, src_type - GL_REG_0_ATI);
142       } else {
143          return nir_imm_vec4_float(t->b, 0.0);
144       }
145    } else if (src_type >= GL_CON_0_ATI && src_type <= GL_CON_7_ATI) {
146       int index = src_type - GL_CON_0_ATI;
147       if (t->atifs->LocalConstDef & (1 << index)) {
148          return nir_imm_vec4(t->b,
149                              t->atifs->Constants[index][0],
150                              t->atifs->Constants[index][1],
151                              t->atifs->Constants[index][2],
152                              t->atifs->Constants[index][3]);
153       } else {
154          return atifs_load_uniform(t, index);
155       }
156    } else if (src_type == GL_ZERO) {
157       return nir_imm_vec4_float(t->b, 0.0);
158    } else if (src_type == GL_ONE) {
159       return nir_imm_vec4_float(t->b, 1.0);
160    } else if (src_type == GL_PRIMARY_COLOR_ARB) {
161       return load_input(t, VARYING_SLOT_COL0);
162    } else if (src_type == GL_SECONDARY_INTERPOLATOR_ATI) {
163       return load_input(t, VARYING_SLOT_COL1);
164    } else {
165       /* frontend prevents this */
166       unreachable("unknown source");
167    }
168 }
169 
170 static nir_def *
prepare_argument(struct st_translate * t,const struct atifs_instruction * inst,const unsigned argId,bool alpha)171 prepare_argument(struct st_translate *t, const struct atifs_instruction *inst,
172                  const unsigned argId, bool alpha)
173 {
174    if (argId >= inst->ArgCount[alpha]) {
175       _mesa_warning(0, "Using 0 for missing argument %d\n", argId);
176       return nir_imm_vec4_float(t->b, 0.0f);
177    }
178 
179    const struct atifragshader_src_register *srcReg = &inst->SrcReg[alpha][argId];
180 
181    nir_def *src = get_source(t, srcReg->Index);
182 
183    switch (srcReg->argRep) {
184    case GL_NONE:
185       break;
186    case GL_RED:
187       src = nir_channel_vec4(t->b, src, 0);
188       break;
189    case GL_GREEN:
190       src = nir_channel_vec4(t->b, src, 1);
191       break;
192    case GL_BLUE:
193       src = nir_channel_vec4(t->b, src, 2);
194       break;
195    case GL_ALPHA:
196       src = nir_channel_vec4(t->b, src, 3);
197       break;
198    }
199 
200    t->temps[MAX_NUM_FRAGMENT_REGISTERS_ATI + argId] = src;
201 
202    if (srcReg->argMod & GL_COMP_BIT_ATI)
203       src = nir_fsub_imm(t->b, 1.0, src);
204    if (srcReg->argMod & GL_BIAS_BIT_ATI)
205       src = nir_fadd_imm(t->b, src, -0.5);
206    if (srcReg->argMod & GL_2X_BIT_ATI)
207       src = nir_fadd(t->b, src, src);
208    if (srcReg->argMod & GL_NEGATE_BIT_ATI)
209       src = nir_fneg(t->b, src);
210 
211    return src;
212 }
213 
214 static nir_def *
emit_arith_inst(struct st_translate * t,const struct atifs_instruction * inst,bool alpha)215 emit_arith_inst(struct st_translate *t,
216                 const struct atifs_instruction *inst,
217                 bool alpha)
218 {
219    nir_def *src[3] = {0};
220    for (int i = 0; i < inst->ArgCount[alpha]; i++)
221       src[i] = prepare_argument(t, inst, i, alpha);
222 
223    switch (inst->Opcode[alpha]) {
224    case GL_MOV_ATI:
225       return src[0];
226 
227    case GL_ADD_ATI:
228       return nir_fadd(t->b, src[0], src[1]);
229 
230    case GL_SUB_ATI:
231       return nir_fsub(t->b, src[0], src[1]);
232 
233    case GL_MUL_ATI:
234       return nir_fmul(t->b, src[0], src[1]);
235 
236    case GL_MAD_ATI:
237       return nir_ffma(t->b, src[0], src[1], src[2]);
238 
239    case GL_LERP_ATI:
240       return nir_flrp(t->b, src[2], src[1], src[0]);
241 
242    case GL_CND_ATI:
243       return nir_bcsel(t->b,
244                        nir_fle_imm(t->b, src[2], 0.5),
245                        src[1],
246                        src[0]);
247 
248    case GL_CND0_ATI:
249       return nir_bcsel(t->b,
250                        nir_fge_imm(t->b, src[2], 0.0),
251                        src[0],
252                        src[1]);
253 
254    case GL_DOT2_ADD_ATI:
255       return nir_channel_vec4(t->b,
256                               nir_fadd(t->b,
257                                        nir_fdot2(t->b, src[0], src[1]),
258                                        nir_channel(t->b, src[1], 2)),
259                               0);
260 
261    case GL_DOT3_ATI:
262       return nir_channel_vec4(t->b, nir_fdot3(t->b,src[0], src[1]), 0);
263 
264    case GL_DOT4_ATI:
265       return nir_channel_vec4(t->b, nir_fdot4(t->b,src[0], src[1]), 0);
266 
267    default:
268       unreachable("Unknown ATI_fs opcode");
269    }
270 }
271 
272 static nir_def *
emit_dstmod(struct st_translate * t,struct nir_def * dst,GLuint dstMod)273 emit_dstmod(struct st_translate *t,
274             struct nir_def *dst, GLuint dstMod)
275 {
276    switch (dstMod & ~GL_SATURATE_BIT_ATI) {
277    case GL_2X_BIT_ATI:
278       dst = nir_fmul_imm(t->b, dst, 2.0f);
279       break;
280    case GL_4X_BIT_ATI:
281       dst = nir_fmul_imm(t->b, dst, 4.0f);
282       break;
283    case GL_8X_BIT_ATI:
284       dst = nir_fmul_imm(t->b, dst, 8.0f);
285       break;
286    case GL_HALF_BIT_ATI:
287       dst = nir_fmul_imm(t->b, dst, 0.5f);
288       break;
289    case GL_QUARTER_BIT_ATI:
290       dst = nir_fmul_imm(t->b, dst, 0.25f);
291       break;
292    case GL_EIGHTH_BIT_ATI:
293       dst = nir_fmul_imm(t->b, dst, 0.125f);
294       break;
295    default:
296       break;
297    }
298 
299    if (dstMod & GL_SATURATE_BIT_ATI)
300       dst = nir_fsat(t->b, dst);
301 
302    return dst;
303 }
304 
305 /**
306  * Compile one setup instruction to NIR instructions.
307  */
308 static void
compile_setupinst(struct st_translate * t,const unsigned r,const struct atifs_setupinst * texinst)309 compile_setupinst(struct st_translate *t,
310                   const unsigned r,
311                   const struct atifs_setupinst *texinst)
312 {
313    if (!texinst->Opcode)
314       return;
315 
316    GLuint pass_tex = texinst->src;
317 
318    nir_def *coord;
319 
320    if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
321       unsigned attr = pass_tex - GL_TEXTURE0_ARB;
322 
323       coord = load_input(t, VARYING_SLOT_TEX0 + attr);
324    } else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
325       unsigned reg = pass_tex - GL_REG_0_ATI;
326 
327       /* the frontend already validated that REG is only allowed in second pass */
328       if (t->regs_written[0][reg]) {
329          coord = t->temps[reg];
330       } else {
331          coord = nir_imm_vec4_float(t->b, 0.0f);
332       }
333    } else {
334       coord = nir_undef(t->b, 4, 32);
335    }
336    coord = apply_swizzle(t, coord, texinst->swizzle);
337 
338    if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
339       nir_variable *tex_var = t->samplers[r];
340       if (!tex_var) {
341          /* The actual sampler dim will be determined at draw time and lowered
342           * by st_nir_update_atifs_samplers. Setting it to 3D for now means we
343           * don't optimize out coordinate channels we may need later.
344           */
345          const struct glsl_type *sampler_type =
346              glsl_sampler_type(GLSL_SAMPLER_DIM_3D, false, false, GLSL_TYPE_FLOAT);
347 
348          tex_var = nir_variable_create(t->b->shader, nir_var_uniform, sampler_type, "tex");
349          tex_var->data.binding = r;
350          tex_var->data.explicit_binding = true;
351          t->samplers[r] = tex_var;
352       }
353       nir_deref_instr *tex_deref = nir_build_deref_var(t->b, t->samplers[r]);
354 
355       nir_tex_instr *tex = nir_tex_instr_create(t->b->shader, 3);
356       tex->op = nir_texop_tex;
357       tex->sampler_dim = glsl_get_sampler_dim(tex_var->type);
358       tex->dest_type = nir_type_float32;
359       tex->coord_components =
360          glsl_get_sampler_dim_coordinate_components(tex->sampler_dim);
361 
362       tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
363                                         &tex_deref->def);
364       tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref,
365                                         &tex_deref->def);
366       tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_coord,
367                                         nir_trim_vector(t->b, coord, tex->coord_components));
368 
369       nir_def_init(&tex->instr, &tex->def, 4, 32);
370       nir_builder_instr_insert(t->b, &tex->instr);
371 
372       t->temps[r] = &tex->def;
373    } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
374       t->temps[r] = coord;
375    }
376 
377    t->regs_written[t->current_pass][r] = true;
378 }
379 
380 /**
381  * Compile one arithmetic operation COLOR&ALPHA pair into NIR instructions.
382  */
383 static void
compile_instruction(struct st_translate * t,const struct atifs_instruction * inst)384 compile_instruction(struct st_translate *t,
385                     const struct atifs_instruction *inst)
386 {
387    unsigned optype;
388 
389    for (optype = 0; optype < 2; optype++) { /* color, alpha */
390       unsigned dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI;
391 
392       if (!inst->Opcode[optype])
393          continue;
394 
395       /* Execute the op */
396       nir_def *result = emit_arith_inst(t, inst, optype);
397       result = emit_dstmod(t, result, inst->DstReg[optype].dstMod);
398 
399       /* Do the writemask */
400       nir_const_value wrmask[4];
401       for (int i = 0; i < 4; i++) {
402          bool bit = inst->DstReg[optype].dstMask & (1 << i);
403          wrmask[i] = nir_const_value_for_bool(bit, 1);
404       }
405 
406       t->temps[dstreg] = nir_bcsel(t->b,
407                                    nir_build_imm(t->b, 4, 1, wrmask),
408                                    result,
409                                    get_temp(t, dstreg));
410       t->regs_written[t->current_pass][dstreg] = true;
411    }
412 }
413 
414 
415 /* Creates the uniform variable referencing the ATI_fragment_shader constants.
416  */
417 static void
st_atifs_setup_uniforms(struct st_translate * t,struct gl_program * program)418 st_atifs_setup_uniforms(struct st_translate *t, struct gl_program *program)
419 {
420    const struct glsl_type *type =
421       glsl_array_type(glsl_vec4_type(), program->Parameters->NumParameters, 0);
422    t->constants =
423       nir_variable_create(t->b->shader, nir_var_uniform, type,
424                           "gl_ATI_fragment_shader_constants");
425 }
426 
427 /**
428  * Called when a new variant is needed, we need to translate
429  * the ATI fragment shader to NIR
430  */
431 nir_shader *
st_translate_atifs_program(struct ati_fragment_shader * atifs,struct gl_program * program,const nir_shader_compiler_options * options)432 st_translate_atifs_program(struct ati_fragment_shader *atifs,
433                            struct gl_program *program,
434                            const nir_shader_compiler_options *options)
435 {
436    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options, "ATI_fs");
437 
438    struct st_translate translate = {
439       .atifs = atifs,
440       .b = &b,
441    };
442    struct st_translate *t = &translate;
443 
444    /* Copy the shader_info from the gl_program */
445    t->b->shader->info = program->info;
446 
447    nir_shader *s = t->b->shader;
448    s->info.name = ralloc_asprintf(s, "ATIFS%d", program->Id);
449    s->info.internal = false;
450 
451    t->fragcolor = nir_create_variable_with_location(b.shader, nir_var_shader_out,
452                                                     FRAG_RESULT_COLOR, glsl_vec4_type());
453 
454    st_atifs_setup_uniforms(t, program);
455 
456    /* emit instructions */
457    for (unsigned pass = 0; pass < atifs->NumPasses; pass++) {
458       t->current_pass = pass;
459       for (unsigned r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
460          struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
461          compile_setupinst(t, r, texinst);
462       }
463       for (unsigned i = 0; i < atifs->numArithInstr[pass]; i++) {
464          struct atifs_instruction *inst = &atifs->Instructions[pass][i];
465          compile_instruction(t, inst);
466       }
467    }
468 
469    if (t->regs_written[atifs->NumPasses-1][0])
470       nir_store_var(t->b, t->fragcolor, t->temps[0], 0xf);
471 
472    return b.shader;
473 }
474 
475 static bool
st_nir_lower_atifs_samplers_instr(nir_builder * b,nir_instr * instr,void * data)476 st_nir_lower_atifs_samplers_instr(nir_builder *b, nir_instr *instr, void *data)
477 {
478    const uint8_t *texture_index = data;
479 
480    /* Can't just do this in tex handling below, as llvmpipe leaves dead code
481     * derefs around.
482     */
483    if (instr->type == nir_instr_type_deref) {
484       nir_deref_instr *deref = nir_instr_as_deref(instr);
485       nir_variable *var = nir_deref_instr_get_variable(deref);
486       if (glsl_type_is_sampler(var->type))
487          deref->type = var->type;
488    }
489 
490    if (instr->type != nir_instr_type_tex)
491       return false;
492 
493    b->cursor = nir_before_instr(instr);
494 
495    nir_tex_instr *tex = nir_instr_as_tex(instr);
496 
497    unsigned unit;
498    int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
499    if (sampler_src_idx >= 0) {
500       nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
501       nir_variable *var = nir_deref_instr_get_variable(deref);
502       unit = var->data.binding;
503    } else {
504       unit = tex->sampler_index;
505    }
506 
507    bool is_array;
508    tex->sampler_dim =
509        _mesa_texture_index_to_sampler_dim(texture_index[unit], &is_array);
510 
511    int coords_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
512    assert(coords_idx >= 0);
513    int coord_components =
514        glsl_get_sampler_dim_coordinate_components(tex->sampler_dim);
515    /* Trim unused coords, or append undefs as necessary (if someone
516     * accidentally enables a cube array).
517     */
518    if (coord_components != tex->coord_components) {
519       nir_def *coords = tex->src[coords_idx].src.ssa;
520       nir_src_rewrite(&tex->src[coords_idx].src,
521                       nir_resize_vector(b, coords, coord_components));
522       tex->coord_components = coord_components;
523    }
524 
525    return true;
526 }
527 
528 /**
529  * Rewrites sampler dimensions and coordinate components for the currently
530  * active texture unit at draw time.
531  */
532 bool
st_nir_lower_atifs_samplers(struct nir_shader * s,const uint8_t * texture_index)533 st_nir_lower_atifs_samplers(struct nir_shader *s, const uint8_t *texture_index)
534 {
535    nir_foreach_uniform_variable(var, s) {
536       if (!glsl_type_is_sampler(var->type))
537          continue;
538       bool is_array;
539       enum glsl_sampler_dim sampler_dim =
540           _mesa_texture_index_to_sampler_dim(texture_index[var->data.binding], &is_array);
541       var->type = glsl_sampler_type(sampler_dim, false, is_array, GLSL_TYPE_FLOAT);
542    }
543 
544    return nir_shader_instructions_pass(s, st_nir_lower_atifs_samplers_instr,\
545                                        nir_metadata_control_flow,
546                                        (void *)texture_index);
547 }
548