1 /*
2 * Copyright (C) 2016 Miklós Máté
3 * Copyright (C) 2020 Google LLC
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "main/mtypes.h"
25 #include "main/atifragshader.h"
26 #include "main/errors.h"
27 #include "program/prog_parameter.h"
28 #include "program/prog_instruction.h"
29 #include "program/prog_to_nir.h"
30
31 #include "st_program.h"
32 #include "st_atifs_to_nir.h"
33 #include "compiler/nir/nir_builder.h"
34
35 /**
36 * Intermediate state used during shader translation.
37 */
struct st_translate {
   /* Builder used to emit all NIR instructions for this translation. */
   nir_builder *b;
   /* The source ATI_fragment_shader being translated. */
   struct ati_fragment_shader *atifs;

   /* Current SSA value of each ATI_fs register.  Entries at
    * MAX_NUM_FRAGMENT_REGISTERS_ATI + argId additionally stash each
    * argument's pre-modifier value (see prepare_argument()).
    */
   nir_def *temps[MAX_PROGRAM_TEMPS];

   /* The shader's single color output (FRAG_RESULT_COLOR). */
   nir_variable *fragcolor;
   /* Uniform vec4 array backing the non-locally-defined ATI_fs constants. */
   nir_variable *constants;
   /* Lazily-created sampler variables, indexed by texture unit. */
   nir_variable *samplers[MAX_TEXTURE_UNITS];

   /* Cached loads of shader inputs, created on first reference. */
   nir_def *inputs[VARYING_SLOT_MAX];

   /* Index of the ATI_fs pass currently being compiled. */
   unsigned current_pass;

   /* Which registers have been written in each pass; registers read
    * before being written yield zero (see get_source()).
    */
   bool regs_written[MAX_NUM_PASSES_ATI][MAX_NUM_FRAGMENT_REGISTERS_ATI];

   /* NOTE(review): never set in this file — possibly vestigial; confirm
    * against other users of struct st_translate.
    */
   bool error;
};
56
57 static nir_def *
nir_channel_vec4(nir_builder * b,nir_def * src,unsigned channel)58 nir_channel_vec4(nir_builder *b, nir_def *src, unsigned channel)
59 {
60 unsigned swizzle[4] = { channel, channel, channel, channel };
61 return nir_swizzle(b, src, swizzle, 4);
62 }
63
64 static nir_def *
nir_imm_vec4_float(nir_builder * b,float f)65 nir_imm_vec4_float(nir_builder *b, float f)
66 {
67 return nir_imm_vec4(b, f, f, f, f);
68 }
69
70 static nir_def *
get_temp(struct st_translate * t,unsigned index)71 get_temp(struct st_translate *t, unsigned index)
72 {
73 if (!t->temps[index])
74 t->temps[index] = nir_undef(t->b, 4, 32);
75 return t->temps[index];
76 }
77
78 static nir_def *
apply_swizzle(struct st_translate * t,struct nir_def * src,GLuint swizzle)79 apply_swizzle(struct st_translate *t,
80 struct nir_def *src, GLuint swizzle)
81 {
82 /* From the ATI_fs spec:
83 *
84 * "Table 3.20 shows the <swizzle> modes:
85 *
86 * Coordinates Used for 1D or Coordinates Used for
87 * Swizzle 2D SampleMap and PassTexCoord 3D or cubemap SampleMap
88 * ------- ----------------------------- -----------------------
89 * SWIZZLE_STR_ATI (s, t, r, undefined) (s, t, r, undefined)
90 * SWIZZLE_STQ_ATI (s, t, q, undefined) (s, t, q, undefined)
91 * SWIZZLE_STR_DR_ATI (s/r, t/r, 1/r, undefined) (undefined)
92 * SWIZZLE_STQ_DQ_ATI (s/q, t/q, 1/q, undefined) (undefined)
93 */
94 if (swizzle == GL_SWIZZLE_STR_ATI) {
95 return src;
96 } else if (swizzle == GL_SWIZZLE_STQ_ATI) {
97 static unsigned xywz[4] = { 0, 1, 3, 2 };
98 return nir_swizzle(t->b, src, xywz, 4);
99 } else {
100 nir_def *rcp = nir_frcp(t->b, nir_channel(t->b, src,
101 swizzle == GL_SWIZZLE_STR_DR_ATI ? 2 : 3));
102
103 nir_def *st_mul = nir_fmul(t->b, nir_trim_vector(t->b, src, 2), rcp);
104
105 return nir_vec4(t->b,
106 nir_channel(t->b, st_mul, 0),
107 nir_channel(t->b, st_mul, 1),
108 rcp,
109 rcp);
110 }
111 }
112
113 static nir_def *
load_input(struct st_translate * t,gl_varying_slot slot)114 load_input(struct st_translate *t, gl_varying_slot slot)
115 {
116 if (!t->inputs[slot]) {
117 nir_variable *var = nir_create_variable_with_location(t->b->shader, nir_var_shader_in, slot,
118 glsl_vec4_type());
119 var->data.interpolation = INTERP_MODE_NONE;
120
121 t->inputs[slot] = nir_load_var(t->b, var);
122 }
123
124 return t->inputs[slot];
125 }
126
127 static nir_def *
atifs_load_uniform(struct st_translate * t,int index)128 atifs_load_uniform(struct st_translate *t, int index)
129 {
130 nir_deref_instr *deref = nir_build_deref_array(t->b,
131 nir_build_deref_var(t->b, t->constants),
132 nir_imm_int(t->b, index));
133 return nir_load_deref(t->b, deref);
134 }
135
136 static struct nir_def *
get_source(struct st_translate * t,GLenum src_type)137 get_source(struct st_translate *t, GLenum src_type)
138 {
139 if (src_type >= GL_REG_0_ATI && src_type <= GL_REG_5_ATI) {
140 if (t->regs_written[t->current_pass][src_type - GL_REG_0_ATI]) {
141 return get_temp(t, src_type - GL_REG_0_ATI);
142 } else {
143 return nir_imm_vec4_float(t->b, 0.0);
144 }
145 } else if (src_type >= GL_CON_0_ATI && src_type <= GL_CON_7_ATI) {
146 int index = src_type - GL_CON_0_ATI;
147 if (t->atifs->LocalConstDef & (1 << index)) {
148 return nir_imm_vec4(t->b,
149 t->atifs->Constants[index][0],
150 t->atifs->Constants[index][1],
151 t->atifs->Constants[index][2],
152 t->atifs->Constants[index][3]);
153 } else {
154 return atifs_load_uniform(t, index);
155 }
156 } else if (src_type == GL_ZERO) {
157 return nir_imm_vec4_float(t->b, 0.0);
158 } else if (src_type == GL_ONE) {
159 return nir_imm_vec4_float(t->b, 1.0);
160 } else if (src_type == GL_PRIMARY_COLOR_ARB) {
161 return load_input(t, VARYING_SLOT_COL0);
162 } else if (src_type == GL_SECONDARY_INTERPOLATOR_ATI) {
163 return load_input(t, VARYING_SLOT_COL1);
164 } else {
165 /* frontend prevents this */
166 unreachable("unknown source");
167 }
168 }
169
170 static nir_def *
prepare_argument(struct st_translate * t,const struct atifs_instruction * inst,const unsigned argId,bool alpha)171 prepare_argument(struct st_translate *t, const struct atifs_instruction *inst,
172 const unsigned argId, bool alpha)
173 {
174 if (argId >= inst->ArgCount[alpha]) {
175 _mesa_warning(0, "Using 0 for missing argument %d\n", argId);
176 return nir_imm_vec4_float(t->b, 0.0f);
177 }
178
179 const struct atifragshader_src_register *srcReg = &inst->SrcReg[alpha][argId];
180
181 nir_def *src = get_source(t, srcReg->Index);
182
183 switch (srcReg->argRep) {
184 case GL_NONE:
185 break;
186 case GL_RED:
187 src = nir_channel_vec4(t->b, src, 0);
188 break;
189 case GL_GREEN:
190 src = nir_channel_vec4(t->b, src, 1);
191 break;
192 case GL_BLUE:
193 src = nir_channel_vec4(t->b, src, 2);
194 break;
195 case GL_ALPHA:
196 src = nir_channel_vec4(t->b, src, 3);
197 break;
198 }
199
200 t->temps[MAX_NUM_FRAGMENT_REGISTERS_ATI + argId] = src;
201
202 if (srcReg->argMod & GL_COMP_BIT_ATI)
203 src = nir_fsub_imm(t->b, 1.0, src);
204 if (srcReg->argMod & GL_BIAS_BIT_ATI)
205 src = nir_fadd_imm(t->b, src, -0.5);
206 if (srcReg->argMod & GL_2X_BIT_ATI)
207 src = nir_fadd(t->b, src, src);
208 if (srcReg->argMod & GL_NEGATE_BIT_ATI)
209 src = nir_fneg(t->b, src);
210
211 return src;
212 }
213
214 static nir_def *
emit_arith_inst(struct st_translate * t,const struct atifs_instruction * inst,bool alpha)215 emit_arith_inst(struct st_translate *t,
216 const struct atifs_instruction *inst,
217 bool alpha)
218 {
219 nir_def *src[3] = {0};
220 for (int i = 0; i < inst->ArgCount[alpha]; i++)
221 src[i] = prepare_argument(t, inst, i, alpha);
222
223 switch (inst->Opcode[alpha]) {
224 case GL_MOV_ATI:
225 return src[0];
226
227 case GL_ADD_ATI:
228 return nir_fadd(t->b, src[0], src[1]);
229
230 case GL_SUB_ATI:
231 return nir_fsub(t->b, src[0], src[1]);
232
233 case GL_MUL_ATI:
234 return nir_fmul(t->b, src[0], src[1]);
235
236 case GL_MAD_ATI:
237 return nir_ffma(t->b, src[0], src[1], src[2]);
238
239 case GL_LERP_ATI:
240 return nir_flrp(t->b, src[2], src[1], src[0]);
241
242 case GL_CND_ATI:
243 return nir_bcsel(t->b,
244 nir_fle_imm(t->b, src[2], 0.5),
245 src[1],
246 src[0]);
247
248 case GL_CND0_ATI:
249 return nir_bcsel(t->b,
250 nir_fge_imm(t->b, src[2], 0.0),
251 src[0],
252 src[1]);
253
254 case GL_DOT2_ADD_ATI:
255 return nir_channel_vec4(t->b,
256 nir_fadd(t->b,
257 nir_fdot2(t->b, src[0], src[1]),
258 nir_channel(t->b, src[1], 2)),
259 0);
260
261 case GL_DOT3_ATI:
262 return nir_channel_vec4(t->b, nir_fdot3(t->b,src[0], src[1]), 0);
263
264 case GL_DOT4_ATI:
265 return nir_channel_vec4(t->b, nir_fdot4(t->b,src[0], src[1]), 0);
266
267 default:
268 unreachable("Unknown ATI_fs opcode");
269 }
270 }
271
272 static nir_def *
emit_dstmod(struct st_translate * t,struct nir_def * dst,GLuint dstMod)273 emit_dstmod(struct st_translate *t,
274 struct nir_def *dst, GLuint dstMod)
275 {
276 switch (dstMod & ~GL_SATURATE_BIT_ATI) {
277 case GL_2X_BIT_ATI:
278 dst = nir_fmul_imm(t->b, dst, 2.0f);
279 break;
280 case GL_4X_BIT_ATI:
281 dst = nir_fmul_imm(t->b, dst, 4.0f);
282 break;
283 case GL_8X_BIT_ATI:
284 dst = nir_fmul_imm(t->b, dst, 8.0f);
285 break;
286 case GL_HALF_BIT_ATI:
287 dst = nir_fmul_imm(t->b, dst, 0.5f);
288 break;
289 case GL_QUARTER_BIT_ATI:
290 dst = nir_fmul_imm(t->b, dst, 0.25f);
291 break;
292 case GL_EIGHTH_BIT_ATI:
293 dst = nir_fmul_imm(t->b, dst, 0.125f);
294 break;
295 default:
296 break;
297 }
298
299 if (dstMod & GL_SATURATE_BIT_ATI)
300 dst = nir_fsat(t->b, dst);
301
302 return dst;
303 }
304
305 /**
306 * Compile one setup instruction to NIR instructions.
307 */
static void
compile_setupinst(struct st_translate *t,
                  const unsigned r,
                  const struct atifs_setupinst *texinst)
{
   /* Opcode 0 means no setup instruction was specified for register r. */
   if (!texinst->Opcode)
      return;

   GLuint pass_tex = texinst->src;

   nir_def *coord;

   /* Resolve the coordinate source: either an interpolated texcoord
    * attribute or (second pass only) a register written in the first pass.
    */
   if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
      unsigned attr = pass_tex - GL_TEXTURE0_ARB;

      coord = load_input(t, VARYING_SLOT_TEX0 + attr);
   } else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
      unsigned reg = pass_tex - GL_REG_0_ATI;

      /* the frontend already validated that REG is only allowed in second pass */
      if (t->regs_written[0][reg]) {
         coord = t->temps[reg];
      } else {
         /* Reading a register never written in pass 0 yields zero. */
         coord = nir_imm_vec4_float(t->b, 0.0f);
      }
   } else {
      coord = nir_undef(t->b, 4, 32);
   }
   coord = apply_swizzle(t, coord, texinst->swizzle);

   if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
      nir_variable *tex_var = t->samplers[r];
      if (!tex_var) {
         /* The actual sampler dim will be determined at draw time and lowered
          * by st_nir_update_atifs_samplers. Setting it to 3D for now means we
          * don't optimize out coordinate channels we may need later.
          */
         const struct glsl_type *sampler_type =
            glsl_sampler_type(GLSL_SAMPLER_DIM_3D, false, false, GLSL_TYPE_FLOAT);

         tex_var = nir_variable_create(t->b->shader, nir_var_uniform, sampler_type, "tex");
         tex_var->data.binding = r;
         tex_var->data.explicit_binding = true;
         t->samplers[r] = tex_var;
      }
      nir_deref_instr *tex_deref = nir_build_deref_var(t->b, t->samplers[r]);

      /* Build the tex instruction by hand: texture deref, sampler deref,
       * and coordinate trimmed to the sampler dim's component count.
       */
      nir_tex_instr *tex = nir_tex_instr_create(t->b->shader, 3);
      tex->op = nir_texop_tex;
      tex->sampler_dim = glsl_get_sampler_dim(tex_var->type);
      tex->dest_type = nir_type_float32;
      tex->coord_components =
         glsl_get_sampler_dim_coordinate_components(tex->sampler_dim);

      tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
                                        &tex_deref->def);
      tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref,
                                        &tex_deref->def);
      tex->src[2] = nir_tex_src_for_ssa(nir_tex_src_coord,
                                        nir_trim_vector(t->b, coord, tex->coord_components));

      nir_def_init(&tex->instr, &tex->def, 4, 32);
      nir_builder_instr_insert(t->b, &tex->instr);

      t->temps[r] = &tex->def;
   } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
      /* PassTexCoord: the swizzled coordinate itself becomes the register. */
      t->temps[r] = coord;
   }

   t->regs_written[t->current_pass][r] = true;
}
379
380 /**
381 * Compile one arithmetic operation COLOR&ALPHA pair into NIR instructions.
382 */
383 static void
compile_instruction(struct st_translate * t,const struct atifs_instruction * inst)384 compile_instruction(struct st_translate *t,
385 const struct atifs_instruction *inst)
386 {
387 unsigned optype;
388
389 for (optype = 0; optype < 2; optype++) { /* color, alpha */
390 unsigned dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI;
391
392 if (!inst->Opcode[optype])
393 continue;
394
395 /* Execute the op */
396 nir_def *result = emit_arith_inst(t, inst, optype);
397 result = emit_dstmod(t, result, inst->DstReg[optype].dstMod);
398
399 /* Do the writemask */
400 nir_const_value wrmask[4];
401 for (int i = 0; i < 4; i++) {
402 bool bit = inst->DstReg[optype].dstMask & (1 << i);
403 wrmask[i] = nir_const_value_for_bool(bit, 1);
404 }
405
406 t->temps[dstreg] = nir_bcsel(t->b,
407 nir_build_imm(t->b, 4, 1, wrmask),
408 result,
409 get_temp(t, dstreg));
410 t->regs_written[t->current_pass][dstreg] = true;
411 }
412 }
413
414
415 /* Creates the uniform variable referencing the ATI_fragment_shader constants.
416 */
417 static void
st_atifs_setup_uniforms(struct st_translate * t,struct gl_program * program)418 st_atifs_setup_uniforms(struct st_translate *t, struct gl_program *program)
419 {
420 const struct glsl_type *type =
421 glsl_array_type(glsl_vec4_type(), program->Parameters->NumParameters, 0);
422 t->constants =
423 nir_variable_create(t->b->shader, nir_var_uniform, type,
424 "gl_ATI_fragment_shader_constants");
425 }
426
427 /**
428 * Called when a new variant is needed, we need to translate
429 * the ATI fragment shader to NIR
430 */
nir_shader *
st_translate_atifs_program(struct ati_fragment_shader *atifs,
                           struct gl_program *program,
                           const nir_shader_compiler_options *options)
{
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options, "ATI_fs");

   struct st_translate translate = {
      .atifs = atifs,
      .b = &b,
   };
   struct st_translate *t = &translate;

   /* Copy the shader_info from the gl_program */
   t->b->shader->info = program->info;

   nir_shader *s = t->b->shader;
   s->info.name = ralloc_asprintf(s, "ATIFS%d", program->Id);
   s->info.internal = false;

   /* The only output ATI_fs can write is the fragment color. */
   t->fragcolor = nir_create_variable_with_location(b.shader, nir_var_shader_out,
                                                    FRAG_RESULT_COLOR, glsl_vec4_type());

   st_atifs_setup_uniforms(t, program);

   /* emit instructions: for each pass, all setup (texture/pass-coord)
    * instructions run before the arithmetic instructions.
    */
   for (unsigned pass = 0; pass < atifs->NumPasses; pass++) {
      t->current_pass = pass;
      for (unsigned r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
         struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
         compile_setupinst(t, r, texinst);
      }
      for (unsigned i = 0; i < atifs->numArithInstr[pass]; i++) {
         struct atifs_instruction *inst = &atifs->Instructions[pass][i];
         compile_instruction(t, inst);
      }
   }

   /* Register 0 at the end of the last pass is the fragment color; if it
    * was never written, the output is left unwritten.
    */
   if (t->regs_written[atifs->NumPasses-1][0])
      nir_store_var(t->b, t->fragcolor, t->temps[0], 0xf);

   return b.shader;
}
474
static bool
st_nir_lower_atifs_samplers_instr(nir_builder *b, nir_instr *instr, void *data)
{
   /* Per-instruction body for st_nir_lower_atifs_samplers: retype sampler
    * derefs and fix up tex instructions to match the actual texture
    * targets bound at draw time.  data is the per-unit texture_index array.
    */
   const uint8_t *texture_index = data;

   /* Can't just do this in tex handling below, as llvmpipe leaves dead code
    * derefs around.
    */
   if (instr->type == nir_instr_type_deref) {
      nir_deref_instr *deref = nir_instr_as_deref(instr);
      nir_variable *var = nir_deref_instr_get_variable(deref);
      if (glsl_type_is_sampler(var->type))
         deref->type = var->type;
   }

   if (instr->type != nir_instr_type_tex)
      return false;

   b->cursor = nir_before_instr(instr);

   nir_tex_instr *tex = nir_instr_as_tex(instr);

   /* Find the texture unit: the sampler deref's binding when present,
    * otherwise the tex instruction's own sampler index.
    */
   unsigned unit;
   int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
   if (sampler_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
      nir_variable *var = nir_deref_instr_get_variable(deref);
      unit = var->data.binding;
   } else {
      unit = tex->sampler_index;
   }

   bool is_array;
   tex->sampler_dim =
      _mesa_texture_index_to_sampler_dim(texture_index[unit], &is_array);

   int coords_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coords_idx >= 0);
   int coord_components =
      glsl_get_sampler_dim_coordinate_components(tex->sampler_dim);
   /* Trim unused coords, or append undefs as necessary (if someone
    * accidentally enables a cube array).
    */
   if (coord_components != tex->coord_components) {
      nir_def *coords = tex->src[coords_idx].src.ssa;
      nir_src_rewrite(&tex->src[coords_idx].src,
                      nir_resize_vector(b, coords, coord_components));
      tex->coord_components = coord_components;
   }

   return true;
}
527
528 /**
529 * Rewrites sampler dimensions and coordinate components for the currently
530 * active texture unit at draw time.
531 */
532 bool
st_nir_lower_atifs_samplers(struct nir_shader * s,const uint8_t * texture_index)533 st_nir_lower_atifs_samplers(struct nir_shader *s, const uint8_t *texture_index)
534 {
535 nir_foreach_uniform_variable(var, s) {
536 if (!glsl_type_is_sampler(var->type))
537 continue;
538 bool is_array;
539 enum glsl_sampler_dim sampler_dim =
540 _mesa_texture_index_to_sampler_dim(texture_index[var->data.binding], &is_array);
541 var->type = glsl_sampler_type(sampler_dim, false, is_array, GLSL_TYPE_FLOAT);
542 }
543
544 return nir_shader_instructions_pass(s, st_nir_lower_atifs_samplers_instr,\
545 nir_metadata_control_flow,
546 (void *)texture_index);
547 }
548