1 /*
2 * Copyright 2009 Marek Olšák <[email protected]>
3 * SPDX-License-Identifier: MIT
4 */
5
6 /* This file contains the vertex shader transformations for SW TCL needed
7 * to overcome the limitations of the r300 rasterizer.
8 *
9 * Transformations:
10 * 1) If the secondary color output is present, the primary color must be
11 * present too.
12 * 2) If any back-face color output is present, there must be all 4 color
13 * outputs and missing ones must be inserted.
14 * 3) Insert a trailing texcoord output containing a copy of POS, for WPOS.
15 *
16 * I know this code is cumbersome, but I don't know of any nicer way
17 * of transforming TGSI shaders. ~ M.
18 */
19
20 #include "r300_vs.h"
21
22 #include <stdio.h>
23
24 #include "tgsi/tgsi_transform.h"
25 #include "tgsi/tgsi_dump.h"
26
27 #include "draw/draw_context.h"
28
29 struct vs_transform_context {
30 struct tgsi_transform_context base;
31
32 bool color_used[2];
33 bool bcolor_used[2];
34
35 /* Index of the pos output, typically 0. */
36 unsigned pos_output;
37 /* Index of the pos temp where all writes of pos are redirected to. */
38 unsigned pos_temp;
39 /* The index of the last generic output, after which we insert a new
40 * output for WPOS. */
41 int last_generic;
42
43 unsigned num_outputs;
44 /* Used to shift output decl. indices when inserting new ones. */
45 unsigned decl_shift;
46 /* Used to remap writes to output decls if their indices changed. */
47 unsigned out_remap[32];
48
49 /* First instruction processed? */
50 bool first_instruction;
51 /* End instruction processed? */
52 bool end_instruction;
53
54 bool temp_used[1024];
55 };
56
emit_temp(struct tgsi_transform_context * ctx,unsigned reg)57 static void emit_temp(struct tgsi_transform_context *ctx, unsigned reg)
58 {
59 struct tgsi_full_declaration decl;
60
61 decl = tgsi_default_full_declaration();
62 decl.Declaration.File = TGSI_FILE_TEMPORARY;
63 decl.Range.First = decl.Range.Last = reg;
64 ctx->emit_declaration(ctx, &decl);
65 }
66
emit_output(struct tgsi_transform_context * ctx,unsigned name,unsigned index,unsigned interp,unsigned reg)67 static void emit_output(struct tgsi_transform_context *ctx,
68 unsigned name, unsigned index, unsigned interp,
69 unsigned reg)
70 {
71 struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
72 struct tgsi_full_declaration decl;
73
74 decl = tgsi_default_full_declaration();
75 decl.Declaration.File = TGSI_FILE_OUTPUT;
76 decl.Declaration.Interpolate = 1;
77 decl.Declaration.Semantic = true;
78 decl.Semantic.Name = name;
79 decl.Semantic.Index = index;
80 decl.Range.First = decl.Range.Last = reg;
81 decl.Interp.Interpolate = interp;
82 ctx->emit_declaration(ctx, &decl);
83 ++vsctx->num_outputs;
84 }
85
insert_output_before(struct tgsi_transform_context * ctx,struct tgsi_full_declaration * before,unsigned name,unsigned index,unsigned interp)86 static void insert_output_before(struct tgsi_transform_context *ctx,
87 struct tgsi_full_declaration *before,
88 unsigned name, unsigned index, unsigned interp)
89 {
90 struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
91 unsigned i;
92
93 /* Make a place for the new output. */
94 for (i = before->Range.First; i < ARRAY_SIZE(vsctx->out_remap); i++) {
95 ++vsctx->out_remap[i];
96 }
97
98 /* Insert the new output. */
99 emit_output(ctx, name, index, interp,
100 before->Range.First + vsctx->decl_shift);
101
102 ++vsctx->decl_shift;
103 }
104
insert_output_after(struct tgsi_transform_context * ctx,struct tgsi_full_declaration * after,unsigned name,unsigned index,unsigned interp)105 static void insert_output_after(struct tgsi_transform_context *ctx,
106 struct tgsi_full_declaration *after,
107 unsigned name, unsigned index, unsigned interp)
108 {
109 struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
110 unsigned i;
111
112 /* Make a place for the new output. */
113 for (i = after->Range.First+1; i < ARRAY_SIZE(vsctx->out_remap); i++) {
114 ++vsctx->out_remap[i];
115 }
116
117 /* Insert the new output. */
118 emit_output(ctx, name, index, interp,
119 after->Range.First + 1);
120
121 ++vsctx->decl_shift;
122 }
123
transform_decl(struct tgsi_transform_context * ctx,struct tgsi_full_declaration * decl)124 static void transform_decl(struct tgsi_transform_context *ctx,
125 struct tgsi_full_declaration *decl)
126 {
127 struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
128 unsigned i;
129
130 if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
131 switch (decl->Semantic.Name) {
132 case TGSI_SEMANTIC_POSITION:
133 vsctx->pos_output = decl->Range.First;
134 break;
135
136 case TGSI_SEMANTIC_COLOR:
137 assert(decl->Semantic.Index < 2);
138
139 /* We must rasterize the first color if the second one is
140 * used, otherwise the rasterizer doesn't do the color
141 * selection correctly. Declare it, but don't write to it. */
142 if (decl->Semantic.Index == 1 && !vsctx->color_used[0]) {
143 insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 0,
144 TGSI_INTERPOLATE_LINEAR);
145 vsctx->color_used[0] = true;
146 }
147 break;
148
149 case TGSI_SEMANTIC_BCOLOR:
150 assert(decl->Semantic.Index < 2);
151
152 /* We must rasterize all 4 colors if back-face colors are
153 * used, otherwise the rasterizer doesn't do the color
154 * selection correctly. Declare it, but don't write to it. */
155 if (!vsctx->color_used[0]) {
156 insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 0,
157 TGSI_INTERPOLATE_LINEAR);
158 vsctx->color_used[0] = true;
159 }
160 if (!vsctx->color_used[1]) {
161 insert_output_before(ctx, decl, TGSI_SEMANTIC_COLOR, 1,
162 TGSI_INTERPOLATE_LINEAR);
163 vsctx->color_used[1] = true;
164 }
165 if (decl->Semantic.Index == 1 && !vsctx->bcolor_used[0]) {
166 insert_output_before(ctx, decl, TGSI_SEMANTIC_BCOLOR, 0,
167 TGSI_INTERPOLATE_LINEAR);
168 vsctx->bcolor_used[0] = true;
169 }
170 break;
171
172 case TGSI_SEMANTIC_GENERIC:
173 vsctx->last_generic = MAX2(vsctx->last_generic, decl->Semantic.Index);
174 break;
175 }
176
177 /* Since we're inserting new outputs in between, the following outputs
178 * should be moved to the right so that they don't overlap with
179 * the newly added ones. */
180 decl->Range.First += vsctx->decl_shift;
181 decl->Range.Last += vsctx->decl_shift;
182
183 ++vsctx->num_outputs;
184 } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
185 for (i = decl->Range.First; i <= decl->Range.Last; i++) {
186 vsctx->temp_used[i] = true;
187 }
188 }
189
190 ctx->emit_declaration(ctx, decl);
191
192 /* Insert BCOLOR1 if needed. */
193 if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
194 decl->Semantic.Name == TGSI_SEMANTIC_BCOLOR &&
195 !vsctx->bcolor_used[1]) {
196 insert_output_after(ctx, decl, TGSI_SEMANTIC_BCOLOR, 1,
197 TGSI_INTERPOLATE_LINEAR);
198 }
199 }
200
transform_inst(struct tgsi_transform_context * ctx,struct tgsi_full_instruction * inst)201 static void transform_inst(struct tgsi_transform_context *ctx,
202 struct tgsi_full_instruction *inst)
203 {
204 struct vs_transform_context *vsctx = (struct vs_transform_context *) ctx;
205 struct tgsi_full_instruction new_inst;
206 unsigned i;
207
208 if (!vsctx->first_instruction) {
209 vsctx->first_instruction = true;
210
211 /* Insert the generic output for WPOS. */
212 emit_output(ctx, TGSI_SEMANTIC_GENERIC, vsctx->last_generic + 1,
213 TGSI_INTERPOLATE_PERSPECTIVE, vsctx->num_outputs);
214
215 /* Find a free temp for POSITION. */
216 for (i = 0; i < ARRAY_SIZE(vsctx->temp_used); i++) {
217 if (!vsctx->temp_used[i]) {
218 emit_temp(ctx, i);
219 vsctx->pos_temp = i;
220 break;
221 }
222 }
223 }
224
225 if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
226 /* MOV OUT[pos_output], TEMP[pos_temp]; */
227 new_inst = tgsi_default_full_instruction();
228 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
229 new_inst.Instruction.NumDstRegs = 1;
230 new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
231 new_inst.Dst[0].Register.Index = vsctx->pos_output;
232 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
233 new_inst.Instruction.NumSrcRegs = 1;
234 new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
235 new_inst.Src[0].Register.Index = vsctx->pos_temp;
236 ctx->emit_instruction(ctx, &new_inst);
237
238 /* MOV OUT[n-1], TEMP[pos_temp]; */
239 new_inst = tgsi_default_full_instruction();
240 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
241 new_inst.Instruction.NumDstRegs = 1;
242 new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
243 new_inst.Dst[0].Register.Index = vsctx->num_outputs - 1;
244 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
245 new_inst.Instruction.NumSrcRegs = 1;
246 new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
247 new_inst.Src[0].Register.Index = vsctx->pos_temp;
248 ctx->emit_instruction(ctx, &new_inst);
249
250 vsctx->end_instruction = true;
251 } else {
252 /* Not an END instruction. */
253 /* Fix writes to outputs. */
254 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
255 struct tgsi_full_dst_register *dst = &inst->Dst[i];
256 if (dst->Register.File == TGSI_FILE_OUTPUT) {
257 if (dst->Register.Index == vsctx->pos_output) {
258 /* Replace writes to OUT[pos_output] with TEMP[pos_temp]. */
259 dst->Register.File = TGSI_FILE_TEMPORARY;
260 dst->Register.Index = vsctx->pos_temp;
261 } else {
262 /* Not a position, good...
263 * Since we were changing the indices of output decls,
264 * we must redirect writes into them too. */
265 dst->Register.Index = vsctx->out_remap[dst->Register.Index];
266 }
267 }
268 }
269
270 /* Inserting 2 instructions before the END opcode moves all following
271 * labels by 2. Subroutines are always after the END opcode so
272 * they're always moved. */
273 if (inst->Instruction.Opcode == TGSI_OPCODE_CAL) {
274 inst->Label.Label += 2;
275 }
276 /* The labels of the following opcodes are moved only after
277 * the END opcode. */
278 if (vsctx->end_instruction &&
279 (inst->Instruction.Opcode == TGSI_OPCODE_IF ||
280 inst->Instruction.Opcode == TGSI_OPCODE_ELSE ||
281 inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP ||
282 inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP)) {
283 inst->Label.Label += 2;
284 }
285 }
286
287 ctx->emit_instruction(ctx, inst);
288 }
289
r300_draw_init_vertex_shader(struct r300_context * r300,struct r300_vertex_shader * vs)290 void r300_draw_init_vertex_shader(struct r300_context *r300,
291 struct r300_vertex_shader *vs)
292 {
293 struct draw_context *draw = r300->draw;
294 struct tgsi_shader_info info;
295 struct vs_transform_context transform;
296 const uint newLen = tgsi_num_tokens(vs->state.tokens) + 100;
297 struct pipe_shader_state new_vs = {
298 .type = PIPE_SHADER_IR_TGSI,
299 .tokens = tgsi_alloc_tokens(newLen)
300 };
301 unsigned i;
302
303 tgsi_scan_shader(vs->state.tokens, &info);
304
305 memset(&transform, 0, sizeof(transform));
306 for (i = 0; i < ARRAY_SIZE(transform.out_remap); i++) {
307 transform.out_remap[i] = i;
308 }
309 transform.last_generic = -1;
310 transform.base.transform_instruction = transform_inst;
311 transform.base.transform_declaration = transform_decl;
312
313 for (i = 0; i < info.num_outputs; i++) {
314 unsigned index = info.output_semantic_index[i];
315
316 switch (info.output_semantic_name[i]) {
317 case TGSI_SEMANTIC_COLOR:
318 assert(index < 2);
319 transform.color_used[index] = true;
320 break;
321
322 case TGSI_SEMANTIC_BCOLOR:
323 assert(index < 2);
324 transform.bcolor_used[index] = true;
325 break;
326 }
327 }
328
329 new_vs.tokens = tgsi_transform_shader(vs->state.tokens, newLen, &transform.base);
330 if (!new_vs.tokens)
331 return;
332
333 #if 0
334 printf("----------------------------------------------\norig shader:\n");
335 tgsi_dump(vs->state.tokens, 0);
336 printf("----------------------------------------------\nnew shader:\n");
337 tgsi_dump(new_vs.tokens, 0);
338 printf("----------------------------------------------\n");
339 #endif
340
341 /* Free old tokens. */
342 FREE((void*)vs->state.tokens);
343
344 vs->draw_vs = draw_create_vertex_shader(draw, &new_vs);
345
346 /* Instead of duplicating and freeing the tokens, copy the pointer directly. */
347 vs->state.tokens = new_vs.tokens;
348
349 /* Init the VS output table for the rasterizer. */
350 r300_init_vs_outputs(r300, vs);
351
352 /* Make the last generic be WPOS. */
353 vs->shader->outputs.wpos = vs->shader->outputs.generic[transform.last_generic + 1];
354 vs->shader->outputs.generic[transform.last_generic + 1] = ATTR_UNUSED;
355 }
356