xref: /aosp_15_r20/external/mesa3d/src/gallium/auxiliary/tgsi/tgsi_dynamic_indexing.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2018 VMware, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the
7  * "Software"), to deal in the Software without restriction, including
8  * without limitation the rights to use, copy, modify, merge, publish,
9  * distribute, sub license, and/or sell copies of the Software, and to
10  * permit persons to whom the Software is furnished to do so, subject to
11  * the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the
14  * next paragraph) shall be included in all copies or substantial portions
15  * of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
20  * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
21  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  */
25 
26 
/**
 * This utility transforms the shader to support dynamic array indexing
 * for samplers and constant buffers.
 * It first calculates the dynamic array index, then compares it with each
 * declared index and performs the operation with the matching index.
 */
33 
34 #include "util/u_debug.h"
35 #include "util/u_math.h"
36 #include "tgsi_info.h"
37 #include "tgsi_dynamic_indexing.h"
38 #include "tgsi_transform.h"
39 #include "tgsi_dump.h"
40 #include "pipe/p_state.h"
41 
42 
43 struct dIndexing_transform_context
44 {
45    struct tgsi_transform_context base;
46    unsigned orig_num_tmp;
47    unsigned orig_num_imm;
48    unsigned num_const_bufs;
49    unsigned num_samplers;
50    unsigned num_iterations;
51    unsigned const_buf_range[PIPE_MAX_CONSTANT_BUFFERS];
52 };
53 
54 
/**
 * Recover the dynamic-indexing context from the generic transform context.
 *
 * Valid because the generic context is the first member of
 * struct dIndexing_transform_context.
 */
static inline struct dIndexing_transform_context *
dIndexing_transform_context(struct tgsi_transform_context *ctx)
{
   struct dIndexing_transform_context *dc =
      (struct dIndexing_transform_context *) ctx;

   return dc;
}
60 
61 
62 /**
63  * TGSI declaration transform callback.
64  */
65 static void
dIndexing_decl(struct tgsi_transform_context * ctx,struct tgsi_full_declaration * decl)66 dIndexing_decl(struct tgsi_transform_context *ctx,
67                struct tgsi_full_declaration *decl)
68 {
69    struct dIndexing_transform_context *dc = dIndexing_transform_context(ctx);
70 
71    if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
72       /**
73        * Emit some extra temporary register to use in keeping track of
74        * dynamic index.
75        */
76       dc->orig_num_tmp = decl->Range.Last;
77       decl->Range.Last = decl->Range.Last + 3;
78    }
79    else if (decl->Declaration.File == TGSI_FILE_CONSTANT) {
80       /* Keep track of number of constants in each buffer */
81       dc->const_buf_range[decl->Dim.Index2D] = decl->Range.Last;
82    }
83    ctx->emit_declaration(ctx, decl);
84 }
85 
86 
/**
 * TGSI transform prolog callback.
 *
 * Declares two integer immediates holding the values 0..3 and 4..7.
 * The instruction rewrite reads them for the constants 0 and 1 and for the
 * static part of the array index.
 *
 * \param ctx  the transform context
 */
static void
dIndexing_prolog(struct tgsi_transform_context *ctx)
{
   int first;

   /* One immediate per group of four consecutive integers: {0..3}, {4..7} */
   for (first = 0; first < 8; first += 4) {
      tgsi_transform_immediate_int_decl(ctx, first, first + 1,
                                        first + 2, first + 3);
   }
}
96 
97 
98 /**
99  * This function emits some extra instruction to remove dynamic array
100  * indexing of constant buffers / samplers from the shader.
101  * It calculates dynamic array index first and compare it with each index for
102  * declared constants/samplers.
103  */
104 static void
remove_dynamic_indexes(struct tgsi_transform_context * ctx,struct tgsi_full_instruction * orig_inst,const struct tgsi_full_src_register * reg)105 remove_dynamic_indexes(struct tgsi_transform_context *ctx,
106                        struct tgsi_full_instruction *orig_inst,
107                        const struct tgsi_full_src_register *reg)
108 {
109    struct dIndexing_transform_context *dc = dIndexing_transform_context(ctx);
110    int i, j;
111    int tmp_loopIdx = dc->orig_num_tmp + 1;
112    int tmp_cond = dc->orig_num_tmp + 2;
113    int tmp_arrayIdx = dc->orig_num_tmp + 3;
114    int imm_index = dc->orig_num_imm;
115    struct tgsi_full_instruction inst;
116    unsigned INVALID_INDEX = 99999;
117    enum tgsi_file_type file = TGSI_FILE_NULL;
118    unsigned index = INVALID_INDEX;
119    unsigned imm_swz_index = INVALID_INDEX;
120 
121    /* calculate dynamic array index store it in tmp_arrayIdx.x */
122    inst = tgsi_default_full_instruction();
123    inst.Instruction.Opcode = TGSI_OPCODE_UADD;
124    inst.Instruction.NumDstRegs = 1;
125    tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
126                           tmp_arrayIdx, TGSI_WRITEMASK_X);
127    inst.Instruction.NumSrcRegs = 2;
128    if (reg->Register.File == TGSI_FILE_CONSTANT) {
129       file = reg->DimIndirect.File;
130       index = reg->DimIndirect.Index;
131       imm_swz_index = reg->Dimension.Index;
132    }
133    else if (reg->Register.File == TGSI_FILE_SAMPLER) {
134       file = reg->Indirect.File;
135       index = reg->Indirect.Index;
136       imm_swz_index = reg->Register.Index;
137    }
138    tgsi_transform_src_reg(&inst.Src[0], file,
139                           index, TGSI_SWIZZLE_X,
140                           TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
141    tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_IMMEDIATE,
142                           imm_index + (imm_swz_index / 4),
143                           imm_swz_index % 4,
144                           imm_swz_index % 4,
145                           imm_swz_index % 4,
146                           imm_swz_index % 4);
147    ctx->emit_instruction(ctx, &inst);
148 
149    /* initialize counter to zero: tmp_loopIdx = 0 */
150    inst = tgsi_default_full_instruction();
151    inst.Instruction.Opcode = TGSI_OPCODE_MOV;
152    inst.Instruction.NumDstRegs = 1;
153    tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
154                           tmp_loopIdx, TGSI_WRITEMASK_X);
155    inst.Instruction.NumSrcRegs = 1;
156    tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_IMMEDIATE,
157                           imm_index, TGSI_SWIZZLE_X,
158                           TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
159                           TGSI_SWIZZLE_X);
160    ctx->emit_instruction(ctx, &inst);
161 
162    for (i = 0; i < dc->num_iterations; i++) {
163       bool out_of_bound_index = false;
164       /**
165        * Make sure we are not exceeding index limit of constant buffer
166        *
167        * For example, In declaration, We have
168        *
169        * DCL CONST[0][0..1]
170        * DCL CONST[1][0..2]
171        * DCL CONST[2][0]
172        *
173        * and our dynamic index instruction is
174        * MOV TEMP[0], CONST[ADDR[0].x][1]
175        *
176        * We have to make sure to skip unrolling for CONST[2] because
177        * it has only one constant in the buffer
178        */
179       if ((reg->Register.File == TGSI_FILE_CONSTANT) &&
180           (!reg->Register.Indirect &&
181            (reg->Register.Index > dc->const_buf_range[i]))) {
182          out_of_bound_index = true;
183       }
184 
185       if (!out_of_bound_index) {
186          /**
187           * If we have an instruction of the format:
188           * OPCODE dst, src..., CONST[K][foo], src...
189           * where K is dynamic and tmp_loopIdx = i (loopcount),
190           * replace it with:
191           *
192           * if (K == tmp_loopIdx)
193           *    OPCODE dst, src... where src is CONST[i][foo] and i is constant
194           * }
195           *
196           * Similarly, If instruction uses dynamic array index for samplers
197           * e.g. OPCODE dst, src, SAMPL[k] ..
198           * replace it with:
199           * if (K == tmp_loopIdx)
200           *    OPCODE dst, src, SAMPL[i][foo]... where i is constant.
201           * }
202           */
203          inst = tgsi_default_full_instruction();
204          inst.Instruction.Opcode = TGSI_OPCODE_USEQ;
205          inst.Instruction.NumDstRegs = 1;
206          tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
207                                 tmp_cond, TGSI_WRITEMASK_X);
208          inst.Instruction.NumSrcRegs = 2;
209          tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY,
210                                 tmp_arrayIdx, TGSI_SWIZZLE_X,
211                                 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
212                                 TGSI_SWIZZLE_X);
213          tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_TEMPORARY,
214                                 tmp_loopIdx, TGSI_SWIZZLE_X,
215                                 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
216                                 TGSI_SWIZZLE_X);
217          ctx->emit_instruction(ctx, &inst);
218 
219          inst = tgsi_default_full_instruction();
220          inst.Instruction.Opcode = TGSI_OPCODE_UIF;
221          inst.Instruction.NumDstRegs = 0;
222          inst.Instruction.NumSrcRegs = 1;
223          tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY,
224                                 tmp_cond, TGSI_SWIZZLE_X,
225                                 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
226                                 TGSI_SWIZZLE_X);
227          ctx->emit_instruction(ctx, &inst);
228 
229          /* emit instruction with new, non-dynamic source registers */
230          inst = *orig_inst;
231          for (j = 0; j < inst.Instruction.NumSrcRegs; j++) {
232             if (inst.Src[j].Dimension.Indirect &&
233                 inst.Src[j].Register.File == TGSI_FILE_CONSTANT) {
234                inst.Src[j].Register.Dimension = 1;
235                inst.Src[j].Dimension.Index = i;
236                inst.Src[j].Dimension.Indirect = 0;
237             }
238             else if (inst.Src[j].Register.Indirect &&
239                      inst.Src[j].Register.File == TGSI_FILE_SAMPLER) {
240                inst.Src[j].Register.Indirect = 0;
241                inst.Src[j].Register.Index = i;
242             }
243          }
244          ctx->emit_instruction(ctx, &inst);
245 
246          inst = tgsi_default_full_instruction();
247          inst.Instruction.Opcode = TGSI_OPCODE_ENDIF;
248          inst.Instruction.NumDstRegs = 0;
249          inst.Instruction.NumSrcRegs = 0;
250          ctx->emit_instruction(ctx, &inst);
251       }
252 
253       /**
254        * Increment counter
255        * UADD tmp_loopIdx.x tmp_loopIdx.x imm(1)
256        */
257       inst = tgsi_default_full_instruction();
258       inst.Instruction.Opcode = TGSI_OPCODE_UADD;
259       inst.Instruction.NumDstRegs = 1;
260       tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
261                              tmp_loopIdx, TGSI_WRITEMASK_X);
262       inst.Instruction.NumSrcRegs = 2;
263       tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY,
264                               tmp_loopIdx, TGSI_SWIZZLE_X,
265                               TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
266       tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_IMMEDIATE, imm_index,
267                              TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y,
268                              TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y);
269 
270       ctx->emit_instruction(ctx, &inst);
271    }
272 }
273 
274 
275 /**
276  * TGSI instruction transform callback.
277  */
278 static void
dIndexing_inst(struct tgsi_transform_context * ctx,struct tgsi_full_instruction * inst)279 dIndexing_inst(struct tgsi_transform_context *ctx,
280                struct tgsi_full_instruction *inst)
281 {
282    int i;
283    bool indexing = false;
284    struct dIndexing_transform_context *dc = dIndexing_transform_context(ctx);
285 
286    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
287       struct tgsi_full_src_register *src;
288       src = &inst->Src[i];
289       /* check if constant buffer/sampler is using dynamic index */
290       if ((src->Dimension.Indirect &&
291            src->Register.File == TGSI_FILE_CONSTANT) ||
292           (src->Register.Indirect &&
293            src->Register.File == TGSI_FILE_SAMPLER)) {
294 
295          if (indexing)
296             assert("More than one src has dynamic indexing");
297 
298          if (src->Register.File == TGSI_FILE_CONSTANT)
299             dc->num_iterations = dc->num_const_bufs;
300          else
301             dc->num_iterations = dc->num_samplers;
302 
303          remove_dynamic_indexes(ctx, inst, src);
304          indexing = true;
305       }
306    }
307 
308    if (!indexing) {
309       ctx->emit_instruction(ctx, inst);
310    }
311 }
312 
313 /**
314  * TGSI utility to remove dynamic array indexing for constant buffers and
315  * samplers.
316  *
317  * This utility accepts bitmask of declared constant buffers and samplers,
318  * number of immediates used in shader.
319  *
320  * If dynamic array index is used for constant buffers and samplers, this
321  * utility removes those dynamic indexes from shader. It also makes sure
322  * that it has same output as per original shader.
323  * This is achieved by calculating dynamic array index first and then compare
324  * it with each constant buffer/ sampler index and replace that dynamic index
325  * with static index.
326  */
327 struct tgsi_token *
tgsi_remove_dynamic_indexing(const struct tgsi_token * tokens_in,unsigned const_buffers_declared_bitmask,unsigned samplers_declared_bitmask,unsigned imm_count)328 tgsi_remove_dynamic_indexing(const struct tgsi_token *tokens_in,
329                              unsigned const_buffers_declared_bitmask,
330                              unsigned samplers_declared_bitmask,
331                              unsigned imm_count)
332 {
333    struct dIndexing_transform_context transform;
334    const unsigned num_new_tokens = 1000; /* should be enough */
335    const unsigned new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
336 
337    /* setup transformation context */
338    memset(&transform, 0, sizeof(transform));
339    transform.base.transform_declaration = dIndexing_decl;
340    transform.base.transform_instruction = dIndexing_inst;
341    transform.base.prolog = dIndexing_prolog;
342 
343    transform.orig_num_tmp = 0;
344    transform.orig_num_imm = imm_count;
345    /* get count of declared const buffers and sampler from their bitmasks*/
346    transform.num_const_bufs = log2(const_buffers_declared_bitmask + 1);
347    transform.num_samplers = log2(samplers_declared_bitmask + 1);
348    transform.num_iterations = 0;
349 
350    return tgsi_transform_shader(tokens_in, new_len, &transform.base);
351 }
352 
353 
354