xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/llvmpipe/lp_state_fs_analysis.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /**************************************************************************
2  *
3  * Copyright 2010-2021 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  *
26  **************************************************************************/
27 
28 
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "lp_debug.h"
32 #include "lp_state.h"
33 #include "nir.h"
34 
35 /*
36  * Check if the given nir_src comes directly from a FS input.
37  */
38 static bool
is_fs_input(const nir_src * src)39 is_fs_input(const nir_src *src)
40 {
41    const nir_instr *parent = src->ssa[0].parent_instr;
42    if (!parent) {
43       return false;
44    }
45 
46    if (parent->type == nir_instr_type_alu) {
47       const nir_alu_instr *alu = nir_instr_as_alu(parent);
48       if (alu->op == nir_op_vec2 ||
49           alu->op == nir_op_vec3 ||
50           alu->op == nir_op_vec4) {
51          /* Check if any of the components come from an FS input */
52          unsigned num_src = nir_op_infos[alu->op].num_inputs;
53          for (unsigned i = 0; i < num_src; i++) {
54             if (is_fs_input(&alu->src[i].src)) {
55                return true;
56             }
57          }
58       }
59    } else if (parent->type == nir_instr_type_intrinsic) {
60       const nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(parent);
61       /* loading from an FS input? */
62       if (intrin->intrinsic == nir_intrinsic_load_deref) {
63          if (is_fs_input(&intrin->src[0])) {
64             return true;
65          }
66       }
67    } else if (parent->type == nir_instr_type_deref) {
68       const nir_deref_instr *deref = nir_instr_as_deref(parent);
69       /* deref'ing an FS input? */
70       if (deref &&
71           deref->deref_type == nir_deref_type_var &&
72           deref->modes == nir_var_shader_in) {
73          return true;
74       }
75    }
76 
77    return false;
78 }
79 
80 
81 /*
82  * Determine whether the given alu src comes directly from an input
83  * register.  If so, return true and the input register index and
84  * component.  Return false otherwise.
85  */
86 static bool
get_nir_input_info(const nir_alu_src * src,unsigned * input_index,int * input_component)87 get_nir_input_info(const nir_alu_src *src,
88                    unsigned *input_index,
89                    int *input_component)
90 {
91    // The parent instr should be a nir_intrinsic_load_deref.
92    const nir_instr *parent = src->src.ssa[0].parent_instr;
93    if (!parent || parent->type != nir_instr_type_intrinsic) {
94       return false;
95    }
96    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(parent);
97    if (!intrin ||
98        intrin->intrinsic != nir_intrinsic_load_deref) {
99       return false;
100    }
101 
102    // The parent of the load should be a type_deref.
103    parent = intrin->src->ssa->parent_instr;
104    if (!parent || parent->type != nir_instr_type_deref) {
105       return false;
106    }
107 
108    // The var being deref'd should be a shader input register.
109    nir_deref_instr *deref = nir_instr_as_deref(parent);
110    if (!deref || deref->deref_type != nir_deref_type_var ||
111        deref->modes != nir_var_shader_in) {
112       return false;
113    }
114 
115    /*
116     * If the texture coordinate input is declared as two variables like this:
117     * decl_var shader_in INTERP_MODE_NONE float coord (VARYING_SLOT_VAR0.x, 0, 0)
118     * decl_var shader_in INTERP_MODE_NONE float coord@0 (VARYING_SLOT_VAR0.y, 0, 0)
119     * Then deref->var->data.location_frac will be 0 for the first var and 1
120     * for the second var and the texcoord will be set up with:
121     *   vec2 32 ssa_5 = vec2 ssa_2, ssa_4  (note: no swizzles)
122     *
123     * Alternately, if the texture coordinate input is declared as one
124     * variable like this:
125     * decl_var shader_in INTERP_MODE_NONE vec4 i1xyzw (VARYING_SLOT_VAR1.xyzw, 0, 0)
126     * then deref->var->data.location_frac will be 0 and the
127     * tex coord will be setup with:
128     *   vec2 32 ssa_2 = vec2 ssa_1.x, ssa_1.y
129     *
130     * We can handle both cases by adding deref->var->data.location_frac and
131     * src->swizzle[0].
132     */
133    *input_index = deref->var->data.driver_location;
134    *input_component = deref->var->data.location_frac + src->swizzle[0];
135    assert(*input_component >= 0);
136    assert(*input_component <= 3);
137 
138    return true;
139 }
140 
141 
142 /*
143  * Examine the texcoord argument to a texture instruction to determine
144  * if the texcoord comes directly from a fragment shader input.  If so
145  * return true and return the FS input register index for the coordinate
146  * and the (2-component) swizzle.  Return false otherwise.
147  */
148 static bool
get_texcoord_provenance(const nir_tex_src * texcoord,unsigned * coord_fs_input_index,int swizzle[4])149 get_texcoord_provenance(const nir_tex_src *texcoord,
150                         unsigned *coord_fs_input_index, // out
151                         int swizzle[4]) // out
152 {
153    assert(texcoord->src_type == nir_tex_src_coord);
154 
155    // The parent instr of the coord should be an nir_op_vec2 alu op
156    const nir_instr *parent = texcoord->src.ssa->parent_instr;
157    if (!parent || parent->type != nir_instr_type_alu) {
158       return false;
159    }
160    const nir_alu_instr *alu = nir_instr_as_alu(parent);
161    if (!alu || alu->op != nir_op_vec2) {
162       return false;
163    }
164 
165    // Loop over nir_op_vec2 instruction arguments to find the
166    // input register index and component.
167    unsigned input_reg_indexes[2];
168    for (unsigned comp = 0; comp < 2; comp++) {
169       if (!get_nir_input_info(&alu->src[comp],
170                               &input_reg_indexes[comp], &swizzle[comp])) {
171          return false;
172       }
173    }
174 
175    // Both texcoord components should come from the same input register.
176    if (input_reg_indexes[0] != input_reg_indexes[1]) {
177       return false;
178    }
179 
180    *coord_fs_input_index = input_reg_indexes[0];
181 
182    return true;
183 }
184 
185 
186 /*
187  * Check if all the values of a nir_load_const_instr are 32-bit
188  * floats in the range [0,1].  If so, return true, else return false.
189  */
190 static bool
check_load_const_in_zero_one(const nir_load_const_instr * load)191 check_load_const_in_zero_one(const nir_load_const_instr *load)
192 {
193    if (load->def.bit_size != 32)
194       return false;
195    for (unsigned c = 0; c < load->def.num_components; c++) {
196       float val = load->value[c].f32;
197       if (val < 0.0 || val > 1.0 || isnan(val)) {
198          return false;
199       }
200    }
201    return true;
202 }
203 
204 
205 /*
206  * Examine the NIR shader to determine if it's "linear".
207  * For the linear path, we're optimizing the case of rendering a window-
208  * aligned, textured quad.  Basically, FS must get the output color from
209  * a texture lookup and, possibly, a constant color.  If the color comes
210  * from some other sort of computation or from a VS output (FS input), we
211  * can't use the linear path.
212  */
213 static bool
llvmpipe_nir_fn_is_linear_compat(const struct nir_shader * shader,nir_function_impl * impl,struct lp_tgsi_info * info)214 llvmpipe_nir_fn_is_linear_compat(const struct nir_shader *shader,
215                                  nir_function_impl *impl,
216                                  struct lp_tgsi_info *info)
217 {
218    nir_foreach_block(block, impl) {
219       nir_foreach_instr_safe(instr, block) {
220          switch (instr->type) {
221          case nir_instr_type_deref: {
222             nir_deref_instr *deref = nir_instr_as_deref(instr);
223             if (deref->deref_type != nir_deref_type_var)
224                return false;
225             if (deref->var->data.mode == nir_var_shader_out &&
226                 deref->var->data.location_frac != 0)
227                return false;
228             break;
229          }
230          case nir_instr_type_load_const: {
231             nir_load_const_instr *load = nir_instr_as_load_const(instr);
232             if (!check_load_const_in_zero_one(load)) {
233                return false;
234             }
235             break;
236          }
237          case nir_instr_type_intrinsic: {
238             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
239             if (intrin->intrinsic != nir_intrinsic_load_deref &&
240                 intrin->intrinsic != nir_intrinsic_store_deref &&
241                 intrin->intrinsic != nir_intrinsic_load_ubo)
242                return false;
243 
244             if (intrin->intrinsic == nir_intrinsic_load_ubo) {
245                if (!nir_src_is_const(intrin->src[0]))
246                   return false;
247                nir_load_const_instr *load =
248                   nir_instr_as_load_const(intrin->src[0].ssa->parent_instr);
249                if (load->value[0].u32 != 0 || load->def.num_components > 1)
250                   return false;
251             } else if (intrin->intrinsic == nir_intrinsic_store_deref) {
252                /*
253                 * Assume the store destination is the FS output color.
254                 * Check if the store src comes directly from a FS input.
255                 * If so, we cannot use the linear path since we don't have
256                 * code to convert VS outputs / FS inputs to ubyte with the
257                 * needed swizzling.
258                 */
259                if (is_fs_input(&intrin->src[1])) {
260                   return false;
261                }
262             }
263             break;
264          }
265          case nir_instr_type_tex: {
266             nir_tex_instr *tex = nir_instr_as_tex(instr);
267             struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs];
268             int texcoord_swizzle[4] = {-1, -1, -1, -1};
269             unsigned coord_fs_input_index = 0;
270 
271             for (unsigned i = 0; i < tex->num_srcs; i++) {
272                if (tex->src[i].src_type == nir_tex_src_coord) {
273                   if (!get_texcoord_provenance(&tex->src[i],
274                                                &coord_fs_input_index,
275                                                texcoord_swizzle)) {
276                      //debug nir_print_shader((nir_shader *) shader, stdout);
277                      return false;
278                   }
279                } else if (tex->src[i].src_type == nir_tex_src_texture_handle ||
280                           tex->src[i].src_type == nir_tex_src_sampler_handle) {
281                   return false;
282                }
283             }
284 
285             switch (tex->op) {
286             case nir_texop_tex:
287                tex_info->modifier = LP_BLD_TEX_MODIFIER_NONE;
288                break;
289             default:
290                /* inaccurate but sufficient. */
291                tex_info->modifier = LP_BLD_TEX_MODIFIER_EXPLICIT_LOD;
292                return false;
293             }
294             switch (tex->sampler_dim) {
295             case GLSL_SAMPLER_DIM_2D:
296                tex_info->target = TGSI_TEXTURE_2D;
297                break;
298             default:
299                /* inaccurate but sufficient. */
300                tex_info->target = TGSI_TEXTURE_1D;
301                return false;
302             }
303 
304             tex_info->sampler_unit = tex->sampler_index;
305             tex_info->texture_unit = tex->texture_index;
306 
307             /* this is enforced in the scanner previously. */
308             tex_info->coord[0].file = TGSI_FILE_INPUT;  // S
309             tex_info->coord[1].file = TGSI_FILE_INPUT;  // T
310             assert(texcoord_swizzle[0] >= 0);
311             assert(texcoord_swizzle[1] >= 0);
312             tex_info->coord[0].swizzle = texcoord_swizzle[0]; // S
313             tex_info->coord[1].swizzle = texcoord_swizzle[1]; // T
314             tex_info->coord[0].u.index = coord_fs_input_index;
315             tex_info->coord[1].u.index = coord_fs_input_index;
316 
317             info->num_texs++;
318             break;
319          }
320          case nir_instr_type_alu: {
321             const nir_alu_instr *alu = nir_instr_as_alu(instr);
322             switch (alu->op) {
323             case nir_op_mov:
324             case nir_op_vec2:
325             case nir_op_vec4:
326                // these instructions are OK
327                break;
328             case nir_op_fmul: {
329                unsigned num_src = nir_op_infos[alu->op].num_inputs;;
330                for (unsigned s = 0; s < num_src; s++) {
331                   /* If the MUL uses immediate values, the values must
332                    * be 32-bit floats in the range [0,1].
333                    */
334                   if (nir_src_is_const(alu->src[s].src)) {
335                      nir_load_const_instr *load =
336                         nir_instr_as_load_const(alu->src[s].src.ssa->parent_instr);
337                      if (!check_load_const_in_zero_one(load)) {
338                         return false;
339                      }
340                   } else if (is_fs_input(&alu->src[s].src)) {
341                      /* we don't know if the fs inputs are in [0,1] */
342                      return false;
343                   }
344                }
345                break;
346             }
347             default:
348                // disallowed instruction
349                return false;
350             }
351             break;
352          }
353          default:
354             return false;
355          }
356       }
357    }
358    return true;
359 }
360 
361 
362 static bool
llvmpipe_nir_is_linear_compat(struct nir_shader * shader,struct lp_tgsi_info * info)363 llvmpipe_nir_is_linear_compat(struct nir_shader *shader,
364                               struct lp_tgsi_info *info)
365 {
366    int num_tex = info->num_texs;
367 
368    if (util_bitcount64(shader->info.inputs_read) > LP_MAX_LINEAR_INPUTS)
369       return false;
370 
371    if (!shader->info.outputs_written || shader->info.fs.color_is_dual_source ||
372        (shader->info.outputs_written & ~BITFIELD64_BIT(FRAG_RESULT_DATA0)))
373       return false;
374 
375    info->num_texs = 0;
376    nir_foreach_function_impl(impl, shader) {
377       if (!llvmpipe_nir_fn_is_linear_compat(shader, impl, info))
378          return false;
379    }
380    info->num_texs = num_tex;
381    return true;
382 }
383 
384 
385 /*
386  * Analyze the given NIR fragment shader and set its shader->kind field
387  * to LP_FS_KIND_x.
388  */
389 void
llvmpipe_fs_analyse_nir(struct lp_fragment_shader * shader)390 llvmpipe_fs_analyse_nir(struct lp_fragment_shader *shader)
391 {
392    if (!shader->info.indirect_textures &&
393        !shader->info.sampler_texture_units_different &&
394        shader->info.num_texs <= LP_MAX_LINEAR_TEXTURES &&
395        llvmpipe_nir_is_linear_compat(shader->base.ir.nir, &shader->info)) {
396       shader->kind = LP_FS_KIND_LLVM_LINEAR;
397    } else {
398       shader->kind = LP_FS_KIND_GENERAL;
399    }
400 }
401 
402