1 /**************************************************************************
2 *
3 * Copyright 2010-2021 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "lp_debug.h"
32 #include "lp_state.h"
33 #include "nir.h"
34
35 /*
36 * Check if the given nir_src comes directly from a FS input.
37 */
38 static bool
is_fs_input(const nir_src * src)39 is_fs_input(const nir_src *src)
40 {
41 const nir_instr *parent = src->ssa[0].parent_instr;
42 if (!parent) {
43 return false;
44 }
45
46 if (parent->type == nir_instr_type_alu) {
47 const nir_alu_instr *alu = nir_instr_as_alu(parent);
48 if (alu->op == nir_op_vec2 ||
49 alu->op == nir_op_vec3 ||
50 alu->op == nir_op_vec4) {
51 /* Check if any of the components come from an FS input */
52 unsigned num_src = nir_op_infos[alu->op].num_inputs;
53 for (unsigned i = 0; i < num_src; i++) {
54 if (is_fs_input(&alu->src[i].src)) {
55 return true;
56 }
57 }
58 }
59 } else if (parent->type == nir_instr_type_intrinsic) {
60 const nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(parent);
61 /* loading from an FS input? */
62 if (intrin->intrinsic == nir_intrinsic_load_deref) {
63 if (is_fs_input(&intrin->src[0])) {
64 return true;
65 }
66 }
67 } else if (parent->type == nir_instr_type_deref) {
68 const nir_deref_instr *deref = nir_instr_as_deref(parent);
69 /* deref'ing an FS input? */
70 if (deref &&
71 deref->deref_type == nir_deref_type_var &&
72 deref->modes == nir_var_shader_in) {
73 return true;
74 }
75 }
76
77 return false;
78 }
79
80
81 /*
82 * Determine whether the given alu src comes directly from an input
83 * register. If so, return true and the input register index and
84 * component. Return false otherwise.
85 */
86 static bool
get_nir_input_info(const nir_alu_src * src,unsigned * input_index,int * input_component)87 get_nir_input_info(const nir_alu_src *src,
88 unsigned *input_index,
89 int *input_component)
90 {
91 // The parent instr should be a nir_intrinsic_load_deref.
92 const nir_instr *parent = src->src.ssa[0].parent_instr;
93 if (!parent || parent->type != nir_instr_type_intrinsic) {
94 return false;
95 }
96 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(parent);
97 if (!intrin ||
98 intrin->intrinsic != nir_intrinsic_load_deref) {
99 return false;
100 }
101
102 // The parent of the load should be a type_deref.
103 parent = intrin->src->ssa->parent_instr;
104 if (!parent || parent->type != nir_instr_type_deref) {
105 return false;
106 }
107
108 // The var being deref'd should be a shader input register.
109 nir_deref_instr *deref = nir_instr_as_deref(parent);
110 if (!deref || deref->deref_type != nir_deref_type_var ||
111 deref->modes != nir_var_shader_in) {
112 return false;
113 }
114
115 /*
116 * If the texture coordinate input is declared as two variables like this:
117 * decl_var shader_in INTERP_MODE_NONE float coord (VARYING_SLOT_VAR0.x, 0, 0)
118 * decl_var shader_in INTERP_MODE_NONE float coord@0 (VARYING_SLOT_VAR0.y, 0, 0)
119 * Then deref->var->data.location_frac will be 0 for the first var and 1
120 * for the second var and the texcoord will be set up with:
121 * vec2 32 ssa_5 = vec2 ssa_2, ssa_4 (note: no swizzles)
122 *
123 * Alternately, if the texture coordinate input is declared as one
124 * variable like this:
125 * decl_var shader_in INTERP_MODE_NONE vec4 i1xyzw (VARYING_SLOT_VAR1.xyzw, 0, 0)
126 * then deref->var->data.location_frac will be 0 and the
127 * tex coord will be setup with:
128 * vec2 32 ssa_2 = vec2 ssa_1.x, ssa_1.y
129 *
130 * We can handle both cases by adding deref->var->data.location_frac and
131 * src->swizzle[0].
132 */
133 *input_index = deref->var->data.driver_location;
134 *input_component = deref->var->data.location_frac + src->swizzle[0];
135 assert(*input_component >= 0);
136 assert(*input_component <= 3);
137
138 return true;
139 }
140
141
142 /*
143 * Examine the texcoord argument to a texture instruction to determine
144 * if the texcoord comes directly from a fragment shader input. If so
145 * return true and return the FS input register index for the coordinate
146 * and the (2-component) swizzle. Return false otherwise.
147 */
148 static bool
get_texcoord_provenance(const nir_tex_src * texcoord,unsigned * coord_fs_input_index,int swizzle[4])149 get_texcoord_provenance(const nir_tex_src *texcoord,
150 unsigned *coord_fs_input_index, // out
151 int swizzle[4]) // out
152 {
153 assert(texcoord->src_type == nir_tex_src_coord);
154
155 // The parent instr of the coord should be an nir_op_vec2 alu op
156 const nir_instr *parent = texcoord->src.ssa->parent_instr;
157 if (!parent || parent->type != nir_instr_type_alu) {
158 return false;
159 }
160 const nir_alu_instr *alu = nir_instr_as_alu(parent);
161 if (!alu || alu->op != nir_op_vec2) {
162 return false;
163 }
164
165 // Loop over nir_op_vec2 instruction arguments to find the
166 // input register index and component.
167 unsigned input_reg_indexes[2];
168 for (unsigned comp = 0; comp < 2; comp++) {
169 if (!get_nir_input_info(&alu->src[comp],
170 &input_reg_indexes[comp], &swizzle[comp])) {
171 return false;
172 }
173 }
174
175 // Both texcoord components should come from the same input register.
176 if (input_reg_indexes[0] != input_reg_indexes[1]) {
177 return false;
178 }
179
180 *coord_fs_input_index = input_reg_indexes[0];
181
182 return true;
183 }
184
185
186 /*
187 * Check if all the values of a nir_load_const_instr are 32-bit
188 * floats in the range [0,1]. If so, return true, else return false.
189 */
190 static bool
check_load_const_in_zero_one(const nir_load_const_instr * load)191 check_load_const_in_zero_one(const nir_load_const_instr *load)
192 {
193 if (load->def.bit_size != 32)
194 return false;
195 for (unsigned c = 0; c < load->def.num_components; c++) {
196 float val = load->value[c].f32;
197 if (val < 0.0 || val > 1.0 || isnan(val)) {
198 return false;
199 }
200 }
201 return true;
202 }
203
204
205 /*
206 * Examine the NIR shader to determine if it's "linear".
207 * For the linear path, we're optimizing the case of rendering a window-
208 * aligned, textured quad. Basically, FS must get the output color from
209 * a texture lookup and, possibly, a constant color. If the color comes
210 * from some other sort of computation or from a VS output (FS input), we
211 * can't use the linear path.
212 */
213 static bool
llvmpipe_nir_fn_is_linear_compat(const struct nir_shader * shader,nir_function_impl * impl,struct lp_tgsi_info * info)214 llvmpipe_nir_fn_is_linear_compat(const struct nir_shader *shader,
215 nir_function_impl *impl,
216 struct lp_tgsi_info *info)
217 {
218 nir_foreach_block(block, impl) {
219 nir_foreach_instr_safe(instr, block) {
220 switch (instr->type) {
221 case nir_instr_type_deref: {
222 nir_deref_instr *deref = nir_instr_as_deref(instr);
223 if (deref->deref_type != nir_deref_type_var)
224 return false;
225 if (deref->var->data.mode == nir_var_shader_out &&
226 deref->var->data.location_frac != 0)
227 return false;
228 break;
229 }
230 case nir_instr_type_load_const: {
231 nir_load_const_instr *load = nir_instr_as_load_const(instr);
232 if (!check_load_const_in_zero_one(load)) {
233 return false;
234 }
235 break;
236 }
237 case nir_instr_type_intrinsic: {
238 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
239 if (intrin->intrinsic != nir_intrinsic_load_deref &&
240 intrin->intrinsic != nir_intrinsic_store_deref &&
241 intrin->intrinsic != nir_intrinsic_load_ubo)
242 return false;
243
244 if (intrin->intrinsic == nir_intrinsic_load_ubo) {
245 if (!nir_src_is_const(intrin->src[0]))
246 return false;
247 nir_load_const_instr *load =
248 nir_instr_as_load_const(intrin->src[0].ssa->parent_instr);
249 if (load->value[0].u32 != 0 || load->def.num_components > 1)
250 return false;
251 } else if (intrin->intrinsic == nir_intrinsic_store_deref) {
252 /*
253 * Assume the store destination is the FS output color.
254 * Check if the store src comes directly from a FS input.
255 * If so, we cannot use the linear path since we don't have
256 * code to convert VS outputs / FS inputs to ubyte with the
257 * needed swizzling.
258 */
259 if (is_fs_input(&intrin->src[1])) {
260 return false;
261 }
262 }
263 break;
264 }
265 case nir_instr_type_tex: {
266 nir_tex_instr *tex = nir_instr_as_tex(instr);
267 struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs];
268 int texcoord_swizzle[4] = {-1, -1, -1, -1};
269 unsigned coord_fs_input_index = 0;
270
271 for (unsigned i = 0; i < tex->num_srcs; i++) {
272 if (tex->src[i].src_type == nir_tex_src_coord) {
273 if (!get_texcoord_provenance(&tex->src[i],
274 &coord_fs_input_index,
275 texcoord_swizzle)) {
276 //debug nir_print_shader((nir_shader *) shader, stdout);
277 return false;
278 }
279 } else if (tex->src[i].src_type == nir_tex_src_texture_handle ||
280 tex->src[i].src_type == nir_tex_src_sampler_handle) {
281 return false;
282 }
283 }
284
285 switch (tex->op) {
286 case nir_texop_tex:
287 tex_info->modifier = LP_BLD_TEX_MODIFIER_NONE;
288 break;
289 default:
290 /* inaccurate but sufficient. */
291 tex_info->modifier = LP_BLD_TEX_MODIFIER_EXPLICIT_LOD;
292 return false;
293 }
294 switch (tex->sampler_dim) {
295 case GLSL_SAMPLER_DIM_2D:
296 tex_info->target = TGSI_TEXTURE_2D;
297 break;
298 default:
299 /* inaccurate but sufficient. */
300 tex_info->target = TGSI_TEXTURE_1D;
301 return false;
302 }
303
304 tex_info->sampler_unit = tex->sampler_index;
305 tex_info->texture_unit = tex->texture_index;
306
307 /* this is enforced in the scanner previously. */
308 tex_info->coord[0].file = TGSI_FILE_INPUT; // S
309 tex_info->coord[1].file = TGSI_FILE_INPUT; // T
310 assert(texcoord_swizzle[0] >= 0);
311 assert(texcoord_swizzle[1] >= 0);
312 tex_info->coord[0].swizzle = texcoord_swizzle[0]; // S
313 tex_info->coord[1].swizzle = texcoord_swizzle[1]; // T
314 tex_info->coord[0].u.index = coord_fs_input_index;
315 tex_info->coord[1].u.index = coord_fs_input_index;
316
317 info->num_texs++;
318 break;
319 }
320 case nir_instr_type_alu: {
321 const nir_alu_instr *alu = nir_instr_as_alu(instr);
322 switch (alu->op) {
323 case nir_op_mov:
324 case nir_op_vec2:
325 case nir_op_vec4:
326 // these instructions are OK
327 break;
328 case nir_op_fmul: {
329 unsigned num_src = nir_op_infos[alu->op].num_inputs;;
330 for (unsigned s = 0; s < num_src; s++) {
331 /* If the MUL uses immediate values, the values must
332 * be 32-bit floats in the range [0,1].
333 */
334 if (nir_src_is_const(alu->src[s].src)) {
335 nir_load_const_instr *load =
336 nir_instr_as_load_const(alu->src[s].src.ssa->parent_instr);
337 if (!check_load_const_in_zero_one(load)) {
338 return false;
339 }
340 } else if (is_fs_input(&alu->src[s].src)) {
341 /* we don't know if the fs inputs are in [0,1] */
342 return false;
343 }
344 }
345 break;
346 }
347 default:
348 // disallowed instruction
349 return false;
350 }
351 break;
352 }
353 default:
354 return false;
355 }
356 }
357 }
358 return true;
359 }
360
361
362 static bool
llvmpipe_nir_is_linear_compat(struct nir_shader * shader,struct lp_tgsi_info * info)363 llvmpipe_nir_is_linear_compat(struct nir_shader *shader,
364 struct lp_tgsi_info *info)
365 {
366 int num_tex = info->num_texs;
367
368 if (util_bitcount64(shader->info.inputs_read) > LP_MAX_LINEAR_INPUTS)
369 return false;
370
371 if (!shader->info.outputs_written || shader->info.fs.color_is_dual_source ||
372 (shader->info.outputs_written & ~BITFIELD64_BIT(FRAG_RESULT_DATA0)))
373 return false;
374
375 info->num_texs = 0;
376 nir_foreach_function_impl(impl, shader) {
377 if (!llvmpipe_nir_fn_is_linear_compat(shader, impl, info))
378 return false;
379 }
380 info->num_texs = num_tex;
381 return true;
382 }
383
384
385 /*
386 * Analyze the given NIR fragment shader and set its shader->kind field
387 * to LP_FS_KIND_x.
388 */
389 void
llvmpipe_fs_analyse_nir(struct lp_fragment_shader * shader)390 llvmpipe_fs_analyse_nir(struct lp_fragment_shader *shader)
391 {
392 if (!shader->info.indirect_textures &&
393 !shader->info.sampler_texture_units_different &&
394 shader->info.num_texs <= LP_MAX_LINEAR_TEXTURES &&
395 llvmpipe_nir_is_linear_compat(shader->base.ir.nir, &shader->info)) {
396 shader->kind = LP_FS_KIND_LLVM_LINEAR;
397 } else {
398 shader->kind = LP_FS_KIND_GENERAL;
399 }
400 }
401
402