/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_cmd_buffer.h"
#include "nvk_physical_device.h"
#include "nvk_shader.h"

#include "nir.h"
#include "nir_builder.h"
#include "nir_xfb_info.h"

#include "nv50_ir_driver.h"
#include "pipe/p_defines.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_memory.h"

#include "nv_push_cl9097.h"

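/* Both knobs below come from environment variables, matching the
 * NV50_PROG_DEBUG and NV50_PROG_OPTIMIZE options honored by the nouveau GL
 * driver's codegen backend; optimization defaults to level 3.
 */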
uint64_t
nvk_cg_get_prog_debug(void)
{
   return debug_get_num_option("NV50_PROG_DEBUG", 0);
}

uint64_t
nvk_cg_get_prog_optimize(void)
{
   return debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
}

const nir_shader_compiler_options *
nvk_cg_nir_options(const struct nvk_physical_device *pdev,
                   gl_shader_stage stage)
{
   return nv50_ir_nir_shader_compiler_options(pdev->info.chipset, stage);
}

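/* Returns the shader input variable for the given location, creating it if
 * it doesn't already exist.  Integer inputs must be flat-shaded; everything
 * else created here gets noperspective interpolation.
 */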
static nir_variable *
find_or_create_input(nir_builder *b, const struct glsl_type *type,
                     const char *name, unsigned location)
{
   nir_foreach_shader_in_variable(in, b->shader) {
      if (in->data.location == location)
         return in;
   }
   nir_variable *in = nir_variable_create(b->shader, nir_var_shader_in,
                                          type, name);
   in->data.location = location;
   if (glsl_type_is_integer(type))
      in->data.interpolation = INTERP_MODE_FLAT;
   else
      in->data.interpolation = INTERP_MODE_NOPERSPECTIVE;

   return in;
}

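/* Lowers the frag-coord-style intrinsics (load_frag_coord, load_point_coord,
 * load_sample_pos, load_layer_id) to loads of ordinary shader inputs so the
 * codegen backend can treat them as regular varyings.  The sample position
 * is reconstructed as fract(gl_FragCoord.xy).
 */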
static bool
lower_fragcoord_instr(nir_builder *b, nir_instr *instr, UNUSED void *_data)
{
   assert(b->shader->info.stage == MESA_SHADER_FRAGMENT);
   nir_variable *var;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   b->cursor = nir_before_instr(&intrin->instr);

   nir_def *val;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_frag_coord:
      var = find_or_create_input(b, glsl_vec4_type(),
                                 "gl_FragCoord",
                                 VARYING_SLOT_POS);
      val = nir_load_var(b, var);
      break;
   case nir_intrinsic_load_point_coord:
      var = find_or_create_input(b, glsl_vector_type(GLSL_TYPE_FLOAT, 2),
                                 "gl_PointCoord",
                                 VARYING_SLOT_PNTC);
      val = nir_load_var(b, var);
      break;
   case nir_intrinsic_load_sample_pos:
      var = find_or_create_input(b, glsl_vec4_type(),
                                 "gl_FragCoord",
                                 VARYING_SLOT_POS);
      val = nir_ffract(b, nir_trim_vector(b, nir_load_var(b, var), 2));
      break;
   case nir_intrinsic_load_layer_id:
      var = find_or_create_input(b, glsl_int_type(),
                                 "gl_Layer", VARYING_SLOT_LAYER);
      val = nir_load_var(b, var);
      break;

   default:
      return false;
   }

   nir_def_rewrite_uses(&intrin->def, val);

   return true;
}

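/* Common NIR cleanup run on every shader before the codegen-specific
 * postprocessing in nak_cg_postprocess_nir().
 */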
void
nvk_cg_preprocess_nir(nir_shader *nir)
{
   NIR_PASS(_, nir, nir_split_struct_vars, nir_var_function_temp);
   NIR_PASS(_, nir, nir_lower_vars_to_ssa);

   NIR_PASS(_, nir, nir_split_var_copies);
   NIR_PASS(_, nir, nir_lower_vars_to_ssa);

   NIR_PASS(_, nir, nir_lower_global_vars_to_local);
   NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL);

   NIR_PASS(_, nir, nir_lower_system_values);

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS(_, nir, nir_shader_instructions_pass, lower_fragcoord_instr,
               nir_metadata_control_flow, NULL);
   }

   nvk_cg_optimize_nir(nir);

   NIR_PASS(_, nir, nir_lower_var_copies);
}

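/* The usual NIR optimization loop: repeat the pass list until an entire
 * iteration makes no progress, then do one final round of vector shrinking
 * and dead-variable removal.
 */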
void
nvk_cg_optimize_nir(nir_shader *nir)
{
   bool progress;

   do {
      progress = false;

      NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp);
      NIR_PASS(progress, nir, nir_shrink_vec_array_vars, nir_var_function_temp);

      if (!nir->info.var_copies_lowered) {
         /* Only run this pass if nir_lower_var_copies was not called
          * yet. That would lower away any copy_deref instructions and we
          * don't want to introduce any more.
          */
         NIR_PASS(progress, nir, nir_opt_find_array_copies);
      }
      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
      NIR_PASS(progress, nir, nir_opt_dead_write_vars);
      NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_opt_dce);
      if (nir_opt_loop(nir)) {
         progress = true;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
         NIR_PASS(progress, nir, nir_opt_dce);
      }
      NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_cse);
      /* Raising this limit should be fine (the failures look like a backend
       * problem), but a bunch of tessellation shaders blow up when we do.
       * Revisit this when NAK is merged.
       */
      NIR_PASS(progress, nir, nir_opt_peephole_select, 2, true, true);
      NIR_PASS(progress, nir, nir_opt_constant_folding);
      NIR_PASS(progress, nir, nir_opt_algebraic);

      NIR_PASS(progress, nir, nir_opt_undef);

      if (nir->options->max_unroll_iterations) {
         NIR_PASS(progress, nir, nir_opt_loop_unroll);
      }
   } while (progress);

   NIR_PASS(progress, nir, nir_opt_shrink_vectors, true);
   NIR_PASS(progress, nir, nir_remove_dead_variables,
            nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL);
}

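/* Lowers image_deref_size to a txs texture query.  Cube image descriptors
 * are set up as plain 2D arrays, so for cubes the layer count is divided by
 * 6 (or forced to 1 for non-arrays) to produce the cube count SPIR-V
 * expects.
 */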
static bool
lower_image_size_to_txs(nir_builder *b, nir_intrinsic_instr *intrin,
                        UNUSED void *_data)
{
   if (intrin->intrinsic != nir_intrinsic_image_deref_size)
      return false;

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_deref_instr *img = nir_src_as_deref(intrin->src[0]);
   nir_def *lod = nir_tex_type_has_lod(img->type) ?
                      intrin->src[1].ssa : NULL;
   nir_def *size = nir_txs_deref(b, img, lod);

   if (glsl_get_sampler_dim(img->type) == GLSL_SAMPLER_DIM_CUBE) {
      /* Cube image descriptors are set up as simple arrays but SPIR-V wants
       * the number of cubes.
       */
      if (glsl_sampler_type_is_array(img->type)) {
         size = nir_vec3(b, nir_channel(b, size, 0),
                            nir_channel(b, size, 1),
                            nir_udiv_imm(b, nir_channel(b, size, 2), 6));
      } else {
         size = nir_vec3(b, nir_channel(b, size, 0),
                            nir_channel(b, size, 1),
                            nir_imm_int(b, 1));
      }
   }

   nir_def_rewrite_uses(&intrin->def, size);

   return true;
}

static int
count_location_slots(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

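/* Assigns packed driver_locations to shader inputs and outputs.  Vertex
 * shader inputs keep their vertex attribute index; everything else is
 * numbered sequentially, counting arrayed (per-vertex) IO by its element
 * type.
 */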
static void
assign_io_locations(nir_shader *nir)
{
   if (nir->info.stage != MESA_SHADER_VERTEX) {
      unsigned location = 0;
      nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) {
         var->data.driver_location = location;
         if (nir_is_arrayed_io(var, nir->info.stage)) {
            location += glsl_count_attribute_slots(glsl_get_array_element(var->type), false);
         } else {
            location += glsl_count_attribute_slots(var->type, false);
         }
      }
      nir->num_inputs = location;
   } else {
      nir_foreach_shader_in_variable(var, nir) {
         assert(var->data.location >= VERT_ATTRIB_GENERIC0);
         var->data.driver_location = var->data.location - VERT_ATTRIB_GENERIC0;
      }
   }

   {
      unsigned location = 0;
      nir_foreach_variable_with_modes(var, nir, nir_var_shader_out) {
         var->data.driver_location = location;
         if (nir_is_arrayed_io(var, nir->info.stage)) {
            location += glsl_count_attribute_slots(glsl_get_array_element(var->type), false);
         } else {
            location += glsl_count_attribute_slots(var->type, false);
         }
      }
      nir->num_outputs = location;
   }
}

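/* Final NIR lowering before handing the shader to codegen: image size
 * queries become txs ops, indirect function-temp derefs are lowered, IO
 * gets driver_locations, and 64-bit integer ops are lowered.
 */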
static void
nak_cg_postprocess_nir(nir_shader *nir)
{
   NIR_PASS(_, nir, nir_shader_intrinsics_pass, lower_image_size_to_txs,
            nir_metadata_control_flow, NULL);

   uint32_t indirect_mask = nir_var_function_temp;

   NIR_PASS(_, nir, nir_lower_indirect_derefs, indirect_mask, 16);

   nvk_cg_optimize_nir(nir);
   if (nir->info.stage != MESA_SHADER_COMPUTE)
      assign_io_locations(nir);

   NIR_PASS(_, nir, nir_lower_int64);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
}

/* NOTE: Using a[0x270] in FP may cause an error even if we're using less than
 * 124 scalar varying values.
 */
static uint32_t
nvc0_shader_input_address(unsigned sn, unsigned si)
{
   switch (sn) {
   case TGSI_SEMANTIC_TESSOUTER:      return 0x000 + si * 0x4;
   case TGSI_SEMANTIC_TESSINNER:      return 0x010 + si * 0x4;
   case TGSI_SEMANTIC_PATCH:          return 0x020 + si * 0x10;
   case TGSI_SEMANTIC_PRIMID:         return 0x060;
   case TGSI_SEMANTIC_LAYER:          return 0x064;
   case TGSI_SEMANTIC_VIEWPORT_INDEX: return 0x068;
   case TGSI_SEMANTIC_PSIZE:          return 0x06c;
   case TGSI_SEMANTIC_POSITION:       return 0x070;
   case TGSI_SEMANTIC_GENERIC:        return 0x080 + si * 0x10;
   case TGSI_SEMANTIC_FOG:            return 0x2e8;
   case TGSI_SEMANTIC_COLOR:          return 0x280 + si * 0x10;
   case TGSI_SEMANTIC_BCOLOR:         return 0x2a0 + si * 0x10;
   case TGSI_SEMANTIC_CLIPDIST:       return 0x2c0 + si * 0x10;
   case TGSI_SEMANTIC_CLIPVERTEX:     return 0x270;
   case TGSI_SEMANTIC_PCOORD:         return 0x2e0;
   case TGSI_SEMANTIC_TESSCOORD:      return 0x2f0;
   case TGSI_SEMANTIC_INSTANCEID:     return 0x2f8;
   case TGSI_SEMANTIC_VERTEXID:       return 0x2fc;
   case TGSI_SEMANTIC_TEXCOORD:       return 0x300 + si * 0x10;
   default:
      assert(!"invalid TGSI input semantic");
      return ~0;
   }
}

static uint32_t
nvc0_shader_output_address(unsigned sn, unsigned si)
{
   switch (sn) {
   case TGSI_SEMANTIC_TESSOUTER:      return 0x000 + si * 0x4;
   case TGSI_SEMANTIC_TESSINNER:      return 0x010 + si * 0x4;
   case TGSI_SEMANTIC_PATCH:          return 0x020 + si * 0x10;
   case TGSI_SEMANTIC_PRIMID:         return 0x060;
   case TGSI_SEMANTIC_LAYER:          return 0x064;
   case TGSI_SEMANTIC_VIEWPORT_INDEX: return 0x068;
   case TGSI_SEMANTIC_PSIZE:          return 0x06c;
   case TGSI_SEMANTIC_POSITION:       return 0x070;
   case TGSI_SEMANTIC_GENERIC:        return 0x080 + si * 0x10;
   case TGSI_SEMANTIC_FOG:            return 0x2e8;
   case TGSI_SEMANTIC_COLOR:          return 0x280 + si * 0x10;
   case TGSI_SEMANTIC_BCOLOR:         return 0x2a0 + si * 0x10;
   case TGSI_SEMANTIC_CLIPDIST:       return 0x2c0 + si * 0x10;
   case TGSI_SEMANTIC_CLIPVERTEX:     return 0x270;
   case TGSI_SEMANTIC_TEXCOORD:       return 0x300 + si * 0x10;
   case TGSI_SEMANTIC_VIEWPORT_MASK:  return 0x3a0;
   case TGSI_SEMANTIC_EDGEFLAG:       return ~0;
   default:
      assert(!"invalid TGSI output semantic");
      return ~0;
   }
}

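/* Vertex shader inputs are packed into the generic attribute space starting
 * at 0x80, one vec4 (16 bytes) per attribute; INSTANCEID and VERTEXID keep
 * their fixed system-value addresses.
 */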
static int
nvc0_vp_assign_input_slots(struct nv50_ir_prog_info_out *info)
{
   unsigned i, c, n;

   for (n = 0, i = 0; i < info->numInputs; ++i) {
      switch (info->in[i].sn) {
      case TGSI_SEMANTIC_INSTANCEID: /* for SM4 only, in TGSI they're SVs */
      case TGSI_SEMANTIC_VERTEXID:
         info->in[i].mask = 0x1;
         info->in[i].slot[0] =
            nvc0_shader_input_address(info->in[i].sn, 0) / 4;
         continue;
      default:
         break;
      }
      for (c = 0; c < 4; ++c)
         info->in[i].slot[c] = (0x80 + n * 0x10 + c * 0x4) / 4;
      ++n;
   }

   return 0;
}

static int
nvc0_sp_assign_input_slots(struct nv50_ir_prog_info_out *info)
{
   unsigned offset;
   unsigned i, c;

   for (i = 0; i < info->numInputs; ++i) {
      offset = nvc0_shader_input_address(info->in[i].sn, info->in[i].si);

      for (c = 0; c < 4; ++c)
         info->in[i].slot[c] = (offset + c * 0x4) / 4;
   }

   return 0;
}

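/* Fragment shader outputs: color results are packed first, skipping any MRT
 * indices that aren't written, followed by the sample mask and then depth.
 */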
static int
nvc0_fp_assign_output_slots(struct nv50_ir_prog_info_out *info)
{
   unsigned count = info->prop.fp.numColourResults * 4;
   unsigned i, c;

   /* Compute the relative position of each color output, since skipped MRT
    * positions will not have registers allocated to them.
    */
   unsigned colors[8] = {0};
   for (i = 0; i < info->numOutputs; ++i)
      if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
         colors[info->out[i].si] = 1;
   for (i = 0, c = 0; i < 8; i++)
      if (colors[i])
         colors[i] = c++;
   for (i = 0; i < info->numOutputs; ++i)
      if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
         for (c = 0; c < 4; ++c)
            info->out[i].slot[c] = colors[info->out[i].si] * 4 + c;

   if (info->io.sampleMask < NV50_CODEGEN_MAX_VARYINGS)
      info->out[info->io.sampleMask].slot[0] = count++;
   else
   if (info->target >= 0xe0)
      count++; /* on Kepler, depth is always last colour reg + 2 */

   if (info->io.fragDepth < NV50_CODEGEN_MAX_VARYINGS)
      info->out[info->io.fragDepth].slot[2] = count;

   return 0;
}

static int
nvc0_sp_assign_output_slots(struct nv50_ir_prog_info_out *info)
{
   unsigned offset;
   unsigned i, c;

   for (i = 0; i < info->numOutputs; ++i) {
      offset = nvc0_shader_output_address(info->out[i].sn, info->out[i].si);

      for (c = 0; c < 4; ++c)
         info->out[i].slot[c] = (offset + c * 0x4) / 4;
   }

   return 0;
}

static int
nvc0_program_assign_varying_slots(struct nv50_ir_prog_info_out *info)
{
   int ret;

   if (info->type == PIPE_SHADER_VERTEX)
      ret = nvc0_vp_assign_input_slots(info);
   else
      ret = nvc0_sp_assign_input_slots(info);
   if (ret)
      return ret;

   if (info->type == PIPE_SHADER_FRAGMENT)
      ret = nvc0_fp_assign_output_slots(info);
   else
      ret = nvc0_sp_assign_output_slots(info);
   return ret;
}

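/* Widens the range of outputs read by downstream stages that is tracked in
 * word 4 of the VTG program header: bits 12..19 hold the minimum slot and
 * bits 24..31 the maximum.
 */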
static inline void
nvk_vtgs_hdr_update_oread(struct nvk_shader *vs, uint8_t slot)
{
   uint8_t min = (vs->info.hdr[4] >> 12) & 0xff;
   uint8_t max = (vs->info.hdr[4] >> 24);

   min = MIN2(min, slot);
   max = MAX2(max, slot);

   vs->info.hdr[4] = (max << 24) | (min << 12);
}

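/* Fills the header fields common to all vertex/tess/geometry (VTG) stages:
 * per-component input and output masks, system-value usage, and the
 * clip/cull distance enables.
 */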
static int
nvk_vtgp_gen_header(struct nvk_shader *vs, struct nv50_ir_prog_info_out *info)
{
   unsigned i, c, a;

   for (i = 0; i < info->numInputs; ++i) {
      if (info->in[i].patch)
         continue;
      for (c = 0; c < 4; ++c) {
         a = info->in[i].slot[c];
         if (info->in[i].mask & (1 << c))
            vs->info.hdr[5 + a / 32] |= 1 << (a % 32);
      }
   }

   for (i = 0; i < info->numOutputs; ++i) {
      if (info->out[i].patch)
         continue;
      for (c = 0; c < 4; ++c) {
         if (!(info->out[i].mask & (1 << c)))
            continue;
         assert(info->out[i].slot[c] >= 0x40 / 4);
         a = info->out[i].slot[c] - 0x40 / 4;
         vs->info.hdr[13 + a / 32] |= 1 << (a % 32);
         if (info->out[i].oread)
            nvk_vtgs_hdr_update_oread(vs, info->out[i].slot[c]);
      }
   }

   for (i = 0; i < info->numSysVals; ++i) {
      switch (info->sv[i].sn) {
      case SYSTEM_VALUE_PRIMITIVE_ID:
         vs->info.hdr[5] |= 1 << 24;
         break;
      case SYSTEM_VALUE_INSTANCE_ID:
         vs->info.hdr[10] |= 1 << 30;
         break;
      case SYSTEM_VALUE_VERTEX_ID:
         vs->info.hdr[10] |= 1 << 31;
         break;
      case SYSTEM_VALUE_TESS_COORD:
         /* We don't have the mask, nor the slots populated.  While this
          * could be achieved, the vast majority of the time if either of
          * the coords is read, then both will be read.
          */
         nvk_vtgs_hdr_update_oread(vs, 0x2f0 / 4);
         nvk_vtgs_hdr_update_oread(vs, 0x2f4 / 4);
         break;
      default:
         break;
      }
   }

   vs->info.vtg.writes_layer = (vs->info.hdr[13] & (1 << 9)) != 0;
   vs->info.vtg.clip_enable = (1 << info->io.clipDistances) - 1;
   vs->info.vtg.cull_enable =
      ((1 << info->io.cullDistances) - 1) << info->io.clipDistances;

   return 0;
}

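/* In the *_gen_header() helpers below, the value shifted by 10 in header
 * word 0 selects the program type: 1 for vertex, 2 for TCS, 3 for TES, 4
 * for geometry, and 5 for fragment.
 */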
static int
nvk_vs_gen_header(struct nvk_shader *vs, struct nv50_ir_prog_info_out *info)
{
   vs->info.hdr[0] = 0x20061 | (1 << 10);
   vs->info.hdr[4] = 0xff000;

   return nvk_vtgp_gen_header(vs, info);
}

static int
nvk_gs_gen_header(struct nvk_shader *gs,
                  const struct nir_shader *nir,
                  struct nv50_ir_prog_info_out *info)
{
   gs->info.hdr[0] = 0x20061 | (4 << 10);

   gs->info.hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24;

   switch (info->prop.gp.outputPrim) {
   case MESA_PRIM_POINTS:
      gs->info.hdr[3] = 0x01000000;
      break;
   case MESA_PRIM_LINE_STRIP:
      gs->info.hdr[3] = 0x06000000;
      break;
   case MESA_PRIM_TRIANGLE_STRIP:
      gs->info.hdr[3] = 0x07000000;
      break;
   default:
      assert(0);
      break;
   }

   gs->info.hdr[4] = CLAMP(info->prop.gp.maxVertices, 1, 1024);

   gs->info.hdr[0] |= nir->info.gs.active_stream_mask << 28;

   return nvk_vtgp_gen_header(gs, info);
}

static void
nvk_generate_tessellation_parameters(const struct nv50_ir_prog_info_out *info,
                                     struct nvk_shader *shader)
{
   /* TODO: This is a little confusing because nouveau codegen uses
    * MESA_PRIM_POINTS for an unspecified domain, and MESA_PRIM_POINTS = 0,
    * the same as the NV9097 ISOLINE enum.
    */
   switch (info->prop.tp.domain) {
   case MESA_PRIM_LINES:
      shader->info.ts.domain = NAK_TS_DOMAIN_ISOLINE;
      break;
   case MESA_PRIM_TRIANGLES:
      shader->info.ts.domain = NAK_TS_DOMAIN_TRIANGLE;
      break;
   case MESA_PRIM_QUADS:
      shader->info.ts.domain = NAK_TS_DOMAIN_QUAD;
      break;
   default:
      return;
   }

   switch (info->prop.tp.partitioning) {
   case PIPE_TESS_SPACING_EQUAL:
      shader->info.ts.spacing = NAK_TS_SPACING_INTEGER;
      break;
   case PIPE_TESS_SPACING_FRACTIONAL_ODD:
      shader->info.ts.spacing = NAK_TS_SPACING_FRACT_ODD;
      break;
   case PIPE_TESS_SPACING_FRACTIONAL_EVEN:
      shader->info.ts.spacing = NAK_TS_SPACING_FRACT_EVEN;
      break;
   default:
      assert(!"invalid tessellator partitioning");
      break;
   }

   if (info->prop.tp.outputPrim == MESA_PRIM_POINTS) { /* point_mode */
      shader->info.ts.prims = NAK_TS_PRIMS_POINTS;
   } else if (info->prop.tp.domain == MESA_PRIM_LINES) { /* isoline domain */
      shader->info.ts.prims = NAK_TS_PRIMS_LINES;
   } else { /* triangle/quad domain */
      if (info->prop.tp.winding > 0) {
         shader->info.ts.prims = NAK_TS_PRIMS_TRIANGLES_CW;
      } else {
         shader->info.ts.prims = NAK_TS_PRIMS_TRIANGLES_CCW;
      }
   }
}

static int
nvk_tcs_gen_header(struct nvk_shader *tcs, struct nv50_ir_prog_info_out *info)
{
   unsigned opcs = 6; /* output patch constants (at least the TessFactors) */

   if (info->numPatchConstants)
      opcs = 8 + info->numPatchConstants * 4;

   tcs->info.hdr[0] = 0x20061 | (2 << 10);

   tcs->info.hdr[1] = opcs << 24;
   tcs->info.hdr[2] = info->prop.tp.outputPatchSize << 24;

   tcs->info.hdr[4] = 0xff000; /* initial min/max parallel output read address */

   nvk_vtgp_gen_header(tcs, info);

   if (info->target >= NVISA_GM107_CHIPSET) {
      /* On GM107+, the number of output patch components has moved in the
       * TCP header, but it seems like the blob still also uses the old
       * position.  Also, the high 8 bits are located in between the min/max
       * parallel field and have to be set after updating the outputs.
       */
      tcs->info.hdr[3] = (opcs & 0x0f) << 28;
      tcs->info.hdr[4] |= (opcs & 0xf0) << 16;
   }

   nvk_generate_tessellation_parameters(info, tcs);

   return 0;
}

static int
nvk_tes_gen_header(struct nvk_shader *tes, struct nv50_ir_prog_info_out *info)
{
   tes->info.hdr[0] = 0x20061 | (3 << 10);
   tes->info.hdr[4] = 0xff000;

   nvk_vtgp_gen_header(tes, info);

   nvk_generate_tessellation_parameters(info, tes);

   tes->info.hdr[18] |= 0x3 << 12; /* ? */

   return 0;
}

#define NVC0_INTERP_FLAT          (1 << 0)
#define NVC0_INTERP_PERSPECTIVE   (2 << 0)
#define NVC0_INTERP_LINEAR        (3 << 0)
#define NVC0_INTERP_CENTROID      (1 << 2)

static uint8_t
nvk_hdr_interp_mode(const struct nv50_ir_varying *var)
{
   if (var->linear)
      return NVC0_INTERP_LINEAR;
   if (var->flat)
      return NVC0_INTERP_FLAT;
   return NVC0_INTERP_PERSPECTIVE;
}

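/* Builds the fragment program header: discard, sample-mask, and depth flags
 * in words 0 and 19; per-input interpolation modes packed two bits at a
 * time from word 4 onward; and the color-output write mask in word 18.
 */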
static int
nvk_fs_gen_header(struct nvk_shader *fs, const struct nak_fs_key *key,
                  struct nv50_ir_prog_info_out *info)
{
   unsigned i, c, a, m;

   /* just 00062 on Kepler */
   fs->info.hdr[0] = 0x20062 | (5 << 10);
   fs->info.hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */

   if (info->prop.fp.usesDiscard || key->zs_self_dep)
      fs->info.hdr[0] |= 0x8000;
   if (!info->prop.fp.separateFragData)
      fs->info.hdr[0] |= 0x4000;
   if (info->io.sampleMask < 80 /* PIPE_MAX_SHADER_OUTPUTS */)
      fs->info.hdr[19] |= 0x1;
   if (info->prop.fp.writesDepth) {
      fs->info.hdr[19] |= 0x2;
      fs->info.fs.writes_depth = true;
   }

   for (i = 0; i < info->numInputs; ++i) {
      m = nvk_hdr_interp_mode(&info->in[i]);
      for (c = 0; c < 4; ++c) {
         if (!(info->in[i].mask & (1 << c)))
            continue;
         a = info->in[i].slot[c];
         if (info->in[i].slot[0] >= (0x060 / 4) &&
             info->in[i].slot[0] <= (0x07c / 4)) {
            fs->info.hdr[5] |= 1 << (24 + (a - 0x060 / 4));
         } else
         if (info->in[i].slot[0] >= (0x2c0 / 4) &&
             info->in[i].slot[0] <= (0x2fc / 4)) {
            fs->info.hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000;
         } else {
            if (info->in[i].slot[c] < (0x040 / 4) ||
                info->in[i].slot[c] > (0x380 / 4))
               continue;
            a *= 2;
            if (info->in[i].slot[0] >= (0x300 / 4))
               a -= 32;
            fs->info.hdr[4 + a / 32] |= m << (a % 32);
         }
      }
   }
   /* GM20x+ needs TGSI_SEMANTIC_POSITION to access sample locations */
   if (info->prop.fp.readsSampleLocations && info->target >= NVISA_GM200_CHIPSET)
      fs->info.hdr[5] |= 0x30000000;

   for (i = 0; i < info->numOutputs; ++i) {
      if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
         fs->info.hdr[18] |= 0xf << (4 * info->out[i].si);
   }

   /* There are no "regular" attachments, but the shader still needs to be
    * executed.  It seems like it wants to think that it has some color
    * outputs in order to actually run.
    */
   if (info->prop.fp.numColourResults == 0 &&
       !info->prop.fp.writesDepth &&
       info->io.sampleMask >= 80 /* PIPE_MAX_SHADER_OUTPUTS */)
      fs->info.hdr[18] |= 0xf;

   fs->info.fs.early_fragment_tests = info->prop.fp.earlyFragTests;
   fs->info.fs.reads_sample_mask = info->prop.fp.usesSampleMaskIn;
   fs->info.fs.post_depth_coverage = info->prop.fp.postDepthCoverage;

   return 0;
}

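/* Maps an XFB output back to the output register index (driver_location) of
 * the variable covering its location, accounting for variables that span
 * multiple vec4 slots.
 */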
static uint8_t
find_register_index_for_xfb_output(const struct nir_shader *nir,
                                   nir_xfb_output_info output)
{
   nir_foreach_shader_out_variable(var, nir) {
      uint32_t slots = glsl_count_vec4_slots(var->type, false, false);
      for (uint32_t i = 0; i < slots; ++i) {
         if (output.location == (var->data.location + i)) {
            return var->data.driver_location + i;
         }
      }
   }
   /* should not be reached */
   return 0;
}

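/* Translates NIR's transform feedback layout into the NAK xfb descriptor:
 * per-buffer strides and stream indices plus, for each buffer, a per-dword
 * table mapping buffer offsets to output slot indices (0xff = skip).
 */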
static void
nvk_fill_transform_feedback_state(struct nak_xfb_info *xfb,
                                  struct nir_shader *nir,
                                  const struct nv50_ir_prog_info_out *info)
{
   const uint8_t max_buffers = 4;
   const uint8_t dw_bytes = 4;
   const struct nir_xfb_info *nx = nir->xfb_info;
   //nir_print_xfb_info(nx, stdout);

   memset(xfb, 0, sizeof(*xfb));

   for (uint8_t b = 0; b < max_buffers; ++b) {
      xfb->stride[b] = b < nx->buffers_written ? nx->buffers[b].stride : 0;
      xfb->attr_count[b] = 0;
      xfb->stream[b] = nx->buffer_to_stream[b];
   }
   memset(xfb->attr_index, 0xff, sizeof(xfb->attr_index)); /* = skip */

   if (info->numOutputs == 0)
      return;

   for (uint32_t i = 0; i < nx->output_count; ++i) {
      const nir_xfb_output_info output = nx->outputs[i];
      const uint8_t b = output.buffer;
      const uint8_t r = find_register_index_for_xfb_output(nir, output);
      uint32_t p = output.offset / dw_bytes;

      assert(r < info->numOutputs && p < ARRAY_SIZE(xfb->attr_index[b]));

      u_foreach_bit(c, nx->outputs[i].component_mask)
         xfb->attr_index[b][p++] = info->out[r].slot[c];

      xfb->attr_count[b] = MAX2(xfb->attr_count[b], p);
   }

   /* zero unused indices */
   for (uint8_t b = 0; b < 4; ++b)
      for (uint32_t c = xfb->attr_count[b]; c & 3; ++c)
         xfb->attr_index[b][c] = 0;
}

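/* Compiles a NIR shader with the nv50 codegen backend, filling out the
 * nvk_shader code pointer, resource usage, and per-stage program header.
 */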
VkResult
nvk_cg_compile_nir(struct nvk_physical_device *pdev, nir_shader *nir,
                   const struct nak_fs_key *fs_key,
                   struct nvk_shader *shader)
{
   struct nv50_ir_prog_info *info;
   struct nv50_ir_prog_info_out info_out = {};
   int ret;

   nak_cg_postprocess_nir(nir);

   info = CALLOC_STRUCT(nv50_ir_prog_info);
   if (!info)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   info->type = nir->info.stage;
   info->target = pdev->info.chipset;
   info->bin.nir = nir;

   for (unsigned i = 0; i < 3; i++)
      shader->info.cs.local_size[i] = nir->info.workgroup_size[i];

   info->dbgFlags = nvk_cg_get_prog_debug();
   info->optLevel = nvk_cg_get_prog_optimize();
   info->io.auxCBSlot = 1;
   info->io.uboInfoBase = 0;
   info->io.drawInfoBase = nvk_root_descriptor_offset(draw.base_vertex);
   if (nir->info.stage == MESA_SHADER_COMPUTE) {
      info->prop.cp.gridInfoBase = 0;
   } else {
      info->assignSlots = nvc0_program_assign_varying_slots;
   }
   ret = nv50_ir_generate_code(info, &info_out);
   if (ret) {
      FREE(info);
      return VK_ERROR_UNKNOWN;
   }

   if (info_out.bin.fixupData) {
      nv50_ir_apply_fixups(info_out.bin.fixupData, info_out.bin.code,
                           fs_key && fs_key->force_sample_shading,
                           false /* flatshade */, false /* alphatest */,
                           fs_key && fs_key->force_sample_shading);
   }

   shader->info.stage = nir->info.stage;
   shader->code_ptr = (uint8_t *)info_out.bin.code;
   shader->code_size = info_out.bin.codeSize;

   if (info_out.target >= NVISA_GV100_CHIPSET)
      shader->info.num_gprs = MAX2(4, info_out.bin.maxGPR + 3);
   else
      shader->info.num_gprs = MAX2(4, info_out.bin.maxGPR + 1);
   shader->info.num_control_barriers = info_out.numBarriers;

   if (info_out.bin.tlsSpace) {
      assert(info_out.bin.tlsSpace < (1 << 24));
      shader->info.hdr[0] |= 1 << 26;
      shader->info.hdr[1] |= align(info_out.bin.tlsSpace, 0x10); /* l[] size */
      shader->info.slm_size = info_out.bin.tlsSpace;
   }

   switch (info->type) {
   case PIPE_SHADER_VERTEX:
      ret = nvk_vs_gen_header(shader, &info_out);
      break;
   case PIPE_SHADER_FRAGMENT:
      ret = nvk_fs_gen_header(shader, fs_key, &info_out);
      shader->info.fs.uses_sample_shading = nir->info.fs.uses_sample_shading;
      break;
   case PIPE_SHADER_GEOMETRY:
      ret = nvk_gs_gen_header(shader, nir, &info_out);
      break;
   case PIPE_SHADER_TESS_CTRL:
      ret = nvk_tcs_gen_header(shader, &info_out);
      break;
   case PIPE_SHADER_TESS_EVAL:
      ret = nvk_tes_gen_header(shader, &info_out);
      break;
   case PIPE_SHADER_COMPUTE:
      shader->info.cs.smem_size = info_out.bin.smemSize;
      break;
   default:
      unreachable("Invalid shader stage");
      break;
   }
   assert(ret == 0);

   if (info_out.io.globalAccess)
      shader->info.hdr[0] |= 1 << 26;
   if (info_out.io.globalAccess & 0x2)
      shader->info.hdr[0] |= 1 << 16;
   if (info_out.io.fp64)
      shader->info.hdr[0] |= 1 << 27;

   if (nir->xfb_info)
      nvk_fill_transform_feedback_state(&shader->info.vtg.xfb, nir, &info_out);

   FREE(info);

   return VK_SUCCESS;
}