xref: /aosp_15_r20/external/mesa3d/src/freedreno/vulkan/tu_shader.cc (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright © 2019 Google LLC
 * SPDX-License-Identifier: MIT
 */
5*61046927SAndroid Build Coastguard Worker 
6*61046927SAndroid Build Coastguard Worker #include "tu_shader.h"
7*61046927SAndroid Build Coastguard Worker 
8*61046927SAndroid Build Coastguard Worker #include "spirv/nir_spirv.h"
9*61046927SAndroid Build Coastguard Worker #include "util/mesa-sha1.h"
10*61046927SAndroid Build Coastguard Worker #include "nir/nir_xfb_info.h"
11*61046927SAndroid Build Coastguard Worker #include "vk_nir.h"
12*61046927SAndroid Build Coastguard Worker #include "vk_nir_convert_ycbcr.h"
13*61046927SAndroid Build Coastguard Worker #include "vk_pipeline.h"
14*61046927SAndroid Build Coastguard Worker #include "vk_util.h"
15*61046927SAndroid Build Coastguard Worker 
16*61046927SAndroid Build Coastguard Worker #include "ir3/ir3_compiler.h"
17*61046927SAndroid Build Coastguard Worker #include "ir3/ir3_nir.h"
18*61046927SAndroid Build Coastguard Worker 
19*61046927SAndroid Build Coastguard Worker #include "tu_device.h"
20*61046927SAndroid Build Coastguard Worker #include "tu_descriptor_set.h"
21*61046927SAndroid Build Coastguard Worker #include "tu_lrz.h"
22*61046927SAndroid Build Coastguard Worker #include "tu_pipeline.h"
23*61046927SAndroid Build Coastguard Worker #include "tu_rmv.h"
24*61046927SAndroid Build Coastguard Worker 
25*61046927SAndroid Build Coastguard Worker #include <initializer_list>
26*61046927SAndroid Build Coastguard Worker 
27*61046927SAndroid Build Coastguard Worker nir_shader *
tu_spirv_to_nir(struct tu_device * dev,void * mem_ctx,VkPipelineCreateFlags2KHR pipeline_flags,const VkPipelineShaderStageCreateInfo * stage_info,gl_shader_stage stage)28*61046927SAndroid Build Coastguard Worker tu_spirv_to_nir(struct tu_device *dev,
29*61046927SAndroid Build Coastguard Worker                 void *mem_ctx,
30*61046927SAndroid Build Coastguard Worker                 VkPipelineCreateFlags2KHR pipeline_flags,
31*61046927SAndroid Build Coastguard Worker                 const VkPipelineShaderStageCreateInfo *stage_info,
32*61046927SAndroid Build Coastguard Worker                 gl_shader_stage stage)
33*61046927SAndroid Build Coastguard Worker {
34*61046927SAndroid Build Coastguard Worker    /* TODO these are made-up */
35*61046927SAndroid Build Coastguard Worker    const struct spirv_to_nir_options spirv_options = {
36*61046927SAndroid Build Coastguard Worker       /* ViewID is a sysval in geometry stages and an input in the FS */
37*61046927SAndroid Build Coastguard Worker       .view_index_is_input = stage == MESA_SHADER_FRAGMENT,
38*61046927SAndroid Build Coastguard Worker 
39*61046927SAndroid Build Coastguard Worker       /* Use 16-bit math for RelaxedPrecision ALU ops */
40*61046927SAndroid Build Coastguard Worker       .mediump_16bit_alu = true,
41*61046927SAndroid Build Coastguard Worker 
42*61046927SAndroid Build Coastguard Worker       .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
43*61046927SAndroid Build Coastguard Worker       .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,
44*61046927SAndroid Build Coastguard Worker 
45*61046927SAndroid Build Coastguard Worker       /* Accessed via stg/ldg */
46*61046927SAndroid Build Coastguard Worker       .phys_ssbo_addr_format = nir_address_format_64bit_global,
47*61046927SAndroid Build Coastguard Worker 
48*61046927SAndroid Build Coastguard Worker       /* Accessed via the const register file */
49*61046927SAndroid Build Coastguard Worker       .push_const_addr_format = nir_address_format_logical,
50*61046927SAndroid Build Coastguard Worker 
51*61046927SAndroid Build Coastguard Worker       /* Accessed via ldl/stl */
52*61046927SAndroid Build Coastguard Worker       .shared_addr_format = nir_address_format_32bit_offset,
53*61046927SAndroid Build Coastguard Worker 
54*61046927SAndroid Build Coastguard Worker       /* Accessed via stg/ldg (not used with Vulkan?) */
55*61046927SAndroid Build Coastguard Worker       .global_addr_format = nir_address_format_64bit_global,
56*61046927SAndroid Build Coastguard Worker    };
57*61046927SAndroid Build Coastguard Worker 
58*61046927SAndroid Build Coastguard Worker    const nir_shader_compiler_options *nir_options =
59*61046927SAndroid Build Coastguard Worker       ir3_get_compiler_options(dev->compiler);
60*61046927SAndroid Build Coastguard Worker 
61*61046927SAndroid Build Coastguard Worker    nir_shader *nir;
62*61046927SAndroid Build Coastguard Worker    VkResult result =
63*61046927SAndroid Build Coastguard Worker       vk_pipeline_shader_stage_to_nir(&dev->vk, pipeline_flags, stage_info,
64*61046927SAndroid Build Coastguard Worker                                       &spirv_options, nir_options,
65*61046927SAndroid Build Coastguard Worker                                       mem_ctx, &nir);
66*61046927SAndroid Build Coastguard Worker    if (result != VK_SUCCESS)
67*61046927SAndroid Build Coastguard Worker       return NULL;
68*61046927SAndroid Build Coastguard Worker 
69*61046927SAndroid Build Coastguard Worker    /* ir3 uses num_ubos and num_ssbos to track the number of *bindful*
70*61046927SAndroid Build Coastguard Worker     * UBOs/SSBOs, but spirv_to_nir sets them to the total number of objects
71*61046927SAndroid Build Coastguard Worker     * which is useless for us, so reset them here.
72*61046927SAndroid Build Coastguard Worker     */
73*61046927SAndroid Build Coastguard Worker    nir->info.num_ubos = 0;
74*61046927SAndroid Build Coastguard Worker    nir->info.num_ssbos = 0;
75*61046927SAndroid Build Coastguard Worker 
76*61046927SAndroid Build Coastguard Worker    if (TU_DEBUG(NIR)) {
77*61046927SAndroid Build Coastguard Worker       fprintf(stderr, "translated nir:\n");
78*61046927SAndroid Build Coastguard Worker       nir_print_shader(nir, stderr);
79*61046927SAndroid Build Coastguard Worker    }
80*61046927SAndroid Build Coastguard Worker 
81*61046927SAndroid Build Coastguard Worker    const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
82*61046927SAndroid Build Coastguard Worker       .point_coord = true,
83*61046927SAndroid Build Coastguard Worker    };
84*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
85*61046927SAndroid Build Coastguard Worker 
86*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_lower_global_vars_to_local);
87*61046927SAndroid Build Coastguard Worker 
88*61046927SAndroid Build Coastguard Worker    /* Older glslang missing bf6efd0316d8 ("SPV: Fix #2293: keep relaxed
89*61046927SAndroid Build Coastguard Worker     * precision on arg passed to relaxed param") will pass function args through
90*61046927SAndroid Build Coastguard Worker     * a highp temporary, so we need the nir_opt_find_array_copies() and a copy
91*61046927SAndroid Build Coastguard Worker     * prop before we lower mediump vars, or you'll be unable to optimize out
92*61046927SAndroid Build Coastguard Worker     * array copies after lowering.  We do this before splitting copies, since
93*61046927SAndroid Build Coastguard Worker     * that works against nir_opt_find_array_copies().
94*61046927SAndroid Build Coastguard Worker     * */
95*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_opt_find_array_copies);
96*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_opt_copy_prop_vars);
97*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_opt_dce);
98*61046927SAndroid Build Coastguard Worker 
99*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_split_var_copies);
100*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_lower_var_copies);
101*61046927SAndroid Build Coastguard Worker 
102*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_lower_mediump_vars, nir_var_function_temp | nir_var_shader_temp | nir_var_mem_shared);
103*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_opt_copy_prop_vars);
104*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);
105*61046927SAndroid Build Coastguard Worker 
106*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_lower_system_values);
107*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_lower_is_helper_invocation);
108*61046927SAndroid Build Coastguard Worker 
109*61046927SAndroid Build Coastguard Worker    ir3_optimize_loop(dev->compiler, nir);
110*61046927SAndroid Build Coastguard Worker 
111*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_opt_conditional_discard);
112*61046927SAndroid Build Coastguard Worker 
113*61046927SAndroid Build Coastguard Worker    return nir;
114*61046927SAndroid Build Coastguard Worker }
115*61046927SAndroid Build Coastguard Worker 
116*61046927SAndroid Build Coastguard Worker static void
lower_load_push_constant(struct tu_device * dev,nir_builder * b,nir_intrinsic_instr * instr,struct tu_shader * shader,const struct tu_pipeline_layout * layout)117*61046927SAndroid Build Coastguard Worker lower_load_push_constant(struct tu_device *dev,
118*61046927SAndroid Build Coastguard Worker                          nir_builder *b,
119*61046927SAndroid Build Coastguard Worker                          nir_intrinsic_instr *instr,
120*61046927SAndroid Build Coastguard Worker                          struct tu_shader *shader,
121*61046927SAndroid Build Coastguard Worker                          const struct tu_pipeline_layout *layout)
122*61046927SAndroid Build Coastguard Worker {
123*61046927SAndroid Build Coastguard Worker    uint32_t base = nir_intrinsic_base(instr);
124*61046927SAndroid Build Coastguard Worker    assert(base % 4 == 0);
125*61046927SAndroid Build Coastguard Worker 
126*61046927SAndroid Build Coastguard Worker    if (tu6_shared_constants_enable(layout, dev->compiler)) {
127*61046927SAndroid Build Coastguard Worker       /* All stages share the same range.  We could potentially add
128*61046927SAndroid Build Coastguard Worker        * push_constant_offset to layout and apply it, but this is good for
129*61046927SAndroid Build Coastguard Worker        * now.
130*61046927SAndroid Build Coastguard Worker        */
131*61046927SAndroid Build Coastguard Worker       base += dev->compiler->shared_consts_base_offset * 4;
132*61046927SAndroid Build Coastguard Worker    } else {
133*61046927SAndroid Build Coastguard Worker       assert(base >= shader->const_state.push_consts.lo * 4);
134*61046927SAndroid Build Coastguard Worker       base -= shader->const_state.push_consts.lo * 4;
135*61046927SAndroid Build Coastguard Worker    }
136*61046927SAndroid Build Coastguard Worker 
137*61046927SAndroid Build Coastguard Worker    nir_def *load =
138*61046927SAndroid Build Coastguard Worker       nir_load_const_ir3(b, instr->num_components, instr->def.bit_size,
139*61046927SAndroid Build Coastguard Worker                          nir_ushr_imm(b, instr->src[0].ssa, 2), .base = base);
140*61046927SAndroid Build Coastguard Worker 
141*61046927SAndroid Build Coastguard Worker    nir_def_replace(&instr->def, load);
142*61046927SAndroid Build Coastguard Worker }
143*61046927SAndroid Build Coastguard Worker 
144*61046927SAndroid Build Coastguard Worker static void
lower_vulkan_resource_index(struct tu_device * dev,nir_builder * b,nir_intrinsic_instr * instr,struct tu_shader * shader,const struct tu_pipeline_layout * layout)145*61046927SAndroid Build Coastguard Worker lower_vulkan_resource_index(struct tu_device *dev, nir_builder *b,
146*61046927SAndroid Build Coastguard Worker                             nir_intrinsic_instr *instr,
147*61046927SAndroid Build Coastguard Worker                             struct tu_shader *shader,
148*61046927SAndroid Build Coastguard Worker                             const struct tu_pipeline_layout *layout)
149*61046927SAndroid Build Coastguard Worker {
150*61046927SAndroid Build Coastguard Worker    struct ir3_compiler *compiler = dev->compiler;
151*61046927SAndroid Build Coastguard Worker    nir_def *vulkan_idx = instr->src[0].ssa;
152*61046927SAndroid Build Coastguard Worker 
153*61046927SAndroid Build Coastguard Worker    unsigned set = nir_intrinsic_desc_set(instr);
154*61046927SAndroid Build Coastguard Worker    unsigned binding = nir_intrinsic_binding(instr);
155*61046927SAndroid Build Coastguard Worker    struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
156*61046927SAndroid Build Coastguard Worker    struct tu_descriptor_set_binding_layout *binding_layout =
157*61046927SAndroid Build Coastguard Worker       &set_layout->binding[binding];
158*61046927SAndroid Build Coastguard Worker    nir_def *base;
159*61046927SAndroid Build Coastguard Worker 
160*61046927SAndroid Build Coastguard Worker    if (binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
161*61046927SAndroid Build Coastguard Worker       return;
162*61046927SAndroid Build Coastguard Worker 
163*61046927SAndroid Build Coastguard Worker    shader->active_desc_sets |= 1u << set;
164*61046927SAndroid Build Coastguard Worker 
165*61046927SAndroid Build Coastguard Worker    switch (binding_layout->type) {
166*61046927SAndroid Build Coastguard Worker    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
167*61046927SAndroid Build Coastguard Worker    case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
168*61046927SAndroid Build Coastguard Worker       int offset = 0;
169*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < set; i++) {
170*61046927SAndroid Build Coastguard Worker          if (shader->dynamic_descriptor_sizes[i] >= 0) {
171*61046927SAndroid Build Coastguard Worker             offset += shader->dynamic_descriptor_sizes[i];
172*61046927SAndroid Build Coastguard Worker          } else {
173*61046927SAndroid Build Coastguard Worker             offset = -1;
174*61046927SAndroid Build Coastguard Worker             break;
175*61046927SAndroid Build Coastguard Worker          }
176*61046927SAndroid Build Coastguard Worker       }
177*61046927SAndroid Build Coastguard Worker 
178*61046927SAndroid Build Coastguard Worker       if (offset < 0) {
179*61046927SAndroid Build Coastguard Worker          /* With independent sets, we don't know
180*61046927SAndroid Build Coastguard Worker           * layout->set[set].dynamic_offset_start until after link time which
181*61046927SAndroid Build Coastguard Worker           * with fast linking means after the shader is compiled. We have to
182*61046927SAndroid Build Coastguard Worker           * get it from the const file instead.
183*61046927SAndroid Build Coastguard Worker           */
184*61046927SAndroid Build Coastguard Worker          base = nir_imm_int(b, binding_layout->dynamic_offset_offset / (4 * A6XX_TEX_CONST_DWORDS));
185*61046927SAndroid Build Coastguard Worker          nir_def *dynamic_offset_start;
186*61046927SAndroid Build Coastguard Worker          if (compiler->load_shader_consts_via_preamble) {
187*61046927SAndroid Build Coastguard Worker             dynamic_offset_start =
188*61046927SAndroid Build Coastguard Worker                ir3_load_driver_ubo(b, 1, &shader->const_state.dynamic_offsets_ubo, set);
189*61046927SAndroid Build Coastguard Worker          } else {
190*61046927SAndroid Build Coastguard Worker             dynamic_offset_start = nir_load_const_ir3(
191*61046927SAndroid Build Coastguard Worker                b, 1, 32, nir_imm_int(b, 0),
192*61046927SAndroid Build Coastguard Worker                .base = shader->const_state.dynamic_offset_loc + set);
193*61046927SAndroid Build Coastguard Worker          }
194*61046927SAndroid Build Coastguard Worker          base = nir_iadd(b, base, dynamic_offset_start);
195*61046927SAndroid Build Coastguard Worker       } else {
196*61046927SAndroid Build Coastguard Worker          base = nir_imm_int(b, (offset +
197*61046927SAndroid Build Coastguard Worker             binding_layout->dynamic_offset_offset) / (4 * A6XX_TEX_CONST_DWORDS));
198*61046927SAndroid Build Coastguard Worker       }
199*61046927SAndroid Build Coastguard Worker       assert(dev->physical_device->reserved_set_idx >= 0);
200*61046927SAndroid Build Coastguard Worker       set = dev->physical_device->reserved_set_idx;
201*61046927SAndroid Build Coastguard Worker       break;
202*61046927SAndroid Build Coastguard Worker    }
203*61046927SAndroid Build Coastguard Worker    default:
204*61046927SAndroid Build Coastguard Worker       base = nir_imm_int(b, binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS));
205*61046927SAndroid Build Coastguard Worker       break;
206*61046927SAndroid Build Coastguard Worker    }
207*61046927SAndroid Build Coastguard Worker 
208*61046927SAndroid Build Coastguard Worker    unsigned stride = binding_layout->size / (4 * A6XX_TEX_CONST_DWORDS);
209*61046927SAndroid Build Coastguard Worker    assert(util_is_power_of_two_nonzero(stride));
210*61046927SAndroid Build Coastguard Worker    nir_def *shift = nir_imm_int(b, util_logbase2(stride));
211*61046927SAndroid Build Coastguard Worker 
212*61046927SAndroid Build Coastguard Worker    nir_def *def = nir_vec3(b, nir_imm_int(b, set),
213*61046927SAndroid Build Coastguard Worker                                nir_iadd(b, base,
214*61046927SAndroid Build Coastguard Worker                                         nir_ishl(b, vulkan_idx, shift)),
215*61046927SAndroid Build Coastguard Worker                                shift);
216*61046927SAndroid Build Coastguard Worker 
217*61046927SAndroid Build Coastguard Worker    nir_def_replace(&instr->def, def);
218*61046927SAndroid Build Coastguard Worker }
219*61046927SAndroid Build Coastguard Worker 
220*61046927SAndroid Build Coastguard Worker static void
lower_vulkan_resource_reindex(nir_builder * b,nir_intrinsic_instr * instr)221*61046927SAndroid Build Coastguard Worker lower_vulkan_resource_reindex(nir_builder *b, nir_intrinsic_instr *instr)
222*61046927SAndroid Build Coastguard Worker {
223*61046927SAndroid Build Coastguard Worker    nir_def *old_index = instr->src[0].ssa;
224*61046927SAndroid Build Coastguard Worker    nir_def *delta = instr->src[1].ssa;
225*61046927SAndroid Build Coastguard Worker    nir_def *shift = nir_channel(b, old_index, 2);
226*61046927SAndroid Build Coastguard Worker 
227*61046927SAndroid Build Coastguard Worker    nir_def *new_index =
228*61046927SAndroid Build Coastguard Worker       nir_vec3(b, nir_channel(b, old_index, 0),
229*61046927SAndroid Build Coastguard Worker                nir_iadd(b, nir_channel(b, old_index, 1),
230*61046927SAndroid Build Coastguard Worker                         nir_ishl(b, delta, shift)),
231*61046927SAndroid Build Coastguard Worker                shift);
232*61046927SAndroid Build Coastguard Worker 
233*61046927SAndroid Build Coastguard Worker    nir_def_replace(&instr->def, new_index);
234*61046927SAndroid Build Coastguard Worker }
235*61046927SAndroid Build Coastguard Worker 
236*61046927SAndroid Build Coastguard Worker static void
lower_load_vulkan_descriptor(nir_builder * b,nir_intrinsic_instr * intrin)237*61046927SAndroid Build Coastguard Worker lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin)
238*61046927SAndroid Build Coastguard Worker {
239*61046927SAndroid Build Coastguard Worker    nir_def *old_index = intrin->src[0].ssa;
240*61046927SAndroid Build Coastguard Worker    /* Loading the descriptor happens as part of the load/store instruction so
241*61046927SAndroid Build Coastguard Worker     * this is a no-op. We just need to turn the shift into an offset of 0.
242*61046927SAndroid Build Coastguard Worker     */
243*61046927SAndroid Build Coastguard Worker    nir_def *new_index =
244*61046927SAndroid Build Coastguard Worker       nir_vec3(b, nir_channel(b, old_index, 0),
245*61046927SAndroid Build Coastguard Worker                nir_channel(b, old_index, 1),
246*61046927SAndroid Build Coastguard Worker                nir_imm_int(b, 0));
247*61046927SAndroid Build Coastguard Worker    nir_def_replace(&intrin->def, new_index);
248*61046927SAndroid Build Coastguard Worker }
249*61046927SAndroid Build Coastguard Worker 
250*61046927SAndroid Build Coastguard Worker static bool
lower_ssbo_ubo_intrinsic(struct tu_device * dev,nir_builder * b,nir_intrinsic_instr * intrin)251*61046927SAndroid Build Coastguard Worker lower_ssbo_ubo_intrinsic(struct tu_device *dev,
252*61046927SAndroid Build Coastguard Worker                          nir_builder *b, nir_intrinsic_instr *intrin)
253*61046927SAndroid Build Coastguard Worker {
254*61046927SAndroid Build Coastguard Worker    const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
255*61046927SAndroid Build Coastguard Worker 
256*61046927SAndroid Build Coastguard Worker    /* The bindless base is part of the instruction, which means that part of
257*61046927SAndroid Build Coastguard Worker     * the "pointer" has to be constant. We solve this in the same way the blob
258*61046927SAndroid Build Coastguard Worker     * does, by generating a bunch of if-statements. In the usual case where
259*61046927SAndroid Build Coastguard Worker     * the descriptor set is constant we can skip that, though).
260*61046927SAndroid Build Coastguard Worker     */
261*61046927SAndroid Build Coastguard Worker 
262*61046927SAndroid Build Coastguard Worker    unsigned buffer_src;
263*61046927SAndroid Build Coastguard Worker    if (intrin->intrinsic == nir_intrinsic_store_ssbo) {
264*61046927SAndroid Build Coastguard Worker       /* This has the value first */
265*61046927SAndroid Build Coastguard Worker       buffer_src = 1;
266*61046927SAndroid Build Coastguard Worker    } else {
267*61046927SAndroid Build Coastguard Worker       buffer_src = 0;
268*61046927SAndroid Build Coastguard Worker    }
269*61046927SAndroid Build Coastguard Worker 
270*61046927SAndroid Build Coastguard Worker    /* Don't lower non-bindless UBO loads of driver params */
271*61046927SAndroid Build Coastguard Worker    if (intrin->src[buffer_src].ssa->num_components == 1)
272*61046927SAndroid Build Coastguard Worker       return false;
273*61046927SAndroid Build Coastguard Worker 
274*61046927SAndroid Build Coastguard Worker    nir_scalar scalar_idx = nir_scalar_resolved(intrin->src[buffer_src].ssa, 0);
275*61046927SAndroid Build Coastguard Worker    nir_def *descriptor_idx = nir_channel(b, intrin->src[buffer_src].ssa, 1);
276*61046927SAndroid Build Coastguard Worker 
277*61046927SAndroid Build Coastguard Worker    if (intrin->intrinsic == nir_intrinsic_load_ubo &&
278*61046927SAndroid Build Coastguard Worker        dev->instance->allow_oob_indirect_ubo_loads) {
279*61046927SAndroid Build Coastguard Worker       nir_scalar offset = nir_scalar_resolved(intrin->src[1].ssa, 0);
280*61046927SAndroid Build Coastguard Worker       if (!nir_scalar_is_const(offset)) {
281*61046927SAndroid Build Coastguard Worker          nir_intrinsic_set_range(intrin, ~0);
282*61046927SAndroid Build Coastguard Worker       }
283*61046927SAndroid Build Coastguard Worker    }
284*61046927SAndroid Build Coastguard Worker 
285*61046927SAndroid Build Coastguard Worker    /* Descriptor index has to be adjusted in the following cases:
286*61046927SAndroid Build Coastguard Worker     *  - isam loads, when the 16-bit descriptor cannot also be used for 32-bit
287*61046927SAndroid Build Coastguard Worker     *    loads -- next-index descriptor will be able to do that;
288*61046927SAndroid Build Coastguard Worker     *  - 8-bit SSBO loads and stores -- next-index descriptor is dedicated to
289*61046927SAndroid Build Coastguard Worker     *    storage accesses of that size.
290*61046927SAndroid Build Coastguard Worker     */
291*61046927SAndroid Build Coastguard Worker    if ((dev->physical_device->info->a6xx.storage_16bit &&
292*61046927SAndroid Build Coastguard Worker         !dev->physical_device->info->a6xx.has_isam_v &&
293*61046927SAndroid Build Coastguard Worker         intrin->intrinsic == nir_intrinsic_load_ssbo &&
294*61046927SAndroid Build Coastguard Worker         (nir_intrinsic_access(intrin) & ACCESS_CAN_REORDER) &&
295*61046927SAndroid Build Coastguard Worker         intrin->def.bit_size > 16) ||
296*61046927SAndroid Build Coastguard Worker        (dev->physical_device->info->a7xx.storage_8bit &&
297*61046927SAndroid Build Coastguard Worker         ((intrin->intrinsic == nir_intrinsic_load_ssbo && intrin->def.bit_size == 8) ||
298*61046927SAndroid Build Coastguard Worker          (intrin->intrinsic == nir_intrinsic_store_ssbo && intrin->src[0].ssa->bit_size == 8)))) {
299*61046927SAndroid Build Coastguard Worker       descriptor_idx = nir_iadd_imm(b, descriptor_idx, 1);
300*61046927SAndroid Build Coastguard Worker    }
301*61046927SAndroid Build Coastguard Worker 
302*61046927SAndroid Build Coastguard Worker    nir_def *results[MAX_SETS] = { NULL };
303*61046927SAndroid Build Coastguard Worker 
304*61046927SAndroid Build Coastguard Worker    if (nir_scalar_is_const(scalar_idx)) {
305*61046927SAndroid Build Coastguard Worker       nir_def *bindless =
306*61046927SAndroid Build Coastguard Worker          nir_bindless_resource_ir3(b, 32, descriptor_idx, .desc_set = nir_scalar_as_uint(scalar_idx));
307*61046927SAndroid Build Coastguard Worker       nir_src_rewrite(&intrin->src[buffer_src], bindless);
308*61046927SAndroid Build Coastguard Worker       return true;
309*61046927SAndroid Build Coastguard Worker    }
310*61046927SAndroid Build Coastguard Worker 
311*61046927SAndroid Build Coastguard Worker    nir_def *base_idx = nir_channel(b, scalar_idx.def, scalar_idx.comp);
312*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < dev->physical_device->info->a6xx.max_sets; i++) {
313*61046927SAndroid Build Coastguard Worker       /* if (base_idx == i) { ... */
314*61046927SAndroid Build Coastguard Worker       nir_if *nif = nir_push_if(b, nir_ieq_imm(b, base_idx, i));
315*61046927SAndroid Build Coastguard Worker 
316*61046927SAndroid Build Coastguard Worker       nir_def *bindless =
317*61046927SAndroid Build Coastguard Worker          nir_bindless_resource_ir3(b, 32, descriptor_idx, .desc_set = i);
318*61046927SAndroid Build Coastguard Worker 
319*61046927SAndroid Build Coastguard Worker       nir_intrinsic_instr *copy =
320*61046927SAndroid Build Coastguard Worker          nir_intrinsic_instr_create(b->shader, intrin->intrinsic);
321*61046927SAndroid Build Coastguard Worker 
322*61046927SAndroid Build Coastguard Worker       copy->num_components = intrin->num_components;
323*61046927SAndroid Build Coastguard Worker 
324*61046927SAndroid Build Coastguard Worker       for (unsigned src = 0; src < info->num_srcs; src++) {
325*61046927SAndroid Build Coastguard Worker          if (src == buffer_src)
326*61046927SAndroid Build Coastguard Worker             copy->src[src] = nir_src_for_ssa(bindless);
327*61046927SAndroid Build Coastguard Worker          else
328*61046927SAndroid Build Coastguard Worker             copy->src[src] = nir_src_for_ssa(intrin->src[src].ssa);
329*61046927SAndroid Build Coastguard Worker       }
330*61046927SAndroid Build Coastguard Worker 
331*61046927SAndroid Build Coastguard Worker       for (unsigned idx = 0; idx < info->num_indices; idx++) {
332*61046927SAndroid Build Coastguard Worker          copy->const_index[idx] = intrin->const_index[idx];
333*61046927SAndroid Build Coastguard Worker       }
334*61046927SAndroid Build Coastguard Worker 
335*61046927SAndroid Build Coastguard Worker       if (info->has_dest) {
336*61046927SAndroid Build Coastguard Worker          nir_def_init(&copy->instr, &copy->def,
337*61046927SAndroid Build Coastguard Worker                       intrin->def.num_components,
338*61046927SAndroid Build Coastguard Worker                       intrin->def.bit_size);
339*61046927SAndroid Build Coastguard Worker          results[i] = &copy->def;
340*61046927SAndroid Build Coastguard Worker       }
341*61046927SAndroid Build Coastguard Worker 
342*61046927SAndroid Build Coastguard Worker       nir_builder_instr_insert(b, &copy->instr);
343*61046927SAndroid Build Coastguard Worker 
344*61046927SAndroid Build Coastguard Worker       /* } else { ... */
345*61046927SAndroid Build Coastguard Worker       nir_push_else(b, nif);
346*61046927SAndroid Build Coastguard Worker    }
347*61046927SAndroid Build Coastguard Worker 
348*61046927SAndroid Build Coastguard Worker    nir_def *result =
349*61046927SAndroid Build Coastguard Worker       nir_undef(b, intrin->def.num_components, intrin->def.bit_size);
350*61046927SAndroid Build Coastguard Worker    for (int i = dev->physical_device->info->a6xx.max_sets - 1; i >= 0; i--) {
351*61046927SAndroid Build Coastguard Worker       nir_pop_if(b, NULL);
352*61046927SAndroid Build Coastguard Worker       if (info->has_dest)
353*61046927SAndroid Build Coastguard Worker          result = nir_if_phi(b, results[i], result);
354*61046927SAndroid Build Coastguard Worker    }
355*61046927SAndroid Build Coastguard Worker 
356*61046927SAndroid Build Coastguard Worker    if (info->has_dest)
357*61046927SAndroid Build Coastguard Worker       nir_def_rewrite_uses(&intrin->def, result);
358*61046927SAndroid Build Coastguard Worker    nir_instr_remove(&intrin->instr);
359*61046927SAndroid Build Coastguard Worker    return true;
360*61046927SAndroid Build Coastguard Worker }
361*61046927SAndroid Build Coastguard Worker 
362*61046927SAndroid Build Coastguard Worker static nir_def *
build_bindless(struct tu_device * dev,nir_builder * b,nir_deref_instr * deref,bool is_sampler,struct tu_shader * shader,const struct tu_pipeline_layout * layout)363*61046927SAndroid Build Coastguard Worker build_bindless(struct tu_device *dev, nir_builder *b,
364*61046927SAndroid Build Coastguard Worker                nir_deref_instr *deref, bool is_sampler,
365*61046927SAndroid Build Coastguard Worker                struct tu_shader *shader,
366*61046927SAndroid Build Coastguard Worker                const struct tu_pipeline_layout *layout)
367*61046927SAndroid Build Coastguard Worker {
368*61046927SAndroid Build Coastguard Worker    nir_variable *var = nir_deref_instr_get_variable(deref);
369*61046927SAndroid Build Coastguard Worker 
370*61046927SAndroid Build Coastguard Worker    unsigned set = var->data.descriptor_set;
371*61046927SAndroid Build Coastguard Worker    unsigned binding = var->data.binding;
372*61046927SAndroid Build Coastguard Worker    const struct tu_descriptor_set_binding_layout *bind_layout =
373*61046927SAndroid Build Coastguard Worker       &layout->set[set].layout->binding[binding];
374*61046927SAndroid Build Coastguard Worker 
375*61046927SAndroid Build Coastguard Worker    /* input attachments use non bindless workaround */
376*61046927SAndroid Build Coastguard Worker    if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT &&
377*61046927SAndroid Build Coastguard Worker        !TU_DEBUG(DYNAMIC)) {
378*61046927SAndroid Build Coastguard Worker       const struct glsl_type *glsl_type = glsl_without_array(var->type);
379*61046927SAndroid Build Coastguard Worker       uint32_t idx = var->data.index * 2;
380*61046927SAndroid Build Coastguard Worker 
381*61046927SAndroid Build Coastguard Worker       BITSET_SET_RANGE_INSIDE_WORD(b->shader->info.textures_used, idx, (idx + bind_layout->array_size * 2) - 1);
382*61046927SAndroid Build Coastguard Worker 
383*61046927SAndroid Build Coastguard Worker       /* D24S8 workaround: stencil of D24S8 will be sampled as uint */
384*61046927SAndroid Build Coastguard Worker       if (glsl_get_sampler_result_type(glsl_type) == GLSL_TYPE_UINT)
385*61046927SAndroid Build Coastguard Worker          idx += 1;
386*61046927SAndroid Build Coastguard Worker 
387*61046927SAndroid Build Coastguard Worker       if (deref->deref_type == nir_deref_type_var)
388*61046927SAndroid Build Coastguard Worker          return nir_imm_int(b, idx);
389*61046927SAndroid Build Coastguard Worker 
390*61046927SAndroid Build Coastguard Worker       nir_def *arr_index = deref->arr.index.ssa;
391*61046927SAndroid Build Coastguard Worker       return nir_iadd_imm(b, nir_imul_imm(b, arr_index, 2), idx);
392*61046927SAndroid Build Coastguard Worker    }
393*61046927SAndroid Build Coastguard Worker 
394*61046927SAndroid Build Coastguard Worker    shader->active_desc_sets |= 1u << set;
395*61046927SAndroid Build Coastguard Worker 
396*61046927SAndroid Build Coastguard Worker    nir_def *desc_offset;
397*61046927SAndroid Build Coastguard Worker    unsigned descriptor_stride;
398*61046927SAndroid Build Coastguard Worker    unsigned offset = 0;
399*61046927SAndroid Build Coastguard Worker    /* Samplers come second in combined image/sampler descriptors, see
400*61046927SAndroid Build Coastguard Worker       * write_combined_image_sampler_descriptor().
401*61046927SAndroid Build Coastguard Worker       */
402*61046927SAndroid Build Coastguard Worker    if (is_sampler && bind_layout->type ==
403*61046927SAndroid Build Coastguard Worker          VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
404*61046927SAndroid Build Coastguard Worker       offset = 1;
405*61046927SAndroid Build Coastguard Worker    }
406*61046927SAndroid Build Coastguard Worker    desc_offset =
407*61046927SAndroid Build Coastguard Worker       nir_imm_int(b, (bind_layout->offset / (4 * A6XX_TEX_CONST_DWORDS)) +
408*61046927SAndroid Build Coastguard Worker                   offset);
409*61046927SAndroid Build Coastguard Worker    descriptor_stride = bind_layout->size / (4 * A6XX_TEX_CONST_DWORDS);
410*61046927SAndroid Build Coastguard Worker 
411*61046927SAndroid Build Coastguard Worker    if (deref->deref_type != nir_deref_type_var) {
412*61046927SAndroid Build Coastguard Worker       assert(deref->deref_type == nir_deref_type_array);
413*61046927SAndroid Build Coastguard Worker 
414*61046927SAndroid Build Coastguard Worker       nir_def *arr_index = deref->arr.index.ssa;
415*61046927SAndroid Build Coastguard Worker       desc_offset = nir_iadd(b, desc_offset,
416*61046927SAndroid Build Coastguard Worker                              nir_imul_imm(b, arr_index, descriptor_stride));
417*61046927SAndroid Build Coastguard Worker    }
418*61046927SAndroid Build Coastguard Worker 
419*61046927SAndroid Build Coastguard Worker    return nir_bindless_resource_ir3(b, 32, desc_offset, .desc_set = set);
420*61046927SAndroid Build Coastguard Worker }
421*61046927SAndroid Build Coastguard Worker 
422*61046927SAndroid Build Coastguard Worker static void
lower_image_deref(struct tu_device * dev,nir_builder * b,nir_intrinsic_instr * instr,struct tu_shader * shader,const struct tu_pipeline_layout * layout)423*61046927SAndroid Build Coastguard Worker lower_image_deref(struct tu_device *dev, nir_builder *b,
424*61046927SAndroid Build Coastguard Worker                   nir_intrinsic_instr *instr, struct tu_shader *shader,
425*61046927SAndroid Build Coastguard Worker                   const struct tu_pipeline_layout *layout)
426*61046927SAndroid Build Coastguard Worker {
427*61046927SAndroid Build Coastguard Worker    nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
428*61046927SAndroid Build Coastguard Worker    nir_def *bindless = build_bindless(dev, b, deref, false, shader, layout);
429*61046927SAndroid Build Coastguard Worker    nir_rewrite_image_intrinsic(instr, bindless, true);
430*61046927SAndroid Build Coastguard Worker }
431*61046927SAndroid Build Coastguard Worker 
432*61046927SAndroid Build Coastguard Worker static bool
lower_intrinsic(nir_builder * b,nir_intrinsic_instr * instr,struct tu_device * dev,struct tu_shader * shader,const struct tu_pipeline_layout * layout)433*61046927SAndroid Build Coastguard Worker lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
434*61046927SAndroid Build Coastguard Worker                 struct tu_device *dev,
435*61046927SAndroid Build Coastguard Worker                 struct tu_shader *shader,
436*61046927SAndroid Build Coastguard Worker                 const struct tu_pipeline_layout *layout)
437*61046927SAndroid Build Coastguard Worker {
438*61046927SAndroid Build Coastguard Worker    switch (instr->intrinsic) {
439*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_push_constant:
440*61046927SAndroid Build Coastguard Worker       lower_load_push_constant(dev, b, instr, shader, layout);
441*61046927SAndroid Build Coastguard Worker       return true;
442*61046927SAndroid Build Coastguard Worker 
443*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_vulkan_descriptor:
444*61046927SAndroid Build Coastguard Worker       lower_load_vulkan_descriptor(b, instr);
445*61046927SAndroid Build Coastguard Worker       return true;
446*61046927SAndroid Build Coastguard Worker 
447*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_vulkan_resource_index:
448*61046927SAndroid Build Coastguard Worker       lower_vulkan_resource_index(dev, b, instr, shader, layout);
449*61046927SAndroid Build Coastguard Worker       return true;
450*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_vulkan_resource_reindex:
451*61046927SAndroid Build Coastguard Worker       lower_vulkan_resource_reindex(b, instr);
452*61046927SAndroid Build Coastguard Worker       return true;
453*61046927SAndroid Build Coastguard Worker 
454*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_ubo:
455*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_ssbo:
456*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_store_ssbo:
457*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_ssbo_atomic:
458*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_ssbo_atomic_swap:
459*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_get_ssbo_size:
460*61046927SAndroid Build Coastguard Worker       return lower_ssbo_ubo_intrinsic(dev, b, instr);
461*61046927SAndroid Build Coastguard Worker 
462*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_image_deref_load:
463*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_image_deref_store:
464*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_image_deref_atomic:
465*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_image_deref_atomic_swap:
466*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_image_deref_size:
467*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_image_deref_samples:
468*61046927SAndroid Build Coastguard Worker       lower_image_deref(dev, b, instr, shader, layout);
469*61046927SAndroid Build Coastguard Worker       return true;
470*61046927SAndroid Build Coastguard Worker 
471*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_frag_size_ir3:
472*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_frag_offset_ir3: {
473*61046927SAndroid Build Coastguard Worker       if (!dev->compiler->load_shader_consts_via_preamble)
474*61046927SAndroid Build Coastguard Worker          return false;
475*61046927SAndroid Build Coastguard Worker 
476*61046927SAndroid Build Coastguard Worker       enum ir3_driver_param param =
477*61046927SAndroid Build Coastguard Worker          instr->intrinsic == nir_intrinsic_load_frag_size_ir3 ?
478*61046927SAndroid Build Coastguard Worker          IR3_DP_FS_FRAG_SIZE : IR3_DP_FS_FRAG_OFFSET;
479*61046927SAndroid Build Coastguard Worker 
480*61046927SAndroid Build Coastguard Worker       unsigned offset = param - IR3_DP_FS_DYNAMIC;
481*61046927SAndroid Build Coastguard Worker 
482*61046927SAndroid Build Coastguard Worker       nir_def *view = instr->src[0].ssa;
483*61046927SAndroid Build Coastguard Worker       nir_def *result =
484*61046927SAndroid Build Coastguard Worker          ir3_load_driver_ubo_indirect(b, 2, &shader->const_state.fdm_ubo,
485*61046927SAndroid Build Coastguard Worker                                       offset, view, nir_intrinsic_range(instr));
486*61046927SAndroid Build Coastguard Worker 
487*61046927SAndroid Build Coastguard Worker       nir_def_replace(&instr->def, result);
488*61046927SAndroid Build Coastguard Worker       return true;
489*61046927SAndroid Build Coastguard Worker    }
490*61046927SAndroid Build Coastguard Worker    case nir_intrinsic_load_frag_invocation_count: {
491*61046927SAndroid Build Coastguard Worker       if (!dev->compiler->load_shader_consts_via_preamble)
492*61046927SAndroid Build Coastguard Worker          return false;
493*61046927SAndroid Build Coastguard Worker 
494*61046927SAndroid Build Coastguard Worker       nir_def *result =
495*61046927SAndroid Build Coastguard Worker          ir3_load_driver_ubo(b, 1, &shader->const_state.fdm_ubo,
496*61046927SAndroid Build Coastguard Worker                              IR3_DP_FS_FRAG_INVOCATION_COUNT -
497*61046927SAndroid Build Coastguard Worker                              IR3_DP_FS_DYNAMIC);
498*61046927SAndroid Build Coastguard Worker 
499*61046927SAndroid Build Coastguard Worker       nir_def_replace(&instr->def, result);
500*61046927SAndroid Build Coastguard Worker       return true;
501*61046927SAndroid Build Coastguard Worker    }
502*61046927SAndroid Build Coastguard Worker 
503*61046927SAndroid Build Coastguard Worker    default:
504*61046927SAndroid Build Coastguard Worker       return false;
505*61046927SAndroid Build Coastguard Worker    }
506*61046927SAndroid Build Coastguard Worker }
507*61046927SAndroid Build Coastguard Worker 
508*61046927SAndroid Build Coastguard Worker static void
lower_tex_ycbcr(const struct tu_pipeline_layout * layout,nir_builder * builder,nir_tex_instr * tex)509*61046927SAndroid Build Coastguard Worker lower_tex_ycbcr(const struct tu_pipeline_layout *layout,
510*61046927SAndroid Build Coastguard Worker                 nir_builder *builder,
511*61046927SAndroid Build Coastguard Worker                 nir_tex_instr *tex)
512*61046927SAndroid Build Coastguard Worker {
513*61046927SAndroid Build Coastguard Worker    int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
514*61046927SAndroid Build Coastguard Worker    assert(deref_src_idx >= 0);
515*61046927SAndroid Build Coastguard Worker    nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
516*61046927SAndroid Build Coastguard Worker 
517*61046927SAndroid Build Coastguard Worker    nir_variable *var = nir_deref_instr_get_variable(deref);
518*61046927SAndroid Build Coastguard Worker    const struct tu_descriptor_set_layout *set_layout =
519*61046927SAndroid Build Coastguard Worker       layout->set[var->data.descriptor_set].layout;
520*61046927SAndroid Build Coastguard Worker    const struct tu_descriptor_set_binding_layout *binding =
521*61046927SAndroid Build Coastguard Worker       &set_layout->binding[var->data.binding];
522*61046927SAndroid Build Coastguard Worker    const struct vk_ycbcr_conversion_state *ycbcr_samplers =
523*61046927SAndroid Build Coastguard Worker       tu_immutable_ycbcr_samplers(set_layout, binding);
524*61046927SAndroid Build Coastguard Worker 
525*61046927SAndroid Build Coastguard Worker    if (!ycbcr_samplers)
526*61046927SAndroid Build Coastguard Worker       return;
527*61046927SAndroid Build Coastguard Worker 
528*61046927SAndroid Build Coastguard Worker    /* For the following instructions, we don't apply any change */
529*61046927SAndroid Build Coastguard Worker    if (tex->op == nir_texop_txs ||
530*61046927SAndroid Build Coastguard Worker        tex->op == nir_texop_query_levels ||
531*61046927SAndroid Build Coastguard Worker        tex->op == nir_texop_lod)
532*61046927SAndroid Build Coastguard Worker       return;
533*61046927SAndroid Build Coastguard Worker 
534*61046927SAndroid Build Coastguard Worker    assert(tex->texture_index == 0);
535*61046927SAndroid Build Coastguard Worker    unsigned array_index = 0;
536*61046927SAndroid Build Coastguard Worker    if (deref->deref_type != nir_deref_type_var) {
537*61046927SAndroid Build Coastguard Worker       assert(deref->deref_type == nir_deref_type_array);
538*61046927SAndroid Build Coastguard Worker       if (!nir_src_is_const(deref->arr.index))
539*61046927SAndroid Build Coastguard Worker          return;
540*61046927SAndroid Build Coastguard Worker       array_index = nir_src_as_uint(deref->arr.index);
541*61046927SAndroid Build Coastguard Worker       array_index = MIN2(array_index, binding->array_size - 1);
542*61046927SAndroid Build Coastguard Worker    }
543*61046927SAndroid Build Coastguard Worker    const struct vk_ycbcr_conversion_state *ycbcr_sampler = ycbcr_samplers + array_index;
544*61046927SAndroid Build Coastguard Worker 
545*61046927SAndroid Build Coastguard Worker    if (ycbcr_sampler->ycbcr_model == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
546*61046927SAndroid Build Coastguard Worker       return;
547*61046927SAndroid Build Coastguard Worker 
548*61046927SAndroid Build Coastguard Worker    /* Skip if not actually a YCbCr format.  CtsGraphics, for example, tries to create
549*61046927SAndroid Build Coastguard Worker     * YcbcrConversions for RGB formats.
550*61046927SAndroid Build Coastguard Worker     */
551*61046927SAndroid Build Coastguard Worker    if (!vk_format_get_ycbcr_info(ycbcr_sampler->format))
552*61046927SAndroid Build Coastguard Worker       return;
553*61046927SAndroid Build Coastguard Worker 
554*61046927SAndroid Build Coastguard Worker    builder->cursor = nir_after_instr(&tex->instr);
555*61046927SAndroid Build Coastguard Worker 
556*61046927SAndroid Build Coastguard Worker    uint8_t bits = vk_format_get_component_bits(ycbcr_sampler->format,
557*61046927SAndroid Build Coastguard Worker                                                UTIL_FORMAT_COLORSPACE_RGB,
558*61046927SAndroid Build Coastguard Worker                                                PIPE_SWIZZLE_X);
559*61046927SAndroid Build Coastguard Worker    uint32_t bpcs[3] = {bits, bits, bits}; /* TODO: use right bpc for each channel ? */
560*61046927SAndroid Build Coastguard Worker    nir_def *result = nir_convert_ycbcr_to_rgb(builder,
561*61046927SAndroid Build Coastguard Worker                                               ycbcr_sampler->ycbcr_model,
562*61046927SAndroid Build Coastguard Worker                                               ycbcr_sampler->ycbcr_range,
563*61046927SAndroid Build Coastguard Worker                                               &tex->def,
564*61046927SAndroid Build Coastguard Worker                                               bpcs);
565*61046927SAndroid Build Coastguard Worker    nir_def_rewrite_uses_after(&tex->def, result,
566*61046927SAndroid Build Coastguard Worker                               result->parent_instr);
567*61046927SAndroid Build Coastguard Worker 
568*61046927SAndroid Build Coastguard Worker    builder->cursor = nir_before_instr(&tex->instr);
569*61046927SAndroid Build Coastguard Worker }
570*61046927SAndroid Build Coastguard Worker 
571*61046927SAndroid Build Coastguard Worker static bool
lower_tex(nir_builder * b,nir_tex_instr * tex,struct tu_device * dev,struct tu_shader * shader,const struct tu_pipeline_layout * layout)572*61046927SAndroid Build Coastguard Worker lower_tex(nir_builder *b, nir_tex_instr *tex, struct tu_device *dev,
573*61046927SAndroid Build Coastguard Worker           struct tu_shader *shader, const struct tu_pipeline_layout *layout)
574*61046927SAndroid Build Coastguard Worker {
575*61046927SAndroid Build Coastguard Worker    lower_tex_ycbcr(layout, b, tex);
576*61046927SAndroid Build Coastguard Worker 
577*61046927SAndroid Build Coastguard Worker    int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
578*61046927SAndroid Build Coastguard Worker    if (sampler_src_idx >= 0) {
579*61046927SAndroid Build Coastguard Worker       nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
580*61046927SAndroid Build Coastguard Worker       nir_def *bindless = build_bindless(dev, b, deref, true, shader, layout);
581*61046927SAndroid Build Coastguard Worker       nir_src_rewrite(&tex->src[sampler_src_idx].src, bindless);
582*61046927SAndroid Build Coastguard Worker       tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
583*61046927SAndroid Build Coastguard Worker    }
584*61046927SAndroid Build Coastguard Worker 
585*61046927SAndroid Build Coastguard Worker    int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
586*61046927SAndroid Build Coastguard Worker    if (tex_src_idx >= 0) {
587*61046927SAndroid Build Coastguard Worker       nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
588*61046927SAndroid Build Coastguard Worker       nir_def *bindless = build_bindless(dev, b, deref, false, shader, layout);
589*61046927SAndroid Build Coastguard Worker       nir_src_rewrite(&tex->src[tex_src_idx].src, bindless);
590*61046927SAndroid Build Coastguard Worker       tex->src[tex_src_idx].src_type = nir_tex_src_texture_handle;
591*61046927SAndroid Build Coastguard Worker 
592*61046927SAndroid Build Coastguard Worker       /* for the input attachment case: */
593*61046927SAndroid Build Coastguard Worker       if (bindless->parent_instr->type != nir_instr_type_intrinsic)
594*61046927SAndroid Build Coastguard Worker          tex->src[tex_src_idx].src_type = nir_tex_src_texture_offset;
595*61046927SAndroid Build Coastguard Worker    }
596*61046927SAndroid Build Coastguard Worker 
597*61046927SAndroid Build Coastguard Worker    return true;
598*61046927SAndroid Build Coastguard Worker }
599*61046927SAndroid Build Coastguard Worker 
/* State passed to the instruction-lowering callbacks (lower_instr() and
 * lower_inline_ubo()) through their opaque cb_data pointer.
 */
600*61046927SAndroid Build Coastguard Worker struct lower_instr_params {
   /* Device; forwarded to the lowering helpers and read for compiler /
    * physical-device info.
    */
601*61046927SAndroid Build Coastguard Worker    struct tu_device *dev;
   /* Shader whose state (e.g. const_state) the lowering reads and updates. */
602*61046927SAndroid Build Coastguard Worker    struct tu_shader *shader;
   /* Pipeline layout used to look up descriptor set and binding layouts. */
603*61046927SAndroid Build Coastguard Worker    const struct tu_pipeline_layout *layout;
604*61046927SAndroid Build Coastguard Worker };
605*61046927SAndroid Build Coastguard Worker 
606*61046927SAndroid Build Coastguard Worker static bool
lower_instr(nir_builder * b,nir_instr * instr,void * cb_data)607*61046927SAndroid Build Coastguard Worker lower_instr(nir_builder *b, nir_instr *instr, void *cb_data)
608*61046927SAndroid Build Coastguard Worker {
609*61046927SAndroid Build Coastguard Worker    struct lower_instr_params *params = (struct lower_instr_params *) cb_data;
610*61046927SAndroid Build Coastguard Worker    b->cursor = nir_before_instr(instr);
611*61046927SAndroid Build Coastguard Worker    switch (instr->type) {
612*61046927SAndroid Build Coastguard Worker    case nir_instr_type_tex:
613*61046927SAndroid Build Coastguard Worker       return lower_tex(b, nir_instr_as_tex(instr), params->dev, params->shader, params->layout);
614*61046927SAndroid Build Coastguard Worker    case nir_instr_type_intrinsic:
615*61046927SAndroid Build Coastguard Worker       return lower_intrinsic(b, nir_instr_as_intrinsic(instr), params->dev, params->shader, params->layout);
616*61046927SAndroid Build Coastguard Worker    default:
617*61046927SAndroid Build Coastguard Worker       return false;
618*61046927SAndroid Build Coastguard Worker    }
619*61046927SAndroid Build Coastguard Worker }
620*61046927SAndroid Build Coastguard Worker 
621*61046927SAndroid Build Coastguard Worker /* Since we always push inline uniforms into constant memory, lower loads of
622*61046927SAndroid Build Coastguard Worker  * them to load_uniform which turns into constant memory loads.
623*61046927SAndroid Build Coastguard Worker  */
624*61046927SAndroid Build Coastguard Worker static bool
lower_inline_ubo(nir_builder * b,nir_intrinsic_instr * intrin,void * cb_data)625*61046927SAndroid Build Coastguard Worker lower_inline_ubo(nir_builder *b, nir_intrinsic_instr *intrin, void *cb_data)
626*61046927SAndroid Build Coastguard Worker {
627*61046927SAndroid Build Coastguard Worker    if (intrin->intrinsic != nir_intrinsic_load_ubo)
628*61046927SAndroid Build Coastguard Worker       return false;
629*61046927SAndroid Build Coastguard Worker 
630*61046927SAndroid Build Coastguard Worker    struct lower_instr_params *params = (struct lower_instr_params *) cb_data;
631*61046927SAndroid Build Coastguard Worker    struct tu_shader *shader = params->shader;
632*61046927SAndroid Build Coastguard Worker    const struct tu_pipeline_layout *layout = params->layout;
633*61046927SAndroid Build Coastguard Worker 
634*61046927SAndroid Build Coastguard Worker    nir_binding binding = nir_chase_binding(intrin->src[0]);
635*61046927SAndroid Build Coastguard Worker 
636*61046927SAndroid Build Coastguard Worker    if (!binding.success)
637*61046927SAndroid Build Coastguard Worker       return false;
638*61046927SAndroid Build Coastguard Worker 
639*61046927SAndroid Build Coastguard Worker    struct tu_descriptor_set_layout *set_layout = layout->set[binding.desc_set].layout;
640*61046927SAndroid Build Coastguard Worker    struct tu_descriptor_set_binding_layout *binding_layout =
641*61046927SAndroid Build Coastguard Worker       &set_layout->binding[binding.binding];
642*61046927SAndroid Build Coastguard Worker 
643*61046927SAndroid Build Coastguard Worker    if (binding_layout->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
644*61046927SAndroid Build Coastguard Worker       return false;
645*61046927SAndroid Build Coastguard Worker 
646*61046927SAndroid Build Coastguard Worker    /* lookup the const offset of the inline UBO */
647*61046927SAndroid Build Coastguard Worker    struct tu_const_state *const_state = &shader->const_state;
648*61046927SAndroid Build Coastguard Worker 
649*61046927SAndroid Build Coastguard Worker    unsigned base = UINT_MAX;
650*61046927SAndroid Build Coastguard Worker    unsigned range;
651*61046927SAndroid Build Coastguard Worker    bool use_load = false;
652*61046927SAndroid Build Coastguard Worker    bool use_ldg_k =
653*61046927SAndroid Build Coastguard Worker       params->dev->physical_device->info->a7xx.load_inline_uniforms_via_preamble_ldgk;
654*61046927SAndroid Build Coastguard Worker 
655*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < const_state->num_inline_ubos; i++) {
656*61046927SAndroid Build Coastguard Worker       if (const_state->ubos[i].base == binding.desc_set &&
657*61046927SAndroid Build Coastguard Worker           const_state->ubos[i].offset == binding_layout->offset) {
658*61046927SAndroid Build Coastguard Worker          range = const_state->ubos[i].size_vec4 * 4;
659*61046927SAndroid Build Coastguard Worker          if (use_ldg_k) {
660*61046927SAndroid Build Coastguard Worker             base = i * 2;
661*61046927SAndroid Build Coastguard Worker          } else {
662*61046927SAndroid Build Coastguard Worker             use_load = const_state->ubos[i].push_address;
663*61046927SAndroid Build Coastguard Worker             base = const_state->ubos[i].const_offset_vec4 * 4;
664*61046927SAndroid Build Coastguard Worker          }
665*61046927SAndroid Build Coastguard Worker          break;
666*61046927SAndroid Build Coastguard Worker       }
667*61046927SAndroid Build Coastguard Worker    }
668*61046927SAndroid Build Coastguard Worker 
669*61046927SAndroid Build Coastguard Worker    if (base == UINT_MAX) {
670*61046927SAndroid Build Coastguard Worker       /* Assume we're loading out-of-bounds from a 0-sized inline uniform
671*61046927SAndroid Build Coastguard Worker        * filtered out below.
672*61046927SAndroid Build Coastguard Worker        */
673*61046927SAndroid Build Coastguard Worker       nir_def_rewrite_uses(&intrin->def,
674*61046927SAndroid Build Coastguard Worker                                nir_undef(b, intrin->num_components,
675*61046927SAndroid Build Coastguard Worker                                              intrin->def.bit_size));
676*61046927SAndroid Build Coastguard Worker       return true;
677*61046927SAndroid Build Coastguard Worker    }
678*61046927SAndroid Build Coastguard Worker 
679*61046927SAndroid Build Coastguard Worker    nir_def *offset = intrin->src[1].ssa;
680*61046927SAndroid Build Coastguard Worker 
681*61046927SAndroid Build Coastguard Worker    b->cursor = nir_before_instr(&intrin->instr);
682*61046927SAndroid Build Coastguard Worker    nir_def *val;
683*61046927SAndroid Build Coastguard Worker 
684*61046927SAndroid Build Coastguard Worker    if (use_load || use_ldg_k) {
685*61046927SAndroid Build Coastguard Worker       nir_def *base_addr;
686*61046927SAndroid Build Coastguard Worker       if (use_ldg_k) {
687*61046927SAndroid Build Coastguard Worker          base_addr = ir3_load_driver_ubo(b, 2,
688*61046927SAndroid Build Coastguard Worker                                          &params->shader->const_state.inline_uniforms_ubo,
689*61046927SAndroid Build Coastguard Worker                                          base);
690*61046927SAndroid Build Coastguard Worker       } else {
691*61046927SAndroid Build Coastguard Worker          base_addr =
692*61046927SAndroid Build Coastguard Worker             nir_load_const_ir3(b, 2, 32, nir_imm_int(b, 0), .base = base);
693*61046927SAndroid Build Coastguard Worker       }
694*61046927SAndroid Build Coastguard Worker       val = nir_load_global_ir3(b, intrin->num_components,
695*61046927SAndroid Build Coastguard Worker                                 intrin->def.bit_size,
696*61046927SAndroid Build Coastguard Worker                                 base_addr, nir_ishr_imm(b, offset, 2),
697*61046927SAndroid Build Coastguard Worker                                 .access =
698*61046927SAndroid Build Coastguard Worker                                  (enum gl_access_qualifier)(
699*61046927SAndroid Build Coastguard Worker                                     (enum gl_access_qualifier)(ACCESS_NON_WRITEABLE | ACCESS_CAN_REORDER) |
700*61046927SAndroid Build Coastguard Worker                                     ACCESS_CAN_SPECULATE),
701*61046927SAndroid Build Coastguard Worker                                 .align_mul = 16,
702*61046927SAndroid Build Coastguard Worker                                 .align_offset = 0,
703*61046927SAndroid Build Coastguard Worker                                 .range_base = 0,
704*61046927SAndroid Build Coastguard Worker                                 .range = range);
705*61046927SAndroid Build Coastguard Worker    } else {
706*61046927SAndroid Build Coastguard Worker       val =
707*61046927SAndroid Build Coastguard Worker          nir_load_const_ir3(b, intrin->num_components, intrin->def.bit_size,
708*61046927SAndroid Build Coastguard Worker                             nir_ishr_imm(b, offset, 2), .base = base);
709*61046927SAndroid Build Coastguard Worker    }
710*61046927SAndroid Build Coastguard Worker 
711*61046927SAndroid Build Coastguard Worker    nir_def_replace(&intrin->def, val);
712*61046927SAndroid Build Coastguard Worker    return true;
713*61046927SAndroid Build Coastguard Worker }
714*61046927SAndroid Build Coastguard Worker 
715*61046927SAndroid Build Coastguard Worker /* Figure out the range of push constants that we're actually going to push to
716*61046927SAndroid Build Coastguard Worker  * the shader, and tell the backend to reserve this range when pushing UBO
717*61046927SAndroid Build Coastguard Worker  * constants.
718*61046927SAndroid Build Coastguard Worker  */
719*61046927SAndroid Build Coastguard Worker 
720*61046927SAndroid Build Coastguard Worker static void
gather_push_constants(nir_shader * shader,struct tu_shader * tu_shader)721*61046927SAndroid Build Coastguard Worker gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
722*61046927SAndroid Build Coastguard Worker {
723*61046927SAndroid Build Coastguard Worker    uint32_t min = UINT32_MAX, max = 0;
724*61046927SAndroid Build Coastguard Worker    nir_foreach_function_impl(impl, shader) {
725*61046927SAndroid Build Coastguard Worker       nir_foreach_block(block, impl) {
726*61046927SAndroid Build Coastguard Worker          nir_foreach_instr_safe(instr, block) {
727*61046927SAndroid Build Coastguard Worker             if (instr->type != nir_instr_type_intrinsic)
728*61046927SAndroid Build Coastguard Worker                continue;
729*61046927SAndroid Build Coastguard Worker 
730*61046927SAndroid Build Coastguard Worker             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
731*61046927SAndroid Build Coastguard Worker             if (intrin->intrinsic != nir_intrinsic_load_push_constant)
732*61046927SAndroid Build Coastguard Worker                continue;
733*61046927SAndroid Build Coastguard Worker 
734*61046927SAndroid Build Coastguard Worker             uint32_t base = nir_intrinsic_base(intrin);
735*61046927SAndroid Build Coastguard Worker             uint32_t range = nir_intrinsic_range(intrin);
736*61046927SAndroid Build Coastguard Worker             min = MIN2(min, base);
737*61046927SAndroid Build Coastguard Worker             max = MAX2(max, base + range);
738*61046927SAndroid Build Coastguard Worker             break;
739*61046927SAndroid Build Coastguard Worker          }
740*61046927SAndroid Build Coastguard Worker       }
741*61046927SAndroid Build Coastguard Worker    }
742*61046927SAndroid Build Coastguard Worker 
743*61046927SAndroid Build Coastguard Worker    if (min >= max) {
744*61046927SAndroid Build Coastguard Worker       tu_shader->const_state.push_consts = (struct tu_push_constant_range) {};
745*61046927SAndroid Build Coastguard Worker       return;
746*61046927SAndroid Build Coastguard Worker    }
747*61046927SAndroid Build Coastguard Worker 
748*61046927SAndroid Build Coastguard Worker    /* CP_LOAD_STATE OFFSET and NUM_UNIT for SHARED_CONSTS are in units of
749*61046927SAndroid Build Coastguard Worker     * dwords while loading regular consts is in units of vec4's.
750*61046927SAndroid Build Coastguard Worker     * So we unify the unit here as dwords for tu_push_constant_range, then
751*61046927SAndroid Build Coastguard Worker     * we should consider correct unit when emitting.
752*61046927SAndroid Build Coastguard Worker     *
753*61046927SAndroid Build Coastguard Worker     * Note there's an alignment requirement of 16 dwords on OFFSET. Expand
754*61046927SAndroid Build Coastguard Worker     * the range and change units accordingly.
755*61046927SAndroid Build Coastguard Worker     */
756*61046927SAndroid Build Coastguard Worker    tu_shader->const_state.push_consts.lo = (min / 4) / 4 * 4;
757*61046927SAndroid Build Coastguard Worker    tu_shader->const_state.push_consts.dwords =
758*61046927SAndroid Build Coastguard Worker       align(max, 16) / 4 - tu_shader->const_state.push_consts.lo;
759*61046927SAndroid Build Coastguard Worker }
760*61046927SAndroid Build Coastguard Worker 
761*61046927SAndroid Build Coastguard Worker static bool
shader_uses_push_consts(nir_shader * shader)762*61046927SAndroid Build Coastguard Worker shader_uses_push_consts(nir_shader *shader)
763*61046927SAndroid Build Coastguard Worker {
764*61046927SAndroid Build Coastguard Worker    nir_foreach_function_impl (impl, shader) {
765*61046927SAndroid Build Coastguard Worker       nir_foreach_block (block, impl) {
766*61046927SAndroid Build Coastguard Worker          nir_foreach_instr_safe (instr, block) {
767*61046927SAndroid Build Coastguard Worker             if (instr->type != nir_instr_type_intrinsic)
768*61046927SAndroid Build Coastguard Worker                continue;
769*61046927SAndroid Build Coastguard Worker 
770*61046927SAndroid Build Coastguard Worker             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
771*61046927SAndroid Build Coastguard Worker             if (intrin->intrinsic == nir_intrinsic_load_push_constant)
772*61046927SAndroid Build Coastguard Worker                return true;
773*61046927SAndroid Build Coastguard Worker          }
774*61046927SAndroid Build Coastguard Worker       }
775*61046927SAndroid Build Coastguard Worker    }
776*61046927SAndroid Build Coastguard Worker    return false;
777*61046927SAndroid Build Coastguard Worker }
778*61046927SAndroid Build Coastguard Worker 
779*61046927SAndroid Build Coastguard Worker static bool
tu_lower_io(nir_shader * shader,struct tu_device * dev,struct tu_shader * tu_shader,const struct tu_pipeline_layout * layout,unsigned * reserved_consts_vec4_out)780*61046927SAndroid Build Coastguard Worker tu_lower_io(nir_shader *shader, struct tu_device *dev,
781*61046927SAndroid Build Coastguard Worker             struct tu_shader *tu_shader,
782*61046927SAndroid Build Coastguard Worker             const struct tu_pipeline_layout *layout,
783*61046927SAndroid Build Coastguard Worker             unsigned *reserved_consts_vec4_out)
784*61046927SAndroid Build Coastguard Worker {
785*61046927SAndroid Build Coastguard Worker    tu_shader->const_state.push_consts = (struct tu_push_constant_range) {
786*61046927SAndroid Build Coastguard Worker       .lo = 0,
787*61046927SAndroid Build Coastguard Worker       .dwords = layout->push_constant_size / 4,
788*61046927SAndroid Build Coastguard Worker       .type = tu_push_consts_type(layout, dev->compiler),
789*61046927SAndroid Build Coastguard Worker    };
790*61046927SAndroid Build Coastguard Worker 
791*61046927SAndroid Build Coastguard Worker    if (tu_shader->const_state.push_consts.type == IR3_PUSH_CONSTS_PER_STAGE) {
792*61046927SAndroid Build Coastguard Worker       gather_push_constants(shader, tu_shader);
793*61046927SAndroid Build Coastguard Worker    } else if (tu_shader->const_state.push_consts.type ==
794*61046927SAndroid Build Coastguard Worker             IR3_PUSH_CONSTS_SHARED_PREAMBLE) {
795*61046927SAndroid Build Coastguard Worker       /* Disable pushing constants for this stage if none were loaded in the
796*61046927SAndroid Build Coastguard Worker        * shader.  If all stages don't load their declared push constants, as
797*61046927SAndroid Build Coastguard Worker        * is often the case under zink, then we could additionally skip
798*61046927SAndroid Build Coastguard Worker        * emitting REG_A7XX_HLSQ_SHARED_CONSTS_IMM entirely.
799*61046927SAndroid Build Coastguard Worker        */
800*61046927SAndroid Build Coastguard Worker       if (!shader_uses_push_consts(shader))
801*61046927SAndroid Build Coastguard Worker          tu_shader->const_state.push_consts = (struct tu_push_constant_range) {};
802*61046927SAndroid Build Coastguard Worker    }
803*61046927SAndroid Build Coastguard Worker 
804*61046927SAndroid Build Coastguard Worker    struct tu_const_state *const_state = &tu_shader->const_state;
805*61046927SAndroid Build Coastguard Worker    unsigned reserved_consts_vec4 =
806*61046927SAndroid Build Coastguard Worker       align(DIV_ROUND_UP(const_state->push_consts.dwords, 4),
807*61046927SAndroid Build Coastguard Worker             dev->compiler->const_upload_unit);
808*61046927SAndroid Build Coastguard Worker 
809*61046927SAndroid Build Coastguard Worker    bool unknown_dynamic_size = false;
810*61046927SAndroid Build Coastguard Worker    bool unknown_dynamic_offset = false;
811*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < layout->num_sets; i++) {
812*61046927SAndroid Build Coastguard Worker       if (tu_shader->dynamic_descriptor_sizes[i] == -1) {
813*61046927SAndroid Build Coastguard Worker          unknown_dynamic_size = true;
814*61046927SAndroid Build Coastguard Worker       } else if (unknown_dynamic_size &&
815*61046927SAndroid Build Coastguard Worker                  tu_shader->dynamic_descriptor_sizes[i] > 0) {
816*61046927SAndroid Build Coastguard Worker          /* If there is an unknown size followed by a known size, then we may
817*61046927SAndroid Build Coastguard Worker           * need to dynamically determine the offset when linking.
818*61046927SAndroid Build Coastguard Worker           */
819*61046927SAndroid Build Coastguard Worker          unknown_dynamic_offset = true;
820*61046927SAndroid Build Coastguard Worker       }
821*61046927SAndroid Build Coastguard Worker    }
822*61046927SAndroid Build Coastguard Worker 
823*61046927SAndroid Build Coastguard Worker    if (unknown_dynamic_offset) {
824*61046927SAndroid Build Coastguard Worker       const_state->dynamic_offset_loc = reserved_consts_vec4 * 4;
825*61046927SAndroid Build Coastguard Worker       assert(dev->physical_device->reserved_set_idx >= 0);
826*61046927SAndroid Build Coastguard Worker       reserved_consts_vec4 += DIV_ROUND_UP(dev->physical_device->reserved_set_idx, 4);
827*61046927SAndroid Build Coastguard Worker    } else {
828*61046927SAndroid Build Coastguard Worker       const_state->dynamic_offset_loc = UINT32_MAX;
829*61046927SAndroid Build Coastguard Worker    }
830*61046927SAndroid Build Coastguard Worker 
831*61046927SAndroid Build Coastguard Worker    /* Reserve space for inline uniforms, so we can always load them from
832*61046927SAndroid Build Coastguard Worker     * constants and not setup a UBO descriptor for them.
833*61046927SAndroid Build Coastguard Worker     */
834*61046927SAndroid Build Coastguard Worker    bool use_ldg_k =
835*61046927SAndroid Build Coastguard Worker       dev->physical_device->info->a7xx.load_inline_uniforms_via_preamble_ldgk;
836*61046927SAndroid Build Coastguard Worker    for (unsigned set = 0; set < layout->num_sets; set++) {
837*61046927SAndroid Build Coastguard Worker       const struct tu_descriptor_set_layout *desc_layout =
838*61046927SAndroid Build Coastguard Worker          layout->set[set].layout;
839*61046927SAndroid Build Coastguard Worker 
840*61046927SAndroid Build Coastguard Worker       if (!desc_layout || !desc_layout->has_inline_uniforms)
841*61046927SAndroid Build Coastguard Worker          continue;
842*61046927SAndroid Build Coastguard Worker 
843*61046927SAndroid Build Coastguard Worker       for (unsigned b = 0; b < desc_layout->binding_count; b++) {
844*61046927SAndroid Build Coastguard Worker          const struct tu_descriptor_set_binding_layout *binding =
845*61046927SAndroid Build Coastguard Worker             &desc_layout->binding[b];
846*61046927SAndroid Build Coastguard Worker 
847*61046927SAndroid Build Coastguard Worker          if (binding->type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK)
848*61046927SAndroid Build Coastguard Worker             continue;
849*61046927SAndroid Build Coastguard Worker          if (!(binding->shader_stages &
850*61046927SAndroid Build Coastguard Worker                mesa_to_vk_shader_stage(shader->info.stage)))
851*61046927SAndroid Build Coastguard Worker             continue;
852*61046927SAndroid Build Coastguard Worker 
853*61046927SAndroid Build Coastguard Worker          /* If we don't know the size at compile time due to a variable
854*61046927SAndroid Build Coastguard Worker           * descriptor count, then with descriptor buffers we cannot know
855*61046927SAndroid Build Coastguard Worker           * how much space the real inline uniform has. In this case we fall
856*61046927SAndroid Build Coastguard Worker           * back to pushing the address and using ldg, which is slower than
857*61046927SAndroid Build Coastguard Worker           * setting up a descriptor but setting up our own descriptor with
858*61046927SAndroid Build Coastguard Worker           * descriptor_buffer is also painful and has to be done on the GPU
859*61046927SAndroid Build Coastguard Worker           * and doesn't avoid the UBO getting pushed anyway and faulting if a
860*61046927SAndroid Build Coastguard Worker           * out-of-bounds access is hidden behind an if and not dynamically
861*61046927SAndroid Build Coastguard Worker           * executed. Given the small max size, there shouldn't be much reason
862*61046927SAndroid Build Coastguard Worker           * to use variable size anyway.
863*61046927SAndroid Build Coastguard Worker           */
864*61046927SAndroid Build Coastguard Worker          bool push_address = !use_ldg_k && desc_layout->has_variable_descriptors &&
865*61046927SAndroid Build Coastguard Worker             b == desc_layout->binding_count - 1;
866*61046927SAndroid Build Coastguard Worker 
867*61046927SAndroid Build Coastguard Worker          if (push_address) {
868*61046927SAndroid Build Coastguard Worker             perf_debug(dev,
869*61046927SAndroid Build Coastguard Worker                        "falling back to ldg for variable-sized inline "
870*61046927SAndroid Build Coastguard Worker                        "uniform block");
871*61046927SAndroid Build Coastguard Worker          }
872*61046927SAndroid Build Coastguard Worker 
873*61046927SAndroid Build Coastguard Worker          assert(const_state->num_inline_ubos < ARRAY_SIZE(const_state->ubos));
874*61046927SAndroid Build Coastguard Worker          unsigned size_vec4 = push_address ? 1 : DIV_ROUND_UP(binding->size, 16);
875*61046927SAndroid Build Coastguard Worker          const_state->ubos[const_state->num_inline_ubos++] = (struct tu_inline_ubo) {
876*61046927SAndroid Build Coastguard Worker             .base = set,
877*61046927SAndroid Build Coastguard Worker             .offset = binding->offset,
878*61046927SAndroid Build Coastguard Worker             .push_address = push_address,
879*61046927SAndroid Build Coastguard Worker             .const_offset_vec4 = reserved_consts_vec4,
880*61046927SAndroid Build Coastguard Worker             .size_vec4 = size_vec4,
881*61046927SAndroid Build Coastguard Worker          };
882*61046927SAndroid Build Coastguard Worker 
883*61046927SAndroid Build Coastguard Worker          if (!use_ldg_k)
884*61046927SAndroid Build Coastguard Worker             reserved_consts_vec4 += align(size_vec4, dev->compiler->const_upload_unit);
885*61046927SAndroid Build Coastguard Worker       }
886*61046927SAndroid Build Coastguard Worker    }
887*61046927SAndroid Build Coastguard Worker 
888*61046927SAndroid Build Coastguard Worker    *reserved_consts_vec4_out = reserved_consts_vec4;
889*61046927SAndroid Build Coastguard Worker 
890*61046927SAndroid Build Coastguard Worker    struct lower_instr_params params = {
891*61046927SAndroid Build Coastguard Worker       .dev = dev,
892*61046927SAndroid Build Coastguard Worker       .shader = tu_shader,
893*61046927SAndroid Build Coastguard Worker       .layout = layout,
894*61046927SAndroid Build Coastguard Worker    };
895*61046927SAndroid Build Coastguard Worker 
896*61046927SAndroid Build Coastguard Worker    bool progress = false;
897*61046927SAndroid Build Coastguard Worker    if (const_state->num_inline_ubos) {
898*61046927SAndroid Build Coastguard Worker       progress |= nir_shader_intrinsics_pass(shader, lower_inline_ubo,
899*61046927SAndroid Build Coastguard Worker                                                nir_metadata_none,
900*61046927SAndroid Build Coastguard Worker                                                &params);
901*61046927SAndroid Build Coastguard Worker    }
902*61046927SAndroid Build Coastguard Worker 
903*61046927SAndroid Build Coastguard Worker    progress |= nir_shader_instructions_pass(shader,
904*61046927SAndroid Build Coastguard Worker                                             lower_instr,
905*61046927SAndroid Build Coastguard Worker                                             nir_metadata_none,
906*61046927SAndroid Build Coastguard Worker                                             &params);
907*61046927SAndroid Build Coastguard Worker 
908*61046927SAndroid Build Coastguard Worker    /* Remove now-unused variables so that when we gather the shader info later
909*61046927SAndroid Build Coastguard Worker     * they won't be counted.
910*61046927SAndroid Build Coastguard Worker     */
911*61046927SAndroid Build Coastguard Worker 
912*61046927SAndroid Build Coastguard Worker    if (progress)
913*61046927SAndroid Build Coastguard Worker       nir_opt_dce(shader);
914*61046927SAndroid Build Coastguard Worker 
915*61046927SAndroid Build Coastguard Worker    progress |=
916*61046927SAndroid Build Coastguard Worker       nir_remove_dead_variables(shader,
917*61046927SAndroid Build Coastguard Worker                                 nir_var_uniform | nir_var_mem_ubo | nir_var_mem_ssbo,
918*61046927SAndroid Build Coastguard Worker                                 NULL);
919*61046927SAndroid Build Coastguard Worker 
920*61046927SAndroid Build Coastguard Worker    return progress;
921*61046927SAndroid Build Coastguard Worker }
922*61046927SAndroid Build Coastguard Worker 
/* Options consumed by the fragment density map lowering callbacks
 * (lower_fdm_filter / lower_fdm_instr).
 */
struct lower_fdm_options {
   /* Passed as the .range of the frag size / frag offset loads. */
   unsigned num_views;

   /* When set, load_frag_coord is lowered as well as load_frag_size. */
   bool adjust_fragcoord;

   /* When set, the view is read from the VARYING_SLOT_VIEW_INDEX input;
    * otherwise view 0 is used.
    */
   bool multiview;
};
928*61046927SAndroid Build Coastguard Worker 
929*61046927SAndroid Build Coastguard Worker static bool
lower_fdm_filter(const nir_instr * instr,const void * data)930*61046927SAndroid Build Coastguard Worker lower_fdm_filter(const nir_instr *instr, const void *data)
931*61046927SAndroid Build Coastguard Worker {
932*61046927SAndroid Build Coastguard Worker    const struct lower_fdm_options *options =
933*61046927SAndroid Build Coastguard Worker       (const struct lower_fdm_options *)data;
934*61046927SAndroid Build Coastguard Worker 
935*61046927SAndroid Build Coastguard Worker    if (instr->type != nir_instr_type_intrinsic)
936*61046927SAndroid Build Coastguard Worker       return false;
937*61046927SAndroid Build Coastguard Worker 
938*61046927SAndroid Build Coastguard Worker    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
939*61046927SAndroid Build Coastguard Worker    return intrin->intrinsic == nir_intrinsic_load_frag_size ||
940*61046927SAndroid Build Coastguard Worker       (intrin->intrinsic == nir_intrinsic_load_frag_coord &&
941*61046927SAndroid Build Coastguard Worker        options->adjust_fragcoord);
942*61046927SAndroid Build Coastguard Worker }
943*61046927SAndroid Build Coastguard Worker 
944*61046927SAndroid Build Coastguard Worker static nir_def *
lower_fdm_instr(struct nir_builder * b,nir_instr * instr,void * data)945*61046927SAndroid Build Coastguard Worker lower_fdm_instr(struct nir_builder *b, nir_instr *instr, void *data)
946*61046927SAndroid Build Coastguard Worker {
947*61046927SAndroid Build Coastguard Worker    const struct lower_fdm_options *options =
948*61046927SAndroid Build Coastguard Worker       (const struct lower_fdm_options *)data;
949*61046927SAndroid Build Coastguard Worker 
950*61046927SAndroid Build Coastguard Worker    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
951*61046927SAndroid Build Coastguard Worker 
952*61046927SAndroid Build Coastguard Worker    nir_def *view;
953*61046927SAndroid Build Coastguard Worker    if (options->multiview) {
954*61046927SAndroid Build Coastguard Worker       nir_variable *view_var =
955*61046927SAndroid Build Coastguard Worker          nir_find_variable_with_location(b->shader, nir_var_shader_in,
956*61046927SAndroid Build Coastguard Worker                                          VARYING_SLOT_VIEW_INDEX);
957*61046927SAndroid Build Coastguard Worker 
958*61046927SAndroid Build Coastguard Worker       if (view_var == NULL) {
959*61046927SAndroid Build Coastguard Worker          view_var = nir_variable_create(b->shader, nir_var_shader_in,
960*61046927SAndroid Build Coastguard Worker                                         glsl_int_type(), NULL);
961*61046927SAndroid Build Coastguard Worker          view_var->data.location = VARYING_SLOT_VIEW_INDEX;
962*61046927SAndroid Build Coastguard Worker          view_var->data.interpolation = INTERP_MODE_FLAT;
963*61046927SAndroid Build Coastguard Worker          view_var->data.driver_location = b->shader->num_inputs++;
964*61046927SAndroid Build Coastguard Worker       }
965*61046927SAndroid Build Coastguard Worker 
966*61046927SAndroid Build Coastguard Worker       view = nir_load_var(b, view_var);
967*61046927SAndroid Build Coastguard Worker    } else {
968*61046927SAndroid Build Coastguard Worker       view = nir_imm_int(b, 0);
969*61046927SAndroid Build Coastguard Worker    }
970*61046927SAndroid Build Coastguard Worker 
971*61046927SAndroid Build Coastguard Worker    nir_def *frag_size =
972*61046927SAndroid Build Coastguard Worker       nir_load_frag_size_ir3(b, view, .range = options->num_views);
973*61046927SAndroid Build Coastguard Worker 
974*61046927SAndroid Build Coastguard Worker    if (intrin->intrinsic == nir_intrinsic_load_frag_coord) {
975*61046927SAndroid Build Coastguard Worker       nir_def *frag_offset =
976*61046927SAndroid Build Coastguard Worker          nir_load_frag_offset_ir3(b, view, .range = options->num_views);
977*61046927SAndroid Build Coastguard Worker       nir_def *unscaled_coord = nir_load_frag_coord_unscaled_ir3(b);
978*61046927SAndroid Build Coastguard Worker       nir_def *xy = nir_trim_vector(b, unscaled_coord, 2);
979*61046927SAndroid Build Coastguard Worker       xy = nir_fmul(b, nir_fsub(b, xy, frag_offset), nir_i2f32(b, frag_size));
980*61046927SAndroid Build Coastguard Worker       return nir_vec4(b,
981*61046927SAndroid Build Coastguard Worker                       nir_channel(b, xy, 0),
982*61046927SAndroid Build Coastguard Worker                       nir_channel(b, xy, 1),
983*61046927SAndroid Build Coastguard Worker                       nir_channel(b, unscaled_coord, 2),
984*61046927SAndroid Build Coastguard Worker                       nir_channel(b, unscaled_coord, 3));
985*61046927SAndroid Build Coastguard Worker    }
986*61046927SAndroid Build Coastguard Worker 
987*61046927SAndroid Build Coastguard Worker    assert(intrin->intrinsic == nir_intrinsic_load_frag_size);
988*61046927SAndroid Build Coastguard Worker    return frag_size;
989*61046927SAndroid Build Coastguard Worker }
990*61046927SAndroid Build Coastguard Worker 
991*61046927SAndroid Build Coastguard Worker static bool
tu_nir_lower_fdm(nir_shader * shader,const struct lower_fdm_options * options)992*61046927SAndroid Build Coastguard Worker tu_nir_lower_fdm(nir_shader *shader, const struct lower_fdm_options *options)
993*61046927SAndroid Build Coastguard Worker {
994*61046927SAndroid Build Coastguard Worker    return nir_shader_lower_instructions(shader, lower_fdm_filter,
995*61046927SAndroid Build Coastguard Worker                                         lower_fdm_instr, (void *)options);
996*61046927SAndroid Build Coastguard Worker }
997*61046927SAndroid Build Coastguard Worker 
998*61046927SAndroid Build Coastguard Worker static void
shared_type_info(const struct glsl_type * type,unsigned * size,unsigned * align)999*61046927SAndroid Build Coastguard Worker shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
1000*61046927SAndroid Build Coastguard Worker {
1001*61046927SAndroid Build Coastguard Worker    assert(glsl_type_is_vector_or_scalar(type));
1002*61046927SAndroid Build Coastguard Worker 
1003*61046927SAndroid Build Coastguard Worker    unsigned comp_size =
1004*61046927SAndroid Build Coastguard Worker       glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
1005*61046927SAndroid Build Coastguard Worker    unsigned length = glsl_get_vector_elements(type);
1006*61046927SAndroid Build Coastguard Worker    *size = comp_size * length;
1007*61046927SAndroid Build Coastguard Worker    *align = comp_size;
1008*61046927SAndroid Build Coastguard Worker }
1009*61046927SAndroid Build Coastguard Worker 
1010*61046927SAndroid Build Coastguard Worker static void
tu_gather_xfb_info(nir_shader * nir,struct ir3_stream_output_info * info)1011*61046927SAndroid Build Coastguard Worker tu_gather_xfb_info(nir_shader *nir, struct ir3_stream_output_info *info)
1012*61046927SAndroid Build Coastguard Worker {
1013*61046927SAndroid Build Coastguard Worker    nir_shader_gather_xfb_info(nir);
1014*61046927SAndroid Build Coastguard Worker 
1015*61046927SAndroid Build Coastguard Worker    if (!nir->xfb_info)
1016*61046927SAndroid Build Coastguard Worker       return;
1017*61046927SAndroid Build Coastguard Worker 
1018*61046927SAndroid Build Coastguard Worker    nir_xfb_info *xfb = nir->xfb_info;
1019*61046927SAndroid Build Coastguard Worker 
1020*61046927SAndroid Build Coastguard Worker    uint8_t output_map[VARYING_SLOT_TESS_MAX];
1021*61046927SAndroid Build Coastguard Worker    memset(output_map, 0, sizeof(output_map));
1022*61046927SAndroid Build Coastguard Worker 
1023*61046927SAndroid Build Coastguard Worker    nir_foreach_shader_out_variable(var, nir) {
1024*61046927SAndroid Build Coastguard Worker       unsigned slots = nir_variable_count_slots(var, var->type);
1025*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < slots; i++)
1026*61046927SAndroid Build Coastguard Worker          output_map[var->data.location + i] = var->data.driver_location + i;
1027*61046927SAndroid Build Coastguard Worker    }
1028*61046927SAndroid Build Coastguard Worker 
1029*61046927SAndroid Build Coastguard Worker    assert(xfb->output_count <= IR3_MAX_SO_OUTPUTS);
1030*61046927SAndroid Build Coastguard Worker    info->num_outputs = xfb->output_count;
1031*61046927SAndroid Build Coastguard Worker 
1032*61046927SAndroid Build Coastguard Worker    for (int i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
1033*61046927SAndroid Build Coastguard Worker       info->stride[i] = xfb->buffers[i].stride / 4;
1034*61046927SAndroid Build Coastguard Worker       info->buffer_to_stream[i] = xfb->buffer_to_stream[i];
1035*61046927SAndroid Build Coastguard Worker    }
1036*61046927SAndroid Build Coastguard Worker 
1037*61046927SAndroid Build Coastguard Worker    info->streams_written = xfb->streams_written;
1038*61046927SAndroid Build Coastguard Worker 
1039*61046927SAndroid Build Coastguard Worker    for (int i = 0; i < xfb->output_count; i++) {
1040*61046927SAndroid Build Coastguard Worker       info->output[i].register_index = output_map[xfb->outputs[i].location];
1041*61046927SAndroid Build Coastguard Worker       info->output[i].start_component = xfb->outputs[i].component_offset;
1042*61046927SAndroid Build Coastguard Worker       info->output[i].num_components =
1043*61046927SAndroid Build Coastguard Worker                            util_bitcount(xfb->outputs[i].component_mask);
1044*61046927SAndroid Build Coastguard Worker       info->output[i].output_buffer  = xfb->outputs[i].buffer;
1045*61046927SAndroid Build Coastguard Worker       info->output[i].dst_offset = xfb->outputs[i].offset / 4;
1046*61046927SAndroid Build Coastguard Worker       info->output[i].stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
1047*61046927SAndroid Build Coastguard Worker    }
1048*61046927SAndroid Build Coastguard Worker }
1049*61046927SAndroid Build Coastguard Worker 
1050*61046927SAndroid Build Coastguard Worker static uint32_t
tu_xs_get_immediates_packet_size_dwords(const struct ir3_shader_variant * xs)1051*61046927SAndroid Build Coastguard Worker tu_xs_get_immediates_packet_size_dwords(const struct ir3_shader_variant *xs)
1052*61046927SAndroid Build Coastguard Worker {
1053*61046927SAndroid Build Coastguard Worker    const struct ir3_const_state *const_state = ir3_const_state(xs);
1054*61046927SAndroid Build Coastguard Worker    uint32_t base = const_state->offsets.immediate;
1055*61046927SAndroid Build Coastguard Worker    int32_t size = DIV_ROUND_UP(const_state->immediates_count, 4);
1056*61046927SAndroid Build Coastguard Worker 
1057*61046927SAndroid Build Coastguard Worker    /* truncate size to avoid writing constants that shader
1058*61046927SAndroid Build Coastguard Worker     * does not use:
1059*61046927SAndroid Build Coastguard Worker     */
1060*61046927SAndroid Build Coastguard Worker    size = MIN2(size + base, xs->constlen) - base;
1061*61046927SAndroid Build Coastguard Worker 
1062*61046927SAndroid Build Coastguard Worker    return MAX2(size, 0) * 4;
1063*61046927SAndroid Build Coastguard Worker }
1064*61046927SAndroid Build Coastguard Worker 
1065*61046927SAndroid Build Coastguard Worker /* We allocate fixed-length substreams for shader state, however some
1066*61046927SAndroid Build Coastguard Worker  * parts of the state may have unbound length. Their additional space
1067*61046927SAndroid Build Coastguard Worker  * requirements should be calculated here.
1068*61046927SAndroid Build Coastguard Worker  */
1069*61046927SAndroid Build Coastguard Worker static uint32_t
tu_xs_get_additional_cs_size_dwords(const struct ir3_shader_variant * xs)1070*61046927SAndroid Build Coastguard Worker tu_xs_get_additional_cs_size_dwords(const struct ir3_shader_variant *xs)
1071*61046927SAndroid Build Coastguard Worker {
1072*61046927SAndroid Build Coastguard Worker    const struct ir3_const_state *const_state = ir3_const_state(xs);
1073*61046927SAndroid Build Coastguard Worker 
1074*61046927SAndroid Build Coastguard Worker    uint32_t size = tu_xs_get_immediates_packet_size_dwords(xs);
1075*61046927SAndroid Build Coastguard Worker 
1076*61046927SAndroid Build Coastguard Worker    /* Variable number of UBO upload ranges. */
1077*61046927SAndroid Build Coastguard Worker    size += 4 * const_state->ubo_state.num_enabled;
1078*61046927SAndroid Build Coastguard Worker 
1079*61046927SAndroid Build Coastguard Worker    /* Variable number of dwords for the primitive map */
1080*61046927SAndroid Build Coastguard Worker    size += xs->input_size;
1081*61046927SAndroid Build Coastguard Worker 
1082*61046927SAndroid Build Coastguard Worker    size += xs->constant_data_size / 4;
1083*61046927SAndroid Build Coastguard Worker 
1084*61046927SAndroid Build Coastguard Worker    return size;
1085*61046927SAndroid Build Coastguard Worker }
1086*61046927SAndroid Build Coastguard Worker 
1087*61046927SAndroid Build Coastguard Worker static const struct xs_config {
1088*61046927SAndroid Build Coastguard Worker    uint16_t reg_sp_xs_config;
1089*61046927SAndroid Build Coastguard Worker    uint16_t reg_sp_xs_instrlen;
1090*61046927SAndroid Build Coastguard Worker    uint16_t reg_sp_xs_first_exec_offset;
1091*61046927SAndroid Build Coastguard Worker    uint16_t reg_sp_xs_pvt_mem_hw_stack_offset;
1092*61046927SAndroid Build Coastguard Worker    uint16_t reg_sp_xs_vgpr_config;
1093*61046927SAndroid Build Coastguard Worker } xs_config[] = {
1094*61046927SAndroid Build Coastguard Worker    [MESA_SHADER_VERTEX] = {
1095*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_VS_CONFIG,
1096*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_VS_INSTRLEN,
1097*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_VS_OBJ_FIRST_EXEC_OFFSET,
1098*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET,
1099*61046927SAndroid Build Coastguard Worker       REG_A7XX_SP_VS_VGPR_CONFIG,
1100*61046927SAndroid Build Coastguard Worker    },
1101*61046927SAndroid Build Coastguard Worker    [MESA_SHADER_TESS_CTRL] = {
1102*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_HS_CONFIG,
1103*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_HS_INSTRLEN,
1104*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_HS_OBJ_FIRST_EXEC_OFFSET,
1105*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_HS_PVT_MEM_HW_STACK_OFFSET,
1106*61046927SAndroid Build Coastguard Worker       REG_A7XX_SP_HS_VGPR_CONFIG,
1107*61046927SAndroid Build Coastguard Worker    },
1108*61046927SAndroid Build Coastguard Worker    [MESA_SHADER_TESS_EVAL] = {
1109*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_DS_CONFIG,
1110*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_DS_INSTRLEN,
1111*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_DS_OBJ_FIRST_EXEC_OFFSET,
1112*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_DS_PVT_MEM_HW_STACK_OFFSET,
1113*61046927SAndroid Build Coastguard Worker       REG_A7XX_SP_DS_VGPR_CONFIG,
1114*61046927SAndroid Build Coastguard Worker    },
1115*61046927SAndroid Build Coastguard Worker    [MESA_SHADER_GEOMETRY] = {
1116*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_GS_CONFIG,
1117*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_GS_INSTRLEN,
1118*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_GS_OBJ_FIRST_EXEC_OFFSET,
1119*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_GS_PVT_MEM_HW_STACK_OFFSET,
1120*61046927SAndroid Build Coastguard Worker       REG_A7XX_SP_GS_VGPR_CONFIG,
1121*61046927SAndroid Build Coastguard Worker    },
1122*61046927SAndroid Build Coastguard Worker    [MESA_SHADER_FRAGMENT] = {
1123*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_FS_CONFIG,
1124*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_FS_INSTRLEN,
1125*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_FS_OBJ_FIRST_EXEC_OFFSET,
1126*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_FS_PVT_MEM_HW_STACK_OFFSET,
1127*61046927SAndroid Build Coastguard Worker       REG_A7XX_SP_FS_VGPR_CONFIG,
1128*61046927SAndroid Build Coastguard Worker    },
1129*61046927SAndroid Build Coastguard Worker    [MESA_SHADER_COMPUTE] = {
1130*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_CS_CONFIG,
1131*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_CS_INSTRLEN,
1132*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET,
1133*61046927SAndroid Build Coastguard Worker       REG_A6XX_SP_CS_PVT_MEM_HW_STACK_OFFSET,
1134*61046927SAndroid Build Coastguard Worker       REG_A7XX_SP_CS_VGPR_CONFIG,
1135*61046927SAndroid Build Coastguard Worker    },
1136*61046927SAndroid Build Coastguard Worker };
1137*61046927SAndroid Build Coastguard Worker 
1138*61046927SAndroid Build Coastguard Worker void
tu6_emit_xs(struct tu_cs * cs,gl_shader_stage stage,const struct ir3_shader_variant * xs,const struct tu_pvtmem_config * pvtmem,uint64_t binary_iova)1139*61046927SAndroid Build Coastguard Worker tu6_emit_xs(struct tu_cs *cs,
1140*61046927SAndroid Build Coastguard Worker             gl_shader_stage stage, /* xs->type, but xs may be NULL */
1141*61046927SAndroid Build Coastguard Worker             const struct ir3_shader_variant *xs,
1142*61046927SAndroid Build Coastguard Worker             const struct tu_pvtmem_config *pvtmem,
1143*61046927SAndroid Build Coastguard Worker             uint64_t binary_iova)
1144*61046927SAndroid Build Coastguard Worker {
1145*61046927SAndroid Build Coastguard Worker    const struct xs_config *cfg = &xs_config[stage];
1146*61046927SAndroid Build Coastguard Worker 
1147*61046927SAndroid Build Coastguard Worker    if (!xs) {
1148*61046927SAndroid Build Coastguard Worker       /* shader stage disabled */
1149*61046927SAndroid Build Coastguard Worker       return;
1150*61046927SAndroid Build Coastguard Worker    }
1151*61046927SAndroid Build Coastguard Worker 
1152*61046927SAndroid Build Coastguard Worker    enum a6xx_threadsize thrsz =
1153*61046927SAndroid Build Coastguard Worker       xs->info.double_threadsize ? THREAD128 : THREAD64;
1154*61046927SAndroid Build Coastguard Worker    switch (stage) {
1155*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_VERTEX:
1156*61046927SAndroid Build Coastguard Worker       tu_cs_emit_regs(cs, A6XX_SP_VS_CTRL_REG0(
1157*61046927SAndroid Build Coastguard Worker                .halfregfootprint = xs->info.max_half_reg + 1,
1158*61046927SAndroid Build Coastguard Worker                .fullregfootprint = xs->info.max_reg + 1,
1159*61046927SAndroid Build Coastguard Worker                .branchstack = ir3_shader_branchstack_hw(xs),
1160*61046927SAndroid Build Coastguard Worker                .mergedregs = xs->mergedregs,
1161*61046927SAndroid Build Coastguard Worker                .earlypreamble = xs->early_preamble,
1162*61046927SAndroid Build Coastguard Worker       ));
1163*61046927SAndroid Build Coastguard Worker       break;
1164*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_TESS_CTRL:
1165*61046927SAndroid Build Coastguard Worker       tu_cs_emit_regs(cs, A6XX_SP_HS_CTRL_REG0(
1166*61046927SAndroid Build Coastguard Worker                .halfregfootprint = xs->info.max_half_reg + 1,
1167*61046927SAndroid Build Coastguard Worker                .fullregfootprint = xs->info.max_reg + 1,
1168*61046927SAndroid Build Coastguard Worker                .branchstack = ir3_shader_branchstack_hw(xs),
1169*61046927SAndroid Build Coastguard Worker                .earlypreamble = xs->early_preamble,
1170*61046927SAndroid Build Coastguard Worker       ));
1171*61046927SAndroid Build Coastguard Worker       break;
1172*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_TESS_EVAL:
1173*61046927SAndroid Build Coastguard Worker       tu_cs_emit_regs(cs, A6XX_SP_DS_CTRL_REG0(
1174*61046927SAndroid Build Coastguard Worker                .halfregfootprint = xs->info.max_half_reg + 1,
1175*61046927SAndroid Build Coastguard Worker                .fullregfootprint = xs->info.max_reg + 1,
1176*61046927SAndroid Build Coastguard Worker                .branchstack = ir3_shader_branchstack_hw(xs),
1177*61046927SAndroid Build Coastguard Worker                .earlypreamble = xs->early_preamble,
1178*61046927SAndroid Build Coastguard Worker       ));
1179*61046927SAndroid Build Coastguard Worker       break;
1180*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_GEOMETRY:
1181*61046927SAndroid Build Coastguard Worker       tu_cs_emit_regs(cs, A6XX_SP_GS_CTRL_REG0(
1182*61046927SAndroid Build Coastguard Worker                .halfregfootprint = xs->info.max_half_reg + 1,
1183*61046927SAndroid Build Coastguard Worker                .fullregfootprint = xs->info.max_reg + 1,
1184*61046927SAndroid Build Coastguard Worker                .branchstack = ir3_shader_branchstack_hw(xs),
1185*61046927SAndroid Build Coastguard Worker                .earlypreamble = xs->early_preamble,
1186*61046927SAndroid Build Coastguard Worker       ));
1187*61046927SAndroid Build Coastguard Worker       break;
1188*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_FRAGMENT:
1189*61046927SAndroid Build Coastguard Worker       tu_cs_emit_regs(cs, A6XX_SP_FS_CTRL_REG0(
1190*61046927SAndroid Build Coastguard Worker                .halfregfootprint = xs->info.max_half_reg + 1,
1191*61046927SAndroid Build Coastguard Worker                .fullregfootprint = xs->info.max_reg + 1,
1192*61046927SAndroid Build Coastguard Worker                .branchstack = ir3_shader_branchstack_hw(xs),
1193*61046927SAndroid Build Coastguard Worker                .threadsize = thrsz,
1194*61046927SAndroid Build Coastguard Worker                .varying = xs->total_in != 0,
1195*61046927SAndroid Build Coastguard Worker                .lodpixmask = xs->need_full_quad,
1196*61046927SAndroid Build Coastguard Worker                /* unknown bit, seems unnecessary */
1197*61046927SAndroid Build Coastguard Worker                .unk24 = true,
1198*61046927SAndroid Build Coastguard Worker                .pixlodenable = xs->need_pixlod,
1199*61046927SAndroid Build Coastguard Worker                .earlypreamble = xs->early_preamble,
1200*61046927SAndroid Build Coastguard Worker                .mergedregs = xs->mergedregs,
1201*61046927SAndroid Build Coastguard Worker       ));
1202*61046927SAndroid Build Coastguard Worker       break;
1203*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_COMPUTE:
1204*61046927SAndroid Build Coastguard Worker       thrsz = cs->device->physical_device->info->a6xx
1205*61046927SAndroid Build Coastguard Worker             .supports_double_threadsize ? thrsz : THREAD128;
1206*61046927SAndroid Build Coastguard Worker       tu_cs_emit_regs(cs, A6XX_SP_CS_CTRL_REG0(
1207*61046927SAndroid Build Coastguard Worker                .halfregfootprint = xs->info.max_half_reg + 1,
1208*61046927SAndroid Build Coastguard Worker                .fullregfootprint = xs->info.max_reg + 1,
1209*61046927SAndroid Build Coastguard Worker                .branchstack = ir3_shader_branchstack_hw(xs),
1210*61046927SAndroid Build Coastguard Worker                .threadsize = thrsz,
1211*61046927SAndroid Build Coastguard Worker                .earlypreamble = xs->early_preamble,
1212*61046927SAndroid Build Coastguard Worker                .mergedregs = xs->mergedregs,
1213*61046927SAndroid Build Coastguard Worker       ));
1214*61046927SAndroid Build Coastguard Worker       break;
1215*61046927SAndroid Build Coastguard Worker    default:
1216*61046927SAndroid Build Coastguard Worker       unreachable("bad shader stage");
1217*61046927SAndroid Build Coastguard Worker    }
1218*61046927SAndroid Build Coastguard Worker 
1219*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_instrlen, 1);
1220*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, xs->instrlen);
1221*61046927SAndroid Build Coastguard Worker 
1222*61046927SAndroid Build Coastguard Worker    /* emit program binary & private memory layout
1223*61046927SAndroid Build Coastguard Worker     * binary_iova should be aligned to 1 instrlen unit (128 bytes)
1224*61046927SAndroid Build Coastguard Worker     */
1225*61046927SAndroid Build Coastguard Worker 
1226*61046927SAndroid Build Coastguard Worker    assert((binary_iova & 0x7f) == 0);
1227*61046927SAndroid Build Coastguard Worker    assert((pvtmem->iova & 0x1f) == 0);
1228*61046927SAndroid Build Coastguard Worker 
1229*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_first_exec_offset, 7);
1230*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, 0);
1231*61046927SAndroid Build Coastguard Worker    tu_cs_emit_qw(cs, binary_iova);
1232*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs,
1233*61046927SAndroid Build Coastguard Worker               A6XX_SP_VS_PVT_MEM_PARAM_MEMSIZEPERITEM(pvtmem->per_fiber_size));
1234*61046927SAndroid Build Coastguard Worker    tu_cs_emit_qw(cs, pvtmem->iova);
1235*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, A6XX_SP_VS_PVT_MEM_SIZE_TOTALPVTMEMSIZE(pvtmem->per_sp_size) |
1236*61046927SAndroid Build Coastguard Worker                   COND(pvtmem->per_wave, A6XX_SP_VS_PVT_MEM_SIZE_PERWAVEMEMLAYOUT));
1237*61046927SAndroid Build Coastguard Worker 
1238*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_pvt_mem_hw_stack_offset, 1);
1239*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET_OFFSET(pvtmem->per_sp_size));
1240*61046927SAndroid Build Coastguard Worker 
1241*61046927SAndroid Build Coastguard Worker    if (cs->device->physical_device->info->chip >= A7XX) {
1242*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_vgpr_config, 1);
1243*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, 0);
1244*61046927SAndroid Build Coastguard Worker    }
1245*61046927SAndroid Build Coastguard Worker 
1246*61046927SAndroid Build Coastguard Worker    if (cs->device->physical_device->info->chip == A6XX) {
1247*61046927SAndroid Build Coastguard Worker       uint32_t shader_preload_size =
1248*61046927SAndroid Build Coastguard Worker          MIN2(xs->instrlen, cs->device->physical_device->info->a6xx.instr_cache_size);
1249*61046927SAndroid Build Coastguard Worker 
1250*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3);
1251*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
1252*61046927SAndroid Build Coastguard Worker                      CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
1253*61046927SAndroid Build Coastguard Worker                      CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
1254*61046927SAndroid Build Coastguard Worker                      CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
1255*61046927SAndroid Build Coastguard Worker                      CP_LOAD_STATE6_0_NUM_UNIT(shader_preload_size));
1256*61046927SAndroid Build Coastguard Worker       tu_cs_emit_qw(cs, binary_iova);
1257*61046927SAndroid Build Coastguard Worker    }
1258*61046927SAndroid Build Coastguard Worker 
1259*61046927SAndroid Build Coastguard Worker    /* emit immediates */
1260*61046927SAndroid Build Coastguard Worker 
1261*61046927SAndroid Build Coastguard Worker    const struct ir3_const_state *const_state = ir3_const_state(xs);
1262*61046927SAndroid Build Coastguard Worker    uint32_t base = const_state->offsets.immediate;
1263*61046927SAndroid Build Coastguard Worker    unsigned immediate_size = tu_xs_get_immediates_packet_size_dwords(xs);
1264*61046927SAndroid Build Coastguard Worker 
1265*61046927SAndroid Build Coastguard Worker    if (immediate_size > 0) {
1266*61046927SAndroid Build Coastguard Worker       assert(!cs->device->physical_device->info->a7xx.load_shader_consts_via_preamble);
1267*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + immediate_size);
1268*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
1269*61046927SAndroid Build Coastguard Worker                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
1270*61046927SAndroid Build Coastguard Worker                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
1271*61046927SAndroid Build Coastguard Worker                  CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
1272*61046927SAndroid Build Coastguard Worker                  CP_LOAD_STATE6_0_NUM_UNIT(immediate_size / 4));
1273*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
1274*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
1275*61046927SAndroid Build Coastguard Worker 
1276*61046927SAndroid Build Coastguard Worker       tu_cs_emit_array(cs, const_state->immediates, immediate_size);
1277*61046927SAndroid Build Coastguard Worker    }
1278*61046927SAndroid Build Coastguard Worker 
1279*61046927SAndroid Build Coastguard Worker    if (const_state->consts_ubo.idx != -1) {
1280*61046927SAndroid Build Coastguard Worker       uint64_t iova = binary_iova + xs->info.constant_data_offset;
1281*61046927SAndroid Build Coastguard Worker       uint32_t offset = const_state->consts_ubo.idx;
1282*61046927SAndroid Build Coastguard Worker 
1283*61046927SAndroid Build Coastguard Worker       /* Upload UBO state for the constant data. */
1284*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 5);
1285*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs,
1286*61046927SAndroid Build Coastguard Worker                  CP_LOAD_STATE6_0_DST_OFF(offset) |
1287*61046927SAndroid Build Coastguard Worker                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO)|
1288*61046927SAndroid Build Coastguard Worker                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
1289*61046927SAndroid Build Coastguard Worker                  CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
1290*61046927SAndroid Build Coastguard Worker                  CP_LOAD_STATE6_0_NUM_UNIT(1));
1291*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
1292*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
1293*61046927SAndroid Build Coastguard Worker       int size_vec4s = DIV_ROUND_UP(xs->constant_data_size, 16);
1294*61046927SAndroid Build Coastguard Worker       tu_cs_emit_qw(cs,
1295*61046927SAndroid Build Coastguard Worker                     iova |
1296*61046927SAndroid Build Coastguard Worker                     (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32);
1297*61046927SAndroid Build Coastguard Worker 
1298*61046927SAndroid Build Coastguard Worker       /* Upload the constant data to the const file if needed. */
1299*61046927SAndroid Build Coastguard Worker       const struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state;
1300*61046927SAndroid Build Coastguard Worker 
1301*61046927SAndroid Build Coastguard Worker       if (!cs->device->physical_device->info->a7xx.load_shader_consts_via_preamble) {
1302*61046927SAndroid Build Coastguard Worker          for (int i = 0; i < ubo_state->num_enabled; i++) {
1303*61046927SAndroid Build Coastguard Worker             if (ubo_state->range[i].ubo.block != offset ||
1304*61046927SAndroid Build Coastguard Worker                 ubo_state->range[i].ubo.bindless) {
1305*61046927SAndroid Build Coastguard Worker                continue;
1306*61046927SAndroid Build Coastguard Worker             }
1307*61046927SAndroid Build Coastguard Worker 
1308*61046927SAndroid Build Coastguard Worker             uint32_t start = ubo_state->range[i].start;
1309*61046927SAndroid Build Coastguard Worker             uint32_t end = ubo_state->range[i].end;
1310*61046927SAndroid Build Coastguard Worker             uint32_t size = MIN2(end - start,
1311*61046927SAndroid Build Coastguard Worker                                  (16 * xs->constlen) - ubo_state->range[i].offset);
1312*61046927SAndroid Build Coastguard Worker 
1313*61046927SAndroid Build Coastguard Worker             tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3);
1314*61046927SAndroid Build Coastguard Worker             tu_cs_emit(cs,
1315*61046927SAndroid Build Coastguard Worker                      CP_LOAD_STATE6_0_DST_OFF(ubo_state->range[i].offset / 16) |
1316*61046927SAndroid Build Coastguard Worker                      CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
1317*61046927SAndroid Build Coastguard Worker                      CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
1318*61046927SAndroid Build Coastguard Worker                      CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
1319*61046927SAndroid Build Coastguard Worker                      CP_LOAD_STATE6_0_NUM_UNIT(size / 16));
1320*61046927SAndroid Build Coastguard Worker             tu_cs_emit_qw(cs, iova + start);
1321*61046927SAndroid Build Coastguard Worker          }
1322*61046927SAndroid Build Coastguard Worker       }
1323*61046927SAndroid Build Coastguard Worker    }
1324*61046927SAndroid Build Coastguard Worker 
1325*61046927SAndroid Build Coastguard Worker    /* emit statically-known FS driver param */
1326*61046927SAndroid Build Coastguard Worker    if (stage == MESA_SHADER_FRAGMENT && const_state->driver_params_ubo.size > 0) {
1327*61046927SAndroid Build Coastguard Worker       uint32_t data[4] = {xs->info.double_threadsize ? 128 : 64, 0, 0, 0};
1328*61046927SAndroid Build Coastguard Worker       uint32_t size = ARRAY_SIZE(data);
1329*61046927SAndroid Build Coastguard Worker 
1330*61046927SAndroid Build Coastguard Worker       /* A7XX TODO: Emit data via sub_cs instead of NOP */
1331*61046927SAndroid Build Coastguard Worker       uint64_t iova = tu_cs_emit_data_nop(cs, data, size, 4);
1332*61046927SAndroid Build Coastguard Worker       uint32_t base = const_state->driver_params_ubo.idx;
1333*61046927SAndroid Build Coastguard Worker 
1334*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 5);
1335*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
1336*61046927SAndroid Build Coastguard Worker                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
1337*61046927SAndroid Build Coastguard Worker                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
1338*61046927SAndroid Build Coastguard Worker                  CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
1339*61046927SAndroid Build Coastguard Worker                  CP_LOAD_STATE6_0_NUM_UNIT(1));
1340*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
1341*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
1342*61046927SAndroid Build Coastguard Worker       int size_vec4s = DIV_ROUND_UP(size, 4);
1343*61046927SAndroid Build Coastguard Worker       tu_cs_emit_qw(cs, iova | ((uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32));
1344*61046927SAndroid Build Coastguard Worker    } else if (stage == MESA_SHADER_FRAGMENT && const_state->num_driver_params > 0) {
1345*61046927SAndroid Build Coastguard Worker       uint32_t base = const_state->offsets.driver_param;
1346*61046927SAndroid Build Coastguard Worker       int32_t size = DIV_ROUND_UP(MAX2(const_state->num_driver_params, 4), 4);
1347*61046927SAndroid Build Coastguard Worker       size = MAX2(MIN2(size + base, xs->constlen) - base, 0);
1348*61046927SAndroid Build Coastguard Worker 
1349*61046927SAndroid Build Coastguard Worker       if (size > 0) {
1350*61046927SAndroid Build Coastguard Worker          tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + 4);
1351*61046927SAndroid Build Coastguard Worker          tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) |
1352*61046927SAndroid Build Coastguard Worker                     CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
1353*61046927SAndroid Build Coastguard Worker                     CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
1354*61046927SAndroid Build Coastguard Worker                     CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) |
1355*61046927SAndroid Build Coastguard Worker                     CP_LOAD_STATE6_0_NUM_UNIT(size));
1356*61046927SAndroid Build Coastguard Worker          tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
1357*61046927SAndroid Build Coastguard Worker          tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
1358*61046927SAndroid Build Coastguard Worker 
1359*61046927SAndroid Build Coastguard Worker          tu_cs_emit(cs, xs->info.double_threadsize ? 128 : 64);
1360*61046927SAndroid Build Coastguard Worker          tu_cs_emit(cs, 0);
1361*61046927SAndroid Build Coastguard Worker          tu_cs_emit(cs, 0);
1362*61046927SAndroid Build Coastguard Worker          tu_cs_emit(cs, 0);
1363*61046927SAndroid Build Coastguard Worker       }
1364*61046927SAndroid Build Coastguard Worker    }
1365*61046927SAndroid Build Coastguard Worker }
1366*61046927SAndroid Build Coastguard Worker 
1367*61046927SAndroid Build Coastguard Worker template <chip CHIP>
1368*61046927SAndroid Build Coastguard Worker static void
tu6_emit_cs_config(struct tu_cs * cs,const struct ir3_shader_variant * v,const struct tu_pvtmem_config * pvtmem,uint64_t binary_iova)1369*61046927SAndroid Build Coastguard Worker tu6_emit_cs_config(struct tu_cs *cs,
1370*61046927SAndroid Build Coastguard Worker                    const struct ir3_shader_variant *v,
1371*61046927SAndroid Build Coastguard Worker                    const struct tu_pvtmem_config *pvtmem,
1372*61046927SAndroid Build Coastguard Worker                    uint64_t binary_iova)
1373*61046927SAndroid Build Coastguard Worker {
1374*61046927SAndroid Build Coastguard Worker    bool shared_consts_enable =
1375*61046927SAndroid Build Coastguard Worker       ir3_const_state(v)->push_consts_type == IR3_PUSH_CONSTS_SHARED;
1376*61046927SAndroid Build Coastguard Worker    tu6_emit_shared_consts_enable<CHIP>(cs, shared_consts_enable);
1377*61046927SAndroid Build Coastguard Worker 
1378*61046927SAndroid Build Coastguard Worker    tu_cs_emit_regs(cs, HLSQ_INVALIDATE_CMD(CHIP,
1379*61046927SAndroid Build Coastguard Worker          .cs_state = true,
1380*61046927SAndroid Build Coastguard Worker          .cs_ibo = true,
1381*61046927SAndroid Build Coastguard Worker          .cs_shared_const = shared_consts_enable));
1382*61046927SAndroid Build Coastguard Worker 
1383*61046927SAndroid Build Coastguard Worker    tu6_emit_xs_config<CHIP>(cs, MESA_SHADER_COMPUTE, v);
1384*61046927SAndroid Build Coastguard Worker    tu6_emit_xs(cs, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova);
1385*61046927SAndroid Build Coastguard Worker 
1386*61046927SAndroid Build Coastguard Worker    uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1);
1387*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
1388*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) |
1389*61046927SAndroid Build Coastguard Worker                   A6XX_SP_CS_UNKNOWN_A9B1_UNK6);
1390*61046927SAndroid Build Coastguard Worker 
1391*61046927SAndroid Build Coastguard Worker    if (CHIP == A6XX && cs->device->physical_device->info->a6xx.has_lpac) {
1392*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_UNKNOWN_B9D0, 1);
1393*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, A6XX_HLSQ_CS_UNKNOWN_B9D0_SHARED_SIZE(shared_size) |
1394*61046927SAndroid Build Coastguard Worker                      A6XX_HLSQ_CS_UNKNOWN_B9D0_UNK6);
1395*61046927SAndroid Build Coastguard Worker    }
1396*61046927SAndroid Build Coastguard Worker 
1397*61046927SAndroid Build Coastguard Worker    uint32_t local_invocation_id =
1398*61046927SAndroid Build Coastguard Worker       ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
1399*61046927SAndroid Build Coastguard Worker    uint32_t work_group_id =
1400*61046927SAndroid Build Coastguard Worker       ir3_find_sysval_regid(v, SYSTEM_VALUE_WORKGROUP_ID);
1401*61046927SAndroid Build Coastguard Worker 
1402*61046927SAndroid Build Coastguard Worker    /*
1403*61046927SAndroid Build Coastguard Worker     * Devices that do not support double threadsize take the threadsize from
1404*61046927SAndroid Build Coastguard Worker     * A6XX_HLSQ_FS_CNTL_0_THREADSIZE instead of A6XX_HLSQ_CS_CNTL_1_THREADSIZE
1405*61046927SAndroid Build Coastguard Worker     * which is always set to THREAD128.
1406*61046927SAndroid Build Coastguard Worker     */
1407*61046927SAndroid Build Coastguard Worker    enum a6xx_threadsize thrsz = v->info.double_threadsize ? THREAD128 : THREAD64;
1408*61046927SAndroid Build Coastguard Worker    enum a6xx_threadsize thrsz_cs = cs->device->physical_device->info->a6xx
1409*61046927SAndroid Build Coastguard Worker       .supports_double_threadsize ? thrsz : THREAD128;
1410*61046927SAndroid Build Coastguard Worker    if (CHIP == A6XX) {
1411*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_CNTL_0, 2);
1412*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs,
1413*61046927SAndroid Build Coastguard Worker                  A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) |
1414*61046927SAndroid Build Coastguard Worker                  A6XX_HLSQ_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) |
1415*61046927SAndroid Build Coastguard Worker                  A6XX_HLSQ_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) |
1416*61046927SAndroid Build Coastguard Worker                  A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
1417*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) |
1418*61046927SAndroid Build Coastguard Worker                      A6XX_HLSQ_CS_CNTL_1_THREADSIZE(thrsz_cs));
1419*61046927SAndroid Build Coastguard Worker       if (!cs->device->physical_device->info->a6xx.supports_double_threadsize) {
1420*61046927SAndroid Build Coastguard Worker          tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_FS_CNTL_0, 1);
1421*61046927SAndroid Build Coastguard Worker          tu_cs_emit(cs, A6XX_HLSQ_FS_CNTL_0_THREADSIZE(thrsz));
1422*61046927SAndroid Build Coastguard Worker       }
1423*61046927SAndroid Build Coastguard Worker 
1424*61046927SAndroid Build Coastguard Worker       if (cs->device->physical_device->info->a6xx.has_lpac) {
1425*61046927SAndroid Build Coastguard Worker          tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CNTL_0, 2);
1426*61046927SAndroid Build Coastguard Worker          tu_cs_emit(cs,
1427*61046927SAndroid Build Coastguard Worker                     A6XX_SP_CS_CNTL_0_WGIDCONSTID(work_group_id) |
1428*61046927SAndroid Build Coastguard Worker                     A6XX_SP_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) |
1429*61046927SAndroid Build Coastguard Worker                     A6XX_SP_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) |
1430*61046927SAndroid Build Coastguard Worker                     A6XX_SP_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
1431*61046927SAndroid Build Coastguard Worker          tu_cs_emit(cs, A6XX_SP_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) |
1432*61046927SAndroid Build Coastguard Worker                   A6XX_SP_CS_CNTL_1_THREADSIZE(thrsz));
1433*61046927SAndroid Build Coastguard Worker       }
1434*61046927SAndroid Build Coastguard Worker    } else {
1435*61046927SAndroid Build Coastguard Worker       unsigned tile_height = (v->local_size[1] % 8 == 0)   ? 3
1436*61046927SAndroid Build Coastguard Worker                              : (v->local_size[1] % 4 == 0) ? 5
1437*61046927SAndroid Build Coastguard Worker                              : (v->local_size[1] % 2 == 0) ? 9
1438*61046927SAndroid Build Coastguard Worker                                                            : 17;
1439*61046927SAndroid Build Coastguard Worker       tu_cs_emit_regs(
1440*61046927SAndroid Build Coastguard Worker          cs, HLSQ_CS_CNTL_1(CHIP,
1441*61046927SAndroid Build Coastguard Worker                    .linearlocalidregid = regid(63, 0), .threadsize = thrsz_cs,
1442*61046927SAndroid Build Coastguard Worker                    .workgrouprastorderzfirsten = true,
1443*61046927SAndroid Build Coastguard Worker                    .wgtilewidth = 4, .wgtileheight = tile_height));
1444*61046927SAndroid Build Coastguard Worker 
1445*61046927SAndroid Build Coastguard Worker       tu_cs_emit_regs(cs, HLSQ_FS_CNTL_0(CHIP, .threadsize = THREAD64));
1446*61046927SAndroid Build Coastguard Worker 
1447*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CNTL_0, 1);
1448*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, A6XX_SP_CS_CNTL_0_WGIDCONSTID(work_group_id) |
1449*61046927SAndroid Build Coastguard Worker                         A6XX_SP_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) |
1450*61046927SAndroid Build Coastguard Worker                         A6XX_SP_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) |
1451*61046927SAndroid Build Coastguard Worker                         A6XX_SP_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
1452*61046927SAndroid Build Coastguard Worker 
1453*61046927SAndroid Build Coastguard Worker       tu_cs_emit_regs(cs,
1454*61046927SAndroid Build Coastguard Worker                       SP_CS_CNTL_1(CHIP,
1455*61046927SAndroid Build Coastguard Worker                         .linearlocalidregid = regid(63, 0),
1456*61046927SAndroid Build Coastguard Worker                         .threadsize = thrsz_cs,
1457*61046927SAndroid Build Coastguard Worker                         .workitemrastorder =
1458*61046927SAndroid Build Coastguard Worker                            v->cs.force_linear_dispatch ?
1459*61046927SAndroid Build Coastguard Worker                            WORKITEMRASTORDER_LINEAR :
1460*61046927SAndroid Build Coastguard Worker                            WORKITEMRASTORDER_TILED, ));
1461*61046927SAndroid Build Coastguard Worker 
1462*61046927SAndroid Build Coastguard Worker       tu_cs_emit_regs(
1463*61046927SAndroid Build Coastguard Worker          cs, A7XX_HLSQ_CS_LOCAL_SIZE(.localsizex = v->local_size[0] - 1,
1464*61046927SAndroid Build Coastguard Worker                                      .localsizey = v->local_size[1] - 1,
1465*61046927SAndroid Build Coastguard Worker                                      .localsizez = v->local_size[2] - 1, ));
1466*61046927SAndroid Build Coastguard Worker 
1467*61046927SAndroid Build Coastguard Worker       tu_cs_emit_regs(cs, A7XX_SP_CS_UNKNOWN_A9BE(0)); // Sometimes is 0x08000000
1468*61046927SAndroid Build Coastguard Worker    }
1469*61046927SAndroid Build Coastguard Worker }
1470*61046927SAndroid Build Coastguard Worker 
1471*61046927SAndroid Build Coastguard Worker #define TU6_EMIT_VFD_DEST_MAX_DWORDS (MAX_VERTEX_ATTRIBS + 2)
1472*61046927SAndroid Build Coastguard Worker 
1473*61046927SAndroid Build Coastguard Worker static void
tu6_emit_vfd_dest(struct tu_cs * cs,const struct ir3_shader_variant * vs)1474*61046927SAndroid Build Coastguard Worker tu6_emit_vfd_dest(struct tu_cs *cs,
1475*61046927SAndroid Build Coastguard Worker                   const struct ir3_shader_variant *vs)
1476*61046927SAndroid Build Coastguard Worker {
1477*61046927SAndroid Build Coastguard Worker    int32_t input_for_attr[MAX_VERTEX_ATTRIBS];
1478*61046927SAndroid Build Coastguard Worker    uint32_t attr_count = 0;
1479*61046927SAndroid Build Coastguard Worker 
1480*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; i++)
1481*61046927SAndroid Build Coastguard Worker       input_for_attr[i] = -1;
1482*61046927SAndroid Build Coastguard Worker 
1483*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < vs->inputs_count; i++) {
1484*61046927SAndroid Build Coastguard Worker       if (vs->inputs[i].sysval || vs->inputs[i].regid == regid(63, 0))
1485*61046927SAndroid Build Coastguard Worker          continue;
1486*61046927SAndroid Build Coastguard Worker 
1487*61046927SAndroid Build Coastguard Worker       assert(vs->inputs[i].slot >= VERT_ATTRIB_GENERIC0);
1488*61046927SAndroid Build Coastguard Worker       unsigned loc = vs->inputs[i].slot - VERT_ATTRIB_GENERIC0;
1489*61046927SAndroid Build Coastguard Worker       input_for_attr[loc] = i;
1490*61046927SAndroid Build Coastguard Worker       attr_count = MAX2(attr_count, loc + 1);
1491*61046927SAndroid Build Coastguard Worker    }
1492*61046927SAndroid Build Coastguard Worker 
1493*61046927SAndroid Build Coastguard Worker    tu_cs_emit_regs(cs,
1494*61046927SAndroid Build Coastguard Worker                    A6XX_VFD_CONTROL_0(
1495*61046927SAndroid Build Coastguard Worker                      .fetch_cnt = attr_count, /* decode_cnt for binning pass ? */
1496*61046927SAndroid Build Coastguard Worker                      .decode_cnt = attr_count));
1497*61046927SAndroid Build Coastguard Worker 
1498*61046927SAndroid Build Coastguard Worker    if (attr_count)
1499*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt4(cs, REG_A6XX_VFD_DEST_CNTL_INSTR(0), attr_count);
1500*61046927SAndroid Build Coastguard Worker 
1501*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < attr_count; i++) {
1502*61046927SAndroid Build Coastguard Worker       if (input_for_attr[i] >= 0) {
1503*61046927SAndroid Build Coastguard Worker             unsigned input_idx = input_for_attr[i];
1504*61046927SAndroid Build Coastguard Worker             tu_cs_emit(cs, A6XX_VFD_DEST_CNTL_INSTR(0,
1505*61046927SAndroid Build Coastguard Worker                              .writemask = vs->inputs[input_idx].compmask,
1506*61046927SAndroid Build Coastguard Worker                              .regid = vs->inputs[input_idx].regid).value);
1507*61046927SAndroid Build Coastguard Worker       } else {
1508*61046927SAndroid Build Coastguard Worker             tu_cs_emit(cs, A6XX_VFD_DEST_CNTL_INSTR(0,
1509*61046927SAndroid Build Coastguard Worker                              .writemask = 0,
1510*61046927SAndroid Build Coastguard Worker                              .regid = regid(63, 0)).value);
1511*61046927SAndroid Build Coastguard Worker       }
1512*61046927SAndroid Build Coastguard Worker    }
1513*61046927SAndroid Build Coastguard Worker }
1514*61046927SAndroid Build Coastguard Worker 
1515*61046927SAndroid Build Coastguard Worker static enum a6xx_tex_prefetch_cmd
tu6_tex_opc_to_prefetch_cmd(opc_t tex_opc)1516*61046927SAndroid Build Coastguard Worker tu6_tex_opc_to_prefetch_cmd(opc_t tex_opc)
1517*61046927SAndroid Build Coastguard Worker {
1518*61046927SAndroid Build Coastguard Worker    switch (tex_opc) {
1519*61046927SAndroid Build Coastguard Worker    case OPC_SAM:
1520*61046927SAndroid Build Coastguard Worker       return TEX_PREFETCH_SAM;
1521*61046927SAndroid Build Coastguard Worker    default:
1522*61046927SAndroid Build Coastguard Worker       unreachable("Unknown tex opc for prefeth cmd");
1523*61046927SAndroid Build Coastguard Worker    }
1524*61046927SAndroid Build Coastguard Worker }
1525*61046927SAndroid Build Coastguard Worker 
1526*61046927SAndroid Build Coastguard Worker template <chip CHIP>
1527*61046927SAndroid Build Coastguard Worker static void
tu6_emit_fs_inputs(struct tu_cs * cs,const struct ir3_shader_variant * fs)1528*61046927SAndroid Build Coastguard Worker tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs)
1529*61046927SAndroid Build Coastguard Worker {
1530*61046927SAndroid Build Coastguard Worker    uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid;
1531*61046927SAndroid Build Coastguard Worker    uint32_t ij_regid[IJ_COUNT];
1532*61046927SAndroid Build Coastguard Worker    uint32_t smask_in_regid;
1533*61046927SAndroid Build Coastguard Worker 
1534*61046927SAndroid Build Coastguard Worker    bool sample_shading = fs->per_samp | fs->key.sample_shading;
1535*61046927SAndroid Build Coastguard Worker    bool enable_varyings = fs->total_in > 0;
1536*61046927SAndroid Build Coastguard Worker 
1537*61046927SAndroid Build Coastguard Worker    samp_id_regid   = ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_ID);
1538*61046927SAndroid Build Coastguard Worker    smask_in_regid  = ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_MASK_IN);
1539*61046927SAndroid Build Coastguard Worker    face_regid      = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRONT_FACE);
1540*61046927SAndroid Build Coastguard Worker    coord_regid     = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRAG_COORD);
1541*61046927SAndroid Build Coastguard Worker    zwcoord_regid   = VALIDREG(coord_regid) ? coord_regid + 2 : regid(63, 0);
1542*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < ARRAY_SIZE(ij_regid); i++)
1543*61046927SAndroid Build Coastguard Worker       ij_regid[i] = ir3_find_sysval_regid(fs, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i);
1544*61046927SAndroid Build Coastguard Worker 
1545*61046927SAndroid Build Coastguard Worker    if (fs->num_sampler_prefetch > 0) {
1546*61046927SAndroid Build Coastguard Worker       /* It seems like ij_pix is *required* to be r0.x */
1547*61046927SAndroid Build Coastguard Worker       assert(!VALIDREG(ij_regid[IJ_PERSP_PIXEL]) ||
1548*61046927SAndroid Build Coastguard Worker              ij_regid[IJ_PERSP_PIXEL] == regid(0, 0));
1549*61046927SAndroid Build Coastguard Worker    }
1550*61046927SAndroid Build Coastguard Worker 
1551*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + fs->num_sampler_prefetch);
1552*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CNTL_COUNT(fs->num_sampler_prefetch) |
1553*61046927SAndroid Build Coastguard Worker                      COND(CHIP >= A7XX, A6XX_SP_FS_PREFETCH_CNTL_CONSTSLOTID(0x1ff)) |
1554*61046927SAndroid Build Coastguard Worker                      COND(CHIP >= A7XX, A6XX_SP_FS_PREFETCH_CNTL_CONSTSLOTID4COORD(0x1ff)) |
1555*61046927SAndroid Build Coastguard Worker                      COND(!VALIDREG(ij_regid[IJ_PERSP_PIXEL]),
1556*61046927SAndroid Build Coastguard Worker                           A6XX_SP_FS_PREFETCH_CNTL_IJ_WRITE_DISABLE) |
1557*61046927SAndroid Build Coastguard Worker                      COND(fs->prefetch_end_of_quad,
1558*61046927SAndroid Build Coastguard Worker                           A6XX_SP_FS_PREFETCH_CNTL_ENDOFQUAD));
1559*61046927SAndroid Build Coastguard Worker    for (int i = 0; i < fs->num_sampler_prefetch; i++) {
1560*61046927SAndroid Build Coastguard Worker       const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i];
1561*61046927SAndroid Build Coastguard Worker       tu_cs_emit(
1562*61046927SAndroid Build Coastguard Worker          cs, SP_FS_PREFETCH_CMD(
1563*61046927SAndroid Build Coastguard Worker                 CHIP, i, .src = prefetch->src, .samp_id = prefetch->samp_id,
1564*61046927SAndroid Build Coastguard Worker                 .tex_id = prefetch->tex_id, .dst = prefetch->dst,
1565*61046927SAndroid Build Coastguard Worker                 .wrmask = prefetch->wrmask, .half = prefetch->half_precision,
1566*61046927SAndroid Build Coastguard Worker                 .bindless = prefetch->bindless,
1567*61046927SAndroid Build Coastguard Worker                 .cmd = tu6_tex_opc_to_prefetch_cmd(prefetch->tex_opc), ).value);
1568*61046927SAndroid Build Coastguard Worker    }
1569*61046927SAndroid Build Coastguard Worker 
1570*61046927SAndroid Build Coastguard Worker    if (fs->num_sampler_prefetch > 0) {
1571*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_BINDLESS_PREFETCH_CMD(0), fs->num_sampler_prefetch);
1572*61046927SAndroid Build Coastguard Worker       for (int i = 0; i < fs->num_sampler_prefetch; i++) {
1573*61046927SAndroid Build Coastguard Worker          const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i];
1574*61046927SAndroid Build Coastguard Worker          tu_cs_emit(cs,
1575*61046927SAndroid Build Coastguard Worker                     A6XX_SP_FS_BINDLESS_PREFETCH_CMD_SAMP_ID(prefetch->samp_bindless_id) |
1576*61046927SAndroid Build Coastguard Worker                     A6XX_SP_FS_BINDLESS_PREFETCH_CMD_TEX_ID(prefetch->tex_bindless_id));
1577*61046927SAndroid Build Coastguard Worker       }
1578*61046927SAndroid Build Coastguard Worker    }
1579*61046927SAndroid Build Coastguard Worker 
1580*61046927SAndroid Build Coastguard Worker    tu_cs_emit_regs(cs,
1581*61046927SAndroid Build Coastguard Worker       HLSQ_CONTROL_1_REG(CHIP,
1582*61046927SAndroid Build Coastguard Worker          .primallocthreshold =
1583*61046927SAndroid Build Coastguard Worker             cs->device->physical_device->info->a6xx.prim_alloc_threshold),
1584*61046927SAndroid Build Coastguard Worker       HLSQ_CONTROL_2_REG(CHIP, .faceregid = face_regid,
1585*61046927SAndroid Build Coastguard Worker                          .sampleid = samp_id_regid,
1586*61046927SAndroid Build Coastguard Worker                          .samplemask = smask_in_regid,
1587*61046927SAndroid Build Coastguard Worker                          .centerrhw = ij_regid[IJ_PERSP_CENTER_RHW]),
1588*61046927SAndroid Build Coastguard Worker       HLSQ_CONTROL_3_REG(CHIP, .ij_persp_pixel = ij_regid[IJ_PERSP_PIXEL],
1589*61046927SAndroid Build Coastguard Worker                          .ij_linear_pixel = ij_regid[IJ_LINEAR_PIXEL],
1590*61046927SAndroid Build Coastguard Worker                          .ij_persp_centroid = ij_regid[IJ_PERSP_CENTROID],
1591*61046927SAndroid Build Coastguard Worker                          .ij_linear_centroid = ij_regid[IJ_LINEAR_CENTROID]),
1592*61046927SAndroid Build Coastguard Worker       HLSQ_CONTROL_4_REG(CHIP, .ij_persp_sample = ij_regid[IJ_PERSP_SAMPLE],
1593*61046927SAndroid Build Coastguard Worker                          .ij_linear_sample = ij_regid[IJ_LINEAR_SAMPLE],
1594*61046927SAndroid Build Coastguard Worker                          .xycoordregid = coord_regid,
1595*61046927SAndroid Build Coastguard Worker                          .zwcoordregid = zwcoord_regid),
1596*61046927SAndroid Build Coastguard Worker       HLSQ_CONTROL_5_REG(CHIP, .dword = 0xfcfc), );
1597*61046927SAndroid Build Coastguard Worker 
1598*61046927SAndroid Build Coastguard Worker    if (CHIP >= A7XX) {
1599*61046927SAndroid Build Coastguard Worker       uint32_t sysval_regs = 0;
1600*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < ARRAY_SIZE(ij_regid); i++) {
1601*61046927SAndroid Build Coastguard Worker          if (VALIDREG(ij_regid[i])) {
1602*61046927SAndroid Build Coastguard Worker             if (i == IJ_PERSP_CENTER_RHW)
1603*61046927SAndroid Build Coastguard Worker                sysval_regs += 1;
1604*61046927SAndroid Build Coastguard Worker             else
1605*61046927SAndroid Build Coastguard Worker                sysval_regs += 2;
1606*61046927SAndroid Build Coastguard Worker          }
1607*61046927SAndroid Build Coastguard Worker       }
1608*61046927SAndroid Build Coastguard Worker 
1609*61046927SAndroid Build Coastguard Worker       for (uint32_t sysval : { face_regid, samp_id_regid, smask_in_regid }) {
1610*61046927SAndroid Build Coastguard Worker          if (VALIDREG(sysval))
1611*61046927SAndroid Build Coastguard Worker             sysval_regs += 1;
1612*61046927SAndroid Build Coastguard Worker       }
1613*61046927SAndroid Build Coastguard Worker 
1614*61046927SAndroid Build Coastguard Worker       for (uint32_t sysval : { coord_regid, zwcoord_regid }) {
1615*61046927SAndroid Build Coastguard Worker          if (VALIDREG(sysval))
1616*61046927SAndroid Build Coastguard Worker             sysval_regs += 2;
1617*61046927SAndroid Build Coastguard Worker       }
1618*61046927SAndroid Build Coastguard Worker 
1619*61046927SAndroid Build Coastguard Worker       tu_cs_emit_regs(cs, A7XX_HLSQ_UNKNOWN_A9AE(.sysval_regs_count = sysval_regs,
1620*61046927SAndroid Build Coastguard Worker                                                  .unk8 = 1,
1621*61046927SAndroid Build Coastguard Worker                                                  .unk9 = 1));
1622*61046927SAndroid Build Coastguard Worker    }
1623*61046927SAndroid Build Coastguard Worker 
1624*61046927SAndroid Build Coastguard Worker    enum a6xx_threadsize thrsz = fs->info.double_threadsize ? THREAD128 : THREAD64;
1625*61046927SAndroid Build Coastguard Worker    tu_cs_emit_regs(cs, HLSQ_FS_CNTL_0(CHIP, .threadsize = thrsz, .varyings = enable_varyings));
1626*61046927SAndroid Build Coastguard Worker 
1627*61046927SAndroid Build Coastguard Worker    bool need_size = fs->frag_face || fs->fragcoord_compmask != 0;
1628*61046927SAndroid Build Coastguard Worker    bool need_size_persamp = false;
1629*61046927SAndroid Build Coastguard Worker    if (VALIDREG(ij_regid[IJ_PERSP_CENTER_RHW])) {
1630*61046927SAndroid Build Coastguard Worker       if (sample_shading)
1631*61046927SAndroid Build Coastguard Worker          need_size_persamp = true;
1632*61046927SAndroid Build Coastguard Worker       else
1633*61046927SAndroid Build Coastguard Worker          need_size = true;
1634*61046927SAndroid Build Coastguard Worker    }
1635*61046927SAndroid Build Coastguard Worker 
1636*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CNTL, 1);
1637*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs,
1638*61046927SAndroid Build Coastguard Worker          CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_GRAS_CNTL_IJ_PERSP_PIXEL) |
1639*61046927SAndroid Build Coastguard Worker          CONDREG(ij_regid[IJ_PERSP_CENTROID], A6XX_GRAS_CNTL_IJ_PERSP_CENTROID) |
1640*61046927SAndroid Build Coastguard Worker          CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_GRAS_CNTL_IJ_PERSP_SAMPLE) |
1641*61046927SAndroid Build Coastguard Worker          CONDREG(ij_regid[IJ_LINEAR_PIXEL], A6XX_GRAS_CNTL_IJ_LINEAR_PIXEL) |
1642*61046927SAndroid Build Coastguard Worker          CONDREG(ij_regid[IJ_LINEAR_CENTROID], A6XX_GRAS_CNTL_IJ_LINEAR_CENTROID) |
1643*61046927SAndroid Build Coastguard Worker          CONDREG(ij_regid[IJ_LINEAR_SAMPLE], A6XX_GRAS_CNTL_IJ_LINEAR_SAMPLE) |
1644*61046927SAndroid Build Coastguard Worker          COND(need_size, A6XX_GRAS_CNTL_IJ_LINEAR_PIXEL) |
1645*61046927SAndroid Build Coastguard Worker          COND(need_size_persamp, A6XX_GRAS_CNTL_IJ_LINEAR_SAMPLE) |
1646*61046927SAndroid Build Coastguard Worker          COND(fs->fragcoord_compmask != 0, A6XX_GRAS_CNTL_COORD_MASK(fs->fragcoord_compmask)));
1647*61046927SAndroid Build Coastguard Worker 
1648*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_CONTROL0, 2);
1649*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs,
1650*61046927SAndroid Build Coastguard Worker          CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_RB_RENDER_CONTROL0_IJ_PERSP_PIXEL) |
1651*61046927SAndroid Build Coastguard Worker          CONDREG(ij_regid[IJ_PERSP_CENTROID], A6XX_RB_RENDER_CONTROL0_IJ_PERSP_CENTROID) |
1652*61046927SAndroid Build Coastguard Worker          CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_RB_RENDER_CONTROL0_IJ_PERSP_SAMPLE) |
1653*61046927SAndroid Build Coastguard Worker          CONDREG(ij_regid[IJ_LINEAR_PIXEL], A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_PIXEL) |
1654*61046927SAndroid Build Coastguard Worker          CONDREG(ij_regid[IJ_LINEAR_CENTROID], A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_CENTROID) |
1655*61046927SAndroid Build Coastguard Worker          CONDREG(ij_regid[IJ_LINEAR_SAMPLE], A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_SAMPLE) |
1656*61046927SAndroid Build Coastguard Worker          COND(need_size, A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_PIXEL) |
1657*61046927SAndroid Build Coastguard Worker          COND(enable_varyings, A6XX_RB_RENDER_CONTROL0_UNK10) |
1658*61046927SAndroid Build Coastguard Worker          COND(need_size_persamp, A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_SAMPLE) |
1659*61046927SAndroid Build Coastguard Worker          COND(fs->fragcoord_compmask != 0,
1660*61046927SAndroid Build Coastguard Worker                            A6XX_RB_RENDER_CONTROL0_COORD_MASK(fs->fragcoord_compmask)));
1661*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs,
1662*61046927SAndroid Build Coastguard Worker          A6XX_RB_RENDER_CONTROL1_FRAGCOORDSAMPLEMODE(
1663*61046927SAndroid Build Coastguard Worker             sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER) |
1664*61046927SAndroid Build Coastguard Worker          CONDREG(smask_in_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEMASK) |
1665*61046927SAndroid Build Coastguard Worker          CONDREG(samp_id_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEID) |
1666*61046927SAndroid Build Coastguard Worker          CONDREG(ij_regid[IJ_PERSP_CENTER_RHW], A6XX_RB_RENDER_CONTROL1_CENTERRHW) |
1667*61046927SAndroid Build Coastguard Worker          COND(fs->post_depth_coverage, A6XX_RB_RENDER_CONTROL1_POSTDEPTHCOVERAGE)  |
1668*61046927SAndroid Build Coastguard Worker          COND(fs->frag_face, A6XX_RB_RENDER_CONTROL1_FACENESS));
1669*61046927SAndroid Build Coastguard Worker 
1670*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CNTL, 1);
1671*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, COND(sample_shading, A6XX_RB_SAMPLE_CNTL_PER_SAMP_MODE));
1672*61046927SAndroid Build Coastguard Worker 
1673*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_PS_INPUT_CNTL, 1);
1674*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, CONDREG(samp_id_regid, A6XX_GRAS_LRZ_PS_INPUT_CNTL_SAMPLEID) |
1675*61046927SAndroid Build Coastguard Worker               A6XX_GRAS_LRZ_PS_INPUT_CNTL_FRAGCOORDSAMPLEMODE(
1676*61046927SAndroid Build Coastguard Worker                  sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER));
1677*61046927SAndroid Build Coastguard Worker 
1678*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CNTL, 1);
1679*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, COND(sample_shading, A6XX_GRAS_SAMPLE_CNTL_PER_SAMP_MODE));
1680*61046927SAndroid Build Coastguard Worker 
1681*61046927SAndroid Build Coastguard Worker    uint32_t varmask[4] = { 0 };
1682*61046927SAndroid Build Coastguard Worker 
1683*61046927SAndroid Build Coastguard Worker    for (int i = ir3_next_varying(fs, -1); i < fs->inputs_count;
1684*61046927SAndroid Build Coastguard Worker         i = ir3_next_varying(fs, i)) {
1685*61046927SAndroid Build Coastguard Worker       if (fs->inputs[i].inloc >= fs->total_in)
1686*61046927SAndroid Build Coastguard Worker          continue;
1687*61046927SAndroid Build Coastguard Worker 
1688*61046927SAndroid Build Coastguard Worker       unsigned loc = fs->inputs[i].inloc;
1689*61046927SAndroid Build Coastguard Worker       for (int j = 0; j < util_last_bit(fs->inputs[i].compmask); j++) {
1690*61046927SAndroid Build Coastguard Worker          uint8_t comploc = loc + j;
1691*61046927SAndroid Build Coastguard Worker          varmask[comploc / 32] |= 1 << (comploc % 32);
1692*61046927SAndroid Build Coastguard Worker       }
1693*61046927SAndroid Build Coastguard Worker    }
1694*61046927SAndroid Build Coastguard Worker 
1695*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VAR_DISABLE(0), 4);
1696*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, ~varmask[0]);
1697*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, ~varmask[1]);
1698*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, ~varmask[2]);
1699*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, ~varmask[3]);
1700*61046927SAndroid Build Coastguard Worker 
1701*61046927SAndroid Build Coastguard Worker    unsigned primid_loc = ir3_find_input_loc(fs, VARYING_SLOT_PRIMITIVE_ID);
1702*61046927SAndroid Build Coastguard Worker    unsigned viewid_loc = ir3_find_input_loc(fs, VARYING_SLOT_VIEW_INDEX);
1703*61046927SAndroid Build Coastguard Worker 
1704*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_VPC_CNTL_0, 1);
1705*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, A6XX_VPC_CNTL_0_NUMNONPOSVAR(fs->total_in) |
1706*61046927SAndroid Build Coastguard Worker                   COND(fs && fs->total_in, A6XX_VPC_CNTL_0_VARYING) |
1707*61046927SAndroid Build Coastguard Worker                   A6XX_VPC_CNTL_0_PRIMIDLOC(primid_loc) |
1708*61046927SAndroid Build Coastguard Worker                   A6XX_VPC_CNTL_0_VIEWIDLOC(viewid_loc));
1709*61046927SAndroid Build Coastguard Worker }
1710*61046927SAndroid Build Coastguard Worker 
1711*61046927SAndroid Build Coastguard Worker static void
tu6_emit_fs_outputs(struct tu_cs * cs,const struct ir3_shader_variant * fs)1712*61046927SAndroid Build Coastguard Worker tu6_emit_fs_outputs(struct tu_cs *cs,
1713*61046927SAndroid Build Coastguard Worker                     const struct ir3_shader_variant *fs)
1714*61046927SAndroid Build Coastguard Worker {
1715*61046927SAndroid Build Coastguard Worker    uint32_t smask_regid, posz_regid, stencilref_regid;
1716*61046927SAndroid Build Coastguard Worker 
1717*61046927SAndroid Build Coastguard Worker    posz_regid      = ir3_find_output_regid(fs, FRAG_RESULT_DEPTH);
1718*61046927SAndroid Build Coastguard Worker    smask_regid     = ir3_find_output_regid(fs, FRAG_RESULT_SAMPLE_MASK);
1719*61046927SAndroid Build Coastguard Worker    stencilref_regid = ir3_find_output_regid(fs, FRAG_RESULT_STENCIL);
1720*61046927SAndroid Build Coastguard Worker 
1721*61046927SAndroid Build Coastguard Worker    int output_reg_count = 0;
1722*61046927SAndroid Build Coastguard Worker    uint32_t fragdata_regid[8];
1723*61046927SAndroid Build Coastguard Worker 
1724*61046927SAndroid Build Coastguard Worker    assert(!fs->color0_mrt);
1725*61046927SAndroid Build Coastguard Worker    for (uint32_t i = 0; i < ARRAY_SIZE(fragdata_regid); i++) {
1726*61046927SAndroid Build Coastguard Worker       fragdata_regid[i] = ir3_find_output_regid(fs, FRAG_RESULT_DATA0 + i);
1727*61046927SAndroid Build Coastguard Worker       if (VALIDREG(fragdata_regid[i]))
1728*61046927SAndroid Build Coastguard Worker          output_reg_count = i + 1;
1729*61046927SAndroid Build Coastguard Worker    }
1730*61046927SAndroid Build Coastguard Worker 
1731*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 1);
1732*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(posz_regid) |
1733*61046927SAndroid Build Coastguard Worker                   A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(smask_regid) |
1734*61046927SAndroid Build Coastguard Worker                   A6XX_SP_FS_OUTPUT_CNTL0_STENCILREF_REGID(stencilref_regid) |
1735*61046927SAndroid Build Coastguard Worker                   COND(fs->dual_src_blend, A6XX_SP_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
1736*61046927SAndroid Build Coastguard Worker 
1737*61046927SAndroid Build Coastguard Worker    /* There is no point in having component enabled which is not written
1738*61046927SAndroid Build Coastguard Worker     * by the shader. Per VK spec it is an UB, however a few apps depend on
1739*61046927SAndroid Build Coastguard Worker     * attachment not being changed if FS doesn't have corresponding output.
1740*61046927SAndroid Build Coastguard Worker     */
1741*61046927SAndroid Build Coastguard Worker    uint32_t fs_render_components = 0;
1742*61046927SAndroid Build Coastguard Worker 
1743*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), output_reg_count);
1744*61046927SAndroid Build Coastguard Worker    for (uint32_t i = 0; i < output_reg_count; i++) {
1745*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(fragdata_regid[i]) |
1746*61046927SAndroid Build Coastguard Worker                      (COND(fragdata_regid[i] & HALF_REG_ID,
1747*61046927SAndroid Build Coastguard Worker                            A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION)));
1748*61046927SAndroid Build Coastguard Worker 
1749*61046927SAndroid Build Coastguard Worker       if (VALIDREG(fragdata_regid[i])) {
1750*61046927SAndroid Build Coastguard Worker          fs_render_components |= 0xf << (i * 4);
1751*61046927SAndroid Build Coastguard Worker       }
1752*61046927SAndroid Build Coastguard Worker    }
1753*61046927SAndroid Build Coastguard Worker 
1754*61046927SAndroid Build Coastguard Worker    tu_cs_emit_regs(cs,
1755*61046927SAndroid Build Coastguard Worker                    A6XX_SP_FS_RENDER_COMPONENTS(.dword = fs_render_components));
1756*61046927SAndroid Build Coastguard Worker 
1757*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_RB_FS_OUTPUT_CNTL0, 1);
1758*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, COND(fs->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z) |
1759*61046927SAndroid Build Coastguard Worker                   COND(fs->writes_smask, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_SAMPMASK) |
1760*61046927SAndroid Build Coastguard Worker                   COND(fs->writes_stencilref, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_STENCILREF) |
1761*61046927SAndroid Build Coastguard Worker                   COND(fs->dual_src_blend, A6XX_RB_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
1762*61046927SAndroid Build Coastguard Worker 
1763*61046927SAndroid Build Coastguard Worker    tu_cs_emit_regs(cs,
1764*61046927SAndroid Build Coastguard Worker                    A6XX_RB_RENDER_COMPONENTS(.dword = fs_render_components));
1765*61046927SAndroid Build Coastguard Worker }
1766*61046927SAndroid Build Coastguard Worker 
1767*61046927SAndroid Build Coastguard Worker template <chip CHIP>
1768*61046927SAndroid Build Coastguard Worker void
tu6_emit_vs(struct tu_cs * cs,const struct ir3_shader_variant * vs,uint32_t view_mask)1769*61046927SAndroid Build Coastguard Worker tu6_emit_vs(struct tu_cs *cs,
1770*61046927SAndroid Build Coastguard Worker             const struct ir3_shader_variant *vs,
1771*61046927SAndroid Build Coastguard Worker             uint32_t view_mask)
1772*61046927SAndroid Build Coastguard Worker {
1773*61046927SAndroid Build Coastguard Worker    bool multi_pos_output = vs->multi_pos_output;
1774*61046927SAndroid Build Coastguard Worker 
1775*61046927SAndroid Build Coastguard Worker    uint32_t multiview_views = util_logbase2(view_mask) + 1;
1776*61046927SAndroid Build Coastguard Worker    uint32_t multiview_cntl = view_mask ?
1777*61046927SAndroid Build Coastguard Worker       A6XX_PC_MULTIVIEW_CNTL_ENABLE |
1778*61046927SAndroid Build Coastguard Worker       A6XX_PC_MULTIVIEW_CNTL_VIEWS(multiview_views) |
1779*61046927SAndroid Build Coastguard Worker       COND(!multi_pos_output, A6XX_PC_MULTIVIEW_CNTL_DISABLEMULTIPOS)
1780*61046927SAndroid Build Coastguard Worker       : 0;
1781*61046927SAndroid Build Coastguard Worker 
1782*61046927SAndroid Build Coastguard Worker    /* Copy what the blob does here. This will emit an extra 0x3f
1783*61046927SAndroid Build Coastguard Worker     * CP_EVENT_WRITE when multiview is disabled. I'm not exactly sure what
1784*61046927SAndroid Build Coastguard Worker     * this is working around yet.
1785*61046927SAndroid Build Coastguard Worker     */
1786*61046927SAndroid Build Coastguard Worker    if (cs->device->physical_device->info->a6xx.has_cp_reg_write) {
1787*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3);
1788*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(UNK_EVENT_WRITE));
1789*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, REG_A6XX_PC_MULTIVIEW_CNTL);
1790*61046927SAndroid Build Coastguard Worker    } else {
1791*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt4(cs, REG_A6XX_PC_MULTIVIEW_CNTL, 1);
1792*61046927SAndroid Build Coastguard Worker    }
1793*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, multiview_cntl);
1794*61046927SAndroid Build Coastguard Worker 
1795*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_VFD_MULTIVIEW_CNTL, 1);
1796*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, multiview_cntl);
1797*61046927SAndroid Build Coastguard Worker 
1798*61046927SAndroid Build Coastguard Worker    if (multiview_cntl &&
1799*61046927SAndroid Build Coastguard Worker        cs->device->physical_device->info->a6xx.supports_multiview_mask) {
1800*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt4(cs, REG_A6XX_PC_MULTIVIEW_MASK, 1);
1801*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, view_mask);
1802*61046927SAndroid Build Coastguard Worker    }
1803*61046927SAndroid Build Coastguard Worker 
1804*61046927SAndroid Build Coastguard Worker    if (CHIP >= A7XX) {
1805*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt4(cs, REG_A7XX_VPC_MULTIVIEW_CNTL, 1);
1806*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, multiview_cntl);
1807*61046927SAndroid Build Coastguard Worker 
1808*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt4(cs, REG_A7XX_VPC_MULTIVIEW_MASK, 1);
1809*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, view_mask);
1810*61046927SAndroid Build Coastguard Worker    }
1811*61046927SAndroid Build Coastguard Worker 
1812*61046927SAndroid Build Coastguard Worker    tu6_emit_vfd_dest(cs, vs);
1813*61046927SAndroid Build Coastguard Worker 
1814*61046927SAndroid Build Coastguard Worker    const uint32_t vertexid_regid =
1815*61046927SAndroid Build Coastguard Worker          ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID);
1816*61046927SAndroid Build Coastguard Worker    const uint32_t instanceid_regid =
1817*61046927SAndroid Build Coastguard Worker          ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID);
1818*61046927SAndroid Build Coastguard Worker 
1819*61046927SAndroid Build Coastguard Worker    /* Note: we currently don't support multiview with tess or GS. If we did,
1820*61046927SAndroid Build Coastguard Worker     * and the HW actually works, then we'd have to somehow share this across
1821*61046927SAndroid Build Coastguard Worker     * stages. Note that the blob doesn't support this either.
1822*61046927SAndroid Build Coastguard Worker     */
1823*61046927SAndroid Build Coastguard Worker    const uint32_t viewid_regid =
1824*61046927SAndroid Build Coastguard Worker       ir3_find_sysval_regid(vs, SYSTEM_VALUE_VIEW_INDEX);
1825*61046927SAndroid Build Coastguard Worker 
1826*61046927SAndroid Build Coastguard Worker    const uint32_t vs_primitiveid_regid =
1827*61046927SAndroid Build Coastguard Worker       ir3_find_sysval_regid(vs, SYSTEM_VALUE_PRIMITIVE_ID);
1828*61046927SAndroid Build Coastguard Worker 
1829*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_1, 1);
1830*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, A6XX_VFD_CONTROL_1_REGID4VTX(vertexid_regid) |
1831*61046927SAndroid Build Coastguard Worker                   A6XX_VFD_CONTROL_1_REGID4INST(instanceid_regid) |
1832*61046927SAndroid Build Coastguard Worker                   A6XX_VFD_CONTROL_1_REGID4PRIMID(vs_primitiveid_regid) |
1833*61046927SAndroid Build Coastguard Worker                   A6XX_VFD_CONTROL_1_REGID4VIEWID(viewid_regid));
1834*61046927SAndroid Build Coastguard Worker }
1835*61046927SAndroid Build Coastguard Worker TU_GENX(tu6_emit_vs);
1836*61046927SAndroid Build Coastguard Worker 
1837*61046927SAndroid Build Coastguard Worker template <chip CHIP>
1838*61046927SAndroid Build Coastguard Worker void
tu6_emit_hs(struct tu_cs * cs,const struct ir3_shader_variant * hs)1839*61046927SAndroid Build Coastguard Worker tu6_emit_hs(struct tu_cs *cs,
1840*61046927SAndroid Build Coastguard Worker             const struct ir3_shader_variant *hs)
1841*61046927SAndroid Build Coastguard Worker {
1842*61046927SAndroid Build Coastguard Worker    const uint32_t hs_rel_patch_regid =
1843*61046927SAndroid Build Coastguard Worker          ir3_find_sysval_regid(hs, SYSTEM_VALUE_REL_PATCH_ID_IR3);
1844*61046927SAndroid Build Coastguard Worker    const uint32_t hs_invocation_regid =
1845*61046927SAndroid Build Coastguard Worker          ir3_find_sysval_regid(hs, SYSTEM_VALUE_TCS_HEADER_IR3);
1846*61046927SAndroid Build Coastguard Worker 
1847*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_2, 1);
1848*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, A6XX_VFD_CONTROL_2_REGID_HSRELPATCHID(hs_rel_patch_regid) |
1849*61046927SAndroid Build Coastguard Worker                   A6XX_VFD_CONTROL_2_REGID_INVOCATIONID(hs_invocation_regid));
1850*61046927SAndroid Build Coastguard Worker 
1851*61046927SAndroid Build Coastguard Worker    if (hs) {
1852*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_NUM_VERTEX, 1);
1853*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, hs->tess.tcs_vertices_out);
1854*61046927SAndroid Build Coastguard Worker    }
1855*61046927SAndroid Build Coastguard Worker }
1856*61046927SAndroid Build Coastguard Worker TU_GENX(tu6_emit_hs);
1857*61046927SAndroid Build Coastguard Worker 
1858*61046927SAndroid Build Coastguard Worker template <chip CHIP>
1859*61046927SAndroid Build Coastguard Worker void
tu6_emit_ds(struct tu_cs * cs,const struct ir3_shader_variant * ds)1860*61046927SAndroid Build Coastguard Worker tu6_emit_ds(struct tu_cs *cs,
1861*61046927SAndroid Build Coastguard Worker             const struct ir3_shader_variant *ds)
1862*61046927SAndroid Build Coastguard Worker {
1863*61046927SAndroid Build Coastguard Worker    const uint32_t ds_rel_patch_regid =
1864*61046927SAndroid Build Coastguard Worker          ir3_find_sysval_regid(ds, SYSTEM_VALUE_REL_PATCH_ID_IR3);
1865*61046927SAndroid Build Coastguard Worker    const uint32_t tess_coord_x_regid =
1866*61046927SAndroid Build Coastguard Worker          ir3_find_sysval_regid(ds, SYSTEM_VALUE_TESS_COORD);
1867*61046927SAndroid Build Coastguard Worker    const uint32_t tess_coord_y_regid = VALIDREG(tess_coord_x_regid) ?
1868*61046927SAndroid Build Coastguard Worker          tess_coord_x_regid + 1 :
1869*61046927SAndroid Build Coastguard Worker          regid(63, 0);
1870*61046927SAndroid Build Coastguard Worker    const uint32_t ds_primitiveid_regid =
1871*61046927SAndroid Build Coastguard Worker          ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID);
1872*61046927SAndroid Build Coastguard Worker 
1873*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_3, 2);
1874*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, A6XX_VFD_CONTROL_3_REGID_DSRELPATCHID(ds_rel_patch_regid) |
1875*61046927SAndroid Build Coastguard Worker                   A6XX_VFD_CONTROL_3_REGID_TESSX(tess_coord_x_regid) |
1876*61046927SAndroid Build Coastguard Worker                   A6XX_VFD_CONTROL_3_REGID_TESSY(tess_coord_y_regid) |
1877*61046927SAndroid Build Coastguard Worker                   A6XX_VFD_CONTROL_3_REGID_DSPRIMID(ds_primitiveid_regid));
1878*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, 0x000000fc); /* VFD_CONTROL_4 */
1879*61046927SAndroid Build Coastguard Worker }
1880*61046927SAndroid Build Coastguard Worker TU_GENX(tu6_emit_ds);
1881*61046927SAndroid Build Coastguard Worker 
1882*61046927SAndroid Build Coastguard Worker static enum a6xx_tess_output
primitive_to_tess(enum mesa_prim primitive)1883*61046927SAndroid Build Coastguard Worker primitive_to_tess(enum mesa_prim primitive) {
1884*61046927SAndroid Build Coastguard Worker    switch (primitive) {
1885*61046927SAndroid Build Coastguard Worker    case MESA_PRIM_POINTS:
1886*61046927SAndroid Build Coastguard Worker       return TESS_POINTS;
1887*61046927SAndroid Build Coastguard Worker    case MESA_PRIM_LINE_STRIP:
1888*61046927SAndroid Build Coastguard Worker       return TESS_LINES;
1889*61046927SAndroid Build Coastguard Worker    case MESA_PRIM_TRIANGLE_STRIP:
1890*61046927SAndroid Build Coastguard Worker       return TESS_CW_TRIS;
1891*61046927SAndroid Build Coastguard Worker    default:
1892*61046927SAndroid Build Coastguard Worker       unreachable("");
1893*61046927SAndroid Build Coastguard Worker    }
1894*61046927SAndroid Build Coastguard Worker }
1895*61046927SAndroid Build Coastguard Worker 
1896*61046927SAndroid Build Coastguard Worker template <chip CHIP>
1897*61046927SAndroid Build Coastguard Worker void
tu6_emit_gs(struct tu_cs * cs,const struct ir3_shader_variant * gs)1898*61046927SAndroid Build Coastguard Worker tu6_emit_gs(struct tu_cs *cs,
1899*61046927SAndroid Build Coastguard Worker             const struct ir3_shader_variant *gs)
1900*61046927SAndroid Build Coastguard Worker {
1901*61046927SAndroid Build Coastguard Worker    const uint32_t gsheader_regid =
1902*61046927SAndroid Build Coastguard Worker          ir3_find_sysval_regid(gs, SYSTEM_VALUE_GS_HEADER_IR3);
1903*61046927SAndroid Build Coastguard Worker 
1904*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_5, 1);
1905*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, A6XX_VFD_CONTROL_5_REGID_GSHEADER(gsheader_regid) |
1906*61046927SAndroid Build Coastguard Worker                   0xfc00);
1907*61046927SAndroid Build Coastguard Worker 
1908*61046927SAndroid Build Coastguard Worker    if (gs) {
1909*61046927SAndroid Build Coastguard Worker       uint32_t vertices_out, invocations;
1910*61046927SAndroid Build Coastguard Worker 
1911*61046927SAndroid Build Coastguard Worker       vertices_out = gs->gs.vertices_out - 1;
1912*61046927SAndroid Build Coastguard Worker       enum a6xx_tess_output output = primitive_to_tess((enum mesa_prim) gs->gs.output_primitive);
1913*61046927SAndroid Build Coastguard Worker       invocations = gs->gs.invocations - 1;
1914*61046927SAndroid Build Coastguard Worker 
1915*61046927SAndroid Build Coastguard Worker       uint32_t primitive_cntl =
1916*61046927SAndroid Build Coastguard Worker          A6XX_PC_PRIMITIVE_CNTL_5(.gs_vertices_out = vertices_out,
1917*61046927SAndroid Build Coastguard Worker                                   .gs_invocations = invocations,
1918*61046927SAndroid Build Coastguard Worker                                   .gs_output = output,).value;
1919*61046927SAndroid Build Coastguard Worker 
1920*61046927SAndroid Build Coastguard Worker       tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_5, 1);
1921*61046927SAndroid Build Coastguard Worker       tu_cs_emit(cs, primitive_cntl);
1922*61046927SAndroid Build Coastguard Worker 
1923*61046927SAndroid Build Coastguard Worker       if (CHIP >= A7XX) {
1924*61046927SAndroid Build Coastguard Worker          tu_cs_emit_pkt4(cs, REG_A7XX_VPC_PRIMITIVE_CNTL_5, 1);
1925*61046927SAndroid Build Coastguard Worker          tu_cs_emit(cs, primitive_cntl);
1926*61046927SAndroid Build Coastguard Worker       } else {
1927*61046927SAndroid Build Coastguard Worker          tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_PARAM, 1);
1928*61046927SAndroid Build Coastguard Worker          tu_cs_emit(cs, 0xff);
1929*61046927SAndroid Build Coastguard Worker       }
1930*61046927SAndroid Build Coastguard Worker    }
1931*61046927SAndroid Build Coastguard Worker }
1932*61046927SAndroid Build Coastguard Worker TU_GENX(tu6_emit_gs);
1933*61046927SAndroid Build Coastguard Worker 
1934*61046927SAndroid Build Coastguard Worker template <chip CHIP>
1935*61046927SAndroid Build Coastguard Worker void
tu6_emit_fs(struct tu_cs * cs,const struct ir3_shader_variant * fs)1936*61046927SAndroid Build Coastguard Worker tu6_emit_fs(struct tu_cs *cs,
1937*61046927SAndroid Build Coastguard Worker             const struct ir3_shader_variant *fs)
1938*61046927SAndroid Build Coastguard Worker {
1939*61046927SAndroid Build Coastguard Worker    tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_6, 1);
1940*61046927SAndroid Build Coastguard Worker    tu_cs_emit(cs, COND(fs && fs->reads_primid, A6XX_VFD_CONTROL_6_PRIMID4PSEN));
1941*61046927SAndroid Build Coastguard Worker 
1942*61046927SAndroid Build Coastguard Worker    tu_cs_emit_regs(cs, A6XX_PC_PS_CNTL(.primitiveiden = fs && fs->reads_primid));
1943*61046927SAndroid Build Coastguard Worker 
1944*61046927SAndroid Build Coastguard Worker    if (CHIP >= A7XX) {
1945*61046927SAndroid Build Coastguard Worker       tu_cs_emit_regs(cs, A6XX_GRAS_UNKNOWN_8110(0x2));
1946*61046927SAndroid Build Coastguard Worker       tu_cs_emit_regs(cs, A7XX_HLSQ_FS_UNKNOWN_A9AA(.consts_load_disable = false));
1947*61046927SAndroid Build Coastguard Worker    }
1948*61046927SAndroid Build Coastguard Worker 
1949*61046927SAndroid Build Coastguard Worker    if (fs) {
1950*61046927SAndroid Build Coastguard Worker       tu6_emit_fs_inputs<CHIP>(cs, fs);
1951*61046927SAndroid Build Coastguard Worker       tu6_emit_fs_outputs(cs, fs);
1952*61046927SAndroid Build Coastguard Worker    } else {
1953*61046927SAndroid Build Coastguard Worker       /* TODO: check if these can be skipped if fs is disabled */
1954*61046927SAndroid Build Coastguard Worker       struct ir3_shader_variant dummy_variant = {};
1955*61046927SAndroid Build Coastguard Worker       tu6_emit_fs_inputs<CHIP>(cs, &dummy_variant);
1956*61046927SAndroid Build Coastguard Worker       tu6_emit_fs_outputs(cs, &dummy_variant);
1957*61046927SAndroid Build Coastguard Worker    }
1958*61046927SAndroid Build Coastguard Worker }
1959*61046927SAndroid Build Coastguard Worker TU_GENX(tu6_emit_fs);
1960*61046927SAndroid Build Coastguard Worker 
1961*61046927SAndroid Build Coastguard Worker template <chip CHIP>
1962*61046927SAndroid Build Coastguard Worker static void
tu6_emit_variant(struct tu_cs * cs,gl_shader_stage stage,const struct ir3_shader_variant * xs,struct tu_pvtmem_config * pvtmem_config,uint32_t view_mask,uint64_t binary_iova)1963*61046927SAndroid Build Coastguard Worker tu6_emit_variant(struct tu_cs *cs,
1964*61046927SAndroid Build Coastguard Worker                  gl_shader_stage stage,
1965*61046927SAndroid Build Coastguard Worker                  const struct ir3_shader_variant *xs,
1966*61046927SAndroid Build Coastguard Worker                  struct tu_pvtmem_config *pvtmem_config,
1967*61046927SAndroid Build Coastguard Worker                  uint32_t view_mask,
1968*61046927SAndroid Build Coastguard Worker                  uint64_t binary_iova)
1969*61046927SAndroid Build Coastguard Worker {
1970*61046927SAndroid Build Coastguard Worker    if (stage == MESA_SHADER_COMPUTE) {
1971*61046927SAndroid Build Coastguard Worker       tu6_emit_cs_config<CHIP>(cs, xs, pvtmem_config, binary_iova);
1972*61046927SAndroid Build Coastguard Worker       return;
1973*61046927SAndroid Build Coastguard Worker    }
1974*61046927SAndroid Build Coastguard Worker 
1975*61046927SAndroid Build Coastguard Worker    tu6_emit_xs(cs, stage, xs, pvtmem_config, binary_iova);
1976*61046927SAndroid Build Coastguard Worker 
1977*61046927SAndroid Build Coastguard Worker    switch (stage) {
1978*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_VERTEX:
1979*61046927SAndroid Build Coastguard Worker       tu6_emit_vs<CHIP>(cs, xs, view_mask);
1980*61046927SAndroid Build Coastguard Worker       break;
1981*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_TESS_CTRL:
1982*61046927SAndroid Build Coastguard Worker       tu6_emit_hs<CHIP>(cs, xs);
1983*61046927SAndroid Build Coastguard Worker       break;
1984*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_TESS_EVAL:
1985*61046927SAndroid Build Coastguard Worker       tu6_emit_ds<CHIP>(cs, xs);
1986*61046927SAndroid Build Coastguard Worker       break;
1987*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_GEOMETRY:
1988*61046927SAndroid Build Coastguard Worker       tu6_emit_gs<CHIP>(cs, xs);
1989*61046927SAndroid Build Coastguard Worker       break;
1990*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_FRAGMENT:
1991*61046927SAndroid Build Coastguard Worker       tu6_emit_fs<CHIP>(cs, xs);
1992*61046927SAndroid Build Coastguard Worker       break;
1993*61046927SAndroid Build Coastguard Worker    default:
1994*61046927SAndroid Build Coastguard Worker       unreachable("unknown shader stage");
1995*61046927SAndroid Build Coastguard Worker    }
1996*61046927SAndroid Build Coastguard Worker }
1997*61046927SAndroid Build Coastguard Worker 
1998*61046927SAndroid Build Coastguard Worker static VkResult
tu_setup_pvtmem(struct tu_device * dev,struct tu_shader * shader,struct tu_pvtmem_config * config,uint32_t pvtmem_bytes,bool per_wave)1999*61046927SAndroid Build Coastguard Worker tu_setup_pvtmem(struct tu_device *dev,
2000*61046927SAndroid Build Coastguard Worker                 struct tu_shader *shader,
2001*61046927SAndroid Build Coastguard Worker                 struct tu_pvtmem_config *config,
2002*61046927SAndroid Build Coastguard Worker                 uint32_t pvtmem_bytes,
2003*61046927SAndroid Build Coastguard Worker                 bool per_wave)
2004*61046927SAndroid Build Coastguard Worker {
2005*61046927SAndroid Build Coastguard Worker    if (!pvtmem_bytes) {
2006*61046927SAndroid Build Coastguard Worker       memset(config, 0, sizeof(*config));
2007*61046927SAndroid Build Coastguard Worker       return VK_SUCCESS;
2008*61046927SAndroid Build Coastguard Worker    }
2009*61046927SAndroid Build Coastguard Worker 
2010*61046927SAndroid Build Coastguard Worker    /* There is a substantial memory footprint from private memory BOs being
2011*61046927SAndroid Build Coastguard Worker     * allocated on a per-pipeline basis and it isn't required as the same
2012*61046927SAndroid Build Coastguard Worker     * BO can be utilized by multiple pipelines as long as they have the
2013*61046927SAndroid Build Coastguard Worker     * private memory layout (sizes and per-wave/per-fiber) to avoid being
2014*61046927SAndroid Build Coastguard Worker     * overwritten by other active pipelines using the same BO with differing
2015*61046927SAndroid Build Coastguard Worker     * private memory layouts resulting memory corruption.
2016*61046927SAndroid Build Coastguard Worker     *
2017*61046927SAndroid Build Coastguard Worker     * To avoid this, we create private memory BOs on a per-device level with
2018*61046927SAndroid Build Coastguard Worker     * an associated private memory layout then dynamically grow them when
2019*61046927SAndroid Build Coastguard Worker     * needed and reuse them across pipelines. Growth is done in terms of
2020*61046927SAndroid Build Coastguard Worker     * powers of two so that we can avoid frequent reallocation of the
2021*61046927SAndroid Build Coastguard Worker     * private memory BOs.
2022*61046927SAndroid Build Coastguard Worker     */
2023*61046927SAndroid Build Coastguard Worker 
2024*61046927SAndroid Build Coastguard Worker    struct tu_pvtmem_bo *pvtmem_bo =
2025*61046927SAndroid Build Coastguard Worker       per_wave ? &dev->wave_pvtmem_bo : &dev->fiber_pvtmem_bo;
2026*61046927SAndroid Build Coastguard Worker    mtx_lock(&pvtmem_bo->mtx);
2027*61046927SAndroid Build Coastguard Worker 
2028*61046927SAndroid Build Coastguard Worker    if (pvtmem_bo->per_fiber_size < pvtmem_bytes) {
2029*61046927SAndroid Build Coastguard Worker       if (pvtmem_bo->bo)
2030*61046927SAndroid Build Coastguard Worker          tu_bo_finish(dev, pvtmem_bo->bo);
2031*61046927SAndroid Build Coastguard Worker 
2032*61046927SAndroid Build Coastguard Worker       pvtmem_bo->per_fiber_size =
2033*61046927SAndroid Build Coastguard Worker          util_next_power_of_two(ALIGN(pvtmem_bytes, 512));
2034*61046927SAndroid Build Coastguard Worker       pvtmem_bo->per_sp_size =
2035*61046927SAndroid Build Coastguard Worker          ALIGN(pvtmem_bo->per_fiber_size *
2036*61046927SAndroid Build Coastguard Worker                   dev->physical_device->info->fibers_per_sp,
2037*61046927SAndroid Build Coastguard Worker                1 << 12);
2038*61046927SAndroid Build Coastguard Worker       uint32_t total_size =
2039*61046927SAndroid Build Coastguard Worker          dev->physical_device->info->num_sp_cores * pvtmem_bo->per_sp_size;
2040*61046927SAndroid Build Coastguard Worker 
2041*61046927SAndroid Build Coastguard Worker       VkResult result = tu_bo_init_new(dev, NULL, &pvtmem_bo->bo, total_size,
2042*61046927SAndroid Build Coastguard Worker                                        TU_BO_ALLOC_INTERNAL_RESOURCE, "pvtmem");
2043*61046927SAndroid Build Coastguard Worker       if (result != VK_SUCCESS) {
2044*61046927SAndroid Build Coastguard Worker          mtx_unlock(&pvtmem_bo->mtx);
2045*61046927SAndroid Build Coastguard Worker          return result;
2046*61046927SAndroid Build Coastguard Worker       }
2047*61046927SAndroid Build Coastguard Worker    }
2048*61046927SAndroid Build Coastguard Worker 
2049*61046927SAndroid Build Coastguard Worker    config->per_wave = per_wave;
2050*61046927SAndroid Build Coastguard Worker    config->per_fiber_size = pvtmem_bo->per_fiber_size;
2051*61046927SAndroid Build Coastguard Worker    config->per_sp_size = pvtmem_bo->per_sp_size;
2052*61046927SAndroid Build Coastguard Worker 
2053*61046927SAndroid Build Coastguard Worker    shader->pvtmem_bo = tu_bo_get_ref(pvtmem_bo->bo);
2054*61046927SAndroid Build Coastguard Worker    config->iova = shader->pvtmem_bo->iova;
2055*61046927SAndroid Build Coastguard Worker 
2056*61046927SAndroid Build Coastguard Worker    mtx_unlock(&pvtmem_bo->mtx);
2057*61046927SAndroid Build Coastguard Worker 
2058*61046927SAndroid Build Coastguard Worker    return VK_SUCCESS;
2059*61046927SAndroid Build Coastguard Worker }
2060*61046927SAndroid Build Coastguard Worker 
2061*61046927SAndroid Build Coastguard Worker static uint64_t
tu_upload_variant(struct tu_cs * cs,const struct ir3_shader_variant * variant)2062*61046927SAndroid Build Coastguard Worker tu_upload_variant(struct tu_cs *cs,
2063*61046927SAndroid Build Coastguard Worker                   const struct ir3_shader_variant *variant)
2064*61046927SAndroid Build Coastguard Worker {
2065*61046927SAndroid Build Coastguard Worker    struct tu_cs_memory memory;
2066*61046927SAndroid Build Coastguard Worker 
2067*61046927SAndroid Build Coastguard Worker    if (!variant)
2068*61046927SAndroid Build Coastguard Worker       return 0;
2069*61046927SAndroid Build Coastguard Worker 
2070*61046927SAndroid Build Coastguard Worker    /* this expects to get enough alignment because shaders are allocated first
2071*61046927SAndroid Build Coastguard Worker     * and total size is always aligned correctly
2072*61046927SAndroid Build Coastguard Worker     * note: an assert in tu6_emit_xs_config validates the alignment
2073*61046927SAndroid Build Coastguard Worker     */
2074*61046927SAndroid Build Coastguard Worker    tu_cs_alloc(cs, variant->info.size / 4, 1, &memory);
2075*61046927SAndroid Build Coastguard Worker 
2076*61046927SAndroid Build Coastguard Worker    memcpy(memory.map, variant->bin, variant->info.size);
2077*61046927SAndroid Build Coastguard Worker    return memory.iova;
2078*61046927SAndroid Build Coastguard Worker }
2079*61046927SAndroid Build Coastguard Worker 
2080*61046927SAndroid Build Coastguard Worker static VkResult
tu_upload_shader(struct tu_device * dev,struct tu_shader * shader)2081*61046927SAndroid Build Coastguard Worker tu_upload_shader(struct tu_device *dev,
2082*61046927SAndroid Build Coastguard Worker                  struct tu_shader *shader)
2083*61046927SAndroid Build Coastguard Worker {
2084*61046927SAndroid Build Coastguard Worker    const struct ir3_shader_variant *v = shader->variant;
2085*61046927SAndroid Build Coastguard Worker    const struct ir3_shader_variant *binning = v ? v->binning : NULL;
2086*61046927SAndroid Build Coastguard Worker    const struct ir3_shader_variant *safe_const = shader->safe_const_variant;
2087*61046927SAndroid Build Coastguard Worker 
2088*61046927SAndroid Build Coastguard Worker    if (v->type == MESA_SHADER_VERTEX && v->stream_output.num_outputs != 0)
2089*61046927SAndroid Build Coastguard Worker       binning = v;
2090*61046927SAndroid Build Coastguard Worker 
2091*61046927SAndroid Build Coastguard Worker    uint32_t size = 0;
2092*61046927SAndroid Build Coastguard Worker    if (v->type == MESA_SHADER_VERTEX)
2093*61046927SAndroid Build Coastguard Worker       size += TU6_EMIT_VFD_DEST_MAX_DWORDS;
2094*61046927SAndroid Build Coastguard Worker 
2095*61046927SAndroid Build Coastguard Worker    const unsigned xs_size = 128;
2096*61046927SAndroid Build Coastguard Worker    const unsigned vpc_size = 32 + (v->stream_output.num_outputs != 0 ? 256 : 0);
2097*61046927SAndroid Build Coastguard Worker 
2098*61046927SAndroid Build Coastguard Worker    size += xs_size + tu_xs_get_additional_cs_size_dwords(v);
2099*61046927SAndroid Build Coastguard Worker    size += v->info.size / 4;
2100*61046927SAndroid Build Coastguard Worker    if (binning) {
2101*61046927SAndroid Build Coastguard Worker       size += xs_size + tu_xs_get_additional_cs_size_dwords(binning);
2102*61046927SAndroid Build Coastguard Worker       size += binning->info.size / 4;
2103*61046927SAndroid Build Coastguard Worker    }
2104*61046927SAndroid Build Coastguard Worker 
2105*61046927SAndroid Build Coastguard Worker    if (safe_const) {
2106*61046927SAndroid Build Coastguard Worker       size += xs_size + tu_xs_get_additional_cs_size_dwords(safe_const);
2107*61046927SAndroid Build Coastguard Worker       size += safe_const->info.size / 4;
2108*61046927SAndroid Build Coastguard Worker    }
2109*61046927SAndroid Build Coastguard Worker 
2110*61046927SAndroid Build Coastguard Worker    /* We emit an empty VPC including streamout state in the binning draw state */
2111*61046927SAndroid Build Coastguard Worker    if (binning || v->type == MESA_SHADER_GEOMETRY) {
2112*61046927SAndroid Build Coastguard Worker       size += vpc_size;
2113*61046927SAndroid Build Coastguard Worker    }
2114*61046927SAndroid Build Coastguard Worker 
2115*61046927SAndroid Build Coastguard Worker    pthread_mutex_lock(&dev->pipeline_mutex);
2116*61046927SAndroid Build Coastguard Worker    VkResult result = tu_suballoc_bo_alloc(&shader->bo, &dev->pipeline_suballoc,
2117*61046927SAndroid Build Coastguard Worker                                           size * 4, 128);
2118*61046927SAndroid Build Coastguard Worker    pthread_mutex_unlock(&dev->pipeline_mutex);
2119*61046927SAndroid Build Coastguard Worker 
2120*61046927SAndroid Build Coastguard Worker    if (result != VK_SUCCESS)
2121*61046927SAndroid Build Coastguard Worker       return result;
2122*61046927SAndroid Build Coastguard Worker 
2123*61046927SAndroid Build Coastguard Worker    uint32_t pvtmem_size = v->pvtmem_size;
2124*61046927SAndroid Build Coastguard Worker    bool per_wave = v->pvtmem_per_wave;
2125*61046927SAndroid Build Coastguard Worker 
2126*61046927SAndroid Build Coastguard Worker    if (v->binning) {
2127*61046927SAndroid Build Coastguard Worker       pvtmem_size = MAX2(pvtmem_size, shader->variant->binning->pvtmem_size);
2128*61046927SAndroid Build Coastguard Worker       if (!shader->variant->binning->pvtmem_per_wave)
2129*61046927SAndroid Build Coastguard Worker          per_wave = false;
2130*61046927SAndroid Build Coastguard Worker    }
2131*61046927SAndroid Build Coastguard Worker 
2132*61046927SAndroid Build Coastguard Worker    if (shader->safe_const_variant) {
2133*61046927SAndroid Build Coastguard Worker       pvtmem_size = MAX2(pvtmem_size, shader->safe_const_variant->pvtmem_size);
2134*61046927SAndroid Build Coastguard Worker       if (!shader->safe_const_variant->pvtmem_per_wave)
2135*61046927SAndroid Build Coastguard Worker          per_wave = false;
2136*61046927SAndroid Build Coastguard Worker 
2137*61046927SAndroid Build Coastguard Worker       if (shader->safe_const_variant->binning) {
2138*61046927SAndroid Build Coastguard Worker          pvtmem_size = MAX2(pvtmem_size, shader->safe_const_variant->binning->pvtmem_size);
2139*61046927SAndroid Build Coastguard Worker          if (!shader->safe_const_variant->binning->pvtmem_per_wave)
2140*61046927SAndroid Build Coastguard Worker             per_wave = false;
2141*61046927SAndroid Build Coastguard Worker       }
2142*61046927SAndroid Build Coastguard Worker    }
2143*61046927SAndroid Build Coastguard Worker 
2144*61046927SAndroid Build Coastguard Worker    struct tu_pvtmem_config pvtmem_config;
2145*61046927SAndroid Build Coastguard Worker 
2146*61046927SAndroid Build Coastguard Worker    result = tu_setup_pvtmem(dev, shader, &pvtmem_config, pvtmem_size, per_wave);
2147*61046927SAndroid Build Coastguard Worker    if (result != VK_SUCCESS) {
2148*61046927SAndroid Build Coastguard Worker       pthread_mutex_lock(&dev->pipeline_mutex);
2149*61046927SAndroid Build Coastguard Worker       tu_suballoc_bo_free(&dev->pipeline_suballoc, &shader->bo);
2150*61046927SAndroid Build Coastguard Worker       pthread_mutex_unlock(&dev->pipeline_mutex);
2151*61046927SAndroid Build Coastguard Worker       return result;
2152*61046927SAndroid Build Coastguard Worker    }
2153*61046927SAndroid Build Coastguard Worker 
2154*61046927SAndroid Build Coastguard Worker    TU_RMV(cmd_buffer_suballoc_bo_create, dev, &shader->bo);
2155*61046927SAndroid Build Coastguard Worker    tu_cs_init_suballoc(&shader->cs, dev, &shader->bo);
2156*61046927SAndroid Build Coastguard Worker 
2157*61046927SAndroid Build Coastguard Worker    uint64_t iova = tu_upload_variant(&shader->cs, v);
2158*61046927SAndroid Build Coastguard Worker    uint64_t binning_iova = tu_upload_variant(&shader->cs, binning);
2159*61046927SAndroid Build Coastguard Worker    uint64_t safe_const_iova = tu_upload_variant(&shader->cs, safe_const);
2160*61046927SAndroid Build Coastguard Worker 
2161*61046927SAndroid Build Coastguard Worker    struct tu_cs sub_cs;
2162*61046927SAndroid Build Coastguard Worker    tu_cs_begin_sub_stream(&shader->cs, xs_size +
2163*61046927SAndroid Build Coastguard Worker                           tu_xs_get_additional_cs_size_dwords(v), &sub_cs);
2164*61046927SAndroid Build Coastguard Worker    TU_CALLX(dev, tu6_emit_variant)(
2165*61046927SAndroid Build Coastguard Worker       &sub_cs, shader->variant->type, shader->variant, &pvtmem_config,
2166*61046927SAndroid Build Coastguard Worker       shader->view_mask, iova);
2167*61046927SAndroid Build Coastguard Worker    shader->state = tu_cs_end_draw_state(&shader->cs, &sub_cs);
2168*61046927SAndroid Build Coastguard Worker 
2169*61046927SAndroid Build Coastguard Worker    if (safe_const) {
2170*61046927SAndroid Build Coastguard Worker       tu_cs_begin_sub_stream(&shader->cs, xs_size +
2171*61046927SAndroid Build Coastguard Worker                              tu_xs_get_additional_cs_size_dwords(safe_const), &sub_cs);
2172*61046927SAndroid Build Coastguard Worker       TU_CALLX(dev, tu6_emit_variant)(
2173*61046927SAndroid Build Coastguard Worker          &sub_cs, v->type, safe_const, &pvtmem_config, shader->view_mask,
2174*61046927SAndroid Build Coastguard Worker          safe_const_iova);
2175*61046927SAndroid Build Coastguard Worker       shader->safe_const_state = tu_cs_end_draw_state(&shader->cs, &sub_cs);
2176*61046927SAndroid Build Coastguard Worker    }
2177*61046927SAndroid Build Coastguard Worker 
2178*61046927SAndroid Build Coastguard Worker    if (binning) {
2179*61046927SAndroid Build Coastguard Worker       tu_cs_begin_sub_stream(&shader->cs, xs_size + vpc_size +
2180*61046927SAndroid Build Coastguard Worker                              tu_xs_get_additional_cs_size_dwords(binning), &sub_cs);
2181*61046927SAndroid Build Coastguard Worker       TU_CALLX(dev, tu6_emit_variant)(
2182*61046927SAndroid Build Coastguard Worker          &sub_cs, v->type, binning, &pvtmem_config, shader->view_mask,
2183*61046927SAndroid Build Coastguard Worker          binning_iova);
2184*61046927SAndroid Build Coastguard Worker       /* emit an empty VPC */
2185*61046927SAndroid Build Coastguard Worker       TU_CALLX(dev, tu6_emit_vpc)(&sub_cs, binning, NULL, NULL, NULL, NULL);
2186*61046927SAndroid Build Coastguard Worker       shader->binning_state = tu_cs_end_draw_state(&shader->cs, &sub_cs);
2187*61046927SAndroid Build Coastguard Worker    }
2188*61046927SAndroid Build Coastguard Worker 
2189*61046927SAndroid Build Coastguard Worker    /* We don't support binning variants for GS, so the same draw state is used
2190*61046927SAndroid Build Coastguard Worker     * when binning and when drawing, but the VPC draw state is not executed
2191*61046927SAndroid Build Coastguard Worker     * when binning so we still need to generate an appropriate VPC config for
2192*61046927SAndroid Build Coastguard Worker     * binning.
2193*61046927SAndroid Build Coastguard Worker     */
2194*61046927SAndroid Build Coastguard Worker    if (v->type == MESA_SHADER_GEOMETRY) {
2195*61046927SAndroid Build Coastguard Worker       tu_cs_begin_sub_stream(&shader->cs, vpc_size, &sub_cs);
2196*61046927SAndroid Build Coastguard Worker       TU_CALLX(dev, tu6_emit_vpc)(&sub_cs, NULL, NULL, NULL, v, NULL);
2197*61046927SAndroid Build Coastguard Worker       shader->binning_state = tu_cs_end_draw_state(&shader->cs, &sub_cs);
2198*61046927SAndroid Build Coastguard Worker    }
2199*61046927SAndroid Build Coastguard Worker 
2200*61046927SAndroid Build Coastguard Worker    return VK_SUCCESS;
2201*61046927SAndroid Build Coastguard Worker }
2202*61046927SAndroid Build Coastguard Worker 
/* Forward declarations of the pipeline cache callbacks; they are referenced
 * by tu_shader_ops before their definitions.
 */
static bool
tu_shader_serialize(struct vk_pipeline_cache_object *object,
                    struct blob *blob);

static struct vk_pipeline_cache_object *
tu_shader_deserialize(struct vk_pipeline_cache *cache,
                      const void *key_data,
                      size_t key_size,
                      struct blob_reader *blob);
2212*61046927SAndroid Build Coastguard Worker 
2213*61046927SAndroid Build Coastguard Worker static void
tu_shader_pipeline_cache_object_destroy(struct vk_device * vk_device,struct vk_pipeline_cache_object * object)2214*61046927SAndroid Build Coastguard Worker tu_shader_pipeline_cache_object_destroy(struct vk_device *vk_device,
2215*61046927SAndroid Build Coastguard Worker                                         struct vk_pipeline_cache_object *object)
2216*61046927SAndroid Build Coastguard Worker {
2217*61046927SAndroid Build Coastguard Worker    struct tu_device *device = container_of(vk_device, struct tu_device, vk);
2218*61046927SAndroid Build Coastguard Worker    struct tu_shader *shader =
2219*61046927SAndroid Build Coastguard Worker       container_of(object, struct tu_shader, base);
2220*61046927SAndroid Build Coastguard Worker 
2221*61046927SAndroid Build Coastguard Worker    vk_pipeline_cache_object_finish(&shader->base);
2222*61046927SAndroid Build Coastguard Worker    tu_shader_destroy(device, shader);
2223*61046927SAndroid Build Coastguard Worker }
2224*61046927SAndroid Build Coastguard Worker 
2225*61046927SAndroid Build Coastguard Worker const struct vk_pipeline_cache_object_ops tu_shader_ops = {
2226*61046927SAndroid Build Coastguard Worker    .serialize = tu_shader_serialize,
2227*61046927SAndroid Build Coastguard Worker    .deserialize = tu_shader_deserialize,
2228*61046927SAndroid Build Coastguard Worker    .destroy = tu_shader_pipeline_cache_object_destroy,
2229*61046927SAndroid Build Coastguard Worker };
2230*61046927SAndroid Build Coastguard Worker 
2231*61046927SAndroid Build Coastguard Worker static struct tu_shader *
tu_shader_init(struct tu_device * dev,const void * key_data,size_t key_size)2232*61046927SAndroid Build Coastguard Worker tu_shader_init(struct tu_device *dev, const void *key_data, size_t key_size)
2233*61046927SAndroid Build Coastguard Worker {
2234*61046927SAndroid Build Coastguard Worker    VK_MULTIALLOC(ma);
2235*61046927SAndroid Build Coastguard Worker    VK_MULTIALLOC_DECL(&ma, struct tu_shader, shader, 1);
2236*61046927SAndroid Build Coastguard Worker    VK_MULTIALLOC_DECL_SIZE(&ma, char, obj_key_data, key_size);
2237*61046927SAndroid Build Coastguard Worker 
2238*61046927SAndroid Build Coastguard Worker    if (!vk_multialloc_zalloc(&ma, &dev->vk.alloc,
2239*61046927SAndroid Build Coastguard Worker                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
2240*61046927SAndroid Build Coastguard Worker       return NULL;
2241*61046927SAndroid Build Coastguard Worker 
2242*61046927SAndroid Build Coastguard Worker    memcpy(obj_key_data, key_data, key_size);
2243*61046927SAndroid Build Coastguard Worker 
2244*61046927SAndroid Build Coastguard Worker    vk_pipeline_cache_object_init(&dev->vk, &shader->base,
2245*61046927SAndroid Build Coastguard Worker                                  &tu_shader_ops, obj_key_data, key_size);
2246*61046927SAndroid Build Coastguard Worker 
2247*61046927SAndroid Build Coastguard Worker    shader->const_state.fdm_ubo.idx = -1;
2248*61046927SAndroid Build Coastguard Worker    shader->const_state.dynamic_offsets_ubo.idx = -1;
2249*61046927SAndroid Build Coastguard Worker    shader->const_state.inline_uniforms_ubo.idx = -1;
2250*61046927SAndroid Build Coastguard Worker 
2251*61046927SAndroid Build Coastguard Worker    return shader;
2252*61046927SAndroid Build Coastguard Worker }
2253*61046927SAndroid Build Coastguard Worker 
2254*61046927SAndroid Build Coastguard Worker static bool
tu_shader_serialize(struct vk_pipeline_cache_object * object,struct blob * blob)2255*61046927SAndroid Build Coastguard Worker tu_shader_serialize(struct vk_pipeline_cache_object *object,
2256*61046927SAndroid Build Coastguard Worker                     struct blob *blob)
2257*61046927SAndroid Build Coastguard Worker {
2258*61046927SAndroid Build Coastguard Worker    struct tu_shader *shader =
2259*61046927SAndroid Build Coastguard Worker       container_of(object, struct tu_shader, base);
2260*61046927SAndroid Build Coastguard Worker 
2261*61046927SAndroid Build Coastguard Worker    blob_write_bytes(blob, &shader->const_state, sizeof(shader->const_state));
2262*61046927SAndroid Build Coastguard Worker    blob_write_bytes(blob, &shader->dynamic_descriptor_sizes,
2263*61046927SAndroid Build Coastguard Worker                     sizeof(shader->dynamic_descriptor_sizes));
2264*61046927SAndroid Build Coastguard Worker    blob_write_uint32(blob, shader->view_mask);
2265*61046927SAndroid Build Coastguard Worker    blob_write_uint8(blob, shader->active_desc_sets);
2266*61046927SAndroid Build Coastguard Worker 
2267*61046927SAndroid Build Coastguard Worker    ir3_store_variant(blob, shader->variant);
2268*61046927SAndroid Build Coastguard Worker 
2269*61046927SAndroid Build Coastguard Worker    if (shader->safe_const_variant) {
2270*61046927SAndroid Build Coastguard Worker       blob_write_uint8(blob, 1);
2271*61046927SAndroid Build Coastguard Worker       ir3_store_variant(blob, shader->safe_const_variant);
2272*61046927SAndroid Build Coastguard Worker    } else {
2273*61046927SAndroid Build Coastguard Worker       blob_write_uint8(blob, 0);
2274*61046927SAndroid Build Coastguard Worker    }
2275*61046927SAndroid Build Coastguard Worker 
2276*61046927SAndroid Build Coastguard Worker 
2277*61046927SAndroid Build Coastguard Worker 
2278*61046927SAndroid Build Coastguard Worker    switch (shader->variant->type) {
2279*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_TESS_EVAL:
2280*61046927SAndroid Build Coastguard Worker       blob_write_bytes(blob, &shader->tes, sizeof(shader->tes));
2281*61046927SAndroid Build Coastguard Worker       break;
2282*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_FRAGMENT:
2283*61046927SAndroid Build Coastguard Worker       blob_write_bytes(blob, &shader->fs, sizeof(shader->fs));
2284*61046927SAndroid Build Coastguard Worker       break;
2285*61046927SAndroid Build Coastguard Worker    default:
2286*61046927SAndroid Build Coastguard Worker       break;
2287*61046927SAndroid Build Coastguard Worker    }
2288*61046927SAndroid Build Coastguard Worker 
2289*61046927SAndroid Build Coastguard Worker    return true;
2290*61046927SAndroid Build Coastguard Worker }
2291*61046927SAndroid Build Coastguard Worker 
2292*61046927SAndroid Build Coastguard Worker static struct vk_pipeline_cache_object *
tu_shader_deserialize(struct vk_pipeline_cache * cache,const void * key_data,size_t key_size,struct blob_reader * blob)2293*61046927SAndroid Build Coastguard Worker tu_shader_deserialize(struct vk_pipeline_cache *cache,
2294*61046927SAndroid Build Coastguard Worker                       const void *key_data,
2295*61046927SAndroid Build Coastguard Worker                       size_t key_size,
2296*61046927SAndroid Build Coastguard Worker                       struct blob_reader *blob)
2297*61046927SAndroid Build Coastguard Worker {
2298*61046927SAndroid Build Coastguard Worker    struct tu_device *dev =
2299*61046927SAndroid Build Coastguard Worker       container_of(cache->base.device, struct tu_device, vk);
2300*61046927SAndroid Build Coastguard Worker    struct tu_shader *shader =
2301*61046927SAndroid Build Coastguard Worker       tu_shader_init(dev, key_data, key_size);
2302*61046927SAndroid Build Coastguard Worker 
2303*61046927SAndroid Build Coastguard Worker    if (!shader)
2304*61046927SAndroid Build Coastguard Worker       return NULL;
2305*61046927SAndroid Build Coastguard Worker 
2306*61046927SAndroid Build Coastguard Worker    blob_copy_bytes(blob, &shader->const_state, sizeof(shader->const_state));
2307*61046927SAndroid Build Coastguard Worker    blob_copy_bytes(blob, &shader->dynamic_descriptor_sizes,
2308*61046927SAndroid Build Coastguard Worker                    sizeof(shader->dynamic_descriptor_sizes));
2309*61046927SAndroid Build Coastguard Worker    shader->view_mask = blob_read_uint32(blob);
2310*61046927SAndroid Build Coastguard Worker    shader->active_desc_sets = blob_read_uint8(blob);
2311*61046927SAndroid Build Coastguard Worker 
2312*61046927SAndroid Build Coastguard Worker    shader->variant = ir3_retrieve_variant(blob, dev->compiler, NULL);
2313*61046927SAndroid Build Coastguard Worker 
2314*61046927SAndroid Build Coastguard Worker    bool has_safe_const = blob_read_uint8(blob);
2315*61046927SAndroid Build Coastguard Worker    if (has_safe_const)
2316*61046927SAndroid Build Coastguard Worker       shader->safe_const_variant = ir3_retrieve_variant(blob, dev->compiler, NULL);
2317*61046927SAndroid Build Coastguard Worker 
2318*61046927SAndroid Build Coastguard Worker    switch (shader->variant->type) {
2319*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_TESS_EVAL:
2320*61046927SAndroid Build Coastguard Worker       blob_copy_bytes(blob, &shader->tes, sizeof(shader->tes));
2321*61046927SAndroid Build Coastguard Worker       break;
2322*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_FRAGMENT:
2323*61046927SAndroid Build Coastguard Worker       blob_copy_bytes(blob, &shader->fs, sizeof(shader->fs));
2324*61046927SAndroid Build Coastguard Worker       break;
2325*61046927SAndroid Build Coastguard Worker    default:
2326*61046927SAndroid Build Coastguard Worker       break;
2327*61046927SAndroid Build Coastguard Worker    }
2328*61046927SAndroid Build Coastguard Worker 
2329*61046927SAndroid Build Coastguard Worker    VkResult result = tu_upload_shader(dev, shader);
2330*61046927SAndroid Build Coastguard Worker    if (result != VK_SUCCESS) {
2331*61046927SAndroid Build Coastguard Worker       vk_free(&dev->vk.alloc, shader);
2332*61046927SAndroid Build Coastguard Worker       return NULL;
2333*61046927SAndroid Build Coastguard Worker    }
2334*61046927SAndroid Build Coastguard Worker 
2335*61046927SAndroid Build Coastguard Worker    return &shader->base;
2336*61046927SAndroid Build Coastguard Worker }
2337*61046927SAndroid Build Coastguard Worker 
2338*61046927SAndroid Build Coastguard Worker VkResult
tu_shader_create(struct tu_device * dev,struct tu_shader ** shader_out,nir_shader * nir,const struct tu_shader_key * key,const struct ir3_shader_key * ir3_key,const void * key_data,size_t key_size,struct tu_pipeline_layout * layout,bool executable_info)2339*61046927SAndroid Build Coastguard Worker tu_shader_create(struct tu_device *dev,
2340*61046927SAndroid Build Coastguard Worker                  struct tu_shader **shader_out,
2341*61046927SAndroid Build Coastguard Worker                  nir_shader *nir,
2342*61046927SAndroid Build Coastguard Worker                  const struct tu_shader_key *key,
2343*61046927SAndroid Build Coastguard Worker                  const struct ir3_shader_key *ir3_key,
2344*61046927SAndroid Build Coastguard Worker                  const void *key_data,
2345*61046927SAndroid Build Coastguard Worker                  size_t key_size,
2346*61046927SAndroid Build Coastguard Worker                  struct tu_pipeline_layout *layout,
2347*61046927SAndroid Build Coastguard Worker                  bool executable_info)
2348*61046927SAndroid Build Coastguard Worker {
2349*61046927SAndroid Build Coastguard Worker    struct tu_shader *shader = tu_shader_init(dev, key_data, key_size);
2350*61046927SAndroid Build Coastguard Worker 
2351*61046927SAndroid Build Coastguard Worker    if (!shader)
2352*61046927SAndroid Build Coastguard Worker       return VK_ERROR_OUT_OF_HOST_MEMORY;
2353*61046927SAndroid Build Coastguard Worker 
2354*61046927SAndroid Build Coastguard Worker    const nir_opt_access_options access_options = {
2355*61046927SAndroid Build Coastguard Worker       .is_vulkan = true,
2356*61046927SAndroid Build Coastguard Worker    };
2357*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_opt_access, &access_options);
2358*61046927SAndroid Build Coastguard Worker 
2359*61046927SAndroid Build Coastguard Worker    if (nir->info.stage == MESA_SHADER_FRAGMENT) {
2360*61046927SAndroid Build Coastguard Worker       const nir_input_attachment_options att_options = {
2361*61046927SAndroid Build Coastguard Worker          .use_fragcoord_sysval = true,
2362*61046927SAndroid Build Coastguard Worker          .use_layer_id_sysval = false,
2363*61046927SAndroid Build Coastguard Worker          /* When using multiview rendering, we must use
2364*61046927SAndroid Build Coastguard Worker           * gl_ViewIndex as the layer id to pass to the texture
2365*61046927SAndroid Build Coastguard Worker           * sampling function. gl_Layer doesn't work when
2366*61046927SAndroid Build Coastguard Worker           * multiview is enabled.
2367*61046927SAndroid Build Coastguard Worker           */
2368*61046927SAndroid Build Coastguard Worker          .use_view_id_for_layer = key->multiview_mask != 0,
2369*61046927SAndroid Build Coastguard Worker          .unscaled_input_attachment_ir3 = key->unscaled_input_fragcoord,
2370*61046927SAndroid Build Coastguard Worker       };
2371*61046927SAndroid Build Coastguard Worker       NIR_PASS_V(nir, nir_lower_input_attachments, &att_options);
2372*61046927SAndroid Build Coastguard Worker    }
2373*61046927SAndroid Build Coastguard Worker 
2374*61046927SAndroid Build Coastguard Worker    /* This has to happen before lower_input_attachments, because we have to
2375*61046927SAndroid Build Coastguard Worker     * lower input attachment coordinates except if unscaled.
2376*61046927SAndroid Build Coastguard Worker     */
2377*61046927SAndroid Build Coastguard Worker    const struct lower_fdm_options fdm_options = {
2378*61046927SAndroid Build Coastguard Worker       .num_views = MAX2(util_last_bit(key->multiview_mask), 1),
2379*61046927SAndroid Build Coastguard Worker       .adjust_fragcoord = key->fragment_density_map,
2380*61046927SAndroid Build Coastguard Worker    };
2381*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, tu_nir_lower_fdm, &fdm_options);
2382*61046927SAndroid Build Coastguard Worker 
2383*61046927SAndroid Build Coastguard Worker 
2384*61046927SAndroid Build Coastguard Worker    /* This needs to happen before multiview lowering which rewrites store
2385*61046927SAndroid Build Coastguard Worker     * instructions of the position variable, so that we can just rewrite one
2386*61046927SAndroid Build Coastguard Worker     * store at the end instead of having to rewrite every store specified by
2387*61046927SAndroid Build Coastguard Worker     * the user.
2388*61046927SAndroid Build Coastguard Worker     */
2389*61046927SAndroid Build Coastguard Worker    ir3_nir_lower_io_to_temporaries(nir);
2390*61046927SAndroid Build Coastguard Worker 
2391*61046927SAndroid Build Coastguard Worker    if (nir->info.stage == MESA_SHADER_VERTEX && key->multiview_mask) {
2392*61046927SAndroid Build Coastguard Worker       tu_nir_lower_multiview(nir, key->multiview_mask, dev);
2393*61046927SAndroid Build Coastguard Worker    }
2394*61046927SAndroid Build Coastguard Worker 
2395*61046927SAndroid Build Coastguard Worker    if (nir->info.stage == MESA_SHADER_FRAGMENT && key->force_sample_interp) {
2396*61046927SAndroid Build Coastguard Worker       nir_foreach_shader_in_variable(var, nir) {
2397*61046927SAndroid Build Coastguard Worker          if (!var->data.centroid)
2398*61046927SAndroid Build Coastguard Worker             var->data.sample = true;
2399*61046927SAndroid Build Coastguard Worker       }
2400*61046927SAndroid Build Coastguard Worker    }
2401*61046927SAndroid Build Coastguard Worker 
2402*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
2403*61046927SAndroid Build Coastguard Worker               nir_address_format_32bit_offset);
2404*61046927SAndroid Build Coastguard Worker 
2405*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_lower_explicit_io,
2406*61046927SAndroid Build Coastguard Worker               nir_var_mem_ubo | nir_var_mem_ssbo,
2407*61046927SAndroid Build Coastguard Worker               nir_address_format_vec2_index_32bit_offset);
2408*61046927SAndroid Build Coastguard Worker 
2409*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, nir_lower_explicit_io,
2410*61046927SAndroid Build Coastguard Worker               nir_var_mem_global,
2411*61046927SAndroid Build Coastguard Worker               nir_address_format_64bit_global);
2412*61046927SAndroid Build Coastguard Worker 
2413*61046927SAndroid Build Coastguard Worker    if (nir->info.stage == MESA_SHADER_COMPUTE) {
2414*61046927SAndroid Build Coastguard Worker       if (!nir->info.shared_memory_explicit_layout) {
2415*61046927SAndroid Build Coastguard Worker          NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
2416*61046927SAndroid Build Coastguard Worker                     nir_var_mem_shared, shared_type_info);
2417*61046927SAndroid Build Coastguard Worker       }
2418*61046927SAndroid Build Coastguard Worker       NIR_PASS_V(nir, nir_lower_explicit_io,
2419*61046927SAndroid Build Coastguard Worker                  nir_var_mem_shared,
2420*61046927SAndroid Build Coastguard Worker                  nir_address_format_32bit_offset);
2421*61046927SAndroid Build Coastguard Worker 
2422*61046927SAndroid Build Coastguard Worker       if (nir->info.zero_initialize_shared_memory && nir->info.shared_size > 0) {
2423*61046927SAndroid Build Coastguard Worker          const unsigned chunk_size = 16; /* max single store size */
2424*61046927SAndroid Build Coastguard Worker          /* Shared memory is allocated in 1024b chunks in HW, but the zero-init
2425*61046927SAndroid Build Coastguard Worker           * extension only requires us to initialize the memory that the shader
2426*61046927SAndroid Build Coastguard Worker           * is allocated at the API level, and it's up to the user to ensure
2427*61046927SAndroid Build Coastguard Worker           * that accesses are limited to those bounds.
2428*61046927SAndroid Build Coastguard Worker           */
2429*61046927SAndroid Build Coastguard Worker          const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
2430*61046927SAndroid Build Coastguard Worker          NIR_PASS_V(nir, nir_zero_initialize_shared_memory, shared_size, chunk_size);
2431*61046927SAndroid Build Coastguard Worker       }
2432*61046927SAndroid Build Coastguard Worker 
2433*61046927SAndroid Build Coastguard Worker       const struct nir_lower_compute_system_values_options compute_sysval_options = {
2434*61046927SAndroid Build Coastguard Worker          .has_base_workgroup_id = true,
2435*61046927SAndroid Build Coastguard Worker       };
2436*61046927SAndroid Build Coastguard Worker       NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_sysval_options);
2437*61046927SAndroid Build Coastguard Worker    }
2438*61046927SAndroid Build Coastguard Worker 
2439*61046927SAndroid Build Coastguard Worker    nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
2440*61046927SAndroid Build Coastguard Worker    nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, nir->info.stage);
2441*61046927SAndroid Build Coastguard Worker 
2442*61046927SAndroid Build Coastguard Worker   /* Gather information for transform feedback. This should be called after:
2443*61046927SAndroid Build Coastguard Worker     * - nir_split_per_member_structs.
2444*61046927SAndroid Build Coastguard Worker     * - nir_remove_dead_variables with varyings, so that we could align
2445*61046927SAndroid Build Coastguard Worker     *   stream outputs correctly.
2446*61046927SAndroid Build Coastguard Worker     * - nir_assign_io_var_locations - to have valid driver_location
2447*61046927SAndroid Build Coastguard Worker     */
2448*61046927SAndroid Build Coastguard Worker    struct ir3_stream_output_info so_info = {};
2449*61046927SAndroid Build Coastguard Worker    if (nir->info.stage == MESA_SHADER_VERTEX ||
2450*61046927SAndroid Build Coastguard Worker          nir->info.stage == MESA_SHADER_TESS_EVAL ||
2451*61046927SAndroid Build Coastguard Worker          nir->info.stage == MESA_SHADER_GEOMETRY)
2452*61046927SAndroid Build Coastguard Worker       tu_gather_xfb_info(nir, &so_info);
2453*61046927SAndroid Build Coastguard Worker 
2454*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < layout->num_sets; i++) {
2455*61046927SAndroid Build Coastguard Worker       if (layout->set[i].layout) {
2456*61046927SAndroid Build Coastguard Worker          shader->dynamic_descriptor_sizes[i] =
2457*61046927SAndroid Build Coastguard Worker             layout->set[i].layout->dynamic_offset_size;
2458*61046927SAndroid Build Coastguard Worker       } else {
2459*61046927SAndroid Build Coastguard Worker          shader->dynamic_descriptor_sizes[i] = -1;
2460*61046927SAndroid Build Coastguard Worker       }
2461*61046927SAndroid Build Coastguard Worker    }
2462*61046927SAndroid Build Coastguard Worker 
2463*61046927SAndroid Build Coastguard Worker    unsigned reserved_consts_vec4 = 0;
2464*61046927SAndroid Build Coastguard Worker    NIR_PASS_V(nir, tu_lower_io, dev, shader, layout, &reserved_consts_vec4);
2465*61046927SAndroid Build Coastguard Worker 
2466*61046927SAndroid Build Coastguard Worker    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
2467*61046927SAndroid Build Coastguard Worker 
2468*61046927SAndroid Build Coastguard Worker    ir3_finalize_nir(dev->compiler, nir);
2469*61046927SAndroid Build Coastguard Worker 
2470*61046927SAndroid Build Coastguard Worker    const struct ir3_shader_options options = {
2471*61046927SAndroid Build Coastguard Worker       .num_reserved_user_consts = reserved_consts_vec4,
2472*61046927SAndroid Build Coastguard Worker       .api_wavesize = key->api_wavesize,
2473*61046927SAndroid Build Coastguard Worker       .real_wavesize = key->real_wavesize,
2474*61046927SAndroid Build Coastguard Worker       .push_consts_type = shader->const_state.push_consts.type,
2475*61046927SAndroid Build Coastguard Worker       .push_consts_base = shader->const_state.push_consts.lo,
2476*61046927SAndroid Build Coastguard Worker       .push_consts_dwords = shader->const_state.push_consts.dwords,
2477*61046927SAndroid Build Coastguard Worker    };
2478*61046927SAndroid Build Coastguard Worker 
2479*61046927SAndroid Build Coastguard Worker    struct ir3_shader *ir3_shader =
2480*61046927SAndroid Build Coastguard Worker       ir3_shader_from_nir(dev->compiler, nir, &options, &so_info);
2481*61046927SAndroid Build Coastguard Worker 
2482*61046927SAndroid Build Coastguard Worker    shader->variant =
2483*61046927SAndroid Build Coastguard Worker       ir3_shader_create_variant(ir3_shader, ir3_key, executable_info);
2484*61046927SAndroid Build Coastguard Worker 
2485*61046927SAndroid Build Coastguard Worker    if (ir3_exceeds_safe_constlen(shader->variant)) {
2486*61046927SAndroid Build Coastguard Worker       struct ir3_shader_key safe_constlen_key = *ir3_key;
2487*61046927SAndroid Build Coastguard Worker       safe_constlen_key.safe_constlen = true;
2488*61046927SAndroid Build Coastguard Worker       shader->safe_const_variant =
2489*61046927SAndroid Build Coastguard Worker          ir3_shader_create_variant(ir3_shader, &safe_constlen_key,
2490*61046927SAndroid Build Coastguard Worker                                    executable_info);
2491*61046927SAndroid Build Coastguard Worker    }
2492*61046927SAndroid Build Coastguard Worker 
2493*61046927SAndroid Build Coastguard Worker    ir3_shader_destroy(ir3_shader);
2494*61046927SAndroid Build Coastguard Worker 
2495*61046927SAndroid Build Coastguard Worker    shader->view_mask = key->multiview_mask;
2496*61046927SAndroid Build Coastguard Worker 
2497*61046927SAndroid Build Coastguard Worker    switch (shader->variant->type) {
2498*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_TESS_EVAL: {
2499*61046927SAndroid Build Coastguard Worker       const struct ir3_shader_variant *tes = shader->variant;
2500*61046927SAndroid Build Coastguard Worker       if (tes->tess.point_mode) {
2501*61046927SAndroid Build Coastguard Worker          shader->tes.tess_output_lower_left =
2502*61046927SAndroid Build Coastguard Worker             shader->tes.tess_output_upper_left = TESS_POINTS;
2503*61046927SAndroid Build Coastguard Worker       } else if (tes->tess.primitive_mode == TESS_PRIMITIVE_ISOLINES) {
2504*61046927SAndroid Build Coastguard Worker          shader->tes.tess_output_lower_left =
2505*61046927SAndroid Build Coastguard Worker             shader->tes.tess_output_upper_left = TESS_LINES;
2506*61046927SAndroid Build Coastguard Worker       } else if (tes->tess.ccw) {
2507*61046927SAndroid Build Coastguard Worker          /* Tessellation orientation in HW is specified with a lower-left
2508*61046927SAndroid Build Coastguard Worker           * origin, we need to swap them if the origin is upper-left.
2509*61046927SAndroid Build Coastguard Worker           */
2510*61046927SAndroid Build Coastguard Worker          shader->tes.tess_output_lower_left = TESS_CCW_TRIS;
2511*61046927SAndroid Build Coastguard Worker          shader->tes.tess_output_upper_left = TESS_CW_TRIS;
2512*61046927SAndroid Build Coastguard Worker       } else {
2513*61046927SAndroid Build Coastguard Worker          shader->tes.tess_output_lower_left = TESS_CW_TRIS;
2514*61046927SAndroid Build Coastguard Worker          shader->tes.tess_output_upper_left = TESS_CCW_TRIS;
2515*61046927SAndroid Build Coastguard Worker       }
2516*61046927SAndroid Build Coastguard Worker 
2517*61046927SAndroid Build Coastguard Worker       switch (tes->tess.spacing) {
2518*61046927SAndroid Build Coastguard Worker       case TESS_SPACING_EQUAL:
2519*61046927SAndroid Build Coastguard Worker          shader->tes.tess_spacing = TESS_EQUAL;
2520*61046927SAndroid Build Coastguard Worker          break;
2521*61046927SAndroid Build Coastguard Worker       case TESS_SPACING_FRACTIONAL_ODD:
2522*61046927SAndroid Build Coastguard Worker          shader->tes.tess_spacing = TESS_FRACTIONAL_ODD;
2523*61046927SAndroid Build Coastguard Worker          break;
2524*61046927SAndroid Build Coastguard Worker       case TESS_SPACING_FRACTIONAL_EVEN:
2525*61046927SAndroid Build Coastguard Worker          shader->tes.tess_spacing = TESS_FRACTIONAL_EVEN;
2526*61046927SAndroid Build Coastguard Worker          break;
2527*61046927SAndroid Build Coastguard Worker       case TESS_SPACING_UNSPECIFIED:
2528*61046927SAndroid Build Coastguard Worker       default:
2529*61046927SAndroid Build Coastguard Worker          unreachable("invalid tess spacing");
2530*61046927SAndroid Build Coastguard Worker       }
2531*61046927SAndroid Build Coastguard Worker 
2532*61046927SAndroid Build Coastguard Worker       break;
2533*61046927SAndroid Build Coastguard Worker    }
2534*61046927SAndroid Build Coastguard Worker    case MESA_SHADER_FRAGMENT: {
2535*61046927SAndroid Build Coastguard Worker       const struct ir3_shader_variant *fs = shader->variant;
2536*61046927SAndroid Build Coastguard Worker       shader->fs.per_samp = fs->per_samp || ir3_key->sample_shading;
2537*61046927SAndroid Build Coastguard Worker       shader->fs.has_fdm = key->fragment_density_map;
2538*61046927SAndroid Build Coastguard Worker       if (fs->has_kill)
2539*61046927SAndroid Build Coastguard Worker          shader->fs.lrz.status |= TU_LRZ_FORCE_DISABLE_WRITE;
2540*61046927SAndroid Build Coastguard Worker       if (fs->no_earlyz || (fs->writes_pos && !fs->fs.early_fragment_tests))
2541*61046927SAndroid Build Coastguard Worker          shader->fs.lrz.status = TU_LRZ_FORCE_DISABLE_LRZ;
2542*61046927SAndroid Build Coastguard Worker       /* FDM isn't compatible with LRZ, because the LRZ image uses the original
2543*61046927SAndroid Build Coastguard Worker        * resolution and we would need to use the low resolution.
2544*61046927SAndroid Build Coastguard Worker        *
2545*61046927SAndroid Build Coastguard Worker        * TODO: Use a patchpoint to only disable LRZ for scaled bins.
2546*61046927SAndroid Build Coastguard Worker        */
2547*61046927SAndroid Build Coastguard Worker       if (key->fragment_density_map)
2548*61046927SAndroid Build Coastguard Worker          shader->fs.lrz.status = TU_LRZ_FORCE_DISABLE_LRZ;
2549*61046927SAndroid Build Coastguard Worker       if (!fs->fs.early_fragment_tests &&
2550*61046927SAndroid Build Coastguard Worker           (fs->no_earlyz || fs->writes_pos || fs->writes_stencilref || fs->writes_smask)) {
2551*61046927SAndroid Build Coastguard Worker          shader->fs.lrz.force_late_z = true;
2552*61046927SAndroid Build Coastguard Worker       }
2553*61046927SAndroid Build Coastguard Worker       break;
2554*61046927SAndroid Build Coastguard Worker    }
2555*61046927SAndroid Build Coastguard Worker    default:
2556*61046927SAndroid Build Coastguard Worker       break;
2557*61046927SAndroid Build Coastguard Worker    }
2558*61046927SAndroid Build Coastguard Worker 
2559*61046927SAndroid Build Coastguard Worker    VkResult result = tu_upload_shader(dev, shader);
2560*61046927SAndroid Build Coastguard Worker    if (result != VK_SUCCESS) {
2561*61046927SAndroid Build Coastguard Worker       vk_free(&dev->vk.alloc, shader);
2562*61046927SAndroid Build Coastguard Worker       return result;
2563*61046927SAndroid Build Coastguard Worker    }
2564*61046927SAndroid Build Coastguard Worker 
2565*61046927SAndroid Build Coastguard Worker    *shader_out = shader;
2566*61046927SAndroid Build Coastguard Worker    return VK_SUCCESS;
2567*61046927SAndroid Build Coastguard Worker }
2568*61046927SAndroid Build Coastguard Worker 
2569*61046927SAndroid Build Coastguard Worker static void
tu_link_shaders(nir_shader ** shaders,unsigned shaders_count)2570*61046927SAndroid Build Coastguard Worker tu_link_shaders(nir_shader **shaders, unsigned shaders_count)
2571*61046927SAndroid Build Coastguard Worker {
2572*61046927SAndroid Build Coastguard Worker    nir_shader *consumer = NULL;
2573*61046927SAndroid Build Coastguard Worker    for (gl_shader_stage stage = (gl_shader_stage) (shaders_count - 1);
2574*61046927SAndroid Build Coastguard Worker         stage >= MESA_SHADER_VERTEX; stage = (gl_shader_stage) (stage - 1)) {
2575*61046927SAndroid Build Coastguard Worker       if (!shaders[stage])
2576*61046927SAndroid Build Coastguard Worker          continue;
2577*61046927SAndroid Build Coastguard Worker 
2578*61046927SAndroid Build Coastguard Worker       nir_shader *producer = shaders[stage];
2579*61046927SAndroid Build Coastguard Worker       if (!consumer) {
2580*61046927SAndroid Build Coastguard Worker          consumer = producer;
2581*61046927SAndroid Build Coastguard Worker          continue;
2582*61046927SAndroid Build Coastguard Worker       }
2583*61046927SAndroid Build Coastguard Worker 
2584*61046927SAndroid Build Coastguard Worker       if (nir_link_opt_varyings(producer, consumer)) {
2585*61046927SAndroid Build Coastguard Worker          NIR_PASS_V(consumer, nir_opt_constant_folding);
2586*61046927SAndroid Build Coastguard Worker          NIR_PASS_V(consumer, nir_opt_algebraic);
2587*61046927SAndroid Build Coastguard Worker          NIR_PASS_V(consumer, nir_opt_dce);
2588*61046927SAndroid Build Coastguard Worker       }
2589*61046927SAndroid Build Coastguard Worker 
2590*61046927SAndroid Build Coastguard Worker       const nir_remove_dead_variables_options out_var_opts = {
2591*61046927SAndroid Build Coastguard Worker          .can_remove_var = nir_vk_is_not_xfb_output,
2592*61046927SAndroid Build Coastguard Worker       };
2593*61046927SAndroid Build Coastguard Worker       NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, &out_var_opts);
2594*61046927SAndroid Build Coastguard Worker 
2595*61046927SAndroid Build Coastguard Worker       NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);
2596*61046927SAndroid Build Coastguard Worker 
2597*61046927SAndroid Build Coastguard Worker       bool progress = nir_remove_unused_varyings(producer, consumer);
2598*61046927SAndroid Build Coastguard Worker 
2599*61046927SAndroid Build Coastguard Worker       nir_compact_varyings(producer, consumer, true);
2600*61046927SAndroid Build Coastguard Worker       if (progress) {
2601*61046927SAndroid Build Coastguard Worker          if (nir_lower_global_vars_to_local(producer)) {
2602*61046927SAndroid Build Coastguard Worker             /* Remove dead writes, which can remove input loads */
2603*61046927SAndroid Build Coastguard Worker             NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
2604*61046927SAndroid Build Coastguard Worker             NIR_PASS_V(producer, nir_opt_dce);
2605*61046927SAndroid Build Coastguard Worker          }
2606*61046927SAndroid Build Coastguard Worker          nir_lower_global_vars_to_local(consumer);
2607*61046927SAndroid Build Coastguard Worker       }
2608*61046927SAndroid Build Coastguard Worker 
2609*61046927SAndroid Build Coastguard Worker       consumer = producer;
2610*61046927SAndroid Build Coastguard Worker    }
2611*61046927SAndroid Build Coastguard Worker 
2612*61046927SAndroid Build Coastguard Worker    /* Gather info after linking so that we can fill out the ir3 shader key.
2613*61046927SAndroid Build Coastguard Worker     */
2614*61046927SAndroid Build Coastguard Worker    for (gl_shader_stage stage = MESA_SHADER_VERTEX;
2615*61046927SAndroid Build Coastguard Worker         stage <= MESA_SHADER_FRAGMENT; stage = (gl_shader_stage) (stage + 1)) {
2616*61046927SAndroid Build Coastguard Worker       if (shaders[stage])
2617*61046927SAndroid Build Coastguard Worker          nir_shader_gather_info(shaders[stage],
2618*61046927SAndroid Build Coastguard Worker                                 nir_shader_get_entrypoint(shaders[stage]));
2619*61046927SAndroid Build Coastguard Worker    }
2620*61046927SAndroid Build Coastguard Worker }
2621*61046927SAndroid Build Coastguard Worker 
2622*61046927SAndroid Build Coastguard Worker static uint32_t
tu6_get_tessmode(const struct nir_shader * shader)2623*61046927SAndroid Build Coastguard Worker tu6_get_tessmode(const struct nir_shader *shader)
2624*61046927SAndroid Build Coastguard Worker {
2625*61046927SAndroid Build Coastguard Worker    enum tess_primitive_mode primitive_mode = shader->info.tess._primitive_mode;
2626*61046927SAndroid Build Coastguard Worker    switch (primitive_mode) {
2627*61046927SAndroid Build Coastguard Worker    case TESS_PRIMITIVE_ISOLINES:
2628*61046927SAndroid Build Coastguard Worker       return IR3_TESS_ISOLINES;
2629*61046927SAndroid Build Coastguard Worker    case TESS_PRIMITIVE_TRIANGLES:
2630*61046927SAndroid Build Coastguard Worker       return IR3_TESS_TRIANGLES;
2631*61046927SAndroid Build Coastguard Worker    case TESS_PRIMITIVE_QUADS:
2632*61046927SAndroid Build Coastguard Worker       return IR3_TESS_QUADS;
2633*61046927SAndroid Build Coastguard Worker    case TESS_PRIMITIVE_UNSPECIFIED:
2634*61046927SAndroid Build Coastguard Worker       return IR3_TESS_NONE;
2635*61046927SAndroid Build Coastguard Worker    default:
2636*61046927SAndroid Build Coastguard Worker       unreachable("bad tessmode");
2637*61046927SAndroid Build Coastguard Worker    }
2638*61046927SAndroid Build Coastguard Worker }
2639*61046927SAndroid Build Coastguard Worker 
2640*61046927SAndroid Build Coastguard Worker VkResult
tu_compile_shaders(struct tu_device * device,VkPipelineCreateFlags2KHR pipeline_flags,const VkPipelineShaderStageCreateInfo ** stage_infos,nir_shader ** nir,const struct tu_shader_key * keys,struct tu_pipeline_layout * layout,const unsigned char * pipeline_sha1,struct tu_shader ** shaders,char ** nir_initial_disasm,void * nir_initial_disasm_mem_ctx,nir_shader ** nir_out,VkPipelineCreationFeedback * stage_feedbacks)2641*61046927SAndroid Build Coastguard Worker tu_compile_shaders(struct tu_device *device,
2642*61046927SAndroid Build Coastguard Worker                    VkPipelineCreateFlags2KHR pipeline_flags,
2643*61046927SAndroid Build Coastguard Worker                    const VkPipelineShaderStageCreateInfo **stage_infos,
2644*61046927SAndroid Build Coastguard Worker                    nir_shader **nir,
2645*61046927SAndroid Build Coastguard Worker                    const struct tu_shader_key *keys,
2646*61046927SAndroid Build Coastguard Worker                    struct tu_pipeline_layout *layout,
2647*61046927SAndroid Build Coastguard Worker                    const unsigned char *pipeline_sha1,
2648*61046927SAndroid Build Coastguard Worker                    struct tu_shader **shaders,
2649*61046927SAndroid Build Coastguard Worker                    char **nir_initial_disasm,
2650*61046927SAndroid Build Coastguard Worker                    void *nir_initial_disasm_mem_ctx,
2651*61046927SAndroid Build Coastguard Worker                    nir_shader **nir_out,
2652*61046927SAndroid Build Coastguard Worker                    VkPipelineCreationFeedback *stage_feedbacks)
2653*61046927SAndroid Build Coastguard Worker {
2654*61046927SAndroid Build Coastguard Worker    struct ir3_shader_key ir3_key = {};
2655*61046927SAndroid Build Coastguard Worker    VkResult result = VK_SUCCESS;
2656*61046927SAndroid Build Coastguard Worker    void *mem_ctx = ralloc_context(NULL);
2657*61046927SAndroid Build Coastguard Worker 
2658*61046927SAndroid Build Coastguard Worker    for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
2659*61046927SAndroid Build Coastguard Worker         stage = (gl_shader_stage) (stage + 1)) {
2660*61046927SAndroid Build Coastguard Worker       const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage];
2661*61046927SAndroid Build Coastguard Worker       if (!stage_info)
2662*61046927SAndroid Build Coastguard Worker          continue;
2663*61046927SAndroid Build Coastguard Worker 
2664*61046927SAndroid Build Coastguard Worker       int64_t stage_start = os_time_get_nano();
2665*61046927SAndroid Build Coastguard Worker 
2666*61046927SAndroid Build Coastguard Worker       nir[stage] = tu_spirv_to_nir(device, mem_ctx, pipeline_flags,
2667*61046927SAndroid Build Coastguard Worker                                    stage_info, stage);
2668*61046927SAndroid Build Coastguard Worker       if (!nir[stage]) {
2669*61046927SAndroid Build Coastguard Worker          result = VK_ERROR_OUT_OF_HOST_MEMORY;
2670*61046927SAndroid Build Coastguard Worker          goto fail;
2671*61046927SAndroid Build Coastguard Worker       }
2672*61046927SAndroid Build Coastguard Worker 
2673*61046927SAndroid Build Coastguard Worker       stage_feedbacks[stage].flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
2674*61046927SAndroid Build Coastguard Worker       stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
2675*61046927SAndroid Build Coastguard Worker    }
2676*61046927SAndroid Build Coastguard Worker 
2677*61046927SAndroid Build Coastguard Worker    if (nir[MESA_SHADER_GEOMETRY])
2678*61046927SAndroid Build Coastguard Worker       ir3_key.has_gs = true;
2679*61046927SAndroid Build Coastguard Worker 
2680*61046927SAndroid Build Coastguard Worker    ir3_key.sample_shading = keys[MESA_SHADER_FRAGMENT].force_sample_interp;
2681*61046927SAndroid Build Coastguard Worker 
2682*61046927SAndroid Build Coastguard Worker    if (nir_initial_disasm) {
2683*61046927SAndroid Build Coastguard Worker       for (gl_shader_stage stage = MESA_SHADER_VERTEX;
2684*61046927SAndroid Build Coastguard Worker            stage < MESA_SHADER_STAGES;
2685*61046927SAndroid Build Coastguard Worker            stage = (gl_shader_stage) (stage + 1)) {
2686*61046927SAndroid Build Coastguard Worker       if (!nir[stage])
2687*61046927SAndroid Build Coastguard Worker          continue;
2688*61046927SAndroid Build Coastguard Worker 
2689*61046927SAndroid Build Coastguard Worker       nir_initial_disasm[stage] =
2690*61046927SAndroid Build Coastguard Worker          nir_shader_as_str(nir[stage], nir_initial_disasm_mem_ctx);
2691*61046927SAndroid Build Coastguard Worker       }
2692*61046927SAndroid Build Coastguard Worker    }
2693*61046927SAndroid Build Coastguard Worker 
2694*61046927SAndroid Build Coastguard Worker    tu_link_shaders(nir, MESA_SHADER_STAGES);
2695*61046927SAndroid Build Coastguard Worker 
2696*61046927SAndroid Build Coastguard Worker    if (nir_out) {
2697*61046927SAndroid Build Coastguard Worker       for (gl_shader_stage stage = MESA_SHADER_VERTEX;
2698*61046927SAndroid Build Coastguard Worker            stage < MESA_SHADER_STAGES; stage = (gl_shader_stage) (stage + 1)) {
2699*61046927SAndroid Build Coastguard Worker          if (!nir[stage])
2700*61046927SAndroid Build Coastguard Worker             continue;
2701*61046927SAndroid Build Coastguard Worker 
2702*61046927SAndroid Build Coastguard Worker          nir_out[stage] = nir_shader_clone(NULL, nir[stage]);
2703*61046927SAndroid Build Coastguard Worker       }
2704*61046927SAndroid Build Coastguard Worker    }
2705*61046927SAndroid Build Coastguard Worker 
2706*61046927SAndroid Build Coastguard Worker    /* With pipelines, tessellation modes can be set on either shader, for
2707*61046927SAndroid Build Coastguard Worker     * compatibility with HLSL and GLSL, and the driver is supposed to merge
2708*61046927SAndroid Build Coastguard Worker     * them. Shader objects requires modes to be set on at least the TES except
2709*61046927SAndroid Build Coastguard Worker     * for OutputVertices which has to be set at least on the TCS. Make sure
2710*61046927SAndroid Build Coastguard Worker     * all modes are set on the TES when compiling together multiple shaders,
2711*61046927SAndroid Build Coastguard Worker     * and then from this point on we will use the modes in the TES (and output
2712*61046927SAndroid Build Coastguard Worker     * vertices on the TCS).
2713*61046927SAndroid Build Coastguard Worker     */
2714*61046927SAndroid Build Coastguard Worker    if (nir[MESA_SHADER_TESS_EVAL]) {
2715*61046927SAndroid Build Coastguard Worker       nir_shader *tcs = nir[MESA_SHADER_TESS_CTRL];
2716*61046927SAndroid Build Coastguard Worker       nir_shader *tes = nir[MESA_SHADER_TESS_EVAL];
2717*61046927SAndroid Build Coastguard Worker 
2718*61046927SAndroid Build Coastguard Worker       if (tes->info.tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED)
2719*61046927SAndroid Build Coastguard Worker          tes->info.tess._primitive_mode = tcs->info.tess._primitive_mode;
2720*61046927SAndroid Build Coastguard Worker 
2721*61046927SAndroid Build Coastguard Worker       tes->info.tess.point_mode |= tcs->info.tess.point_mode;
2722*61046927SAndroid Build Coastguard Worker       tes->info.tess.ccw |= tcs->info.tess.ccw;
2723*61046927SAndroid Build Coastguard Worker 
2724*61046927SAndroid Build Coastguard Worker       if (tes->info.tess.spacing == TESS_SPACING_UNSPECIFIED) {
2725*61046927SAndroid Build Coastguard Worker          tes->info.tess.spacing = tcs->info.tess.spacing;
2726*61046927SAndroid Build Coastguard Worker       }
2727*61046927SAndroid Build Coastguard Worker 
2728*61046927SAndroid Build Coastguard Worker       if (tcs->info.tess.tcs_vertices_out == 0)
2729*61046927SAndroid Build Coastguard Worker          tcs->info.tess.tcs_vertices_out = tes->info.tess.tcs_vertices_out;
2730*61046927SAndroid Build Coastguard Worker 
2731*61046927SAndroid Build Coastguard Worker       ir3_key.tessellation = tu6_get_tessmode(tes);
2732*61046927SAndroid Build Coastguard Worker    }
2733*61046927SAndroid Build Coastguard Worker 
2734*61046927SAndroid Build Coastguard Worker    for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
2735*61046927SAndroid Build Coastguard Worker         stage = (gl_shader_stage) (stage + 1)) {
2736*61046927SAndroid Build Coastguard Worker       if (!nir[stage])
2737*61046927SAndroid Build Coastguard Worker          continue;
2738*61046927SAndroid Build Coastguard Worker 
2739*61046927SAndroid Build Coastguard Worker       if (stage > MESA_SHADER_TESS_CTRL) {
2740*61046927SAndroid Build Coastguard Worker          if (stage == MESA_SHADER_FRAGMENT) {
2741*61046927SAndroid Build Coastguard Worker             ir3_key.tcs_store_primid = ir3_key.tcs_store_primid ||
2742*61046927SAndroid Build Coastguard Worker                (nir[stage]->info.inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID));
2743*61046927SAndroid Build Coastguard Worker          } else {
2744*61046927SAndroid Build Coastguard Worker             ir3_key.tcs_store_primid = ir3_key.tcs_store_primid ||
2745*61046927SAndroid Build Coastguard Worker                BITSET_TEST(nir[stage]->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID);
2746*61046927SAndroid Build Coastguard Worker          }
2747*61046927SAndroid Build Coastguard Worker       }
2748*61046927SAndroid Build Coastguard Worker    }
2749*61046927SAndroid Build Coastguard Worker 
2750*61046927SAndroid Build Coastguard Worker    /* In the tess-but-not-FS case we don't know whether the FS will read
2751*61046927SAndroid Build Coastguard Worker     * PrimID so we need to unconditionally store it.
2752*61046927SAndroid Build Coastguard Worker     */
2753*61046927SAndroid Build Coastguard Worker    if (nir[MESA_SHADER_TESS_CTRL] && !nir[MESA_SHADER_FRAGMENT])
2754*61046927SAndroid Build Coastguard Worker       ir3_key.tcs_store_primid = true;
2755*61046927SAndroid Build Coastguard Worker 
2756*61046927SAndroid Build Coastguard Worker    for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
2757*61046927SAndroid Build Coastguard Worker         stage = (gl_shader_stage) (stage + 1)) {
2758*61046927SAndroid Build Coastguard Worker       if (!nir[stage] || shaders[stage])
2759*61046927SAndroid Build Coastguard Worker          continue;
2760*61046927SAndroid Build Coastguard Worker 
2761*61046927SAndroid Build Coastguard Worker       int64_t stage_start = os_time_get_nano();
2762*61046927SAndroid Build Coastguard Worker 
2763*61046927SAndroid Build Coastguard Worker       unsigned char shader_sha1[21];
2764*61046927SAndroid Build Coastguard Worker       memcpy(shader_sha1, pipeline_sha1, 20);
2765*61046927SAndroid Build Coastguard Worker       shader_sha1[20] = (unsigned char) stage;
2766*61046927SAndroid Build Coastguard Worker 
2767*61046927SAndroid Build Coastguard Worker       result = tu_shader_create(device,
2768*61046927SAndroid Build Coastguard Worker                                 &shaders[stage], nir[stage], &keys[stage],
2769*61046927SAndroid Build Coastguard Worker                                 &ir3_key, shader_sha1, sizeof(shader_sha1),
2770*61046927SAndroid Build Coastguard Worker                                 layout, !!nir_initial_disasm);
2771*61046927SAndroid Build Coastguard Worker       if (result != VK_SUCCESS) {
2772*61046927SAndroid Build Coastguard Worker          goto fail;
2773*61046927SAndroid Build Coastguard Worker       }
2774*61046927SAndroid Build Coastguard Worker 
2775*61046927SAndroid Build Coastguard Worker       stage_feedbacks[stage].duration += os_time_get_nano() - stage_start;
2776*61046927SAndroid Build Coastguard Worker    }
2777*61046927SAndroid Build Coastguard Worker 
2778*61046927SAndroid Build Coastguard Worker    ralloc_free(mem_ctx);
2779*61046927SAndroid Build Coastguard Worker 
2780*61046927SAndroid Build Coastguard Worker    return VK_SUCCESS;
2781*61046927SAndroid Build Coastguard Worker 
2782*61046927SAndroid Build Coastguard Worker fail:
2783*61046927SAndroid Build Coastguard Worker    ralloc_free(mem_ctx);
2784*61046927SAndroid Build Coastguard Worker 
2785*61046927SAndroid Build Coastguard Worker    for (gl_shader_stage stage = MESA_SHADER_VERTEX; stage < MESA_SHADER_STAGES;
2786*61046927SAndroid Build Coastguard Worker         stage = (gl_shader_stage) (stage + 1)) {
2787*61046927SAndroid Build Coastguard Worker       if (shaders[stage]) {
2788*61046927SAndroid Build Coastguard Worker          tu_shader_destroy(device, shaders[stage]);
2789*61046927SAndroid Build Coastguard Worker       }
2790*61046927SAndroid Build Coastguard Worker       if (nir_out && nir_out[stage]) {
2791*61046927SAndroid Build Coastguard Worker          ralloc_free(nir_out[stage]);
2792*61046927SAndroid Build Coastguard Worker       }
2793*61046927SAndroid Build Coastguard Worker    }
2794*61046927SAndroid Build Coastguard Worker 
2795*61046927SAndroid Build Coastguard Worker    return result;
2796*61046927SAndroid Build Coastguard Worker }
2797*61046927SAndroid Build Coastguard Worker 
2798*61046927SAndroid Build Coastguard Worker void
tu_shader_key_subgroup_size(struct tu_shader_key * key,bool allow_varying_subgroup_size,bool require_full_subgroups,const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo * subgroup_info,struct tu_device * dev)2799*61046927SAndroid Build Coastguard Worker tu_shader_key_subgroup_size(struct tu_shader_key *key,
2800*61046927SAndroid Build Coastguard Worker                             bool allow_varying_subgroup_size,
2801*61046927SAndroid Build Coastguard Worker                             bool require_full_subgroups,
2802*61046927SAndroid Build Coastguard Worker                             const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_info,
2803*61046927SAndroid Build Coastguard Worker                             struct tu_device *dev)
2804*61046927SAndroid Build Coastguard Worker {
2805*61046927SAndroid Build Coastguard Worker    enum ir3_wavesize_option api_wavesize, real_wavesize;
2806*61046927SAndroid Build Coastguard Worker    if (!dev->physical_device->info->a6xx.supports_double_threadsize) {
2807*61046927SAndroid Build Coastguard Worker       api_wavesize = IR3_SINGLE_ONLY;
2808*61046927SAndroid Build Coastguard Worker       real_wavesize = IR3_SINGLE_ONLY;
2809*61046927SAndroid Build Coastguard Worker    } else {
2810*61046927SAndroid Build Coastguard Worker       if (allow_varying_subgroup_size) {
2811*61046927SAndroid Build Coastguard Worker          api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
2812*61046927SAndroid Build Coastguard Worker       } else {
2813*61046927SAndroid Build Coastguard Worker          if (subgroup_info) {
2814*61046927SAndroid Build Coastguard Worker             if (subgroup_info->requiredSubgroupSize == dev->compiler->threadsize_base) {
2815*61046927SAndroid Build Coastguard Worker                api_wavesize = IR3_SINGLE_ONLY;
2816*61046927SAndroid Build Coastguard Worker             } else {
2817*61046927SAndroid Build Coastguard Worker                assert(subgroup_info->requiredSubgroupSize == dev->compiler->threadsize_base * 2);
2818*61046927SAndroid Build Coastguard Worker                api_wavesize = IR3_DOUBLE_ONLY;
2819*61046927SAndroid Build Coastguard Worker             }
2820*61046927SAndroid Build Coastguard Worker          } else {
2821*61046927SAndroid Build Coastguard Worker             /* Match the exposed subgroupSize. */
2822*61046927SAndroid Build Coastguard Worker             api_wavesize = IR3_DOUBLE_ONLY;
2823*61046927SAndroid Build Coastguard Worker          }
2824*61046927SAndroid Build Coastguard Worker 
2825*61046927SAndroid Build Coastguard Worker          if (require_full_subgroups)
2826*61046927SAndroid Build Coastguard Worker             real_wavesize = api_wavesize;
2827*61046927SAndroid Build Coastguard Worker          else if (api_wavesize == IR3_SINGLE_ONLY)
2828*61046927SAndroid Build Coastguard Worker             real_wavesize = IR3_SINGLE_ONLY;
2829*61046927SAndroid Build Coastguard Worker          else
2830*61046927SAndroid Build Coastguard Worker             real_wavesize = IR3_SINGLE_OR_DOUBLE;
2831*61046927SAndroid Build Coastguard Worker       }
2832*61046927SAndroid Build Coastguard Worker    }
2833*61046927SAndroid Build Coastguard Worker 
2834*61046927SAndroid Build Coastguard Worker    key->api_wavesize = api_wavesize;
2835*61046927SAndroid Build Coastguard Worker    key->real_wavesize = real_wavesize;
2836*61046927SAndroid Build Coastguard Worker }
2837*61046927SAndroid Build Coastguard Worker 
2838*61046927SAndroid Build Coastguard Worker static VkResult
tu_empty_shader_create(struct tu_device * dev,struct tu_shader ** shader_out,gl_shader_stage stage)2839*61046927SAndroid Build Coastguard Worker tu_empty_shader_create(struct tu_device *dev,
2840*61046927SAndroid Build Coastguard Worker                        struct tu_shader **shader_out,
2841*61046927SAndroid Build Coastguard Worker                        gl_shader_stage stage)
2842*61046927SAndroid Build Coastguard Worker {
2843*61046927SAndroid Build Coastguard Worker    struct tu_shader *shader = tu_shader_init(dev, NULL, 0);
2844*61046927SAndroid Build Coastguard Worker 
2845*61046927SAndroid Build Coastguard Worker    if (!shader)
2846*61046927SAndroid Build Coastguard Worker       return VK_ERROR_OUT_OF_HOST_MEMORY;
2847*61046927SAndroid Build Coastguard Worker 
2848*61046927SAndroid Build Coastguard Worker    pthread_mutex_lock(&dev->pipeline_mutex);
2849*61046927SAndroid Build Coastguard Worker    VkResult result = tu_suballoc_bo_alloc(&shader->bo, &dev->pipeline_suballoc,
2850*61046927SAndroid Build Coastguard Worker                                           32 * 4, 128);
2851*61046927SAndroid Build Coastguard Worker    pthread_mutex_unlock(&dev->pipeline_mutex);
2852*61046927SAndroid Build Coastguard Worker 
2853*61046927SAndroid Build Coastguard Worker    if (result != VK_SUCCESS) {
2854*61046927SAndroid Build Coastguard Worker       vk_free(&dev->vk.alloc, shader);
2855*61046927SAndroid Build Coastguard Worker       return result;
2856*61046927SAndroid Build Coastguard Worker    }
2857*61046927SAndroid Build Coastguard Worker 
2858*61046927SAndroid Build Coastguard Worker    TU_RMV(cmd_buffer_suballoc_bo_create, dev, &shader->bo);
2859*61046927SAndroid Build Coastguard Worker    tu_cs_init_suballoc(&shader->cs, dev, &shader->bo);
2860*61046927SAndroid Build Coastguard Worker 
2861*61046927SAndroid Build Coastguard Worker    struct tu_pvtmem_config pvtmem_config = { };
2862*61046927SAndroid Build Coastguard Worker 
2863*61046927SAndroid Build Coastguard Worker    struct tu_cs sub_cs;
2864*61046927SAndroid Build Coastguard Worker    tu_cs_begin_sub_stream(&shader->cs, 32, &sub_cs);
2865*61046927SAndroid Build Coastguard Worker    TU_CALLX(dev, tu6_emit_variant)(&sub_cs, stage, NULL, &pvtmem_config, 0, 0);
2866*61046927SAndroid Build Coastguard Worker    shader->state = tu_cs_end_draw_state(&shader->cs, &sub_cs);
2867*61046927SAndroid Build Coastguard Worker 
2868*61046927SAndroid Build Coastguard Worker    *shader_out = shader;
2869*61046927SAndroid Build Coastguard Worker    return VK_SUCCESS;
2870*61046927SAndroid Build Coastguard Worker }
2871*61046927SAndroid Build Coastguard Worker 
2872*61046927SAndroid Build Coastguard Worker static VkResult
tu_empty_fs_create(struct tu_device * dev,struct tu_shader ** shader,bool fragment_density_map)2873*61046927SAndroid Build Coastguard Worker tu_empty_fs_create(struct tu_device *dev, struct tu_shader **shader,
2874*61046927SAndroid Build Coastguard Worker                    bool fragment_density_map)
2875*61046927SAndroid Build Coastguard Worker {
2876*61046927SAndroid Build Coastguard Worker    struct ir3_shader_key key = {};
2877*61046927SAndroid Build Coastguard Worker    const struct ir3_shader_options options = {};
2878*61046927SAndroid Build Coastguard Worker    struct ir3_stream_output_info so_info = {};
2879*61046927SAndroid Build Coastguard Worker    const nir_shader_compiler_options *nir_options =
2880*61046927SAndroid Build Coastguard Worker       ir3_get_compiler_options(dev->compiler);
2881*61046927SAndroid Build Coastguard Worker    nir_builder fs_b;
2882*61046927SAndroid Build Coastguard Worker 
2883*61046927SAndroid Build Coastguard Worker    fs_b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, nir_options,
2884*61046927SAndroid Build Coastguard Worker                                          "noop_fs");
2885*61046927SAndroid Build Coastguard Worker 
2886*61046927SAndroid Build Coastguard Worker    *shader = tu_shader_init(dev, NULL, 0);
2887*61046927SAndroid Build Coastguard Worker    if (!*shader)
2888*61046927SAndroid Build Coastguard Worker       return VK_ERROR_OUT_OF_HOST_MEMORY;
2889*61046927SAndroid Build Coastguard Worker 
2890*61046927SAndroid Build Coastguard Worker    (*shader)->fs.has_fdm = fragment_density_map;
2891*61046927SAndroid Build Coastguard Worker    if (fragment_density_map)
2892*61046927SAndroid Build Coastguard Worker       (*shader)->fs.lrz.status = TU_LRZ_FORCE_DISABLE_LRZ;
2893*61046927SAndroid Build Coastguard Worker 
2894*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < MAX_SETS; i++)
2895*61046927SAndroid Build Coastguard Worker       (*shader)->dynamic_descriptor_sizes[i] = -1;
2896*61046927SAndroid Build Coastguard Worker 
2897*61046927SAndroid Build Coastguard Worker    struct ir3_shader *ir3_shader =
2898*61046927SAndroid Build Coastguard Worker       ir3_shader_from_nir(dev->compiler, fs_b.shader, &options, &so_info);
2899*61046927SAndroid Build Coastguard Worker    (*shader)->variant = ir3_shader_create_variant(ir3_shader, &key, false);
2900*61046927SAndroid Build Coastguard Worker    ir3_shader_destroy(ir3_shader);
2901*61046927SAndroid Build Coastguard Worker 
2902*61046927SAndroid Build Coastguard Worker    return tu_upload_shader(dev, *shader);
2903*61046927SAndroid Build Coastguard Worker }
2904*61046927SAndroid Build Coastguard Worker 
2905*61046927SAndroid Build Coastguard Worker VkResult
tu_init_empty_shaders(struct tu_device * dev)2906*61046927SAndroid Build Coastguard Worker tu_init_empty_shaders(struct tu_device *dev)
2907*61046927SAndroid Build Coastguard Worker {
2908*61046927SAndroid Build Coastguard Worker    VkResult result;
2909*61046927SAndroid Build Coastguard Worker 
2910*61046927SAndroid Build Coastguard Worker    result = tu_empty_shader_create(dev, &dev->empty_tcs, MESA_SHADER_TESS_CTRL);
2911*61046927SAndroid Build Coastguard Worker    if (result != VK_SUCCESS)
2912*61046927SAndroid Build Coastguard Worker       goto out;
2913*61046927SAndroid Build Coastguard Worker 
2914*61046927SAndroid Build Coastguard Worker    result = tu_empty_shader_create(dev, &dev->empty_tes, MESA_SHADER_TESS_EVAL);
2915*61046927SAndroid Build Coastguard Worker    if (result != VK_SUCCESS)
2916*61046927SAndroid Build Coastguard Worker       goto out;
2917*61046927SAndroid Build Coastguard Worker 
2918*61046927SAndroid Build Coastguard Worker    result = tu_empty_shader_create(dev, &dev->empty_gs, MESA_SHADER_GEOMETRY);
2919*61046927SAndroid Build Coastguard Worker    if (result != VK_SUCCESS)
2920*61046927SAndroid Build Coastguard Worker       goto out;
2921*61046927SAndroid Build Coastguard Worker 
2922*61046927SAndroid Build Coastguard Worker    result = tu_empty_fs_create(dev, &dev->empty_fs, false);
2923*61046927SAndroid Build Coastguard Worker    if (result != VK_SUCCESS)
2924*61046927SAndroid Build Coastguard Worker       goto out;
2925*61046927SAndroid Build Coastguard Worker 
2926*61046927SAndroid Build Coastguard Worker    result = tu_empty_fs_create(dev, &dev->empty_fs_fdm, true);
2927*61046927SAndroid Build Coastguard Worker    if (result != VK_SUCCESS)
2928*61046927SAndroid Build Coastguard Worker       goto out;
2929*61046927SAndroid Build Coastguard Worker 
2930*61046927SAndroid Build Coastguard Worker    return VK_SUCCESS;
2931*61046927SAndroid Build Coastguard Worker 
2932*61046927SAndroid Build Coastguard Worker out:
2933*61046927SAndroid Build Coastguard Worker    if (dev->empty_tcs)
2934*61046927SAndroid Build Coastguard Worker       vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_tcs->base);
2935*61046927SAndroid Build Coastguard Worker    if (dev->empty_tes)
2936*61046927SAndroid Build Coastguard Worker       vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_tes->base);
2937*61046927SAndroid Build Coastguard Worker    if (dev->empty_gs)
2938*61046927SAndroid Build Coastguard Worker       vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_gs->base);
2939*61046927SAndroid Build Coastguard Worker    if (dev->empty_fs)
2940*61046927SAndroid Build Coastguard Worker       vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_fs->base);
2941*61046927SAndroid Build Coastguard Worker    if (dev->empty_fs_fdm)
2942*61046927SAndroid Build Coastguard Worker       vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_fs_fdm->base);
2943*61046927SAndroid Build Coastguard Worker    return result;
2944*61046927SAndroid Build Coastguard Worker }
2945*61046927SAndroid Build Coastguard Worker 
2946*61046927SAndroid Build Coastguard Worker void
tu_destroy_empty_shaders(struct tu_device * dev)2947*61046927SAndroid Build Coastguard Worker tu_destroy_empty_shaders(struct tu_device *dev)
2948*61046927SAndroid Build Coastguard Worker {
2949*61046927SAndroid Build Coastguard Worker    vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_tcs->base);
2950*61046927SAndroid Build Coastguard Worker    vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_tes->base);
2951*61046927SAndroid Build Coastguard Worker    vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_gs->base);
2952*61046927SAndroid Build Coastguard Worker    vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_fs->base);
2953*61046927SAndroid Build Coastguard Worker    vk_pipeline_cache_object_unref(&dev->vk, &dev->empty_fs_fdm->base);
2954*61046927SAndroid Build Coastguard Worker }
2955*61046927SAndroid Build Coastguard Worker 
2956*61046927SAndroid Build Coastguard Worker void
tu_shader_destroy(struct tu_device * dev,struct tu_shader * shader)2957*61046927SAndroid Build Coastguard Worker tu_shader_destroy(struct tu_device *dev,
2958*61046927SAndroid Build Coastguard Worker                   struct tu_shader *shader)
2959*61046927SAndroid Build Coastguard Worker {
2960*61046927SAndroid Build Coastguard Worker    tu_cs_finish(&shader->cs);
2961*61046927SAndroid Build Coastguard Worker    TU_RMV(resource_destroy, dev, &shader->bo);
2962*61046927SAndroid Build Coastguard Worker 
2963*61046927SAndroid Build Coastguard Worker    pthread_mutex_lock(&dev->pipeline_mutex);
2964*61046927SAndroid Build Coastguard Worker    tu_suballoc_bo_free(&dev->pipeline_suballoc, &shader->bo);
2965*61046927SAndroid Build Coastguard Worker    pthread_mutex_unlock(&dev->pipeline_mutex);
2966*61046927SAndroid Build Coastguard Worker 
2967*61046927SAndroid Build Coastguard Worker    if (shader->pvtmem_bo)
2968*61046927SAndroid Build Coastguard Worker       tu_bo_finish(dev, shader->pvtmem_bo);
2969*61046927SAndroid Build Coastguard Worker 
2970*61046927SAndroid Build Coastguard Worker    if (shader->variant)
2971*61046927SAndroid Build Coastguard Worker       ralloc_free((void *)shader->variant);
2972*61046927SAndroid Build Coastguard Worker    if (shader->safe_const_variant)
2973*61046927SAndroid Build Coastguard Worker       ralloc_free((void *)shader->safe_const_variant);
2974*61046927SAndroid Build Coastguard Worker 
2975*61046927SAndroid Build Coastguard Worker    vk_free(&dev->vk.alloc, shader);
2976*61046927SAndroid Build Coastguard Worker }
2977