/* Copyright © 2023 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include <stdio.h>
#include <errno.h>

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_upload_mgr.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "intel/common/intel_aux_map.h"
#include "intel/common/intel_l3_config.h"
#include "intel/common/intel_sample_positions.h"
#include "intel/ds/intel_tracepoints.h"
#include "iris_batch.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_pipe.h"
#include "iris_resource.h"
#include "iris_utrace.h"

#include "iris_genx_macros.h"

#if GFX_VER >= 9
#include "intel/compiler/brw_compiler.h"
#include "intel/common/intel_genX_state_brw.h"
#else
#include "intel/compiler/elk/elk_compiler.h"
#include "intel/common/intel_genX_state_elk.h"
#endif

#include "libintel_shaders.h"

#if GFX_VERx10 == 80
# include "intel_gfx8_shaders_code.h"
#elif GFX_VERx10 == 90
# include "intel_gfx9_shaders_code.h"
#elif GFX_VERx10 == 110
# include "intel_gfx11_shaders_code.h"
#elif GFX_VERx10 == 120
# include "intel_gfx12_shaders_code.h"
#elif GFX_VERx10 == 125
# include "intel_gfx125_shaders_code.h"
#elif GFX_VERx10 == 200
# include "intel_gfx20_shaders_code.h"
#else
# error "Unsupported generation"
#endif

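/* Load a field of struct iris_gen_indirect_params from the push constant
 * (uniform) space, using the field's offset within the struct as the
 * uniform base offset.
 */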
#define load_param(b, bit_size, struct_name, field_name)          \
   nir_load_uniform(b, 1, bit_size, nir_imm_int(b, 0),            \
                    .base = offsetof(struct_name, field_name),   \
                    .range = bit_size / 8)

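/* Compute a linear index for the current fragment from gl_FragCoord,
 * assuming the generation shader is dispatched as a rectangle at most
 * 8192 pixels wide (index = y * 8192 + x).
 */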
static nir_def *
load_fragment_index(nir_builder *b)
{
   nir_def *pos_in = nir_f2i32(b, nir_trim_vector(b, nir_load_frag_coord(b), 2));
   return nir_iadd(b,
                   nir_imul_imm(b, nir_channel(b, pos_in, 1), 8192),
                   nir_channel(b, pos_in, 0));
}

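/* Deserialize the precompiled NIR shader library for this hardware
 * generation (genX(intel_shaders_nir)), using the kernel-shader compiler
 * options from either the brw (Gfx9+) or elk (Gfx8) backend.
 */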
static nir_shader *
load_shader_lib(struct iris_screen *screen, void *mem_ctx)
{
   const nir_shader_compiler_options *nir_options =
#if GFX_VER >= 9
      screen->brw->nir_options[MESA_SHADER_KERNEL];
#else
      screen->elk->nir_options[MESA_SHADER_KERNEL];
#endif

   struct blob_reader blob;
   blob_reader_init(&blob, (void *)genX(intel_shaders_nir),
                    sizeof(genX(intel_shaders_nir)));
   return nir_deserialize(mem_ctx, nir_options, &blob);
}

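/* Emit the NIR call into the shader library entry point that writes the
 * generated draw commands (genX(libiris_write_draw)), wiring each argument
 * to a field of struct iris_gen_indirect_params loaded as a push constant.
 * Returns the size of the parameter structure.
 */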
static unsigned
iris_call_generation_shader(struct iris_screen *screen, nir_builder *b)
{
   genX(libiris_write_draw)(
      b,
      load_param(b, 64, struct iris_gen_indirect_params, generated_cmds_addr),
      load_param(b, 64, struct iris_gen_indirect_params, indirect_data_addr),
      load_param(b, 64, struct iris_gen_indirect_params, draw_id_addr),
      load_param(b, 32, struct iris_gen_indirect_params, indirect_data_stride),
      load_param(b, 64, struct iris_gen_indirect_params, draw_count_addr),
      load_param(b, 32, struct iris_gen_indirect_params, draw_base),
      load_param(b, 32, struct iris_gen_indirect_params, max_draw_count),
      load_param(b, 32, struct iris_gen_indirect_params, flags),
      load_param(b, 32, struct iris_gen_indirect_params, ring_count),
      load_param(b, 64, struct iris_gen_indirect_params, gen_addr),
      load_param(b, 64, struct iris_gen_indirect_params, end_addr),
      load_fragment_index(b));
   return sizeof(struct iris_gen_indirect_params);
}

void
genX(init_screen_gen_state)(struct iris_screen *screen)
{
   screen->vtbl.load_shader_lib = load_shader_lib;
   screen->vtbl.call_generation_shader = iris_call_generation_shader;
}

/**
 * Stream out temporary/short-lived state.
 *
 * This allocates space, pins the BO, and includes the BO address in the
 * returned offset (which works because all state lives in 32-bit memory
 * zones).
 */
static void *
upload_state(struct iris_batch *batch,
             struct u_upload_mgr *uploader,
             struct iris_state_ref *ref,
             unsigned size,
             unsigned alignment)
{
   void *p = NULL;
   u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
   iris_use_pinned_bo(batch, iris_resource_bo(ref->res), false, IRIS_DOMAIN_NONE);
   return p;
}

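/* Like upload_state(), but also records the allocation in
 * batch->state_sizes (for batch decoding) and returns an offset relative
 * to the state base address rather than within the BO.
 */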
static uint32_t *
stream_state(struct iris_batch *batch,
             struct u_upload_mgr *uploader,
             struct pipe_resource **out_res,
             unsigned size,
             unsigned alignment,
             uint32_t *out_offset)
{
   void *ptr = NULL;

   u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &ptr);

   struct iris_bo *bo = iris_resource_bo(*out_res);
   iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);

   iris_record_state_size(batch->state_sizes,
                          bo->address + *out_offset, size);

   *out_offset += iris_bo_offset_from_base_address(bo);

   return ptr;
}

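/* Emit a minimal 3D pipeline (VS/HS/DS/GS disabled, trivial SBE/WM setup)
 * plus a RECTLIST 3DPRIMITIVE that runs the draw-generation fragment
 * shader, one fragment per generated draw slot, then dirty all the state
 * we clobbered so the regular draw path reprograms it.
 */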
static void
emit_indirect_generate_draw(struct iris_batch *batch,
                            struct iris_address params_addr,
                            unsigned params_size,
                            unsigned ring_count)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;
   struct isl_device *isl_dev = &screen->isl_dev;
   const struct intel_device_info *devinfo = screen->devinfo;

   /* State emission */
   uint32_t ves_dws[1 + 2 * GENX(VERTEX_ELEMENT_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), ves_dws, ve) {
      ve.DWordLength = 1 + GENX(VERTEX_ELEMENT_STATE_length) * 2 -
                           GENX(3DSTATE_VERTEX_ELEMENTS_length_bias);
   }
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[1], ve) {
      ve.VertexBufferIndex = 1;
      ve.Valid = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control = VFCOMP_STORE_SRC;
      ve.Component1Control = VFCOMP_STORE_0;
      ve.Component2Control = VFCOMP_STORE_0;
      ve.Component3Control = VFCOMP_STORE_0;
   }
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[3], ve) {
      ve.VertexBufferIndex   = 0;
      ve.Valid               = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control   = VFCOMP_STORE_SRC;
      ve.Component1Control   = VFCOMP_STORE_SRC;
      ve.Component2Control   = VFCOMP_STORE_SRC;
      ve.Component3Control   = VFCOMP_STORE_1_FP;
   }

   iris_batch_emit(batch, ves_dws, sizeof(ves_dws));

   iris_emit_cmd(batch, GENX(3DSTATE_VF_STATISTICS), vf);
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.InstanceIDEnable = true;
      sgvs.InstanceIDComponentNumber = COMP_1;
      sgvs.InstanceIDElementOffset = 0;
   }
#if GFX_VER >= 11
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
#endif
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable   = false;
      vfi.VertexElementIndex = 0;
   }
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable   = false;
      vfi.VertexElementIndex = 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
      topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
   }

   ice->shaders.urb.cfg.size[MESA_SHADER_VERTEX] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_CTRL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_EVAL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_GEOMETRY] = 1;
   genX(emit_urb_config)(batch,
                         false /* has_tess_eval */,
                         false /* has_geometry */);

   iris_emit_cmd(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
      ps_blend.HasWriteableRT = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wm);

#if GFX_VER >= 12
   iris_emit_cmd(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
      db.DepthBoundsTestEnable = false;
      db.DepthBoundsTestMinValue = 0.0;
      db.DepthBoundsTestMaxValue = 1.0;
   }
#endif

   iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms);
   iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
      sm.SampleMask = 0x1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VS), vs);
   iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);
   iris_emit_cmd(batch, GENX(3DSTATE_TE), te);
   iris_emit_cmd(batch, GENX(3DSTATE_DS), DS);

   iris_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), so);

   iris_emit_cmd(batch, GENX(3DSTATE_GS), gs);

   iris_emit_cmd(batch, GENX(3DSTATE_CLIP), clip) {
      clip.PerspectiveDivideDisable = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_SF), sf) {
#if GFX_VER >= 12
      sf.DerefBlockSize = ice->state.urb_deref_block_size;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_RASTER), raster) {
      raster.CullMode = CULLMODE_NONE;
   }

   const struct iris_compiled_shader *shader = ice->draw.generation.shader;
   const struct iris_fs_data *fs_data = iris_fs_data_const(shader);

   iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
      sbe.VertexURBEntryReadOffset = 1;
      sbe.NumberofSFOutputAttributes = fs_data->num_varying_inputs;
      sbe.VertexURBEntryReadLength = MAX2((fs_data->num_varying_inputs + 1) / 2, 1);
      sbe.ConstantInterpolationEnable = fs_data->flat_inputs;
      sbe.ForceVertexURBEntryReadLength = true;
      sbe.ForceVertexURBEntryReadOffset = true;
#if GFX_VER >= 9
      for (unsigned i = 0; i < 32; i++)
         sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM), wm) {
      if (fs_data->has_side_effects || fs_data->uses_kill)
         wm.ForceThreadDispatchEnable = ForceON;
   }

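   /* Program 3DSTATE_PS from the generation shader's prog data: per-SIMD
    * dispatch state, GRF start registers and kernel start pointers.
    */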
   iris_emit_cmd(batch, GENX(3DSTATE_PS), ps) {
#if GFX_VER >= 9
      struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(shader->brw_prog_data);
#else
      struct elk_wm_prog_data *wm_prog_data = elk_wm_prog_data(shader->elk_prog_data);
#endif
      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                  1 /* rasterization_samples */,
                                  0 /* msaa_flags */);

      ps.VectorMaskEnable       = fs_data->uses_vmask;

      ps.BindingTableEntryCount = GFX_VER == 9 ? 1 : 0;
#if GFX_VER < 20
      ps.PushConstantEnable     = shader->nr_params > 0 ||
                                  shader->ubo_ranges[0].length;
#endif

#if GFX_VER >= 9
      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
#endif

      ps.KernelStartPointer0 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.KernelStartPointer2 = KSP(ice->draw.generation.shader) +
         brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif
#else
      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);

      ps.KernelStartPointer0 = KSP(ice->draw.generation.shader) +
         elk_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = KSP(ice->draw.generation.shader) +
         elk_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
      ps.KernelStartPointer2 = KSP(ice->draw.generation.shader) +
         elk_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif

      ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_PS_EXTRA), psx) {
      psx.PixelShaderValid = true;
#if GFX_VER < 20
      psx.AttributeEnable = fs_data->num_varying_inputs > 0;
#endif
      psx.PixelShaderIsPerSample = fs_data->is_per_sample;
      psx.PixelShaderComputedDepthMode = fs_data->computed_depth_mode;
#if GFX_VER >= 9
#if GFX_VER >= 20
      assert(!fs_data->pulls_bary);
#else
      psx.PixelShaderPullsBary = fs_data->pulls_bary;
#endif
      psx.PixelShaderComputesStencil = fs_data->computed_stencil;
#endif
      psx.PixelShaderHasUAV = GFX_VER == 8;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
      uint32_t cc_vp_address;
      uint32_t *cc_vp_map =
         stream_state(batch, ice->state.dynamic_uploader,
                      &ice->state.last_res.cc_vp,
                      4 * GENX(CC_VIEWPORT_length), 32, &cc_vp_address);

      iris_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) {
         ccv.MinimumDepth = 0.0f;
         ccv.MaximumDepth = 1.0f;
      }
      cc.CCViewportPointer = cc_vp_address;
   }

#if GFX_VER >= 12
   /* Disable Primitive Replication. */
   iris_emit_cmd(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
#endif

#if GFX_VERx10 == 125
   /* DG2: Wa_22011440098
    * MTL: Wa_18022330953
    *
    * In 3D mode, after programming push constant alloc command immediately
    * program push constant command(ZERO length) without any commit between
    * them.
    *
    * Note that Wa_16011448509 isn't needed here as all address bits are zero.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_ALL), c) {
      /* Update empty push constants for all stages (bitmask = 11111b) */
      c.ShaderUpdateEnable = 0x1f;
      c.MOCS = iris_mocs(NULL, isl_dev, 0);
   }
#endif

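   /* The generation shader is dispatched over a rectangle covering
    * ring_count pixels: at most 8192 wide, with enough rows to cover the
    * whole ring. A RECTLIST only needs three vertices.
    */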
   float x0 = 0.0f, x1 = MIN2(ring_count, 8192);
   float y0 = 0.0f, y1 = DIV_ROUND_UP(ring_count, 8192);
   float z = 0.0f;

   float *vertices =
      upload_state(batch, ice->state.dynamic_uploader,
                   &ice->draw.generation.vertices,
                   ALIGN(9 * sizeof(float), 8), 8);

   vertices[0] = x1; vertices[1] = y1; vertices[2] = z; /* v0 */
   vertices[3] = x0; vertices[4] = y1; vertices[5] = z; /* v1 */
   vertices[6] = x0; vertices[7] = y0; vertices[8] = z; /* v2 */


   uint32_t vbs_dws[1 + GENX(VERTEX_BUFFER_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), vbs_dws, vbs) {
      vbs.DWordLength = ARRAY_SIZE(vbs_dws) -
                        GENX(3DSTATE_VERTEX_BUFFERS_length_bias);
   }
   _iris_pack_state(batch, GENX(VERTEX_BUFFER_STATE), &vbs_dws[1], vb) {
      vb.VertexBufferIndex     = 0;
      vb.AddressModifyEnable   = true;
      vb.BufferStartingAddress = ro_bo(iris_resource_bo(ice->draw.generation.vertices.res),
                                       ice->draw.generation.vertices.offset);
      vb.BufferPitch           = 3 * sizeof(float);
      vb.BufferSize            = 9 * sizeof(float);
      vb.MOCS                  = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_VERTEX_BUFFER_BIT);
#if GFX_VER >= 12
      vb.L3BypassDisable       = true;
#endif
   }
   iris_batch_emit(batch, vbs_dws, sizeof(vbs_dws));

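   /* Push the generation parameters (params_addr) to the fragment shader
    * as push constants.
    */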
#if GFX_VERx10 > 120
   uint32_t const_dws[GENX(3DSTATE_CONSTANT_ALL_length) +
                      GENX(3DSTATE_CONSTANT_ALL_DATA_length)];

   iris_pack_command(GENX(3DSTATE_CONSTANT_ALL), const_dws, all) {
      all.DWordLength = ARRAY_SIZE(const_dws) -
         GENX(3DSTATE_CONSTANT_ALL_length_bias);
      all.ShaderUpdateEnable = 1 << MESA_SHADER_FRAGMENT;
      all.MOCS = isl_mocs(isl_dev, 0, false);
      all.PointerBufferMask = 0x1;
   }
   _iris_pack_state(batch, GENX(3DSTATE_CONSTANT_ALL_DATA),
                    &const_dws[GENX(3DSTATE_CONSTANT_ALL_length)], data) {
      data.PointerToConstantBuffer = params_addr;
      data.ConstantBufferReadLength = DIV_ROUND_UP(params_size, 32);
   }
   iris_batch_emit(batch, const_dws, sizeof(const_dws));
#else
   /* The Skylake PRM contains the following restriction:
    *
    *    "The driver must ensure The following case does not occur without a
    *     flush to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length
    *     equal to zero committed followed by a 3DSTATE_CONSTANT_* with buffer
    *     0 read length not equal to zero committed."
    *
    * To avoid this, we program the highest slot.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_PS), c) {
#if GFX_VER > 8
      c.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
#endif
      c.ConstantBody.ReadLength[3] = DIV_ROUND_UP(params_size, 32);
      c.ConstantBody.Buffer[3] = params_addr;
   }
#endif

#if GFX_VER <= 9
   /* Gfx9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted in
    * order to commit constants. TODO: Investigate "Disable Gather at Set
    * Shader" to go back to legacy mode...
    *
    * The null writes of the generation shader also appear to disturb the next
    * RT writes, so we choose to reemit the binding table to a null RT on Gfx8
    * too.
    */
   struct iris_binder *binder = &ice->state.binder;
   iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), ptr) {
      ptr.PointertoPSBindingTable =
         binder->bt_offset[MESA_SHADER_FRAGMENT] >> IRIS_BT_OFFSET_SHIFT;
   }
   uint32_t *bt_map = binder->map + binder->bt_offset[MESA_SHADER_FRAGMENT];
   uint32_t surf_base_offset = binder->bo->address;
   bt_map[0] = ice->state.null_fb.offset - surf_base_offset;
#endif

   genX(maybe_emit_breakpoint)(batch, true);

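   /* Kick off the generation: a 3-vertex RECTLIST covering ring_count
    * fragments, where each fragment invocation handles one slot of the
    * generated command ring.
    */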
   iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
      prim.VertexAccessType         = SEQUENTIAL;
      prim.PrimitiveTopologyType    = _3DPRIM_RECTLIST;
      prim.VertexCountPerInstance   = 3;
      prim.InstanceCount            = 1;
   }


   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */

   uint64_t skip_bits = (IRIS_DIRTY_POLYGON_STIPPLE |
                         IRIS_DIRTY_SO_BUFFERS |
                         IRIS_DIRTY_SO_DECL_LIST |
                         IRIS_DIRTY_LINE_STIPPLE |
                         IRIS_ALL_DIRTY_FOR_COMPUTE |
                         IRIS_DIRTY_SCISSOR_RECT |
                         IRIS_DIRTY_VF);
   /* Wa_14016820455
    * On Gfx 12.5 platforms, the SF_CL_VIEWPORT pointer can be invalidated
    * likely by a read cache invalidation when clipping is disabled, so we
    * don't skip its dirty bit here, in order to reprogram it.
    */
   if (GFX_VERx10 != 125)
      skip_bits |= IRIS_DIRTY_SF_CL_VIEWPORT;

   uint64_t skip_stage_bits = (IRIS_ALL_STAGE_DIRTY_FOR_COMPUTE |
                               IRIS_STAGE_DIRTY_UNCOMPILED_VS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TES |
                               IRIS_STAGE_DIRTY_UNCOMPILED_GS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_FS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_VS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TCS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TES |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_GS);

   if (!ice->shaders.prog[MESA_SHADER_TESS_EVAL]) {
      /* Generation disabled tessellation, but it was already off anyway */
      skip_stage_bits |= IRIS_STAGE_DIRTY_TCS |
                         IRIS_STAGE_DIRTY_TES |
                         IRIS_STAGE_DIRTY_CONSTANTS_TCS |
                         IRIS_STAGE_DIRTY_CONSTANTS_TES |
                         IRIS_STAGE_DIRTY_BINDINGS_TCS |
                         IRIS_STAGE_DIRTY_BINDINGS_TES;
   }

   if (!ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
      /* Generation disabled geometry shaders, but it was already off
       * anyway
       */
      skip_stage_bits |= IRIS_STAGE_DIRTY_GS |
                         IRIS_STAGE_DIRTY_CONSTANTS_GS |
                         IRIS_STAGE_DIRTY_BINDINGS_GS;
   }

   ice->state.dirty |= ~skip_bits;
   ice->state.stage_dirty |= ~skip_stage_bits;

   for (int i = 0; i < ARRAY_SIZE(ice->shaders.urb.cfg.size); i++)
      ice->shaders.urb.cfg.size[i] = 0;

#if GFX_VER <= 9
   /* Now reupdate the binding tables with the new offsets for the actual
    * application shaders.
    */
   iris_binder_reserve_3d(ice);
   screen->vtbl.update_binder_address(batch, binder);
#endif
}

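/* The generated draw commands are written into a dedicated 128KB "ring"
 * BO, allocated lazily and reused for subsequent generated draws.
 */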
#define RING_SIZE (128 * 1024)

static void
ensure_ring_bo(struct iris_context *ice, struct iris_screen *screen)
{
   struct iris_bufmgr *bufmgr = screen->bufmgr;

   if (ice->draw.generation.ring_bo != NULL)
      return;

   ice->draw.generation.ring_bo =
      iris_bo_alloc(bufmgr, "gen ring",
                    RING_SIZE, 8, IRIS_MEMZONE_OTHER,
                    BO_ALLOC_NO_SUBALLOC);
   iris_get_backing_bo(ice->draw.generation.ring_bo)->real.capture = true;
}

struct iris_gen_indirect_params *
genX(emit_indirect_generate)(struct iris_batch *batch,
                             const struct pipe_draw_info *draw,
                             const struct pipe_draw_indirect_info *indirect,
                             const struct pipe_draw_start_count_bias *sc,
                             struct iris_address *out_params_addr)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;

   iris_ensure_indirect_generation_shader(batch);
   ensure_ring_bo(ice, screen);

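   /* Size of one entry in the application's indirect buffer: 5 dwords for
    * indexed draws, 4 dwords for non-indexed draws.
    */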
   const size_t struct_stride = draw->index_size > 0 ?
      sizeof(uint32_t) * 5 :
      sizeof(uint32_t) * 4;
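
   /* Size in bytes of the commands the generation shader writes per draw:
    * an optional 3DSTATE_VERTEX_BUFFERS (for draw parameters and/or derived
    * draw parameters) followed by a 3DPRIMITIVE.
    */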
   unsigned cmd_stride = 0;
   if (ice->state.vs_uses_draw_params ||
       ice->state.vs_uses_derived_draw_params) {
      cmd_stride += 4; /* 3DSTATE_VERTEX_BUFFERS */

      if (ice->state.vs_uses_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);

      if (ice->state.vs_uses_derived_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);
   }
   cmd_stride += 4 * GENX(3DPRIMITIVE_length);

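   /* Number of draws that fit in the ring: reserve room for the trailing
    * MI_BATCH_BUFFER_START (plus an MI_ARB_CHECK on Gfx12+) emitted after
    * the generated draws, and account for 2 extra dwords per draw for the
    * draw_id and is_indexed_draw values.
    */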
   const unsigned setup_dws =
#if GFX_VER >= 12
      GENX(MI_ARB_CHECK_length) +
#endif
      GENX(MI_BATCH_BUFFER_START_length);
   const unsigned ring_count =
      (RING_SIZE - 4 * setup_dws) /
      (cmd_stride + 4 * 2 /* draw_id, is_indexed_draw */);

   uint32_t params_size = align(sizeof(struct iris_gen_indirect_params), 32);
   struct iris_gen_indirect_params *params =
      upload_state(batch, ice->ctx.const_uploader,
                   &ice->draw.generation.params,
                   params_size, 64);
   *out_params_addr =
      ro_bo(iris_resource_bo(ice->draw.generation.params.res),
            ice->draw.generation.params.offset);

   iris_use_pinned_bo(batch,
                      iris_resource_bo(indirect->buffer),
                      false, IRIS_DOMAIN_NONE);
   if (indirect->indirect_draw_count) {
      iris_use_pinned_bo(batch,
                         iris_resource_bo(indirect->indirect_draw_count),
                         false, IRIS_DOMAIN_NONE);
   }
   iris_use_pinned_bo(batch, ice->draw.generation.ring_bo,
                      false, IRIS_DOMAIN_NONE);

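   /* Fill out the parameters consumed by the generation shader. The flags
    * field packs the ANV_GENERATED_FLAG_* bits in the low byte, the vertex
    * buffer MOCS in bits 8-15, the command stride in dwords in bits 16-23,
    * and the number of bound vertex buffers in bits 24-31.
    */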
   *params = (struct iris_gen_indirect_params) {
      .generated_cmds_addr  = ice->draw.generation.ring_bo->address,
      .ring_count           = ring_count,
      .draw_id_addr         = ice->draw.generation.ring_bo->address +
                              ring_count * cmd_stride +
                              4 * GENX(MI_BATCH_BUFFER_START_length),
      .draw_count_addr      = indirect->indirect_draw_count ?
                              (iris_resource_bo(indirect->indirect_draw_count)->address +
                               indirect->indirect_draw_count_offset) : 0,
      .indirect_data_addr   = iris_resource_bo(indirect->buffer)->address +
                              indirect->offset,
      .indirect_data_stride = indirect->stride == 0 ?
                              struct_stride : indirect->stride,
      .max_draw_count       = indirect->draw_count,
      .flags                = (draw->index_size > 0 ? ANV_GENERATED_FLAG_INDEXED : 0) |
                              (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT ?
                               ANV_GENERATED_FLAG_PREDICATED : 0) |
                              (ice->state.vs_uses_draw_params ?
                               ANV_GENERATED_FLAG_BASE : 0) |
                              (ice->state.vs_uses_derived_draw_params ?
                               ANV_GENERATED_FLAG_DRAWID : 0) |
                              (iris_mocs(NULL, &screen->isl_dev,
                                         ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
                              ((cmd_stride / 4) << 16) |
                              util_bitcount64(ice->state.bound_vertex_buffers) << 24,
   };

   genX(maybe_emit_breakpoint)(batch, true);

   emit_indirect_generate_draw(batch, *out_params_addr, params_size,
                               MIN2(ring_count, indirect->draw_count));

   genX(emit_3dprimitive_was)(batch, indirect, ice->state.prim_mode, sc->count);
   genX(maybe_emit_breakpoint)(batch, false);


   return params;
}
679