/* Copyright © 2023 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include <stdio.h>
#include <errno.h>

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "util/u_upload_mgr.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_serialize.h"
#include "intel/common/intel_aux_map.h"
#include "intel/common/intel_l3_config.h"
#include "intel/common/intel_sample_positions.h"
#include "intel/ds/intel_tracepoints.h"
#include "iris_batch.h"
#include "iris_context.h"
#include "iris_defines.h"
#include "iris_pipe.h"
#include "iris_resource.h"
#include "iris_utrace.h"

#include "iris_genx_macros.h"

#if GFX_VER >= 9
#include "intel/compiler/brw_compiler.h"
#include "intel/common/intel_genX_state_brw.h"
#else
#include "intel/compiler/elk/elk_compiler.h"
#include "intel/common/intel_genX_state_elk.h"
#endif

#include "libintel_shaders.h"

#if GFX_VERx10 == 80
# include "intel_gfx8_shaders_code.h"
#elif GFX_VERx10 == 90
# include "intel_gfx9_shaders_code.h"
#elif GFX_VERx10 == 110
# include "intel_gfx11_shaders_code.h"
#elif GFX_VERx10 == 120
# include "intel_gfx12_shaders_code.h"
#elif GFX_VERx10 == 125
# include "intel_gfx125_shaders_code.h"
#elif GFX_VERx10 == 200
# include "intel_gfx20_shaders_code.h"
#else
# error "Unsupported generation"
#endif

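/* Load a field of struct iris_gen_indirect_params from the push constants,
 * addressed by the field's byte offset within the structure.
 */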
#define load_param(b, bit_size, struct_name, field_name) \
   nir_load_uniform(b, 1, bit_size, nir_imm_int(b, 0), \
                    .base = offsetof(struct_name, field_name), \
                    .range = bit_size / 8)

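/* Compute a linear index from the fragment coordinate. The generation
 * shader is drawn as a rectangle at most 8192 pixels wide (see
 * emit_indirect_generate_draw), so the index is y * 8192 + x.
 */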
static nir_def *
load_fragment_index(nir_builder *b)
{
   nir_def *pos_in = nir_f2i32(b, nir_trim_vector(b, nir_load_frag_coord(b), 2));
   return nir_iadd(b,
                   nir_imul_imm(b, nir_channel(b, pos_in, 1), 8192),
                   nir_channel(b, pos_in, 0));
}

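/* Deserialize the precompiled NIR shader library for this generation
 * (the genX(intel_shaders_nir) blob), using the KERNEL-stage compiler
 * options from the brw/elk compiler.
 */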
static nir_shader *
load_shader_lib(struct iris_screen *screen, void *mem_ctx)
{
   const nir_shader_compiler_options *nir_options =
#if GFX_VER >= 9
      screen->brw->nir_options[MESA_SHADER_KERNEL];
#else
      screen->elk->nir_options[MESA_SHADER_KERNEL];
#endif

   struct blob_reader blob;
   blob_reader_init(&blob, (void *)genX(intel_shaders_nir),
                    sizeof(genX(intel_shaders_nir)));
   return nir_deserialize(mem_ctx, nir_options, &blob);
}

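/* Emit a call to the shader library's draw-writing function, with every
 * argument loaded from the push constants via load_param(). Returns the
 * size of the parameter structure so the caller knows how much constant
 * data to push.
 */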
static unsigned
iris_call_generation_shader(struct iris_screen *screen, nir_builder *b)
{
   genX(libiris_write_draw)(
      b,
      load_param(b, 64, struct iris_gen_indirect_params, generated_cmds_addr),
      load_param(b, 64, struct iris_gen_indirect_params, indirect_data_addr),
      load_param(b, 64, struct iris_gen_indirect_params, draw_id_addr),
      load_param(b, 32, struct iris_gen_indirect_params, indirect_data_stride),
      load_param(b, 64, struct iris_gen_indirect_params, draw_count_addr),
      load_param(b, 32, struct iris_gen_indirect_params, draw_base),
      load_param(b, 32, struct iris_gen_indirect_params, max_draw_count),
      load_param(b, 32, struct iris_gen_indirect_params, flags),
      load_param(b, 32, struct iris_gen_indirect_params, ring_count),
      load_param(b, 64, struct iris_gen_indirect_params, gen_addr),
      load_param(b, 64, struct iris_gen_indirect_params, end_addr),
      load_fragment_index(b));
   return sizeof(struct iris_gen_indirect_params);
}

void
genX(init_screen_gen_state)(struct iris_screen *screen)
{
   screen->vtbl.load_shader_lib = load_shader_lib;
   screen->vtbl.call_generation_shader = iris_call_generation_shader;
}

/**
 * Stream out temporary/short-lived state.
 *
 * This allocates space, pins the BO, and includes the BO address in the
 * returned offset (which works because all state lives in 32-bit memory
 * zones).
 */
static void *
upload_state(struct iris_batch *batch,
             struct u_upload_mgr *uploader,
             struct iris_state_ref *ref,
             unsigned size,
             unsigned alignment)
{
   void *p = NULL;
   u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
   iris_use_pinned_bo(batch, iris_resource_bo(ref->res), false, IRIS_DOMAIN_NONE);
   return p;
}

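/* Like upload_state(), but also records the state size (for state-size
 * debug accounting) and converts the returned offset to be relative to the
 * dynamic state base address, as expected by state pointer packets.
 */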
static uint32_t *
stream_state(struct iris_batch *batch,
             struct u_upload_mgr *uploader,
             struct pipe_resource **out_res,
             unsigned size,
             unsigned alignment,
             uint32_t *out_offset)
{
   void *ptr = NULL;

   u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &ptr);

   struct iris_bo *bo = iris_resource_bo(*out_res);
   iris_use_pinned_bo(batch, bo, false, IRIS_DOMAIN_NONE);

   iris_record_state_size(batch->state_sizes,
                          bo->address + *out_offset, size);

   *out_offset += iris_bo_offset_from_base_address(bo);

   return ptr;
}

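/* Emit the full 3D pipeline state needed to run the generation fragment
 * shader, then draw a RECTLIST covering one pixel per generated draw
 * (ring_count pixels, laid out in rows of up to 8192).
 */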
static void
emit_indirect_generate_draw(struct iris_batch *batch,
                            struct iris_address params_addr,
                            unsigned params_size,
                            unsigned ring_count)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;
   struct isl_device *isl_dev = &screen->isl_dev;
   const struct intel_device_info *devinfo = screen->devinfo;

   /* State emission.
    *
    * Two vertex elements: element 0 is fed from vertex buffer 1 (with the
    * instance ID injected into its component 1 via 3DSTATE_VF_SGVS below),
    * element 1 carries the position from vertex buffer 0.
    */
   uint32_t ves_dws[1 + 2 * GENX(VERTEX_ELEMENT_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), ves_dws, ve) {
      ve.DWordLength = 1 + GENX(VERTEX_ELEMENT_STATE_length) * 2 -
                       GENX(3DSTATE_VERTEX_ELEMENTS_length_bias);
   }
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[1], ve) {
      ve.VertexBufferIndex = 1;
      ve.Valid = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control = VFCOMP_STORE_SRC;
      ve.Component1Control = VFCOMP_STORE_0;
      ve.Component2Control = VFCOMP_STORE_0;
      ve.Component3Control = VFCOMP_STORE_0;
   }
   iris_pack_state(GENX(VERTEX_ELEMENT_STATE), &ves_dws[3], ve) {
      ve.VertexBufferIndex = 0;
      ve.Valid = true;
      ve.SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT;
      ve.SourceElementOffset = 0;
      ve.Component0Control = VFCOMP_STORE_SRC;
      ve.Component1Control = VFCOMP_STORE_SRC;
      ve.Component2Control = VFCOMP_STORE_SRC;
      ve.Component3Control = VFCOMP_STORE_1_FP;
   }

   iris_batch_emit(batch, ves_dws, sizeof(ves_dws));

   iris_emit_cmd(batch, GENX(3DSTATE_VF_STATISTICS), vf);
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.InstanceIDEnable = true;
      sgvs.InstanceIDComponentNumber = COMP_1;
      sgvs.InstanceIDElementOffset = 0;
   }
#if GFX_VER >= 11
   iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
#endif
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable = false;
      vfi.VertexElementIndex = 0;
   }
   iris_emit_cmd(batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
      vfi.InstancingEnable = false;
      vfi.VertexElementIndex = 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
      topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
   }

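   /* Minimal URB configuration: give each stage a nominal entry size and
    * report tessellation/geometry as absent.
    */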
   ice->shaders.urb.cfg.size[MESA_SHADER_VERTEX] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_CTRL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_TESS_EVAL] = 1;
   ice->shaders.urb.cfg.size[MESA_SHADER_GEOMETRY] = 1;
   genX(emit_urb_config)(batch,
                         false /* has_tess_eval */,
                         false /* has_geometry */);

   iris_emit_cmd(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
      ps_blend.HasWriteableRT = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), wm);

#if GFX_VER >= 12
   iris_emit_cmd(batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
      db.DepthBoundsTestEnable = false;
      db.DepthBoundsTestMinValue = 0.0;
      db.DepthBoundsTestMaxValue = 1.0;
   }
#endif

   iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms);
   iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
      sm.SampleMask = 0x1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VS), vs);
   iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);
   iris_emit_cmd(batch, GENX(3DSTATE_TE), te);
   iris_emit_cmd(batch, GENX(3DSTATE_DS), DS);

   iris_emit_cmd(batch, GENX(3DSTATE_STREAMOUT), so);

   iris_emit_cmd(batch, GENX(3DSTATE_GS), gs);

   iris_emit_cmd(batch, GENX(3DSTATE_CLIP), clip) {
      clip.PerspectiveDivideDisable = true;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_SF), sf) {
#if GFX_VER >= 12
      sf.DerefBlockSize = ice->state.urb_deref_block_size;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_RASTER), raster) {
      raster.CullMode = CULLMODE_NONE;
   }

   const struct iris_compiled_shader *shader = ice->draw.generation.shader;
   const struct iris_fs_data *fs_data = iris_fs_data_const(shader);

   iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
      sbe.VertexURBEntryReadOffset = 1;
      sbe.NumberofSFOutputAttributes = fs_data->num_varying_inputs;
      sbe.VertexURBEntryReadLength = MAX2((fs_data->num_varying_inputs + 1) / 2, 1);
      sbe.ConstantInterpolationEnable = fs_data->flat_inputs;
      sbe.ForceVertexURBEntryReadLength = true;
      sbe.ForceVertexURBEntryReadOffset = true;
#if GFX_VER >= 9
      for (unsigned i = 0; i < 32; i++)
         sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
#endif
   }

   iris_emit_cmd(batch, GENX(3DSTATE_WM), wm) {
      if (fs_data->has_side_effects || fs_data->uses_kill)
         wm.ForceThreadDispatchEnable = ForceON;
   }

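   /* Pixel shader dispatch state for the generation shader; kernel start
    * pointers and GRF start registers come from the brw/elk WM prog data.
    */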
   iris_emit_cmd(batch, GENX(3DSTATE_PS), ps) {
#if GFX_VER >= 9
      struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(shader->brw_prog_data);
#else
      struct elk_wm_prog_data *wm_prog_data = elk_wm_prog_data(shader->elk_prog_data);
#endif
      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                  1 /* rasterization_samples */,
                                  0 /* msaa_flags */);

      ps.VectorMaskEnable = fs_data->uses_vmask;

      ps.BindingTableEntryCount = GFX_VER == 9 ? 1 : 0;
#if GFX_VER < 20
      ps.PushConstantEnable = shader->nr_params > 0 ||
                              shader->ubo_ranges[0].length;
#endif

#if GFX_VER >= 9
      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
#endif

      ps.KernelStartPointer0 = KSP(ice->draw.generation.shader) +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = KSP(ice->draw.generation.shader) +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
#if GFX_VER < 20
      ps.KernelStartPointer2 = KSP(ice->draw.generation.shader) +
                               brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif
#else
      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);

      ps.KernelStartPointer0 = KSP(ice->draw.generation.shader) +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = KSP(ice->draw.generation.shader) +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
      ps.KernelStartPointer2 = KSP(ice->draw.generation.shader) +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
#endif

      ps.MaximumNumberofThreadsPerPSD = devinfo->max_threads_per_psd - 1;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_PS_EXTRA), psx) {
      psx.PixelShaderValid = true;
#if GFX_VER < 20
      psx.AttributeEnable = fs_data->num_varying_inputs > 0;
#endif
      psx.PixelShaderIsPerSample = fs_data->is_per_sample;
      psx.PixelShaderComputedDepthMode = fs_data->computed_depth_mode;
#if GFX_VER >= 9
#if GFX_VER >= 20
      assert(!fs_data->pulls_bary);
#else
      psx.PixelShaderPullsBary = fs_data->pulls_bary;
#endif
      psx.PixelShaderComputesStencil = fs_data->computed_stencil;
#endif
      psx.PixelShaderHasUAV = GFX_VER == 8;
   }

   iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
      uint32_t cc_vp_address;
      uint32_t *cc_vp_map =
         stream_state(batch, ice->state.dynamic_uploader,
                      &ice->state.last_res.cc_vp,
                      4 * GENX(CC_VIEWPORT_length), 32, &cc_vp_address);

      iris_pack_state(GENX(CC_VIEWPORT), cc_vp_map, ccv) {
         ccv.MinimumDepth = 0.0f;
         ccv.MaximumDepth = 1.0f;
      }
      cc.CCViewportPointer = cc_vp_address;
   }

#if GFX_VER >= 12
   /* Disable Primitive Replication. */
   iris_emit_cmd(batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr);
#endif

#if GFX_VERx10 == 125
   /* DG2: Wa_22011440098
    * MTL: Wa_18022330953
    *
    * In 3D mode, after programming the push constant alloc command,
    * immediately program a push constant command (ZERO length) without any
    * commit between them.
    *
    * Note that Wa_16011448509 isn't needed here as all address bits are zero.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_ALL), c) {
      /* Update empty push constants for all stages (bitmask = 11111b) */
      c.ShaderUpdateEnable = 0x1f;
      c.MOCS = iris_mocs(NULL, isl_dev, 0);
   }
#endif

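   /* Rectangle covering one pixel per generated draw: up to 8192 pixels
    * wide, DIV_ROUND_UP(ring_count, 8192) rows tall.
    */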
   float x0 = 0.0f, x1 = MIN2(ring_count, 8192);
   float y0 = 0.0f, y1 = DIV_ROUND_UP(ring_count, 8192);
   float z = 0.0f;

   float *vertices =
      upload_state(batch, ice->state.dynamic_uploader,
                   &ice->draw.generation.vertices,
                   ALIGN(9 * sizeof(float), 8), 8);

   vertices[0] = x1; vertices[1] = y1; vertices[2] = z; /* v0 */
   vertices[3] = x0; vertices[4] = y1; vertices[5] = z; /* v1 */
   vertices[6] = x0; vertices[7] = y0; vertices[8] = z; /* v2 */

   uint32_t vbs_dws[1 + GENX(VERTEX_BUFFER_STATE_length)];
   iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), vbs_dws, vbs) {
      vbs.DWordLength = ARRAY_SIZE(vbs_dws) -
                        GENX(3DSTATE_VERTEX_BUFFERS_length_bias);
   }
   _iris_pack_state(batch, GENX(VERTEX_BUFFER_STATE), &vbs_dws[1], vb) {
      vb.VertexBufferIndex = 0;
      vb.AddressModifyEnable = true;
      vb.BufferStartingAddress = ro_bo(iris_resource_bo(ice->draw.generation.vertices.res),
                                       ice->draw.generation.vertices.offset);
      vb.BufferPitch = 3 * sizeof(float);
      vb.BufferSize = 9 * sizeof(float);
      vb.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_VERTEX_BUFFER_BIT);
#if GFX_VER >= 12
      vb.L3BypassDisable = true;
#endif
   }
   iris_batch_emit(batch, vbs_dws, sizeof(vbs_dws));

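   /* Push the generation parameters to the fragment shader as push
    * constants: 3DSTATE_CONSTANT_ALL on Gfx12.5+, 3DSTATE_CONSTANT_PS on
    * older parts.
    */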
#if GFX_VERx10 > 120
   uint32_t const_dws[GENX(3DSTATE_CONSTANT_ALL_length) +
                      GENX(3DSTATE_CONSTANT_ALL_DATA_length)];

   iris_pack_command(GENX(3DSTATE_CONSTANT_ALL), const_dws, all) {
      all.DWordLength = ARRAY_SIZE(const_dws) -
                        GENX(3DSTATE_CONSTANT_ALL_length_bias);
      all.ShaderUpdateEnable = 1 << MESA_SHADER_FRAGMENT;
      all.MOCS = isl_mocs(isl_dev, 0, false);
      all.PointerBufferMask = 0x1;
   }
   _iris_pack_state(batch, GENX(3DSTATE_CONSTANT_ALL_DATA),
                    &const_dws[GENX(3DSTATE_CONSTANT_ALL_length)], data) {
      data.PointerToConstantBuffer = params_addr;
      data.ConstantBufferReadLength = DIV_ROUND_UP(params_size, 32);
   }
   iris_batch_emit(batch, const_dws, sizeof(const_dws));
#else
   /* The Skylake PRM contains the following restriction:
    *
    *    "The driver must ensure The following case does not occur without a
    *     flush to the 3D engine: 3DSTATE_CONSTANT_* with buffer 3 read length
    *     equal to zero committed followed by a 3DSTATE_CONSTANT_* with buffer
    *     0 read length not equal to zero committed."
    *
    * To avoid this, we program the highest slot.
    */
   iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_PS), c) {
#if GFX_VER > 8
      c.MOCS = iris_mocs(NULL, isl_dev, ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
#endif
      c.ConstantBody.ReadLength[3] = DIV_ROUND_UP(params_size, 32);
      c.ConstantBody.Buffer[3] = params_addr;
   }
#endif


#if GFX_VER <= 9
   /* Gfx9 requires 3DSTATE_BINDING_TABLE_POINTERS_XS to be re-emitted in
    * order to commit constants. TODO: Investigate "Disable Gather at Set
    * Shader" to go back to legacy mode...
    *
    * The null writes of the generation shader also appear to disturb the
    * next RT writes, so we choose to re-emit the binding table to a null RT
    * on Gfx8 too.
    */
   struct iris_binder *binder = &ice->state.binder;
   iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), ptr) {
      ptr.PointertoPSBindingTable =
         binder->bt_offset[MESA_SHADER_FRAGMENT] >> IRIS_BT_OFFSET_SHIFT;
   }
   uint32_t *bt_map = binder->map + binder->bt_offset[MESA_SHADER_FRAGMENT];
   uint32_t surf_base_offset = binder->bo->address;
   bt_map[0] = ice->state.null_fb.offset - surf_base_offset;
#endif

   genX(maybe_emit_breakpoint)(batch, true);

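   /* Draw the rectangle; each covered pixel runs one invocation of the
    * generation shader.
    */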
   iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
      prim.VertexAccessType = SEQUENTIAL;
      prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
      prim.VertexCountPerInstance = 3;
      prim.InstanceCount = 1;
   }

   /* We've smashed all state compared to what the normal 3D pipeline
    * rendering tracks for GL.
    */

   uint64_t skip_bits = (IRIS_DIRTY_POLYGON_STIPPLE |
                         IRIS_DIRTY_SO_BUFFERS |
                         IRIS_DIRTY_SO_DECL_LIST |
                         IRIS_DIRTY_LINE_STIPPLE |
                         IRIS_ALL_DIRTY_FOR_COMPUTE |
                         IRIS_DIRTY_SCISSOR_RECT |
                         IRIS_DIRTY_VF);
   /* Wa_14016820455
    * On Gfx 12.5 platforms, the SF_CL_VIEWPORT pointer can be invalidated
    * (likely by a read cache invalidation) when clipping is disabled, so we
    * don't skip its dirty bit here, ensuring it gets reprogrammed.
    */
   if (GFX_VERx10 != 125)
      skip_bits |= IRIS_DIRTY_SF_CL_VIEWPORT;


   uint64_t skip_stage_bits = (IRIS_ALL_STAGE_DIRTY_FOR_COMPUTE |
                               IRIS_STAGE_DIRTY_UNCOMPILED_VS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_TES |
                               IRIS_STAGE_DIRTY_UNCOMPILED_GS |
                               IRIS_STAGE_DIRTY_UNCOMPILED_FS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_VS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TCS |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_TES |
                               IRIS_STAGE_DIRTY_SAMPLER_STATES_GS);

   if (!ice->shaders.prog[MESA_SHADER_TESS_EVAL]) {
      /* Generation disabled tessellation, but it was already off anyway */
      skip_stage_bits |= IRIS_STAGE_DIRTY_TCS |
                         IRIS_STAGE_DIRTY_TES |
                         IRIS_STAGE_DIRTY_CONSTANTS_TCS |
                         IRIS_STAGE_DIRTY_CONSTANTS_TES |
                         IRIS_STAGE_DIRTY_BINDINGS_TCS |
                         IRIS_STAGE_DIRTY_BINDINGS_TES;
   }

   if (!ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
      /* Generation disabled geometry shaders, but it was already off
       * anyway
       */
      skip_stage_bits |= IRIS_STAGE_DIRTY_GS |
                         IRIS_STAGE_DIRTY_CONSTANTS_GS |
                         IRIS_STAGE_DIRTY_BINDINGS_GS;
   }

   ice->state.dirty |= ~skip_bits;
   ice->state.stage_dirty |= ~skip_stage_bits;

   for (int i = 0; i < ARRAY_SIZE(ice->shaders.urb.cfg.size); i++)
      ice->shaders.urb.cfg.size[i] = 0;

#if GFX_VER <= 9
   /* Now reupdate the binding tables with the new offsets for the actual
    * application shaders.
    */
   iris_binder_reserve_3d(ice);
   screen->vtbl.update_binder_address(batch, binder);
#endif
}

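/* Size of the ring BO that holds the generated draw commands. */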
#define RING_SIZE (128 * 1024)

static void
ensure_ring_bo(struct iris_context *ice, struct iris_screen *screen)
{
   struct iris_bufmgr *bufmgr = screen->bufmgr;

   if (ice->draw.generation.ring_bo != NULL)
      return;

   ice->draw.generation.ring_bo =
      iris_bo_alloc(bufmgr, "gen ring",
                    RING_SIZE, 8, IRIS_MEMZONE_OTHER,
                    BO_ALLOC_NO_SUBALLOC);
   iris_get_backing_bo(ice->draw.generation.ring_bo)->real.capture = true;
}

struct iris_gen_indirect_params *
genX(emit_indirect_generate)(struct iris_batch *batch,
                             const struct pipe_draw_info *draw,
                             const struct pipe_draw_indirect_info *indirect,
                             const struct pipe_draw_start_count_bias *sc,
                             struct iris_address *out_params_addr)
{
   struct iris_screen *screen = batch->screen;
   struct iris_context *ice = batch->ice;

   iris_ensure_indirect_generation_shader(batch);
   ensure_ring_bo(ice, screen);

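   /* Natural stride of the application's indirect data: 5 dwords per draw
    * when indexed, 4 otherwise (used when indirect->stride is 0).
    */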
   const size_t struct_stride = draw->index_size > 0 ?
                                sizeof(uint32_t) * 5 :
                                sizeof(uint32_t) * 4;
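   /* Size in bytes of the commands generated per draw: an optional
    * 3DSTATE_VERTEX_BUFFERS fixup for draw parameters, plus the
    * 3DPRIMITIVE itself.
    */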
   unsigned cmd_stride = 0;
   if (ice->state.vs_uses_draw_params ||
       ice->state.vs_uses_derived_draw_params) {
      cmd_stride += 4; /* 3DSTATE_VERTEX_BUFFERS */

      if (ice->state.vs_uses_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);

      if (ice->state.vs_uses_derived_draw_params)
         cmd_stride += 4 * GENX(VERTEX_BUFFER_STATE_length);
   }
   cmd_stride += 4 * GENX(3DPRIMITIVE_length);

   const unsigned setup_dws =
#if GFX_VER >= 12
      GENX(MI_ARB_CHECK_length) +
#endif
      GENX(MI_BATCH_BUFFER_START_length);
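   /* Number of draws the ring BO can hold: reserve room for the jump back
    * to the main batch (MI_BATCH_BUFFER_START, plus MI_ARB_CHECK on
    * Gfx12+), then each entry takes cmd_stride bytes of commands plus
    * 8 bytes of draw_id/is_indexed_draw data.
    */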
   const unsigned ring_count =
      (RING_SIZE - 4 * setup_dws) /
      (cmd_stride + 4 * 2 /* draw_id, is_indexed_draw */);

   uint32_t params_size = align(sizeof(struct iris_gen_indirect_params), 32);
   struct iris_gen_indirect_params *params =
      upload_state(batch, ice->ctx.const_uploader,
                   &ice->draw.generation.params,
                   params_size, 64);
   *out_params_addr =
      ro_bo(iris_resource_bo(ice->draw.generation.params.res),
            ice->draw.generation.params.offset);

   iris_use_pinned_bo(batch,
                      iris_resource_bo(indirect->buffer),
                      false, IRIS_DOMAIN_NONE);
   if (indirect->indirect_draw_count) {
      iris_use_pinned_bo(batch,
                         iris_resource_bo(indirect->indirect_draw_count),
                         false, IRIS_DOMAIN_NONE);
   }
   iris_use_pinned_bo(batch, ice->draw.generation.ring_bo,
                      false, IRIS_DOMAIN_NONE);

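   /* params->flags is a packed field: bits 0-7 carry the
    * ANV_GENERATED_FLAG_* bits, bits 8-15 the MOCS value for the generated
    * vertex buffers, bits 16-23 the command stride in dwords, and
    * bits 24-31 the number of bound vertex buffers.
    */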
   *params = (struct iris_gen_indirect_params) {
      .generated_cmds_addr = ice->draw.generation.ring_bo->address,
      .ring_count = ring_count,
      .draw_id_addr = ice->draw.generation.ring_bo->address +
                      ring_count * cmd_stride +
                      4 * GENX(MI_BATCH_BUFFER_START_length),
      .draw_count_addr = indirect->indirect_draw_count ?
                         (iris_resource_bo(indirect->indirect_draw_count)->address +
                          indirect->indirect_draw_count_offset) : 0,
      .indirect_data_addr = iris_resource_bo(indirect->buffer)->address +
                            indirect->offset,
      .indirect_data_stride = indirect->stride == 0 ?
                              struct_stride : indirect->stride,
      .max_draw_count = indirect->draw_count,
      .flags = (draw->index_size > 0 ? ANV_GENERATED_FLAG_INDEXED : 0) |
               (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT ?
                ANV_GENERATED_FLAG_PREDICATED : 0) |
               (ice->state.vs_uses_draw_params ?
                ANV_GENERATED_FLAG_BASE : 0) |
               (ice->state.vs_uses_derived_draw_params ?
                ANV_GENERATED_FLAG_DRAWID : 0) |
               (iris_mocs(NULL, &screen->isl_dev,
                          ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
               ((cmd_stride / 4) << 16) |
               util_bitcount64(ice->state.bound_vertex_buffers) << 24,
   };

   genX(maybe_emit_breakpoint)(batch, true);

   emit_indirect_generate_draw(batch, *out_params_addr, params_size,
                               MIN2(ring_count, indirect->draw_count));

   genX(emit_3dprimitive_was)(batch, indirect, ice->state.prim_mode, sc->count);
   genX(maybe_emit_breakpoint)(batch, false);

   return params;
}