/*
 * Copyright © 2022 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef GENX_CMD_DRAW_GENERATED_INDIRECT_H
#define GENX_CMD_DRAW_GENERATED_INDIRECT_H

#include <assert.h>
#include <stdbool.h>

#include "util/macros.h"

#include "common/intel_genX_state_brw.h"

#include "anv_private.h"
#include "anv_internal_kernels.h"

/* This is the maximum number of items a fragment shader can generate, due
 * to the viewport size.
 */
#define MAX_GENERATED_DRAW_COUNT (8192 * 8192)

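/* Maximum number of draw commands the ring buffer can hold per generation
 * shader dispatch (ring mode, see
 * genX(cmd_buffer_emit_indirect_generated_draws_inring) below).
 */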
#define MAX_RING_BO_ITEMS (8192)

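/* Dispatches the generation shader to write item_count draw commands of
 * generated_cmd_stride bytes each at generated_cmds_addr, reading the
 * indirect parameters from indirect_data_addr. Returns the push constant
 * state so callers can later patch fields such as end_addr once they are
 * known.
 */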
static struct anv_state
genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
                                     struct anv_simple_shader *simple_state,
                                     struct anv_address generated_cmds_addr,
                                     uint32_t generated_cmd_stride,
                                     struct anv_address indirect_data_addr,
                                     uint32_t indirect_data_stride,
                                     struct anv_address draw_id_addr,
                                     uint32_t item_base,
                                     uint32_t item_count,
                                     struct anv_address count_addr,
                                     uint32_t max_count,
                                     bool indexed,
                                     uint32_t ring_count)
{
   struct anv_device *device = cmd_buffer->device;

   struct anv_state push_data_state =
      genX(simple_shader_alloc_push)(simple_state,
                                     sizeof(struct anv_gen_indirect_params));
   if (push_data_state.map == NULL)
      return ANV_STATE_NULL;

   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   const bool use_tbimr = cmd_buffer->state.gfx.dyn_state.use_tbimr;

   struct anv_address draw_count_addr;
   if (anv_address_is_null(count_addr)) {
      draw_count_addr = anv_address_add(
         genX(simple_shader_push_state_address)(simple_state, push_data_state),
         offsetof(struct anv_gen_indirect_params, draw_count));
   } else {
      draw_count_addr = count_addr;
   }

   struct anv_gen_indirect_params *push_data = push_data_state.map;
   *push_data = (struct anv_gen_indirect_params) {
      .draw_id_addr = anv_address_physical(draw_id_addr),
      .indirect_data_addr = anv_address_physical(indirect_data_addr),
      .indirect_data_stride = indirect_data_stride,
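      /* ANV_GENERATED_FLAG_* bits live in the low byte, the MOCS value for
       * the indirect data at bits [8:15] and the size in dwords of one
       * generated draw command at bits 16 and up.
       */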
      .flags = (use_tbimr ? ANV_GENERATED_FLAG_TBIMR : 0) |
               (indexed ? ANV_GENERATED_FLAG_INDEXED : 0) |
               (cmd_buffer->state.conditional_render_enabled ?
                ANV_GENERATED_FLAG_PREDICATED : 0) |
               ((vs_prog_data->uses_firstvertex ||
                 vs_prog_data->uses_baseinstance) ?
                ANV_GENERATED_FLAG_BASE : 0) |
               (vs_prog_data->uses_drawid ? ANV_GENERATED_FLAG_DRAWID : 0) |
               (anv_mocs(device, indirect_data_addr.bo,
                         ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
               (!anv_address_is_null(count_addr) ?
                ANV_GENERATED_FLAG_COUNT : 0) |
               (ring_count != 0 ? ANV_GENERATED_FLAG_RING_MODE : 0) |
               ((generated_cmd_stride / 4) << 16),
      .draw_base = item_base,
      .max_draw_count = max_count,
      .ring_count = ring_count,
      .instance_multiplier = pipeline->instance_multiplier,
      .draw_count = anv_address_is_null(count_addr) ? max_count : 0,
      .generated_cmds_addr = anv_address_physical(generated_cmds_addr),
      .draw_count_addr = anv_address_physical(draw_count_addr),
   };

   genX(emit_simple_shader_dispatch)(simple_state, item_count, push_data_state);

   return push_data_state;
}

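/* Emitted before the first generated draw of the command buffer: jumps from
 * the main batch into the generation batch, remembers the return address and
 * initializes the internal kernel used to generate the draw commands.
 */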
static void
genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_buffer)
{
   anv_batch_emit_ensure_space(&cmd_buffer->generation.batch, 4);

   trace_intel_begin_generate_draws(&cmd_buffer->trace);

   anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
      bbs.AddressSpaceIndicator = ASI_PPGTT;
      bbs.BatchBufferStartAddress =
         anv_batch_current_address(&cmd_buffer->generation.batch);
   }

   cmd_buffer->generation.return_addr = anv_batch_current_address(&cmd_buffer->batch);

#if GFX_VER >= 12
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
      arb.PreParserDisableMask = true;
      arb.PreParserDisable = false;
   }
#endif

   trace_intel_end_generate_draws(&cmd_buffer->trace);

   struct anv_shader_bin *gen_kernel;
   VkResult ret =
      anv_device_get_internal_shader(
         cmd_buffer->device,
         ANV_INTERNAL_KERNEL_GENERATED_DRAWS,
         &gen_kernel);
   if (ret != VK_SUCCESS) {
      anv_batch_set_error(&cmd_buffer->batch, ret);
      return;
   }

   struct anv_device *device = cmd_buffer->device;
   struct anv_simple_shader *state = &cmd_buffer->generation.shader_state;
   *state = (struct anv_simple_shader) {
      .device = device,
      .cmd_buffer = cmd_buffer,
      .dynamic_state_stream = &cmd_buffer->dynamic_state_stream,
      .general_state_stream = &cmd_buffer->general_state_stream,
      .batch = &cmd_buffer->generation.batch,
      .kernel = gen_kernel,
      .l3_config = device->internal_kernels_l3_config,
      .urb_cfg = &cmd_buffer->state.gfx.urb_cfg,
   };

   genX(emit_simple_shader_init)(state);
}

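/* On Gfx9, gl_DrawID is sourced from an additional vertex buffer, so we need
 * a buffer holding one uint32_t per draw. On Gfx11+ the draw ID is passed
 * through the extended parameters of 3DPRIMITIVE instead.
 */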
static struct anv_address
genX(cmd_buffer_get_draw_id_addr)(struct anv_cmd_buffer *cmd_buffer,
                                  uint32_t draw_id_count)
{
#if GFX_VER >= 11
   return ANV_NULL_ADDRESS;
#else
   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   if (!vs_prog_data->uses_drawid)
      return ANV_NULL_ADDRESS;

   struct anv_state draw_id_state =
      anv_cmd_buffer_alloc_temporary_state(cmd_buffer, 4 * draw_id_count, 4);
   return anv_cmd_buffer_temporary_state_address(cmd_buffer, draw_id_state);
#endif
}

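/* Returns the size in bytes of the command stream generated for one draw. */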
static uint32_t
genX(cmd_buffer_get_generated_draw_stride)(struct anv_cmd_buffer *cmd_buffer)
{
   /* With the extended parameters in 3DPRIMITIVE on Gfx11+ we can emit
    * everything. Prior to this, we need to emit a couple of
    * VERTEX_BUFFER_STATE.
    */
#if GFX_VER >= 11
   return 4 * GENX(3DPRIMITIVE_EXTENDED_length);
#else
   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   uint32_t len = 0;

   if (vs_prog_data->uses_firstvertex ||
       vs_prog_data->uses_baseinstance ||
       vs_prog_data->uses_drawid) {
      len += 4; /* 3DSTATE_VERTEX_BUFFERS */

      if (vs_prog_data->uses_firstvertex ||
          vs_prog_data->uses_baseinstance)
         len += 4 * GENX(VERTEX_BUFFER_STATE_length);

      if (vs_prog_data->uses_drawid)
         len += 4 * GENX(VERTEX_BUFFER_STATE_length);
   }

   return len + 4 * GENX(3DPRIMITIVE_length);
#endif
}

static void
genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer,
                                          struct anv_gen_indirect_params *params)
{
   /* We don't know the end_addr until we have emitted all the generation
    * draws. Go and edit the address of all the push parameters.
    */
   uint64_t end_addr =
      anv_address_physical(anv_batch_current_address(&cmd_buffer->batch));
   while (params != NULL) {
      params->end_addr = end_addr;
      params = params->prev;
   }
}

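/* In-place generation: reserve enough space in the main batch for all the
 * draw commands and have the generation shader write them directly there,
 * in chunks of up to MAX_GENERATED_DRAW_COUNT draws.
 */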
static void
genX(cmd_buffer_emit_indirect_generated_draws_inplace)(struct anv_cmd_buffer *cmd_buffer,
                                                       struct anv_address indirect_data_addr,
                                                       uint32_t indirect_data_stride,
                                                       struct anv_address count_addr,
                                                       uint32_t max_draw_count,
                                                       bool indexed)
{
   const bool start_generation_batch =
      anv_address_is_null(cmd_buffer->generation.return_addr);

   genX(flush_pipeline_select_3d)(cmd_buffer);

   struct anv_address draw_id_addr =
      genX(cmd_buffer_get_draw_id_addr)(cmd_buffer, max_draw_count);

#if GFX_VER == 9
   /* Mark the VB-0 as using the entire dynamic state pool area, but only for
    * the draw call starting the generation batch. All the following ones will
    * use the same area.
    */
   if (start_generation_batch) {
      struct anv_device *device = cmd_buffer->device;
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(
         cmd_buffer, 0,
         (struct anv_address) {
            .offset = device->physical->va.dynamic_state_pool.addr,
         },
         device->physical->va.dynamic_state_pool.size);
   }

   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   if (vs_prog_data->uses_baseinstance ||
       vs_prog_data->uses_firstvertex) {
      /* We're using the indirect buffer directly to source base instance &
       * first vertex values. Mark the entire area as used.
       */
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
                                                     indirect_data_addr,
                                                     indirect_data_stride * max_draw_count);
   }

   if (vs_prog_data->uses_drawid) {
      /* Mark the whole draw id buffer as used. */
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
                                                     draw_id_addr,
                                                     sizeof(uint32_t) * max_draw_count);
   }
#endif

   /* Apply the pipeline flush here so the indirect data is available for the
    * generation shader.
    */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   if (start_generation_batch)
      genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer);

   /* Emit the 3D state in the main batch. */
   genX(cmd_buffer_flush_gfx_state)(cmd_buffer);

   if (cmd_buffer->state.conditional_render_enabled)
      genX(cmd_emit_conditional_render_predicate)(cmd_buffer);

   const uint32_t draw_cmd_stride =
      genX(cmd_buffer_get_generated_draw_stride)(cmd_buffer);

   struct anv_gen_indirect_params *last_params = NULL;
   uint32_t item_base = 0;
   while (item_base < max_draw_count) {
      const uint32_t item_count = MIN2(max_draw_count - item_base,
                                       MAX_GENERATED_DRAW_COUNT);
      const uint32_t draw_cmd_size = item_count * draw_cmd_stride;

      /* Ensure we have enough contiguous space for all the draws so that the
       * compute shader can edit all the 3DPRIMITIVEs from a single base
       * address.
       *
       * TODO: we might have to split that if the amount of space is too
       * large (at 1Mb?).
       */
      VkResult result = anv_batch_emit_ensure_space(&cmd_buffer->batch,
                                                    draw_cmd_size);
      if (result != VK_SUCCESS)
         return;

      struct anv_state params_state =
         genX(cmd_buffer_emit_generate_draws)(
            cmd_buffer,
            &cmd_buffer->generation.shader_state,
            anv_batch_current_address(&cmd_buffer->batch),
            draw_cmd_stride,
            indirect_data_addr,
            indirect_data_stride,
            anv_address_add(draw_id_addr, 4 * item_base),
            item_base,
            item_count,
            count_addr,
            max_draw_count,
            indexed,
            0 /* ring_count */);
      struct anv_gen_indirect_params *params = params_state.map;
      if (params == NULL)
         return;

      /* Skip the space the generation shader will fill with draw commands. */
      anv_batch_advance(&cmd_buffer->batch, draw_cmd_size);

      item_base += item_count;

      params->prev = last_params;
      last_params = params;
   }

   genX(cmd_buffer_rewrite_forward_end_addr)(cmd_buffer, last_params);

#if GFX_VER == 9
   update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, indexed ? RANDOM : SEQUENTIAL);
#endif
}

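/* Ring mode generation: instead of reserving space in the main batch for
 * every draw, allocate a fixed size ring buffer and have the generation
 * shader fill it with up to MAX_RING_BO_ITEMS draw commands at a time. The
 * main batch jumps into the ring and the ring jumps back to the generation
 * shader until all draws have been executed.
 */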
static void
genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd_buffer,
                                                      struct anv_address indirect_data_addr,
                                                      uint32_t indirect_data_stride,
                                                      struct anv_address count_addr,
                                                      uint32_t max_draw_count,
                                                      bool indexed)
{
   struct anv_device *device = cmd_buffer->device;

   genX(flush_pipeline_select_3d)(cmd_buffer);

   const uint32_t draw_cmd_stride =
      genX(cmd_buffer_get_generated_draw_stride)(cmd_buffer);

   if (cmd_buffer->generation.ring_bo == NULL) {
      const uint32_t bo_size = align(
#if GFX_VER >= 12
         GENX(MI_ARB_CHECK_length) * 4 +
#endif
         draw_cmd_stride * MAX_RING_BO_ITEMS +
#if GFX_VER == 9
         4 * MAX_RING_BO_ITEMS +
#endif
         GENX(MI_BATCH_BUFFER_START_length) * 4,
         4096);
      VkResult result = anv_bo_pool_alloc(&device->batch_bo_pool, bo_size,
                                          &cmd_buffer->generation.ring_bo);
      if (result != VK_SUCCESS) {
         anv_batch_set_error(&cmd_buffer->batch, result);
         return;
      }
   }

   /* How many items will be generated by each iteration of the generation
    * shader dispatch.
    */
   const uint32_t ring_count = MIN2(MAX_RING_BO_ITEMS, max_draw_count);

   /* The ring bo has the following layout:
    *
    * --------------------------------------------------
    * | MI_ARB_CHECK to resume CS prefetch (Gfx12+)    |
    * |------------------------------------------------|
    * | ring_count * 3DPRIMITIVE                       |
    * |------------------------------------------------|
    * | jump instruction (either back to generate more |
    * | commands or to the next set of commands)       |
    * |------------------------------------------------|
    * | draw ids (only used on Gfx9)                   |
    * --------------------------------------------------
    */

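   /* Draw ids are only used on Gfx9, where the ring does not start with an
    * MI_ARB_CHECK, so the offset does not need to account for it.
    */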
   struct anv_address draw_id_addr = (struct anv_address) {
      .bo = cmd_buffer->generation.ring_bo,
      .offset = ring_count * draw_cmd_stride +
                GENX(MI_BATCH_BUFFER_START_length) * 4,
   };

   struct anv_address draw_cmds_addr = (struct anv_address) {
      .bo = cmd_buffer->generation.ring_bo,
#if GFX_VER >= 12
      .offset = GENX(MI_ARB_CHECK_length) * 4,
#endif
   };

#if GFX_VER >= 12
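   /* Pack an MI_ARB_CHECK at the top of the ring to re-enable the CS
    * pre-parser, which is disabled in the main batch right before jumping
    * into the ring (see below).
    */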
   struct GENX(MI_ARB_CHECK) resume_prefetch = {
      .PreParserDisableMask = true,
      .PreParserDisable = false,
   };
   GENX(MI_ARB_CHECK_pack)(NULL, cmd_buffer->generation.ring_bo->map,
                           &resume_prefetch);
#endif

#if GFX_VER == 9
   /* Mark the VB-0 as using the entire ring_bo, but only for the draw call
    * starting the generation batch. All the following ones will use the same
    * area.
    */
   genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(
      cmd_buffer, 0,
      (struct anv_address) {
         .bo = cmd_buffer->generation.ring_bo,
      },
      cmd_buffer->generation.ring_bo->size);

   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   if (vs_prog_data->uses_baseinstance ||
       vs_prog_data->uses_firstvertex) {
      /* We're using the indirect buffer directly to source base instance &
       * first vertex values. Mark the entire area as used.
       */
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
                                                     indirect_data_addr,
                                                     indirect_data_stride * max_draw_count);
   }

   if (vs_prog_data->uses_drawid) {
      /* Mark the whole draw id buffer as used. */
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
                                                     draw_id_addr,
                                                     sizeof(uint32_t) * max_draw_count);
   }
#endif

   /* Apply the pipeline flush here so the indirect data is available for the
    * generation shader.
    */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   trace_intel_begin_generate_draws(&cmd_buffer->trace);

   /***
    * This is where the command buffer below will jump back to if we need to
    * generate more draws.
    */
   struct anv_address gen_addr = anv_batch_current_address(&cmd_buffer->batch);

   struct anv_shader_bin *gen_kernel;
   VkResult ret =
      anv_device_get_internal_shader(
         cmd_buffer->device,
         ANV_INTERNAL_KERNEL_GENERATED_DRAWS,
         &gen_kernel);
   if (ret != VK_SUCCESS) {
      anv_batch_set_error(&cmd_buffer->batch, ret);
      return;
   }

   struct anv_simple_shader simple_state = (struct anv_simple_shader) {
      .device = device,
      .cmd_buffer = cmd_buffer,
      .dynamic_state_stream = &cmd_buffer->dynamic_state_stream,
      .general_state_stream = &cmd_buffer->general_state_stream,
      .batch = &cmd_buffer->batch,
      .kernel = gen_kernel,
      .l3_config = device->internal_kernels_l3_config,
      .urb_cfg = &cmd_buffer->state.gfx.urb_cfg,
   };
   genX(emit_simple_shader_init)(&simple_state);

   struct anv_state params_state =
      genX(cmd_buffer_emit_generate_draws)(
         cmd_buffer,
         &simple_state,
         draw_cmds_addr,
         draw_cmd_stride,
         indirect_data_addr,
         indirect_data_stride,
         draw_id_addr,
         0 /* item_base */,
         ring_count /* item_count */,
         count_addr,
         max_draw_count,
         indexed,
         ring_count);
   struct anv_gen_indirect_params *params = params_state.map;
   if (params == NULL)
      return;

   anv_add_pending_pipe_bits(cmd_buffer,
#if GFX_VER == 9
                             ANV_PIPE_VF_CACHE_INVALIDATE_BIT |
#endif
                             ANV_PIPE_DATA_CACHE_FLUSH_BIT |
                             ANV_PIPE_CS_STALL_BIT,
                             "after generation flush");

   trace_intel_end_generate_draws(&cmd_buffer->trace);

   /* Emit the 3D state in the main batch. */
   genX(cmd_buffer_flush_gfx_state)(cmd_buffer);

   if (cmd_buffer->state.conditional_render_enabled)
      genX(cmd_emit_conditional_render_predicate)(cmd_buffer);

   if (max_draw_count > 0) {
#if GFX_VER >= 12
      /* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't
       * matter as the prefetch shouldn't follow the MI_BATCH_BUFFER_START.
       */
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
         arb.PreParserDisableMask = true;
         arb.PreParserDisable = true;
      }
#endif

      /* Jump into the ring buffer. */
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
         bbs.AddressSpaceIndicator = ASI_PPGTT;
         bbs.BatchBufferStartAddress = (struct anv_address) {
            .bo = cmd_buffer->generation.ring_bo,
         };
      }

      /***
       * This is the location at which the ring buffer jumps to if it needs
       * to generate more draw calls. We do the following:
       *   - wait for draws in the ring buffer to complete (cs stall) so
       *     we're sure the push constant data we're about to edit is not
       *     read anymore
       *   - increment the base draw number by the number of draws executed
       *     in the ring
       *   - invalidate the constant cache since
       *     anv_gen_indirect_params::draw_base is updated
       *   - jump back to the generation shader
       */
      struct anv_address inc_addr =
         anv_batch_current_address(&cmd_buffer->batch);

      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_STALL_AT_SCOREBOARD_BIT |
                                ANV_PIPE_CS_STALL_BIT,
                                "after generated draws batch");
      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

      struct mi_builder b;
      mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);

      struct anv_address draw_base_addr = anv_address_add(
         genX(simple_shader_push_state_address)(
            &simple_state, params_state),
         offsetof(struct anv_gen_indirect_params, draw_base));

      const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device,
                                                 &draw_base_addr);
      mi_builder_set_mocs(&b, mocs);
      mi_builder_set_write_check(&b, true);

      mi_store(&b, mi_mem32(draw_base_addr),
                   mi_iadd(&b, mi_mem32(draw_base_addr),
                               mi_imm(ring_count)));

      /* Make sure the MI writes are globally observable */
      mi_ensure_write_fence(&b);

      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT,
                                "after generated draws batch increment");
      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

      anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
         bbs.AddressSpaceIndicator = ASI_PPGTT;
         bbs.BatchBufferStartAddress = gen_addr;
      }

      /***
       * This is the location at which the ring buffer jumps to once all the
       * draw calls have executed.
       */
      struct anv_address end_addr = anv_batch_current_address(&cmd_buffer->batch);

      /* Reset the draw_base field in case we ever replay the command buffer. */
      mi_store(&b, mi_mem32(draw_base_addr), mi_imm(0));

      /* Make sure the MI writes are globally observable */
      mi_ensure_write_fence(&b);

      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT,
                                "after generated draws end");

      params->gen_addr = anv_address_physical(inc_addr);
      params->end_addr = anv_address_physical(end_addr);
   }
}

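/* Entry point for generated indirect draws: selects between the in-place and
 * the ring buffer generation strategies based on the draw count threshold
 * set on the instance.
 */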
static void
genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer,
                                               struct anv_address indirect_data_addr,
                                               uint32_t indirect_data_stride,
                                               struct anv_address count_addr,
                                               uint32_t max_draw_count,
                                               bool indexed)
{
   /* In order to have the vertex fetch gather the data we need to have a
    * non-zero stride. It's possible to have a 0 stride given by the
    * application when draw_count is 1, but we need a correct value for the
    * VERTEX_BUFFER_STATE::BufferPitch, so ensure the caller set this
    * correctly:
    *
    *    Vulkan spec, vkCmdDrawIndirect:
    *
    *       "If drawCount is less than or equal to one, stride is ignored."
    */
   assert(indirect_data_stride > 0);

   const bool use_ring_buffer = max_draw_count >=
      cmd_buffer->device->physical->instance->generated_indirect_ring_threshold;
   if (use_ring_buffer) {
      genX(cmd_buffer_emit_indirect_generated_draws_inring)(cmd_buffer,
                                                            indirect_data_addr,
                                                            indirect_data_stride,
                                                            count_addr,
                                                            max_draw_count,
                                                            indexed);
   } else {
      genX(cmd_buffer_emit_indirect_generated_draws_inplace)(cmd_buffer,
                                                             indirect_data_addr,
                                                             indirect_data_stride,
                                                             count_addr,
                                                             max_draw_count,
                                                             indexed);
   }
}

#endif /* GENX_CMD_DRAW_GENERATED_INDIRECT_H */