/*
 * Copyright © 2022 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef GENX_CMD_DRAW_GENERATED_INDIRECT_H
#define GENX_CMD_DRAW_GENERATED_INDIRECT_H

#include <assert.h>
#include <stdbool.h>

#include "util/macros.h"

#include "common/intel_genX_state_brw.h"

#include "anv_private.h"
#include "anv_internal_kernels.h"

/* This is a maximum number of items a fragment shader can generate due to the
 * viewport size.
 */
#define MAX_GENERATED_DRAW_COUNT (8192 * 8192)

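/* Maximum number of draw commands the generation ring buffer can hold per
 * generation pass (see genX(cmd_buffer_emit_indirect_generated_draws_inring)).
 */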
#define MAX_RING_BO_ITEMS (8192)

static struct anv_state
genX(cmd_buffer_emit_generate_draws)(struct anv_cmd_buffer *cmd_buffer,
                                     struct anv_simple_shader *simple_state,
                                     struct anv_address generated_cmds_addr,
                                     uint32_t generated_cmd_stride,
                                     struct anv_address indirect_data_addr,
                                     uint32_t indirect_data_stride,
                                     struct anv_address draw_id_addr,
                                     uint32_t item_base,
                                     uint32_t item_count,
                                     struct anv_address count_addr,
                                     uint32_t max_count,
                                     bool indexed,
                                     uint32_t ring_count)
{
   struct anv_device *device = cmd_buffer->device;

   struct anv_state push_data_state =
      genX(simple_shader_alloc_push)(simple_state,
                                     sizeof(struct anv_gen_indirect_params));
   if (push_data_state.map == NULL)
      return ANV_STATE_NULL;

   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   const bool use_tbimr = cmd_buffer->state.gfx.dyn_state.use_tbimr;

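   /* Without an application provided count buffer, the generation shader
    * reads the draw count from its own push constant data (draw_count is
    * initialized to max_count below).
    */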
   struct anv_address draw_count_addr;
   if (anv_address_is_null(count_addr)) {
      draw_count_addr = anv_address_add(
         genX(simple_shader_push_state_address)(simple_state, push_data_state),
         offsetof(struct anv_gen_indirect_params, draw_count));
   } else {
      draw_count_addr = count_addr;
   }

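   /* flags packs the ANV_GENERATED_FLAG_* bits together with the MOCS value
    * for the indirect data (shifted into bits 8:15) and the generated
    * command size in dwords (shifted into bits 16 and above).
    */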
   struct anv_gen_indirect_params *push_data = push_data_state.map;
   *push_data = (struct anv_gen_indirect_params) {
      .draw_id_addr           = anv_address_physical(draw_id_addr),
      .indirect_data_addr     = anv_address_physical(indirect_data_addr),
      .indirect_data_stride   = indirect_data_stride,
      .flags                  = (use_tbimr ? ANV_GENERATED_FLAG_TBIMR : 0) |
                                (indexed ? ANV_GENERATED_FLAG_INDEXED : 0) |
                                (cmd_buffer->state.conditional_render_enabled ?
                                 ANV_GENERATED_FLAG_PREDICATED : 0) |
                                ((vs_prog_data->uses_firstvertex ||
                                  vs_prog_data->uses_baseinstance) ?
                                 ANV_GENERATED_FLAG_BASE : 0) |
                                (vs_prog_data->uses_drawid ? ANV_GENERATED_FLAG_DRAWID : 0) |
                                (anv_mocs(device, indirect_data_addr.bo,
                                          ISL_SURF_USAGE_VERTEX_BUFFER_BIT) << 8) |
                                (!anv_address_is_null(count_addr) ?
                                 ANV_GENERATED_FLAG_COUNT : 0) |
                                (ring_count != 0 ? ANV_GENERATED_FLAG_RING_MODE : 0) |
                                ((generated_cmd_stride / 4) << 16),
      .draw_base              = item_base,
      .max_draw_count         = max_count,
      .ring_count             = ring_count,
      .instance_multiplier    = pipeline->instance_multiplier,
      .draw_count             = anv_address_is_null(count_addr) ? max_count : 0,
      .generated_cmds_addr    = anv_address_physical(generated_cmds_addr),
      .draw_count_addr        = anv_address_physical(draw_count_addr),
   };

   genX(emit_simple_shader_dispatch)(simple_state, item_count, push_data_state);

   return push_data_state;
}

static void
genX(cmd_buffer_emit_indirect_generated_draws_init)(struct anv_cmd_buffer *cmd_buffer)
{
   anv_batch_emit_ensure_space(&cmd_buffer->generation.batch, 4);

   trace_intel_begin_generate_draws(&cmd_buffer->trace);

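   /* Jump from the main batch into the generation batch and record where the
    * generation batch should return to once its commands have executed.
    */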
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
      bbs.AddressSpaceIndicator = ASI_PPGTT;
      bbs.BatchBufferStartAddress =
         anv_batch_current_address(&cmd_buffer->generation.batch);
   }

   cmd_buffer->generation.return_addr = anv_batch_current_address(&cmd_buffer->batch);

#if GFX_VER >= 12
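   /* Re-enable the CS pre-parser (prefetch) now that execution is back in
    * the main batch.
    */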
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
      arb.PreParserDisableMask = true;
      arb.PreParserDisable = false;
   }
#endif

   trace_intel_end_generate_draws(&cmd_buffer->trace);

   struct anv_shader_bin *gen_kernel;
   VkResult ret =
      anv_device_get_internal_shader(
         cmd_buffer->device,
         ANV_INTERNAL_KERNEL_GENERATED_DRAWS,
         &gen_kernel);
   if (ret != VK_SUCCESS) {
      anv_batch_set_error(&cmd_buffer->batch, ret);
      return;
   }

   struct anv_device *device = cmd_buffer->device;
   struct anv_simple_shader *state = &cmd_buffer->generation.shader_state;
   *state = (struct anv_simple_shader) {
      .device               = device,
      .cmd_buffer           = cmd_buffer,
      .dynamic_state_stream = &cmd_buffer->dynamic_state_stream,
      .general_state_stream = &cmd_buffer->general_state_stream,
      .batch                = &cmd_buffer->generation.batch,
      .kernel               = gen_kernel,
      .l3_config            = device->internal_kernels_l3_config,
      .urb_cfg              = &cmd_buffer->state.gfx.urb_cfg,
   };

   genX(emit_simple_shader_init)(state);
}

static struct anv_address
genX(cmd_buffer_get_draw_id_addr)(struct anv_cmd_buffer *cmd_buffer,
                                  uint32_t draw_id_count)
{
#if GFX_VER >= 11
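   /* On Gfx11+ the draw id is emitted through the extended parameters of
    * 3DPRIMITIVE, so no separate draw id buffer is needed.
    */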
   return ANV_NULL_ADDRESS;
#else
   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   if (!vs_prog_data->uses_drawid)
      return ANV_NULL_ADDRESS;

   struct anv_state draw_id_state =
      anv_cmd_buffer_alloc_temporary_state(cmd_buffer, 4 * draw_id_count, 4);
   return anv_cmd_buffer_temporary_state_address(cmd_buffer, draw_id_state);
#endif
}

static uint32_t
genX(cmd_buffer_get_generated_draw_stride)(struct anv_cmd_buffer *cmd_buffer)
{
   /* With the extended parameters in 3DPRIMITIVE on Gfx11+ we can emit
    * everything. Prior to this, we need to emit a couple of
    * VERTEX_BUFFER_STATE.
    */
#if GFX_VER >= 11
   return 4 * GENX(3DPRIMITIVE_EXTENDED_length);
#else
   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   uint32_t len = 0;

   if (vs_prog_data->uses_firstvertex ||
       vs_prog_data->uses_baseinstance ||
       vs_prog_data->uses_drawid) {
      len += 4; /* 3DSTATE_VERTEX_BUFFERS */

      if (vs_prog_data->uses_firstvertex ||
          vs_prog_data->uses_baseinstance)
         len += 4 * GENX(VERTEX_BUFFER_STATE_length);

      if (vs_prog_data->uses_drawid)
         len += 4 * GENX(VERTEX_BUFFER_STATE_length);
   }

   return len + 4 * GENX(3DPRIMITIVE_length);
#endif
}

static void
genX(cmd_buffer_rewrite_forward_end_addr)(struct anv_cmd_buffer *cmd_buffer,
                                          struct anv_gen_indirect_params *params)
{
   /* We don't know the end_addr until we have emitted all the generation
    * draws. Go and edit the address of all the push parameters.
    */
   uint64_t end_addr =
      anv_address_physical(anv_batch_current_address(&cmd_buffer->batch));
   while (params != NULL) {
      params->end_addr = end_addr;
      params = params->prev;
   }
}

static void
genX(cmd_buffer_emit_indirect_generated_draws_inplace)(struct anv_cmd_buffer *cmd_buffer,
                                                       struct anv_address indirect_data_addr,
                                                       uint32_t indirect_data_stride,
                                                       struct anv_address count_addr,
                                                       uint32_t max_draw_count,
                                                       bool indexed)
{
   const bool start_generation_batch =
      anv_address_is_null(cmd_buffer->generation.return_addr);

   genX(flush_pipeline_select_3d)(cmd_buffer);

   struct anv_address draw_id_addr =
      genX(cmd_buffer_get_draw_id_addr)(cmd_buffer, max_draw_count);

#if GFX_VER == 9
   /* Mark the VB-0 as using the entire dynamic state pool area, but only for
    * the draw call starting the generation batch. All the following ones will
    * use the same area.
    */
   if (start_generation_batch) {
      struct anv_device *device = cmd_buffer->device;
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(
         cmd_buffer, 0,
         (struct anv_address) {
            .offset = device->physical->va.dynamic_state_pool.addr,
         },
         device->physical->va.dynamic_state_pool.size);
   }

   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   if (vs_prog_data->uses_baseinstance ||
       vs_prog_data->uses_firstvertex) {
      /* We're using the indirect buffer directly to source base instance &
       * first vertex values. Mark the entire area as used.
       */
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
                                                     indirect_data_addr,
                                                     indirect_data_stride * max_draw_count);
   }

   if (vs_prog_data->uses_drawid) {
      /* Mark the whole draw id buffer as used. */
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_DRAWID_VB_INDEX,
                                                     draw_id_addr,
                                                     sizeof(uint32_t) * max_draw_count);
   }
#endif

   /* Apply the pipeline flush here so the indirect data is available for the
    * generation shader.
    */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   if (start_generation_batch)
      genX(cmd_buffer_emit_indirect_generated_draws_init)(cmd_buffer);

   /* Emit the 3D state in the main batch. */
   genX(cmd_buffer_flush_gfx_state)(cmd_buffer);

   if (cmd_buffer->state.conditional_render_enabled)
      genX(cmd_emit_conditional_render_predicate)(cmd_buffer);

   const uint32_t draw_cmd_stride =
      genX(cmd_buffer_get_generated_draw_stride)(cmd_buffer);

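   /* Generate the draws in chunks of at most MAX_GENERATED_DRAW_COUNT items.
    * Each chunk's push constant data is linked through params->prev so that
    * end_addr can be patched once all the chunks have been emitted (see
    * genX(cmd_buffer_rewrite_forward_end_addr)).
    */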
   struct anv_gen_indirect_params *last_params = NULL;
   uint32_t item_base = 0;
   while (item_base < max_draw_count) {
      const uint32_t item_count = MIN2(max_draw_count - item_base,
                                       MAX_GENERATED_DRAW_COUNT);
      const uint32_t draw_cmd_size = item_count * draw_cmd_stride;

      /* Ensure we have enough contiguous space for all the draws so that the
       * compute shader can edit all the 3DPRIMITIVEs from a single base
       * address.
       *
       * TODO: we might have to split that if the amount of space is too large
       *       (at 1Mb?).
       */
      VkResult result = anv_batch_emit_ensure_space(&cmd_buffer->batch,
                                                    draw_cmd_size);
      if (result != VK_SUCCESS)
         return;

      struct anv_state params_state =
         genX(cmd_buffer_emit_generate_draws)(
            cmd_buffer,
            &cmd_buffer->generation.shader_state,
            anv_batch_current_address(&cmd_buffer->batch),
            draw_cmd_stride,
            indirect_data_addr,
            indirect_data_stride,
            anv_address_add(draw_id_addr, 4 * item_base),
            item_base,
            item_count,
            count_addr,
            max_draw_count,
            indexed,
            0 /* ring_count */);
      struct anv_gen_indirect_params *params = params_state.map;
      if (params == NULL)
         return;

      anv_batch_advance(&cmd_buffer->batch, draw_cmd_size);

      item_base += item_count;

      params->prev = last_params;
      last_params = params;
   }

   genX(cmd_buffer_rewrite_forward_end_addr)(cmd_buffer, last_params);

#if GFX_VER == 9
   update_dirty_vbs_for_gfx8_vb_flush(cmd_buffer, indexed ? RANDOM : SEQUENTIAL);
#endif
}

static void
genX(cmd_buffer_emit_indirect_generated_draws_inring)(struct anv_cmd_buffer *cmd_buffer,
                                                      struct anv_address indirect_data_addr,
                                                      uint32_t indirect_data_stride,
                                                      struct anv_address count_addr,
                                                      uint32_t max_draw_count,
                                                      bool indexed)
{
   struct anv_device *device = cmd_buffer->device;

   genX(flush_pipeline_select_3d)(cmd_buffer);

   const uint32_t draw_cmd_stride =
      genX(cmd_buffer_get_generated_draw_stride)(cmd_buffer);

   if (cmd_buffer->generation.ring_bo == NULL) {
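      /* Size the ring to hold the prefetch-resume packet (Gfx12+), the
       * generated draw commands, the draw ids (Gfx9) and the jump
       * instruction, matching the ring layout described below.
       */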
      const uint32_t bo_size = align(
#if GFX_VER >= 12
         GENX(MI_ARB_CHECK_length) * 4 +
#endif
         draw_cmd_stride * MAX_RING_BO_ITEMS +
#if GFX_VER == 9
         4 * MAX_RING_BO_ITEMS +
#endif
         GENX(MI_BATCH_BUFFER_START_length) * 4,
         4096);
      VkResult result = anv_bo_pool_alloc(&device->batch_bo_pool, bo_size,
                                          &cmd_buffer->generation.ring_bo);
      if (result != VK_SUCCESS) {
         anv_batch_set_error(&cmd_buffer->batch, result);
         return;
      }
   }

   /* How many items will be generated by each iteration of the generation
    * shader dispatch.
    */
   const uint32_t ring_count = MIN2(MAX_RING_BO_ITEMS, max_draw_count);

   /* The ring bo has the following layout:
    *
    *   --------------------------------------------------
    *   | MI_ARB_CHECK to resume CS prefetch (Gfx12+)    |
    *   |------------------------------------------------|
    *   |            ring_count * 3DPRIMITIVE            |
    *   |------------------------------------------------|
    *   | jump instruction (either back to generate more |
    *   | commands or to the next set of commands)       |
    *   |------------------------------------------------|
    *   |          draw ids (only used on Gfx9)          |
    *   --------------------------------------------------
    */

   struct anv_address draw_id_addr = (struct anv_address) {
      .bo     = cmd_buffer->generation.ring_bo,
      .offset = ring_count * draw_cmd_stride +
                GENX(MI_BATCH_BUFFER_START_length) * 4,
   };

   struct anv_address draw_cmds_addr = (struct anv_address) {
      .bo = cmd_buffer->generation.ring_bo,
#if GFX_VER >= 12
      .offset = GENX(MI_ARB_CHECK_length) * 4,
#endif
   };

#if GFX_VER >= 12
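   /* Pack the prefetch-resume MI_ARB_CHECK at the head of the ring so the
    * CS pre-parser is re-enabled as soon as the ring starts executing.
    */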
   struct GENX(MI_ARB_CHECK) resume_prefetch = {
      .PreParserDisableMask = true,
      .PreParserDisable = false,
   };
   GENX(MI_ARB_CHECK_pack)(NULL, cmd_buffer->generation.ring_bo->map,
                           &resume_prefetch);
#endif

#if GFX_VER == 9
   /* Mark the VB-0 as using the entire ring_bo, but only for the draw call
    * starting the generation batch. All the following ones will use the same
    * area.
    */
   genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(
      cmd_buffer, 0,
      (struct anv_address) {
         .bo = cmd_buffer->generation.ring_bo,
      },
      cmd_buffer->generation.ring_bo->size);

   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   const struct brw_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   if (vs_prog_data->uses_baseinstance ||
       vs_prog_data->uses_firstvertex) {
      /* We're using the indirect buffer directly to source base instance &
       * first vertex values. Mark the entire area as used.
       */
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_SVGS_VB_INDEX,
                                                     indirect_data_addr,
                                                     indirect_data_stride * max_draw_count);
   }

   if (vs_prog_data->uses_drawid) {
      /* Mark the whole draw id buffer as used. */
      genX(cmd_buffer_set_binding_for_gfx8_vb_flush)(cmd_buffer, ANV_DRAWID_VB_INDEX,
                                                     draw_id_addr,
                                                     sizeof(uint32_t) * max_draw_count);
   }
#endif

   /* Apply the pipeline flush here so the indirect data is available for the
    * generation shader.
    */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

   trace_intel_begin_generate_draws(&cmd_buffer->trace);

   /***
    * This is where the command buffer below will jump back to if we need to
    * generate more draws.
    */
   struct anv_address gen_addr = anv_batch_current_address(&cmd_buffer->batch);

   struct anv_shader_bin *gen_kernel;
   VkResult ret =
      anv_device_get_internal_shader(
         cmd_buffer->device,
         ANV_INTERNAL_KERNEL_GENERATED_DRAWS,
         &gen_kernel);
   if (ret != VK_SUCCESS) {
      anv_batch_set_error(&cmd_buffer->batch, ret);
      return;
   }

   struct anv_simple_shader simple_state = (struct anv_simple_shader) {
      .device               = device,
      .cmd_buffer           = cmd_buffer,
      .dynamic_state_stream = &cmd_buffer->dynamic_state_stream,
      .general_state_stream = &cmd_buffer->general_state_stream,
      .batch                = &cmd_buffer->batch,
      .kernel               = gen_kernel,
      .l3_config            = device->internal_kernels_l3_config,
      .urb_cfg              = &cmd_buffer->state.gfx.urb_cfg,
   };
   genX(emit_simple_shader_init)(&simple_state);

   struct anv_state params_state =
      genX(cmd_buffer_emit_generate_draws)(
         cmd_buffer,
         &simple_state,
         draw_cmds_addr,
         draw_cmd_stride,
         indirect_data_addr,
         indirect_data_stride,
         draw_id_addr,
         0 /* item_base */,
         MIN2(MAX_RING_BO_ITEMS, max_draw_count) /* item_count */,
         count_addr,
         max_draw_count,
         indexed,
         ring_count);
   struct anv_gen_indirect_params *params = params_state.map;

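   /* Flush the data cache so the commands written by the generation shader
    * are visible to the command streamer before we jump into the ring
    * buffer below.
    */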
   anv_add_pending_pipe_bits(cmd_buffer,
#if GFX_VER == 9
                             ANV_PIPE_VF_CACHE_INVALIDATE_BIT |
#endif
                             ANV_PIPE_DATA_CACHE_FLUSH_BIT |
                             ANV_PIPE_CS_STALL_BIT,
                             "after generation flush");

   trace_intel_end_generate_draws(&cmd_buffer->trace);

   /* Emit the 3D state in the main batch. */
   genX(cmd_buffer_flush_gfx_state)(cmd_buffer);

   if (cmd_buffer->state.conditional_render_enabled)
      genX(cmd_emit_conditional_render_predicate)(cmd_buffer);

   if (max_draw_count > 0) {
#if GFX_VER >= 12
      /* Prior to Gfx12 we cannot disable the CS prefetch but it doesn't matter
       * as the prefetch shouldn't follow the MI_BATCH_BUFFER_START.
       */
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
         arb.PreParserDisableMask = true;
         arb.PreParserDisable = true;
      }
#endif

      /* Jump into the ring buffer. */
      anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
         bbs.AddressSpaceIndicator = ASI_PPGTT;
         bbs.BatchBufferStartAddress = (struct anv_address) {
            .bo = cmd_buffer->generation.ring_bo,
         };
      }

      /***
       * This is the location the ring buffer jumps back to when it needs to
       * generate more draw calls. We do the following:
       *    - wait for draws in the ring buffer to complete (cs stall) so we're
       *      sure the push constant data we're about to edit is not read anymore
       *    - increment the base draw number by the number of draws
       *      executed in the ring
       *    - invalidate the constant cache since
       *      anv_gen_indirect_params::draw_base is updated
       *    - jump back to the generation shader
       */
      struct anv_address inc_addr =
         anv_batch_current_address(&cmd_buffer->batch);

      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_STALL_AT_SCOREBOARD_BIT |
                                ANV_PIPE_CS_STALL_BIT,
                                "after generated draws batch");
      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

      struct mi_builder b;
      mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);

      struct anv_address draw_base_addr = anv_address_add(
         genX(simple_shader_push_state_address)(
            &simple_state, params_state),
         offsetof(struct anv_gen_indirect_params, draw_base));

      const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device,
                                                 &draw_base_addr);
      mi_builder_set_mocs(&b, mocs);
      mi_builder_set_write_check(&b, true);

      mi_store(&b, mi_mem32(draw_base_addr),
                   mi_iadd(&b, mi_mem32(draw_base_addr),
                               mi_imm(ring_count)));

      /* Make sure the MI writes are globally observable */
      mi_ensure_write_fence(&b);

      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT,
                                "after generated draws batch increment");
      genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);

      anv_batch_emit(&cmd_buffer->batch, GENX(MI_BATCH_BUFFER_START), bbs) {
         bbs.AddressSpaceIndicator = ASI_PPGTT;
         bbs.BatchBufferStartAddress = gen_addr;
      }

      /***
       * This is the location the ring buffer jumps to once all the draw
       * calls have executed.
       */
      struct anv_address end_addr = anv_batch_current_address(&cmd_buffer->batch);

      /* Reset the draw_base field in case we ever replay the command buffer. */
      mi_store(&b, mi_mem32(draw_base_addr), mi_imm(0));

      /* Make sure the MI writes are globally observable */
      mi_ensure_write_fence(&b);

      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT,
                                "after generated draws end");

      params->gen_addr = anv_address_physical(inc_addr);
      params->end_addr = anv_address_physical(end_addr);
   }
}

static void
genX(cmd_buffer_emit_indirect_generated_draws)(struct anv_cmd_buffer *cmd_buffer,
                                               struct anv_address indirect_data_addr,
                                               uint32_t indirect_data_stride,
                                               struct anv_address count_addr,
                                               uint32_t max_draw_count,
                                               bool indexed)
{
   /* In order to have the vertex fetch gather the data we need a non-zero
    * stride. It's possible for the application to provide a 0 stride when
    * draw_count is 1, but we need a correct value for
    * VERTEX_BUFFER_STATE::BufferPitch, so ensure the caller set this
    * correctly:
    *
    * Vulkan spec, vkCmdDrawIndirect:
    *
    *   "If drawCount is less than or equal to one, stride is ignored."
    */
   assert(indirect_data_stride > 0);

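   /* Past the instance's ring threshold, use the fixed size ring buffer
    * rather than emitting all the generated draw commands inline in the
    * main batch.
    */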
   const bool use_ring_buffer = max_draw_count >=
      cmd_buffer->device->physical->instance->generated_indirect_ring_threshold;
   if (use_ring_buffer) {
      genX(cmd_buffer_emit_indirect_generated_draws_inring)(cmd_buffer,
                                                            indirect_data_addr,
                                                            indirect_data_stride,
                                                            count_addr,
                                                            max_draw_count,
                                                            indexed);
   } else {
      genX(cmd_buffer_emit_indirect_generated_draws_inplace)(cmd_buffer,
                                                             indirect_data_addr,
                                                             indirect_data_stride,
                                                             count_addr,
                                                             max_draw_count,
                                                             indexed);
   }
}

#endif /* GENX_CMD_DRAW_GENERATED_INDIRECT_H */