1*61046927SAndroid Build Coastguard WorkerANV 2*61046927SAndroid Build Coastguard Worker=== 3*61046927SAndroid Build Coastguard Worker 4*61046927SAndroid Build Coastguard WorkerExperimental features 5*61046927SAndroid Build Coastguard Worker--------------------- 6*61046927SAndroid Build Coastguard Worker 7*61046927SAndroid Build Coastguard Worker.. _`Bindless model`: 8*61046927SAndroid Build Coastguard Worker 9*61046927SAndroid Build Coastguard WorkerBinding Model 10*61046927SAndroid Build Coastguard Worker------------- 11*61046927SAndroid Build Coastguard Worker 12*61046927SAndroid Build Coastguard WorkerHere is the ANV bindless binding model that was implemented for the 13*61046927SAndroid Build Coastguard Workerdescriptor indexing feature of Vulkan 1.2 : 14*61046927SAndroid Build Coastguard Worker 15*61046927SAndroid Build Coastguard Worker.. graphviz:: 16*61046927SAndroid Build Coastguard Worker 17*61046927SAndroid Build Coastguard Worker digraph G { 18*61046927SAndroid Build Coastguard Worker fontcolor="black"; 19*61046927SAndroid Build Coastguard Worker compound=true; 20*61046927SAndroid Build Coastguard Worker 21*61046927SAndroid Build Coastguard Worker subgraph cluster_1 { 22*61046927SAndroid Build Coastguard Worker label = "Binding Table (HW)"; 23*61046927SAndroid Build Coastguard Worker 24*61046927SAndroid Build Coastguard Worker bgcolor="cornflowerblue"; 25*61046927SAndroid Build Coastguard Worker 26*61046927SAndroid Build Coastguard Worker node [ style=filled,shape="record",fillcolor="white", 27*61046927SAndroid Build Coastguard Worker label="RT0" ] n0; 28*61046927SAndroid Build Coastguard Worker node [ label="RT1" ] n1; 29*61046927SAndroid Build Coastguard Worker node [ label="dynbuf0"] n2; 30*61046927SAndroid Build Coastguard Worker node [ label="set0" ] n3; 31*61046927SAndroid Build Coastguard Worker node [ label="set1" ] n4; 32*61046927SAndroid Build Coastguard Worker node [ label="set2" ] n5; 33*61046927SAndroid Build Coastguard Worker 
34*61046927SAndroid Build Coastguard Worker n0 -> n1 -> n2 -> n3 -> n4 -> n5 [style=invis]; 35*61046927SAndroid Build Coastguard Worker } 36*61046927SAndroid Build Coastguard Worker subgraph cluster_2 { 37*61046927SAndroid Build Coastguard Worker label = "Descriptor Set 0"; 38*61046927SAndroid Build Coastguard Worker 39*61046927SAndroid Build Coastguard Worker bgcolor="burlywood3"; 40*61046927SAndroid Build Coastguard Worker fixedsize = true; 41*61046927SAndroid Build Coastguard Worker 42*61046927SAndroid Build Coastguard Worker node [ style=filled,shape="record",fillcolor="white", fixedsize = true, width=4, 43*61046927SAndroid Build Coastguard Worker label="binding 0 - STORAGE_IMAGE\n anv_storage_image_descriptor" ] n8; 44*61046927SAndroid Build Coastguard Worker node [ label="binding 1 - COMBINED_IMAGE_SAMPLER\n anv_sampled_image_descriptor" ] n9; 45*61046927SAndroid Build Coastguard Worker node [ label="binding 2 - UNIFORM_BUFFER\n anv_address_range_descriptor" ] n10; 46*61046927SAndroid Build Coastguard Worker node [ label="binding 3 - UNIFORM_TEXEL_BUFFER\n anv_storage_image_descriptor" ] n11; 47*61046927SAndroid Build Coastguard Worker 48*61046927SAndroid Build Coastguard Worker n8 -> n9 -> n10 -> n11 [style=invis]; 49*61046927SAndroid Build Coastguard Worker } 50*61046927SAndroid Build Coastguard Worker subgraph cluster_5 { 51*61046927SAndroid Build Coastguard Worker label = "Vulkan Objects" 52*61046927SAndroid Build Coastguard Worker 53*61046927SAndroid Build Coastguard Worker fontcolor="black"; 54*61046927SAndroid Build Coastguard Worker bgcolor="darkolivegreen4"; 55*61046927SAndroid Build Coastguard Worker 56*61046927SAndroid Build Coastguard Worker subgraph cluster_6 { 57*61046927SAndroid Build Coastguard Worker label = "VkImageView"; 58*61046927SAndroid Build Coastguard Worker 59*61046927SAndroid Build Coastguard Worker bgcolor=darkolivegreen3; 60*61046927SAndroid Build Coastguard Worker node [ style=filled,shape="box",fillcolor="white", fixedsize = 
true, width=2, 61*61046927SAndroid Build Coastguard Worker label="surface_state" ] n12; 62*61046927SAndroid Build Coastguard Worker } 63*61046927SAndroid Build Coastguard Worker subgraph cluster_7 { 64*61046927SAndroid Build Coastguard Worker label = "VkSampler"; 65*61046927SAndroid Build Coastguard Worker 66*61046927SAndroid Build Coastguard Worker bgcolor=darkolivegreen3; 67*61046927SAndroid Build Coastguard Worker node [ style=filled,shape="box",fillcolor="white", fixedsize = true, width=2, 68*61046927SAndroid Build Coastguard Worker label="sample_state" ] n13; 69*61046927SAndroid Build Coastguard Worker } 70*61046927SAndroid Build Coastguard Worker subgraph cluster_8 { 71*61046927SAndroid Build Coastguard Worker label = "VkImageView"; 72*61046927SAndroid Build Coastguard Worker bgcolor="darkolivegreen3"; 73*61046927SAndroid Build Coastguard Worker 74*61046927SAndroid Build Coastguard Worker node [ style=filled,shape="box",fillcolor="white", fixedsize = true, width=2, 75*61046927SAndroid Build Coastguard Worker label="surface_state" ] n14; 76*61046927SAndroid Build Coastguard Worker } 77*61046927SAndroid Build Coastguard Worker subgraph cluster_9 { 78*61046927SAndroid Build Coastguard Worker label = "VkBuffer"; 79*61046927SAndroid Build Coastguard Worker bgcolor=darkolivegreen3; 80*61046927SAndroid Build Coastguard Worker 81*61046927SAndroid Build Coastguard Worker node [ style=filled,shape="box",fillcolor="white", fixedsize = true, width=2, 82*61046927SAndroid Build Coastguard Worker label="address" ] n15; 83*61046927SAndroid Build Coastguard Worker } 84*61046927SAndroid Build Coastguard Worker subgraph cluster_10 { 85*61046927SAndroid Build Coastguard Worker label = "VkBufferView"; 86*61046927SAndroid Build Coastguard Worker 87*61046927SAndroid Build Coastguard Worker bgcolor=darkolivegreen3; 88*61046927SAndroid Build Coastguard Worker node [ style=filled,shape="box",fillcolor="white", fixedsize = true, width=2, 89*61046927SAndroid Build Coastguard Worker 
label="surface_state" ] n16; 90*61046927SAndroid Build Coastguard Worker } 91*61046927SAndroid Build Coastguard Worker 92*61046927SAndroid Build Coastguard Worker n12 -> n13 -> n14 -> n15 -> n16 [style=invis]; 93*61046927SAndroid Build Coastguard Worker } 94*61046927SAndroid Build Coastguard Worker 95*61046927SAndroid Build Coastguard Worker subgraph cluster_11 { 96*61046927SAndroid Build Coastguard Worker subgraph cluster_12 { 97*61046927SAndroid Build Coastguard Worker label = "CommandBuffer state stream"; 98*61046927SAndroid Build Coastguard Worker 99*61046927SAndroid Build Coastguard Worker bgcolor="gold3"; 100*61046927SAndroid Build Coastguard Worker node [ style=filled,shape="box",fillcolor="white", fixedsize = true, width=2, 101*61046927SAndroid Build Coastguard Worker label="surface_state" ] n17; 102*61046927SAndroid Build Coastguard Worker node [ label="surface_state" ] n18; 103*61046927SAndroid Build Coastguard Worker node [ label="surface_state" ] n19; 104*61046927SAndroid Build Coastguard Worker 105*61046927SAndroid Build Coastguard Worker n17 -> n18 -> n19 [style=invis]; 106*61046927SAndroid Build Coastguard Worker } 107*61046927SAndroid Build Coastguard Worker } 108*61046927SAndroid Build Coastguard Worker 109*61046927SAndroid Build Coastguard Worker n3 -> n8 [lhead=cluster_2]; 110*61046927SAndroid Build Coastguard Worker 111*61046927SAndroid Build Coastguard Worker n8 -> n12; 112*61046927SAndroid Build Coastguard Worker n9 -> n13; 113*61046927SAndroid Build Coastguard Worker n9 -> n14; 114*61046927SAndroid Build Coastguard Worker n10 -> n15; 115*61046927SAndroid Build Coastguard Worker n11 -> n16; 116*61046927SAndroid Build Coastguard Worker 117*61046927SAndroid Build Coastguard Worker n0 -> n17; 118*61046927SAndroid Build Coastguard Worker n1 -> n18; 119*61046927SAndroid Build Coastguard Worker n2 -> n19; 120*61046927SAndroid Build Coastguard Worker } 121*61046927SAndroid Build Coastguard Worker 122*61046927SAndroid Build Coastguard Worker 
123*61046927SAndroid Build Coastguard Worker 124*61046927SAndroid Build Coastguard WorkerThe HW binding table is generated when the draw or dispatch commands 125*61046927SAndroid Build Coastguard Workerare emitted. Here are the types of entries one can find in the binding 126*61046927SAndroid Build Coastguard Workertable : 127*61046927SAndroid Build Coastguard Worker 128*61046927SAndroid Build Coastguard Worker- The currently bound descriptor sets, one entry per descriptor set 129*61046927SAndroid Build Coastguard Worker (our limit is 8). 130*61046927SAndroid Build Coastguard Worker 131*61046927SAndroid Build Coastguard Worker- For dynamic buffers, one entry per dynamic buffer. 132*61046927SAndroid Build Coastguard Worker 133*61046927SAndroid Build Coastguard Worker- For draw commands, render target entries if needed. 134*61046927SAndroid Build Coastguard Worker 135*61046927SAndroid Build Coastguard WorkerThe entries of the HW binding table for descriptor sets are 136*61046927SAndroid Build Coastguard WorkerRENDER_SURFACE_STATE similar to what you would have for a normal 137*61046927SAndroid Build Coastguard Workeruniform buffer. The shader will first emit reads of this buffer to get 138*61046927SAndroid Build Coastguard Workerthe information it needs to access a surface/sampler/etc... and then 139*61046927SAndroid Build Coastguard Workeremit the appropriate message using the information gathered from the 140*61046927SAndroid Build Coastguard Workerdescriptor set buffer. 141*61046927SAndroid Build Coastguard Worker 142*61046927SAndroid Build Coastguard WorkerEach binding type entry gets an associated structure in memory 143*61046927SAndroid Build Coastguard Worker(``anv_storage_image_descriptor``, ``anv_sampled_image_descriptor``, 144*61046927SAndroid Build Coastguard Worker``anv_address_range_descriptor``, ``anv_storage_image_descriptor``). 145*61046927SAndroid Build Coastguard WorkerThis is the information read by the shader. 
146*61046927SAndroid Build Coastguard Worker 147*61046927SAndroid Build Coastguard Worker 148*61046927SAndroid Build Coastguard Worker.. _`Binding tables`: 149*61046927SAndroid Build Coastguard Worker 150*61046927SAndroid Build Coastguard WorkerBinding Tables 151*61046927SAndroid Build Coastguard Worker-------------- 152*61046927SAndroid Build Coastguard Worker 153*61046927SAndroid Build Coastguard WorkerBinding tables are arrays of 32bit offset entries referencing surface 154*61046927SAndroid Build Coastguard Workerstates. This is how shaders can refer to a binding table entry to read 155*61046927SAndroid Build Coastguard Workeror write a surface. For example, fragment shaders will often refer to 156*61046927SAndroid Build Coastguard Workerentry 0 as the first render target. 157*61046927SAndroid Build Coastguard Worker 158*61046927SAndroid Build Coastguard WorkerThe way binding tables are managed is fairly awkward. 159*61046927SAndroid Build Coastguard Worker 160*61046927SAndroid Build Coastguard WorkerEach shader stage must have its binding table programmed through 161*61046927SAndroid Build Coastguard Workera corresponding instruction 162*61046927SAndroid Build Coastguard Worker``3DSTATE_BINDING_TABLE_POINTERS_*`` (each stage has its own). 163*61046927SAndroid Build Coastguard Worker 164*61046927SAndroid Build Coastguard Worker.. graphviz:: 165*61046927SAndroid Build Coastguard Worker 166*61046927SAndroid Build Coastguard Worker digraph structs { 167*61046927SAndroid Build Coastguard Worker node [shape=record]; 168*61046927SAndroid Build Coastguard Worker struct3 [label="{ binding tables\n area | { <bt4> BT4 | <bt3> BT3 | ... 
| <bt0> BT0 } }|{ surface state\n area |{<ss0> ss0|<ss1> ss1|<ss2> ss2|...}}"]; 169*61046927SAndroid Build Coastguard Worker struct3:bt0 -> struct3:ss0; 170*61046927SAndroid Build Coastguard Worker struct3:bt0 -> struct3:ss1; 171*61046927SAndroid Build Coastguard Worker } 172*61046927SAndroid Build Coastguard Worker 173*61046927SAndroid Build Coastguard Worker 174*61046927SAndroid Build Coastguard WorkerThe value programmed in the ``3DSTATE_BINDING_TABLE_POINTERS_*`` 175*61046927SAndroid Build Coastguard Workerinstructions is not a 64bit pointer but an offset from the address 176*61046927SAndroid Build Coastguard Workerprogrammed in ``STATE_BASE_ADDRESS::Surface State Base Address`` or 177*61046927SAndroid Build Coastguard Worker``3DSTATE_BINDING_TABLE_POOL_ALLOC::Binding Table Pool Base Address`` 178*61046927SAndroid Build Coastguard Worker(available on Gfx11+). The offset value in 179*61046927SAndroid Build Coastguard Worker``3DSTATE_BINDING_TABLE_POINTERS_*`` is also limited to a few bits 180*61046927SAndroid Build Coastguard Worker(not a full 32bit value), meaning that as we use more and more binding 181*61046927SAndroid Build Coastguard Workertables we need to reposition ``STATE_BASE_ADDRESS::Surface State Base 182*61046927SAndroid Build Coastguard WorkerAddress`` to make space for new binding table arrays. 183*61046927SAndroid Build Coastguard Worker 184*61046927SAndroid Build Coastguard WorkerTo make things even more awkward, the binding table entries are also 185*61046927SAndroid Build Coastguard Workerrelative to ``STATE_BASE_ADDRESS::Surface State Base Address`` so as 186*61046927SAndroid Build Coastguard Workerwe change ``STATE_BASE_ADDRESS::Surface State Base Address`` we need 187*61046927SAndroid Build Coastguard Workerto add that offset to the binding table entries. 
188*61046927SAndroid Build Coastguard Worker 189*61046927SAndroid Build Coastguard WorkerThe way we deal with this is that we allocate 4GB of address space 190*61046927SAndroid Build Coastguard Worker(since the binding table entries can address 4GB of surface state 191*61046927SAndroid Build Coastguard Workerelements). We reserve the first gigabyte exclusively for binding 192*61046927SAndroid Build Coastguard Workertables, so that anywhere we position our binding table in that first 193*61046927SAndroid Build Coastguard Workergigabyte, it can always refer to the surface states in the next 3GB. 194*61046927SAndroid Build Coastguard Worker 195*61046927SAndroid Build Coastguard Worker 196*61046927SAndroid Build Coastguard Worker.. _`Descriptor Set Memory Layout`: 197*61046927SAndroid Build Coastguard Worker 198*61046927SAndroid Build Coastguard WorkerDescriptor Set Memory Layout 199*61046927SAndroid Build Coastguard Worker---------------------------- 200*61046927SAndroid Build Coastguard Worker 201*61046927SAndroid Build Coastguard WorkerHere is a representation of how the descriptor set bindings, with each 202*61046927SAndroid Build Coastguard Workerelement in each binding, are mapped to the descriptor set memory : 203*61046927SAndroid Build Coastguard Worker 204*61046927SAndroid Build Coastguard Worker.. 
graphviz:: 205*61046927SAndroid Build Coastguard Worker 206*61046927SAndroid Build Coastguard Worker digraph structs { 207*61046927SAndroid Build Coastguard Worker node [shape=record]; 208*61046927SAndroid Build Coastguard Worker rankdir=LR; 209*61046927SAndroid Build Coastguard Worker 210*61046927SAndroid Build Coastguard Worker struct1 [label="Descriptor Set | \ 211*61046927SAndroid Build Coastguard Worker <b0> binding 0\n STORAGE_IMAGE \n (array_length=3) | \ 212*61046927SAndroid Build Coastguard Worker <b1> binding 1\n COMBINED_IMAGE_SAMPLER \n (array_length=2) | \ 213*61046927SAndroid Build Coastguard Worker <b2> binding 2\n UNIFORM_BUFFER \n (array_length=1) | \ 214*61046927SAndroid Build Coastguard Worker <b3> binding 3\n UNIFORM_TEXEL_BUFFER \n (array_length=1)"]; 215*61046927SAndroid Build Coastguard Worker struct2 [label="Descriptor Set Memory | \ 216*61046927SAndroid Build Coastguard Worker <b0e0> anv_storage_image_descriptor|\ 217*61046927SAndroid Build Coastguard Worker <b0e1> anv_storage_image_descriptor|\ 218*61046927SAndroid Build Coastguard Worker <b0e2> anv_storage_image_descriptor|\ 219*61046927SAndroid Build Coastguard Worker <b1e0> anv_sampled_image_descriptor|\ 220*61046927SAndroid Build Coastguard Worker <b1e1> anv_sampled_image_descriptor|\ 221*61046927SAndroid Build Coastguard Worker <b2e0> anv_address_range_descriptor|\ 222*61046927SAndroid Build Coastguard Worker <b3e0> anv_storage_image_descriptor"]; 223*61046927SAndroid Build Coastguard Worker 224*61046927SAndroid Build Coastguard Worker struct1:b0 -> struct2:b0e0; 225*61046927SAndroid Build Coastguard Worker struct1:b0 -> struct2:b0e1; 226*61046927SAndroid Build Coastguard Worker struct1:b0 -> struct2:b0e2; 227*61046927SAndroid Build Coastguard Worker struct1:b1 -> struct2:b1e0; 228*61046927SAndroid Build Coastguard Worker struct1:b1 -> struct2:b1e1; 229*61046927SAndroid Build Coastguard Worker struct1:b2 -> struct2:b2e0; 230*61046927SAndroid Build Coastguard Worker struct1:b3 -> 
struct2:b3e0; 231*61046927SAndroid Build Coastguard Worker } 232*61046927SAndroid Build Coastguard Worker 233*61046927SAndroid Build Coastguard WorkerEach Binding in the descriptor set is allocated an array of 234*61046927SAndroid Build Coastguard Worker``anv_*_descriptor`` data structures. The type of ``anv_*_descriptor`` 235*61046927SAndroid Build Coastguard Workerused for a binding is selected based on the ``VkDescriptorType`` of 236*61046927SAndroid Build Coastguard Workerthe binding. 237*61046927SAndroid Build Coastguard Worker 238*61046927SAndroid Build Coastguard WorkerThe value of ``anv_descriptor_set_binding_layout::descriptor_offset`` 239*61046927SAndroid Build Coastguard Workeris a byte offset from the descriptor set memory to the associated 240*61046927SAndroid Build Coastguard Workerbinding. ``anv_descriptor_set_binding_layout::array_size`` is the 241*61046927SAndroid Build Coastguard Workernumber of ``anv_*_descriptor`` elements in the descriptor set memory 242*61046927SAndroid Build Coastguard Workerfrom that offset for the binding. 243*61046927SAndroid Build Coastguard Worker 244*61046927SAndroid Build Coastguard Worker 245*61046927SAndroid Build Coastguard WorkerPipeline state emission 246*61046927SAndroid Build Coastguard Worker----------------------- 247*61046927SAndroid Build Coastguard Worker 248*61046927SAndroid Build Coastguard WorkerVulkan initially started by baking as much state as possible in 249*61046927SAndroid Build Coastguard Workerpipelines. But extension after extension, more and more state has 250*61046927SAndroid Build Coastguard Workerbecome potentially dynamic. 251*61046927SAndroid Build Coastguard Worker 252*61046927SAndroid Build Coastguard WorkerANV tries to limit the number of times an instruction has to be packed 253*61046927SAndroid Build Coastguard Workerto reprogram part of the 3D pipeline state. 
The packing happens 254*61046927SAndroid Build Coastguard Workerin 2 places : 255*61046927SAndroid Build Coastguard Worker 256*61046927SAndroid Build Coastguard Worker- ``genX_pipeline.c`` where the non dynamic state is emitted in the 257*61046927SAndroid Build Coastguard Worker pipeline batch. Chunks of the batches are copied into the command 258*61046927SAndroid Build Coastguard Worker buffer as a result of calling ``vkCmdBindPipeline()``, depending on 259*61046927SAndroid Build Coastguard Worker what changes from the previously bound graphics pipeline 260*61046927SAndroid Build Coastguard Worker 261*61046927SAndroid Build Coastguard Worker- ``genX_gfx_state.c`` where the dynamic state is added to already 262*61046927SAndroid Build Coastguard Worker packed instructions from ``genX_pipeline.c`` 263*61046927SAndroid Build Coastguard Worker 264*61046927SAndroid Build Coastguard WorkerThe rule to know where to emit an instruction programming the 3D 265*61046927SAndroid Build Coastguard Workerpipeline is as follows : 266*61046927SAndroid Build Coastguard Worker 267*61046927SAndroid Build Coastguard Worker- If any field of the instruction can be made dynamic, it should be 268*61046927SAndroid Build Coastguard Worker emitted in ``genX_gfx_state.c`` 269*61046927SAndroid Build Coastguard Worker 270*61046927SAndroid Build Coastguard Worker- Otherwise, the instruction can be emitted in ``genX_pipeline.c`` 271*61046927SAndroid Build Coastguard Worker 272*61046927SAndroid Build Coastguard WorkerWhen a piece of state programming is dynamic, it should have a 273*61046927SAndroid Build Coastguard Workercorresponding field in ``anv_gfx_dynamic_state`` and the 274*61046927SAndroid Build Coastguard Worker``genX(cmd_buffer_flush_gfx_runtime_state)`` function should be 275*61046927SAndroid Build Coastguard Workerupdated to ensure we minimize the number of times an instruction is 276*61046927SAndroid Build Coastguard Workeremitted. 
Each instruction should have an associated 277*61046927SAndroid Build Coastguard Worker``ANV_GFX_STATE_*`` mask so that the dynamic emission code can tell 278*61046927SAndroid Build Coastguard Workerwhen to re-emit an instruction. 279*61046927SAndroid Build Coastguard Worker 280*61046927SAndroid Build Coastguard Worker 281*61046927SAndroid Build Coastguard WorkerGenerated indirect draws optimization 282*61046927SAndroid Build Coastguard Worker------------------------------------- 283*61046927SAndroid Build Coastguard Worker 284*61046927SAndroid Build Coastguard WorkerIndirect draws have traditionally been implemented on Intel HW by 285*61046927SAndroid Build Coastguard Workerloading the indirect parameters from memory into HW registers using 286*61046927SAndroid Build Coastguard Workerthe command streamer's ``MI_LOAD_REGISTER_MEM`` instruction before 287*61046927SAndroid Build Coastguard Workerdispatching a draw call to the 3D pipeline. 288*61046927SAndroid Build Coastguard Worker 289*61046927SAndroid Build Coastguard WorkerOn recent products, it was found that the command streamer is showing 290*61046927SAndroid Build Coastguard Workerup as a performance bottleneck, because it cannot dispatch draw calls fast 291*61046927SAndroid Build Coastguard Workerenough to keep the 3D pipeline busy. 292*61046927SAndroid Build Coastguard Worker 293*61046927SAndroid Build Coastguard WorkerThe solution to this problem is to change the way we deal with 294*61046927SAndroid Build Coastguard Workerindirect draws. Instead of loading HW registers with values using the 295*61046927SAndroid Build Coastguard Workercommand streamer, we generate an entire set of ``3DPRIMITIVE`` 296*61046927SAndroid Build Coastguard Workerinstructions using a shader. The generated instructions contain the 297*61046927SAndroid Build Coastguard Workerentire draw call parameters. 
This way the command streamer executes 298*61046927SAndroid Build Coastguard Workeronly ``3DPRIMITIVE`` instructions and doesn't do any data loading from 299*61046927SAndroid Build Coastguard Workermemory or touch HW registers, feeding the 3D pipeline as fast as it 300*61046927SAndroid Build Coastguard Workercan. 301*61046927SAndroid Build Coastguard Worker 302*61046927SAndroid Build Coastguard WorkerIn ANV this is implemented in 2 different ways : 303*61046927SAndroid Build Coastguard Worker 304*61046927SAndroid Build Coastguard WorkerBy generating instructions directly into the command stream using a 305*61046927SAndroid Build Coastguard Workerside batch buffer. When ANV encounters the first indirect draw, it 306*61046927SAndroid Build Coastguard Workergenerates a jump into the side batch, the side batch contains a draw 307*61046927SAndroid Build Coastguard Workercall using a generation shader for each indirect draw. We keep adding 308*61046927SAndroid Build Coastguard Workeron more generation draws into the batch until we have to stop due to 309*61046927SAndroid Build Coastguard Workercommand buffer end, secondary command buffer calls or a barrier 310*61046927SAndroid Build Coastguard Workercontaining the access flag ``VK_ACCESS_INDIRECT_COMMAND_READ_BIT``. 311*61046927SAndroid Build Coastguard WorkerThe side batch buffer jumps back right after the instruction where it 312*61046927SAndroid Build Coastguard Workerwas called. Here is a high level diagram showing how the generation 313*61046927SAndroid Build Coastguard Workerbatch buffer writes in the main command buffer : 314*61046927SAndroid Build Coastguard Worker 315*61046927SAndroid Build Coastguard Worker.. 
graphviz:: 316*61046927SAndroid Build Coastguard Worker 317*61046927SAndroid Build Coastguard Worker digraph commands_mode { 318*61046927SAndroid Build Coastguard Worker rankdir = "LR" 319*61046927SAndroid Build Coastguard Worker "main-command-buffer" [ 320*61046927SAndroid Build Coastguard Worker label = "main command buffer|...|draw indirect0 start|<f0>jump to\ngeneration batch|<f1>|<f2>empty instruction0|<f3>empty instruction1|...|draw indirect0 end|...|draw indirect1 start|<f4>empty instruction0|<f5>empty instruction1|...|<f6>draw indirect1 end|..." 321*61046927SAndroid Build Coastguard Worker shape = "record" 322*61046927SAndroid Build Coastguard Worker ]; 323*61046927SAndroid Build Coastguard Worker "generation-command-buffer" [ 324*61046927SAndroid Build Coastguard Worker label = "generation command buffer|<f0>|<f1>write draw indirect0|<f2>write draw indirect1|...|<f3>exit jump" 325*61046927SAndroid Build Coastguard Worker shape = "record" 326*61046927SAndroid Build Coastguard Worker ]; 327*61046927SAndroid Build Coastguard Worker "main-command-buffer":f0 -> "generation-command-buffer":f0; 328*61046927SAndroid Build Coastguard Worker "generation-command-buffer":f1 -> "main-command-buffer":f2 [color="#0000ff"]; 329*61046927SAndroid Build Coastguard Worker "generation-command-buffer":f1 -> "main-command-buffer":f3 [color="#0000ff"]; 330*61046927SAndroid Build Coastguard Worker "generation-command-buffer":f2 -> "main-command-buffer":f4 [color="#0000ff"]; 331*61046927SAndroid Build Coastguard Worker "generation-command-buffer":f2 -> "main-command-buffer":f5 [color="#0000ff"]; 332*61046927SAndroid Build Coastguard Worker "generation-command-buffer":f3 -> "main-command-buffer":f1; 333*61046927SAndroid Build Coastguard Worker } 334*61046927SAndroid Build Coastguard Worker 335*61046927SAndroid Build Coastguard WorkerBy generating instructions into a ring buffer of commands, when the 336*61046927SAndroid Build Coastguard Workerdraw count number is high. 
This solution allows smaller batches to be 337*61046927SAndroid Build Coastguard Workeremitted. Here is a high level diagram showing how things are 338*61046927SAndroid Build Coastguard Workerexecuted : 339*61046927SAndroid Build Coastguard Worker 340*61046927SAndroid Build Coastguard Worker.. graphviz:: 341*61046927SAndroid Build Coastguard Worker 342*61046927SAndroid Build Coastguard Worker digraph ring_mode { 343*61046927SAndroid Build Coastguard Worker rankdir=LR; 344*61046927SAndroid Build Coastguard Worker "main-command-buffer" [ 345*61046927SAndroid Build Coastguard Worker label = "main command buffer|...| draw indirect |<f1>generation shader|<f2> jump to ring|<f3> increment\ndraw_base|<f4>..." 346*61046927SAndroid Build Coastguard Worker shape = "record" 347*61046927SAndroid Build Coastguard Worker ]; 348*61046927SAndroid Build Coastguard Worker "ring-buffer" [ 349*61046927SAndroid Build Coastguard Worker label = "ring buffer|<f0>generated draw0|<f1>generated draw1|<f2>generated draw2|...|<f3>exit jump" 350*61046927SAndroid Build Coastguard Worker shape = "record" 351*61046927SAndroid Build Coastguard Worker ]; 352*61046927SAndroid Build Coastguard Worker "main-command-buffer":f2 -> "ring-buffer":f0; 353*61046927SAndroid Build Coastguard Worker "ring-buffer":f3 -> "main-command-buffer":f3; 354*61046927SAndroid Build Coastguard Worker "ring-buffer":f3 -> "main-command-buffer":f4; 355*61046927SAndroid Build Coastguard Worker "main-command-buffer":f3 -> "main-command-buffer":f1; 356*61046927SAndroid Build Coastguard Worker "main-command-buffer":f1 -> "ring-buffer":f1 [color="#0000ff"]; 357*61046927SAndroid Build Coastguard Worker "main-command-buffer":f1 -> "ring-buffer":f2 [color="#0000ff"]; 358*61046927SAndroid Build Coastguard Worker } 359*61046927SAndroid Build Coastguard Worker 360*61046927SAndroid Build Coastguard WorkerRuntime dependencies 361*61046927SAndroid Build Coastguard Worker-------------------- 362*61046927SAndroid Build Coastguard Worker 
363*61046927SAndroid Build Coastguard WorkerStarting with Intel 12th generation/Alder Lake-P and Intel Arc Alchemist, the Intel 3D driver stack requires GuC firmware for proper operation. You have two options to install the firmware: 364*61046927SAndroid Build Coastguard Worker 365*61046927SAndroid Build Coastguard Worker- Distro package: Install the pre-packaged firmware included in your Linux distribution's repositories. 366*61046927SAndroid Build Coastguard Worker- Manual download: You can download the firmware from the official repository: https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915. Place the downloaded files in the /lib/firmware/i915 directory. 367*61046927SAndroid Build Coastguard Worker 368*61046927SAndroid Build Coastguard WorkerImportant: For optimal performance, we recommend updating the GuC firmware to version 70.6.3 or later.