/*
 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "nvk_query_pool.h"

#include "nvk_buffer.h"
#include "nvk_cmd_buffer.h"
#include "nvk_device.h"
#include "nvk_entrypoints.h"
#include "nvk_event.h"
#include "nvk_mme.h"
#include "nvk_physical_device.h"
#include "nvkmd/nvkmd.h"

#include "vk_common_entrypoints.h"
#include "vk_meta.h"
#include "vk_pipeline.h"

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"

#include "util/os_time.h"

#include "nv_push_cl906f.h"
#include "nv_push_cl9097.h"
#include "nv_push_cla0c0.h"
#include "nv_push_clc597.h"

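/* A single query report as written by SET_REPORT_SEMAPHORE with
 * STRUCTURE_SIZE_FOUR_WORDS: a 64-bit counter value followed by a 64-bit
 * GPU timestamp.
 */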
struct nvk_query_report {
   uint64_t value;
   uint64_t timestamp;
};

VKAPI_ATTR VkResult VKAPI_CALL
nvk_CreateQueryPool(VkDevice device,
                    const VkQueryPoolCreateInfo *pCreateInfo,
                    const VkAllocationCallbacks *pAllocator,
                    VkQueryPool *pQueryPool)
{
   VK_FROM_HANDLE(nvk_device, dev, device);
   struct nvk_physical_device *pdev = nvk_device_physical(dev);
   struct nvk_query_pool *pool;
   VkResult result;

   pool = vk_query_pool_create(&dev->vk, pCreateInfo,
                               pAllocator, sizeof(*pool));
   if (!pool)
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* We place the availability words (one 32-bit word per query) first,
    * followed by the report data for each query.
    */
   pool->query_start = align(pool->vk.query_count * sizeof(uint32_t),
                             sizeof(struct nvk_query_report));

   uint32_t reports_per_query;
   switch (pCreateInfo->queryType) {
   case VK_QUERY_TYPE_OCCLUSION:
   case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
      reports_per_query = 2;
      break;
   case VK_QUERY_TYPE_TIMESTAMP:
      reports_per_query = 1;
      break;
   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
      reports_per_query = 2 * util_bitcount(pool->vk.pipeline_statistics);
      break;
   case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
      // 2 reports for primitives succeeded, 2 for primitives needed
      reports_per_query = 4;
      break;
   default:
      unreachable("Unsupported query type");
   }
   pool->query_stride = reports_per_query * sizeof(struct nvk_query_report);

   if (pool->vk.query_count > 0) {
      uint32_t mem_size = pool->query_start +
                          pool->query_stride * pool->vk.query_count;
      result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
                                          mem_size, 0 /* align_B */,
                                          NVKMD_MEM_GART,
                                          NVKMD_MEM_MAP_RDWR,
                                          &pool->mem);
      if (result != VK_SUCCESS) {
         vk_query_pool_destroy(&dev->vk, pAllocator, &pool->vk);
         return result;
      }

      if (pdev->debug_flags & NVK_DEBUG_ZERO_MEMORY)
         memset(pool->mem->map, 0, mem_size);
   }

   *pQueryPool = nvk_query_pool_to_handle(pool);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
nvk_DestroyQueryPool(VkDevice device,
                     VkQueryPool queryPool,
                     const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(nvk_device, dev, device);
   VK_FROM_HANDLE(nvk_query_pool, pool, queryPool);

   if (!pool)
      return;

   if (pool->mem)
      nvkmd_mem_unref(pool->mem);
   vk_query_pool_destroy(&dev->vk, pAllocator, &pool->vk);
}

static uint64_t
nvk_query_available_addr(struct nvk_query_pool *pool, uint32_t query)
{
   assert(query < pool->vk.query_count);
   return pool->mem->va->addr + query * sizeof(uint32_t);
}

static nir_def *
nvk_nir_available_addr(nir_builder *b, nir_def *pool_addr,
                       nir_def *query)
{
   nir_def *offset = nir_imul_imm(b, query, sizeof(uint32_t));
   return nir_iadd(b, pool_addr, nir_u2u64(b, offset));
}

static uint32_t *
nvk_query_available_map(struct nvk_query_pool *pool, uint32_t query)
{
   assert(query < pool->vk.query_count);
   return (uint32_t *)pool->mem->map + query;
}

static uint64_t
nvk_query_offset(struct nvk_query_pool *pool, uint32_t query)
{
   assert(query < pool->vk.query_count);
   return pool->query_start + query * pool->query_stride;
}

static uint64_t
nvk_query_report_addr(struct nvk_query_pool *pool, uint32_t query)
{
   return pool->mem->va->addr + nvk_query_offset(pool, query);
}

static nir_def *
nvk_nir_query_report_addr(nir_builder *b, nir_def *pool_addr,
                          nir_def *query_start, nir_def *query_stride,
                          nir_def *query)
{
   nir_def *offset =
      nir_iadd(b, query_start, nir_umul_2x32_64(b, query, query_stride));
   return nir_iadd(b, pool_addr, offset);
}

static struct nvk_query_report *
nvk_query_report_map(struct nvk_query_pool *pool, uint32_t query)
{
   return (void *)((char *)pool->mem->map + nvk_query_offset(pool, query));
}

/**
 * Goes through a series of consecutive query indices in the given pool,
 * setting all element values to 0 and emitting them as available.
 */
static void
emit_zero_queries(struct nvk_cmd_buffer *cmd, struct nvk_query_pool *pool,
                  uint32_t first_index, uint32_t num_queries)
{
   switch (pool->vk.query_type) {
   case VK_QUERY_TYPE_OCCLUSION:
   case VK_QUERY_TYPE_TIMESTAMP:
   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
   case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT: {
      for (uint32_t i = 0; i < num_queries; i++) {
         uint64_t addr = nvk_query_available_addr(pool, first_index + i);

         struct nv_push *p = nvk_cmd_buffer_push(cmd, 5);
         P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
         P_NV9097_SET_REPORT_SEMAPHORE_A(p, addr >> 32);
         P_NV9097_SET_REPORT_SEMAPHORE_B(p, addr);
         P_NV9097_SET_REPORT_SEMAPHORE_C(p, 1);
         P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
            .operation = OPERATION_RELEASE,
            .release = RELEASE_AFTER_ALL_PRECEEDING_WRITES_COMPLETE,
            .pipeline_location = PIPELINE_LOCATION_ALL,
            .structure_size = STRUCTURE_SIZE_ONE_WORD,
         });
      }
      break;
   }
   default:
      unreachable("Unsupported query type");
   }
}

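/* Host-side query reset: the availability words live in host-mapped memory,
 * so we can simply zero them from the CPU.  The report data itself is not
 * cleared; it is rewritten the next time the query is used.
 */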
VKAPI_ATTR void VKAPI_CALL
nvk_ResetQueryPool(VkDevice device,
                   VkQueryPool queryPool,
                   uint32_t firstQuery,
                   uint32_t queryCount)
{
   VK_FROM_HANDLE(nvk_query_pool, pool, queryPool);

   uint32_t *available = nvk_query_available_map(pool, firstQuery);
   memset(available, 0, queryCount * sizeof(*available));
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdResetQueryPool(VkCommandBuffer commandBuffer,
                      VkQueryPool queryPool,
                      uint32_t firstQuery,
                      uint32_t queryCount)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_query_pool, pool, queryPool);

   for (uint32_t i = 0; i < queryCount; i++) {
      uint64_t addr = nvk_query_available_addr(pool, firstQuery + i);

      struct nv_push *p = nvk_cmd_buffer_push(cmd, 5);
      P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
      P_NV9097_SET_REPORT_SEMAPHORE_A(p, addr >> 32);
      P_NV9097_SET_REPORT_SEMAPHORE_B(p, addr);
      P_NV9097_SET_REPORT_SEMAPHORE_C(p, 0);
      P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
         .operation = OPERATION_RELEASE,
         .release = RELEASE_AFTER_ALL_PRECEEDING_WRITES_COMPLETE,
         .pipeline_location = PIPELINE_LOCATION_ALL,
         .structure_size = STRUCTURE_SIZE_ONE_WORD,
      });
   }

   /* Wait for the above writes to complete.  This prevents WaW hazards on any
    * later query availability updates and ensures vkCmdCopyQueryPoolResults
    * will see the query as unavailable if it happens before the query is
    * completed again.
    */
   for (uint32_t i = 0; i < queryCount; i++) {
      uint64_t addr = nvk_query_available_addr(pool, firstQuery + i);

      struct nv_push *p = nvk_cmd_buffer_push(cmd, 5);
      __push_mthd(p, SUBC_NV9097, NV906F_SEMAPHOREA);
      P_NV906F_SEMAPHOREA(p, addr >> 32);
      P_NV906F_SEMAPHOREB(p, (addr & UINT32_MAX) >> 2);
      P_NV906F_SEMAPHOREC(p, 0);
      P_NV906F_SEMAPHORED(p, {
         .operation = OPERATION_ACQUIRE,
         .acquire_switch = ACQUIRE_SWITCH_ENABLED,
         .release_size = RELEASE_SIZE_4BYTE,
      });
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
                       VkPipelineStageFlags2 stage,
                       VkQueryPool queryPool,
                       uint32_t query)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_query_pool, pool, queryPool);

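   /* Two semaphore writes: a REPORT_ONLY that stores the GPU timestamp into
    * the query's report (tagged with the pipeline location derived from
    * stage), then a RELEASE that writes 1 to the availability word once all
    * preceding writes have completed.
    */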
   struct nv_push *p = nvk_cmd_buffer_push(cmd, 10);

   uint64_t report_addr = nvk_query_report_addr(pool, query);
   P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
   P_NV9097_SET_REPORT_SEMAPHORE_A(p, report_addr >> 32);
   P_NV9097_SET_REPORT_SEMAPHORE_B(p, report_addr);
   P_NV9097_SET_REPORT_SEMAPHORE_C(p, 0);
   P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
      .operation = OPERATION_REPORT_ONLY,
      .pipeline_location = vk_stage_flags_to_nv9097_pipeline_location(stage),
      .structure_size = STRUCTURE_SIZE_FOUR_WORDS,
   });

   uint64_t available_addr = nvk_query_available_addr(pool, query);
   P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
   P_NV9097_SET_REPORT_SEMAPHORE_A(p, available_addr >> 32);
   P_NV9097_SET_REPORT_SEMAPHORE_B(p, available_addr);
   P_NV9097_SET_REPORT_SEMAPHORE_C(p, 1);
   P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
      .operation = OPERATION_RELEASE,
      .release = RELEASE_AFTER_ALL_PRECEEDING_WRITES_COMPLETE,
      .pipeline_location = PIPELINE_LOCATION_ALL,
      .structure_size = STRUCTURE_SIZE_ONE_WORD,
   });

   /* From the Vulkan spec:
    *
    *   "If vkCmdWriteTimestamp2 is called while executing a render pass
    *    instance that has multiview enabled, the timestamp uses N consecutive
    *    query indices in the query pool (starting at query) where N is the
    *    number of bits set in the view mask of the subpass the command is
    *    executed in. The resulting query values are determined by an
    *    implementation-dependent choice of one of the following behaviors:"
    *
    * In our case, only the first query is used, so we emit zeros for the
    * remaining queries, as described in the first behavior listed in the
    * Vulkan spec:
    *
    *   "The first query is a timestamp value and (if more than one bit is set
    *   in the view mask) zero is written to the remaining queries."
    */
   if (cmd->state.gfx.render.view_mask != 0) {
      const uint32_t num_queries =
         util_bitcount(cmd->state.gfx.render.view_mask);
      if (num_queries > 1)
         emit_zero_queries(cmd, pool, query + 1, num_queries - 1);
   }
}

struct nvk_3d_stat_query {
   VkQueryPipelineStatisticFlagBits flag;
   uint8_t loc;
   uint8_t report;
};

/* This must remain sorted in flag order */
static const struct nvk_3d_stat_query nvk_3d_stat_queries[] = {{
   .flag    = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT,
   .loc     = NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_DATA_ASSEMBLER,
   .report  = NV9097_SET_REPORT_SEMAPHORE_D_REPORT_DA_VERTICES_GENERATED,
}, {
   .flag    = VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT,
   .loc     = NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_DATA_ASSEMBLER,
   .report  = NV9097_SET_REPORT_SEMAPHORE_D_REPORT_DA_PRIMITIVES_GENERATED,
}, {
   .flag    = VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT,
   .loc     = NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_VERTEX_SHADER,
   .report  = NV9097_SET_REPORT_SEMAPHORE_D_REPORT_VS_INVOCATIONS,
}, {
   .flag    = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT,
   .loc     = NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_GEOMETRY_SHADER,
   .report  = NV9097_SET_REPORT_SEMAPHORE_D_REPORT_GS_INVOCATIONS,
}, {
   .flag    = VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT,
   .loc     = NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_GEOMETRY_SHADER,
   .report  = NV9097_SET_REPORT_SEMAPHORE_D_REPORT_GS_PRIMITIVES_GENERATED,
}, {
   .flag    = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT,
   .loc     = NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_VPC, /* TODO */
   .report  = NV9097_SET_REPORT_SEMAPHORE_D_REPORT_CLIPPER_INVOCATIONS,
}, {
   .flag    = VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT,
   .loc     = NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_VPC, /* TODO */
   .report  = NV9097_SET_REPORT_SEMAPHORE_D_REPORT_CLIPPER_PRIMITIVES_GENERATED,
}, {
   .flag    = VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT,
   .loc     = NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_PIXEL_SHADER,
   .report  = NV9097_SET_REPORT_SEMAPHORE_D_REPORT_PS_INVOCATIONS,
}, {
   .flag    = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT,
   .loc     = NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_TESSELATION_INIT_SHADER,
   .report  = NV9097_SET_REPORT_SEMAPHORE_D_REPORT_TI_INVOCATIONS,
}, {
   .flag    = VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT,
   .loc     = NV9097_SET_REPORT_SEMAPHORE_D_PIPELINE_LOCATION_TESSELATION_SHADER,
   .report  = NV9097_SET_REPORT_SEMAPHORE_D_REPORT_TS_INVOCATIONS,
}, {
   .flag    = VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT,
   .loc     = UINT8_MAX,
   .report  = UINT8_MAX,
}};

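/* Emits a 32-bit store to an arbitrary GPU address from an MME macro by
 * using SET_REPORT_SEMAPHORE as a plain release.  The immediate written to
 * SET_REPORT_SEMAPHORE_D is assumed to encode OPERATION_RELEASE with
 * STRUCTURE_SIZE_ONE_WORD, so only the 32-bit payload v is written.
 */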
static void
mme_store_global(struct mme_builder *b,
                 struct mme_value64 addr,
                 struct mme_value v)
{
   mme_mthd(b, NV9097_SET_REPORT_SEMAPHORE_A);
   mme_emit_addr64(b, addr);
   mme_emit(b, v);
   mme_emit(b, mme_imm(0x10000000));
}

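/* The 3D class has no report type for compute shader invocations, so NVK
 * keeps a running 64-bit total in a pair of MME shadow scratch registers
 * (presumably updated by the compute dispatch macros).  This macro reads
 * that accumulator and stores it to the 64-bit address passed as inline
 * data.
 */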
void
nvk_mme_write_cs_invocations(struct mme_builder *b)
{
   struct mme_value64 dst_addr = mme_load_addr64(b);

   struct mme_value accum_hi = mme_state(b,
      NVC597_SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_CS_INVOCATIONS_HI));
   struct mme_value accum_lo = mme_state(b,
      NVC597_SET_MME_SHADOW_SCRATCH(NVK_MME_SCRATCH_CS_INVOCATIONS_LO));
   struct mme_value64 accum = mme_value64(accum_lo, accum_hi);

   mme_store_global(b, dst_addr, accum.lo);
   mme_store_global(b, mme_add64(b, dst_addr, mme_imm64(4)), accum.hi);
}

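/* Shared implementation of vkCmdBeginQuery* and vkCmdEndQuery*.  Each
 * counter gets a pair of reports: the begin snapshot at offset 0 and the end
 * snapshot one report later, so results are computed as end minus begin.
 * end_size reserves extra push space for the flush and availability release
 * emitted only when ending a query.
 */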
static void
nvk_cmd_begin_end_query(struct nvk_cmd_buffer *cmd,
                        struct nvk_query_pool *pool,
                        uint32_t query, uint32_t index,
                        bool end)
{
   uint64_t report_addr = nvk_query_report_addr(pool, query) +
                          end * sizeof(struct nvk_query_report);

   uint32_t end_size = 7 * end;

   struct nv_push *p;
   switch (pool->vk.query_type) {
   case VK_QUERY_TYPE_OCCLUSION:
      p = nvk_cmd_buffer_push(cmd, 7 + end_size);

      P_IMMD(p, NV9097, SET_ZPASS_PIXEL_COUNT, !end);

      P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
      P_NV9097_SET_REPORT_SEMAPHORE_A(p, report_addr >> 32);
      P_NV9097_SET_REPORT_SEMAPHORE_B(p, report_addr);
      P_NV9097_SET_REPORT_SEMAPHORE_C(p, 0);
      P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
         .operation = OPERATION_REPORT_ONLY,
         .pipeline_location = PIPELINE_LOCATION_ALL,
         .report = REPORT_ZPASS_PIXEL_CNT64,
         .structure_size = STRUCTURE_SIZE_FOUR_WORDS,
         .flush_disable = true,
      });
      break;

   case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
      uint32_t stat_count = util_bitcount(pool->vk.pipeline_statistics);
      p = nvk_cmd_buffer_push(cmd, stat_count * 5 + end_size);

      ASSERTED uint32_t stats_left = pool->vk.pipeline_statistics;
      for (uint32_t i = 0; i < ARRAY_SIZE(nvk_3d_stat_queries); i++) {
         const struct nvk_3d_stat_query *sq = &nvk_3d_stat_queries[i];
         if (!(stats_left & sq->flag))
            continue;

         /* The 3D stat queries array MUST be sorted */
         assert(!(stats_left & (sq->flag - 1)));

         if (sq->flag == VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT) {
            P_1INC(p, NVC597, CALL_MME_MACRO(NVK_MME_WRITE_CS_INVOCATIONS));
            P_INLINE_DATA(p, report_addr >> 32);
            P_INLINE_DATA(p, report_addr);
         } else {
            P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
            P_NV9097_SET_REPORT_SEMAPHORE_A(p, report_addr >> 32);
            P_NV9097_SET_REPORT_SEMAPHORE_B(p, report_addr);
            P_NV9097_SET_REPORT_SEMAPHORE_C(p, 0);
            P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
               .operation = OPERATION_REPORT_ONLY,
               .pipeline_location = sq->loc,
               .report = sq->report,
               .structure_size = STRUCTURE_SIZE_FOUR_WORDS,
               .flush_disable = true,
            });
         }

         report_addr += 2 * sizeof(struct nvk_query_report);
         stats_left &= ~sq->flag;
      }
      break;
   }

   case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
      const uint32_t xfb_reports[] = {
         NV9097_SET_REPORT_SEMAPHORE_D_REPORT_STREAMING_PRIMITIVES_SUCCEEDED,
         NV9097_SET_REPORT_SEMAPHORE_D_REPORT_STREAMING_PRIMITIVES_NEEDED,
      };
      p = nvk_cmd_buffer_push(cmd, 5 * ARRAY_SIZE(xfb_reports) + end_size);
      for (uint32_t i = 0; i < ARRAY_SIZE(xfb_reports); ++i) {
         P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
         P_NV9097_SET_REPORT_SEMAPHORE_A(p, report_addr >> 32);
         P_NV9097_SET_REPORT_SEMAPHORE_B(p, report_addr);
         P_NV9097_SET_REPORT_SEMAPHORE_C(p, 0);
         P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
            .operation = OPERATION_REPORT_ONLY,
            .pipeline_location = PIPELINE_LOCATION_STREAMING_OUTPUT,
            .report = xfb_reports[i],
            .structure_size = STRUCTURE_SIZE_FOUR_WORDS,
            .sub_report = index,
            .flush_disable = true,
         });
         report_addr += 2 * sizeof(struct nvk_query_report);
      }
      break;
   }

   case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
      p = nvk_cmd_buffer_push(cmd, 5 + end_size);

      P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
      P_NV9097_SET_REPORT_SEMAPHORE_A(p, report_addr >> 32);
      P_NV9097_SET_REPORT_SEMAPHORE_B(p, report_addr);
      P_NV9097_SET_REPORT_SEMAPHORE_C(p, 1);
      P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
         .operation = OPERATION_REPORT_ONLY,
         .pipeline_location = PIPELINE_LOCATION_STREAMING_OUTPUT,
         .report = REPORT_VTG_PRIMITIVES_OUT,
         .sub_report = index,
         .structure_size = STRUCTURE_SIZE_FOUR_WORDS,
         .flush_disable = true,
      });
      break;

   default:
      unreachable("Unsupported query type");
   }

   if (end) {
      P_IMMD(p, NV9097, FLUSH_PENDING_WRITES, 0);

      uint64_t available_addr = nvk_query_available_addr(pool, query);
      P_MTHD(p, NV9097, SET_REPORT_SEMAPHORE_A);
      P_NV9097_SET_REPORT_SEMAPHORE_A(p, available_addr >> 32);
      P_NV9097_SET_REPORT_SEMAPHORE_B(p, available_addr);
      P_NV9097_SET_REPORT_SEMAPHORE_C(p, 1);
      P_NV9097_SET_REPORT_SEMAPHORE_D(p, {
         .operation = OPERATION_RELEASE,
         .release = RELEASE_AFTER_ALL_PRECEEDING_WRITES_COMPLETE,
         .pipeline_location = PIPELINE_LOCATION_ALL,
         .structure_size = STRUCTURE_SIZE_ONE_WORD,
      });
   }
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer,
                            VkQueryPool queryPool,
                            uint32_t query,
                            VkQueryControlFlags flags,
                            uint32_t index)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_query_pool, pool, queryPool);

   nvk_cmd_begin_end_query(cmd, pool, query, index, false);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer,
                          VkQueryPool queryPool,
                          uint32_t query,
                          uint32_t index)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_query_pool, pool, queryPool);

   nvk_cmd_begin_end_query(cmd, pool, query, index, true);

   /* From the Vulkan spec:
    *
    *   "If queries are used while executing a render pass instance that has
    *    multiview enabled, the query uses N consecutive query indices in
    *    the query pool (starting at query) where N is the number of bits set
    *    in the view mask in the subpass the query is used in. How the
    *    numerical results of the query are distributed among the queries is
    *    implementation-dependent."
    *
    * In our case, only the first query is used, so we emit zeros for the
    * remaining queries.
    */
   if (cmd->state.gfx.render.view_mask != 0) {
      const uint32_t num_queries =
         util_bitcount(cmd->state.gfx.render.view_mask);
      if (num_queries > 1)
         emit_zero_queries(cmd, pool, query + 1, num_queries - 1);
   }
}

static bool
nvk_query_is_available(struct nvk_query_pool *pool, uint32_t query)
{
   uint32_t *available = nvk_query_available_map(pool, query);
   return p_atomic_read(available) != 0;
}

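/* How long a CPU wait for query availability may poll, in nanoseconds
 * (2 seconds), before the device is declared lost.
 */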
#define NVK_QUERY_TIMEOUT 2000000000ull

static VkResult
nvk_query_wait_for_available(struct nvk_device *dev,
                             struct nvk_query_pool *pool,
                             uint32_t query)
{
   uint64_t abs_timeout_ns = os_time_get_absolute_timeout(NVK_QUERY_TIMEOUT);

   while (os_time_get_nano() < abs_timeout_ns) {
      if (nvk_query_is_available(pool, query))
         return VK_SUCCESS;

      VkResult status = vk_device_check_status(&dev->vk);
      if (status != VK_SUCCESS)
         return status;
   }

   return vk_device_set_lost(&dev->vk, "query timeout");
}

static void
cpu_write_query_result(void *dst, uint32_t idx,
                       VkQueryResultFlags flags,
                       uint64_t result)
{
   if (flags & VK_QUERY_RESULT_64_BIT) {
      uint64_t *dst64 = dst;
      dst64[idx] = result;
   } else {
      uint32_t *dst32 = dst;
      dst32[idx] = result;
   }
}

static void
cpu_get_query_delta(void *dst, const struct nvk_query_report *src,
                    uint32_t idx, VkQueryResultFlags flags)
{
   uint64_t delta = src[idx * 2 + 1].value - src[idx * 2].value;
   cpu_write_query_result(dst, idx, flags, delta);
}

VKAPI_ATTR VkResult VKAPI_CALL
nvk_GetQueryPoolResults(VkDevice device,
                        VkQueryPool queryPool,
                        uint32_t firstQuery,
                        uint32_t queryCount,
                        size_t dataSize,
                        void *pData,
                        VkDeviceSize stride,
                        VkQueryResultFlags flags)
{
   VK_FROM_HANDLE(nvk_device, dev, device);
   VK_FROM_HANDLE(nvk_query_pool, pool, queryPool);

   if (vk_device_is_lost(&dev->vk))
      return VK_ERROR_DEVICE_LOST;

   VkResult status = VK_SUCCESS;
   for (uint32_t i = 0; i < queryCount; i++) {
      const uint32_t query = firstQuery + i;

      bool available = nvk_query_is_available(pool, query);

      if (!available && (flags & VK_QUERY_RESULT_WAIT_BIT)) {
         status = nvk_query_wait_for_available(dev, pool, query);
         if (status != VK_SUCCESS)
            return status;

         available = true;
      }

      bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT);

      const struct nvk_query_report *src = nvk_query_report_map(pool, query);
      assert(i * stride < dataSize);
      void *dst = (char *)pData + i * stride;

      uint32_t available_dst_idx = 1;
      switch (pool->vk.query_type) {
      case VK_QUERY_TYPE_OCCLUSION:
      case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
         if (write_results)
            cpu_get_query_delta(dst, src, 0, flags);
         break;
      case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
         uint32_t stat_count = util_bitcount(pool->vk.pipeline_statistics);
         available_dst_idx = stat_count;
         if (write_results) {
            for (uint32_t j = 0; j < stat_count; j++)
               cpu_get_query_delta(dst, src, j, flags);
         }
         break;
      }
      case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
         const int prims_succeeded_idx = 0;
         const int prims_needed_idx = 1;
         available_dst_idx = 2;
         if (write_results) {
            cpu_get_query_delta(dst, src, prims_succeeded_idx, flags);
            cpu_get_query_delta(dst, src, prims_needed_idx, flags);
         }
         break;
      }
      case VK_QUERY_TYPE_TIMESTAMP:
         if (write_results)
            cpu_write_query_result(dst, 0, flags, src->timestamp);
         break;
      default:
         unreachable("Unsupported query type");
      }

      if (!write_results)
         status = VK_NOT_READY;

      if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
         cpu_write_query_result(dst, available_dst_idx, flags, available);
   }

   return status;
}

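/* Push constants for the query copy shader.  The layout must match the
 * std140 interface block declared in build_copy_queries_shader() below.
 */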
struct nvk_copy_query_push {
   uint64_t pool_addr;
   uint32_t query_start;
   uint32_t query_stride;
   uint32_t first_query;
   uint32_t query_count;
   uint64_t dst_addr;
   uint64_t dst_stride;
   uint32_t flags;
};

static nir_def *
load_struct_var(nir_builder *b, nir_variable *var, uint32_t field)
{
   nir_deref_instr *deref =
      nir_build_deref_struct(b, nir_build_deref_var(b, var), field);
   return nir_load_deref(b, deref);
}

static void
nir_write_query_result(nir_builder *b, nir_def *dst_addr,
                       nir_def *idx, nir_def *flags,
                       nir_def *result)
{
   assert(result->num_components == 1);
   assert(result->bit_size == 64);

   nir_push_if(b, nir_test_mask(b, flags, VK_QUERY_RESULT_64_BIT));
   {
      nir_def *offset = nir_i2i64(b, nir_imul_imm(b, idx, 8));
      nir_store_global(b, nir_iadd(b, dst_addr, offset), 8, result, 0x1);
   }
   nir_push_else(b, NULL);
   {
      nir_def *result32 = nir_u2u32(b, result);
      nir_def *offset = nir_i2i64(b, nir_imul_imm(b, idx, 4));
      nir_store_global(b, nir_iadd(b, dst_addr, offset), 4, result32, 0x1);
   }
   nir_pop_if(b, NULL);
}

static void
nir_get_query_delta(nir_builder *b, nir_def *dst_addr,
                    nir_def *report_addr, nir_def *idx,
                    nir_def *flags)
{
   nir_def *offset =
      nir_imul_imm(b, idx, 2 * sizeof(struct nvk_query_report));
   nir_def *begin_addr =
      nir_iadd(b, report_addr, nir_i2i64(b, offset));
   nir_def *end_addr =
      nir_iadd_imm(b, begin_addr, sizeof(struct nvk_query_report));

   /* nvk_query_report::value is the first uint64_t in each report */
   nir_def *begin = nir_load_global(b, begin_addr, 16, 1, 64);
   nir_def *end = nir_load_global(b, end_addr, 16, 1, 64);

   nir_def *delta = nir_isub(b, end, begin);

   nir_write_query_result(b, dst_addr, idx, flags, delta);
}

static void
nvk_nir_copy_query(nir_builder *b, nir_variable *push, nir_def *i)
{
   nir_def *pool_addr = load_struct_var(b, push, 0);
   nir_def *query_start = nir_u2u64(b, load_struct_var(b, push, 1));
   nir_def *query_stride = load_struct_var(b, push, 2);
   nir_def *first_query = load_struct_var(b, push, 3);
   nir_def *dst_addr = load_struct_var(b, push, 5);
   nir_def *dst_stride = load_struct_var(b, push, 6);
   nir_def *flags = load_struct_var(b, push, 7);

   nir_def *query = nir_iadd(b, first_query, i);

   nir_def *avail_addr = nvk_nir_available_addr(b, pool_addr, query);
   nir_def *available =
      nir_i2b(b, nir_load_global(b, avail_addr, 4, 1, 32));

   nir_def *partial = nir_test_mask(b, flags, VK_QUERY_RESULT_PARTIAL_BIT);
   nir_def *write_results = nir_ior(b, available, partial);

   nir_def *report_addr =
      nvk_nir_query_report_addr(b, pool_addr, query_start, query_stride,
                                query);
   nir_def *dst_offset = nir_imul(b, nir_u2u64(b, i), dst_stride);

   /* Timestamp queries are the only ones that use a single report */
   nir_def *is_timestamp =
      nir_ieq_imm(b, query_stride, sizeof(struct nvk_query_report));

   nir_def *one = nir_imm_int(b, 1);
   nir_def *num_reports;
   nir_push_if(b, is_timestamp);
   {
      nir_push_if(b, write_results);
      {
         /* This is the timestamp case.  We add 8 because we're loading
          * nvk_query_report::timestamp.
          */
         nir_def *timestamp =
            nir_load_global(b, nir_iadd_imm(b, report_addr, 8), 8, 1, 64);

         nir_write_query_result(b, nir_iadd(b, dst_addr, dst_offset),
                                nir_imm_int(b, 0), flags, timestamp);
      }
      nir_pop_if(b, NULL);
   }
   nir_push_else(b, NULL);
   {
      /* Everything that isn't a timestamp has the invariant that the
       * number of destination entries is equal to the query stride divided
       * by the size of two reports.
       */
      num_reports = nir_udiv_imm(b, query_stride,
                                 2 * sizeof(struct nvk_query_report));

      nir_push_if(b, write_results);
      {
         nir_variable *r =
            nir_local_variable_create(b->impl, glsl_uint_type(), "r");
         nir_store_var(b, r, nir_imm_int(b, 0), 0x1);

         nir_push_loop(b);
         {
            nir_break_if(b, nir_ige(b, nir_load_var(b, r), num_reports));

            nir_get_query_delta(b, nir_iadd(b, dst_addr, dst_offset),
                                report_addr, nir_load_var(b, r), flags);

            nir_store_var(b, r, nir_iadd_imm(b, nir_load_var(b, r), 1), 0x1);
         }
         nir_pop_loop(b, NULL);
      }
      nir_pop_if(b, NULL);
   }
   nir_pop_if(b, NULL);

   num_reports = nir_if_phi(b, one, num_reports);

   nir_push_if(b, nir_test_mask(b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
   {
      nir_write_query_result(b, nir_iadd(b, dst_addr, dst_offset),
                             num_reports, flags, nir_b2i64(b, available));
   }
   nir_pop_if(b, NULL);
}

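/* Builds an internal compute shader that copies query results to a buffer,
 * one invocation per query, with a workgroup size of 32.
 */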
static nir_shader *
build_copy_queries_shader(void)
{
   nir_builder build =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
                                     "nvk-meta-copy-queries");
   nir_builder *b = &build;

   struct glsl_struct_field push_fields[] = {
      { .type = glsl_uint64_t_type(), .name = "pool_addr", .offset = 0 },
      { .type = glsl_uint_type(), .name = "query_start", .offset = 8 },
      { .type = glsl_uint_type(), .name = "query_stride", .offset = 12 },
      { .type = glsl_uint_type(), .name = "first_query", .offset = 16 },
      { .type = glsl_uint_type(), .name = "query_count", .offset = 20 },
      { .type = glsl_uint64_t_type(), .name = "dst_addr", .offset = 24 },
      { .type = glsl_uint64_t_type(), .name = "dst_stride", .offset = 32 },
      { .type = glsl_uint_type(), .name = "flags", .offset = 40 },
   };
   const struct glsl_type *push_iface_type =
      glsl_interface_type(push_fields, ARRAY_SIZE(push_fields),
                          GLSL_INTERFACE_PACKING_STD140,
                          false /* row_major */, "push");
   nir_variable *push = nir_variable_create(b->shader, nir_var_mem_push_const,
                                            push_iface_type, "push");

   b->shader->info.workgroup_size[0] = 32;
   nir_def *wg_id = nir_load_workgroup_id(b);
   nir_def *i = nir_iadd(b, nir_load_subgroup_invocation(b),
                            nir_imul_imm(b, nir_channel(b, wg_id, 0), 32));

   nir_def *query_count = load_struct_var(b, push, 4);
   nir_push_if(b, nir_ilt(b, i, query_count));
   {
      nvk_nir_copy_query(b, push, i);
   }
   nir_pop_if(b, NULL);

   return build.shader;
}

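/* Lock-free lazy initialization: the first caller to swap the pointer from
 * NULL wins; a losing caller destroys its freshly-built shader and returns
 * the winner's instead.
 */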
static struct nvk_shader *
atomic_set_or_destroy_shader(struct nvk_device *dev,
                             struct nvk_shader **shader_ptr,
                             struct nvk_shader *shader,
                             const VkAllocationCallbacks *alloc)
{
   struct nvk_shader *old_shader = p_atomic_cmpxchg(shader_ptr, NULL, shader);
   if (old_shader == NULL) {
      return shader;
   } else {
      vk_shader_destroy(&dev->vk, &shader->vk, alloc);
      return old_shader;
   }
}

static VkResult
get_copy_queries_shader(struct nvk_device *dev,
                        struct nvk_shader **shader_out)
{
   struct nvk_shader *shader = p_atomic_read(&dev->copy_queries);
   if (shader != NULL) {
      *shader_out = shader;
      return VK_SUCCESS;
   }

   nir_shader *nir = build_copy_queries_shader();
   VkResult result = nvk_compile_nir_shader(dev, nir, &dev->vk.alloc, &shader);
   if (result != VK_SUCCESS)
      return result;

   *shader_out = atomic_set_or_destroy_shader(dev, &dev->copy_queries,
                                              shader, &dev->vk.alloc);

   return VK_SUCCESS;
}

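/* Copies query results on the GPU by dispatching the internal copy shader
 * with one thread per query (see struct nvk_copy_query_push for the
 * parameters).
 */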
static void
nvk_meta_copy_query_pool_results(struct nvk_cmd_buffer *cmd,
                                 struct nvk_query_pool *pool,
                                 uint32_t first_query,
                                 uint32_t query_count,
                                 uint64_t dst_addr,
                                 uint64_t dst_stride,
                                 VkQueryResultFlags flags)
{
   struct nvk_device *dev = nvk_cmd_buffer_device(cmd);

   struct nvk_shader *shader;
   VkResult result = get_copy_queries_shader(dev, &shader);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd->vk, result);
      return;
   }

   const struct nvk_copy_query_push push = {
      .pool_addr = pool->mem->va->addr,
      .query_start = pool->query_start,
      .query_stride = pool->query_stride,
      .first_query = first_query,
      .query_count = query_count,
      .dst_addr = dst_addr,
      .dst_stride = dst_stride,
      .flags = flags,
   };
   nvk_cmd_dispatch_shader(cmd, shader, &push, sizeof(push),
                           DIV_ROUND_UP(query_count, 32), 1, 1);
}

VKAPI_ATTR void VKAPI_CALL
nvk_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
                            VkQueryPool queryPool,
                            uint32_t firstQuery,
                            uint32_t queryCount,
                            VkBuffer dstBuffer,
                            VkDeviceSize dstOffset,
                            VkDeviceSize stride,
                            VkQueryResultFlags flags)
{
   VK_FROM_HANDLE(nvk_cmd_buffer, cmd, commandBuffer);
   VK_FROM_HANDLE(nvk_query_pool, pool, queryPool);
   VK_FROM_HANDLE(nvk_buffer, dst_buffer, dstBuffer);

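   /* With WAIT_BIT, have the GPU wait on each query's availability word
    * (released to 1 when the query completes) before the copy shader runs.
    */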
   if (flags & VK_QUERY_RESULT_WAIT_BIT) {
      for (uint32_t i = 0; i < queryCount; i++) {
         uint64_t avail_addr = nvk_query_available_addr(pool, firstQuery + i);

         struct nv_push *p = nvk_cmd_buffer_push(cmd, 5);
         __push_mthd(p, SUBC_NV9097, NV906F_SEMAPHOREA);
         P_NV906F_SEMAPHOREA(p, avail_addr >> 32);
         P_NV906F_SEMAPHOREB(p, (avail_addr & UINT32_MAX) >> 2);
         P_NV906F_SEMAPHOREC(p, 1);
         P_NV906F_SEMAPHORED(p, {
            .operation = OPERATION_ACQ_GEQ,
            .acquire_switch = ACQUIRE_SWITCH_ENABLED,
            .release_size = RELEASE_SIZE_4BYTE,
         });
      }
   }

   uint64_t dst_addr = nvk_buffer_address(dst_buffer, dstOffset);
   nvk_meta_copy_query_pool_results(cmd, pool, firstQuery, queryCount,
                                    dst_addr, stride, flags);
}