xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/genX_query.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker  * Copyright © 2015 Intel Corporation
3*61046927SAndroid Build Coastguard Worker  *
4*61046927SAndroid Build Coastguard Worker  * Permission is hereby granted, free of charge, to any person obtaining a
5*61046927SAndroid Build Coastguard Worker  * copy of this software and associated documentation files (the "Software"),
6*61046927SAndroid Build Coastguard Worker  * to deal in the Software without restriction, including without limitation
7*61046927SAndroid Build Coastguard Worker  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8*61046927SAndroid Build Coastguard Worker  * and/or sell copies of the Software, and to permit persons to whom the
9*61046927SAndroid Build Coastguard Worker  * Software is furnished to do so, subject to the following conditions:
10*61046927SAndroid Build Coastguard Worker  *
11*61046927SAndroid Build Coastguard Worker  * The above copyright notice and this permission notice (including the next
12*61046927SAndroid Build Coastguard Worker  * paragraph) shall be included in all copies or substantial portions of the
13*61046927SAndroid Build Coastguard Worker  * Software.
14*61046927SAndroid Build Coastguard Worker  *
15*61046927SAndroid Build Coastguard Worker  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16*61046927SAndroid Build Coastguard Worker  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17*61046927SAndroid Build Coastguard Worker  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18*61046927SAndroid Build Coastguard Worker  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19*61046927SAndroid Build Coastguard Worker  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20*61046927SAndroid Build Coastguard Worker  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21*61046927SAndroid Build Coastguard Worker  * IN THE SOFTWARE.
22*61046927SAndroid Build Coastguard Worker  */
23*61046927SAndroid Build Coastguard Worker 
24*61046927SAndroid Build Coastguard Worker #include <assert.h>
25*61046927SAndroid Build Coastguard Worker #include <stdbool.h>
26*61046927SAndroid Build Coastguard Worker #include <string.h>
27*61046927SAndroid Build Coastguard Worker #include <unistd.h>
28*61046927SAndroid Build Coastguard Worker #include <fcntl.h>
29*61046927SAndroid Build Coastguard Worker 
30*61046927SAndroid Build Coastguard Worker #include "anv_private.h"
31*61046927SAndroid Build Coastguard Worker 
32*61046927SAndroid Build Coastguard Worker #include "util/os_time.h"
33*61046927SAndroid Build Coastguard Worker 
34*61046927SAndroid Build Coastguard Worker #include "genxml/gen_macros.h"
35*61046927SAndroid Build Coastguard Worker #include "genxml/genX_pack.h"
36*61046927SAndroid Build Coastguard Worker 
37*61046927SAndroid Build Coastguard Worker #include "ds/intel_tracepoints.h"
38*61046927SAndroid Build Coastguard Worker 
39*61046927SAndroid Build Coastguard Worker #include "anv_internal_kernels.h"
40*61046927SAndroid Build Coastguard Worker #include "genX_mi_builder.h"
41*61046927SAndroid Build Coastguard Worker 
42*61046927SAndroid Build Coastguard Worker #if GFX_VERx10 >= 125
43*61046927SAndroid Build Coastguard Worker #define ANV_PIPELINE_STATISTICS_MASK 0x00001fff
44*61046927SAndroid Build Coastguard Worker #else
45*61046927SAndroid Build Coastguard Worker #define ANV_PIPELINE_STATISTICS_MASK 0x000007ff
46*61046927SAndroid Build Coastguard Worker #endif
47*61046927SAndroid Build Coastguard Worker 
48*61046927SAndroid Build Coastguard Worker #include "perf/intel_perf.h"
49*61046927SAndroid Build Coastguard Worker #include "perf/intel_perf_mdapi.h"
50*61046927SAndroid Build Coastguard Worker #include "perf/intel_perf_regs.h"
51*61046927SAndroid Build Coastguard Worker 
52*61046927SAndroid Build Coastguard Worker #include "vk_util.h"
53*61046927SAndroid Build Coastguard Worker 
54*61046927SAndroid Build Coastguard Worker static struct anv_address
anv_query_address(struct anv_query_pool * pool,uint32_t query)55*61046927SAndroid Build Coastguard Worker anv_query_address(struct anv_query_pool *pool, uint32_t query)
56*61046927SAndroid Build Coastguard Worker {
57*61046927SAndroid Build Coastguard Worker    return (struct anv_address) {
58*61046927SAndroid Build Coastguard Worker       .bo = pool->bo,
59*61046927SAndroid Build Coastguard Worker       .offset = query * pool->stride,
60*61046927SAndroid Build Coastguard Worker    };
61*61046927SAndroid Build Coastguard Worker }
62*61046927SAndroid Build Coastguard Worker 
63*61046927SAndroid Build Coastguard Worker static void
emit_query_mi_flush_availability(struct anv_cmd_buffer * cmd_buffer,struct anv_address addr,bool available)64*61046927SAndroid Build Coastguard Worker emit_query_mi_flush_availability(struct anv_cmd_buffer *cmd_buffer,
65*61046927SAndroid Build Coastguard Worker                                  struct anv_address addr,
66*61046927SAndroid Build Coastguard Worker                                  bool available)
67*61046927SAndroid Build Coastguard Worker {
68*61046927SAndroid Build Coastguard Worker    anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), flush) {
69*61046927SAndroid Build Coastguard Worker       flush.PostSyncOperation = WriteImmediateData;
70*61046927SAndroid Build Coastguard Worker       flush.Address = addr;
71*61046927SAndroid Build Coastguard Worker       flush.ImmediateData = available;
72*61046927SAndroid Build Coastguard Worker    }
73*61046927SAndroid Build Coastguard Worker }
74*61046927SAndroid Build Coastguard Worker 
genX(CreateQueryPool)75*61046927SAndroid Build Coastguard Worker VkResult genX(CreateQueryPool)(
76*61046927SAndroid Build Coastguard Worker     VkDevice                                    _device,
77*61046927SAndroid Build Coastguard Worker     const VkQueryPoolCreateInfo*                pCreateInfo,
78*61046927SAndroid Build Coastguard Worker     const VkAllocationCallbacks*                pAllocator,
79*61046927SAndroid Build Coastguard Worker     VkQueryPool*                                pQueryPool)
80*61046927SAndroid Build Coastguard Worker {
81*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_device, device, _device);
82*61046927SAndroid Build Coastguard Worker    const struct anv_physical_device *pdevice = device->physical;
83*61046927SAndroid Build Coastguard Worker    const VkQueryPoolPerformanceCreateInfoKHR *perf_query_info = NULL;
84*61046927SAndroid Build Coastguard Worker    struct intel_perf_counter_pass *counter_pass;
85*61046927SAndroid Build Coastguard Worker    struct intel_perf_query_info **pass_query;
86*61046927SAndroid Build Coastguard Worker    uint32_t n_passes = 0;
87*61046927SAndroid Build Coastguard Worker    uint32_t data_offset = 0;
88*61046927SAndroid Build Coastguard Worker    VK_MULTIALLOC(ma);
89*61046927SAndroid Build Coastguard Worker    VkResult result;
90*61046927SAndroid Build Coastguard Worker 
91*61046927SAndroid Build Coastguard Worker    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO);
92*61046927SAndroid Build Coastguard Worker 
93*61046927SAndroid Build Coastguard Worker    /* Query pool slots are made up of some number of 64-bit values packed
94*61046927SAndroid Build Coastguard Worker     * tightly together. For most query types have the first 64-bit value is
95*61046927SAndroid Build Coastguard Worker     * the "available" bit which is 0 when the query is unavailable and 1 when
96*61046927SAndroid Build Coastguard Worker     * it is available. The 64-bit values that follow are determined by the
97*61046927SAndroid Build Coastguard Worker     * type of query.
98*61046927SAndroid Build Coastguard Worker     *
99*61046927SAndroid Build Coastguard Worker     * For performance queries, we have a requirement to align OA reports at
100*61046927SAndroid Build Coastguard Worker     * 64bytes so we put those first and have the "available" bit behind
101*61046927SAndroid Build Coastguard Worker     * together with some other counters.
102*61046927SAndroid Build Coastguard Worker     */
103*61046927SAndroid Build Coastguard Worker    uint32_t uint64s_per_slot = 0;
104*61046927SAndroid Build Coastguard Worker 
105*61046927SAndroid Build Coastguard Worker    VK_MULTIALLOC_DECL(&ma, struct anv_query_pool, pool, 1);
106*61046927SAndroid Build Coastguard Worker 
107*61046927SAndroid Build Coastguard Worker    VkQueryPipelineStatisticFlags pipeline_statistics = 0;
108*61046927SAndroid Build Coastguard Worker    switch (pCreateInfo->queryType) {
109*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_OCCLUSION:
110*61046927SAndroid Build Coastguard Worker       /* Occlusion queries have two values: begin and end. */
111*61046927SAndroid Build Coastguard Worker       uint64s_per_slot = 1 + 2;
112*61046927SAndroid Build Coastguard Worker       break;
113*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_TIMESTAMP:
114*61046927SAndroid Build Coastguard Worker       /* Timestamps just have the one timestamp value */
115*61046927SAndroid Build Coastguard Worker       uint64s_per_slot = 1 + 1;
116*61046927SAndroid Build Coastguard Worker       break;
117*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PIPELINE_STATISTICS:
118*61046927SAndroid Build Coastguard Worker       pipeline_statistics = pCreateInfo->pipelineStatistics;
119*61046927SAndroid Build Coastguard Worker       /* We're going to trust this field implicitly so we need to ensure that
120*61046927SAndroid Build Coastguard Worker        * no unhandled extension bits leak in.
121*61046927SAndroid Build Coastguard Worker        */
122*61046927SAndroid Build Coastguard Worker       pipeline_statistics &= ANV_PIPELINE_STATISTICS_MASK;
123*61046927SAndroid Build Coastguard Worker 
124*61046927SAndroid Build Coastguard Worker       /* Statistics queries have a min and max for every statistic */
125*61046927SAndroid Build Coastguard Worker       uint64s_per_slot = 1 + 2 * util_bitcount(pipeline_statistics);
126*61046927SAndroid Build Coastguard Worker       break;
127*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
128*61046927SAndroid Build Coastguard Worker       /* Transform feedback queries are 4 values, begin/end for
129*61046927SAndroid Build Coastguard Worker        * written/available.
130*61046927SAndroid Build Coastguard Worker        */
131*61046927SAndroid Build Coastguard Worker       uint64s_per_slot = 1 + 4;
132*61046927SAndroid Build Coastguard Worker       break;
133*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
134*61046927SAndroid Build Coastguard Worker       const struct intel_perf_query_field_layout *layout =
135*61046927SAndroid Build Coastguard Worker          &pdevice->perf->query_layout;
136*61046927SAndroid Build Coastguard Worker 
137*61046927SAndroid Build Coastguard Worker       uint64s_per_slot = 2; /* availability + marker */
138*61046927SAndroid Build Coastguard Worker       /* Align to the requirement of the layout */
139*61046927SAndroid Build Coastguard Worker       uint64s_per_slot = align(uint64s_per_slot,
140*61046927SAndroid Build Coastguard Worker                                DIV_ROUND_UP(layout->alignment, sizeof(uint64_t)));
141*61046927SAndroid Build Coastguard Worker       data_offset = uint64s_per_slot * sizeof(uint64_t);
142*61046927SAndroid Build Coastguard Worker       /* Add the query data for begin & end commands */
143*61046927SAndroid Build Coastguard Worker       uint64s_per_slot += 2 * DIV_ROUND_UP(layout->size, sizeof(uint64_t));
144*61046927SAndroid Build Coastguard Worker       break;
145*61046927SAndroid Build Coastguard Worker    }
146*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
147*61046927SAndroid Build Coastguard Worker       const struct intel_perf_query_field_layout *layout =
148*61046927SAndroid Build Coastguard Worker          &pdevice->perf->query_layout;
149*61046927SAndroid Build Coastguard Worker       const struct anv_queue_family *queue_family;
150*61046927SAndroid Build Coastguard Worker 
151*61046927SAndroid Build Coastguard Worker       perf_query_info = vk_find_struct_const(pCreateInfo->pNext,
152*61046927SAndroid Build Coastguard Worker                                              QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR);
153*61046927SAndroid Build Coastguard Worker       /* Same restriction as in EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR() */
154*61046927SAndroid Build Coastguard Worker       queue_family = &pdevice->queue.families[perf_query_info->queueFamilyIndex];
155*61046927SAndroid Build Coastguard Worker       if (!queue_family->supports_perf)
156*61046927SAndroid Build Coastguard Worker          return vk_error(device, VK_ERROR_UNKNOWN);
157*61046927SAndroid Build Coastguard Worker 
158*61046927SAndroid Build Coastguard Worker       n_passes = intel_perf_get_n_passes(pdevice->perf,
159*61046927SAndroid Build Coastguard Worker                                          perf_query_info->pCounterIndices,
160*61046927SAndroid Build Coastguard Worker                                          perf_query_info->counterIndexCount,
161*61046927SAndroid Build Coastguard Worker                                          NULL);
162*61046927SAndroid Build Coastguard Worker       vk_multialloc_add(&ma, &counter_pass, struct intel_perf_counter_pass,
163*61046927SAndroid Build Coastguard Worker                              perf_query_info->counterIndexCount);
164*61046927SAndroid Build Coastguard Worker       vk_multialloc_add(&ma, &pass_query, struct intel_perf_query_info *,
165*61046927SAndroid Build Coastguard Worker                              n_passes);
166*61046927SAndroid Build Coastguard Worker       uint64s_per_slot = 1 /* availability */;
167*61046927SAndroid Build Coastguard Worker       /* Align to the requirement of the layout */
168*61046927SAndroid Build Coastguard Worker       uint64s_per_slot = align(uint64s_per_slot,
169*61046927SAndroid Build Coastguard Worker                                DIV_ROUND_UP(layout->alignment, sizeof(uint64_t)));
170*61046927SAndroid Build Coastguard Worker       data_offset = uint64s_per_slot * sizeof(uint64_t);
171*61046927SAndroid Build Coastguard Worker       /* Add the query data for begin & end commands */
172*61046927SAndroid Build Coastguard Worker       uint64s_per_slot += 2 * DIV_ROUND_UP(layout->size, sizeof(uint64_t));
173*61046927SAndroid Build Coastguard Worker       /* Multiply by the number of passes */
174*61046927SAndroid Build Coastguard Worker       uint64s_per_slot *= n_passes;
175*61046927SAndroid Build Coastguard Worker       break;
176*61046927SAndroid Build Coastguard Worker    }
177*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
178*61046927SAndroid Build Coastguard Worker       /* Query has two values: begin and end. */
179*61046927SAndroid Build Coastguard Worker       uint64s_per_slot = 1 + 2;
180*61046927SAndroid Build Coastguard Worker       break;
181*61046927SAndroid Build Coastguard Worker #if GFX_VERx10 >= 125
182*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
183*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
184*61046927SAndroid Build Coastguard Worker       uint64s_per_slot = 1 + 1 /* availability + size (PostbuildInfoCurrentSize, PostbuildInfoCompactedSize) */;
185*61046927SAndroid Build Coastguard Worker       break;
186*61046927SAndroid Build Coastguard Worker 
187*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
188*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
189*61046927SAndroid Build Coastguard Worker       uint64s_per_slot = 1 + 2 /* availability + size (PostbuildInfoSerializationDesc) */;
190*61046927SAndroid Build Coastguard Worker       break;
191*61046927SAndroid Build Coastguard Worker 
192*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
193*61046927SAndroid Build Coastguard Worker       /* Query has two values: begin and end. */
194*61046927SAndroid Build Coastguard Worker       uint64s_per_slot = 1 + 2;
195*61046927SAndroid Build Coastguard Worker       break;
196*61046927SAndroid Build Coastguard Worker 
197*61046927SAndroid Build Coastguard Worker #endif
198*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR:
199*61046927SAndroid Build Coastguard Worker       uint64s_per_slot = 1;
200*61046927SAndroid Build Coastguard Worker       break;
201*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR:
202*61046927SAndroid Build Coastguard Worker       uint64s_per_slot = 1 + 1; /* availability + length of written bitstream data */
203*61046927SAndroid Build Coastguard Worker       break;
204*61046927SAndroid Build Coastguard Worker    default:
205*61046927SAndroid Build Coastguard Worker       assert(!"Invalid query type");
206*61046927SAndroid Build Coastguard Worker    }
207*61046927SAndroid Build Coastguard Worker 
208*61046927SAndroid Build Coastguard Worker    if (!vk_multialloc_zalloc2(&ma, &device->vk.alloc, pAllocator,
209*61046927SAndroid Build Coastguard Worker                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
210*61046927SAndroid Build Coastguard Worker       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
211*61046927SAndroid Build Coastguard Worker 
212*61046927SAndroid Build Coastguard Worker    vk_query_pool_init(&device->vk, &pool->vk, pCreateInfo);
213*61046927SAndroid Build Coastguard Worker    pool->stride = uint64s_per_slot * sizeof(uint64_t);
214*61046927SAndroid Build Coastguard Worker 
215*61046927SAndroid Build Coastguard Worker    if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL) {
216*61046927SAndroid Build Coastguard Worker       pool->data_offset = data_offset;
217*61046927SAndroid Build Coastguard Worker       pool->snapshot_size = (pool->stride - data_offset) / 2;
218*61046927SAndroid Build Coastguard Worker    }
219*61046927SAndroid Build Coastguard Worker    else if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
220*61046927SAndroid Build Coastguard Worker       pool->pass_size = pool->stride / n_passes;
221*61046927SAndroid Build Coastguard Worker       pool->data_offset = data_offset;
222*61046927SAndroid Build Coastguard Worker       pool->snapshot_size = (pool->pass_size - data_offset) / 2;
223*61046927SAndroid Build Coastguard Worker       pool->n_counters = perf_query_info->counterIndexCount;
224*61046927SAndroid Build Coastguard Worker       pool->counter_pass = counter_pass;
225*61046927SAndroid Build Coastguard Worker       intel_perf_get_counters_passes(pdevice->perf,
226*61046927SAndroid Build Coastguard Worker                                      perf_query_info->pCounterIndices,
227*61046927SAndroid Build Coastguard Worker                                      perf_query_info->counterIndexCount,
228*61046927SAndroid Build Coastguard Worker                                      pool->counter_pass);
229*61046927SAndroid Build Coastguard Worker       pool->n_passes = n_passes;
230*61046927SAndroid Build Coastguard Worker       pool->pass_query = pass_query;
231*61046927SAndroid Build Coastguard Worker       intel_perf_get_n_passes(pdevice->perf,
232*61046927SAndroid Build Coastguard Worker                               perf_query_info->pCounterIndices,
233*61046927SAndroid Build Coastguard Worker                               perf_query_info->counterIndexCount,
234*61046927SAndroid Build Coastguard Worker                               pool->pass_query);
235*61046927SAndroid Build Coastguard Worker    } else if (pool->vk.query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) {
236*61046927SAndroid Build Coastguard Worker       const VkVideoProfileInfoKHR* pVideoProfile = vk_find_struct_const(pCreateInfo->pNext, VIDEO_PROFILE_INFO_KHR);
237*61046927SAndroid Build Coastguard Worker       assert (pVideoProfile);
238*61046927SAndroid Build Coastguard Worker 
239*61046927SAndroid Build Coastguard Worker       pool->codec = pVideoProfile->videoCodecOperation;
240*61046927SAndroid Build Coastguard Worker    }
241*61046927SAndroid Build Coastguard Worker 
242*61046927SAndroid Build Coastguard Worker    uint64_t size = pool->vk.query_count * (uint64_t)pool->stride;
243*61046927SAndroid Build Coastguard Worker 
244*61046927SAndroid Build Coastguard Worker    /* For KHR_performance_query we need some space in the buffer for a small
245*61046927SAndroid Build Coastguard Worker     * batch updating ANV_PERF_QUERY_OFFSET_REG.
246*61046927SAndroid Build Coastguard Worker     */
247*61046927SAndroid Build Coastguard Worker    if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
248*61046927SAndroid Build Coastguard Worker       pool->khr_perf_preamble_stride = 32;
249*61046927SAndroid Build Coastguard Worker       pool->khr_perf_preambles_offset = size;
250*61046927SAndroid Build Coastguard Worker       size += (uint64_t)pool->n_passes * pool->khr_perf_preamble_stride;
251*61046927SAndroid Build Coastguard Worker    }
252*61046927SAndroid Build Coastguard Worker 
253*61046927SAndroid Build Coastguard Worker    result = anv_device_alloc_bo(device, "query-pool", size,
254*61046927SAndroid Build Coastguard Worker                                 ANV_BO_ALLOC_MAPPED |
255*61046927SAndroid Build Coastguard Worker                                 ANV_BO_ALLOC_HOST_CACHED_COHERENT |
256*61046927SAndroid Build Coastguard Worker                                 ANV_BO_ALLOC_CAPTURE,
257*61046927SAndroid Build Coastguard Worker                                 0 /* explicit_address */,
258*61046927SAndroid Build Coastguard Worker                                 &pool->bo);
259*61046927SAndroid Build Coastguard Worker    if (result != VK_SUCCESS)
260*61046927SAndroid Build Coastguard Worker       goto fail;
261*61046927SAndroid Build Coastguard Worker 
262*61046927SAndroid Build Coastguard Worker    if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
263*61046927SAndroid Build Coastguard Worker       for (uint32_t p = 0; p < pool->n_passes; p++) {
264*61046927SAndroid Build Coastguard Worker          struct mi_builder b;
265*61046927SAndroid Build Coastguard Worker          struct anv_batch batch = {
266*61046927SAndroid Build Coastguard Worker             .start = pool->bo->map + khr_perf_query_preamble_offset(pool, p),
267*61046927SAndroid Build Coastguard Worker             .end = pool->bo->map + khr_perf_query_preamble_offset(pool, p) + pool->khr_perf_preamble_stride,
268*61046927SAndroid Build Coastguard Worker          };
269*61046927SAndroid Build Coastguard Worker          batch.next = batch.start;
270*61046927SAndroid Build Coastguard Worker 
271*61046927SAndroid Build Coastguard Worker          mi_builder_init(&b, device->info, &batch);
272*61046927SAndroid Build Coastguard Worker          mi_store(&b, mi_reg64(ANV_PERF_QUERY_OFFSET_REG),
273*61046927SAndroid Build Coastguard Worker                       mi_imm(p * (uint64_t)pool->pass_size));
274*61046927SAndroid Build Coastguard Worker          anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
275*61046927SAndroid Build Coastguard Worker       }
276*61046927SAndroid Build Coastguard Worker    }
277*61046927SAndroid Build Coastguard Worker 
278*61046927SAndroid Build Coastguard Worker    ANV_RMV(query_pool_create, device, pool, false);
279*61046927SAndroid Build Coastguard Worker 
280*61046927SAndroid Build Coastguard Worker    *pQueryPool = anv_query_pool_to_handle(pool);
281*61046927SAndroid Build Coastguard Worker 
282*61046927SAndroid Build Coastguard Worker    return VK_SUCCESS;
283*61046927SAndroid Build Coastguard Worker 
284*61046927SAndroid Build Coastguard Worker  fail:
285*61046927SAndroid Build Coastguard Worker    vk_free2(&device->vk.alloc, pAllocator, pool);
286*61046927SAndroid Build Coastguard Worker 
287*61046927SAndroid Build Coastguard Worker    return result;
288*61046927SAndroid Build Coastguard Worker }
289*61046927SAndroid Build Coastguard Worker 
genX(DestroyQueryPool)290*61046927SAndroid Build Coastguard Worker void genX(DestroyQueryPool)(
291*61046927SAndroid Build Coastguard Worker     VkDevice                                    _device,
292*61046927SAndroid Build Coastguard Worker     VkQueryPool                                 _pool,
293*61046927SAndroid Build Coastguard Worker     const VkAllocationCallbacks*                pAllocator)
294*61046927SAndroid Build Coastguard Worker {
295*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_device, device, _device);
296*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_query_pool, pool, _pool);
297*61046927SAndroid Build Coastguard Worker 
298*61046927SAndroid Build Coastguard Worker    if (!pool)
299*61046927SAndroid Build Coastguard Worker       return;
300*61046927SAndroid Build Coastguard Worker 
301*61046927SAndroid Build Coastguard Worker    ANV_RMV(resource_destroy, device, pool);
302*61046927SAndroid Build Coastguard Worker 
303*61046927SAndroid Build Coastguard Worker    anv_device_release_bo(device, pool->bo);
304*61046927SAndroid Build Coastguard Worker    vk_object_free(&device->vk, pAllocator, pool);
305*61046927SAndroid Build Coastguard Worker }
306*61046927SAndroid Build Coastguard Worker 
307*61046927SAndroid Build Coastguard Worker /**
308*61046927SAndroid Build Coastguard Worker  * VK_KHR_performance_query layout  :
309*61046927SAndroid Build Coastguard Worker  *
310*61046927SAndroid Build Coastguard Worker  * --------------------------------------------
311*61046927SAndroid Build Coastguard Worker  * |       availability (8b)       | |        |
312*61046927SAndroid Build Coastguard Worker  * |-------------------------------| |        |
313*61046927SAndroid Build Coastguard Worker  * |       some padding (see       | |        |
314*61046927SAndroid Build Coastguard Worker  * | query_field_layout:alignment) | | Pass 0 |
315*61046927SAndroid Build Coastguard Worker  * |-------------------------------| |        |
316*61046927SAndroid Build Coastguard Worker  * |           query data          | |        |
317*61046927SAndroid Build Coastguard Worker  * | (2 * query_field_layout:size) | |        |
318*61046927SAndroid Build Coastguard Worker  * |-------------------------------|--        | Query 0
319*61046927SAndroid Build Coastguard Worker  * |       availability (8b)       | |        |
320*61046927SAndroid Build Coastguard Worker  * |-------------------------------| |        |
321*61046927SAndroid Build Coastguard Worker  * |       some padding (see       | |        |
322*61046927SAndroid Build Coastguard Worker  * | query_field_layout:alignment) | | Pass 1 |
323*61046927SAndroid Build Coastguard Worker  * |-------------------------------| |        |
324*61046927SAndroid Build Coastguard Worker  * |           query data          | |        |
325*61046927SAndroid Build Coastguard Worker  * | (2 * query_field_layout:size) | |        |
326*61046927SAndroid Build Coastguard Worker  * |-------------------------------|-----------
327*61046927SAndroid Build Coastguard Worker  * |       availability (8b)       | |        |
328*61046927SAndroid Build Coastguard Worker  * |-------------------------------| |        |
329*61046927SAndroid Build Coastguard Worker  * |       some padding (see       | |        |
330*61046927SAndroid Build Coastguard Worker  * | query_field_layout:alignment) | | Pass 0 |
331*61046927SAndroid Build Coastguard Worker  * |-------------------------------| |        |
332*61046927SAndroid Build Coastguard Worker  * |           query data          | |        |
333*61046927SAndroid Build Coastguard Worker  * | (2 * query_field_layout:size) | |        |
334*61046927SAndroid Build Coastguard Worker  * |-------------------------------|--        | Query 1
335*61046927SAndroid Build Coastguard Worker  * |               ...             | |        |
336*61046927SAndroid Build Coastguard Worker  * --------------------------------------------
337*61046927SAndroid Build Coastguard Worker  */
338*61046927SAndroid Build Coastguard Worker 
339*61046927SAndroid Build Coastguard Worker static uint64_t
khr_perf_query_availability_offset(struct anv_query_pool * pool,uint32_t query,uint32_t pass)340*61046927SAndroid Build Coastguard Worker khr_perf_query_availability_offset(struct anv_query_pool *pool, uint32_t query, uint32_t pass)
341*61046927SAndroid Build Coastguard Worker {
342*61046927SAndroid Build Coastguard Worker    return (query * (uint64_t)pool->stride) + (pass * (uint64_t)pool->pass_size);
343*61046927SAndroid Build Coastguard Worker }
344*61046927SAndroid Build Coastguard Worker 
345*61046927SAndroid Build Coastguard Worker static uint64_t
khr_perf_query_data_offset(struct anv_query_pool * pool,uint32_t query,uint32_t pass,bool end)346*61046927SAndroid Build Coastguard Worker khr_perf_query_data_offset(struct anv_query_pool *pool, uint32_t query, uint32_t pass, bool end)
347*61046927SAndroid Build Coastguard Worker {
348*61046927SAndroid Build Coastguard Worker    return khr_perf_query_availability_offset(pool, query, pass) +
349*61046927SAndroid Build Coastguard Worker           pool->data_offset + (end ? pool->snapshot_size : 0);
350*61046927SAndroid Build Coastguard Worker }
351*61046927SAndroid Build Coastguard Worker 
352*61046927SAndroid Build Coastguard Worker static struct anv_address
khr_perf_query_availability_address(struct anv_query_pool * pool,uint32_t query,uint32_t pass)353*61046927SAndroid Build Coastguard Worker khr_perf_query_availability_address(struct anv_query_pool *pool, uint32_t query, uint32_t pass)
354*61046927SAndroid Build Coastguard Worker {
355*61046927SAndroid Build Coastguard Worker    return anv_address_add(
356*61046927SAndroid Build Coastguard Worker       (struct anv_address) { .bo = pool->bo, },
357*61046927SAndroid Build Coastguard Worker       khr_perf_query_availability_offset(pool, query, pass));
358*61046927SAndroid Build Coastguard Worker }
359*61046927SAndroid Build Coastguard Worker 
360*61046927SAndroid Build Coastguard Worker static struct anv_address
khr_perf_query_data_address(struct anv_query_pool * pool,uint32_t query,uint32_t pass,bool end)361*61046927SAndroid Build Coastguard Worker khr_perf_query_data_address(struct anv_query_pool *pool, uint32_t query, uint32_t pass, bool end)
362*61046927SAndroid Build Coastguard Worker {
363*61046927SAndroid Build Coastguard Worker    return anv_address_add(
364*61046927SAndroid Build Coastguard Worker       (struct anv_address) { .bo = pool->bo, },
365*61046927SAndroid Build Coastguard Worker       khr_perf_query_data_offset(pool, query, pass, end));
366*61046927SAndroid Build Coastguard Worker }
367*61046927SAndroid Build Coastguard Worker 
368*61046927SAndroid Build Coastguard Worker static bool
khr_perf_query_ensure_relocs(struct anv_cmd_buffer * cmd_buffer)369*61046927SAndroid Build Coastguard Worker khr_perf_query_ensure_relocs(struct anv_cmd_buffer *cmd_buffer)
370*61046927SAndroid Build Coastguard Worker {
371*61046927SAndroid Build Coastguard Worker    if (anv_batch_has_error(&cmd_buffer->batch))
372*61046927SAndroid Build Coastguard Worker       return false;
373*61046927SAndroid Build Coastguard Worker 
374*61046927SAndroid Build Coastguard Worker    if (cmd_buffer->self_mod_locations)
375*61046927SAndroid Build Coastguard Worker       return true;
376*61046927SAndroid Build Coastguard Worker 
377*61046927SAndroid Build Coastguard Worker    struct anv_device *device = cmd_buffer->device;
378*61046927SAndroid Build Coastguard Worker    const struct anv_physical_device *pdevice = device->physical;
379*61046927SAndroid Build Coastguard Worker 
380*61046927SAndroid Build Coastguard Worker    cmd_buffer->self_mod_locations =
381*61046927SAndroid Build Coastguard Worker       vk_alloc(&cmd_buffer->vk.pool->alloc,
382*61046927SAndroid Build Coastguard Worker                pdevice->n_perf_query_commands * sizeof(*cmd_buffer->self_mod_locations), 8,
383*61046927SAndroid Build Coastguard Worker                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
384*61046927SAndroid Build Coastguard Worker 
385*61046927SAndroid Build Coastguard Worker    if (!cmd_buffer->self_mod_locations) {
386*61046927SAndroid Build Coastguard Worker       anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY);
387*61046927SAndroid Build Coastguard Worker       return false;
388*61046927SAndroid Build Coastguard Worker    }
389*61046927SAndroid Build Coastguard Worker 
390*61046927SAndroid Build Coastguard Worker    return true;
391*61046927SAndroid Build Coastguard Worker }
392*61046927SAndroid Build Coastguard Worker 
/**
 * VK_INTEL_performance_query layout :
 *
 * ---------------------------------
 * |     availability (8 bytes)    |
 * |-------------------------------|
 * |        marker (8 bytes)       |
 * |-------------------------------|
 * |       some padding (see       |
 * | query_field_layout:alignment) |
 * |-------------------------------|
 * |           query data          |
 * | (2 * query_field_layout:size) |
 * ---------------------------------
 */
408*61046927SAndroid Build Coastguard Worker 
/* Byte offset of the marker inside a VK_INTEL_performance_query slot. */
static uint32_t
intel_perf_marker_offset(void)
{
   /* The marker comes right after the 8-byte availability value. */
   const uint32_t availability_size = 8;

   return availability_size;
}
414*61046927SAndroid Build Coastguard Worker 
415*61046927SAndroid Build Coastguard Worker static uint32_t
intel_perf_query_data_offset(struct anv_query_pool * pool,bool end)416*61046927SAndroid Build Coastguard Worker intel_perf_query_data_offset(struct anv_query_pool *pool, bool end)
417*61046927SAndroid Build Coastguard Worker {
418*61046927SAndroid Build Coastguard Worker    return pool->data_offset + (end ? pool->snapshot_size : 0);
419*61046927SAndroid Build Coastguard Worker }
420*61046927SAndroid Build Coastguard Worker 
421*61046927SAndroid Build Coastguard Worker static void
cpu_write_query_result(void * dst_slot,VkQueryResultFlags flags,uint32_t value_index,uint64_t result)422*61046927SAndroid Build Coastguard Worker cpu_write_query_result(void *dst_slot, VkQueryResultFlags flags,
423*61046927SAndroid Build Coastguard Worker                        uint32_t value_index, uint64_t result)
424*61046927SAndroid Build Coastguard Worker {
425*61046927SAndroid Build Coastguard Worker    if (flags & VK_QUERY_RESULT_64_BIT) {
426*61046927SAndroid Build Coastguard Worker       uint64_t *dst64 = dst_slot;
427*61046927SAndroid Build Coastguard Worker       dst64[value_index] = result;
428*61046927SAndroid Build Coastguard Worker    } else {
429*61046927SAndroid Build Coastguard Worker       uint32_t *dst32 = dst_slot;
430*61046927SAndroid Build Coastguard Worker       dst32[value_index] = result;
431*61046927SAndroid Build Coastguard Worker    }
432*61046927SAndroid Build Coastguard Worker }
433*61046927SAndroid Build Coastguard Worker 
434*61046927SAndroid Build Coastguard Worker static void *
query_slot(struct anv_query_pool * pool,uint32_t query)435*61046927SAndroid Build Coastguard Worker query_slot(struct anv_query_pool *pool, uint32_t query)
436*61046927SAndroid Build Coastguard Worker {
437*61046927SAndroid Build Coastguard Worker    return pool->bo->map + query * pool->stride;
438*61046927SAndroid Build Coastguard Worker }
439*61046927SAndroid Build Coastguard Worker 
440*61046927SAndroid Build Coastguard Worker static bool
query_is_available(struct anv_query_pool * pool,uint32_t query)441*61046927SAndroid Build Coastguard Worker query_is_available(struct anv_query_pool *pool, uint32_t query)
442*61046927SAndroid Build Coastguard Worker {
443*61046927SAndroid Build Coastguard Worker    if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
444*61046927SAndroid Build Coastguard Worker       for (uint32_t p = 0; p < pool->n_passes; p++) {
445*61046927SAndroid Build Coastguard Worker          volatile uint64_t *slot =
446*61046927SAndroid Build Coastguard Worker             pool->bo->map + khr_perf_query_availability_offset(pool, query, p);
447*61046927SAndroid Build Coastguard Worker          if (!slot[0])
448*61046927SAndroid Build Coastguard Worker             return false;
449*61046927SAndroid Build Coastguard Worker       }
450*61046927SAndroid Build Coastguard Worker       return true;
451*61046927SAndroid Build Coastguard Worker    }
452*61046927SAndroid Build Coastguard Worker 
453*61046927SAndroid Build Coastguard Worker    return *(volatile uint64_t *)query_slot(pool, query);
454*61046927SAndroid Build Coastguard Worker }
455*61046927SAndroid Build Coastguard Worker 
456*61046927SAndroid Build Coastguard Worker static VkResult
wait_for_available(struct anv_device * device,struct anv_query_pool * pool,uint32_t query)457*61046927SAndroid Build Coastguard Worker wait_for_available(struct anv_device *device,
458*61046927SAndroid Build Coastguard Worker                    struct anv_query_pool *pool, uint32_t query)
459*61046927SAndroid Build Coastguard Worker {
460*61046927SAndroid Build Coastguard Worker    /* By default we leave a 2s timeout before declaring the device lost. */
461*61046927SAndroid Build Coastguard Worker    uint64_t rel_timeout = 2 * NSEC_PER_SEC;
462*61046927SAndroid Build Coastguard Worker    if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
463*61046927SAndroid Build Coastguard Worker       /* With performance queries, there is an additional 500us reconfiguration
464*61046927SAndroid Build Coastguard Worker        * time in i915.
465*61046927SAndroid Build Coastguard Worker        */
466*61046927SAndroid Build Coastguard Worker       rel_timeout += 500 * 1000;
467*61046927SAndroid Build Coastguard Worker       /* Additionally a command buffer can be replayed N times to gather data
468*61046927SAndroid Build Coastguard Worker        * for each of the metric sets to capture all the counters requested.
469*61046927SAndroid Build Coastguard Worker        */
470*61046927SAndroid Build Coastguard Worker       rel_timeout *= pool->n_passes;
471*61046927SAndroid Build Coastguard Worker    }
472*61046927SAndroid Build Coastguard Worker    uint64_t abs_timeout_ns = os_time_get_absolute_timeout(rel_timeout);
473*61046927SAndroid Build Coastguard Worker 
474*61046927SAndroid Build Coastguard Worker    while (os_time_get_nano() < abs_timeout_ns) {
475*61046927SAndroid Build Coastguard Worker       if (query_is_available(pool, query))
476*61046927SAndroid Build Coastguard Worker          return VK_SUCCESS;
477*61046927SAndroid Build Coastguard Worker       VkResult status = vk_device_check_status(&device->vk);
478*61046927SAndroid Build Coastguard Worker       if (status != VK_SUCCESS)
479*61046927SAndroid Build Coastguard Worker          return status;
480*61046927SAndroid Build Coastguard Worker    }
481*61046927SAndroid Build Coastguard Worker 
482*61046927SAndroid Build Coastguard Worker    return vk_device_set_lost(&device->vk, "query timeout");
483*61046927SAndroid Build Coastguard Worker }
484*61046927SAndroid Build Coastguard Worker 
genX(GetQueryPoolResults)485*61046927SAndroid Build Coastguard Worker VkResult genX(GetQueryPoolResults)(
486*61046927SAndroid Build Coastguard Worker     VkDevice                                    _device,
487*61046927SAndroid Build Coastguard Worker     VkQueryPool                                 queryPool,
488*61046927SAndroid Build Coastguard Worker     uint32_t                                    firstQuery,
489*61046927SAndroid Build Coastguard Worker     uint32_t                                    queryCount,
490*61046927SAndroid Build Coastguard Worker     size_t                                      dataSize,
491*61046927SAndroid Build Coastguard Worker     void*                                       pData,
492*61046927SAndroid Build Coastguard Worker     VkDeviceSize                                stride,
493*61046927SAndroid Build Coastguard Worker     VkQueryResultFlags                          flags)
494*61046927SAndroid Build Coastguard Worker {
495*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_device, device, _device);
496*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
497*61046927SAndroid Build Coastguard Worker 
498*61046927SAndroid Build Coastguard Worker    assert(
499*61046927SAndroid Build Coastguard Worker #if GFX_VERx10 >= 125
500*61046927SAndroid Build Coastguard Worker    pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR ||
501*61046927SAndroid Build Coastguard Worker    pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR ||
502*61046927SAndroid Build Coastguard Worker    pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR ||
503*61046927SAndroid Build Coastguard Worker    pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR ||
504*61046927SAndroid Build Coastguard Worker    pool->vk.query_type == VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT ||
505*61046927SAndroid Build Coastguard Worker #endif
506*61046927SAndroid Build Coastguard Worker    pool->vk.query_type == VK_QUERY_TYPE_OCCLUSION ||
507*61046927SAndroid Build Coastguard Worker    pool->vk.query_type == VK_QUERY_TYPE_PIPELINE_STATISTICS ||
508*61046927SAndroid Build Coastguard Worker    pool->vk.query_type == VK_QUERY_TYPE_TIMESTAMP ||
509*61046927SAndroid Build Coastguard Worker    pool->vk.query_type == VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT ||
510*61046927SAndroid Build Coastguard Worker    pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR ||
511*61046927SAndroid Build Coastguard Worker    pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL ||
512*61046927SAndroid Build Coastguard Worker    pool->vk.query_type == VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT ||
513*61046927SAndroid Build Coastguard Worker    pool->vk.query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR ||
514*61046927SAndroid Build Coastguard Worker    pool->vk.query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR);
515*61046927SAndroid Build Coastguard Worker 
516*61046927SAndroid Build Coastguard Worker    if (vk_device_is_lost(&device->vk))
517*61046927SAndroid Build Coastguard Worker       return VK_ERROR_DEVICE_LOST;
518*61046927SAndroid Build Coastguard Worker 
519*61046927SAndroid Build Coastguard Worker    if (pData == NULL)
520*61046927SAndroid Build Coastguard Worker       return VK_SUCCESS;
521*61046927SAndroid Build Coastguard Worker 
522*61046927SAndroid Build Coastguard Worker    void *data_end = pData + dataSize;
523*61046927SAndroid Build Coastguard Worker 
524*61046927SAndroid Build Coastguard Worker    VkResult status = VK_SUCCESS;
525*61046927SAndroid Build Coastguard Worker    for (uint32_t i = 0; i < queryCount; i++) {
526*61046927SAndroid Build Coastguard Worker       bool available = query_is_available(pool, firstQuery + i);
527*61046927SAndroid Build Coastguard Worker 
528*61046927SAndroid Build Coastguard Worker       if (!available && (flags & VK_QUERY_RESULT_WAIT_BIT)) {
529*61046927SAndroid Build Coastguard Worker          status = wait_for_available(device, pool, firstQuery + i);
530*61046927SAndroid Build Coastguard Worker          if (status != VK_SUCCESS) {
531*61046927SAndroid Build Coastguard Worker             return status;
532*61046927SAndroid Build Coastguard Worker          }
533*61046927SAndroid Build Coastguard Worker 
534*61046927SAndroid Build Coastguard Worker          available = true;
535*61046927SAndroid Build Coastguard Worker       }
536*61046927SAndroid Build Coastguard Worker 
537*61046927SAndroid Build Coastguard Worker       /* From the Vulkan 1.0.42 spec:
538*61046927SAndroid Build Coastguard Worker        *
539*61046927SAndroid Build Coastguard Worker        *    "If VK_QUERY_RESULT_WAIT_BIT and VK_QUERY_RESULT_PARTIAL_BIT are
540*61046927SAndroid Build Coastguard Worker        *    both not set then no result values are written to pData for
541*61046927SAndroid Build Coastguard Worker        *    queries that are in the unavailable state at the time of the call,
542*61046927SAndroid Build Coastguard Worker        *    and vkGetQueryPoolResults returns VK_NOT_READY. However,
543*61046927SAndroid Build Coastguard Worker        *    availability state is still written to pData for those queries if
544*61046927SAndroid Build Coastguard Worker        *    VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set."
545*61046927SAndroid Build Coastguard Worker        *
546*61046927SAndroid Build Coastguard Worker        * From VK_KHR_performance_query :
547*61046927SAndroid Build Coastguard Worker        *
548*61046927SAndroid Build Coastguard Worker        *    "VK_QUERY_RESULT_PERFORMANCE_QUERY_RECORDED_COUNTERS_BIT_KHR specifies
549*61046927SAndroid Build Coastguard Worker        *     that the result should contain the number of counters that were recorded
550*61046927SAndroid Build Coastguard Worker        *     into a query pool of type ename:VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR"
551*61046927SAndroid Build Coastguard Worker        */
552*61046927SAndroid Build Coastguard Worker       bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT);
553*61046927SAndroid Build Coastguard Worker 
554*61046927SAndroid Build Coastguard Worker       uint32_t idx = 0;
555*61046927SAndroid Build Coastguard Worker       switch (pool->vk.query_type) {
556*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_OCCLUSION:
557*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
558*61046927SAndroid Build Coastguard Worker #if GFX_VERx10 >= 125
559*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
560*61046927SAndroid Build Coastguard Worker #endif
561*61046927SAndroid Build Coastguard Worker       {
562*61046927SAndroid Build Coastguard Worker          uint64_t *slot = query_slot(pool, firstQuery + i);
563*61046927SAndroid Build Coastguard Worker          if (write_results) {
564*61046927SAndroid Build Coastguard Worker             /* From the Vulkan 1.2.132 spec:
565*61046927SAndroid Build Coastguard Worker              *
566*61046927SAndroid Build Coastguard Worker              *    "If VK_QUERY_RESULT_PARTIAL_BIT is set,
567*61046927SAndroid Build Coastguard Worker              *    VK_QUERY_RESULT_WAIT_BIT is not set, and the query’s status
568*61046927SAndroid Build Coastguard Worker              *    is unavailable, an intermediate result value between zero and
569*61046927SAndroid Build Coastguard Worker              *    the final result value is written to pData for that query."
570*61046927SAndroid Build Coastguard Worker              */
571*61046927SAndroid Build Coastguard Worker             uint64_t result = available ? slot[2] - slot[1] : 0;
572*61046927SAndroid Build Coastguard Worker             cpu_write_query_result(pData, flags, idx, result);
573*61046927SAndroid Build Coastguard Worker          }
574*61046927SAndroid Build Coastguard Worker          idx++;
575*61046927SAndroid Build Coastguard Worker          break;
576*61046927SAndroid Build Coastguard Worker       }
577*61046927SAndroid Build Coastguard Worker 
578*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
579*61046927SAndroid Build Coastguard Worker          uint64_t *slot = query_slot(pool, firstQuery + i);
580*61046927SAndroid Build Coastguard Worker          uint32_t statistics = pool->vk.pipeline_statistics;
581*61046927SAndroid Build Coastguard Worker          while (statistics) {
582*61046927SAndroid Build Coastguard Worker             UNUSED uint32_t stat = u_bit_scan(&statistics);
583*61046927SAndroid Build Coastguard Worker             if (write_results) {
584*61046927SAndroid Build Coastguard Worker                /* If a query is not available but VK_QUERY_RESULT_PARTIAL_BIT is set, write 0. */
585*61046927SAndroid Build Coastguard Worker                uint64_t result = available ? slot[idx * 2 + 2] - slot[idx * 2 + 1] : 0;
586*61046927SAndroid Build Coastguard Worker                cpu_write_query_result(pData, flags, idx, result);
587*61046927SAndroid Build Coastguard Worker             }
588*61046927SAndroid Build Coastguard Worker             idx++;
589*61046927SAndroid Build Coastguard Worker          }
590*61046927SAndroid Build Coastguard Worker          assert(idx == util_bitcount(pool->vk.pipeline_statistics));
591*61046927SAndroid Build Coastguard Worker          break;
592*61046927SAndroid Build Coastguard Worker       }
593*61046927SAndroid Build Coastguard Worker 
594*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
595*61046927SAndroid Build Coastguard Worker          uint64_t *slot = query_slot(pool, firstQuery + i);
596*61046927SAndroid Build Coastguard Worker          if (write_results) {
597*61046927SAndroid Build Coastguard Worker             /* If a query is not available but VK_QUERY_RESULT_PARTIAL_BIT is set, write 0. */
598*61046927SAndroid Build Coastguard Worker             uint64_t result = available ? slot[2] - slot[1] : 0;
599*61046927SAndroid Build Coastguard Worker             cpu_write_query_result(pData, flags, idx, result);
600*61046927SAndroid Build Coastguard Worker          }
601*61046927SAndroid Build Coastguard Worker          idx++;
602*61046927SAndroid Build Coastguard Worker          if (write_results) {
603*61046927SAndroid Build Coastguard Worker             /* If a query is not available but VK_QUERY_RESULT_PARTIAL_BIT is set, write 0. */
604*61046927SAndroid Build Coastguard Worker             uint64_t result = available ? slot[4] - slot[3] : 0;
605*61046927SAndroid Build Coastguard Worker             cpu_write_query_result(pData, flags, idx, result);
606*61046927SAndroid Build Coastguard Worker          }
607*61046927SAndroid Build Coastguard Worker          idx++;
608*61046927SAndroid Build Coastguard Worker          break;
609*61046927SAndroid Build Coastguard Worker       }
610*61046927SAndroid Build Coastguard Worker 
611*61046927SAndroid Build Coastguard Worker #if GFX_VERx10 >= 125
612*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
613*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
614*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR: {
615*61046927SAndroid Build Coastguard Worker          uint64_t *slot = query_slot(pool, firstQuery + i);
616*61046927SAndroid Build Coastguard Worker          if (write_results)
617*61046927SAndroid Build Coastguard Worker             cpu_write_query_result(pData, flags, idx, slot[1]);
618*61046927SAndroid Build Coastguard Worker          idx++;
619*61046927SAndroid Build Coastguard Worker          break;
620*61046927SAndroid Build Coastguard Worker       }
621*61046927SAndroid Build Coastguard Worker 
622*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR: {
623*61046927SAndroid Build Coastguard Worker          uint64_t *slot = query_slot(pool, firstQuery + i);
624*61046927SAndroid Build Coastguard Worker          if (write_results)
625*61046927SAndroid Build Coastguard Worker             cpu_write_query_result(pData, flags, idx, slot[2]);
626*61046927SAndroid Build Coastguard Worker          idx++;
627*61046927SAndroid Build Coastguard Worker          break;
628*61046927SAndroid Build Coastguard Worker       }
629*61046927SAndroid Build Coastguard Worker #endif
630*61046927SAndroid Build Coastguard Worker 
631*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_TIMESTAMP: {
632*61046927SAndroid Build Coastguard Worker          uint64_t *slot = query_slot(pool, firstQuery + i);
633*61046927SAndroid Build Coastguard Worker          if (write_results)
634*61046927SAndroid Build Coastguard Worker             cpu_write_query_result(pData, flags, idx, slot[1]);
635*61046927SAndroid Build Coastguard Worker          idx++;
636*61046927SAndroid Build Coastguard Worker          break;
637*61046927SAndroid Build Coastguard Worker       }
638*61046927SAndroid Build Coastguard Worker 
639*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
640*61046927SAndroid Build Coastguard Worker          const struct anv_physical_device *pdevice = device->physical;
641*61046927SAndroid Build Coastguard Worker          assert((flags & (VK_QUERY_RESULT_WITH_AVAILABILITY_BIT |
642*61046927SAndroid Build Coastguard Worker                           VK_QUERY_RESULT_PARTIAL_BIT)) == 0);
643*61046927SAndroid Build Coastguard Worker          for (uint32_t p = 0; p < pool->n_passes; p++) {
644*61046927SAndroid Build Coastguard Worker             const struct intel_perf_query_info *query = pool->pass_query[p];
645*61046927SAndroid Build Coastguard Worker             struct intel_perf_query_result result;
646*61046927SAndroid Build Coastguard Worker             intel_perf_query_result_clear(&result);
647*61046927SAndroid Build Coastguard Worker             intel_perf_query_result_accumulate_fields(&result, query,
648*61046927SAndroid Build Coastguard Worker                                                       pool->bo->map + khr_perf_query_data_offset(pool, firstQuery + i, p, false),
649*61046927SAndroid Build Coastguard Worker                                                       pool->bo->map + khr_perf_query_data_offset(pool, firstQuery + i, p, true),
650*61046927SAndroid Build Coastguard Worker                                                       false /* no_oa_accumulate */);
651*61046927SAndroid Build Coastguard Worker             anv_perf_write_pass_results(pdevice->perf, pool, p, &result, pData);
652*61046927SAndroid Build Coastguard Worker          }
653*61046927SAndroid Build Coastguard Worker          break;
654*61046927SAndroid Build Coastguard Worker       }
655*61046927SAndroid Build Coastguard Worker 
656*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
657*61046927SAndroid Build Coastguard Worker          if (!write_results)
658*61046927SAndroid Build Coastguard Worker             break;
659*61046927SAndroid Build Coastguard Worker          const void *query_data = query_slot(pool, firstQuery + i);
660*61046927SAndroid Build Coastguard Worker          const struct intel_perf_query_info *query = &device->physical->perf->queries[0];
661*61046927SAndroid Build Coastguard Worker          struct intel_perf_query_result result;
662*61046927SAndroid Build Coastguard Worker          intel_perf_query_result_clear(&result);
663*61046927SAndroid Build Coastguard Worker          intel_perf_query_result_accumulate_fields(&result, query,
664*61046927SAndroid Build Coastguard Worker                                                    query_data + intel_perf_query_data_offset(pool, false),
665*61046927SAndroid Build Coastguard Worker                                                    query_data + intel_perf_query_data_offset(pool, true),
666*61046927SAndroid Build Coastguard Worker                                                    false /* no_oa_accumulate */);
667*61046927SAndroid Build Coastguard Worker          intel_perf_query_result_write_mdapi(pData, stride,
668*61046927SAndroid Build Coastguard Worker                                              device->info,
669*61046927SAndroid Build Coastguard Worker                                              query, &result);
670*61046927SAndroid Build Coastguard Worker          const uint64_t *marker = query_data + intel_perf_marker_offset();
671*61046927SAndroid Build Coastguard Worker          intel_perf_query_mdapi_write_marker(pData, stride, device->info, *marker);
672*61046927SAndroid Build Coastguard Worker          break;
673*61046927SAndroid Build Coastguard Worker       }
674*61046927SAndroid Build Coastguard Worker 
675*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR:
676*61046927SAndroid Build Coastguard Worker          if (!write_results)
677*61046927SAndroid Build Coastguard Worker             break;
678*61046927SAndroid Build Coastguard Worker          const uint32_t *query_data = query_slot(pool, firstQuery + i);
679*61046927SAndroid Build Coastguard Worker          uint32_t result = available ? *query_data : 0;
680*61046927SAndroid Build Coastguard Worker          cpu_write_query_result(pData, flags, idx, result);
681*61046927SAndroid Build Coastguard Worker          break;
682*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR: {
683*61046927SAndroid Build Coastguard Worker          if (!write_results)
684*61046927SAndroid Build Coastguard Worker             break;
685*61046927SAndroid Build Coastguard Worker 
686*61046927SAndroid Build Coastguard Worker          /*
687*61046927SAndroid Build Coastguard Worker           * Slot 0 : Availability.
688*61046927SAndroid Build Coastguard Worker           * Slot 1 : Bitstream bytes written.
689*61046927SAndroid Build Coastguard Worker           */
690*61046927SAndroid Build Coastguard Worker          const uint64_t *slot = query_slot(pool, firstQuery + i);
691*61046927SAndroid Build Coastguard Worker          /* Set 0 as offset. */
692*61046927SAndroid Build Coastguard Worker          cpu_write_query_result(pData, flags, idx++, 0);
693*61046927SAndroid Build Coastguard Worker          cpu_write_query_result(pData, flags, idx++, slot[1]);
694*61046927SAndroid Build Coastguard Worker          break;
695*61046927SAndroid Build Coastguard Worker       }
696*61046927SAndroid Build Coastguard Worker 
697*61046927SAndroid Build Coastguard Worker       default:
698*61046927SAndroid Build Coastguard Worker          unreachable("invalid pool type");
699*61046927SAndroid Build Coastguard Worker       }
700*61046927SAndroid Build Coastguard Worker 
701*61046927SAndroid Build Coastguard Worker       if (!write_results)
702*61046927SAndroid Build Coastguard Worker          status = VK_NOT_READY;
703*61046927SAndroid Build Coastguard Worker 
704*61046927SAndroid Build Coastguard Worker       if (flags & (VK_QUERY_RESULT_WITH_AVAILABILITY_BIT |
705*61046927SAndroid Build Coastguard Worker                    VK_QUERY_RESULT_WITH_STATUS_BIT_KHR))
706*61046927SAndroid Build Coastguard Worker          cpu_write_query_result(pData, flags, idx, available);
707*61046927SAndroid Build Coastguard Worker 
708*61046927SAndroid Build Coastguard Worker       pData += stride;
709*61046927SAndroid Build Coastguard Worker       if (pData >= data_end)
710*61046927SAndroid Build Coastguard Worker          break;
711*61046927SAndroid Build Coastguard Worker    }
712*61046927SAndroid Build Coastguard Worker 
713*61046927SAndroid Build Coastguard Worker    return status;
714*61046927SAndroid Build Coastguard Worker }
715*61046927SAndroid Build Coastguard Worker 
716*61046927SAndroid Build Coastguard Worker static void
emit_ps_depth_count(struct anv_cmd_buffer * cmd_buffer,struct anv_address addr)717*61046927SAndroid Build Coastguard Worker emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
718*61046927SAndroid Build Coastguard Worker                     struct anv_address addr)
719*61046927SAndroid Build Coastguard Worker {
720*61046927SAndroid Build Coastguard Worker    cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
721*61046927SAndroid Build Coastguard Worker    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
722*61046927SAndroid Build Coastguard Worker 
723*61046927SAndroid Build Coastguard Worker    bool cs_stall_needed = (GFX_VER == 9 && cmd_buffer->device->info->gt == 4);
724*61046927SAndroid Build Coastguard Worker    genx_batch_emit_pipe_control_write
725*61046927SAndroid Build Coastguard Worker       (&cmd_buffer->batch, cmd_buffer->device->info,
726*61046927SAndroid Build Coastguard Worker        cmd_buffer->state.current_pipeline, WritePSDepthCount, addr, 0,
727*61046927SAndroid Build Coastguard Worker        ANV_PIPE_DEPTH_STALL_BIT | (cs_stall_needed ? ANV_PIPE_CS_STALL_BIT : 0));
728*61046927SAndroid Build Coastguard Worker }
729*61046927SAndroid Build Coastguard Worker 
730*61046927SAndroid Build Coastguard Worker static void
emit_query_mi_availability(struct mi_builder * b,struct anv_address addr,bool available)731*61046927SAndroid Build Coastguard Worker emit_query_mi_availability(struct mi_builder *b,
732*61046927SAndroid Build Coastguard Worker                            struct anv_address addr,
733*61046927SAndroid Build Coastguard Worker                            bool available)
734*61046927SAndroid Build Coastguard Worker {
735*61046927SAndroid Build Coastguard Worker    mi_store(b, mi_mem64(addr), mi_imm(available));
736*61046927SAndroid Build Coastguard Worker }
737*61046927SAndroid Build Coastguard Worker 
738*61046927SAndroid Build Coastguard Worker static void
emit_query_pc_availability(struct anv_cmd_buffer * cmd_buffer,struct anv_address addr,bool available)739*61046927SAndroid Build Coastguard Worker emit_query_pc_availability(struct anv_cmd_buffer *cmd_buffer,
740*61046927SAndroid Build Coastguard Worker                            struct anv_address addr,
741*61046927SAndroid Build Coastguard Worker                            bool available)
742*61046927SAndroid Build Coastguard Worker {
743*61046927SAndroid Build Coastguard Worker    cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
744*61046927SAndroid Build Coastguard Worker    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
745*61046927SAndroid Build Coastguard Worker 
746*61046927SAndroid Build Coastguard Worker    genx_batch_emit_pipe_control_write
747*61046927SAndroid Build Coastguard Worker       (&cmd_buffer->batch, cmd_buffer->device->info,
748*61046927SAndroid Build Coastguard Worker        cmd_buffer->state.current_pipeline, WriteImmediateData, addr,
749*61046927SAndroid Build Coastguard Worker        available, 0);
750*61046927SAndroid Build Coastguard Worker }
751*61046927SAndroid Build Coastguard Worker 
752*61046927SAndroid Build Coastguard Worker /**
753*61046927SAndroid Build Coastguard Worker  * Goes through a series of consecutive query indices in the given pool
754*61046927SAndroid Build Coastguard Worker  * setting all element values to 0 and emitting them as available.
755*61046927SAndroid Build Coastguard Worker  */
756*61046927SAndroid Build Coastguard Worker static void
emit_zero_queries(struct anv_cmd_buffer * cmd_buffer,struct mi_builder * b,struct anv_query_pool * pool,uint32_t first_index,uint32_t num_queries)757*61046927SAndroid Build Coastguard Worker emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
758*61046927SAndroid Build Coastguard Worker                   struct mi_builder *b, struct anv_query_pool *pool,
759*61046927SAndroid Build Coastguard Worker                   uint32_t first_index, uint32_t num_queries)
760*61046927SAndroid Build Coastguard Worker {
761*61046927SAndroid Build Coastguard Worker    switch (pool->vk.query_type) {
762*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_OCCLUSION:
763*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_TIMESTAMP:
764*61046927SAndroid Build Coastguard Worker       /* These queries are written with a PIPE_CONTROL so clear them using the
765*61046927SAndroid Build Coastguard Worker        * PIPE_CONTROL as well so we don't have to synchronize between 2 types
766*61046927SAndroid Build Coastguard Worker        * of operations.
767*61046927SAndroid Build Coastguard Worker        */
768*61046927SAndroid Build Coastguard Worker       assert((pool->stride % 8) == 0);
769*61046927SAndroid Build Coastguard Worker       for (uint32_t i = 0; i < num_queries; i++) {
770*61046927SAndroid Build Coastguard Worker          struct anv_address slot_addr =
771*61046927SAndroid Build Coastguard Worker             anv_query_address(pool, first_index + i);
772*61046927SAndroid Build Coastguard Worker 
773*61046927SAndroid Build Coastguard Worker          for (uint32_t qword = 1; qword < (pool->stride / 8); qword++) {
774*61046927SAndroid Build Coastguard Worker             emit_query_pc_availability(cmd_buffer,
775*61046927SAndroid Build Coastguard Worker                                        anv_address_add(slot_addr, qword * 8),
776*61046927SAndroid Build Coastguard Worker                                        false);
777*61046927SAndroid Build Coastguard Worker          }
778*61046927SAndroid Build Coastguard Worker          emit_query_pc_availability(cmd_buffer, slot_addr, true);
779*61046927SAndroid Build Coastguard Worker       }
780*61046927SAndroid Build Coastguard Worker       break;
781*61046927SAndroid Build Coastguard Worker 
782*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
783*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PIPELINE_STATISTICS:
784*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
785*61046927SAndroid Build Coastguard Worker #if GFX_VERx10 >= 125
786*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
787*61046927SAndroid Build Coastguard Worker #endif
788*61046927SAndroid Build Coastguard Worker       for (uint32_t i = 0; i < num_queries; i++) {
789*61046927SAndroid Build Coastguard Worker          struct anv_address slot_addr =
790*61046927SAndroid Build Coastguard Worker             anv_query_address(pool, first_index + i);
791*61046927SAndroid Build Coastguard Worker          mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8);
792*61046927SAndroid Build Coastguard Worker          emit_query_mi_availability(b, slot_addr, true);
793*61046927SAndroid Build Coastguard Worker       }
794*61046927SAndroid Build Coastguard Worker       break;
795*61046927SAndroid Build Coastguard Worker 
796*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
797*61046927SAndroid Build Coastguard Worker       for (uint32_t i = 0; i < num_queries; i++) {
798*61046927SAndroid Build Coastguard Worker          for (uint32_t p = 0; p < pool->n_passes; p++) {
799*61046927SAndroid Build Coastguard Worker             mi_memset(b, khr_perf_query_data_address(pool, first_index + i, p, false),
800*61046927SAndroid Build Coastguard Worker                          0, 2 * pool->snapshot_size);
801*61046927SAndroid Build Coastguard Worker             emit_query_mi_availability(b,
802*61046927SAndroid Build Coastguard Worker                                        khr_perf_query_availability_address(pool, first_index + i, p),
803*61046927SAndroid Build Coastguard Worker                                        true);
804*61046927SAndroid Build Coastguard Worker          }
805*61046927SAndroid Build Coastguard Worker       }
806*61046927SAndroid Build Coastguard Worker       break;
807*61046927SAndroid Build Coastguard Worker    }
808*61046927SAndroid Build Coastguard Worker 
809*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL:
810*61046927SAndroid Build Coastguard Worker       for (uint32_t i = 0; i < num_queries; i++) {
811*61046927SAndroid Build Coastguard Worker          struct anv_address slot_addr =
812*61046927SAndroid Build Coastguard Worker             anv_query_address(pool, first_index + i);
813*61046927SAndroid Build Coastguard Worker          mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8);
814*61046927SAndroid Build Coastguard Worker          emit_query_mi_availability(b, slot_addr, true);
815*61046927SAndroid Build Coastguard Worker       }
816*61046927SAndroid Build Coastguard Worker       break;
817*61046927SAndroid Build Coastguard Worker 
818*61046927SAndroid Build Coastguard Worker    default:
819*61046927SAndroid Build Coastguard Worker       unreachable("Unsupported query type");
820*61046927SAndroid Build Coastguard Worker    }
821*61046927SAndroid Build Coastguard Worker }
822*61046927SAndroid Build Coastguard Worker 
genX(CmdResetQueryPool)823*61046927SAndroid Build Coastguard Worker void genX(CmdResetQueryPool)(
824*61046927SAndroid Build Coastguard Worker     VkCommandBuffer                             commandBuffer,
825*61046927SAndroid Build Coastguard Worker     VkQueryPool                                 queryPool,
826*61046927SAndroid Build Coastguard Worker     uint32_t                                    firstQuery,
827*61046927SAndroid Build Coastguard Worker     uint32_t                                    queryCount)
828*61046927SAndroid Build Coastguard Worker {
829*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
830*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
831*61046927SAndroid Build Coastguard Worker    struct anv_physical_device *pdevice = cmd_buffer->device->physical;
832*61046927SAndroid Build Coastguard Worker 
833*61046927SAndroid Build Coastguard Worker    /* Shader clearing is only possible on render/compute when not in protected
834*61046927SAndroid Build Coastguard Worker     * mode.
835*61046927SAndroid Build Coastguard Worker     */
836*61046927SAndroid Build Coastguard Worker    if (anv_cmd_buffer_is_render_or_compute_queue(cmd_buffer) &&
837*61046927SAndroid Build Coastguard Worker        (cmd_buffer->vk.pool->flags & VK_COMMAND_POOL_CREATE_PROTECTED_BIT) == 0 &&
838*61046927SAndroid Build Coastguard Worker        queryCount >= pdevice->instance->query_clear_with_blorp_threshold) {
839*61046927SAndroid Build Coastguard Worker       trace_intel_begin_query_clear_blorp(&cmd_buffer->trace);
840*61046927SAndroid Build Coastguard Worker 
841*61046927SAndroid Build Coastguard Worker       anv_cmd_buffer_fill_area(cmd_buffer,
842*61046927SAndroid Build Coastguard Worker                                anv_query_address(pool, firstQuery),
843*61046927SAndroid Build Coastguard Worker                                queryCount * pool->stride,
844*61046927SAndroid Build Coastguard Worker                                0, false);
845*61046927SAndroid Build Coastguard Worker 
846*61046927SAndroid Build Coastguard Worker       /* The pending clearing writes are in compute if we're in gpgpu mode on
847*61046927SAndroid Build Coastguard Worker        * the render engine or on the compute engine.
848*61046927SAndroid Build Coastguard Worker        */
849*61046927SAndroid Build Coastguard Worker       if (anv_cmd_buffer_is_compute_queue(cmd_buffer) ||
850*61046927SAndroid Build Coastguard Worker           cmd_buffer->state.current_pipeline == pdevice->gpgpu_pipeline_value) {
851*61046927SAndroid Build Coastguard Worker          cmd_buffer->state.queries.clear_bits =
852*61046927SAndroid Build Coastguard Worker             ANV_QUERY_COMPUTE_WRITES_PENDING_BITS;
853*61046927SAndroid Build Coastguard Worker       } else {
854*61046927SAndroid Build Coastguard Worker          cmd_buffer->state.queries.clear_bits =
855*61046927SAndroid Build Coastguard Worker             ANV_QUERY_RENDER_TARGET_WRITES_PENDING_BITS(&pdevice->info);
856*61046927SAndroid Build Coastguard Worker       }
857*61046927SAndroid Build Coastguard Worker 
858*61046927SAndroid Build Coastguard Worker       trace_intel_end_query_clear_blorp(&cmd_buffer->trace, queryCount);
859*61046927SAndroid Build Coastguard Worker       return;
860*61046927SAndroid Build Coastguard Worker    }
861*61046927SAndroid Build Coastguard Worker 
862*61046927SAndroid Build Coastguard Worker    trace_intel_begin_query_clear_cs(&cmd_buffer->trace);
863*61046927SAndroid Build Coastguard Worker 
864*61046927SAndroid Build Coastguard Worker    switch (pool->vk.query_type) {
865*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_OCCLUSION:
866*61046927SAndroid Build Coastguard Worker #if GFX_VERx10 >= 125
867*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
868*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
869*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
870*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
871*61046927SAndroid Build Coastguard Worker #endif
872*61046927SAndroid Build Coastguard Worker       for (uint32_t i = 0; i < queryCount; i++) {
873*61046927SAndroid Build Coastguard Worker          emit_query_pc_availability(cmd_buffer,
874*61046927SAndroid Build Coastguard Worker                                     anv_query_address(pool, firstQuery + i),
875*61046927SAndroid Build Coastguard Worker                                     false);
876*61046927SAndroid Build Coastguard Worker       }
877*61046927SAndroid Build Coastguard Worker       break;
878*61046927SAndroid Build Coastguard Worker 
879*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_TIMESTAMP: {
880*61046927SAndroid Build Coastguard Worker       for (uint32_t i = 0; i < queryCount; i++) {
881*61046927SAndroid Build Coastguard Worker          emit_query_pc_availability(cmd_buffer,
882*61046927SAndroid Build Coastguard Worker                                     anv_query_address(pool, firstQuery + i),
883*61046927SAndroid Build Coastguard Worker                                     false);
884*61046927SAndroid Build Coastguard Worker       }
885*61046927SAndroid Build Coastguard Worker 
886*61046927SAndroid Build Coastguard Worker       /* Add a CS stall here to make sure the PIPE_CONTROL above has
887*61046927SAndroid Build Coastguard Worker        * completed. Otherwise some timestamps written later with MI_STORE_*
888*61046927SAndroid Build Coastguard Worker        * commands might race with the PIPE_CONTROL in the loop above.
889*61046927SAndroid Build Coastguard Worker        */
890*61046927SAndroid Build Coastguard Worker       anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_CS_STALL_BIT,
891*61046927SAndroid Build Coastguard Worker                                 "vkCmdResetQueryPool of timestamps");
892*61046927SAndroid Build Coastguard Worker       genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
893*61046927SAndroid Build Coastguard Worker       break;
894*61046927SAndroid Build Coastguard Worker    }
895*61046927SAndroid Build Coastguard Worker 
896*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PIPELINE_STATISTICS:
897*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
898*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
899*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR:
900*61046927SAndroid Build Coastguard Worker #if GFX_VERx10 >= 125
901*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
902*61046927SAndroid Build Coastguard Worker #endif
903*61046927SAndroid Build Coastguard Worker    {
904*61046927SAndroid Build Coastguard Worker       struct mi_builder b;
905*61046927SAndroid Build Coastguard Worker       mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
906*61046927SAndroid Build Coastguard Worker 
907*61046927SAndroid Build Coastguard Worker       for (uint32_t i = 0; i < queryCount; i++)
908*61046927SAndroid Build Coastguard Worker          emit_query_mi_availability(&b, anv_query_address(pool, firstQuery + i), false);
909*61046927SAndroid Build Coastguard Worker       break;
910*61046927SAndroid Build Coastguard Worker    }
911*61046927SAndroid Build Coastguard Worker 
912*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
913*61046927SAndroid Build Coastguard Worker       struct mi_builder b;
914*61046927SAndroid Build Coastguard Worker       mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
915*61046927SAndroid Build Coastguard Worker 
916*61046927SAndroid Build Coastguard Worker       for (uint32_t i = 0; i < queryCount; i++) {
917*61046927SAndroid Build Coastguard Worker          for (uint32_t p = 0; p < pool->n_passes; p++) {
918*61046927SAndroid Build Coastguard Worker             emit_query_mi_availability(
919*61046927SAndroid Build Coastguard Worker                &b,
920*61046927SAndroid Build Coastguard Worker                khr_perf_query_availability_address(pool, firstQuery + i, p),
921*61046927SAndroid Build Coastguard Worker                false);
922*61046927SAndroid Build Coastguard Worker          }
923*61046927SAndroid Build Coastguard Worker       }
924*61046927SAndroid Build Coastguard Worker       break;
925*61046927SAndroid Build Coastguard Worker    }
926*61046927SAndroid Build Coastguard Worker 
927*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
928*61046927SAndroid Build Coastguard Worker       struct mi_builder b;
929*61046927SAndroid Build Coastguard Worker       mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
930*61046927SAndroid Build Coastguard Worker 
931*61046927SAndroid Build Coastguard Worker       for (uint32_t i = 0; i < queryCount; i++)
932*61046927SAndroid Build Coastguard Worker          emit_query_mi_availability(&b, anv_query_address(pool, firstQuery + i), false);
933*61046927SAndroid Build Coastguard Worker       break;
934*61046927SAndroid Build Coastguard Worker    }
935*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR:
936*61046927SAndroid Build Coastguard Worker       for (uint32_t i = 0; i < queryCount; i++)
937*61046927SAndroid Build Coastguard Worker          emit_query_mi_flush_availability(cmd_buffer, anv_query_address(pool, firstQuery + i), false);
938*61046927SAndroid Build Coastguard Worker       break;
939*61046927SAndroid Build Coastguard Worker    default:
940*61046927SAndroid Build Coastguard Worker       unreachable("Unsupported query type");
941*61046927SAndroid Build Coastguard Worker    }
942*61046927SAndroid Build Coastguard Worker 
943*61046927SAndroid Build Coastguard Worker    trace_intel_end_query_clear_cs(&cmd_buffer->trace, queryCount);
944*61046927SAndroid Build Coastguard Worker }
945*61046927SAndroid Build Coastguard Worker 
genX(ResetQueryPool)946*61046927SAndroid Build Coastguard Worker void genX(ResetQueryPool)(
947*61046927SAndroid Build Coastguard Worker     VkDevice                                    _device,
948*61046927SAndroid Build Coastguard Worker     VkQueryPool                                 queryPool,
949*61046927SAndroid Build Coastguard Worker     uint32_t                                    firstQuery,
950*61046927SAndroid Build Coastguard Worker     uint32_t                                    queryCount)
951*61046927SAndroid Build Coastguard Worker {
952*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
953*61046927SAndroid Build Coastguard Worker 
954*61046927SAndroid Build Coastguard Worker    for (uint32_t i = 0; i < queryCount; i++) {
955*61046927SAndroid Build Coastguard Worker       if (pool->vk.query_type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
956*61046927SAndroid Build Coastguard Worker          for (uint32_t p = 0; p < pool->n_passes; p++) {
957*61046927SAndroid Build Coastguard Worker             uint64_t *pass_slot = pool->bo->map +
958*61046927SAndroid Build Coastguard Worker                khr_perf_query_availability_offset(pool, firstQuery + i, p);
959*61046927SAndroid Build Coastguard Worker             *pass_slot = 0;
960*61046927SAndroid Build Coastguard Worker          }
961*61046927SAndroid Build Coastguard Worker       } else {
962*61046927SAndroid Build Coastguard Worker          uint64_t *slot = query_slot(pool, firstQuery + i);
963*61046927SAndroid Build Coastguard Worker          *slot = 0;
964*61046927SAndroid Build Coastguard Worker       }
965*61046927SAndroid Build Coastguard Worker    }
966*61046927SAndroid Build Coastguard Worker }
967*61046927SAndroid Build Coastguard Worker 
968*61046927SAndroid Build Coastguard Worker static const uint32_t vk_pipeline_stat_to_reg[] = {
969*61046927SAndroid Build Coastguard Worker    GENX(IA_VERTICES_COUNT_num),
970*61046927SAndroid Build Coastguard Worker    GENX(IA_PRIMITIVES_COUNT_num),
971*61046927SAndroid Build Coastguard Worker    GENX(VS_INVOCATION_COUNT_num),
972*61046927SAndroid Build Coastguard Worker    GENX(GS_INVOCATION_COUNT_num),
973*61046927SAndroid Build Coastguard Worker    GENX(GS_PRIMITIVES_COUNT_num),
974*61046927SAndroid Build Coastguard Worker    GENX(CL_INVOCATION_COUNT_num),
975*61046927SAndroid Build Coastguard Worker    GENX(CL_PRIMITIVES_COUNT_num),
976*61046927SAndroid Build Coastguard Worker    GENX(PS_INVOCATION_COUNT_num),
977*61046927SAndroid Build Coastguard Worker    GENX(HS_INVOCATION_COUNT_num),
978*61046927SAndroid Build Coastguard Worker    GENX(DS_INVOCATION_COUNT_num),
979*61046927SAndroid Build Coastguard Worker    GENX(CS_INVOCATION_COUNT_num),
980*61046927SAndroid Build Coastguard Worker #if GFX_VERx10 >= 125
981*61046927SAndroid Build Coastguard Worker    GENX(TASK_INVOCATION_COUNT_num),
982*61046927SAndroid Build Coastguard Worker    GENX(MESH_INVOCATION_COUNT_num)
983*61046927SAndroid Build Coastguard Worker #endif
984*61046927SAndroid Build Coastguard Worker };
985*61046927SAndroid Build Coastguard Worker 
986*61046927SAndroid Build Coastguard Worker static void
emit_pipeline_stat(struct mi_builder * b,uint32_t stat,struct anv_address addr)987*61046927SAndroid Build Coastguard Worker emit_pipeline_stat(struct mi_builder *b, uint32_t stat,
988*61046927SAndroid Build Coastguard Worker                    struct anv_address addr)
989*61046927SAndroid Build Coastguard Worker {
990*61046927SAndroid Build Coastguard Worker    STATIC_ASSERT(ANV_PIPELINE_STATISTICS_MASK ==
991*61046927SAndroid Build Coastguard Worker                  (1 << ARRAY_SIZE(vk_pipeline_stat_to_reg)) - 1);
992*61046927SAndroid Build Coastguard Worker 
993*61046927SAndroid Build Coastguard Worker    assert(stat < ARRAY_SIZE(vk_pipeline_stat_to_reg));
994*61046927SAndroid Build Coastguard Worker    mi_store(b, mi_mem64(addr), mi_reg64(vk_pipeline_stat_to_reg[stat]));
995*61046927SAndroid Build Coastguard Worker }
996*61046927SAndroid Build Coastguard Worker 
997*61046927SAndroid Build Coastguard Worker static void
emit_xfb_query(struct mi_builder * b,uint32_t stream,struct anv_address addr)998*61046927SAndroid Build Coastguard Worker emit_xfb_query(struct mi_builder *b, uint32_t stream,
999*61046927SAndroid Build Coastguard Worker                struct anv_address addr)
1000*61046927SAndroid Build Coastguard Worker {
1001*61046927SAndroid Build Coastguard Worker    assert(stream < MAX_XFB_STREAMS);
1002*61046927SAndroid Build Coastguard Worker 
1003*61046927SAndroid Build Coastguard Worker    mi_store(b, mi_mem64(anv_address_add(addr, 0)),
1004*61046927SAndroid Build Coastguard Worker                mi_reg64(GENX(SO_NUM_PRIMS_WRITTEN0_num) + stream * 8));
1005*61046927SAndroid Build Coastguard Worker    mi_store(b, mi_mem64(anv_address_add(addr, 16)),
1006*61046927SAndroid Build Coastguard Worker                mi_reg64(GENX(SO_PRIM_STORAGE_NEEDED0_num) + stream * 8));
1007*61046927SAndroid Build Coastguard Worker }
1008*61046927SAndroid Build Coastguard Worker 
1009*61046927SAndroid Build Coastguard Worker static void
emit_perf_intel_query(struct anv_cmd_buffer * cmd_buffer,struct anv_query_pool * pool,struct mi_builder * b,struct anv_address query_addr,bool end)1010*61046927SAndroid Build Coastguard Worker emit_perf_intel_query(struct anv_cmd_buffer *cmd_buffer,
1011*61046927SAndroid Build Coastguard Worker                       struct anv_query_pool *pool,
1012*61046927SAndroid Build Coastguard Worker                       struct mi_builder *b,
1013*61046927SAndroid Build Coastguard Worker                       struct anv_address query_addr,
1014*61046927SAndroid Build Coastguard Worker                       bool end)
1015*61046927SAndroid Build Coastguard Worker {
1016*61046927SAndroid Build Coastguard Worker    const struct intel_perf_query_field_layout *layout =
1017*61046927SAndroid Build Coastguard Worker       &cmd_buffer->device->physical->perf->query_layout;
1018*61046927SAndroid Build Coastguard Worker    struct anv_address data_addr =
1019*61046927SAndroid Build Coastguard Worker       anv_address_add(query_addr, intel_perf_query_data_offset(pool, end));
1020*61046927SAndroid Build Coastguard Worker 
1021*61046927SAndroid Build Coastguard Worker    for (uint32_t f = 0; f < layout->n_fields; f++) {
1022*61046927SAndroid Build Coastguard Worker       const struct intel_perf_query_field *field =
1023*61046927SAndroid Build Coastguard Worker          &layout->fields[end ? f : (layout->n_fields - 1 - f)];
1024*61046927SAndroid Build Coastguard Worker 
1025*61046927SAndroid Build Coastguard Worker       switch (field->type) {
1026*61046927SAndroid Build Coastguard Worker       case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
1027*61046927SAndroid Build Coastguard Worker          anv_batch_emit(&cmd_buffer->batch, GENX(MI_REPORT_PERF_COUNT), rpc) {
1028*61046927SAndroid Build Coastguard Worker             rpc.MemoryAddress = anv_address_add(data_addr, field->location);
1029*61046927SAndroid Build Coastguard Worker          }
1030*61046927SAndroid Build Coastguard Worker          break;
1031*61046927SAndroid Build Coastguard Worker 
1032*61046927SAndroid Build Coastguard Worker       case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
1033*61046927SAndroid Build Coastguard Worker       case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
1034*61046927SAndroid Build Coastguard Worker       case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
1035*61046927SAndroid Build Coastguard Worker       case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
1036*61046927SAndroid Build Coastguard Worker       case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
1037*61046927SAndroid Build Coastguard Worker       case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC: {
1038*61046927SAndroid Build Coastguard Worker          struct anv_address addr = anv_address_add(data_addr, field->location);
1039*61046927SAndroid Build Coastguard Worker          struct mi_value src = field->size == 8 ?
1040*61046927SAndroid Build Coastguard Worker             mi_reg64(field->mmio_offset) :
1041*61046927SAndroid Build Coastguard Worker             mi_reg32(field->mmio_offset);
1042*61046927SAndroid Build Coastguard Worker          struct mi_value dst = field->size == 8 ?
1043*61046927SAndroid Build Coastguard Worker             mi_mem64(addr) : mi_mem32(addr);
1044*61046927SAndroid Build Coastguard Worker          mi_store(b, dst, src);
1045*61046927SAndroid Build Coastguard Worker          break;
1046*61046927SAndroid Build Coastguard Worker       }
1047*61046927SAndroid Build Coastguard Worker 
1048*61046927SAndroid Build Coastguard Worker       default:
1049*61046927SAndroid Build Coastguard Worker          unreachable("Invalid query field");
1050*61046927SAndroid Build Coastguard Worker          break;
1051*61046927SAndroid Build Coastguard Worker       }
1052*61046927SAndroid Build Coastguard Worker    }
1053*61046927SAndroid Build Coastguard Worker }
1054*61046927SAndroid Build Coastguard Worker 
1055*61046927SAndroid Build Coastguard Worker static void
emit_query_clear_flush(struct anv_cmd_buffer * cmd_buffer,struct anv_query_pool * pool,const char * reason)1056*61046927SAndroid Build Coastguard Worker emit_query_clear_flush(struct anv_cmd_buffer *cmd_buffer,
1057*61046927SAndroid Build Coastguard Worker                        struct anv_query_pool *pool,
1058*61046927SAndroid Build Coastguard Worker                        const char *reason)
1059*61046927SAndroid Build Coastguard Worker {
1060*61046927SAndroid Build Coastguard Worker    if (cmd_buffer->state.queries.clear_bits == 0)
1061*61046927SAndroid Build Coastguard Worker       return;
1062*61046927SAndroid Build Coastguard Worker 
1063*61046927SAndroid Build Coastguard Worker    anv_add_pending_pipe_bits(cmd_buffer,
1064*61046927SAndroid Build Coastguard Worker                              ANV_PIPE_QUERY_BITS(
1065*61046927SAndroid Build Coastguard Worker                                 cmd_buffer->state.queries.clear_bits),
1066*61046927SAndroid Build Coastguard Worker                              reason);
1067*61046927SAndroid Build Coastguard Worker    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
1068*61046927SAndroid Build Coastguard Worker }
1069*61046927SAndroid Build Coastguard Worker 
1070*61046927SAndroid Build Coastguard Worker 
genX(CmdBeginQueryIndexedEXT)1071*61046927SAndroid Build Coastguard Worker void genX(CmdBeginQueryIndexedEXT)(
1072*61046927SAndroid Build Coastguard Worker     VkCommandBuffer                             commandBuffer,
1073*61046927SAndroid Build Coastguard Worker     VkQueryPool                                 queryPool,
1074*61046927SAndroid Build Coastguard Worker     uint32_t                                    query,
1075*61046927SAndroid Build Coastguard Worker     VkQueryControlFlags                         flags,
1076*61046927SAndroid Build Coastguard Worker     uint32_t                                    index)
1077*61046927SAndroid Build Coastguard Worker {
1078*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1079*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
1080*61046927SAndroid Build Coastguard Worker    struct anv_address query_addr = anv_query_address(pool, query);
1081*61046927SAndroid Build Coastguard Worker 
1082*61046927SAndroid Build Coastguard Worker    emit_query_clear_flush(cmd_buffer, pool, "CmdBeginQuery* flush query clears");
1083*61046927SAndroid Build Coastguard Worker 
1084*61046927SAndroid Build Coastguard Worker    struct mi_builder b;
1085*61046927SAndroid Build Coastguard Worker    mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
1086*61046927SAndroid Build Coastguard Worker    const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device, &query_addr);
1087*61046927SAndroid Build Coastguard Worker    mi_builder_set_mocs(&b, mocs);
1088*61046927SAndroid Build Coastguard Worker 
1089*61046927SAndroid Build Coastguard Worker    switch (pool->vk.query_type) {
1090*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_OCCLUSION:
1091*61046927SAndroid Build Coastguard Worker       cmd_buffer->state.gfx.n_occlusion_queries++;
1092*61046927SAndroid Build Coastguard Worker       cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE;
1093*61046927SAndroid Build Coastguard Worker       emit_ps_depth_count(cmd_buffer, anv_address_add(query_addr, 8));
1094*61046927SAndroid Build Coastguard Worker       break;
1095*61046927SAndroid Build Coastguard Worker 
1096*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
1097*61046927SAndroid Build Coastguard Worker       genx_batch_emit_pipe_control(&cmd_buffer->batch,
1098*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->device->info,
1099*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->state.current_pipeline,
1100*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_CS_STALL_BIT |
1101*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
1102*61046927SAndroid Build Coastguard Worker       mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)),
1103*61046927SAndroid Build Coastguard Worker                    mi_reg64(GENX(CL_INVOCATION_COUNT_num)));
1104*61046927SAndroid Build Coastguard Worker       break;
1105*61046927SAndroid Build Coastguard Worker 
1106*61046927SAndroid Build Coastguard Worker #if GFX_VERx10 >= 125
1107*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
1108*61046927SAndroid Build Coastguard Worker       genx_batch_emit_pipe_control(&cmd_buffer->batch,
1109*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->device->info,
1110*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->state.current_pipeline,
1111*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_CS_STALL_BIT |
1112*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
1113*61046927SAndroid Build Coastguard Worker       mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)),
1114*61046927SAndroid Build Coastguard Worker                    mi_reg64(GENX(MESH_PRIMITIVE_COUNT_num)));
1115*61046927SAndroid Build Coastguard Worker       break;
1116*61046927SAndroid Build Coastguard Worker #endif
1117*61046927SAndroid Build Coastguard Worker 
1118*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
1119*61046927SAndroid Build Coastguard Worker       /* TODO: This might only be necessary for certain stats */
1120*61046927SAndroid Build Coastguard Worker       genx_batch_emit_pipe_control(&cmd_buffer->batch,
1121*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->device->info,
1122*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->state.current_pipeline,
1123*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_CS_STALL_BIT |
1124*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
1125*61046927SAndroid Build Coastguard Worker 
1126*61046927SAndroid Build Coastguard Worker       uint32_t statistics = pool->vk.pipeline_statistics;
1127*61046927SAndroid Build Coastguard Worker       uint32_t offset = 8;
1128*61046927SAndroid Build Coastguard Worker       while (statistics) {
1129*61046927SAndroid Build Coastguard Worker          uint32_t stat = u_bit_scan(&statistics);
1130*61046927SAndroid Build Coastguard Worker          emit_pipeline_stat(&b, stat, anv_address_add(query_addr, offset));
1131*61046927SAndroid Build Coastguard Worker          offset += 16;
1132*61046927SAndroid Build Coastguard Worker       }
1133*61046927SAndroid Build Coastguard Worker       break;
1134*61046927SAndroid Build Coastguard Worker    }
1135*61046927SAndroid Build Coastguard Worker 
1136*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
1137*61046927SAndroid Build Coastguard Worker       genx_batch_emit_pipe_control(&cmd_buffer->batch,
1138*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->device->info,
1139*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->state.current_pipeline,
1140*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_CS_STALL_BIT |
1141*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
1142*61046927SAndroid Build Coastguard Worker       emit_xfb_query(&b, index, anv_address_add(query_addr, 8));
1143*61046927SAndroid Build Coastguard Worker       break;
1144*61046927SAndroid Build Coastguard Worker 
1145*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
1146*61046927SAndroid Build Coastguard Worker       if (!khr_perf_query_ensure_relocs(cmd_buffer))
1147*61046927SAndroid Build Coastguard Worker          return;
1148*61046927SAndroid Build Coastguard Worker 
1149*61046927SAndroid Build Coastguard Worker       const struct anv_physical_device *pdevice = cmd_buffer->device->physical;
1150*61046927SAndroid Build Coastguard Worker       const struct intel_perf_query_field_layout *layout = &pdevice->perf->query_layout;
1151*61046927SAndroid Build Coastguard Worker 
1152*61046927SAndroid Build Coastguard Worker       uint32_t reloc_idx = 0;
1153*61046927SAndroid Build Coastguard Worker       for (uint32_t end = 0; end < 2; end++) {
1154*61046927SAndroid Build Coastguard Worker          for (uint32_t r = 0; r < layout->n_fields; r++) {
1155*61046927SAndroid Build Coastguard Worker             const struct intel_perf_query_field *field =
1156*61046927SAndroid Build Coastguard Worker                &layout->fields[end ? r : (layout->n_fields - 1 - r)];
1157*61046927SAndroid Build Coastguard Worker             struct mi_value reg_addr =
1158*61046927SAndroid Build Coastguard Worker                mi_iadd(
1159*61046927SAndroid Build Coastguard Worker                   &b,
1160*61046927SAndroid Build Coastguard Worker                   mi_imm(intel_canonical_address(pool->bo->offset +
1161*61046927SAndroid Build Coastguard Worker                                                  khr_perf_query_data_offset(pool, query, 0, end) +
1162*61046927SAndroid Build Coastguard Worker                                                  field->location)),
1163*61046927SAndroid Build Coastguard Worker                   mi_reg64(ANV_PERF_QUERY_OFFSET_REG));
1164*61046927SAndroid Build Coastguard Worker             cmd_buffer->self_mod_locations[reloc_idx++] =
1165*61046927SAndroid Build Coastguard Worker                mi_store_relocated_address_reg64(&b, reg_addr);
1166*61046927SAndroid Build Coastguard Worker 
1167*61046927SAndroid Build Coastguard Worker             if (field->type != INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC &&
1168*61046927SAndroid Build Coastguard Worker                 field->size == 8) {
1169*61046927SAndroid Build Coastguard Worker                reg_addr =
1170*61046927SAndroid Build Coastguard Worker                   mi_iadd(
1171*61046927SAndroid Build Coastguard Worker                      &b,
1172*61046927SAndroid Build Coastguard Worker                      mi_imm(intel_canonical_address(pool->bo->offset +
1173*61046927SAndroid Build Coastguard Worker                                                     khr_perf_query_data_offset(pool, query, 0, end) +
1174*61046927SAndroid Build Coastguard Worker                                                     field->location + 4)),
1175*61046927SAndroid Build Coastguard Worker                      mi_reg64(ANV_PERF_QUERY_OFFSET_REG));
1176*61046927SAndroid Build Coastguard Worker                cmd_buffer->self_mod_locations[reloc_idx++] =
1177*61046927SAndroid Build Coastguard Worker                   mi_store_relocated_address_reg64(&b, reg_addr);
1178*61046927SAndroid Build Coastguard Worker             }
1179*61046927SAndroid Build Coastguard Worker          }
1180*61046927SAndroid Build Coastguard Worker       }
1181*61046927SAndroid Build Coastguard Worker 
1182*61046927SAndroid Build Coastguard Worker       struct mi_value availability_write_offset =
1183*61046927SAndroid Build Coastguard Worker          mi_iadd(
1184*61046927SAndroid Build Coastguard Worker             &b,
1185*61046927SAndroid Build Coastguard Worker             mi_imm(
1186*61046927SAndroid Build Coastguard Worker                intel_canonical_address(
1187*61046927SAndroid Build Coastguard Worker                   pool->bo->offset +
1188*61046927SAndroid Build Coastguard Worker                   khr_perf_query_availability_offset(pool, query, 0 /* pass */))),
1189*61046927SAndroid Build Coastguard Worker             mi_reg64(ANV_PERF_QUERY_OFFSET_REG));
1190*61046927SAndroid Build Coastguard Worker       cmd_buffer->self_mod_locations[reloc_idx++] =
1191*61046927SAndroid Build Coastguard Worker          mi_store_relocated_address_reg64(&b, availability_write_offset);
1192*61046927SAndroid Build Coastguard Worker 
1193*61046927SAndroid Build Coastguard Worker       assert(reloc_idx == pdevice->n_perf_query_commands);
1194*61046927SAndroid Build Coastguard Worker 
1195*61046927SAndroid Build Coastguard Worker       const struct intel_device_info *devinfo = cmd_buffer->device->info;
1196*61046927SAndroid Build Coastguard Worker       const enum intel_engine_class engine_class = cmd_buffer->queue_family->engine_class;
1197*61046927SAndroid Build Coastguard Worker       mi_self_mod_barrier(&b, devinfo->engine_class_prefetch[engine_class]);
1198*61046927SAndroid Build Coastguard Worker 
1199*61046927SAndroid Build Coastguard Worker       genx_batch_emit_pipe_control(&cmd_buffer->batch,
1200*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->device->info,
1201*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->state.current_pipeline,
1202*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_CS_STALL_BIT |
1203*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
1204*61046927SAndroid Build Coastguard Worker       cmd_buffer->perf_query_pool = pool;
1205*61046927SAndroid Build Coastguard Worker 
1206*61046927SAndroid Build Coastguard Worker       cmd_buffer->perf_reloc_idx = 0;
1207*61046927SAndroid Build Coastguard Worker       for (uint32_t r = 0; r < layout->n_fields; r++) {
1208*61046927SAndroid Build Coastguard Worker          const struct intel_perf_query_field *field =
1209*61046927SAndroid Build Coastguard Worker             &layout->fields[layout->n_fields - 1 - r];
1210*61046927SAndroid Build Coastguard Worker          void *dws;
1211*61046927SAndroid Build Coastguard Worker 
1212*61046927SAndroid Build Coastguard Worker          switch (field->type) {
1213*61046927SAndroid Build Coastguard Worker          case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
1214*61046927SAndroid Build Coastguard Worker             dws = anv_batch_emitn(&cmd_buffer->batch,
1215*61046927SAndroid Build Coastguard Worker                                   GENX(MI_REPORT_PERF_COUNT_length),
1216*61046927SAndroid Build Coastguard Worker                                   GENX(MI_REPORT_PERF_COUNT),
1217*61046927SAndroid Build Coastguard Worker                                   .MemoryAddress = query_addr /* Will be overwritten */);
1218*61046927SAndroid Build Coastguard Worker             mi_resolve_relocated_address_token(
1219*61046927SAndroid Build Coastguard Worker                &b,
1220*61046927SAndroid Build Coastguard Worker                cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
1221*61046927SAndroid Build Coastguard Worker                dws + GENX(MI_REPORT_PERF_COUNT_MemoryAddress_start) / 8);
1222*61046927SAndroid Build Coastguard Worker             break;
1223*61046927SAndroid Build Coastguard Worker 
1224*61046927SAndroid Build Coastguard Worker          case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
1225*61046927SAndroid Build Coastguard Worker          case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
1226*61046927SAndroid Build Coastguard Worker          case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
1227*61046927SAndroid Build Coastguard Worker          case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
1228*61046927SAndroid Build Coastguard Worker          case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
1229*61046927SAndroid Build Coastguard Worker          case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC:
1230*61046927SAndroid Build Coastguard Worker             dws =
1231*61046927SAndroid Build Coastguard Worker                anv_batch_emitn(&cmd_buffer->batch,
1232*61046927SAndroid Build Coastguard Worker                                GENX(MI_STORE_REGISTER_MEM_length),
1233*61046927SAndroid Build Coastguard Worker                                GENX(MI_STORE_REGISTER_MEM),
1234*61046927SAndroid Build Coastguard Worker                                .RegisterAddress = field->mmio_offset,
1235*61046927SAndroid Build Coastguard Worker                                .MemoryAddress = query_addr /* Will be overwritten */ );
1236*61046927SAndroid Build Coastguard Worker             mi_resolve_relocated_address_token(
1237*61046927SAndroid Build Coastguard Worker                &b,
1238*61046927SAndroid Build Coastguard Worker                cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
1239*61046927SAndroid Build Coastguard Worker                dws + GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
1240*61046927SAndroid Build Coastguard Worker             if (field->size == 8) {
1241*61046927SAndroid Build Coastguard Worker                dws =
1242*61046927SAndroid Build Coastguard Worker                   anv_batch_emitn(&cmd_buffer->batch,
1243*61046927SAndroid Build Coastguard Worker                                   GENX(MI_STORE_REGISTER_MEM_length),
1244*61046927SAndroid Build Coastguard Worker                                   GENX(MI_STORE_REGISTER_MEM),
1245*61046927SAndroid Build Coastguard Worker                                   .RegisterAddress = field->mmio_offset + 4,
1246*61046927SAndroid Build Coastguard Worker                                   .MemoryAddress = query_addr /* Will be overwritten */ );
1247*61046927SAndroid Build Coastguard Worker                mi_resolve_relocated_address_token(
1248*61046927SAndroid Build Coastguard Worker                   &b,
1249*61046927SAndroid Build Coastguard Worker                   cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
1250*61046927SAndroid Build Coastguard Worker                   dws + GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
1251*61046927SAndroid Build Coastguard Worker             }
1252*61046927SAndroid Build Coastguard Worker             break;
1253*61046927SAndroid Build Coastguard Worker 
1254*61046927SAndroid Build Coastguard Worker          default:
1255*61046927SAndroid Build Coastguard Worker             unreachable("Invalid query field");
1256*61046927SAndroid Build Coastguard Worker             break;
1257*61046927SAndroid Build Coastguard Worker          }
1258*61046927SAndroid Build Coastguard Worker       }
1259*61046927SAndroid Build Coastguard Worker       break;
1260*61046927SAndroid Build Coastguard Worker    }
1261*61046927SAndroid Build Coastguard Worker 
1262*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
1263*61046927SAndroid Build Coastguard Worker       genx_batch_emit_pipe_control(&cmd_buffer->batch,
1264*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->device->info,
1265*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->state.current_pipeline,
1266*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_CS_STALL_BIT |
1267*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
1268*61046927SAndroid Build Coastguard Worker       emit_perf_intel_query(cmd_buffer, pool, &b, query_addr, false);
1269*61046927SAndroid Build Coastguard Worker       break;
1270*61046927SAndroid Build Coastguard Worker    }
1271*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR:
1272*61046927SAndroid Build Coastguard Worker       emit_query_mi_flush_availability(cmd_buffer, query_addr, false);
1273*61046927SAndroid Build Coastguard Worker       break;
1274*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR:
1275*61046927SAndroid Build Coastguard Worker       emit_query_mi_availability(&b, query_addr, false);
1276*61046927SAndroid Build Coastguard Worker       break;
1277*61046927SAndroid Build Coastguard Worker    default:
1278*61046927SAndroid Build Coastguard Worker       unreachable("");
1279*61046927SAndroid Build Coastguard Worker    }
1280*61046927SAndroid Build Coastguard Worker }
1281*61046927SAndroid Build Coastguard Worker 
genX(CmdEndQueryIndexedEXT)1282*61046927SAndroid Build Coastguard Worker void genX(CmdEndQueryIndexedEXT)(
1283*61046927SAndroid Build Coastguard Worker     VkCommandBuffer                             commandBuffer,
1284*61046927SAndroid Build Coastguard Worker     VkQueryPool                                 queryPool,
1285*61046927SAndroid Build Coastguard Worker     uint32_t                                    query,
1286*61046927SAndroid Build Coastguard Worker     uint32_t                                    index)
1287*61046927SAndroid Build Coastguard Worker {
1288*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1289*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
1290*61046927SAndroid Build Coastguard Worker    struct anv_address query_addr = anv_query_address(pool, query);
1291*61046927SAndroid Build Coastguard Worker 
1292*61046927SAndroid Build Coastguard Worker    struct mi_builder b;
1293*61046927SAndroid Build Coastguard Worker    mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
1294*61046927SAndroid Build Coastguard Worker 
1295*61046927SAndroid Build Coastguard Worker    switch (pool->vk.query_type) {
1296*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_OCCLUSION:
1297*61046927SAndroid Build Coastguard Worker       emit_ps_depth_count(cmd_buffer, anv_address_add(query_addr, 16));
1298*61046927SAndroid Build Coastguard Worker       emit_query_pc_availability(cmd_buffer, query_addr, true);
1299*61046927SAndroid Build Coastguard Worker       cmd_buffer->state.gfx.n_occlusion_queries--;
1300*61046927SAndroid Build Coastguard Worker       cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE;
1301*61046927SAndroid Build Coastguard Worker       break;
1302*61046927SAndroid Build Coastguard Worker 
1303*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
1304*61046927SAndroid Build Coastguard Worker       /* Ensure previous commands have completed before capturing the register
1305*61046927SAndroid Build Coastguard Worker        * value.
1306*61046927SAndroid Build Coastguard Worker        */
1307*61046927SAndroid Build Coastguard Worker       genx_batch_emit_pipe_control(&cmd_buffer->batch,
1308*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->device->info,
1309*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->state.current_pipeline,
1310*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_CS_STALL_BIT |
1311*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
1312*61046927SAndroid Build Coastguard Worker 
1313*61046927SAndroid Build Coastguard Worker       mi_store(&b, mi_mem64(anv_address_add(query_addr, 16)),
1314*61046927SAndroid Build Coastguard Worker                    mi_reg64(GENX(CL_INVOCATION_COUNT_num)));
1315*61046927SAndroid Build Coastguard Worker       emit_query_mi_availability(&b, query_addr, true);
1316*61046927SAndroid Build Coastguard Worker       break;
1317*61046927SAndroid Build Coastguard Worker 
1318*61046927SAndroid Build Coastguard Worker #if GFX_VERx10 >= 125
1319*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
1320*61046927SAndroid Build Coastguard Worker       genx_batch_emit_pipe_control(&cmd_buffer->batch,
1321*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->device->info,
1322*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->state.current_pipeline,
1323*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_CS_STALL_BIT |
1324*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
1325*61046927SAndroid Build Coastguard Worker       mi_store(&b, mi_mem64(anv_address_add(query_addr, 16)),
1326*61046927SAndroid Build Coastguard Worker                    mi_reg64(GENX(MESH_PRIMITIVE_COUNT_num)));
1327*61046927SAndroid Build Coastguard Worker       emit_query_mi_availability(&b, query_addr, true);
1328*61046927SAndroid Build Coastguard Worker       break;
1329*61046927SAndroid Build Coastguard Worker #endif
1330*61046927SAndroid Build Coastguard Worker 
1331*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
1332*61046927SAndroid Build Coastguard Worker       /* TODO: This might only be necessary for certain stats */
1333*61046927SAndroid Build Coastguard Worker       genx_batch_emit_pipe_control(&cmd_buffer->batch,
1334*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->device->info,
1335*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->state.current_pipeline,
1336*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_CS_STALL_BIT |
1337*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
1338*61046927SAndroid Build Coastguard Worker 
1339*61046927SAndroid Build Coastguard Worker       uint32_t statistics = pool->vk.pipeline_statistics;
1340*61046927SAndroid Build Coastguard Worker       uint32_t offset = 16;
1341*61046927SAndroid Build Coastguard Worker       while (statistics) {
1342*61046927SAndroid Build Coastguard Worker          uint32_t stat = u_bit_scan(&statistics);
1343*61046927SAndroid Build Coastguard Worker          emit_pipeline_stat(&b, stat, anv_address_add(query_addr, offset));
1344*61046927SAndroid Build Coastguard Worker          offset += 16;
1345*61046927SAndroid Build Coastguard Worker       }
1346*61046927SAndroid Build Coastguard Worker 
1347*61046927SAndroid Build Coastguard Worker       emit_query_mi_availability(&b, query_addr, true);
1348*61046927SAndroid Build Coastguard Worker       break;
1349*61046927SAndroid Build Coastguard Worker    }
1350*61046927SAndroid Build Coastguard Worker 
1351*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
1352*61046927SAndroid Build Coastguard Worker       genx_batch_emit_pipe_control(&cmd_buffer->batch,
1353*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->device->info,
1354*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->state.current_pipeline,
1355*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_CS_STALL_BIT |
1356*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
1357*61046927SAndroid Build Coastguard Worker       emit_xfb_query(&b, index, anv_address_add(query_addr, 16));
1358*61046927SAndroid Build Coastguard Worker #if GFX_VER == 11
1359*61046927SAndroid Build Coastguard Worker       /* Running the following CTS pattern on ICL will likely report a failure:
1360*61046927SAndroid Build Coastguard Worker        *
1361*61046927SAndroid Build Coastguard Worker        * dEQP-VK.transform_feedback.primitives_generated_query.get.queue_reset.32bit.geom.*
1362*61046927SAndroid Build Coastguard Worker        *
1363*61046927SAndroid Build Coastguard Worker        * If you dump the returned values in genX(GetQueryPoolResults)(), you
1364*61046927SAndroid Build Coastguard Worker        * will notice that the last 64bit value is 0 and rereading the value
1365*61046927SAndroid Build Coastguard Worker        * once more will return a non-zero value. This seems to indicate that
1366*61046927SAndroid Build Coastguard Worker        * the memory writes are not ordered somehow... Otherwise the
1367*61046927SAndroid Build Coastguard Worker        * availability write below would ensure the previous writes above have
1368*61046927SAndroid Build Coastguard Worker        * completed.
1369*61046927SAndroid Build Coastguard Worker        *
1370*61046927SAndroid Build Coastguard Worker        * So as a workaround, we stall CS to make sure the previous writes have
1371*61046927SAndroid Build Coastguard Worker        * landed before emitting the availability.
1372*61046927SAndroid Build Coastguard Worker        */
1373*61046927SAndroid Build Coastguard Worker       genx_batch_emit_pipe_control(&cmd_buffer->batch,
1374*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->device->info,
1375*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->state.current_pipeline,
1376*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_CS_STALL_BIT);
1377*61046927SAndroid Build Coastguard Worker #endif
1378*61046927SAndroid Build Coastguard Worker       emit_query_mi_availability(&b, query_addr, true);
1379*61046927SAndroid Build Coastguard Worker       break;
1380*61046927SAndroid Build Coastguard Worker 
1381*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
1382*61046927SAndroid Build Coastguard Worker       genx_batch_emit_pipe_control(&cmd_buffer->batch,
1383*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->device->info,
1384*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->state.current_pipeline,
1385*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_CS_STALL_BIT |
1386*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
1387*61046927SAndroid Build Coastguard Worker       cmd_buffer->perf_query_pool = pool;
1388*61046927SAndroid Build Coastguard Worker 
1389*61046927SAndroid Build Coastguard Worker       if (!khr_perf_query_ensure_relocs(cmd_buffer))
1390*61046927SAndroid Build Coastguard Worker          return;
1391*61046927SAndroid Build Coastguard Worker 
1392*61046927SAndroid Build Coastguard Worker       const struct anv_physical_device *pdevice = cmd_buffer->device->physical;
1393*61046927SAndroid Build Coastguard Worker       const struct intel_perf_query_field_layout *layout = &pdevice->perf->query_layout;
1394*61046927SAndroid Build Coastguard Worker 
1395*61046927SAndroid Build Coastguard Worker       void *dws;
1396*61046927SAndroid Build Coastguard Worker       for (uint32_t r = 0; r < layout->n_fields; r++) {
1397*61046927SAndroid Build Coastguard Worker          const struct intel_perf_query_field *field = &layout->fields[r];
1398*61046927SAndroid Build Coastguard Worker 
1399*61046927SAndroid Build Coastguard Worker          switch (field->type) {
1400*61046927SAndroid Build Coastguard Worker          case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:
1401*61046927SAndroid Build Coastguard Worker             dws = anv_batch_emitn(&cmd_buffer->batch,
1402*61046927SAndroid Build Coastguard Worker                                   GENX(MI_REPORT_PERF_COUNT_length),
1403*61046927SAndroid Build Coastguard Worker                                   GENX(MI_REPORT_PERF_COUNT),
1404*61046927SAndroid Build Coastguard Worker                                   .MemoryAddress = query_addr /* Will be overwritten */);
1405*61046927SAndroid Build Coastguard Worker             mi_resolve_relocated_address_token(
1406*61046927SAndroid Build Coastguard Worker                &b,
1407*61046927SAndroid Build Coastguard Worker                cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
1408*61046927SAndroid Build Coastguard Worker                dws + GENX(MI_REPORT_PERF_COUNT_MemoryAddress_start) / 8);
1409*61046927SAndroid Build Coastguard Worker             break;
1410*61046927SAndroid Build Coastguard Worker 
1411*61046927SAndroid Build Coastguard Worker          case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
1412*61046927SAndroid Build Coastguard Worker          case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:
1413*61046927SAndroid Build Coastguard Worker          case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_A:
1414*61046927SAndroid Build Coastguard Worker          case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
1415*61046927SAndroid Build Coastguard Worker          case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:
1416*61046927SAndroid Build Coastguard Worker          case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_PEC:
1417*61046927SAndroid Build Coastguard Worker             dws =
1418*61046927SAndroid Build Coastguard Worker                anv_batch_emitn(&cmd_buffer->batch,
1419*61046927SAndroid Build Coastguard Worker                                GENX(MI_STORE_REGISTER_MEM_length),
1420*61046927SAndroid Build Coastguard Worker                                GENX(MI_STORE_REGISTER_MEM),
1421*61046927SAndroid Build Coastguard Worker                                .RegisterAddress = field->mmio_offset,
1422*61046927SAndroid Build Coastguard Worker                                .MemoryAddress = query_addr /* Will be overwritten */ );
1423*61046927SAndroid Build Coastguard Worker             mi_resolve_relocated_address_token(
1424*61046927SAndroid Build Coastguard Worker                &b,
1425*61046927SAndroid Build Coastguard Worker                cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
1426*61046927SAndroid Build Coastguard Worker                dws + GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
1427*61046927SAndroid Build Coastguard Worker             if (field->size == 8) {
1428*61046927SAndroid Build Coastguard Worker                dws =
1429*61046927SAndroid Build Coastguard Worker                   anv_batch_emitn(&cmd_buffer->batch,
1430*61046927SAndroid Build Coastguard Worker                                   GENX(MI_STORE_REGISTER_MEM_length),
1431*61046927SAndroid Build Coastguard Worker                                   GENX(MI_STORE_REGISTER_MEM),
1432*61046927SAndroid Build Coastguard Worker                                   .RegisterAddress = field->mmio_offset + 4,
1433*61046927SAndroid Build Coastguard Worker                                   .MemoryAddress = query_addr /* Will be overwritten */ );
1434*61046927SAndroid Build Coastguard Worker                mi_resolve_relocated_address_token(
1435*61046927SAndroid Build Coastguard Worker                   &b,
1436*61046927SAndroid Build Coastguard Worker                   cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
1437*61046927SAndroid Build Coastguard Worker                   dws + GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
1438*61046927SAndroid Build Coastguard Worker             }
1439*61046927SAndroid Build Coastguard Worker             break;
1440*61046927SAndroid Build Coastguard Worker 
1441*61046927SAndroid Build Coastguard Worker          default:
1442*61046927SAndroid Build Coastguard Worker             unreachable("Invalid query field");
1443*61046927SAndroid Build Coastguard Worker             break;
1444*61046927SAndroid Build Coastguard Worker          }
1445*61046927SAndroid Build Coastguard Worker       }
1446*61046927SAndroid Build Coastguard Worker 
1447*61046927SAndroid Build Coastguard Worker       dws =
1448*61046927SAndroid Build Coastguard Worker          anv_batch_emitn(&cmd_buffer->batch,
1449*61046927SAndroid Build Coastguard Worker                          GENX(MI_STORE_DATA_IMM_length),
1450*61046927SAndroid Build Coastguard Worker                          GENX(MI_STORE_DATA_IMM),
1451*61046927SAndroid Build Coastguard Worker                          .ImmediateData = true);
1452*61046927SAndroid Build Coastguard Worker       mi_resolve_relocated_address_token(
1453*61046927SAndroid Build Coastguard Worker          &b,
1454*61046927SAndroid Build Coastguard Worker          cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
1455*61046927SAndroid Build Coastguard Worker          dws + GENX(MI_STORE_DATA_IMM_Address_start) / 8);
1456*61046927SAndroid Build Coastguard Worker 
1457*61046927SAndroid Build Coastguard Worker       assert(cmd_buffer->perf_reloc_idx == pdevice->n_perf_query_commands);
1458*61046927SAndroid Build Coastguard Worker       break;
1459*61046927SAndroid Build Coastguard Worker    }
1460*61046927SAndroid Build Coastguard Worker 
1461*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
1462*61046927SAndroid Build Coastguard Worker       genx_batch_emit_pipe_control(&cmd_buffer->batch,
1463*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->device->info,
1464*61046927SAndroid Build Coastguard Worker                                    cmd_buffer->state.current_pipeline,
1465*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_CS_STALL_BIT |
1466*61046927SAndroid Build Coastguard Worker                                    ANV_PIPE_STALL_AT_SCOREBOARD_BIT);
1467*61046927SAndroid Build Coastguard Worker       uint32_t marker_offset = intel_perf_marker_offset();
1468*61046927SAndroid Build Coastguard Worker       mi_store(&b, mi_mem64(anv_address_add(query_addr, marker_offset)),
1469*61046927SAndroid Build Coastguard Worker                    mi_imm(cmd_buffer->intel_perf_marker));
1470*61046927SAndroid Build Coastguard Worker       emit_perf_intel_query(cmd_buffer, pool, &b, query_addr, true);
1471*61046927SAndroid Build Coastguard Worker       emit_query_mi_availability(&b, query_addr, true);
1472*61046927SAndroid Build Coastguard Worker       break;
1473*61046927SAndroid Build Coastguard Worker    }
1474*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR:
1475*61046927SAndroid Build Coastguard Worker       emit_query_mi_flush_availability(cmd_buffer, query_addr, true);
1476*61046927SAndroid Build Coastguard Worker       break;
1477*61046927SAndroid Build Coastguard Worker 
1478*61046927SAndroid Build Coastguard Worker #if GFX_VER < 11
1479*61046927SAndroid Build Coastguard Worker #define MFC_BITSTREAM_BYTECOUNT_FRAME_REG       0x128A0
1480*61046927SAndroid Build Coastguard Worker #define HCP_BITSTREAM_BYTECOUNT_FRAME_REG       0x1E9A0
1481*61046927SAndroid Build Coastguard Worker #elif GFX_VER >= 11
1482*61046927SAndroid Build Coastguard Worker #define MFC_BITSTREAM_BYTECOUNT_FRAME_REG       0x1C08A0
1483*61046927SAndroid Build Coastguard Worker #define HCP_BITSTREAM_BYTECOUNT_FRAME_REG       0x1C28A0
1484*61046927SAndroid Build Coastguard Worker #endif
1485*61046927SAndroid Build Coastguard Worker 
1486*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR: {
1487*61046927SAndroid Build Coastguard Worker       uint32_t reg_addr;
1488*61046927SAndroid Build Coastguard Worker 
1489*61046927SAndroid Build Coastguard Worker       if (pool->codec & VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR) {
1490*61046927SAndroid Build Coastguard Worker          reg_addr = MFC_BITSTREAM_BYTECOUNT_FRAME_REG;
1491*61046927SAndroid Build Coastguard Worker       } else if (pool->codec & VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR) {
1492*61046927SAndroid Build Coastguard Worker          reg_addr = HCP_BITSTREAM_BYTECOUNT_FRAME_REG;
1493*61046927SAndroid Build Coastguard Worker       } else {
1494*61046927SAndroid Build Coastguard Worker          unreachable("Invalid codec operation");
1495*61046927SAndroid Build Coastguard Worker       }
1496*61046927SAndroid Build Coastguard Worker 
1497*61046927SAndroid Build Coastguard Worker       mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)), mi_reg32(reg_addr));
1498*61046927SAndroid Build Coastguard Worker       emit_query_mi_availability(&b, query_addr, true);
1499*61046927SAndroid Build Coastguard Worker       break;
1500*61046927SAndroid Build Coastguard Worker    }
1501*61046927SAndroid Build Coastguard Worker    default:
1502*61046927SAndroid Build Coastguard Worker       unreachable("");
1503*61046927SAndroid Build Coastguard Worker    }
1504*61046927SAndroid Build Coastguard Worker 
1505*61046927SAndroid Build Coastguard Worker    /* When multiview is active the spec requires that N consecutive query
1506*61046927SAndroid Build Coastguard Worker     * indices are used, where N is the number of active views in the subpass.
1507*61046927SAndroid Build Coastguard Worker     * The spec allows that we only write the results to one of the queries
1508*61046927SAndroid Build Coastguard Worker     * but we still need to manage result availability for all the query indices.
1509*61046927SAndroid Build Coastguard Worker     * Since we only emit a single query for all active views in the
1510*61046927SAndroid Build Coastguard Worker     * first index, mark the other query indices as being already available
1511*61046927SAndroid Build Coastguard Worker     * with result 0.
1512*61046927SAndroid Build Coastguard Worker     */
1513*61046927SAndroid Build Coastguard Worker    if (cmd_buffer->state.gfx.view_mask) {
1514*61046927SAndroid Build Coastguard Worker       const uint32_t num_queries =
1515*61046927SAndroid Build Coastguard Worker          util_bitcount(cmd_buffer->state.gfx.view_mask);
1516*61046927SAndroid Build Coastguard Worker       if (num_queries > 1)
1517*61046927SAndroid Build Coastguard Worker          emit_zero_queries(cmd_buffer, &b, pool, query + 1, num_queries - 1);
1518*61046927SAndroid Build Coastguard Worker    }
1519*61046927SAndroid Build Coastguard Worker }
1520*61046927SAndroid Build Coastguard Worker 
1521*61046927SAndroid Build Coastguard Worker #define TIMESTAMP 0x2358
1522*61046927SAndroid Build Coastguard Worker 
genX(CmdWriteTimestamp2)1523*61046927SAndroid Build Coastguard Worker void genX(CmdWriteTimestamp2)(
1524*61046927SAndroid Build Coastguard Worker     VkCommandBuffer                             commandBuffer,
1525*61046927SAndroid Build Coastguard Worker     VkPipelineStageFlags2                       stage,
1526*61046927SAndroid Build Coastguard Worker     VkQueryPool                                 queryPool,
1527*61046927SAndroid Build Coastguard Worker     uint32_t                                    query)
1528*61046927SAndroid Build Coastguard Worker {
1529*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1530*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
1531*61046927SAndroid Build Coastguard Worker    struct anv_address query_addr = anv_query_address(pool, query);
1532*61046927SAndroid Build Coastguard Worker 
1533*61046927SAndroid Build Coastguard Worker    assert(pool->vk.query_type == VK_QUERY_TYPE_TIMESTAMP);
1534*61046927SAndroid Build Coastguard Worker 
1535*61046927SAndroid Build Coastguard Worker    emit_query_clear_flush(cmd_buffer, pool,
1536*61046927SAndroid Build Coastguard Worker                           "CmdWriteTimestamp flush query clears");
1537*61046927SAndroid Build Coastguard Worker 
1538*61046927SAndroid Build Coastguard Worker    struct mi_builder b;
1539*61046927SAndroid Build Coastguard Worker    mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
1540*61046927SAndroid Build Coastguard Worker 
1541*61046927SAndroid Build Coastguard Worker    if (stage == VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT) {
1542*61046927SAndroid Build Coastguard Worker       mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)),
1543*61046927SAndroid Build Coastguard Worker                    mi_reg64(TIMESTAMP));
1544*61046927SAndroid Build Coastguard Worker       emit_query_mi_availability(&b, query_addr, true);
1545*61046927SAndroid Build Coastguard Worker    } else {
1546*61046927SAndroid Build Coastguard Worker       /* Everything else is bottom-of-pipe */
1547*61046927SAndroid Build Coastguard Worker       cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_POST_SYNC_BIT;
1548*61046927SAndroid Build Coastguard Worker       genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
1549*61046927SAndroid Build Coastguard Worker 
1550*61046927SAndroid Build Coastguard Worker       bool cs_stall_needed =
1551*61046927SAndroid Build Coastguard Worker          (GFX_VER == 9 && cmd_buffer->device->info->gt == 4);
1552*61046927SAndroid Build Coastguard Worker 
1553*61046927SAndroid Build Coastguard Worker       if (anv_cmd_buffer_is_blitter_queue(cmd_buffer) ||
1554*61046927SAndroid Build Coastguard Worker           anv_cmd_buffer_is_video_queue(cmd_buffer)) {
1555*61046927SAndroid Build Coastguard Worker          /* Wa_16018063123 - emit fast color dummy blit before MI_FLUSH_DW. */
1556*61046927SAndroid Build Coastguard Worker          if (intel_needs_workaround(cmd_buffer->device->info, 16018063123)) {
1557*61046927SAndroid Build Coastguard Worker             genX(batch_emit_fast_color_dummy_blit)(&cmd_buffer->batch,
1558*61046927SAndroid Build Coastguard Worker                                                    cmd_buffer->device);
1559*61046927SAndroid Build Coastguard Worker          }
1560*61046927SAndroid Build Coastguard Worker          anv_batch_emit(&cmd_buffer->batch, GENX(MI_FLUSH_DW), dw) {
1561*61046927SAndroid Build Coastguard Worker             dw.Address = anv_address_add(query_addr, 8);
1562*61046927SAndroid Build Coastguard Worker             dw.PostSyncOperation = WriteTimestamp;
1563*61046927SAndroid Build Coastguard Worker          }
1564*61046927SAndroid Build Coastguard Worker          emit_query_mi_flush_availability(cmd_buffer, query_addr, true);
1565*61046927SAndroid Build Coastguard Worker       } else {
1566*61046927SAndroid Build Coastguard Worker          genx_batch_emit_pipe_control_write
1567*61046927SAndroid Build Coastguard Worker             (&cmd_buffer->batch, cmd_buffer->device->info,
1568*61046927SAndroid Build Coastguard Worker              cmd_buffer->state.current_pipeline, WriteTimestamp,
1569*61046927SAndroid Build Coastguard Worker              anv_address_add(query_addr, 8), 0,
1570*61046927SAndroid Build Coastguard Worker              cs_stall_needed ? ANV_PIPE_CS_STALL_BIT : 0);
1571*61046927SAndroid Build Coastguard Worker          emit_query_pc_availability(cmd_buffer, query_addr, true);
1572*61046927SAndroid Build Coastguard Worker       }
1573*61046927SAndroid Build Coastguard Worker 
1574*61046927SAndroid Build Coastguard Worker    }
1575*61046927SAndroid Build Coastguard Worker 
1576*61046927SAndroid Build Coastguard Worker 
1577*61046927SAndroid Build Coastguard Worker    /* When multiview is active the spec requires that N consecutive query
1578*61046927SAndroid Build Coastguard Worker     * indices are used, where N is the number of active views in the subpass.
1579*61046927SAndroid Build Coastguard Worker     * The spec allows that we only write the results to one of the queries
1580*61046927SAndroid Build Coastguard Worker     * but we still need to manage result availability for all the query indices.
1581*61046927SAndroid Build Coastguard Worker     * Since we only emit a single query for all active views in the
1582*61046927SAndroid Build Coastguard Worker     * first index, mark the other query indices as being already available
1583*61046927SAndroid Build Coastguard Worker     * with result 0.
1584*61046927SAndroid Build Coastguard Worker     */
1585*61046927SAndroid Build Coastguard Worker    if (cmd_buffer->state.gfx.view_mask) {
1586*61046927SAndroid Build Coastguard Worker       const uint32_t num_queries =
1587*61046927SAndroid Build Coastguard Worker          util_bitcount(cmd_buffer->state.gfx.view_mask);
1588*61046927SAndroid Build Coastguard Worker       if (num_queries > 1)
1589*61046927SAndroid Build Coastguard Worker          emit_zero_queries(cmd_buffer, &b, pool, query + 1, num_queries - 1);
1590*61046927SAndroid Build Coastguard Worker    }
1591*61046927SAndroid Build Coastguard Worker }
1592*61046927SAndroid Build Coastguard Worker 
1593*61046927SAndroid Build Coastguard Worker #define MI_PREDICATE_SRC0    0x2400
1594*61046927SAndroid Build Coastguard Worker #define MI_PREDICATE_SRC1    0x2408
1595*61046927SAndroid Build Coastguard Worker #define MI_PREDICATE_RESULT  0x2418
1596*61046927SAndroid Build Coastguard Worker 
1597*61046927SAndroid Build Coastguard Worker /**
1598*61046927SAndroid Build Coastguard Worker  * Writes the results of a query to dst_addr is the value at poll_addr is equal
1599*61046927SAndroid Build Coastguard Worker  * to the reference value.
1600*61046927SAndroid Build Coastguard Worker  */
1601*61046927SAndroid Build Coastguard Worker static void
gpu_write_query_result_cond(struct anv_cmd_buffer * cmd_buffer,struct mi_builder * b,struct anv_address poll_addr,struct anv_address dst_addr,uint64_t ref_value,VkQueryResultFlags flags,uint32_t value_index,struct mi_value query_result)1602*61046927SAndroid Build Coastguard Worker gpu_write_query_result_cond(struct anv_cmd_buffer *cmd_buffer,
1603*61046927SAndroid Build Coastguard Worker                             struct mi_builder *b,
1604*61046927SAndroid Build Coastguard Worker                             struct anv_address poll_addr,
1605*61046927SAndroid Build Coastguard Worker                             struct anv_address dst_addr,
1606*61046927SAndroid Build Coastguard Worker                             uint64_t ref_value,
1607*61046927SAndroid Build Coastguard Worker                             VkQueryResultFlags flags,
1608*61046927SAndroid Build Coastguard Worker                             uint32_t value_index,
1609*61046927SAndroid Build Coastguard Worker                             struct mi_value query_result)
1610*61046927SAndroid Build Coastguard Worker {
1611*61046927SAndroid Build Coastguard Worker    mi_store(b, mi_reg64(MI_PREDICATE_SRC0), mi_mem64(poll_addr));
1612*61046927SAndroid Build Coastguard Worker    mi_store(b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(ref_value));
1613*61046927SAndroid Build Coastguard Worker    anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
1614*61046927SAndroid Build Coastguard Worker       mip.LoadOperation    = LOAD_LOAD;
1615*61046927SAndroid Build Coastguard Worker       mip.CombineOperation = COMBINE_SET;
1616*61046927SAndroid Build Coastguard Worker       mip.CompareOperation = COMPARE_SRCS_EQUAL;
1617*61046927SAndroid Build Coastguard Worker    }
1618*61046927SAndroid Build Coastguard Worker 
1619*61046927SAndroid Build Coastguard Worker    if (flags & VK_QUERY_RESULT_64_BIT) {
1620*61046927SAndroid Build Coastguard Worker       struct anv_address res_addr = anv_address_add(dst_addr, value_index * 8);
1621*61046927SAndroid Build Coastguard Worker       mi_store_if(b, mi_mem64(res_addr), query_result);
1622*61046927SAndroid Build Coastguard Worker    } else {
1623*61046927SAndroid Build Coastguard Worker       struct anv_address res_addr = anv_address_add(dst_addr, value_index * 4);
1624*61046927SAndroid Build Coastguard Worker       mi_store_if(b, mi_mem32(res_addr), query_result);
1625*61046927SAndroid Build Coastguard Worker    }
1626*61046927SAndroid Build Coastguard Worker }
1627*61046927SAndroid Build Coastguard Worker 
1628*61046927SAndroid Build Coastguard Worker static void
gpu_write_query_result(struct mi_builder * b,struct anv_address dst_addr,VkQueryResultFlags flags,uint32_t value_index,struct mi_value query_result)1629*61046927SAndroid Build Coastguard Worker gpu_write_query_result(struct mi_builder *b,
1630*61046927SAndroid Build Coastguard Worker                        struct anv_address dst_addr,
1631*61046927SAndroid Build Coastguard Worker                        VkQueryResultFlags flags,
1632*61046927SAndroid Build Coastguard Worker                        uint32_t value_index,
1633*61046927SAndroid Build Coastguard Worker                        struct mi_value query_result)
1634*61046927SAndroid Build Coastguard Worker {
1635*61046927SAndroid Build Coastguard Worker    if (flags & VK_QUERY_RESULT_64_BIT) {
1636*61046927SAndroid Build Coastguard Worker       struct anv_address res_addr = anv_address_add(dst_addr, value_index * 8);
1637*61046927SAndroid Build Coastguard Worker       mi_store(b, mi_mem64(res_addr), query_result);
1638*61046927SAndroid Build Coastguard Worker    } else {
1639*61046927SAndroid Build Coastguard Worker       struct anv_address res_addr = anv_address_add(dst_addr, value_index * 4);
1640*61046927SAndroid Build Coastguard Worker       mi_store(b, mi_mem32(res_addr), query_result);
1641*61046927SAndroid Build Coastguard Worker    }
1642*61046927SAndroid Build Coastguard Worker }
1643*61046927SAndroid Build Coastguard Worker 
1644*61046927SAndroid Build Coastguard Worker static struct mi_value
compute_query_result(struct mi_builder * b,struct anv_address addr)1645*61046927SAndroid Build Coastguard Worker compute_query_result(struct mi_builder *b, struct anv_address addr)
1646*61046927SAndroid Build Coastguard Worker {
1647*61046927SAndroid Build Coastguard Worker    return mi_isub(b, mi_mem64(anv_address_add(addr, 8)),
1648*61046927SAndroid Build Coastguard Worker                      mi_mem64(anv_address_add(addr, 0)));
1649*61046927SAndroid Build Coastguard Worker }
1650*61046927SAndroid Build Coastguard Worker 
1651*61046927SAndroid Build Coastguard Worker static void
copy_query_results_with_cs(struct anv_cmd_buffer * cmd_buffer,struct anv_query_pool * pool,struct anv_address dest_addr,uint64_t dest_stride,uint32_t first_query,uint32_t query_count,VkQueryResultFlags flags)1652*61046927SAndroid Build Coastguard Worker copy_query_results_with_cs(struct anv_cmd_buffer *cmd_buffer,
1653*61046927SAndroid Build Coastguard Worker                            struct anv_query_pool *pool,
1654*61046927SAndroid Build Coastguard Worker                            struct anv_address dest_addr,
1655*61046927SAndroid Build Coastguard Worker                            uint64_t dest_stride,
1656*61046927SAndroid Build Coastguard Worker                            uint32_t first_query,
1657*61046927SAndroid Build Coastguard Worker                            uint32_t query_count,
1658*61046927SAndroid Build Coastguard Worker                            VkQueryResultFlags flags)
1659*61046927SAndroid Build Coastguard Worker {
1660*61046927SAndroid Build Coastguard Worker    enum anv_pipe_bits needed_flushes = 0;
1661*61046927SAndroid Build Coastguard Worker 
1662*61046927SAndroid Build Coastguard Worker    trace_intel_begin_query_copy_cs(&cmd_buffer->trace);
1663*61046927SAndroid Build Coastguard Worker 
1664*61046927SAndroid Build Coastguard Worker    /* If render target writes are ongoing, request a render target cache flush
1665*61046927SAndroid Build Coastguard Worker     * to ensure proper ordering of the commands from the 3d pipe and the
1666*61046927SAndroid Build Coastguard Worker     * command streamer.
1667*61046927SAndroid Build Coastguard Worker     */
1668*61046927SAndroid Build Coastguard Worker 
1669*61046927SAndroid Build Coastguard Worker    const enum anv_query_bits query_bits =
1670*61046927SAndroid Build Coastguard Worker       cmd_buffer->state.queries.buffer_write_bits |
1671*61046927SAndroid Build Coastguard Worker       cmd_buffer->state.queries.clear_bits;
1672*61046927SAndroid Build Coastguard Worker 
1673*61046927SAndroid Build Coastguard Worker    needed_flushes |= ANV_PIPE_QUERY_BITS(query_bits);
1674*61046927SAndroid Build Coastguard Worker 
1675*61046927SAndroid Build Coastguard Worker    /* Occlusion & timestamp queries are written using a PIPE_CONTROL and
1676*61046927SAndroid Build Coastguard Worker     * because we're about to copy values from MI commands, we need to stall
1677*61046927SAndroid Build Coastguard Worker     * the command streamer to make sure the PIPE_CONTROL values have
1678*61046927SAndroid Build Coastguard Worker     * landed, otherwise we could see inconsistent values & availability.
1679*61046927SAndroid Build Coastguard Worker     *
1680*61046927SAndroid Build Coastguard Worker     *  From the vulkan spec:
1681*61046927SAndroid Build Coastguard Worker     *
1682*61046927SAndroid Build Coastguard Worker     *     "vkCmdCopyQueryPoolResults is guaranteed to see the effect of
1683*61046927SAndroid Build Coastguard Worker     *     previous uses of vkCmdResetQueryPool in the same queue, without any
1684*61046927SAndroid Build Coastguard Worker     *     additional synchronization."
1685*61046927SAndroid Build Coastguard Worker     */
1686*61046927SAndroid Build Coastguard Worker    if (pool->vk.query_type == VK_QUERY_TYPE_OCCLUSION ||
1687*61046927SAndroid Build Coastguard Worker        pool->vk.query_type == VK_QUERY_TYPE_TIMESTAMP)
1688*61046927SAndroid Build Coastguard Worker       needed_flushes |= ANV_PIPE_CS_STALL_BIT;
1689*61046927SAndroid Build Coastguard Worker 
1690*61046927SAndroid Build Coastguard Worker    if (needed_flushes) {
1691*61046927SAndroid Build Coastguard Worker       anv_add_pending_pipe_bits(cmd_buffer,
1692*61046927SAndroid Build Coastguard Worker                                 needed_flushes,
1693*61046927SAndroid Build Coastguard Worker                                 "CopyQueryPoolResults");
1694*61046927SAndroid Build Coastguard Worker       genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
1695*61046927SAndroid Build Coastguard Worker    }
1696*61046927SAndroid Build Coastguard Worker 
1697*61046927SAndroid Build Coastguard Worker    struct mi_builder b;
1698*61046927SAndroid Build Coastguard Worker    mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
1699*61046927SAndroid Build Coastguard Worker    mi_builder_set_mocs(&b, anv_mocs_for_address(
1700*61046927SAndroid Build Coastguard Worker                           cmd_buffer->device,
1701*61046927SAndroid Build Coastguard Worker                           &(struct anv_address) { .bo = pool->bo }));
1702*61046927SAndroid Build Coastguard Worker 
1703*61046927SAndroid Build Coastguard Worker    for (uint32_t i = 0; i < query_count; i++) {
1704*61046927SAndroid Build Coastguard Worker       struct anv_address query_addr = anv_query_address(pool, first_query + i);
1705*61046927SAndroid Build Coastguard Worker       struct mi_value result;
1706*61046927SAndroid Build Coastguard Worker 
1707*61046927SAndroid Build Coastguard Worker       /* Wait for the availability write to land before we go read the data */
1708*61046927SAndroid Build Coastguard Worker       if (flags & VK_QUERY_RESULT_WAIT_BIT) {
1709*61046927SAndroid Build Coastguard Worker          anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT), sem) {
1710*61046927SAndroid Build Coastguard Worker             sem.WaitMode            = PollingMode;
1711*61046927SAndroid Build Coastguard Worker             sem.CompareOperation    = COMPARE_SAD_EQUAL_SDD;
1712*61046927SAndroid Build Coastguard Worker             sem.SemaphoreDataDword  = true;
1713*61046927SAndroid Build Coastguard Worker             sem.SemaphoreAddress    = query_addr;
1714*61046927SAndroid Build Coastguard Worker          }
1715*61046927SAndroid Build Coastguard Worker       }
1716*61046927SAndroid Build Coastguard Worker 
1717*61046927SAndroid Build Coastguard Worker       uint32_t idx = 0;
1718*61046927SAndroid Build Coastguard Worker       switch (pool->vk.query_type) {
1719*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_OCCLUSION:
1720*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
1721*61046927SAndroid Build Coastguard Worker #if GFX_VERx10 >= 125
1722*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
1723*61046927SAndroid Build Coastguard Worker #endif
1724*61046927SAndroid Build Coastguard Worker          result = compute_query_result(&b, anv_address_add(query_addr, 8));
1725*61046927SAndroid Build Coastguard Worker          /* Like in the case of vkGetQueryPoolResults, if the query is
1726*61046927SAndroid Build Coastguard Worker           * unavailable and the VK_QUERY_RESULT_PARTIAL_BIT flag is set,
1727*61046927SAndroid Build Coastguard Worker           * conservatively write 0 as the query result. If the
1728*61046927SAndroid Build Coastguard Worker           * VK_QUERY_RESULT_PARTIAL_BIT isn't set, don't write any value.
1729*61046927SAndroid Build Coastguard Worker           */
1730*61046927SAndroid Build Coastguard Worker          gpu_write_query_result_cond(cmd_buffer, &b, query_addr, dest_addr,
1731*61046927SAndroid Build Coastguard Worker                                      1 /* available */, flags, idx, result);
1732*61046927SAndroid Build Coastguard Worker          if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
1733*61046927SAndroid Build Coastguard Worker             gpu_write_query_result_cond(cmd_buffer, &b, query_addr, dest_addr,
1734*61046927SAndroid Build Coastguard Worker                                         0 /* unavailable */, flags, idx, mi_imm(0));
1735*61046927SAndroid Build Coastguard Worker          }
1736*61046927SAndroid Build Coastguard Worker          idx++;
1737*61046927SAndroid Build Coastguard Worker          break;
1738*61046927SAndroid Build Coastguard Worker 
1739*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
1740*61046927SAndroid Build Coastguard Worker          uint32_t statistics = pool->vk.pipeline_statistics;
1741*61046927SAndroid Build Coastguard Worker          while (statistics) {
1742*61046927SAndroid Build Coastguard Worker             UNUSED uint32_t stat = u_bit_scan(&statistics);
1743*61046927SAndroid Build Coastguard Worker             result = compute_query_result(&b, anv_address_add(query_addr,
1744*61046927SAndroid Build Coastguard Worker                                                               idx * 16 + 8));
1745*61046927SAndroid Build Coastguard Worker             gpu_write_query_result(&b, dest_addr, flags, idx++, result);
1746*61046927SAndroid Build Coastguard Worker          }
1747*61046927SAndroid Build Coastguard Worker          assert(idx == util_bitcount(pool->vk.pipeline_statistics));
1748*61046927SAndroid Build Coastguard Worker          break;
1749*61046927SAndroid Build Coastguard Worker       }
1750*61046927SAndroid Build Coastguard Worker 
1751*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
1752*61046927SAndroid Build Coastguard Worker          result = compute_query_result(&b, anv_address_add(query_addr, 8));
1753*61046927SAndroid Build Coastguard Worker          gpu_write_query_result(&b, dest_addr, flags, idx++, result);
1754*61046927SAndroid Build Coastguard Worker          result = compute_query_result(&b, anv_address_add(query_addr, 24));
1755*61046927SAndroid Build Coastguard Worker          gpu_write_query_result(&b, dest_addr, flags, idx++, result);
1756*61046927SAndroid Build Coastguard Worker          break;
1757*61046927SAndroid Build Coastguard Worker 
1758*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_TIMESTAMP:
1759*61046927SAndroid Build Coastguard Worker          result = mi_mem64(anv_address_add(query_addr, 8));
1760*61046927SAndroid Build Coastguard Worker          gpu_write_query_result(&b, dest_addr, flags, idx++, result);
1761*61046927SAndroid Build Coastguard Worker          break;
1762*61046927SAndroid Build Coastguard Worker 
1763*61046927SAndroid Build Coastguard Worker #if GFX_VERx10 >= 125
1764*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
1765*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
1766*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
1767*61046927SAndroid Build Coastguard Worker          result = mi_mem64(anv_address_add(query_addr, 8));
1768*61046927SAndroid Build Coastguard Worker          gpu_write_query_result(&b, dest_addr, flags, idx++, result);
1769*61046927SAndroid Build Coastguard Worker          break;
1770*61046927SAndroid Build Coastguard Worker 
1771*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
1772*61046927SAndroid Build Coastguard Worker          result = mi_mem64(anv_address_add(query_addr, 16));
1773*61046927SAndroid Build Coastguard Worker          gpu_write_query_result(&b, dest_addr, flags, idx++, result);
1774*61046927SAndroid Build Coastguard Worker          break;
1775*61046927SAndroid Build Coastguard Worker #endif
1776*61046927SAndroid Build Coastguard Worker 
1777*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR:
1778*61046927SAndroid Build Coastguard Worker          unreachable("Copy KHR performance query results not implemented");
1779*61046927SAndroid Build Coastguard Worker          break;
1780*61046927SAndroid Build Coastguard Worker 
1781*61046927SAndroid Build Coastguard Worker       default:
1782*61046927SAndroid Build Coastguard Worker          unreachable("unhandled query type");
1783*61046927SAndroid Build Coastguard Worker       }
1784*61046927SAndroid Build Coastguard Worker 
1785*61046927SAndroid Build Coastguard Worker       if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
1786*61046927SAndroid Build Coastguard Worker          gpu_write_query_result(&b, dest_addr, flags, idx,
1787*61046927SAndroid Build Coastguard Worker                                 mi_mem64(query_addr));
1788*61046927SAndroid Build Coastguard Worker       }
1789*61046927SAndroid Build Coastguard Worker 
1790*61046927SAndroid Build Coastguard Worker       dest_addr = anv_address_add(dest_addr, dest_stride);
1791*61046927SAndroid Build Coastguard Worker    }
1792*61046927SAndroid Build Coastguard Worker 
1793*61046927SAndroid Build Coastguard Worker    trace_intel_end_query_copy_cs(&cmd_buffer->trace, query_count);
1794*61046927SAndroid Build Coastguard Worker }
1795*61046927SAndroid Build Coastguard Worker 
1796*61046927SAndroid Build Coastguard Worker static void
copy_query_results_with_shader(struct anv_cmd_buffer * cmd_buffer,struct anv_query_pool * pool,struct anv_address dest_addr,uint64_t dest_stride,uint32_t first_query,uint32_t query_count,VkQueryResultFlags flags)1797*61046927SAndroid Build Coastguard Worker copy_query_results_with_shader(struct anv_cmd_buffer *cmd_buffer,
1798*61046927SAndroid Build Coastguard Worker                                struct anv_query_pool *pool,
1799*61046927SAndroid Build Coastguard Worker                                struct anv_address dest_addr,
1800*61046927SAndroid Build Coastguard Worker                                uint64_t dest_stride,
1801*61046927SAndroid Build Coastguard Worker                                uint32_t first_query,
1802*61046927SAndroid Build Coastguard Worker                                uint32_t query_count,
1803*61046927SAndroid Build Coastguard Worker                                VkQueryResultFlags flags)
1804*61046927SAndroid Build Coastguard Worker {
1805*61046927SAndroid Build Coastguard Worker    struct anv_device *device = cmd_buffer->device;
1806*61046927SAndroid Build Coastguard Worker    enum anv_pipe_bits needed_flushes = 0;
1807*61046927SAndroid Build Coastguard Worker 
1808*61046927SAndroid Build Coastguard Worker    trace_intel_begin_query_copy_shader(&cmd_buffer->trace);
1809*61046927SAndroid Build Coastguard Worker 
1810*61046927SAndroid Build Coastguard Worker    /* Ensure all query MI writes are visible to the shader */
1811*61046927SAndroid Build Coastguard Worker    struct mi_builder b;
1812*61046927SAndroid Build Coastguard Worker    mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
1813*61046927SAndroid Build Coastguard Worker    mi_ensure_write_fence(&b);
1814*61046927SAndroid Build Coastguard Worker 
1815*61046927SAndroid Build Coastguard Worker    /* If this is the first command in the batch buffer, make sure we have
1816*61046927SAndroid Build Coastguard Worker     * consistent pipeline mode.
1817*61046927SAndroid Build Coastguard Worker     */
1818*61046927SAndroid Build Coastguard Worker    if (cmd_buffer->state.current_pipeline == UINT32_MAX)
1819*61046927SAndroid Build Coastguard Worker       genX(flush_pipeline_select_3d)(cmd_buffer);
1820*61046927SAndroid Build Coastguard Worker 
1821*61046927SAndroid Build Coastguard Worker    if ((cmd_buffer->state.queries.buffer_write_bits |
1822*61046927SAndroid Build Coastguard Worker         cmd_buffer->state.queries.clear_bits) & ANV_QUERY_WRITES_RT_FLUSH)
1823*61046927SAndroid Build Coastguard Worker       needed_flushes |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
1824*61046927SAndroid Build Coastguard Worker 
1825*61046927SAndroid Build Coastguard Worker    if ((cmd_buffer->state.queries.buffer_write_bits |
1826*61046927SAndroid Build Coastguard Worker         cmd_buffer->state.queries.clear_bits) & ANV_QUERY_WRITES_DATA_FLUSH) {
1827*61046927SAndroid Build Coastguard Worker       needed_flushes |= (ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
1828*61046927SAndroid Build Coastguard Worker                          ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT);
1829*61046927SAndroid Build Coastguard Worker    }
1830*61046927SAndroid Build Coastguard Worker 
1831*61046927SAndroid Build Coastguard Worker    /* Flushes for the queries to complete */
1832*61046927SAndroid Build Coastguard Worker    if (flags & VK_QUERY_RESULT_WAIT_BIT) {
1833*61046927SAndroid Build Coastguard Worker       /* Some queries are done with shaders, so we need to have them flush
1834*61046927SAndroid Build Coastguard Worker        * high level caches writes. The L3 should be shared across the GPU.
1835*61046927SAndroid Build Coastguard Worker        */
1836*61046927SAndroid Build Coastguard Worker       if (pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR ||
1837*61046927SAndroid Build Coastguard Worker           pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR ||
1838*61046927SAndroid Build Coastguard Worker           pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR ||
1839*61046927SAndroid Build Coastguard Worker           pool->vk.query_type == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR) {
1840*61046927SAndroid Build Coastguard Worker          needed_flushes |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
1841*61046927SAndroid Build Coastguard Worker       }
1842*61046927SAndroid Build Coastguard Worker       /* And we need to stall for previous CS writes to land or the flushes to
1843*61046927SAndroid Build Coastguard Worker        * complete.
1844*61046927SAndroid Build Coastguard Worker        */
1845*61046927SAndroid Build Coastguard Worker       needed_flushes |= ANV_PIPE_CS_STALL_BIT;
1846*61046927SAndroid Build Coastguard Worker    }
1847*61046927SAndroid Build Coastguard Worker 
1848*61046927SAndroid Build Coastguard Worker    /* Occlusion & timestamp queries are written using a PIPE_CONTROL and
1849*61046927SAndroid Build Coastguard Worker     * because we're about to copy values from MI commands, we need to stall
1850*61046927SAndroid Build Coastguard Worker     * the command streamer to make sure the PIPE_CONTROL values have
1851*61046927SAndroid Build Coastguard Worker     * landed, otherwise we could see inconsistent values & availability.
1852*61046927SAndroid Build Coastguard Worker     *
1853*61046927SAndroid Build Coastguard Worker     *  From the vulkan spec:
1854*61046927SAndroid Build Coastguard Worker     *
1855*61046927SAndroid Build Coastguard Worker     *     "vkCmdCopyQueryPoolResults is guaranteed to see the effect of
1856*61046927SAndroid Build Coastguard Worker     *     previous uses of vkCmdResetQueryPool in the same queue, without any
1857*61046927SAndroid Build Coastguard Worker     *     additional synchronization."
1858*61046927SAndroid Build Coastguard Worker     */
1859*61046927SAndroid Build Coastguard Worker    if (pool->vk.query_type == VK_QUERY_TYPE_OCCLUSION ||
1860*61046927SAndroid Build Coastguard Worker        pool->vk.query_type == VK_QUERY_TYPE_TIMESTAMP)
1861*61046927SAndroid Build Coastguard Worker       needed_flushes |= ANV_PIPE_CS_STALL_BIT;
1862*61046927SAndroid Build Coastguard Worker 
1863*61046927SAndroid Build Coastguard Worker    if (needed_flushes) {
1864*61046927SAndroid Build Coastguard Worker       anv_add_pending_pipe_bits(cmd_buffer,
1865*61046927SAndroid Build Coastguard Worker                                 needed_flushes | ANV_PIPE_END_OF_PIPE_SYNC_BIT,
1866*61046927SAndroid Build Coastguard Worker                                 "CopyQueryPoolResults");
1867*61046927SAndroid Build Coastguard Worker       genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
1868*61046927SAndroid Build Coastguard Worker    }
1869*61046927SAndroid Build Coastguard Worker 
1870*61046927SAndroid Build Coastguard Worker    struct anv_shader_bin *copy_kernel;
1871*61046927SAndroid Build Coastguard Worker    VkResult ret =
1872*61046927SAndroid Build Coastguard Worker       anv_device_get_internal_shader(
1873*61046927SAndroid Build Coastguard Worker          cmd_buffer->device,
1874*61046927SAndroid Build Coastguard Worker          cmd_buffer->state.current_pipeline == GPGPU ?
1875*61046927SAndroid Build Coastguard Worker          ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_COMPUTE :
1876*61046927SAndroid Build Coastguard Worker          ANV_INTERNAL_KERNEL_COPY_QUERY_RESULTS_FRAGMENT,
1877*61046927SAndroid Build Coastguard Worker          &copy_kernel);
1878*61046927SAndroid Build Coastguard Worker    if (ret != VK_SUCCESS) {
1879*61046927SAndroid Build Coastguard Worker       anv_batch_set_error(&cmd_buffer->batch, ret);
1880*61046927SAndroid Build Coastguard Worker       return;
1881*61046927SAndroid Build Coastguard Worker    }
1882*61046927SAndroid Build Coastguard Worker 
1883*61046927SAndroid Build Coastguard Worker    struct anv_simple_shader state = {
1884*61046927SAndroid Build Coastguard Worker       .device               = cmd_buffer->device,
1885*61046927SAndroid Build Coastguard Worker       .cmd_buffer           = cmd_buffer,
1886*61046927SAndroid Build Coastguard Worker       .dynamic_state_stream = &cmd_buffer->dynamic_state_stream,
1887*61046927SAndroid Build Coastguard Worker       .general_state_stream = &cmd_buffer->general_state_stream,
1888*61046927SAndroid Build Coastguard Worker       .batch                = &cmd_buffer->batch,
1889*61046927SAndroid Build Coastguard Worker       .kernel               = copy_kernel,
1890*61046927SAndroid Build Coastguard Worker       .l3_config            = device->internal_kernels_l3_config,
1891*61046927SAndroid Build Coastguard Worker       .urb_cfg              = &cmd_buffer->state.gfx.urb_cfg,
1892*61046927SAndroid Build Coastguard Worker    };
1893*61046927SAndroid Build Coastguard Worker    genX(emit_simple_shader_init)(&state);
1894*61046927SAndroid Build Coastguard Worker 
1895*61046927SAndroid Build Coastguard Worker    struct anv_state push_data_state =
1896*61046927SAndroid Build Coastguard Worker       genX(simple_shader_alloc_push)(&state,
1897*61046927SAndroid Build Coastguard Worker                                      sizeof(struct anv_query_copy_params));
1898*61046927SAndroid Build Coastguard Worker    if (push_data_state.map == NULL)
1899*61046927SAndroid Build Coastguard Worker       return;
1900*61046927SAndroid Build Coastguard Worker 
1901*61046927SAndroid Build Coastguard Worker    struct anv_query_copy_params *params = push_data_state.map;
1902*61046927SAndroid Build Coastguard Worker 
1903*61046927SAndroid Build Coastguard Worker    uint32_t copy_flags =
1904*61046927SAndroid Build Coastguard Worker       ((flags & VK_QUERY_RESULT_64_BIT) ? ANV_COPY_QUERY_FLAG_RESULT64 : 0) |
1905*61046927SAndroid Build Coastguard Worker       ((flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) ? ANV_COPY_QUERY_FLAG_AVAILABLE : 0);
1906*61046927SAndroid Build Coastguard Worker 
1907*61046927SAndroid Build Coastguard Worker    uint32_t num_items = 1;
1908*61046927SAndroid Build Coastguard Worker    uint32_t data_offset = 8 /* behind availability */;
1909*61046927SAndroid Build Coastguard Worker    switch (pool->vk.query_type) {
1910*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_OCCLUSION:
1911*61046927SAndroid Build Coastguard Worker       copy_flags |= ANV_COPY_QUERY_FLAG_DELTA;
1912*61046927SAndroid Build Coastguard Worker       /* Occlusion and timestamps queries are the only ones where we would have partial data
1913*61046927SAndroid Build Coastguard Worker        * because they are capture with a PIPE_CONTROL post sync operation. The
1914*61046927SAndroid Build Coastguard Worker        * other ones are captured with MI_STORE_REGISTER_DATA so we're always
1915*61046927SAndroid Build Coastguard Worker        * available by the time we reach the copy command.
1916*61046927SAndroid Build Coastguard Worker        */
1917*61046927SAndroid Build Coastguard Worker       copy_flags |= (flags & VK_QUERY_RESULT_PARTIAL_BIT) ? ANV_COPY_QUERY_FLAG_PARTIAL : 0;
1918*61046927SAndroid Build Coastguard Worker       break;
1919*61046927SAndroid Build Coastguard Worker 
1920*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_TIMESTAMP:
1921*61046927SAndroid Build Coastguard Worker       copy_flags |= (flags & VK_QUERY_RESULT_PARTIAL_BIT) ? ANV_COPY_QUERY_FLAG_PARTIAL : 0;
1922*61046927SAndroid Build Coastguard Worker       break;
1923*61046927SAndroid Build Coastguard Worker 
1924*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PRIMITIVES_GENERATED_EXT:
1925*61046927SAndroid Build Coastguard Worker #if GFX_VERx10 >= 125
1926*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_MESH_PRIMITIVES_GENERATED_EXT:
1927*61046927SAndroid Build Coastguard Worker #endif
1928*61046927SAndroid Build Coastguard Worker       copy_flags |= ANV_COPY_QUERY_FLAG_DELTA;
1929*61046927SAndroid Build Coastguard Worker       break;
1930*61046927SAndroid Build Coastguard Worker 
1931*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_PIPELINE_STATISTICS:
1932*61046927SAndroid Build Coastguard Worker       num_items = util_bitcount(pool->vk.pipeline_statistics);
1933*61046927SAndroid Build Coastguard Worker       copy_flags |= ANV_COPY_QUERY_FLAG_DELTA;
1934*61046927SAndroid Build Coastguard Worker       break;
1935*61046927SAndroid Build Coastguard Worker 
1936*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
1937*61046927SAndroid Build Coastguard Worker       num_items = 2;
1938*61046927SAndroid Build Coastguard Worker       copy_flags |= ANV_COPY_QUERY_FLAG_DELTA;
1939*61046927SAndroid Build Coastguard Worker       break;
1940*61046927SAndroid Build Coastguard Worker 
1941*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
1942*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
1943*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
1944*61046927SAndroid Build Coastguard Worker       break;
1945*61046927SAndroid Build Coastguard Worker 
1946*61046927SAndroid Build Coastguard Worker    case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
1947*61046927SAndroid Build Coastguard Worker       data_offset += 8;
1948*61046927SAndroid Build Coastguard Worker       break;
1949*61046927SAndroid Build Coastguard Worker 
1950*61046927SAndroid Build Coastguard Worker    default:
1951*61046927SAndroid Build Coastguard Worker       unreachable("unhandled query type");
1952*61046927SAndroid Build Coastguard Worker    }
1953*61046927SAndroid Build Coastguard Worker 
1954*61046927SAndroid Build Coastguard Worker    *params = (struct anv_query_copy_params) {
1955*61046927SAndroid Build Coastguard Worker       .flags              = copy_flags,
1956*61046927SAndroid Build Coastguard Worker       .num_queries        = query_count,
1957*61046927SAndroid Build Coastguard Worker       .num_items          = num_items,
1958*61046927SAndroid Build Coastguard Worker       .query_base         = first_query,
1959*61046927SAndroid Build Coastguard Worker       .query_stride       = pool->stride,
1960*61046927SAndroid Build Coastguard Worker       .query_data_offset  = data_offset,
1961*61046927SAndroid Build Coastguard Worker       .destination_stride = dest_stride,
1962*61046927SAndroid Build Coastguard Worker       .query_data_addr    = anv_address_physical(
1963*61046927SAndroid Build Coastguard Worker          (struct anv_address) {
1964*61046927SAndroid Build Coastguard Worker             .bo = pool->bo,
1965*61046927SAndroid Build Coastguard Worker          }),
1966*61046927SAndroid Build Coastguard Worker       .destination_addr   = anv_address_physical(dest_addr),
1967*61046927SAndroid Build Coastguard Worker    };
1968*61046927SAndroid Build Coastguard Worker 
1969*61046927SAndroid Build Coastguard Worker    genX(emit_simple_shader_dispatch)(&state, query_count, push_data_state);
1970*61046927SAndroid Build Coastguard Worker 
1971*61046927SAndroid Build Coastguard Worker    /* The query copy result shader is writing using the dataport, flush
1972*61046927SAndroid Build Coastguard Worker     * HDC/Data cache depending on the generation. Also stall at pixel
1973*61046927SAndroid Build Coastguard Worker     * scoreboard in case we're doing the copy with a fragment shader.
1974*61046927SAndroid Build Coastguard Worker     */
1975*61046927SAndroid Build Coastguard Worker    cmd_buffer->state.queries.buffer_write_bits |= ANV_QUERY_WRITES_DATA_FLUSH;
1976*61046927SAndroid Build Coastguard Worker 
1977*61046927SAndroid Build Coastguard Worker    trace_intel_end_query_copy_shader(&cmd_buffer->trace, query_count);
1978*61046927SAndroid Build Coastguard Worker }
1979*61046927SAndroid Build Coastguard Worker 
genX(CmdCopyQueryPoolResults)1980*61046927SAndroid Build Coastguard Worker void genX(CmdCopyQueryPoolResults)(
1981*61046927SAndroid Build Coastguard Worker     VkCommandBuffer                             commandBuffer,
1982*61046927SAndroid Build Coastguard Worker     VkQueryPool                                 queryPool,
1983*61046927SAndroid Build Coastguard Worker     uint32_t                                    firstQuery,
1984*61046927SAndroid Build Coastguard Worker     uint32_t                                    queryCount,
1985*61046927SAndroid Build Coastguard Worker     VkBuffer                                    destBuffer,
1986*61046927SAndroid Build Coastguard Worker     VkDeviceSize                                destOffset,
1987*61046927SAndroid Build Coastguard Worker     VkDeviceSize                                destStride,
1988*61046927SAndroid Build Coastguard Worker     VkQueryResultFlags                          flags)
1989*61046927SAndroid Build Coastguard Worker {
1990*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
1991*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
1992*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
1993*61046927SAndroid Build Coastguard Worker    struct anv_device *device = cmd_buffer->device;
1994*61046927SAndroid Build Coastguard Worker    struct anv_physical_device *pdevice = device->physical;
1995*61046927SAndroid Build Coastguard Worker 
1996*61046927SAndroid Build Coastguard Worker    if (queryCount > pdevice->instance->query_copy_with_shader_threshold) {
1997*61046927SAndroid Build Coastguard Worker       copy_query_results_with_shader(cmd_buffer, pool,
1998*61046927SAndroid Build Coastguard Worker                                      anv_address_add(buffer->address,
1999*61046927SAndroid Build Coastguard Worker                                                      destOffset),
2000*61046927SAndroid Build Coastguard Worker                                      destStride,
2001*61046927SAndroid Build Coastguard Worker                                      firstQuery,
2002*61046927SAndroid Build Coastguard Worker                                      queryCount,
2003*61046927SAndroid Build Coastguard Worker                                      flags);
2004*61046927SAndroid Build Coastguard Worker    } else {
2005*61046927SAndroid Build Coastguard Worker       copy_query_results_with_cs(cmd_buffer, pool,
2006*61046927SAndroid Build Coastguard Worker                                  anv_address_add(buffer->address,
2007*61046927SAndroid Build Coastguard Worker                                                  destOffset),
2008*61046927SAndroid Build Coastguard Worker                                  destStride,
2009*61046927SAndroid Build Coastguard Worker                                  firstQuery,
2010*61046927SAndroid Build Coastguard Worker                                  queryCount,
2011*61046927SAndroid Build Coastguard Worker                                  flags);
2012*61046927SAndroid Build Coastguard Worker    }
2013*61046927SAndroid Build Coastguard Worker }
2014*61046927SAndroid Build Coastguard Worker 
2015*61046927SAndroid Build Coastguard Worker #if GFX_VERx10 >= 125 && ANV_SUPPORT_RT
2016*61046927SAndroid Build Coastguard Worker 
2017*61046927SAndroid Build Coastguard Worker #include "grl/include/GRLRTASCommon.h"
2018*61046927SAndroid Build Coastguard Worker #include "grl/grl_metakernel_postbuild_info.h"
2019*61046927SAndroid Build Coastguard Worker 
2020*61046927SAndroid Build Coastguard Worker void
genX(CmdWriteAccelerationStructuresPropertiesKHR)2021*61046927SAndroid Build Coastguard Worker genX(CmdWriteAccelerationStructuresPropertiesKHR)(
2022*61046927SAndroid Build Coastguard Worker     VkCommandBuffer                             commandBuffer,
2023*61046927SAndroid Build Coastguard Worker     uint32_t                                    accelerationStructureCount,
2024*61046927SAndroid Build Coastguard Worker     const VkAccelerationStructureKHR*           pAccelerationStructures,
2025*61046927SAndroid Build Coastguard Worker     VkQueryType                                 queryType,
2026*61046927SAndroid Build Coastguard Worker     VkQueryPool                                 queryPool,
2027*61046927SAndroid Build Coastguard Worker     uint32_t                                    firstQuery)
2028*61046927SAndroid Build Coastguard Worker {
2029*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
2030*61046927SAndroid Build Coastguard Worker    ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
2031*61046927SAndroid Build Coastguard Worker 
2032*61046927SAndroid Build Coastguard Worker    assert(queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR ||
2033*61046927SAndroid Build Coastguard Worker           queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR ||
2034*61046927SAndroid Build Coastguard Worker           queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR ||
2035*61046927SAndroid Build Coastguard Worker           queryType == VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR);
2036*61046927SAndroid Build Coastguard Worker 
2037*61046927SAndroid Build Coastguard Worker    emit_query_clear_flush(cmd_buffer, pool,
2038*61046927SAndroid Build Coastguard Worker                           "CmdWriteAccelerationStructuresPropertiesKHR flush query clears");
2039*61046927SAndroid Build Coastguard Worker 
2040*61046927SAndroid Build Coastguard Worker    struct mi_builder b;
2041*61046927SAndroid Build Coastguard Worker    mi_builder_init(&b, cmd_buffer->device->info, &cmd_buffer->batch);
2042*61046927SAndroid Build Coastguard Worker 
2043*61046927SAndroid Build Coastguard Worker    for (uint32_t i = 0; i < accelerationStructureCount; i++) {
2044*61046927SAndroid Build Coastguard Worker       ANV_FROM_HANDLE(vk_acceleration_structure, accel, pAccelerationStructures[i]);
2045*61046927SAndroid Build Coastguard Worker       struct anv_address query_addr =
2046*61046927SAndroid Build Coastguard Worker          anv_address_add(anv_query_address(pool, firstQuery + i), 8);
2047*61046927SAndroid Build Coastguard Worker 
2048*61046927SAndroid Build Coastguard Worker       switch (queryType) {
2049*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
2050*61046927SAndroid Build Coastguard Worker          genX(grl_postbuild_info_compacted_size)(cmd_buffer,
2051*61046927SAndroid Build Coastguard Worker                                                  vk_acceleration_structure_get_va(accel),
2052*61046927SAndroid Build Coastguard Worker                                                  anv_address_physical(query_addr));
2053*61046927SAndroid Build Coastguard Worker          break;
2054*61046927SAndroid Build Coastguard Worker 
2055*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
2056*61046927SAndroid Build Coastguard Worker          genX(grl_postbuild_info_current_size)(cmd_buffer,
2057*61046927SAndroid Build Coastguard Worker                                                vk_acceleration_structure_get_va(accel),
2058*61046927SAndroid Build Coastguard Worker                                                anv_address_physical(query_addr));
2059*61046927SAndroid Build Coastguard Worker          break;
2060*61046927SAndroid Build Coastguard Worker 
2061*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
2062*61046927SAndroid Build Coastguard Worker       case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
2063*61046927SAndroid Build Coastguard Worker          genX(grl_postbuild_info_serialized_size)(cmd_buffer,
2064*61046927SAndroid Build Coastguard Worker                                                   vk_acceleration_structure_get_va(accel),
2065*61046927SAndroid Build Coastguard Worker                                                   anv_address_physical(query_addr));
2066*61046927SAndroid Build Coastguard Worker          break;
2067*61046927SAndroid Build Coastguard Worker 
2068*61046927SAndroid Build Coastguard Worker       default:
2069*61046927SAndroid Build Coastguard Worker          unreachable("unhandled query type");
2070*61046927SAndroid Build Coastguard Worker       }
2071*61046927SAndroid Build Coastguard Worker    }
2072*61046927SAndroid Build Coastguard Worker 
2073*61046927SAndroid Build Coastguard Worker    /* TODO: Figure out why MTL needs ANV_PIPE_DATA_CACHE_FLUSH_BIT in order
2074*61046927SAndroid Build Coastguard Worker     * to not lose the availability bit.
2075*61046927SAndroid Build Coastguard Worker     */
2076*61046927SAndroid Build Coastguard Worker    anv_add_pending_pipe_bits(cmd_buffer,
2077*61046927SAndroid Build Coastguard Worker                              ANV_PIPE_END_OF_PIPE_SYNC_BIT |
2078*61046927SAndroid Build Coastguard Worker                              ANV_PIPE_DATA_CACHE_FLUSH_BIT,
2079*61046927SAndroid Build Coastguard Worker                              "after write acceleration struct props");
2080*61046927SAndroid Build Coastguard Worker    genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
2081*61046927SAndroid Build Coastguard Worker 
2082*61046927SAndroid Build Coastguard Worker    for (uint32_t i = 0; i < accelerationStructureCount; i++)
2083*61046927SAndroid Build Coastguard Worker       emit_query_mi_availability(&b, anv_query_address(pool, firstQuery + i), true);
2084*61046927SAndroid Build Coastguard Worker }
2085*61046927SAndroid Build Coastguard Worker #endif
2086