xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/d3d12/d3d12_query.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © Microsoft Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "d3d12_query.h"
25 #include "d3d12_compiler.h"
26 #include "d3d12_compute_transforms.h"
27 #include "d3d12_context.h"
28 #include "d3d12_resource.h"
29 #include "d3d12_screen.h"
30 #include "d3d12_fence.h"
31 
32 #include "util/u_dump.h"
33 #include "util/u_inlines.h"
34 #include "util/u_memory.h"
35 #include "util/u_threaded_context.h"
36 
37 #include <dxguids/dxguids.h>
38 
39 static unsigned
num_sub_queries(unsigned query_type,unsigned index)40 num_sub_queries(unsigned query_type, unsigned index)
41 {
42    switch (query_type) {
43    case PIPE_QUERY_PRIMITIVES_GENERATED:
44       return index == 0 ? 3 : 1;
45    case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
46       return 4;
47    default:
48       return 1;
49    }
50 }
51 
52 static D3D12_QUERY_HEAP_TYPE
d3d12_query_heap_type(unsigned query_type,unsigned sub_query)53 d3d12_query_heap_type(unsigned query_type, unsigned sub_query)
54 {
55    switch (query_type) {
56    case PIPE_QUERY_OCCLUSION_COUNTER:
57    case PIPE_QUERY_OCCLUSION_PREDICATE:
58    case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
59       return D3D12_QUERY_HEAP_TYPE_OCCLUSION;
60    case PIPE_QUERY_PIPELINE_STATISTICS:
61       return D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS;
62    case PIPE_QUERY_PRIMITIVES_GENERATED:
63       return sub_query == 0 ?
64          D3D12_QUERY_HEAP_TYPE_SO_STATISTICS :
65          D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS;
66    case PIPE_QUERY_PRIMITIVES_EMITTED:
67    case PIPE_QUERY_SO_STATISTICS:
68    case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
69    case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
70       return D3D12_QUERY_HEAP_TYPE_SO_STATISTICS;
71    case PIPE_QUERY_TIMESTAMP:
72    case PIPE_QUERY_TIME_ELAPSED:
73       return D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
74 
75    default:
76       debug_printf("unknown query: %s\n",
77                    util_str_query_type(query_type, true));
78       unreachable("d3d12: unknown query type");
79    }
80 }
81 
82 static D3D12_QUERY_TYPE
d3d12_query_type(unsigned query_type,unsigned sub_query,unsigned index)83 d3d12_query_type(unsigned query_type, unsigned sub_query, unsigned index)
84 {
85    switch (query_type) {
86    case PIPE_QUERY_OCCLUSION_COUNTER:
87       return D3D12_QUERY_TYPE_OCCLUSION;
88    case PIPE_QUERY_OCCLUSION_PREDICATE:
89    case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
90       return D3D12_QUERY_TYPE_BINARY_OCCLUSION;
91    case PIPE_QUERY_PIPELINE_STATISTICS:
92       return D3D12_QUERY_TYPE_PIPELINE_STATISTICS;
93    case PIPE_QUERY_PRIMITIVES_GENERATED:
94       if (sub_query > 0)
95          return D3D12_QUERY_TYPE_PIPELINE_STATISTICS;
96       FALLTHROUGH;
97    case PIPE_QUERY_PRIMITIVES_EMITTED:
98    case PIPE_QUERY_SO_STATISTICS:
99    case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
100       return (D3D12_QUERY_TYPE)(D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0 + index);
101    case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
102       return (D3D12_QUERY_TYPE)(D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0 + sub_query);
103    case PIPE_QUERY_TIMESTAMP:
104    case PIPE_QUERY_TIME_ELAPSED:
105       return D3D12_QUERY_TYPE_TIMESTAMP;
106    default:
107       debug_printf("unknown query: %s\n",
108                    util_str_query_type(query_type, true));
109       unreachable("d3d12: unknown query type");
110    }
111 }
112 
113 static struct pipe_query *
d3d12_create_query(struct pipe_context * pctx,unsigned query_type,unsigned index)114 d3d12_create_query(struct pipe_context *pctx,
115                    unsigned query_type, unsigned index)
116 {
117    struct d3d12_context *ctx = d3d12_context(pctx);
118    struct d3d12_screen *screen = d3d12_screen(pctx->screen);
119    struct d3d12_query *query = CALLOC_STRUCT(d3d12_query);
120    D3D12_QUERY_HEAP_DESC desc = {};
121 
122    if (!query)
123       return NULL;
124 
125    pipe_reference_init(&query->reference, 1);
126    query->type = (pipe_query_type)query_type;
127    query->index = index;
128    for (unsigned i = 0; i < num_sub_queries(query_type, index); ++i) {
129       assert(i < MAX_SUBQUERIES);
130       query->subqueries[i].d3d12qtype = d3d12_query_type(query_type, i, index);
131       query->subqueries[i].num_queries = 16;
132 
133       /* With timer queries we want a few more queries, especially since we need two slots
134        * per query for TIME_ELAPSED queries
135        * For TIMESTAMP, we don't need more than one slot, since there's nothing to accumulate */
136       if (unlikely(query_type == PIPE_QUERY_TIME_ELAPSED))
137          query->subqueries[i].num_queries = 64;
138       else if (query_type == PIPE_QUERY_TIMESTAMP)
139          query->subqueries[i].num_queries = 1;
140 
141       query->subqueries[i].curr_query = 0;
142       desc.Count = query->subqueries[i].num_queries;
143       desc.Type = d3d12_query_heap_type(query_type, i);
144 
145       switch (desc.Type) {
146       case D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS:
147          query->subqueries[i].query_size = sizeof(D3D12_QUERY_DATA_PIPELINE_STATISTICS);
148          break;
149       case D3D12_QUERY_HEAP_TYPE_SO_STATISTICS:
150          query->subqueries[i].query_size = sizeof(D3D12_QUERY_DATA_SO_STATISTICS);
151          break;
152       default:
153          query->subqueries[i].query_size = sizeof(uint64_t);
154          break;
155       }
156       if (FAILED(screen->dev->CreateQueryHeap(&desc,
157                                               IID_PPV_ARGS(&query->subqueries[i].query_heap)))) {
158          FREE(query);
159          return NULL;
160       }
161 
162       /* Query result goes into a readback buffer */
163       size_t buffer_size = query->subqueries[i].query_size * query->subqueries[i].num_queries;
164       u_suballocator_alloc(&ctx->query_allocator, buffer_size, 256,
165                            &query->subqueries[i].buffer_offset, &query->subqueries[i].buffer);
166 
167       query->subqueries[i].active = (query_type == PIPE_QUERY_TIMESTAMP);
168    }
169 
170    return (struct pipe_query *)query;
171 }
172 
173 void
d3d12_destroy_query(struct d3d12_query * query)174 d3d12_destroy_query(struct d3d12_query *query)
175 {
176    pipe_resource *predicate = &query->predicate->base.b;
177    pipe_resource_reference(&predicate, NULL);
178    for (unsigned i = 0; i < num_sub_queries(query->type, query->index); ++i) {
179       query->subqueries[i].query_heap->Release();
180       pipe_resource_reference(&query->subqueries[i].buffer, NULL);
181    }
182    FREE(query);
183 }
184 
185 static void
d3d12_release_query(struct pipe_context * pctx,struct pipe_query * q)186 d3d12_release_query(struct pipe_context *pctx,
187                     struct pipe_query *q)
188 {
189    struct d3d12_query *query = (struct d3d12_query *)q;
190    if (pipe_reference(&query->reference, nullptr)) {
191       d3d12_destroy_query(query);
192    }
193 }
194 
195 static bool
accumulate_subresult_cpu(struct d3d12_context * ctx,struct d3d12_query * q_parent,unsigned sub_query,union pipe_query_result * result)196 accumulate_subresult_cpu(struct d3d12_context *ctx, struct d3d12_query *q_parent,
197                          unsigned sub_query,
198                          union pipe_query_result *result)
199 {
200    struct pipe_transfer *transfer = NULL;
201    struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
202    struct d3d12_query_impl *q = &q_parent->subqueries[sub_query];
203    unsigned access = PIPE_MAP_READ;
204    void *results;
205 
206    access |= PIPE_MAP_UNSYNCHRONIZED;
207 
208    results = pipe_buffer_map_range(&ctx->base, q->buffer, q->buffer_offset,
209                                    q->num_queries * q->query_size,
210                                    access, &transfer);
211 
212    if (results == NULL)
213       return false;
214 
215    uint64_t *results_u64 = (uint64_t *)results;
216    D3D12_QUERY_DATA_PIPELINE_STATISTICS *results_stats = (D3D12_QUERY_DATA_PIPELINE_STATISTICS *)results;
217    D3D12_QUERY_DATA_SO_STATISTICS *results_so = (D3D12_QUERY_DATA_SO_STATISTICS *)results;
218 
219    memset(result, 0, sizeof(*result));
220    for (unsigned i = 0; i < q->curr_query; ++i) {
221       switch (q->d3d12qtype) {
222       case D3D12_QUERY_TYPE_BINARY_OCCLUSION:
223          result->b |= results_u64[i] != 0;
224          break;
225 
226       case D3D12_QUERY_TYPE_OCCLUSION:
227          result->u64 += results_u64[i];
228          break;
229 
230       case D3D12_QUERY_TYPE_TIMESTAMP:
231          if (q_parent->type == PIPE_QUERY_TIME_ELAPSED)
232             result->u64 += results_u64[2 * i + 1] - results_u64[2 * i];
233          else
234             result->u64 = results_u64[i];
235          break;
236 
237       case D3D12_QUERY_TYPE_PIPELINE_STATISTICS:
238          result->pipeline_statistics.ia_vertices += results_stats[i].IAVertices;
239          result->pipeline_statistics.ia_primitives += results_stats[i].IAPrimitives;
240          result->pipeline_statistics.vs_invocations += results_stats[i].VSInvocations;
241          result->pipeline_statistics.gs_invocations += results_stats[i].GSInvocations;
242          result->pipeline_statistics.gs_primitives += results_stats[i].GSPrimitives;
243          result->pipeline_statistics.c_invocations += results_stats[i].CInvocations;
244          result->pipeline_statistics.c_primitives += results_stats[i].CPrimitives;
245          result->pipeline_statistics.ps_invocations += results_stats[i].PSInvocations;
246          result->pipeline_statistics.hs_invocations += results_stats[i].HSInvocations;
247          result->pipeline_statistics.ds_invocations += results_stats[i].DSInvocations;
248          result->pipeline_statistics.cs_invocations += results_stats[i].CSInvocations;
249          break;
250 
251       case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM0:
252       case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM1:
253       case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM2:
254       case D3D12_QUERY_TYPE_SO_STATISTICS_STREAM3:
255          if (q_parent->type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE ||
256              q_parent->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) {
257             result->b = results_so[i].NumPrimitivesWritten != results_so[i].PrimitivesStorageNeeded;
258          } else {
259             result->so_statistics.num_primitives_written += results_so[i].NumPrimitivesWritten;
260             result->so_statistics.primitives_storage_needed += results_so[i].PrimitivesStorageNeeded;
261          }
262          break;
263 
264       default:
265          debug_printf("unsupported query type: %s\n",
266                       util_str_query_type(q_parent->type, true));
267          unreachable("unexpected query type");
268       }
269    }
270 
271    pipe_buffer_unmap(&ctx->base, transfer);
272 
273    if (q->d3d12qtype == D3D12_QUERY_TYPE_TIMESTAMP)
274       result->u64 = static_cast<uint64_t>(screen->timestamp_multiplier * result->u64);
275 
276    return true;
277 }
278 
279 static bool
accumulate_result_cpu(struct d3d12_context * ctx,struct d3d12_query * q,union pipe_query_result * result)280 accumulate_result_cpu(struct d3d12_context *ctx, struct d3d12_query *q,
281                       union pipe_query_result *result)
282 {
283    union pipe_query_result local_result;
284 
285    switch (q->type) {
286    case PIPE_QUERY_PRIMITIVES_GENERATED:
287       if (!accumulate_subresult_cpu(ctx, q, 0, &local_result))
288          return false;
289       result->u64 = local_result.so_statistics.primitives_storage_needed;
290 
291       if (q->index == 0) {
292          if (!accumulate_subresult_cpu(ctx, q, 1, &local_result))
293             return false;
294          result->u64 += local_result.pipeline_statistics.gs_primitives;
295 
296          if (!accumulate_subresult_cpu(ctx, q, 2, &local_result))
297             return false;
298          result->u64 += local_result.pipeline_statistics.ia_primitives;
299       }
300       return true;
301    case PIPE_QUERY_PRIMITIVES_EMITTED:
302       if (!accumulate_subresult_cpu(ctx, q, 0, &local_result))
303          return false;
304       result->u64 = local_result.so_statistics.num_primitives_written;
305       return true;
306    case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
307       result->b = false;
308       for (uint32_t i = 0; i < num_sub_queries(q->type, q->index); ++i) {
309          if (!accumulate_subresult_cpu(ctx, q, i, &local_result))
310             return false;
311          result->b |= local_result.b;
312       }
313       return true;
314    default:
315       assert(num_sub_queries(q->type, q->index) == 1);
316       return accumulate_subresult_cpu(ctx, q, 0, result);
317    }
318 }
319 
320 static bool
subquery_should_be_active(struct d3d12_context * ctx,struct d3d12_query * q,unsigned sub_query)321 subquery_should_be_active(struct d3d12_context *ctx, struct d3d12_query *q, unsigned sub_query)
322 {
323    switch (q->type) {
324    case PIPE_QUERY_PRIMITIVES_GENERATED: {
325       bool has_xfb = !!ctx->gfx_pipeline_state.num_so_targets;
326       struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
327       bool has_gs = gs && !gs->is_variant;
328       switch (sub_query) {
329       case 0: return has_xfb;
330       case 1: return !has_xfb && has_gs;
331       case 2: return !has_xfb && !has_gs;
332       default: unreachable("Invalid subquery for primitives generated");
333       }
334       break;
335    }
336    default:
337       return true;
338    }
339 }
340 
341 static bool
query_ensure_ready(struct d3d12_screen * screen,struct d3d12_context * ctx,struct d3d12_query * query,bool wait)342 query_ensure_ready(struct d3d12_screen* screen, struct d3d12_context* ctx, struct d3d12_query* query, bool wait)
343 {
344    // If the query is not flushed, it won't have
345    // been submitted yet, and won't have a waitable
346    // fence value
347    if (query->fence_value == UINT64_MAX) {
348       d3d12_flush_cmdlist(ctx);
349    }
350 
351    if (screen->fence->GetCompletedValue() < query->fence_value){
352       if (!wait)
353          return false;
354 
355       screen->fence->SetEventOnCompletion(query->fence_value, NULL);
356    }
357 
358    return true;
359 }
360 
361 static void
accumulate_subresult_gpu(struct d3d12_context * ctx,struct d3d12_query * q_parent,unsigned sub_query)362 accumulate_subresult_gpu(struct d3d12_context *ctx, struct d3d12_query *q_parent,
363                          unsigned sub_query)
364 {
365    d3d12_compute_transform_save_restore save;
366    d3d12_save_compute_transform_state(ctx, &save);
367 
368    d3d12_compute_transform_key key;
369    memset(&key, 0, sizeof(key));
370    key.type = d3d12_compute_transform_type::query_resolve;
371    key.query_resolve.is_64bit = true;
372    key.query_resolve.is_resolve_in_place = true;
373    key.query_resolve.num_subqueries = 1;
374    key.query_resolve.pipe_query_type = q_parent->type;
375    key.query_resolve.single_subquery_index = sub_query;
376    key.query_resolve.is_signed = false;
377    key.query_resolve.timestamp_multiplier = 1.0;
378    ctx->base.bind_compute_state(&ctx->base, d3d12_get_compute_transform(ctx, &key));
379 
380    ctx->transform_state_vars[0] = q_parent->subqueries[sub_query].curr_query;
381    ctx->transform_state_vars[1] = 0;
382    ctx->transform_state_vars[2] = 0;
383    ctx->transform_state_vars[3] = 0;
384    ctx->transform_state_vars[4] = 0;
385 
386    pipe_shader_buffer new_cs_ssbos[1];
387    new_cs_ssbos[0].buffer = q_parent->subqueries[sub_query].buffer;
388    new_cs_ssbos[0].buffer_offset = q_parent->subqueries[sub_query].buffer_offset;
389    new_cs_ssbos[0].buffer_size = q_parent->subqueries[sub_query].query_size * q_parent->subqueries[sub_query].num_queries;
390    ctx->base.set_shader_buffers(&ctx->base, PIPE_SHADER_COMPUTE, 0, 1, new_cs_ssbos, 1);
391 
392    pipe_grid_info grid = {};
393    grid.block[0] = grid.block[1] = grid.block[2] = 1;
394    grid.grid[0] = grid.grid[1] = grid.grid[2] = 1;
395    ctx->base.launch_grid(&ctx->base, &grid);
396 
397    d3d12_restore_compute_transform_state(ctx, &save);
398 }
399 
400 static void
accumulate_result_gpu(struct d3d12_context * ctx,struct d3d12_query * q,struct pipe_resource * dst,uint32_t dst_offset,int index,enum pipe_query_value_type result_type)401 accumulate_result_gpu(struct d3d12_context *ctx, struct d3d12_query *q,
402                       struct pipe_resource *dst, uint32_t dst_offset,
403                       int index, enum pipe_query_value_type result_type)
404 {
405    d3d12_compute_transform_save_restore save;
406    d3d12_save_compute_transform_state(ctx, &save);
407 
408    d3d12_compute_transform_key key;
409    memset(&key, 0, sizeof(key));
410    key.type = d3d12_compute_transform_type::query_resolve;
411    key.query_resolve.is_64bit = result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64;
412    key.query_resolve.is_resolve_in_place = false;
413    key.query_resolve.num_subqueries = num_sub_queries(q->type, q->index);
414    key.query_resolve.pipe_query_type = q->type;
415    key.query_resolve.single_result_field_offset = index;
416    key.query_resolve.is_signed = result_type == PIPE_QUERY_TYPE_I32 || result_type == PIPE_QUERY_TYPE_I64;
417    key.query_resolve.timestamp_multiplier = d3d12_screen(ctx->base.screen)->timestamp_multiplier;
418    ctx->base.bind_compute_state(&ctx->base, d3d12_get_compute_transform(ctx, &key));
419 
420    pipe_shader_buffer new_cs_ssbos[5];
421    uint32_t num_ssbos = 0;
422    for (uint32_t i = 0; i < key.query_resolve.num_subqueries; ++i) {
423       ctx->transform_state_vars[i] = q->subqueries[i].curr_query;
424       new_cs_ssbos[num_ssbos].buffer = q->subqueries[i].buffer;
425       new_cs_ssbos[num_ssbos].buffer_offset = q->subqueries[i].buffer_offset;
426       new_cs_ssbos[num_ssbos].buffer_size = q->subqueries[i].query_size * q->subqueries[i].num_queries;
427       num_ssbos++;
428    }
429 
430    assert(dst_offset % (key.query_resolve.is_64bit ? 8 : 4) == 0);
431    ctx->transform_state_vars[4] = dst_offset / (key.query_resolve.is_64bit ? 8 : 4);
432 
433    new_cs_ssbos[num_ssbos].buffer = dst;
434    new_cs_ssbos[num_ssbos].buffer_offset = 0;
435    new_cs_ssbos[num_ssbos].buffer_size = dst->width0;
436    num_ssbos++;
437 
438    ctx->base.set_shader_buffers(&ctx->base, PIPE_SHADER_COMPUTE, 0, num_ssbos, new_cs_ssbos, 1 << (num_ssbos - 1));
439 
440    pipe_grid_info grid = {};
441    grid.block[0] = grid.block[1] = grid.block[2] = 1;
442    grid.grid[0] = grid.grid[1] = grid.grid[2] = 1;
443    ctx->base.launch_grid(&ctx->base, &grid);
444 
445    d3d12_restore_compute_transform_state(ctx, &save);
446 }
447 
448 static void
begin_subquery(struct d3d12_context * ctx,struct d3d12_query * q_parent,unsigned sub_query)449 begin_subquery(struct d3d12_context *ctx, struct d3d12_query *q_parent, unsigned sub_query)
450 {
451    struct d3d12_query_impl *q = &q_parent->subqueries[sub_query];
452    if (q->curr_query == q->num_queries) {
453       /* Accumulate current results and store in first slot */
454       accumulate_subresult_gpu(ctx, q_parent, sub_query);
455       q->curr_query = 1;
456    }
457 
458    ctx->cmdlist->BeginQuery(q->query_heap, q->d3d12qtype, q->curr_query);
459    q->active = true;
460 }
461 
462 static void
begin_query(struct d3d12_context * ctx,struct d3d12_query * q_parent,bool restart)463 begin_query(struct d3d12_context *ctx, struct d3d12_query *q_parent, bool restart)
464 {
465    for (unsigned i = 0; i < num_sub_queries(q_parent->type, q_parent->index); ++i) {
466       if (restart)
467          q_parent->subqueries[i].curr_query = 0;
468 
469       if (!subquery_should_be_active(ctx, q_parent, i))
470          continue;
471 
472       begin_subquery(ctx, q_parent, i);
473    }
474 }
475 
476 
477 static void
begin_timer_query(struct d3d12_context * ctx,struct d3d12_query * q_parent,bool restart)478 begin_timer_query(struct d3d12_context *ctx, struct d3d12_query *q_parent, bool restart)
479 {
480    struct d3d12_query_impl *q = &q_parent->subqueries[0];
481 
482    /* For PIPE_QUERY_TIME_ELAPSED we record one time with BeginQuery and one in
483     * EndQuery, so we need two query slots */
484    unsigned query_index = 2 * q->curr_query;
485 
486    if (restart) {
487       q->curr_query = 0;
488       query_index = 0;
489    } else if (query_index == q->num_queries) {
490       /* Accumulate current results and store in first slot */
491       accumulate_subresult_gpu(ctx, q_parent, 0);
492       q->curr_query = 1;
493    }
494 
495    ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, query_index);
496    q->active = true;
497 }
498 
499 static bool
d3d12_begin_query(struct pipe_context * pctx,struct pipe_query * q)500 d3d12_begin_query(struct pipe_context *pctx,
501                   struct pipe_query *q)
502 {
503    struct d3d12_context *ctx = d3d12_context(pctx);
504    struct d3d12_query *query = (struct d3d12_query *)q;
505 
506    assert(query->type != PIPE_QUERY_TIMESTAMP);
507 
508    if (unlikely(query->type == PIPE_QUERY_TIME_ELAPSED))
509       begin_timer_query(ctx, query, true);
510    else {
511       begin_query(ctx, query, true);
512       list_addtail(&query->active_list, &ctx->active_queries);
513    }
514 
515    return true;
516 }
517 
518 static void
end_subquery(struct d3d12_context * ctx,struct d3d12_query * q_parent,unsigned sub_query)519 end_subquery(struct d3d12_context *ctx, struct d3d12_query *q_parent, unsigned sub_query)
520 {
521    struct d3d12_query_impl *q = &q_parent->subqueries[sub_query];
522 
523    uint64_t offset = 0;
524    struct d3d12_batch *batch = d3d12_current_batch(ctx);
525    struct d3d12_resource *res = (struct d3d12_resource *)q->buffer;
526    ID3D12Resource *d3d12_res = d3d12_resource_underlying(res, &offset);
527 
528    /* For TIMESTAMP, there's only one slot */
529    if (q_parent->type == PIPE_QUERY_TIMESTAMP)
530       q->curr_query = 0;
531 
532    /* With QUERY_TIME_ELAPSED we have recorded one value at
533       * (2 * q->curr_query), and now we record a value at (2 * q->curr_query + 1)
534       * and when resolving the query we subtract the latter from the former */
535 
536    unsigned resolve_count = q_parent->type == PIPE_QUERY_TIME_ELAPSED ? 2 : 1;
537    unsigned resolve_index = resolve_count * q->curr_query;
538    unsigned end_index = resolve_index + resolve_count - 1;
539 
540    offset += q->buffer_offset + resolve_index * q->query_size;
541    ctx->cmdlist->EndQuery(q->query_heap, q->d3d12qtype, end_index);
542    d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS);
543    d3d12_apply_resource_states(ctx, false);
544    ctx->cmdlist->ResolveQueryData(q->query_heap, q->d3d12qtype, resolve_index,
545       resolve_count, d3d12_res, offset);
546 
547    d3d12_batch_reference_object(batch, q->query_heap);
548    d3d12_batch_reference_resource(batch, res, true);
549 
550    assert(q->curr_query < q->num_queries);
551    q->curr_query++;
552    q->active = (q_parent->type == PIPE_QUERY_TIMESTAMP);
553 }
554 
555 static void
end_query(struct d3d12_context * ctx,struct d3d12_query * q_parent)556 end_query(struct d3d12_context *ctx, struct d3d12_query *q_parent)
557 {
558    for (unsigned i = 0; i < num_sub_queries(q_parent->type, q_parent->index); ++i) {
559       struct d3d12_query_impl *q = &q_parent->subqueries[i];
560       if (!q->active)
561          continue;
562 
563       end_subquery(ctx, q_parent, i);
564    }
565 }
566 
567 static bool
d3d12_end_query(struct pipe_context * pctx,struct pipe_query * q)568 d3d12_end_query(struct pipe_context *pctx,
569                struct pipe_query *q)
570 {
571    struct d3d12_context *ctx = d3d12_context(pctx);
572    struct d3d12_query *query = (struct d3d12_query *)q;
573 
574    // Assign the sentinel and track now that the query is ended
575    query->fence_value = UINT64_MAX;
576    d3d12_batch_reference_query(d3d12_current_batch(ctx), query);
577 
578    end_query(ctx, query);
579 
580    if (query->type != PIPE_QUERY_TIMESTAMP &&
581        query->type != PIPE_QUERY_TIME_ELAPSED)
582       list_delinit(&query->active_list);
583    return true;
584 }
585 
586 static bool
d3d12_get_query_result(struct pipe_context * pctx,struct pipe_query * q,bool wait,union pipe_query_result * result)587 d3d12_get_query_result(struct pipe_context *pctx,
588                       struct pipe_query *q,
589                       bool wait,
590                       union pipe_query_result *result)
591 {
592    struct d3d12_context *ctx = d3d12_context(pctx);
593    struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
594    struct d3d12_query *query = (struct d3d12_query *)q;
595 
596    if (!query_ensure_ready(screen, ctx, query, wait))
597       return false;
598 
599    return accumulate_result_cpu(ctx, query, result);
600 }
601 
602 static void
d3d12_get_query_result_resource(struct pipe_context * pctx,struct pipe_query * q,enum pipe_query_flags flags,enum pipe_query_value_type result_type,int index,struct pipe_resource * resource,unsigned offset)603 d3d12_get_query_result_resource(struct pipe_context *pctx,
604                                 struct pipe_query *q,
605                                 enum pipe_query_flags flags,
606                                 enum pipe_query_value_type result_type,
607                                 int index,
608                                 struct pipe_resource *resource,
609                                 unsigned offset)
610 {
611    struct d3d12_context *ctx = d3d12_context(pctx);
612 
613    if (index == -1) {
614       /* Write the "available" bit, which is always true */
615       struct d3d12_resource *res = d3d12_resource(resource);
616       d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_TRANSITION_FLAG_NONE);
617       d3d12_apply_resource_states(ctx, false);
618 
619       D3D12_GPU_VIRTUAL_ADDRESS gpuva_base = d3d12_resource_gpu_virtual_address(res) + offset;
620       D3D12_WRITEBUFFERIMMEDIATE_PARAMETER params[2] = {
621          { gpuva_base, 1 },
622          { gpuva_base + sizeof(uint32_t), 0 },
623       };
624       D3D12_WRITEBUFFERIMMEDIATE_MODE modes[2] = { D3D12_WRITEBUFFERIMMEDIATE_MODE_DEFAULT, D3D12_WRITEBUFFERIMMEDIATE_MODE_DEFAULT };
625       ctx->cmdlist8->WriteBufferImmediate(result_type == PIPE_QUERY_TYPE_I64 || result_type == PIPE_QUERY_TYPE_U64 ? 2 : 1,
626                                           params, modes);
627       return;
628    }
629 
630    struct d3d12_query *query = (struct d3d12_query *)q;
631    accumulate_result_gpu(ctx, query, resource, offset, index, result_type);
632 }
633 
634 void
d3d12_suspend_queries(struct d3d12_context * ctx)635 d3d12_suspend_queries(struct d3d12_context *ctx)
636 {
637    list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {
638       end_query(ctx, query);
639    }
640 }
641 
642 void
d3d12_resume_queries(struct d3d12_context * ctx)643 d3d12_resume_queries(struct d3d12_context *ctx)
644 {
645    list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {
646       begin_query(ctx, query, false);
647    }
648 }
649 
650 void
d3d12_validate_queries(struct d3d12_context * ctx)651 d3d12_validate_queries(struct d3d12_context *ctx)
652 {
653    /* Nothing to do, all queries are suspended */
654    if (ctx->queries_disabled)
655       return;
656 
657    list_for_each_entry(struct d3d12_query, query, &ctx->active_queries, active_list) {
658       for (unsigned i = 0; i < num_sub_queries(query->type, query->index); ++i) {
659          if (query->subqueries[i].active && !subquery_should_be_active(ctx, query, i))
660             end_subquery(ctx, query, i);
661          else if (!query->subqueries[i].active && subquery_should_be_active(ctx, query, i))
662             begin_subquery(ctx, query, i);
663       }
664    }
665 }
666 
667 static void
d3d12_set_active_query_state(struct pipe_context * pctx,bool enable)668 d3d12_set_active_query_state(struct pipe_context *pctx, bool enable)
669 {
670    struct d3d12_context *ctx = d3d12_context(pctx);
671    ctx->queries_disabled = !enable;
672 
673    if (enable)
674       d3d12_resume_queries(ctx);
675    else
676       d3d12_suspend_queries(ctx);
677 }
678 
679 static void
d3d12_render_condition(struct pipe_context * pctx,struct pipe_query * pquery,bool condition,enum pipe_render_cond_flag mode)680 d3d12_render_condition(struct pipe_context *pctx,
681                        struct pipe_query *pquery,
682                        bool condition,
683                        enum pipe_render_cond_flag mode)
684 {
685    struct d3d12_context *ctx = d3d12_context(pctx);
686    struct d3d12_query *query = (struct d3d12_query *)pquery;
687 
688    if (query == nullptr) {
689       ctx->cmdlist->SetPredication(nullptr, 0, D3D12_PREDICATION_OP_EQUAL_ZERO);
690       ctx->current_predication = nullptr;
691       return;
692    }
693 
694    if (!query->predicate)
695       query->predicate = d3d12_resource(pipe_buffer_create(pctx->screen, 0,
696                                                            PIPE_USAGE_DEFAULT, sizeof(uint64_t)));
697 
698    accumulate_result_gpu(ctx, query, &query->predicate->base.b, 0, 0, PIPE_QUERY_TYPE_U64);
699 
700    d3d12_transition_resource_state(ctx, query->predicate, D3D12_RESOURCE_STATE_PREDICATION, D3D12_TRANSITION_FLAG_NONE);
701    d3d12_apply_resource_states(ctx, false);
702 
703    ctx->current_predication = query->predicate;
704    ctx->predication_condition = condition;
705    d3d12_enable_predication(ctx);
706 }
707 
708 void
d3d12_enable_predication(struct d3d12_context * ctx)709 d3d12_enable_predication(struct d3d12_context *ctx)
710 {
711    /* documentation of ID3D12GraphicsCommandList::SetPredication method:
712       * "resource manipulation commands are _not_ actually performed
713       *  if the resulting predicate data of the predicate is equal to
714       *  the operation specified."
715       */
716    ctx->cmdlist->SetPredication(d3d12_resource_resource(ctx->current_predication), 0,
717                                 ctx->predication_condition ? D3D12_PREDICATION_OP_NOT_EQUAL_ZERO :
718                                 D3D12_PREDICATION_OP_EQUAL_ZERO);
719 }
720 
721 void
d3d12_context_query_init(struct pipe_context * pctx)722 d3d12_context_query_init(struct pipe_context *pctx)
723 {
724    struct d3d12_context *ctx = d3d12_context(pctx);
725    list_inithead(&ctx->active_queries);
726 
727    u_suballocator_init(&ctx->query_allocator, &ctx->base, 4096, 0, PIPE_USAGE_STAGING,
728                          0, true);
729 
730    pctx->create_query = d3d12_create_query;
731    pctx->destroy_query = d3d12_release_query;
732    pctx->begin_query = d3d12_begin_query;
733    pctx->end_query = d3d12_end_query;
734    pctx->get_query_result = d3d12_get_query_result;
735    pctx->get_query_result_resource = d3d12_get_query_result_resource;
736    pctx->set_active_query_state = d3d12_set_active_query_state;
737    pctx->render_condition = d3d12_render_condition;
738 }
739