/*
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2024 Valve Corporation
 * Copyright 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "libagx.h"
#include "query.h"

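/* Write a single query result at index idx of the destination buffer, as
 * either a 32-bit or a 64-bit word depending on the requested result size.
 */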
static inline void
write_query_result(uintptr_t dst_addr, int32_t idx, bool is_64, uint64_t result)
{
   /* TODO: do we want real 64-bit stats? sync with CPU impl */
   result &= 0xffffffff;

   if (is_64) {
      global uint64_t *out = (global uint64_t *)dst_addr;
      out[idx] = result;
   } else {
      global uint32_t *out = (global uint32_t *)dst_addr;
      out[idx] = result;
   }
}

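/* Copy the results of one query out of the pool. Each invocation i handles a
 * single query, writing its reports (and optionally a trailing availability
 * word) to the destination buffer at the given stride.
 */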
void
libagx_copy_query(constant struct libagx_copy_query_push *push, unsigned i)
{
   uint64_t dst = push->dst_addr + (((uint64_t)i) * push->dst_stride);
   uint32_t query = push->first_query + i;
   bool available = push->availability[query];

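   /* Results are written when the query is available, or when partial results
    * were requested (presumably matching VK_QUERY_RESULT_PARTIAL_BIT
    * semantics).
    */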
   if (available || push->partial) {
      /* For occlusion queries, results[] points to the device global heap. We
       * need to remap indices according to the query pool's allocation.
       */
      uint result_index = push->oq_index ? push->oq_index[query] : query;
      uint idx = result_index * push->reports_per_query;

      for (unsigned r = 0; r < push->reports_per_query; ++r) {
         write_query_result(dst, r, push->_64, push->results[idx + r]);
      }
   }

   if (push->with_availability) {
      write_query_result(dst, push->reports_per_query, push->_64, available);
   }
}

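/* Copy transform feedback counters. One local invocation copies each counter,
 * substituting zero when the source pointer is null.
 */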
void
libagx_copy_xfb_counters(constant struct libagx_xfb_counter_copy *push)
{
   unsigned i = get_local_id(0);

   *(push->dest[i]) = push->src[i] ? *(push->src[i]) : 0;
}

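/* Add a host-supplied delta to a pipeline statistic counter. */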
void
libagx_increment_statistic(constant struct libagx_increment_params *p)
{
   *(p->statistic) += p->delta;
}

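/* Bump the compute shader invocations statistic for a dispatch, presumably as
 * the product of the grid size and the number of threads per workgroup.
 */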
void
libagx_increment_cs_invocations(constant struct libagx_cs_invocation_params *p)
{
   *(p->statistic) += libagx_cs_invocations(p->local_size_threads, p->grid[0],
                                            p->grid[1], p->grid[2]);
}

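/* Increment input assembly statistics for an indirect draw. p->draw appears to
 * use the standard indirect draw layout: draw[0] is the index/vertex count,
 * draw[1] the instance count, and draw[2] the first index. When an index
 * buffer is bound, primitive restart indices are excluded from the count,
 * which requires a workgroup-wide reduction.
 */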
kernel void
libagx_increment_ia_counters(constant struct libagx_increment_ia_counters *p,
                             uint index_size_B, uint tid)
{
   unsigned count = p->draw[0];
   local uint scratch;

   if (index_size_B /* implies primitive restart */) {
      uint start = p->draw[2];
      uint partial = 0;

      /* Count non-restart indices. The stride of 1024 matches the workgroup
       * size this kernel is assumed to be launched with.
       */
      for (uint i = tid; i < count; i += 1024) {
         uint index = libagx_load_index_buffer_internal(
            p->index_buffer, p->index_buffer_range_el, start + i, index_size_B);

         if (index != p->restart_index)
            partial++;
      }

      /* Accumulate the partials across the workgroup */
      scratch = 0;
      barrier(CLK_LOCAL_MEM_FENCE);
      atomic_add(&scratch, partial);
      barrier(CLK_LOCAL_MEM_FENCE);
      count = scratch;

      /* Elect a single thread from the workgroup to increment the counters */
      if (tid != 0)
         return;
   }

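   /* Scale by the instance count (presumably draw[1]); both statistics below
    * advance by the total number of vertices assembled across all instances.
    */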
   count *= p->draw[1];

   if (p->ia_vertices) {
      *(p->ia_vertices) += count;
   }

   if (p->vs_invocations) {
      *(p->vs_invocations) += count;
   }
}