/*
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2024 Valve Corporation
 * Copyright 2022 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "libagx.h"
#include "query.h"

/* Write one report to the destination buffer, as either a 32-bit or a 64-bit
 * value depending on what the app requested for the copy.
 */
static inline void
write_query_result(uintptr_t dst_addr, int32_t idx, bool is_64, uint64_t result)
{
   /* TODO: do we want real 64-bit stats? sync with CPU impl */
   result &= 0xffffffff;

   if (is_64) {
      global uint64_t *out = (global uint64_t *)dst_addr;
      out[idx] = result;
   } else {
      global uint32_t *out = (global uint32_t *)dst_addr;
      out[idx] = result;
   }
}

/* Copy the results of one query to the app's destination buffer. Each
 * invocation i handles one query in the copied range. Results are written
 * only if the query is available, unless partial results were requested.
 */
void
libagx_copy_query(constant struct libagx_copy_query_push *push, unsigned i)
{
   uint64_t dst = push->dst_addr + (((uint64_t)i) * push->dst_stride);
   uint32_t query = push->first_query + i;
   bool available = push->availability[query];

   if (available || push->partial) {
      /* For occlusion queries, results[] points to the device global heap. We
       * need to remap indices according to the query pool's allocation.
       */
      uint result_index = push->oq_index ? push->oq_index[query] : query;
      uint idx = result_index * push->reports_per_query;

      for (unsigned r = 0; r < push->reports_per_query; ++r) {
         write_query_result(dst, r, push->_64, push->results[idx + r]);
      }
   }

   if (push->with_availability) {
      write_query_result(dst, push->reports_per_query, push->_64, available);
   }
}

/* Copy transform feedback counters, one per invocation. Unbound counters
 * (null sources) read back as zero.
 */
void
libagx_copy_xfb_counters(constant struct libagx_xfb_counter_copy *push)
{
   unsigned i = get_local_id(0);

   *(push->dest[i]) = push->src[i] ? *(push->src[i]) : 0;
}

/* Add a precomputed delta to a pipeline statistics counter. */
void
libagx_increment_statistic(constant struct libagx_increment_params *p)
{
   *(p->statistic) += p->delta;
}

/* Increment the compute invocation statistic for an indirect dispatch, where
 * the grid size is only known on the GPU.
 */
void
libagx_increment_cs_invocations(constant struct libagx_cs_invocation_params *p)
{
   *(p->statistic) += libagx_cs_invocations(p->local_size_threads, p->grid[0],
                                            p->grid[1], p->grid[2]);
}

/* Increment input assembly statistics for an indirect draw. p->draw mirrors
 * the indirect draw descriptor: element count, instance count, then (for
 * indexed draws) the first index. With an index buffer bound, primitive
 * restart indices do not generate vertices, so the workgroup first counts
 * non-restart indices cooperatively.
 */
kernel void
libagx_increment_ia_counters(constant struct libagx_increment_ia_counters *p,
                             uint index_size_B, uint tid)
{
   unsigned count = p->draw[0];
   local uint scratch;

   if (index_size_B /* implies primitive restart */) {
      uint start = p->draw[2];
      uint partial = 0;

      /* Count non-restart indices, each thread striding by the 1024-thread
       * workgroup size.
       */
      for (uint i = tid; i < count; i += 1024) {
         uint index = libagx_load_index_buffer_internal(
            p->index_buffer, p->index_buffer_range_el, start + i, index_size_B);

         if (index != p->restart_index)
            partial++;
      }

      /* Accumulate the partials across the workgroup */
      scratch = 0;
      barrier(CLK_LOCAL_MEM_FENCE);
      atomic_add(&scratch, partial);
      barrier(CLK_LOCAL_MEM_FENCE);
      count = scratch;

      /* Elect a single thread from the workgroup to increment the counters */
      if (tid != 0)
         return;
   }

   count *= p->draw[1];

   if (p->ia_vertices) {
      *(p->ia_vertices) += count;
   }

   if (p->vs_invocations) {
      *(p->vs_invocations) += count;
   }
}