xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/freedreno/a6xx/fd6_query.cc (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2017 Rob Clark <[email protected]>
3  * Copyright © 2018 Google, Inc.
4  * SPDX-License-Identifier: MIT
5  *
6  * Authors:
7  *    Rob Clark <[email protected]>
8  */
9 
10 #define FD_BO_NO_HARDPIN 1
11 
12 /* NOTE: see https://gitlab.freedesktop.org/freedreno/freedreno/-/wikis/A5xx-Queries */
13 
14 #include "freedreno_query_acc.h"
15 #include "freedreno_resource.h"
16 
17 #include "fd6_context.h"
18 #include "fd6_emit.h"
19 #include "fd6_query.h"
20 
21 #include "fd6_pack.h"
22 
/* g++ is picky about offsetof() on types where the offset cannot be
 * resolved at compile time, so roll our own __offsetof() that computes
 * it at runtime from a zero-initialized local instance (GNU statement
 * expression).
 */
#define __offsetof(type, field)                                                \
   ({ type _x = {}; ((uint8_t *)&_x.field) - ((uint8_t *)&_x);})
28 
/* Sample layout for occlusion/timestamp queries: 'start'/'stop' hold hw
 * counter (or timestamp) snapshots, and 'result' accumulates stop - start
 * across resume/pause pairs.
 */
struct PACKED fd6_query_sample {
   struct fd_acc_query_sample base;

   /* The RB_SAMPLE_COUNT_ADDR destination needs to be 16-byte aligned: */
   uint64_t pad;

   uint64_t start;
   uint64_t result;
   uint64_t stop;
};
FD_DEFINE_CAST(fd_acc_query_sample, fd6_query_sample);

/* offset of a single field of an array of fd6_query_sample; expands to the
 * bo/offset/or/shift argument list expected by OUT_RELOC():
 */
#define query_sample_idx(aq, idx, field)                                       \
   fd_resource((aq)->prsc)->bo,                                                \
      (idx * sizeof(struct fd6_query_sample)) +                                \
         offsetof(struct fd6_query_sample, field),                             \
      0, 0

/* offset of a single field of fd6_query_sample: */
#define query_sample(aq, field) query_sample_idx(aq, 0, field)
50 
51 /*
52  * Occlusion Query:
53  *
54  * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
55  * interpret results
56  */
57 
/* Begin (or resume) counting ZPASS'd samples into the 'start' snapshot.
 * Older parts latch the counter via RB_SAMPLE_COUNT_ADDR + a ZPASS_DONE
 * event; parts with has_event_write_sample_count write the sample count
 * (and an accumulated diff) directly from CP_EVENT_WRITE7.
 */
template <chip CHIP>
static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->draw;

   ASSERT_ALIGNED(struct fd6_query_sample, start, 16);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   if (!ctx->screen->info->a7xx.has_event_write_sample_count) {
      OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
      OUT_RELOC(ring, query_sample(aq, start));

      fd6_event_write<CHIP>(ctx, ring, FD_ZPASS_DONE);

      /* Copied from blob's cmdstream, not sure why it is done. */
      if (CHIP == A7XX) {
         fd6_event_write<CHIP>(ctx, ring, FD_CCU_CLEAN_DEPTH);
      }
   } else {
      /* Write the raw sample count to 'start': */
      OUT_PKT(ring, CP_EVENT_WRITE7,
         CP_EVENT_WRITE7_0(
            .event = ZPASS_DONE,
            .write_sample_count = true,
         ),
         EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, start)),
      );
      /* And arm the end-offset/diff-accumulate write (see the matching
       * packet in occlusion_pause() for the iova layout):
       */
      OUT_PKT(ring, CP_EVENT_WRITE7,
         CP_EVENT_WRITE7_0(
            .event = ZPASS_DONE,
            .write_sample_count = true,
            .sample_count_end_offset = true,
            .write_accum_sample_count_diff = true,
         ),
         EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, start)),
      );
   }
}
100 
101 template <chip CHIP>
102 static void
occlusion_pause(struct fd_acc_query * aq,struct fd_batch * batch)103 occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
104 {
105    struct fd_context *ctx = batch->ctx;
106    struct fd_ringbuffer *ring = batch->draw;
107 
108    if (!ctx->screen->info->a7xx.has_event_write_sample_count) {
109       OUT_PKT7(ring, CP_MEM_WRITE, 4);
110       OUT_RELOC(ring, query_sample(aq, stop));
111       OUT_RING(ring, 0xffffffff);
112       OUT_RING(ring, 0xffffffff);
113 
114       OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
115    }
116 
117    OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
118    OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);
119 
120    ASSERT_ALIGNED(struct fd6_query_sample, stop, 16);
121 
122    if (!ctx->screen->info->a7xx.has_event_write_sample_count) {
123       OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
124       OUT_RELOC(ring, query_sample(aq, stop));
125 
126       fd6_event_write<CHIP>(batch->ctx, ring, FD_ZPASS_DONE);
127 
128       /* To avoid stalling in the draw buffer, emit code the code to compute the
129        * counter delta in the epilogue ring.
130        */
131       struct fd_ringbuffer *epilogue = fd_batch_get_tile_epilogue(batch);
132 
133       OUT_PKT7(epilogue, CP_WAIT_REG_MEM, 6);
134       OUT_RING(epilogue, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_NE) |
135                             CP_WAIT_REG_MEM_0_POLL(POLL_MEMORY));
136       OUT_RELOC(epilogue, query_sample(aq, stop));
137       OUT_RING(epilogue, CP_WAIT_REG_MEM_3_REF(0xffffffff));
138       OUT_RING(epilogue, CP_WAIT_REG_MEM_4_MASK(0xffffffff));
139       OUT_RING(epilogue, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
140 
141       /* result += stop - start: */
142       OUT_PKT7(epilogue, CP_MEM_TO_MEM, 9);
143       OUT_RING(epilogue, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
144       OUT_RELOC(epilogue, query_sample(aq, result)); /* dst */
145       OUT_RELOC(epilogue, query_sample(aq, result)); /* srcA */
146       OUT_RELOC(epilogue, query_sample(aq, stop));   /* srcB */
147       OUT_RELOC(epilogue, query_sample(aq, start));  /* srcC */
148    } else {
149       OUT_PKT(ring, CP_EVENT_WRITE7,
150          CP_EVENT_WRITE7_0(
151             .event = ZPASS_DONE,
152             .write_sample_count = true,
153          ),
154          EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, stop)),
155       );
156       OUT_PKT(ring, CP_EVENT_WRITE7,
157          CP_EVENT_WRITE7_0(
158             .event = ZPASS_DONE,
159             .write_sample_count = true,
160             .sample_count_end_offset = true,
161             .write_accum_sample_count_diff = true,
162          ),
163          /* Note: SQE is adding offsets to the iova, SAMPLE_COUNT_END_OFFSET causes
164           * the result to be written to iova+16, and WRITE_ACCUM_SAMP_COUNT_DIFF
165           * does *(iova + 8) += *(iova + 16) - *iova
166           *
167           * It just so happens this is the layout we already to for start/result/stop
168           * So we just give the start address in all cases.
169           */
170          EV_DST_RAM_CP_EVENT_WRITE7_1(query_sample(aq, start)),
171       );
172    }
173 }
174 
175 static void
occlusion_counter_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)176 occlusion_counter_result(struct fd_acc_query *aq,
177                          struct fd_acc_query_sample *s,
178                          union pipe_query_result *result)
179 {
180    struct fd6_query_sample *sp = fd6_query_sample(s);
181    result->u64 = sp->result;
182 }
183 
184 static void
occlusion_counter_result_resource(struct fd_acc_query * aq,struct fd_ringbuffer * ring,enum pipe_query_value_type result_type,int index,struct fd_resource * dst,unsigned offset)185 occlusion_counter_result_resource(struct fd_acc_query *aq, struct fd_ringbuffer *ring,
186                                   enum pipe_query_value_type result_type,
187                                   int index, struct fd_resource *dst,
188                                   unsigned offset)
189 {
190    copy_result(ring, result_type, dst, offset, fd_resource(aq->prsc),
191                offsetof(struct fd6_query_sample, result));
192 }
193 
194 static void
occlusion_predicate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)195 occlusion_predicate_result(struct fd_acc_query *aq,
196                            struct fd_acc_query_sample *s,
197                            union pipe_query_result *result)
198 {
199    struct fd6_query_sample *sp = fd6_query_sample(s);
200    result->b = !!sp->result;
201 }
202 
203 static void
occlusion_predicate_result_resource(struct fd_acc_query * aq,struct fd_ringbuffer * ring,enum pipe_query_value_type result_type,int index,struct fd_resource * dst,unsigned offset)204 occlusion_predicate_result_resource(struct fd_acc_query *aq, struct fd_ringbuffer *ring,
205                                     enum pipe_query_value_type result_type,
206                                     int index, struct fd_resource *dst,
207                                     unsigned offset)
208 {
209    /* This is a bit annoying but we need to turn the result into a one or
210     * zero.. to do this use a CP_COND_WRITE to overwrite the result with
211     * a one if it is non-zero.  This doesn't change the results if the
212     * query is also read on the CPU (ie. occlusion_predicate_result()).
213     */
214    OUT_PKT7(ring, CP_COND_WRITE5, 9);
215    OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_NE) |
216                   CP_WAIT_REG_MEM_0_POLL(POLL_MEMORY) |
217                   CP_COND_WRITE5_0_WRITE_MEMORY);
218    OUT_RELOC(ring, query_sample(aq, result)); /* POLL_ADDR_LO/HI */
219    OUT_RING(ring, CP_COND_WRITE5_3_REF(0));
220    OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
221    OUT_RELOC(ring, query_sample(aq, result)); /* WRITE_ADDR_LO/HI */
222    OUT_RING(ring, 1);
223    OUT_RING(ring, 0);
224 
225    copy_result(ring, result_type, dst, offset, fd_resource(aq->prsc),
226                offsetof(struct fd6_query_sample, result));
227 }
228 
/* Occlusion counter: reports the accumulated number of samples passing
 * the depth/stencil test:
 */
template <chip CHIP>
static const struct fd_acc_sample_provider occlusion_counter = {
   .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
   .size = sizeof(struct fd6_query_sample),
   .resume = occlusion_resume<CHIP>,
   .pause = occlusion_pause<CHIP>,
   .result = occlusion_counter_result,
   .result_resource = occlusion_counter_result_resource,
};

/* Occlusion predicate: same counting mechanism, but the result is
 * interpreted as a boolean (any sample passed?):
 */
template <chip CHIP>
static const struct fd_acc_sample_provider occlusion_predicate = {
   .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
   .size = sizeof(struct fd6_query_sample),
   .resume = occlusion_resume<CHIP>,
   .pause = occlusion_pause<CHIP>,
   .result = occlusion_predicate_result,
   .result_resource = occlusion_predicate_result_resource,
};

/* Conservative variant is implemented identically to the precise one: */
template <chip CHIP>
static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
   .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
   .size = sizeof(struct fd6_query_sample),
   .resume = occlusion_resume<CHIP>,
   .pause = occlusion_pause<CHIP>,
   .result = occlusion_predicate_result,
   .result_resource = occlusion_predicate_result_resource,
};
258 
259 /*
260  * Timestamp Queries:
261  */
262 
/* Record a timestamp into 'start' when the query starts/resumes (also
 * used as the sole sample for PIPE_QUERY_TIMESTAMP).
 */
template <chip CHIP>
static void
timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->draw;

   fd6_record_ts<CHIP>(ring, query_sample(aq, start));
}
271 
/* Record the 'stop' timestamp and accumulate the elapsed delta.
 * NOTE(review): the WFI before the CP math presumably ensures the
 * timestamp write is visible to CP_MEM_TO_MEM — confirm against hw docs.
 */
template <chip CHIP>
static void
time_elapsed_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   fd6_record_ts<CHIP>(ring, query_sample(aq, stop));

   OUT_WFI5(ring);

   /* result += stop - start: */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
   OUT_RELOC(ring, query_sample(aq, result)); /* dst */
   OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
   OUT_RELOC(ring, query_sample(aq, stop));   /* srcB */
   OUT_RELOC(ring, query_sample(aq, start));  /* srcC */
}
290 
/* Pause hook for PIPE_QUERY_TIMESTAMP (intentionally a no-op). */
static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
   /* We captured a timestamp in timestamp_resume(), nothing to do here. */
}
296 
297 /* timestamp logging for u_trace: */
298 template <chip CHIP>
299 static void
record_timestamp(struct fd_ringbuffer * ring,struct fd_bo * bo,unsigned offset)300 record_timestamp(struct fd_ringbuffer *ring, struct fd_bo *bo, unsigned offset)
301 {
302    fd6_record_ts<CHIP>(ring, bo, offset, 0, 0);
303 }
304 
305 static void
time_elapsed_accumulate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)306 time_elapsed_accumulate_result(struct fd_acc_query *aq,
307                                struct fd_acc_query_sample *s,
308                                union pipe_query_result *result)
309 {
310    struct fd6_query_sample *sp = fd6_query_sample(s);
311    result->u64 = ticks_to_ns(sp->result);
312 }
313 
/* GPU-side readback: copies the raw tick delta (NOT nanoseconds) into
 * dst+offset.
 */
static void
time_elapsed_result_resource(struct fd_acc_query *aq, struct fd_ringbuffer *ring,
                             enum pipe_query_value_type result_type,
                             int index, struct fd_resource *dst,
                             unsigned offset)
{
   // TODO ticks_to_ns conversion would require spinning up a compute shader?
   copy_result(ring, result_type, dst, offset, fd_resource(aq->prsc),
               offsetof(struct fd6_query_sample, result));
}
324 
325 static void
timestamp_accumulate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)326 timestamp_accumulate_result(struct fd_acc_query *aq,
327                             struct fd_acc_query_sample *s,
328                             union pipe_query_result *result)
329 {
330    struct fd6_query_sample *sp = fd6_query_sample(s);
331    result->u64 = ticks_to_ns(sp->start);
332 }
333 
/* GPU-side readback: copies the raw 'start' timestamp (in ticks, NOT ns)
 * into dst+offset.
 */
static void
timestamp_result_resource(struct fd_acc_query *aq, struct fd_ringbuffer *ring,
                          enum pipe_query_value_type result_type,
                          int index, struct fd_resource *dst,
                          unsigned offset)
{
   // TODO ticks_to_ns conversion would require spinning up a compute shader?
   copy_result(ring, result_type, dst, offset, fd_resource(aq->prsc),
               offsetof(struct fd6_query_sample, start));
}
344 
/* Elapsed-time query: 'always' active so time accumulates across the
 * whole begin/end span:
 */
template <chip CHIP>
static const struct fd_acc_sample_provider time_elapsed = {
   .query_type = PIPE_QUERY_TIME_ELAPSED,
   .always = true,
   .size = sizeof(struct fd6_query_sample),
   .resume = timestamp_resume<CHIP>,
   .pause = time_elapsed_pause<CHIP>,
   .result = time_elapsed_accumulate_result,
   .result_resource = time_elapsed_result_resource,
};

/* NOTE: timestamp query isn't going to give terribly sensible results
 * on a tiler.  But it is needed by qapitrace profile heatmap.  If you
 * add in a binning pass, the results get even more non-sensical.  So
 * we just return the timestamp on the last tile and hope that is
 * kind of good enough.
 */

template <chip CHIP>
static const struct fd_acc_sample_provider timestamp = {
   .query_type = PIPE_QUERY_TIMESTAMP,
   .always = true,
   .size = sizeof(struct fd6_query_sample),
   .resume = timestamp_resume<CHIP>,
   .pause = timestamp_pause,
   .result = timestamp_accumulate_result,
   .result_resource = timestamp_result_resource,
};
373 
/* Sample layout for pipeline-statistics queries: start/stop snapshots of
 * one 64b RBBM_PRIMCTR counter pair plus the accumulated result.
 */
struct PACKED fd6_pipeline_stats_sample {
   struct fd_acc_query_sample base;

   uint64_t start, stop, result;
};
FD_DEFINE_CAST(fd_acc_query_sample, fd6_pipeline_stats_sample);

/* Emit an OUT_RELOC pointing at one field of the stats sample.
 * NOTE: the expansion already ends in a ';', so an extra one at the call
 * site is redundant (and its absence goes unnoticed).
 */
#define stats_reloc(ring, aq, field)                                           \
   OUT_RELOC(ring, fd_resource((aq)->prsc)->bo,                                \
             offsetof(struct fd6_pipeline_stats_sample, field), 0, 0);
384 
385 /* Mapping of counters to pipeline stats:
386  *
387  *   Gallium (PIPE_STAT_QUERY_x) | Vulkan (VK_QUERY_PIPELINE_STATISTIC_x_BIT) | hw counter
388  *   ----------------------------+--------------------------------------------+----------------
389  *   IA_VERTICES                 | INPUT_ASSEMBLY_VERTICES                    | RBBM_PRIMCTR_0
390  *   IA_PRIMITIVES               | INPUT_ASSEMBLY_PRIMITIVES                  | RBBM_PRIMCTR_1
391  *   VS_INVOCATIONS              | VERTEX_SHADER_INVOCATIONS                  | RBBM_PRIMCTR_0
392  *   GS_INVOCATIONS              | GEOMETRY_SHADER_INVOCATIONS                | RBBM_PRIMCTR_5
393  *   GS_PRIMITIVES               | GEOMETRY_SHADER_PRIMITIVES                 | RBBM_PRIMCTR_6
394  *   C_INVOCATIONS               | CLIPPING_INVOCATIONS                       | RBBM_PRIMCTR_7
395  *   C_PRIMITIVES                | CLIPPING_PRIMITIVES                        | RBBM_PRIMCTR_8
396  *   PS_INVOCATIONS              | FRAGMENT_SHADER_INVOCATIONS                | RBBM_PRIMCTR_9
397  *   HS_INVOCATIONS              | TESSELLATION_CONTROL_SHADER_PATCHES        | RBBM_PRIMCTR_2
398  *   DS_INVOCATIONS              | TESSELLATION_EVALUATION_SHADER_INVOCATIONS | RBBM_PRIMCTR_4
399  *   CS_INVOCATIONS              | COMPUTE_SHADER_INVOCATIONS                 | RBBM_PRIMCTR_10
400  *
401  * Note that "Vertices corresponding to incomplete primitives may contribute to the count.",
402  * in our case they do not, so IA_VERTICES and VS_INVOCATIONS are the same thing.
403  */
404 
/* The hw has separate start/stop events for the primitive, fragment, and
 * compute counter groups, so classify each query into one of them:
 */
enum stats_type {
   STATS_PRIMITIVE,
   STATS_FRAGMENT,
   STATS_COMPUTE,
};

/* Start/stop counter events per group, indexed by enum stats_type: */
static const struct {
   enum fd_gpu_event start, stop;
} stats_counter_events[] = {
      [STATS_PRIMITIVE] = { FD_START_PRIMITIVE_CTRS, FD_STOP_PRIMITIVE_CTRS },
      [STATS_FRAGMENT]  = { FD_START_FRAGMENT_CTRS,  FD_STOP_FRAGMENT_CTRS },
      [STATS_COMPUTE]   = { FD_START_COMPUTE_CTRS,   FD_STOP_COMPUTE_CTRS },
};
418 
419 static enum stats_type
get_stats_type(struct fd_acc_query * aq)420 get_stats_type(struct fd_acc_query *aq)
421 {
422    if (aq->provider->query_type == PIPE_QUERY_PRIMITIVES_GENERATED)
423       return STATS_PRIMITIVE;
424 
425    switch (aq->base.index) {
426    case PIPE_STAT_QUERY_PS_INVOCATIONS: return STATS_FRAGMENT;
427    case PIPE_STAT_QUERY_CS_INVOCATIONS: return STATS_COMPUTE;
428    default:
429       return STATS_PRIMITIVE;
430    }
431 }
432 
433 static unsigned
stats_counter_index(struct fd_acc_query * aq)434 stats_counter_index(struct fd_acc_query *aq)
435 {
436    if (aq->provider->query_type == PIPE_QUERY_PRIMITIVES_GENERATED)
437       return 7;
438 
439    switch (aq->base.index) {
440    case PIPE_STAT_QUERY_IA_VERTICES:    return 0;
441    case PIPE_STAT_QUERY_IA_PRIMITIVES:  return 1;
442    case PIPE_STAT_QUERY_VS_INVOCATIONS: return 0;
443    case PIPE_STAT_QUERY_GS_INVOCATIONS: return 5;
444    case PIPE_STAT_QUERY_GS_PRIMITIVES:  return 6;
445    case PIPE_STAT_QUERY_C_INVOCATIONS:  return 7;
446    case PIPE_STAT_QUERY_C_PRIMITIVES:   return 8;
447    case PIPE_STAT_QUERY_PS_INVOCATIONS: return 9;
448    case PIPE_STAT_QUERY_HS_INVOCATIONS: return 2;
449    case PIPE_STAT_QUERY_DS_INVOCATIONS: return 4;
450    case PIPE_STAT_QUERY_CS_INVOCATIONS: return 10;
451    default:
452       return 0;
453    }
454 }
455 
/* Debug-only dump of one pipeline-stats sample (compiled out unless
 * DEBUG_COUNTERS is defined).  labels[] is indexed by RBBM_PRIMCTR index,
 * not by PIPE_STAT_QUERY_* ("??" is the unused counter 3).
 */
static void
log_pipeline_stats(struct fd6_pipeline_stats_sample *ps, unsigned idx)
{
#ifdef DEBUG_COUNTERS
   const char *labels[] = {
      "VS_INVOCATIONS",
      "IA_PRIMITIVES",
      "HS_INVOCATIONS",
      "??",
      "DS_INVOCATIONS",
      "GS_INVOCATIONS",
      "GS_PRIMITIVES",
      "C_INVOCATIONS",
      "C_PRIMITIVES",
      "PS_INVOCATIONS",
      "CS_INVOCATIONS",
   };

   mesa_logd("  counter\t\tstart\t\t\tstop\t\t\tdiff");
   mesa_logd("  RBBM_PRIMCTR_%d\t0x%016" PRIx64 "\t0x%016" PRIx64 "\t%" PRIi64 "\t%s",
             idx, ps->start, ps->stop, ps->stop - ps->start, labels[idx]);
#endif
}
479 
/* Start (or resume) a pipeline-stats query: snapshot the counter pair
 * into 'start', and start the hw counters if this is the first active
 * query of its group on the batch.
 */
template <chip CHIP>
static void
pipeline_stats_resume(struct fd_acc_query *aq, struct fd_batch *batch)
   assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;
   enum stats_type type = get_stats_type(aq);
   unsigned idx = stats_counter_index(aq);
   unsigned reg = REG_A6XX_RBBM_PRIMCTR_0_LO + (2 * idx);

   OUT_WFI5(ring);

   /* Copy the 64b counter pair (LO/HI) into the 'start' snapshot: */
   OUT_PKT7(ring, CP_REG_TO_MEM, 3);
   OUT_RING(ring, CP_REG_TO_MEM_0_64B |
                  CP_REG_TO_MEM_0_CNT(2) |
                  CP_REG_TO_MEM_0_REG(reg));
   stats_reloc(ring, aq, start);

   assert(type < ARRAY_SIZE(batch->pipeline_stats_queries_active));

   /* Only the first active query of a group actually starts the counters;
    * the refcount pairs with pipeline_stats_pause():
    */
   if (!batch->pipeline_stats_queries_active[type])
      fd6_event_write<CHIP>(batch->ctx, ring, stats_counter_events[type].start);
   batch->pipeline_stats_queries_active[type]++;
}
504 
505 template <chip CHIP>
506 static void
pipeline_stats_pause(struct fd_acc_query * aq,struct fd_batch * batch)507 pipeline_stats_pause(struct fd_acc_query *aq, struct fd_batch *batch)
508    assert_dt
509 {
510    struct fd_ringbuffer *ring = batch->draw;
511    enum stats_type type = get_stats_type(aq);
512    unsigned idx = stats_counter_index(aq);
513    unsigned reg = REG_A6XX_RBBM_PRIMCTR_0_LO + (2 * idx);
514 
515    OUT_WFI5(ring);
516 
517    /* snapshot the end values: */
518    OUT_PKT7(ring, CP_REG_TO_MEM, 3);
519    OUT_RING(ring, CP_REG_TO_MEM_0_64B |
520                   CP_REG_TO_MEM_0_CNT(2) |
521                   CP_REG_TO_MEM_0_REG(reg));
522    stats_reloc(ring, aq, stop);
523 
524    assert(type < ARRAY_SIZE(batch->pipeline_stats_queries_active));
525    assert(batch->pipeline_stats_queries_active[type] > 0);
526 
527    batch->pipeline_stats_queries_active[type]--;
528    if (batch->pipeline_stats_queries_active[type])
529       fd6_event_write<CHIP>(batch->ctx, ring, stats_counter_events[type].stop);
530 
531    /* result += stop - start: */
532    OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
533    OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x40000000);
534    stats_reloc(ring, aq, result);
535    stats_reloc(ring, aq, result);
536    stats_reloc(ring, aq, stop)
537    stats_reloc(ring, aq, start);
538 }
539 
540 static void
pipeline_stats_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)541 pipeline_stats_result(struct fd_acc_query *aq,
542                       struct fd_acc_query_sample *s,
543                       union pipe_query_result *result)
544 {
545    struct fd6_pipeline_stats_sample *ps = fd6_pipeline_stats_sample(s);
546 
547    log_pipeline_stats(ps, stats_counter_index(aq));
548 
549    result->u64 = ps->result;
550 }
551 
552 static void
pipeline_stats_result_resource(struct fd_acc_query * aq,struct fd_ringbuffer * ring,enum pipe_query_value_type result_type,int index,struct fd_resource * dst,unsigned offset)553 pipeline_stats_result_resource(struct fd_acc_query *aq,
554                                struct fd_ringbuffer *ring,
555                                enum pipe_query_value_type result_type,
556                                int index, struct fd_resource *dst,
557                                unsigned offset)
558 {
559    copy_result(ring, result_type, dst, offset, fd_resource(aq->prsc),
560                offsetof(struct fd6_pipeline_stats_sample, result));
561 }
562 
/* PRIMITIVES_GENERATED is implemented via the pipeline-stats machinery
 * (clipping-invocations counter, see stats_counter_index()):
 */
template <chip CHIP>
static const struct fd_acc_sample_provider primitives_generated = {
   .query_type = PIPE_QUERY_PRIMITIVES_GENERATED,
   .size = sizeof(struct fd6_pipeline_stats_sample),
   .resume = pipeline_stats_resume<CHIP>,
   .pause = pipeline_stats_pause<CHIP>,
   .result = pipeline_stats_result,
   .result_resource = pipeline_stats_result_resource,
};

/* Single pipeline-statistics counter, selected by aq->base.index: */
template <chip CHIP>
static const struct fd_acc_sample_provider pipeline_statistics_single = {
   .query_type = PIPE_QUERY_PIPELINE_STATISTICS_SINGLE,
   .size = sizeof(struct fd6_pipeline_stats_sample),
   .resume = pipeline_stats_resume<CHIP>,
   .pause = pipeline_stats_pause<CHIP>,
   .result = pipeline_stats_result,
   .result_resource = pipeline_stats_result_resource,
};
582 
/* Sample layout for streamout primitive queries: per-stream emitted/
 * generated snapshots at resume/pause, plus the accumulated deltas.
 */
struct PACKED fd6_primitives_sample {
   struct fd_acc_query_sample base;

   /* VPC_SO_STREAM_COUNTS dest address must be 32b aligned: */
   uint64_t pad[3];

   struct {
      uint64_t emitted, generated;
   } start[4], stop[4], result;
};
FD_DEFINE_CAST(fd_acc_query_sample, fd6_primitives_sample);

/* Emit an OUT_RELOC pointing at one field of the primitives sample.
 * Uses __offsetof() since the field may be an array element.
 */
#define primitives_reloc(ring, aq, field)                                      \
   OUT_RELOC(ring, fd_resource((aq)->prsc)->bo,                                \
             __offsetof(struct fd6_primitives_sample, field), 0, 0);
598 
/* Debug-only dump of the streamout counters (compiled out unless
 * DEBUG_COUNTERS is defined).
 */
static void
log_primitives_sample(struct fd6_primitives_sample *ps)
{
#ifdef DEBUG_COUNTERS
   mesa_logd("  so counts");
   for (int i = 0; i < ARRAY_SIZE(ps->start); i++) {
      /* Fix: the "emitted" row previously printed the .generated fields
       * and vice versa; each row now prints its matching field.
       */
      mesa_logd("  CHANNEL %d emitted\t0x%016" PRIx64 "\t0x%016" PRIx64
             "\t%" PRIi64,
             i, ps->start[i].emitted, ps->stop[i].emitted,
             ps->stop[i].emitted - ps->start[i].emitted);
      mesa_logd("  CHANNEL %d generated\t0x%016" PRIx64 "\t0x%016" PRIx64
             "\t%" PRIi64,
             i, ps->start[i].generated, ps->stop[i].generated,
             ps->stop[i].generated - ps->start[i].generated);
   }

   mesa_logd("generated %" PRIu64 ", emitted %" PRIu64, ps->result.generated,
          ps->result.emitted);
#endif
}
619 
/* Start (or resume) a streamout primitives query: have the hw dump the
 * per-stream counters into the 'start' snapshots.
 */
template <chip CHIP>
static void
primitives_emitted_resume(struct fd_acc_query *aq,
                          struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   OUT_WFI5(ring);

   ASSERT_ALIGNED(struct fd6_primitives_sample, start[0], 32);

   /* Point VPC_SO_STREAM_COUNTS at start[] and trigger the dump: */
   OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS, 2);
   primitives_reloc(ring, aq, start[0]);

   fd6_event_write<CHIP>(batch->ctx, ring, FD_WRITE_PRIMITIVE_COUNTS);
}
636 
/* Accumulate the emitted-primitives delta for one SO stream:
 * result.emitted += stop[idx].emitted - start[idx].emitted.
 * (Name typo "accumultate" is long-standing; renaming would touch all
 * call sites, so it is kept.)
 */
static void
accumultate_primitives_emitted(struct fd_acc_query *aq,
                               struct fd_ringbuffer *ring,
                               int idx)
{
   /* result += stop - start: */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x80000000);
   primitives_reloc(ring, aq, result.emitted);
   primitives_reloc(ring, aq, result.emitted);
   primitives_reloc(ring, aq, stop[idx].emitted);
   primitives_reloc(ring, aq, start[idx].emitted);
}
650 
/* Accumulate the generated-primitives delta for one SO stream:
 * result.generated += stop[idx].generated - start[idx].generated.
 */
static void
accumultate_primitives_generated(struct fd_acc_query *aq,
                                 struct fd_ringbuffer *ring,
                                 int idx)
{
   /* result += stop - start: */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C | 0x80000000);
   primitives_reloc(ring, aq, result.generated);
   primitives_reloc(ring, aq, result.generated);
   primitives_reloc(ring, aq, stop[idx].generated);
   primitives_reloc(ring, aq, start[idx].generated);
}
664 
/* Pause a streamout primitives query: dump the counters into the 'stop'
 * snapshots and accumulate the deltas needed by the query type.
 */
template <chip CHIP>
static void
primitives_emitted_pause(struct fd_acc_query *aq,
                         struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   OUT_WFI5(ring);

   ASSERT_ALIGNED(struct fd6_primitives_sample, stop[0], 32);

   OUT_PKT4(ring, REG_A6XX_VPC_SO_STREAM_COUNTS, 2);
   primitives_reloc(ring, aq, stop[0]);

   fd6_event_write<CHIP>(batch->ctx, ring, FD_WRITE_PRIMITIVE_COUNTS);
   fd6_event_write<CHIP>(batch->ctx, ring, FD_CACHE_CLEAN);

   if (aq->provider->query_type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
      /* Need results from all channels: */
      for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
         accumultate_primitives_emitted(aq, ring, i);
         accumultate_primitives_generated(aq, ring, i);
      }
   } else {
      accumultate_primitives_emitted(aq, ring, aq->base.index);
      /* Only need primitives generated counts for the overflow queries: */
      if (aq->provider->query_type == PIPE_QUERY_SO_OVERFLOW_PREDICATE)
         accumultate_primitives_generated(aq, ring, aq->base.index);
   }
}
695 
696 static void
primitives_emitted_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)697 primitives_emitted_result(struct fd_acc_query *aq,
698                           struct fd_acc_query_sample *s,
699                           union pipe_query_result *result)
700 {
701    struct fd6_primitives_sample *ps = fd6_primitives_sample(s);
702 
703    log_primitives_sample(ps);
704 
705    result->u64 = ps->result.emitted;
706 }
707 
708 static void
primitives_emitted_result_resource(struct fd_acc_query * aq,struct fd_ringbuffer * ring,enum pipe_query_value_type result_type,int index,struct fd_resource * dst,unsigned offset)709 primitives_emitted_result_resource(struct fd_acc_query *aq,
710                                    struct fd_ringbuffer *ring,
711                                    enum pipe_query_value_type result_type,
712                                    int index, struct fd_resource *dst,
713                                    unsigned offset)
714 {
715    copy_result(ring, result_type, dst, offset, fd_resource(aq->prsc),
716                offsetof(struct fd6_primitives_sample, result.emitted));
717 }
718 
719 static void
so_overflow_predicate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)720 so_overflow_predicate_result(struct fd_acc_query *aq,
721                              struct fd_acc_query_sample *s,
722                              union pipe_query_result *result)
723 {
724    struct fd6_primitives_sample *ps = fd6_primitives_sample(s);
725 
726    log_primitives_sample(ps);
727 
728    result->b = ps->result.emitted != ps->result.generated;
729 }
730 
/* GPU-side readback of the overflow predicate: compute
 * generated - emitted into dst+offset, then normalize to 0/1.
 */
static void
so_overflow_predicate_result_resource(struct fd_acc_query *aq,
                                      struct fd_ringbuffer *ring,
                                      enum pipe_query_value_type result_type,
                                      int index, struct fd_resource *dst,
                                      unsigned offset)
{
   fd_ringbuffer_attach_bo(ring, dst->bo);
   fd_ringbuffer_attach_bo(ring, fd_resource(aq->prsc)->bo);

   /* result = generated - emitted: */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 7);
   OUT_RING(ring, CP_MEM_TO_MEM_0_NEG_B |
            COND(result_type >= PIPE_QUERY_TYPE_I64, CP_MEM_TO_MEM_0_DOUBLE));
   OUT_RELOC(ring, dst->bo, offset, 0, 0);
   primitives_reloc(ring, aq, result.generated);
   primitives_reloc(ring, aq, result.emitted);

   /* This is a bit awkward, but glcts expects the result to be 1 or 0
    * rather than non-zero vs zero:
    */
   OUT_PKT7(ring, CP_COND_WRITE5, 9);
   OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_NE) |
                  CP_COND_WRITE5_0_POLL(POLL_MEMORY) |
                  CP_COND_WRITE5_0_WRITE_MEMORY);
   OUT_RELOC(ring, dst->bo, offset, 0, 0);    /* POLL_ADDR_LO/HI */
   OUT_RING(ring, CP_COND_WRITE5_3_REF(0));
   OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
   OUT_RELOC(ring, dst->bo, offset, 0, 0);    /* WRITE_ADDR_LO/HI */
   OUT_RING(ring, 1);
   OUT_RING(ring, 0);
}
763 
/* Provider for PIPE_QUERY_PRIMITIVES_EMITTED, backed by the streamout
 * primitive counters snapshotted on resume/pause.
 */
template <chip CHIP>
static const struct fd_acc_sample_provider primitives_emitted = {
   .query_type = PIPE_QUERY_PRIMITIVES_EMITTED,
   .size = sizeof(struct fd6_primitives_sample),
   .resume = primitives_emitted_resume<CHIP>,
   .pause = primitives_emitted_pause<CHIP>,
   .result = primitives_emitted_result,
   .result_resource = primitives_emitted_result_resource,
};
773 
/* Provider for PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE; shares the
 * resume/pause hooks with primitives_emitted but interprets the
 * sample as an overflow predicate (emitted != generated).
 */
template <chip CHIP>
static const struct fd_acc_sample_provider so_overflow_any_predicate = {
   .query_type = PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE,
   .size = sizeof(struct fd6_primitives_sample),
   .resume = primitives_emitted_resume<CHIP>,
   .pause = primitives_emitted_pause<CHIP>,
   .result = so_overflow_predicate_result,
   .result_resource = so_overflow_predicate_result_resource,
};
783 
/* Provider for PIPE_QUERY_SO_OVERFLOW_PREDICATE; same machinery as
 * the ANY variant, differing only in the reported query_type.
 */
template <chip CHIP>
static const struct fd_acc_sample_provider so_overflow_predicate = {
   .query_type = PIPE_QUERY_SO_OVERFLOW_PREDICATE,
   .size = sizeof(struct fd6_primitives_sample),
   .resume = primitives_emitted_resume<CHIP>,
   .pause = primitives_emitted_pause<CHIP>,
   .result = so_overflow_predicate_result,
   .result_resource = so_overflow_predicate_result_resource,
};
793 
/*
 * Performance Counter (batch) queries:
 *
 * Only one of these is active at a time, per design of the gallium
 * batch_query API.  One perfcntr query tracks N query_types, each
 * of which has a 'fd_batch_query_entry' that maps it back to the
 * associated group and counter.
 */

/* Maps one requested query_type back to a hw perfcounter: */
struct fd_batch_query_entry {
   uint8_t gid; /* group-id */
   uint8_t cid; /* countable-id within the group */
};

/* Per-query driver-private state, stored in fd_acc_query::query_data: */
struct fd_batch_query_data {
   struct fd_screen *screen;
   unsigned num_query_entries;
   /* one entry per requested query_type, in caller order: */
   struct fd_batch_query_entry query_entries[];
};
813 
814 static void
perfcntr_resume(struct fd_acc_query * aq,struct fd_batch * batch)815 perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
816 {
817    struct fd_batch_query_data *data = (struct fd_batch_query_data *)aq->query_data;
818    struct fd_screen *screen = data->screen;
819    struct fd_ringbuffer *ring = batch->draw;
820 
821    unsigned counters_per_group[screen->num_perfcntr_groups];
822    memset(counters_per_group, 0, sizeof(counters_per_group));
823 
824    OUT_WFI5(ring);
825 
826    /* configure performance counters for the requested queries: */
827    for (unsigned i = 0; i < data->num_query_entries; i++) {
828       struct fd_batch_query_entry *entry = &data->query_entries[i];
829       const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
830       unsigned counter_idx = counters_per_group[entry->gid]++;
831 
832       assert(counter_idx < g->num_counters);
833 
834       OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
835       OUT_RING(ring, g->countables[entry->cid].selector);
836    }
837 
838    memset(counters_per_group, 0, sizeof(counters_per_group));
839 
840    /* and snapshot the start values */
841    for (unsigned i = 0; i < data->num_query_entries; i++) {
842       struct fd_batch_query_entry *entry = &data->query_entries[i];
843       const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
844       unsigned counter_idx = counters_per_group[entry->gid]++;
845       const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
846 
847       OUT_PKT7(ring, CP_REG_TO_MEM, 3);
848       OUT_RING(ring, CP_REG_TO_MEM_0_64B |
849                         CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
850       OUT_RELOC(ring, query_sample_idx(aq, i, start));
851    }
852 }
853 
854 static void
perfcntr_pause(struct fd_acc_query * aq,struct fd_batch * batch)855 perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
856 {
857    struct fd_batch_query_data *data = (struct fd_batch_query_data *)aq->query_data;
858    struct fd_screen *screen = data->screen;
859    struct fd_ringbuffer *ring = batch->draw;
860 
861    unsigned counters_per_group[screen->num_perfcntr_groups];
862    memset(counters_per_group, 0, sizeof(counters_per_group));
863 
864    OUT_WFI5(ring);
865 
866    /* TODO do we need to bother to turn anything off? */
867 
868    /* snapshot the end values: */
869    for (unsigned i = 0; i < data->num_query_entries; i++) {
870       struct fd_batch_query_entry *entry = &data->query_entries[i];
871       const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
872       unsigned counter_idx = counters_per_group[entry->gid]++;
873       const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];
874 
875       OUT_PKT7(ring, CP_REG_TO_MEM, 3);
876       OUT_RING(ring, CP_REG_TO_MEM_0_64B |
877                         CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
878       OUT_RELOC(ring, query_sample_idx(aq, i, stop));
879    }
880 
881    /* and compute the result: */
882    for (unsigned i = 0; i < data->num_query_entries; i++) {
883       /* result += stop - start: */
884       OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
885       OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
886       OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */
887       OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */
888       OUT_RELOC(ring, query_sample_idx(aq, i, stop));   /* srcB */
889       OUT_RELOC(ring, query_sample_idx(aq, i, start));  /* srcC */
890    }
891 }
892 
893 static void
perfcntr_accumulate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)894 perfcntr_accumulate_result(struct fd_acc_query *aq,
895                            struct fd_acc_query_sample *s,
896                            union pipe_query_result *result)
897 {
898    struct fd_batch_query_data *data =
899          (struct fd_batch_query_data *)aq->query_data;
900    struct fd6_query_sample *sp = fd6_query_sample(s);
901 
902    for (unsigned i = 0; i < data->num_query_entries; i++) {
903       result->batch[i].u64 = sp[i].result;
904    }
905 }
906 
/* Provider for batch (perfcntr) queries.  'always' because perfcounters
 * keep counting regardless of rendering state.
 */
static const struct fd_acc_sample_provider perfcntr = {
   .query_type = FD_QUERY_FIRST_PERFCNTR,
   .always = true,
   .resume = perfcntr_resume,
   .pause = perfcntr_pause,
   .result = perfcntr_accumulate_result,
};
914 
915 static struct pipe_query *
fd6_create_batch_query(struct pipe_context * pctx,unsigned num_queries,unsigned * query_types)916 fd6_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
917                        unsigned *query_types)
918 {
919    struct fd_context *ctx = fd_context(pctx);
920    struct fd_screen *screen = ctx->screen;
921    struct fd_query *q;
922    struct fd_acc_query *aq;
923    struct fd_batch_query_data *data;
924 
925    data = CALLOC_VARIANT_LENGTH_STRUCT(
926       fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));
927 
928    data->screen = screen;
929    data->num_query_entries = num_queries;
930 
931    /* validate the requested query_types and ensure we don't try
932     * to request more query_types of a given group than we have
933     * counters:
934     */
935    unsigned counters_per_group[screen->num_perfcntr_groups];
936    memset(counters_per_group, 0, sizeof(counters_per_group));
937 
938    for (unsigned i = 0; i < num_queries; i++) {
939       unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
940 
941       /* verify valid query_type, ie. is it actually a perfcntr? */
942       if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
943           (idx >= screen->num_perfcntr_queries)) {
944          mesa_loge("invalid batch query query_type: %u", query_types[i]);
945          goto error;
946       }
947 
948       struct fd_batch_query_entry *entry = &data->query_entries[i];
949       struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
950 
951       entry->gid = pq->group_id;
952 
953       /* the perfcntr_queries[] table flattens all the countables
954        * for each group in series, ie:
955        *
956        *   (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
957        *
958        * So to find the countable index just step back through the
959        * table to find the first entry with the same group-id.
960        */
961       while (pq > screen->perfcntr_queries) {
962          pq--;
963          if (pq->group_id == entry->gid)
964             entry->cid++;
965       }
966 
967       if (counters_per_group[entry->gid] >=
968           screen->perfcntr_groups[entry->gid].num_counters) {
969          mesa_loge("too many counters for group %u", entry->gid);
970          goto error;
971       }
972 
973       counters_per_group[entry->gid]++;
974    }
975 
976    q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
977    aq = fd_acc_query(q);
978 
979    /* sample buffer size is based on # of queries: */
980    aq->size = num_queries * sizeof(struct fd6_query_sample);
981    aq->query_data = data;
982 
983    return (struct pipe_query *)q;
984 
985 error:
986    free(data);
987    return NULL;
988 }
989 
/* Hook up the query-related context vfuncs and register the
 * per-generation accumulated-query sample providers.
 */
template <chip CHIP>
void
fd6_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->create_query = fd_acc_create_query;
   ctx->query_update_batch = fd_acc_query_update_batch;

   ctx->record_timestamp = record_timestamp<CHIP>;
   ctx->ts_to_ns = ticks_to_ns;

   /* perfcntr queries go through the gallium batch-query API: */
   pctx->create_batch_query = fd6_create_batch_query;

   fd_acc_query_register_provider(pctx, &occlusion_counter<CHIP>);
   fd_acc_query_register_provider(pctx, &occlusion_predicate<CHIP>);
   fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative<CHIP>);

   fd_acc_query_register_provider(pctx, &time_elapsed<CHIP>);
   fd_acc_query_register_provider(pctx, &timestamp<CHIP>);

   fd_acc_query_register_provider(pctx, &primitives_generated<CHIP>);
   fd_acc_query_register_provider(pctx, &pipeline_statistics_single<CHIP>);

   fd_acc_query_register_provider(pctx, &primitives_emitted<CHIP>);
   fd_acc_query_register_provider(pctx, &so_overflow_any_predicate<CHIP>);
   fd_acc_query_register_provider(pctx, &so_overflow_predicate<CHIP>);
}
FD_GENX(fd6_query_context_init);
1019