1 /*
2 * Copyright © 2017 Rob Clark <[email protected]>
3 * SPDX-License-Identifier: MIT
4 *
5 * Authors:
6 * Rob Clark <[email protected]>
7 */
8
9 /* NOTE: see https://gitlab.freedesktop.org/freedreno/freedreno/-/wikis/A5xx-Queries */
10
11 #include "freedreno_query_acc.h"
12 #include "freedreno_resource.h"
13
14 #include "fd5_context.h"
15 #include "fd5_emit.h"
16 #include "fd5_format.h"
17 #include "fd5_query.h"
18
/* Per-query (or per-counter, for batch queries) GPU-visible sample layout.
 * The GPU writes raw begin/end snapshots and the CP accumulates the delta
 * into 'result' across tile passes.
 */
struct PACKED fd5_query_sample {
   struct fd_acc_query_sample base;

   /* The RB_SAMPLE_COUNT_ADDR destination needs to be 16-byte aligned: */
   uint64_t pad;

   uint64_t start;   /* value snapshotted at query resume */
   uint64_t result;  /* accumulated (stop - start) across resume/pause pairs */
   uint64_t stop;    /* value snapshotted at query pause */
};
FD_DEFINE_CAST(fd_acc_query_sample, fd5_query_sample);
30
/* offset of a single field of an array of fd5_query_sample:
 * NOTE: expands to the (bo, offset, orval, shift) argument list that
 * OUT_RELOC() consumes after the ring argument — the trailing "0, 0"
 * are presumably the or-value and shift (TODO confirm against OUT_RELOC).
 */
#define query_sample_idx(aq, idx, field)                                       \
   fd_resource((aq)->prsc)->bo,                                                \
      (idx * sizeof(struct fd5_query_sample)) +                                \
         offsetof(struct fd5_query_sample, field),                             \
      0, 0

/* offset of a single field of fd5_query_sample: */
#define query_sample(aq, field) query_sample_idx(aq, 0, field)
40
41 /*
42 * Occlusion Query:
43 *
44 * OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
45 * interpret results
46 */
47
/* Start (or restart, on a new tile pass) an occlusion query: point the
 * sample-counter copy destination at our 'start' slot and trigger a
 * ZPASS_DONE event so the GPU writes the current sample count there.
 */
static void
occlusion_resume(struct fd_acc_query *aq, struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->draw;

   OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   /* hw requires the copy destination to be 16-byte aligned (see pad
    * field in fd5_query_sample):
    */
   ASSERT_ALIGNED(struct fd5_query_sample, start, 16);

   OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
   OUT_RELOC(ring, query_sample(aq, start));

   fd5_event_write(batch, ring, ZPASS_DONE, false);
   fd_reset_wfi(batch);

   /* track # of active samples-passed queries (used elsewhere in fd5): */
   fd5_context(batch->ctx)->samples_passed_queries++;
}
66
/* Stop an occlusion query (end of tile pass or query end): snapshot the
 * sample count into 'stop', wait for the GPU to actually land it, then
 * have the CP accumulate (stop - start) into 'result'.
 */
static void
occlusion_pause(struct fd_acc_query *aq, struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->draw;

   /* seed 'stop' with a sentinel value, so we can poll below for the
    * ZPASS_DONE write to overwrite it:
    */
   OUT_PKT7(ring, CP_MEM_WRITE, 4);
   OUT_RELOC(ring, query_sample(aq, stop));
   OUT_RING(ring, 0xffffffff);
   OUT_RING(ring, 0xffffffff);

   OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);

   OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A5XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   ASSERT_ALIGNED(struct fd5_query_sample, stop, 16);

   OUT_PKT4(ring, REG_A5XX_RB_SAMPLE_COUNT_ADDR_LO, 2);
   OUT_RELOC(ring, query_sample(aq, stop));

   fd5_event_write(batch, ring, ZPASS_DONE, false);
   fd_reset_wfi(batch);

   /* poll until 'stop' no longer holds the sentinel, ie. the sample-count
    * copy has landed (function/mask values inherited from blob traces):
    */
   OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
   OUT_RING(ring, 0x00000014); // XXX
   OUT_RELOC(ring, query_sample(aq, stop));
   OUT_RING(ring, 0xffffffff);
   OUT_RING(ring, 0xffffffff);
   OUT_RING(ring, 0x00000010); // XXX

   /* result += stop - start: (DOUBLE = 64b operands, NEG_C subtracts srcC) */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
   OUT_RELOC(ring, query_sample(aq, result)); /* dst */
   OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
   OUT_RELOC(ring, query_sample(aq, stop));   /* srcB */
   OUT_RELOC(ring, query_sample(aq, start));  /* srcC */

   fd5_context(batch->ctx)->samples_passed_queries--;
}
107
108 static void
occlusion_counter_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)109 occlusion_counter_result(struct fd_acc_query *aq,
110 struct fd_acc_query_sample *s,
111 union pipe_query_result *result)
112 {
113 struct fd5_query_sample *sp = fd5_query_sample(s);
114 result->u64 = sp->result;
115 }
116
117 static void
occlusion_predicate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)118 occlusion_predicate_result(struct fd_acc_query *aq,
119 struct fd_acc_query_sample *s,
120 union pipe_query_result *result)
121 {
122 struct fd5_query_sample *sp = fd5_query_sample(s);
123 result->b = !!sp->result;
124 }
125
/* All three occlusion query types share the same hw mechanism and only
 * differ in how the accumulated result is interpreted:
 */
static const struct fd_acc_sample_provider occlusion_counter = {
   .query_type = PIPE_QUERY_OCCLUSION_COUNTER,
   .size = sizeof(struct fd5_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_counter_result,
};

static const struct fd_acc_sample_provider occlusion_predicate = {
   .query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
   .size = sizeof(struct fd5_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_predicate_result,
};

static const struct fd_acc_sample_provider occlusion_predicate_conservative = {
   .query_type = PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE,
   .size = sizeof(struct fd5_query_sample),
   .resume = occlusion_resume,
   .pause = occlusion_pause,
   .result = occlusion_predicate_result,
};
149
150 /*
151 * Timestamp Queries:
152 */
153
/* Snapshot a GPU timestamp into the 'start' slot via a timestamped
 * RB_DONE_TS event write.
 */
static void
timestamp_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring,
            CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
   OUT_RELOC(ring, query_sample(aq, start));
   OUT_RING(ring, 0x00000000);

   fd_reset_wfi(batch);
}
167
/* Snapshot a GPU timestamp into the 'stop' slot, then have the CP
 * accumulate the elapsed delta into 'result'.
 */
static void
timestamp_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->draw;

   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring,
            CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
   OUT_RELOC(ring, query_sample(aq, stop));
   OUT_RING(ring, 0x00000000);

   /* ensure the timestamp lands before the CP reads it back: */
   fd_reset_wfi(batch);
   fd_wfi(batch, ring);

   /* result += stop - start: (DOUBLE = 64b operands, NEG_C subtracts srcC) */
   OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
   OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
   OUT_RELOC(ring, query_sample(aq, result)); /* dst */
   OUT_RELOC(ring, query_sample(aq, result)); /* srcA */
   OUT_RELOC(ring, query_sample(aq, stop));   /* srcB */
   OUT_RELOC(ring, query_sample(aq, start));  /* srcC */
}
190
191 static void
time_elapsed_accumulate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)192 time_elapsed_accumulate_result(struct fd_acc_query *aq,
193 struct fd_acc_query_sample *s,
194 union pipe_query_result *result)
195 {
196 struct fd5_query_sample *sp = fd5_query_sample(s);
197 result->u64 = ticks_to_ns(sp->result);
198 }
199
200 static void
timestamp_accumulate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)201 timestamp_accumulate_result(struct fd_acc_query *aq,
202 struct fd_acc_query_sample *s,
203 union pipe_query_result *result)
204 {
205 struct fd5_query_sample *sp = fd5_query_sample(s);
206 result->u64 = ticks_to_ns(sp->result);
207 }
208
static const struct fd_acc_sample_provider time_elapsed = {
   .query_type = PIPE_QUERY_TIME_ELAPSED,
   /* 'always' active: timestamps are sampled regardless of draw state: */
   .always = true,
   .size = sizeof(struct fd5_query_sample),
   .resume = timestamp_resume,
   .pause = timestamp_pause,
   .result = time_elapsed_accumulate_result,
};

/* NOTE: timestamp query isn't going to give terribly sensible results
 * on a tiler.  But it is needed by qapitrace profile heatmap.  If you
 * add in a binning pass, the results get even more non-sensical.  So
 * we just return the timestamp on the first tile and hope that is
 * kind of good enough.
 */

static const struct fd_acc_sample_provider timestamp = {
   .query_type = PIPE_QUERY_TIMESTAMP,
   .always = true,
   .size = sizeof(struct fd5_query_sample),
   .resume = timestamp_resume,
   .pause = timestamp_pause,
   .result = timestamp_accumulate_result,
};
233
234 /*
235 * Performance Counter (batch) queries:
236 *
237 * Only one of these is active at a time, per design of the gallium
238 * batch_query API design. On perfcntr query tracks N query_types,
239 * each of which has a 'fd_batch_query_entry' that maps it back to
240 * the associated group and counter.
241 */
242
/* Maps one tracked query_type back to its perfcntr group/countable: */
struct fd_batch_query_entry {
   uint8_t gid; /* group-id */
   uint8_t cid; /* countable-id within the group */
};

/* Per batch-query driver data, one entry per tracked query_type: */
struct fd_batch_query_data {
   struct fd_screen *screen;
   unsigned num_query_entries;
   struct fd_batch_query_entry query_entries[];
};
253
/* Start the batch query: program the selected countables into the hw
 * counter select registers, then snapshot each counter's current value
 * into the per-entry 'start' slot.
 */
static void
perfcntr_resume(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_batch_query_data *data = aq->query_data;
   struct fd_screen *screen = data->screen;
   struct fd_ringbuffer *ring = batch->draw;

   /* counters are assigned within each group in request order: */
   unsigned counters_per_group[screen->num_perfcntr_groups];
   memset(counters_per_group, 0, sizeof(counters_per_group));

   fd_wfi(batch, ring);

   /* configure performance counters for the requested queries: */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      struct fd_batch_query_entry *entry = &data->query_entries[i];
      const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
      unsigned counter_idx = counters_per_group[entry->gid]++;

      /* validated upfront in fd5_create_batch_query: */
      assert(counter_idx < g->num_counters);

      OUT_PKT4(ring, g->counters[counter_idx].select_reg, 1);
      OUT_RING(ring, g->countables[entry->cid].selector);
   }

   memset(counters_per_group, 0, sizeof(counters_per_group));

   /* and snapshot the start values */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      struct fd_batch_query_entry *entry = &data->query_entries[i];
      const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
      unsigned counter_idx = counters_per_group[entry->gid]++;
      const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

      /* 64B flag: copy the full 64b counter value (lo+hi) to memory: */
      OUT_PKT7(ring, CP_REG_TO_MEM, 3);
      OUT_RING(ring, CP_REG_TO_MEM_0_64B |
                        CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
      OUT_RELOC(ring, query_sample_idx(aq, i, start));
   }
}
293
/* Stop the batch query: snapshot each counter's current value into the
 * per-entry 'stop' slot and accumulate the deltas into 'result'.
 */
static void
perfcntr_pause(struct fd_acc_query *aq, struct fd_batch *batch) assert_dt
{
   struct fd_batch_query_data *data = aq->query_data;
   struct fd_screen *screen = data->screen;
   struct fd_ringbuffer *ring = batch->draw;

   /* must walk the groups in the same order as perfcntr_resume so each
    * entry reads back the same hw counter it was assigned:
    */
   unsigned counters_per_group[screen->num_perfcntr_groups];
   memset(counters_per_group, 0, sizeof(counters_per_group));

   fd_wfi(batch, ring);

   /* TODO do we need to bother to turn anything off? */

   /* snapshot the end values: */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      struct fd_batch_query_entry *entry = &data->query_entries[i];
      const struct fd_perfcntr_group *g = &screen->perfcntr_groups[entry->gid];
      unsigned counter_idx = counters_per_group[entry->gid]++;
      const struct fd_perfcntr_counter *counter = &g->counters[counter_idx];

      OUT_PKT7(ring, CP_REG_TO_MEM, 3);
      OUT_RING(ring, CP_REG_TO_MEM_0_64B |
                        CP_REG_TO_MEM_0_REG(counter->counter_reg_lo));
      OUT_RELOC(ring, query_sample_idx(aq, i, stop));
   }

   /* and compute the result: */
   for (unsigned i = 0; i < data->num_query_entries; i++) {
      /* result += stop - start: */
      OUT_PKT7(ring, CP_MEM_TO_MEM, 9);
      OUT_RING(ring, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_NEG_C);
      OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* dst */
      OUT_RELOC(ring, query_sample_idx(aq, i, result)); /* srcA */
      OUT_RELOC(ring, query_sample_idx(aq, i, stop));   /* srcB */
      OUT_RELOC(ring, query_sample_idx(aq, i, start));  /* srcC */
   }
}
332
333 static void
perfcntr_accumulate_result(struct fd_acc_query * aq,struct fd_acc_query_sample * s,union pipe_query_result * result)334 perfcntr_accumulate_result(struct fd_acc_query *aq,
335 struct fd_acc_query_sample *s,
336 union pipe_query_result *result)
337 {
338 struct fd_batch_query_data *data = aq->query_data;
339 struct fd5_query_sample *sp = fd5_query_sample(s);
340
341 for (unsigned i = 0; i < data->num_query_entries; i++) {
342 result->batch[i].u64 = sp[i].result;
343 }
344 }
345
/* NOTE: no .size here -- the sample buffer size depends on the number
 * of tracked queries, so it is set per-query in fd5_create_batch_query:
 */
static const struct fd_acc_sample_provider perfcntr = {
   .query_type = FD_QUERY_FIRST_PERFCNTR,
   .always = true,
   .resume = perfcntr_resume,
   .pause = perfcntr_pause,
   .result = perfcntr_accumulate_result,
};
353
354 static struct pipe_query *
fd5_create_batch_query(struct pipe_context * pctx,unsigned num_queries,unsigned * query_types)355 fd5_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
356 unsigned *query_types)
357 {
358 struct fd_context *ctx = fd_context(pctx);
359 struct fd_screen *screen = ctx->screen;
360 struct fd_query *q;
361 struct fd_acc_query *aq;
362 struct fd_batch_query_data *data;
363
364 data = CALLOC_VARIANT_LENGTH_STRUCT(
365 fd_batch_query_data, num_queries * sizeof(data->query_entries[0]));
366
367 data->screen = screen;
368 data->num_query_entries = num_queries;
369
370 /* validate the requested query_types and ensure we don't try
371 * to request more query_types of a given group than we have
372 * counters:
373 */
374 unsigned counters_per_group[screen->num_perfcntr_groups];
375 memset(counters_per_group, 0, sizeof(counters_per_group));
376
377 for (unsigned i = 0; i < num_queries; i++) {
378 unsigned idx = query_types[i] - FD_QUERY_FIRST_PERFCNTR;
379
380 /* verify valid query_type, ie. is it actually a perfcntr? */
381 if ((query_types[i] < FD_QUERY_FIRST_PERFCNTR) ||
382 (idx >= screen->num_perfcntr_queries)) {
383 mesa_loge("invalid batch query query_type: %u", query_types[i]);
384 goto error;
385 }
386
387 struct fd_batch_query_entry *entry = &data->query_entries[i];
388 struct pipe_driver_query_info *pq = &screen->perfcntr_queries[idx];
389
390 entry->gid = pq->group_id;
391
392 /* the perfcntr_queries[] table flattens all the countables
393 * for each group in series, ie:
394 *
395 * (G0,C0), .., (G0,Cn), (G1,C0), .., (G1,Cm), ...
396 *
397 * So to find the countable index just step back through the
398 * table to find the first entry with the same group-id.
399 */
400 while (pq > screen->perfcntr_queries) {
401 pq--;
402 if (pq->group_id == entry->gid)
403 entry->cid++;
404 }
405
406 if (counters_per_group[entry->gid] >=
407 screen->perfcntr_groups[entry->gid].num_counters) {
408 mesa_loge("too many counters for group %u\n", entry->gid);
409 goto error;
410 }
411
412 counters_per_group[entry->gid]++;
413 }
414
415 q = fd_acc_create_query2(ctx, 0, 0, &perfcntr);
416 aq = fd_acc_query(q);
417
418 /* sample buffer size is based on # of queries: */
419 aq->size = num_queries * sizeof(struct fd5_query_sample);
420 aq->query_data = data;
421
422 return (struct pipe_query *)q;
423
424 error:
425 free(data);
426 return NULL;
427 }
428
/* Hook up the a5xx query implementation and register the supported
 * accumulated-query providers on context creation.
 */
void
fd5_query_context_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->create_query = fd_acc_create_query;
   ctx->query_update_batch = fd_acc_query_update_batch;

   pctx->create_batch_query = fd5_create_batch_query;

   fd_acc_query_register_provider(pctx, &occlusion_counter);
   fd_acc_query_register_provider(pctx, &occlusion_predicate);
   fd_acc_query_register_provider(pctx, &occlusion_predicate_conservative);

   fd_acc_query_register_provider(pctx, &time_elapsed);
   fd_acc_query_register_provider(pctx, &timestamp);
}
446