xref: /aosp_15_r20/external/mesa3d/src/amd/common/ac_perfcounter.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2015 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "ac_gpu_info.h"
8 #include "ac_perfcounter.h"
9 #include "ac_spm.h"
10 
11 #include "util/u_memory.h"
12 #include "util/macros.h"
13 
14 /* cik_CB */
15 static unsigned cik_CB_select0[] = {
16    R_037004_CB_PERFCOUNTER0_SELECT,
17    R_03700C_CB_PERFCOUNTER1_SELECT,
18    R_037010_CB_PERFCOUNTER2_SELECT,
19    R_037014_CB_PERFCOUNTER3_SELECT,
20 };
21 static unsigned cik_CB_select1[] = {
22    R_037008_CB_PERFCOUNTER0_SELECT1,
23 };
24 static struct ac_pc_block_base cik_CB = {
25    .gpu_block = CB,
26    .name = "CB",
27    .num_counters = 4,
28    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,
29 
30    .select0 = cik_CB_select0,
31    .select1 = cik_CB_select1,
32    .counter0_lo = R_035018_CB_PERFCOUNTER0_LO,
33 
34    .num_spm_counters = 1,
35    .num_spm_wires = 2,
36    .spm_block_select = AC_SPM_SE_BLOCK_CB,
37 };
38 
39 /* cik_CPC */
40 static unsigned cik_CPC_select0[] = {
41    R_036024_CPC_PERFCOUNTER0_SELECT,
42    R_03600C_CPC_PERFCOUNTER1_SELECT,
43 };
44 static unsigned cik_CPC_select1[] = {
45    R_036010_CPC_PERFCOUNTER0_SELECT1,
46 };
47 static unsigned cik_CPC_counters[] = {
48    R_034018_CPC_PERFCOUNTER0_LO,
49    R_034010_CPC_PERFCOUNTER1_LO,
50 };
51 static struct ac_pc_block_base cik_CPC = {
52    .gpu_block = CPC,
53    .name = "CPC",
54    .num_counters = 2,
55 
56    .select0 = cik_CPC_select0,
57    .select1 = cik_CPC_select1,
58    .counters = cik_CPC_counters,
59 
60    .num_spm_counters = 1,
61    .num_spm_wires = 2,
62    .spm_block_select = AC_SPM_GLOBAL_BLOCK_CPC,
63 };
64 
65 /* cik_CPF */
66 static unsigned cik_CPF_select0[] = {
67    R_03601C_CPF_PERFCOUNTER0_SELECT,
68    R_036014_CPF_PERFCOUNTER1_SELECT,
69 };
70 static unsigned cik_CPF_select1[] = {
71    R_036018_CPF_PERFCOUNTER0_SELECT1,
72 };
73 static unsigned cik_CPF_counters[] = {
74    R_034028_CPF_PERFCOUNTER0_LO,
75    R_034020_CPF_PERFCOUNTER1_LO,
76 };
77 static struct ac_pc_block_base cik_CPF = {
78    .gpu_block = CPF,
79    .name = "CPF",
80    .num_counters = 2,
81 
82    .select0 = cik_CPF_select0,
83    .select1 = cik_CPF_select1,
84    .counters = cik_CPF_counters,
85 
86    .num_spm_counters = 1,
87    .num_spm_wires = 2,
88    .spm_block_select = AC_SPM_GLOBAL_BLOCK_CPF,
89 };
90 
91 /* cik_CPG */
92 static unsigned cik_CPG_select0[] = {
93    R_036008_CPG_PERFCOUNTER0_SELECT,
94    R_036000_CPG_PERFCOUNTER1_SELECT,
95 };
96 static unsigned cik_CPG_select1[] = {
97    R_036004_CPG_PERFCOUNTER0_SELECT1
98 };
99 static unsigned cik_CPG_counters[] = {
100    R_034008_CPG_PERFCOUNTER0_LO,
101    R_034000_CPG_PERFCOUNTER1_LO,
102 };
103 static struct ac_pc_block_base cik_CPG = {
104    .gpu_block = CPG,
105    .name = "CPG",
106    .num_counters = 2,
107 
108    .select0 = cik_CPG_select0,
109    .select1 = cik_CPG_select1,
110    .counters = cik_CPG_counters,
111 
112    .num_spm_counters = 1,
113    .num_spm_wires = 2,
114    .spm_block_select = AC_SPM_GLOBAL_BLOCK_CPG,
115 };
116 
117 /* cik_DB */
118 static unsigned cik_DB_select0[] = {
119    R_037100_DB_PERFCOUNTER0_SELECT,
120    R_037108_DB_PERFCOUNTER1_SELECT,
121    R_037110_DB_PERFCOUNTER2_SELECT,
122    R_037118_DB_PERFCOUNTER3_SELECT,
123 };
124 static unsigned cik_DB_select1[] = {
125    R_037104_DB_PERFCOUNTER0_SELECT1,
126    R_03710C_DB_PERFCOUNTER1_SELECT1,
127 };
128 static struct ac_pc_block_base cik_DB = {
129    .gpu_block = DB,
130    .name = "DB",
131    .num_counters = 4,
132    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,
133 
134    .select0 = cik_DB_select0,
135    .select1 = cik_DB_select1,
136    .counter0_lo = R_035100_DB_PERFCOUNTER0_LO,
137 
138    .num_spm_counters = 2,
139    .num_spm_wires = 3,
140 };
141 
142 /* cik_GDS */
143 static unsigned cik_GDS_select0[] = {
144    R_036A00_GDS_PERFCOUNTER0_SELECT,
145    R_036A04_GDS_PERFCOUNTER1_SELECT,
146    R_036A08_GDS_PERFCOUNTER2_SELECT,
147    R_036A0C_GDS_PERFCOUNTER3_SELECT,
148 };
149 static unsigned cik_GDS_select1[] = {
150    R_036A10_GDS_PERFCOUNTER0_SELECT1,
151 };
152 static struct ac_pc_block_base cik_GDS = {
153    .gpu_block = GDS,
154    .name = "GDS",
155    .num_counters = 4,
156 
157    .select0 = cik_GDS_select0,
158    .select1 = cik_GDS_select1,
159    .counter0_lo = R_034A00_GDS_PERFCOUNTER0_LO,
160 
161    .num_spm_counters = 1,
162    .num_spm_wires = 2,
163    .spm_block_select = AC_SPM_GLOBAL_BLOCK_GDS,
164 };
165 
166 /* cik_GRBM */
167 static unsigned cik_GRBM_select0[] = {
168    R_036100_GRBM_PERFCOUNTER0_SELECT,
169    R_036104_GRBM_PERFCOUNTER1_SELECT,
170 };
171 static unsigned cik_GRBM_counters[] = {
172    R_034100_GRBM_PERFCOUNTER0_LO,
173    R_03410C_GRBM_PERFCOUNTER1_LO,
174 };
175 static struct ac_pc_block_base cik_GRBM = {
176    .gpu_block = GRBM,
177    .name = "GRBM",
178    .num_counters = 2,
179 
180    .select0 = cik_GRBM_select0,
181    .counters = cik_GRBM_counters,
182 };
183 
184 /* cik_GRBMSE */
185 static unsigned cik_GRBMSE_select0[] = {
186    R_036108_GRBM_SE0_PERFCOUNTER_SELECT,
187    R_03610C_GRBM_SE1_PERFCOUNTER_SELECT,
188    R_036110_GRBM_SE2_PERFCOUNTER_SELECT,
189    R_036114_GRBM_SE3_PERFCOUNTER_SELECT,
190 };
191 static struct ac_pc_block_base cik_GRBMSE = {
192    .gpu_block = GRBMSE,
193    .name = "GRBMSE",
194    .num_counters = 4,
195 
196    .select0 = cik_GRBMSE_select0,
197    .counter0_lo = R_034114_GRBM_SE0_PERFCOUNTER_LO,
198 };
199 
200 /* cik_IA */
201 static unsigned cik_IA_select0[] = {
202    R_036210_IA_PERFCOUNTER0_SELECT,
203    R_036214_IA_PERFCOUNTER1_SELECT,
204    R_036218_IA_PERFCOUNTER2_SELECT,
205    R_03621C_IA_PERFCOUNTER3_SELECT,
206 };
207 static unsigned cik_IA_select1[] = {
208    R_036220_IA_PERFCOUNTER0_SELECT1,
209 };
210 static struct ac_pc_block_base cik_IA = {
211    .gpu_block = IA,
212    .name = "IA",
213    .num_counters = 4,
214 
215    .select0 = cik_IA_select0,
216    .select1 = cik_IA_select1,
217    .counter0_lo = R_034220_IA_PERFCOUNTER0_LO,
218 
219    .num_spm_counters = 1,
220    .num_spm_wires = 2,
221 };
222 
223 /* cik_PA_SC */
224 static unsigned cik_PA_SC_select0[] = {
225    R_036500_PA_SC_PERFCOUNTER0_SELECT,
226    R_036508_PA_SC_PERFCOUNTER1_SELECT,
227    R_03650C_PA_SC_PERFCOUNTER2_SELECT,
228    R_036510_PA_SC_PERFCOUNTER3_SELECT,
229    R_036514_PA_SC_PERFCOUNTER4_SELECT,
230    R_036518_PA_SC_PERFCOUNTER5_SELECT,
231    R_03651C_PA_SC_PERFCOUNTER6_SELECT,
232    R_036520_PA_SC_PERFCOUNTER7_SELECT,
233 };
234 static unsigned cik_PA_SC_select1[] = {
235    R_036504_PA_SC_PERFCOUNTER0_SELECT1,
236 };
237 static struct ac_pc_block_base cik_PA_SC = {
238    .gpu_block = PA_SC,
239    .name = "PA_SC",
240    .num_counters = 8,
241    .flags = AC_PC_BLOCK_SE,
242 
243    .select0 = cik_PA_SC_select0,
244    .select1 = cik_PA_SC_select1,
245    .counter0_lo = R_034500_PA_SC_PERFCOUNTER0_LO,
246 
247    .num_spm_counters = 1,
248    .num_spm_wires = 2,
249    .spm_block_select = AC_SPM_SE_BLOCK_SC,
250 };
251 
252 /* cik_PA_SU */
253 static unsigned cik_PA_SU_select0[] = {
254    R_036400_PA_SU_PERFCOUNTER0_SELECT,
255    R_036408_PA_SU_PERFCOUNTER1_SELECT,
256    R_036410_PA_SU_PERFCOUNTER2_SELECT,
257    R_036414_PA_SU_PERFCOUNTER3_SELECT,
258 };
259 static unsigned cik_PA_SU_select1[] = {
260    R_036404_PA_SU_PERFCOUNTER0_SELECT1,
261    R_03640C_PA_SU_PERFCOUNTER1_SELECT1,
262 };
263 /* According to docs, PA_SU counters are only 48 bits wide. */
264 static struct ac_pc_block_base cik_PA_SU = {
265    .gpu_block = PA_SU,
266    .name = "PA_SU",
267    .num_counters = 4,
268    .flags = AC_PC_BLOCK_SE,
269 
270    .select0 = cik_PA_SU_select0,
271    .select1 = cik_PA_SU_select1,
272    .counter0_lo = R_034400_PA_SU_PERFCOUNTER0_LO,
273 
274    .num_spm_counters = 2,
275    .num_spm_wires = 3,
276 };
277 
278 /* cik_SPI */
279 static unsigned cik_SPI_select0[] = {
280    R_036600_SPI_PERFCOUNTER0_SELECT,
281    R_036604_SPI_PERFCOUNTER1_SELECT,
282    R_036608_SPI_PERFCOUNTER2_SELECT,
283    R_03660C_SPI_PERFCOUNTER3_SELECT,
284    R_036620_SPI_PERFCOUNTER4_SELECT,
285    R_036624_SPI_PERFCOUNTER5_SELECT,
286 };
287 static unsigned cik_SPI_select1[] = {
288    R_036610_SPI_PERFCOUNTER0_SELECT1,
289    R_036614_SPI_PERFCOUNTER1_SELECT1,
290    R_036618_SPI_PERFCOUNTER2_SELECT1,
291    R_03661C_SPI_PERFCOUNTER3_SELECT1
292 };
293 static struct ac_pc_block_base cik_SPI = {
294    .gpu_block = SPI,
295    .name = "SPI",
296    .num_counters = 6,
297    .flags = AC_PC_BLOCK_SE,
298 
299    .select0 = cik_SPI_select0,
300    .select1 = cik_SPI_select1,
301    .counter0_lo = R_034604_SPI_PERFCOUNTER0_LO,
302 
303    .num_spm_counters = 4,
304    .num_spm_wires = 8,
305    .spm_block_select = AC_SPM_SE_BLOCK_SPI,
306 };
307 
308 /* cik_SQ */
309 static unsigned cik_SQ_select0[] = {
310    R_036700_SQ_PERFCOUNTER0_SELECT,
311    R_036704_SQ_PERFCOUNTER1_SELECT,
312    R_036708_SQ_PERFCOUNTER2_SELECT,
313    R_03670C_SQ_PERFCOUNTER3_SELECT,
314    R_036710_SQ_PERFCOUNTER4_SELECT,
315    R_036714_SQ_PERFCOUNTER5_SELECT,
316    R_036718_SQ_PERFCOUNTER6_SELECT,
317    R_03671C_SQ_PERFCOUNTER7_SELECT,
318    R_036720_SQ_PERFCOUNTER8_SELECT,
319    R_036724_SQ_PERFCOUNTER9_SELECT,
320    R_036728_SQ_PERFCOUNTER10_SELECT,
321    R_03672C_SQ_PERFCOUNTER11_SELECT,
322    R_036730_SQ_PERFCOUNTER12_SELECT,
323    R_036734_SQ_PERFCOUNTER13_SELECT,
324    R_036738_SQ_PERFCOUNTER14_SELECT,
325    R_03673C_SQ_PERFCOUNTER15_SELECT,
326 };
327 static struct ac_pc_block_base cik_SQ = {
328    .gpu_block = SQ,
329    .name = "SQ",
330    .num_counters = 16,
331    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER,
332 
333    .select0 = cik_SQ_select0,
334    .select_or = S_036700_SQC_BANK_MASK(15) | S_036700_SQC_CLIENT_MASK(15) | S_036700_SIMD_MASK(15),
335    .counter0_lo = R_034700_SQ_PERFCOUNTER0_LO,
336 
337    .num_spm_wires = 8,
338 };
339 
340 /* cik_SX */
341 static unsigned cik_SX_select0[] = {
342    R_036900_SX_PERFCOUNTER0_SELECT,
343    R_036904_SX_PERFCOUNTER1_SELECT,
344    R_036908_SX_PERFCOUNTER2_SELECT,
345    R_03690C_SX_PERFCOUNTER3_SELECT,
346 };
347 static unsigned cik_SX_select1[] = {
348    R_036910_SX_PERFCOUNTER0_SELECT1,
349    R_036914_SX_PERFCOUNTER1_SELECT1,
350 };
351 static struct ac_pc_block_base cik_SX = {
352    .gpu_block = SX,
353    .name = "SX",
354    .num_counters = 4,
355    .flags = AC_PC_BLOCK_SE,
356 
357    .select0 = cik_SX_select0,
358    .select1 = cik_SX_select1,
359    .counter0_lo = R_034900_SX_PERFCOUNTER0_LO,
360 
361    .num_spm_counters = 2,
362    .num_spm_wires = 4,
363    .spm_block_select = AC_SPM_SE_BLOCK_SX,
364 };
365 
366 /* cik_TA */
367 static unsigned cik_TA_select0[] = {
368    R_036B00_TA_PERFCOUNTER0_SELECT,
369    R_036B08_TA_PERFCOUNTER1_SELECT,
370 };
371 static unsigned cik_TA_select1[] = {
372    R_036B04_TA_PERFCOUNTER0_SELECT1,
373 };
374 static struct ac_pc_block_base cik_TA = {
375    .gpu_block = TA,
376    .name = "TA",
377    .num_counters = 2,
378    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,
379 
380    .select0 = cik_TA_select0,
381    .select1 = cik_TA_select1,
382    .counter0_lo = R_034B00_TA_PERFCOUNTER0_LO,
383 
384    .num_spm_counters = 1,
385    .num_spm_wires = 2,
386    .spm_block_select = AC_SPM_SE_BLOCK_TA,
387 };
388 
389 /* cik_TD */
390 static unsigned cik_TD_select0[] = {
391    R_036C00_TD_PERFCOUNTER0_SELECT,
392    R_036C08_TD_PERFCOUNTER1_SELECT,
393 };
394 static unsigned cik_TD_select1[] = {
395    R_036C04_TD_PERFCOUNTER0_SELECT1,
396 };
397 static struct ac_pc_block_base cik_TD = {
398    .gpu_block = TD,
399    .name = "TD",
400    .num_counters = 2,
401    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,
402 
403    .select0 = cik_TD_select0,
404    .select1 = cik_TD_select1,
405    .counter0_lo = R_034C00_TD_PERFCOUNTER0_LO,
406 
407    .num_spm_counters = 1,
408    .num_spm_wires = 2,
409    .spm_block_select = AC_SPM_SE_BLOCK_TD,
410 };
411 
412 /* cik_TCA */
413 static unsigned cik_TCA_select0[] = {
414    R_036E40_TCA_PERFCOUNTER0_SELECT,
415    R_036E48_TCA_PERFCOUNTER1_SELECT,
416    R_036E50_TCA_PERFCOUNTER2_SELECT,
417    R_036E54_TCA_PERFCOUNTER3_SELECT,
418 };
419 static unsigned cik_TCA_select1[] = {
420    R_036E44_TCA_PERFCOUNTER0_SELECT1,
421    R_036E4C_TCA_PERFCOUNTER1_SELECT1,
422 };
423 static struct ac_pc_block_base cik_TCA = {
424    .gpu_block = TCA,
425    .name = "TCA",
426    .num_counters = 4,
427    .flags = AC_PC_BLOCK_INSTANCE_GROUPS,
428 
429    .select0 = cik_TCA_select0,
430    .select1 = cik_TCA_select1,
431    .counter0_lo = R_034E40_TCA_PERFCOUNTER0_LO,
432 
433    .num_spm_counters = 2,
434    .num_spm_wires = 4,
435 };
436 
437 /* cik_TCC */
438 static unsigned cik_TCC_select0[] = {
439    R_036E00_TCC_PERFCOUNTER0_SELECT,
440    R_036E08_TCC_PERFCOUNTER1_SELECT,
441    R_036E10_TCC_PERFCOUNTER2_SELECT,
442    R_036E14_TCC_PERFCOUNTER3_SELECT,
443 };
444 static unsigned cik_TCC_select1[] = {
445    R_036E04_TCC_PERFCOUNTER0_SELECT1,
446    R_036E0C_TCC_PERFCOUNTER1_SELECT1,
447 };
448 static struct ac_pc_block_base cik_TCC = {
449    .gpu_block = TCC,
450    .name = "TCC",
451    .num_counters = 4,
452    .flags = AC_PC_BLOCK_INSTANCE_GROUPS,
453 
454    .select0 = cik_TCC_select0,
455    .select1 = cik_TCC_select1,
456    .counter0_lo = R_034E00_TCC_PERFCOUNTER0_LO,
457 
458    .num_spm_counters = 2,
459    .num_spm_wires = 4,
460 };
461 
462 /* cik_TCP */
463 static unsigned cik_TCP_select0[] = {
464    R_036D00_TCP_PERFCOUNTER0_SELECT,
465    R_036D08_TCP_PERFCOUNTER1_SELECT,
466    R_036D10_TCP_PERFCOUNTER2_SELECT,
467    R_036D14_TCP_PERFCOUNTER3_SELECT,
468 };
469 static unsigned cik_TCP_select1[] = {
470    R_036D04_TCP_PERFCOUNTER0_SELECT1,
471    R_036D0C_TCP_PERFCOUNTER1_SELECT1,
472 };
473 static struct ac_pc_block_base cik_TCP = {
474    .gpu_block = TCP,
475    .name = "TCP",
476    .num_counters = 4,
477    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,
478 
479    .select0 = cik_TCP_select0,
480    .select1 = cik_TCP_select1,
481    .counter0_lo = R_034D00_TCP_PERFCOUNTER0_LO,
482 
483    .num_spm_counters = 2,
484    .num_spm_wires = 3,
485 };
486 
487 /* cik_VGT */
488 static unsigned cik_VGT_select0[] = {
489    R_036230_VGT_PERFCOUNTER0_SELECT,
490    R_036234_VGT_PERFCOUNTER1_SELECT,
491    R_036238_VGT_PERFCOUNTER2_SELECT,
492    R_03623C_VGT_PERFCOUNTER3_SELECT,
493 };
494 static unsigned cik_VGT_select1[] = {
495    R_036240_VGT_PERFCOUNTER0_SELECT1,
496    R_036244_VGT_PERFCOUNTER1_SELECT1,
497 };
498 static struct ac_pc_block_base cik_VGT = {
499    .gpu_block = VGT,
500    .name = "VGT",
501    .num_counters = 4,
502    .flags = AC_PC_BLOCK_SE,
503 
504    .select0 = cik_VGT_select0,
505    .select1 = cik_VGT_select1,
506    .counter0_lo = R_034240_VGT_PERFCOUNTER0_LO,
507 
508    .num_spm_counters = 2,
509    .num_spm_wires = 3,
510 };
511 
512 /* cik_WD */
513 static unsigned cik_WD_select0[] = {
514    R_036200_WD_PERFCOUNTER0_SELECT,
515    R_036204_WD_PERFCOUNTER1_SELECT,
516    R_036208_WD_PERFCOUNTER2_SELECT,
517    R_03620C_WD_PERFCOUNTER3_SELECT,
518 };
519 static struct ac_pc_block_base cik_WD = {
520    .gpu_block = WD,
521    .name = "WD",
522    .num_counters = 4,
523 
524    .select0 = cik_WD_select0,
525    .counter0_lo = R_034200_WD_PERFCOUNTER0_LO,
526 };
527 
528 /* cik_MC */
529 static struct ac_pc_block_base cik_MC = {
530    .gpu_block = MC,
531    .name = "MC",
532    .num_counters = 4,
533 };
534 
535 /* cik_SRBM */
536 static struct ac_pc_block_base cik_SRBM = {
537    .gpu_block = SRBM,
538    .name = "SRBM",
539    .num_counters = 2,
540 };
541 
542 /* gfx10_CHA */
543 static unsigned gfx10_CHA_select0[] = {
544    R_037780_CHA_PERFCOUNTER0_SELECT,
545    R_037788_CHA_PERFCOUNTER1_SELECT,
546    R_03778C_CHA_PERFCOUNTER2_SELECT,
547    R_037790_CHA_PERFCOUNTER3_SELECT,
548 };
549 static unsigned gfx10_CHA_select1[] = {
550    R_037784_CHA_PERFCOUNTER0_SELECT1,
551 };
552 static struct ac_pc_block_base gfx10_CHA = {
553    .gpu_block = CHA,
554    .name = "CHA",
555    .num_counters = 4,
556 
557    .select0 = gfx10_CHA_select0,
558    .select1 = gfx10_CHA_select1,
559    .counter0_lo = R_035800_CHA_PERFCOUNTER0_LO,
560 
561    .num_spm_counters = 1,
562    .num_spm_wires = 2,
563    .spm_block_select = AC_SPM_GLOBAL_BLOCK_CHA,
564 };
565 
566 /* gfx10_CHCG */
567 static unsigned gfx10_CHCG_select0[] = {
568    R_036F18_CHCG_PERFCOUNTER0_SELECT,
569    R_036F20_CHCG_PERFCOUNTER1_SELECT,
570    R_036F24_CHCG_PERFCOUNTER2_SELECT,
571    R_036F28_CHCG_PERFCOUNTER3_SELECT,
572 };
573 static unsigned gfx10_CHCG_select1[] = {
574    R_036F1C_CHCG_PERFCOUNTER0_SELECT1,
575 };
576 static struct ac_pc_block_base gfx10_CHCG = {
577    .gpu_block = CHCG,
578    .name = "CHCG",
579    .num_counters = 4,
580 
581    .select0 = gfx10_CHCG_select0,
582    .select1 = gfx10_CHCG_select1,
583    .counter0_lo = R_034F20_CHCG_PERFCOUNTER0_LO,
584 
585    .num_spm_counters = 1,
586    .num_spm_wires = 2,
587    .spm_block_select = AC_SPM_GLOBAL_BLOCK_CHCG,
588 };
589 
590 /* gfx10_CHC */
591 static unsigned gfx10_CHC_select0[] = {
592    R_036F00_CHC_PERFCOUNTER0_SELECT,
593    R_036F08_CHC_PERFCOUNTER1_SELECT,
594    R_036F0C_CHC_PERFCOUNTER2_SELECT,
595    R_036F10_CHC_PERFCOUNTER3_SELECT,
596 };
597 static unsigned gfx10_CHC_select1[] = {
598    R_036F04_CHC_PERFCOUNTER0_SELECT1,
599 };
600 static struct ac_pc_block_base gfx10_CHC = {
601    .gpu_block = CHC,
602    .name = "CHC",
603    .num_counters = 4,
604 
605    .select0 = gfx10_CHC_select0,
606    .select1 = gfx10_CHC_select1,
607    .counter0_lo = R_034F00_CHC_PERFCOUNTER0_LO,
608 
609    .num_spm_counters = 1,
610    .num_spm_wires = 2,
611    .spm_block_select = AC_SPM_GLOBAL_BLOCK_CHC,
612 };
613 
614 /* gfx10_DB */
615 static struct ac_pc_block_base gfx10_DB = {
616    .gpu_block = DB,
617    .name = "DB",
618    .num_counters = 4,
619    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,
620 
621    .select0 = cik_DB_select0,
622    .select1 = cik_DB_select1,
623    .counter0_lo = R_035100_DB_PERFCOUNTER0_LO,
624 
625    .num_spm_counters = 2,
626    .num_spm_wires = 4,
627    .spm_block_select = AC_SPM_SE_BLOCK_DB,
628 };
629 
630 /* gfx10_GCR */
631 static unsigned gfx10_GCR_select0[] = {
632    R_037580_GCR_PERFCOUNTER0_SELECT,
633    R_037588_GCR_PERFCOUNTER1_SELECT,
634 };
635 static unsigned gfx10_GCR_select1[] = {
636    R_037584_GCR_PERFCOUNTER0_SELECT1,
637 };
638 static struct ac_pc_block_base gfx10_GCR = {
639    .gpu_block = GCR,
640    .name = "GCR",
641    .num_counters = 2,
642 
643    .select0 = gfx10_GCR_select0,
644    .select1 = gfx10_GCR_select1,
645    .counter0_lo = R_035480_GCR_PERFCOUNTER0_LO,
646 
647    .num_spm_counters = 1,
648    .num_spm_wires = 2,
649    .spm_block_select = AC_SPM_GLOBAL_BLOCK_GCR,
650 };
651 
652 /* gfx10_GE */
653 static unsigned gfx10_GE_select0[] = {
654    R_036200_GE_PERFCOUNTER0_SELECT,
655    R_036208_GE_PERFCOUNTER1_SELECT,
656    R_036210_GE_PERFCOUNTER2_SELECT,
657    R_036218_GE_PERFCOUNTER3_SELECT,
658    R_036220_GE_PERFCOUNTER4_SELECT,
659    R_036228_GE_PERFCOUNTER5_SELECT,
660    R_036230_GE_PERFCOUNTER6_SELECT,
661    R_036238_GE_PERFCOUNTER7_SELECT,
662    R_036240_GE_PERFCOUNTER8_SELECT,
663    R_036248_GE_PERFCOUNTER9_SELECT,
664    R_036250_GE_PERFCOUNTER10_SELECT,
665    R_036258_GE_PERFCOUNTER11_SELECT,
666 };
667 static unsigned gfx10_GE_select1[] = {
668    R_036204_GE_PERFCOUNTER0_SELECT1,
669    R_03620C_GE_PERFCOUNTER1_SELECT1,
670    R_036214_GE_PERFCOUNTER2_SELECT1,
671    R_03621C_GE_PERFCOUNTER3_SELECT1,
672 };
673 static struct ac_pc_block_base gfx10_GE = {
674    .gpu_block = GE,
675    .name = "GE",
676    .num_counters = 12,
677 
678    .select0 = gfx10_GE_select0,
679    .select1 = gfx10_GE_select1,
680    .counter0_lo = R_034200_GE_PERFCOUNTER0_LO,
681 
682    .num_spm_counters = 4,
683    .num_spm_wires = 8,
684    .spm_block_select = AC_SPM_GLOBAL_BLOCK_GE,
685 };
686 
687 /* gfx10_GL1A */
688 static unsigned gfx10_GL1A_select0[] = {
689    R_037700_GL1A_PERFCOUNTER0_SELECT,
690    R_037708_GL1A_PERFCOUNTER1_SELECT,
691    R_03770C_GL1A_PERFCOUNTER2_SELECT,
692    R_037710_GL1A_PERFCOUNTER3_SELECT,
693 };
694 static unsigned gfx10_GL1A_select1[] = {
695    R_037704_GL1A_PERFCOUNTER0_SELECT1,
696 };
697 static struct ac_pc_block_base gfx10_GL1A = {
698    .gpu_block = GL1A,
699    .name = "GL1A",
700    .num_counters = 4,
701    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED,
702 
703    .select0 = gfx10_GL1A_select0,
704    .select1 = gfx10_GL1A_select1,
705    .counter0_lo = R_035700_GL1A_PERFCOUNTER0_LO,
706 
707    .num_spm_counters = 1,
708    .num_spm_wires = 2,
709    .spm_block_select = AC_SPM_SE_BLOCK_GL1A,
710 };
711 
712 /* gfx10_GL1C */
713 static unsigned gfx10_GL1C_select0[] = {
714    R_036E80_GL1C_PERFCOUNTER0_SELECT,
715    R_036E88_GL1C_PERFCOUNTER1_SELECT,
716    R_036E8C_GL1C_PERFCOUNTER2_SELECT,
717    R_036E90_GL1C_PERFCOUNTER3_SELECT,
718 };
719 static unsigned gfx10_GL1C_select1[] = {
720    R_036E84_GL1C_PERFCOUNTER0_SELECT1,
721 };
722 static struct ac_pc_block_base gfx10_GL1C = {
723    .gpu_block = GL1C,
724    .name = "GL1C",
725    .num_counters = 4,
726    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED,
727 
728    .select0 = gfx10_GL1C_select0,
729    .select1 = gfx10_GL1C_select1,
730    .counter0_lo = R_034E80_GL1C_PERFCOUNTER0_LO,
731 
732    .num_spm_counters = 1,
733    .num_spm_wires = 2,
734    .spm_block_select = AC_SPM_SE_BLOCK_GL1C,
735 };
736 
737 /* gfx10_GL2A */
738 static unsigned gfx10_GL2A_select0[] = {
739    R_036E40_GL2A_PERFCOUNTER0_SELECT,
740    R_036E48_GL2A_PERFCOUNTER1_SELECT,
741    R_036E50_GL2A_PERFCOUNTER2_SELECT,
742    R_036E54_GL2A_PERFCOUNTER3_SELECT,
743 };
744 static unsigned gfx10_GL2A_select1[] = {
745    R_036E44_GL2A_PERFCOUNTER0_SELECT1,
746    R_036E4C_GL2A_PERFCOUNTER1_SELECT1,
747 };
748 static struct ac_pc_block_base gfx10_GL2A = {
749    .gpu_block = GL2A,
750    .name = "GL2A",
751    .num_counters = 4,
752 
753    .select0 = gfx10_GL2A_select0,
754    .select1 = gfx10_GL2A_select1,
755    .counter0_lo = R_034E40_GL2A_PERFCOUNTER0_LO,
756 
757    .num_spm_counters = 2,
758    .num_spm_wires = 4,
759    .spm_block_select = AC_SPM_GLOBAL_BLOCK_GL2A,
760 };
761 
762 /* gfx10_GL2C */
763 static unsigned gfx10_GL2C_select0[] = {
764    R_036E00_GL2C_PERFCOUNTER0_SELECT,
765    R_036E08_GL2C_PERFCOUNTER1_SELECT,
766    R_036E10_GL2C_PERFCOUNTER2_SELECT,
767    R_036E14_GL2C_PERFCOUNTER3_SELECT,
768 };
769 static unsigned gfx10_GL2C_select1[] = {
770    R_036E04_GL2C_PERFCOUNTER0_SELECT1,
771    R_036E0C_GL2C_PERFCOUNTER1_SELECT1,
772 };
773 static struct ac_pc_block_base gfx10_GL2C = {
774    .gpu_block = GL2C,
775    .name = "GL2C",
776    .num_counters = 4,
777 
778    .select0 = gfx10_GL2C_select0,
779    .select1 = gfx10_GL2C_select1,
780    .counter0_lo = R_034E00_GL2C_PERFCOUNTER0_LO,
781 
782    .num_spm_counters = 2,
783    .num_spm_wires = 4,
784    .spm_block_select = AC_SPM_GLOBAL_BLOCK_GL2C,
785 };
786 
787 /* gfx10_PA_PH */
788 static unsigned gfx10_PA_PH_select0[] = {
789    R_037600_PA_PH_PERFCOUNTER0_SELECT,
790    R_037608_PA_PH_PERFCOUNTER1_SELECT,
791    R_03760C_PA_PH_PERFCOUNTER2_SELECT,
792    R_037610_PA_PH_PERFCOUNTER3_SELECT,
793    R_037614_PA_PH_PERFCOUNTER4_SELECT,
794    R_037618_PA_PH_PERFCOUNTER5_SELECT,
795    R_03761C_PA_PH_PERFCOUNTER6_SELECT,
796    R_037620_PA_PH_PERFCOUNTER7_SELECT,
797 };
798 static unsigned gfx10_PA_PH_select1[] = {
799    R_037604_PA_PH_PERFCOUNTER0_SELECT1,
800    R_037640_PA_PH_PERFCOUNTER1_SELECT1,
801    R_037644_PA_PH_PERFCOUNTER2_SELECT1,
802    R_037648_PA_PH_PERFCOUNTER3_SELECT1,
803 };
804 static struct ac_pc_block_base gfx10_PA_PH = {
805    .gpu_block = PA_PH,
806    .name = "PA_PH",
807    .num_counters = 8,
808    .flags = AC_PC_BLOCK_SE,
809 
810    .select0 = gfx10_PA_PH_select0,
811    .select1 = gfx10_PA_PH_select1,
812    .counter0_lo = R_035600_PA_PH_PERFCOUNTER0_LO,
813 
814    .num_spm_counters = 4,
815    .num_spm_wires = 8,
816    .spm_block_select = AC_SPM_GLOBAL_BLOCK_PH,
817 };
818 
819 /* gfx10_PA_SU */
820 static unsigned gfx10_PA_SU_select0[] = {
821    R_036400_PA_SU_PERFCOUNTER0_SELECT,
822    R_036408_PA_SU_PERFCOUNTER1_SELECT,
823    R_036410_PA_SU_PERFCOUNTER2_SELECT,
824    R_036418_PA_SU_PERFCOUNTER3_SELECT,
825 };
826 static unsigned gfx10_PA_SU_select1[] = {
827    R_036404_PA_SU_PERFCOUNTER0_SELECT1,
828    R_03640C_PA_SU_PERFCOUNTER1_SELECT1,
829    R_036414_PA_SU_PERFCOUNTER2_SELECT1,
830    R_03641C_PA_SU_PERFCOUNTER3_SELECT1,
831 };
832 static struct ac_pc_block_base gfx10_PA_SU = {
833    .gpu_block = PA_SU,
834    .name = "PA_SU",
835    .num_counters = 4,
836    .flags = AC_PC_BLOCK_SE,
837 
838    .select0 = gfx10_PA_SU_select0,
839    .select1 = gfx10_PA_SU_select1,
840    .counter0_lo = R_034400_PA_SU_PERFCOUNTER0_LO,
841 
842    .num_spm_counters = 4,
843    .num_spm_wires = 8,
844    .spm_block_select = AC_SPM_SE_BLOCK_PA,
845 };
846 
847 /* gfx10_RLC */
848 static unsigned gfx10_RLC_select0[] = {
849    R_037304_RLC_PERFCOUNTER0_SELECT,
850    R_037308_RLC_PERFCOUNTER1_SELECT,
851 };
852 static struct ac_pc_block_base gfx10_RLC = {
853    .gpu_block = RLC,
854    .name = "RLC",
855    .num_counters = 2,
856 
857    .select0 = gfx10_RLC_select0,
858    .counter0_lo = R_035200_RLC_PERFCOUNTER0_LO,
859    .num_spm_counters = 0,
860 };
861 
862 /* gfx10_RMI */
863 static unsigned gfx10_RMI_select0[] = {
864    R_037400_RMI_PERFCOUNTER0_SELECT,
865    R_037408_RMI_PERFCOUNTER1_SELECT,
866    R_03740C_RMI_PERFCOUNTER2_SELECT,
867    R_037414_RMI_PERFCOUNTER3_SELECT,
868 };
869 static unsigned gfx10_RMI_select1[] = {
870    R_037404_RMI_PERFCOUNTER0_SELECT1,
871    R_037410_RMI_PERFCOUNTER2_SELECT1,
872 };
873 static struct ac_pc_block_base gfx10_RMI = {
874    .gpu_block = RMI,
875    .name = "RMI",
876    .num_counters = 4,
877    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,
878 
879    .select0 = gfx10_RMI_select0,
880    .select1 = gfx10_RMI_select1,
881    .counter0_lo = R_035300_RMI_PERFCOUNTER0_LO,
882 
883    .num_spm_counters = 2,
884    .num_spm_wires = 2,
885    .spm_block_select = AC_SPM_SE_BLOCK_RMI,
886 };
887 
888 /* gfx10_SQ */
889 static struct ac_pc_block_base gfx10_SQ = {
890    .gpu_block = SQ,
891    .name = "SQ",
892    .num_counters = 16,
893    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER,
894 
895    .select0 = cik_SQ_select0,
896    .select_or = S_036700_SQC_BANK_MASK(15),
897    .counter0_lo = R_034700_SQ_PERFCOUNTER0_LO,
898 
899    .num_spm_wires = 16,
900    .spm_block_select = AC_SPM_SE_BLOCK_SQG,
901 };
902 
903 /* gfx10_TCP */
904 static struct ac_pc_block_base gfx10_TCP = {
905    .gpu_block = TCP,
906    .name = "TCP",
907    .num_counters = 4,
908    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,
909 
910    .select0 = cik_TCP_select0,
911    .select1 = cik_TCP_select1,
912    .counter0_lo = R_034D00_TCP_PERFCOUNTER0_LO,
913 
914    .num_spm_counters = 2,
915    .num_spm_wires = 4,
916    .spm_block_select = AC_SPM_SE_BLOCK_TCP,
917 };
918 
919 /* gfx10_UTCL1 */
920 static unsigned gfx10_UTCL1_select0[] = {
921    R_03758C_UTCL1_PERFCOUNTER0_SELECT,
922    R_037590_UTCL1_PERFCOUNTER1_SELECT,
923 };
924 static struct ac_pc_block_base gfx10_UTCL1 = {
925    .gpu_block = UTCL1,
926    .name = "UTCL1",
927    .num_counters = 2,
928    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED,
929 
930    .select0 = gfx10_UTCL1_select0,
931    .counter0_lo = R_035470_UTCL1_PERFCOUNTER0_LO,
932    .num_spm_counters = 0,
933 };
934 
935 /* gfx11_SQ_WQP */
936 static struct ac_pc_block_base gfx11_SQ_WGP = {
937    .gpu_block = SQ_WGP,
938    .name = "SQ_WGP",
939    .num_counters = 16,
940    .flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER,
941 
942    .select0 = cik_SQ_select0,
943    .counter0_lo = R_034700_SQ_PERFCOUNTER0_LO,
944 
945    .num_spm_counters = 8,
946    .num_spm_wires = 8,
947    .spm_block_select = AC_SPM_SE_BLOCK_SQC,
948 };
949 
/* Both the number of instances and the number of selectors vary between chips
 * of the same class. We only differentiate by class here and simply expose the
 * maximum number over all chips in a class.
 *
 * Unfortunately, GPUPerfStudio uses the order of performance counter groups
 * blindly once it believes it has identified the hardware, so the order of
 * blocks here matters.
 */
958 static struct ac_pc_block_gfxdescr groups_CIK[] = {
959    {&cik_CB, 226},     {&cik_CPF, 17},    {&cik_DB, 257},  {&cik_GRBM, 34},   {&cik_GRBMSE, 15},
960    {&cik_PA_SU, 153},  {&cik_PA_SC, 395}, {&cik_SPI, 186}, {&cik_SQ, 252},    {&cik_SX, 32},
961    {&cik_TA, 111},     {&cik_TCA, 39, 2}, {&cik_TCC, 160}, {&cik_TD, 55},     {&cik_TCP, 154},
962    {&cik_GDS, 121},    {&cik_VGT, 140},   {&cik_IA, 22},   {&cik_MC, 22},     {&cik_SRBM, 19},
963    {&cik_WD, 22},      {&cik_CPG, 46},    {&cik_CPC, 22},
964 
965 };
966 
967 static struct ac_pc_block_gfxdescr groups_VI[] = {
968    {&cik_CB, 405},     {&cik_CPF, 19},    {&cik_DB, 257},  {&cik_GRBM, 34},   {&cik_GRBMSE, 15},
969    {&cik_PA_SU, 154},  {&cik_PA_SC, 397}, {&cik_SPI, 197}, {&cik_SQ, 273},    {&cik_SX, 34},
970    {&cik_TA, 119},     {&cik_TCA, 35, 2}, {&cik_TCC, 192}, {&cik_TD, 55},     {&cik_TCP, 180},
971    {&cik_GDS, 121},    {&cik_VGT, 147},   {&cik_IA, 24},   {&cik_MC, 22},     {&cik_SRBM, 27},
972    {&cik_WD, 37},      {&cik_CPG, 48},    {&cik_CPC, 24},
973 
974 };
975 
976 static struct ac_pc_block_gfxdescr groups_gfx9[] = {
977    {&cik_CB, 438},     {&cik_CPF, 32},    {&cik_DB, 328},  {&cik_GRBM, 38},   {&cik_GRBMSE, 16},
978    {&cik_PA_SU, 292},  {&cik_PA_SC, 491}, {&cik_SPI, 196}, {&cik_SQ, 374},    {&cik_SX, 208},
979    {&cik_TA, 119},     {&cik_TCA, 35, 2}, {&cik_TCC, 256}, {&cik_TD, 57},     {&cik_TCP, 85},
980    {&cik_GDS, 121},    {&cik_VGT, 148},   {&cik_IA, 32},   {&cik_WD, 58},     {&cik_CPG, 59},
981    {&cik_CPC, 35},
982 };
983 
984 static struct ac_pc_block_gfxdescr groups_gfx10[] = {
985    {&cik_CB, 461},
986    {&gfx10_CHA, 45},
987    {&gfx10_CHCG, 35},
988    {&gfx10_CHC, 35},
989    {&cik_CPC, 47},
990    {&cik_CPF, 40},
991    {&cik_CPG, 82},
992    {&gfx10_DB, 370},
993    {&gfx10_GCR, 94},
994    {&cik_GDS, 123},
995    {&gfx10_GE, 315},
996    {&gfx10_GL1A, 36},
997    {&gfx10_GL1C, 64, 4},
998    {&gfx10_GL2A, 91},
999    {&gfx10_GL2C, 235},
1000    {&cik_GRBM, 47},
1001    {&cik_GRBMSE, 19},
1002    {&gfx10_PA_PH, 960},
1003    {&cik_PA_SC, 552},
1004    {&gfx10_PA_SU, 266},
1005    {&gfx10_RLC, 7},
1006    {&gfx10_RMI, 258},
1007    {&cik_SPI, 329},
1008    {&gfx10_SQ, 509},
1009    {&cik_SX, 225},
1010    {&cik_TA, 226},
1011    {&gfx10_TCP, 77},
1012    {&cik_TD, 61},
1013    {&gfx10_UTCL1, 15},
1014 };
1015 
1016 static struct ac_pc_block_gfxdescr groups_gfx11[] = {
1017    {&cik_CB, 313},
1018    {&gfx10_CHA, 39},
1019    {&gfx10_CHCG, 43},
1020    {&gfx10_CHC, 43},
1021    {&cik_CPC, 55},
1022    {&cik_CPF, 43},
1023    {&cik_CPG, 91},
1024    {&gfx10_DB, 370},
1025    {&gfx10_GCR, 154},
1026    {&cik_GDS, 147},
1027    {&gfx10_GE, 39},
1028    {&gfx10_GL1A, 23},
1029    {&gfx10_GL1C, 83, 4},
1030    {&gfx10_GL2A, 107},
1031    {&gfx10_GL2C, 258},
1032    {&cik_GRBM, 49},
1033    {&cik_GRBMSE, 20},
1034    {&gfx10_PA_PH, 1023},
1035    {&cik_PA_SC, 664},
1036    {&gfx10_PA_SU, 310},
1037    {&gfx10_RLC, 6},
1038    {&gfx10_RMI, 138},
1039    {&cik_SPI, 283},
1040    {&gfx10_SQ, 36},
1041    {&cik_SX, 81},
1042    {&cik_TA, 235},
1043    {&gfx10_TCP, 77},
1044    {&cik_TD, 196},
1045    {&gfx10_UTCL1, 65},
1046    {&gfx11_SQ_WGP, 511, 4},
1047 };
1048 
ac_lookup_counter(const struct ac_perfcounters * pc,unsigned index,unsigned * base_gid,unsigned * sub_index)1049 struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc,
1050                                       unsigned index, unsigned *base_gid,
1051                                       unsigned *sub_index)
1052 {
1053    struct ac_pc_block *block = pc->blocks;
1054    unsigned bid;
1055 
1056    *base_gid = 0;
1057    for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
1058       unsigned total = block->num_groups * block->b->selectors;
1059 
1060       if (index < total) {
1061          *sub_index = index;
1062          return block;
1063       }
1064 
1065       index -= total;
1066       *base_gid += block->num_groups;
1067    }
1068 
1069    return NULL;
1070 }
1071 
ac_lookup_group(const struct ac_perfcounters * pc,unsigned * index)1072 struct ac_pc_block *ac_lookup_group(const struct ac_perfcounters *pc,
1073                                     unsigned *index)
1074 {
1075    unsigned bid;
1076    struct ac_pc_block *block = pc->blocks;
1077 
1078    for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {
1079       if (*index < block->num_groups)
1080          return block;
1081       *index -= block->num_groups;
1082    }
1083 
1084    return NULL;
1085 }
1086 
ac_init_block_names(const struct radeon_info * info,const struct ac_perfcounters * pc,struct ac_pc_block * block)1087 bool ac_init_block_names(const struct radeon_info *info,
1088                          const struct ac_perfcounters *pc,
1089                          struct ac_pc_block *block)
1090 {
1091    bool per_instance_groups = ac_pc_block_has_per_instance_groups(pc, block);
1092    bool per_se_groups = ac_pc_block_has_per_se_groups(pc, block);
1093    unsigned i, j, k;
1094    unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;
1095    unsigned namelen;
1096    char *groupname;
1097    char *p;
1098 
1099    if (per_instance_groups)
1100       groups_instance = block->num_instances;
1101    if (per_se_groups)
1102       groups_se = info->max_se;
1103    if (block->b->b->flags & AC_PC_BLOCK_SHADER)
1104       groups_shader = ARRAY_SIZE(ac_pc_shader_type_bits);
1105 
1106    namelen = strlen(block->b->b->name);
1107    block->group_name_stride = namelen + 1;
1108    if (block->b->b->flags & AC_PC_BLOCK_SHADER)
1109       block->group_name_stride += 3;
1110    if (per_se_groups) {
1111       assert(groups_se <= 10);
1112       block->group_name_stride += 1;
1113 
1114       if (per_instance_groups)
1115          block->group_name_stride += 1;
1116    }
1117    if (per_instance_groups) {
1118       assert(groups_instance <= 100);
1119       block->group_name_stride += 2;
1120    }
1121 
1122    block->group_names = MALLOC(block->num_groups * block->group_name_stride);
1123    if (!block->group_names)
1124       return false;
1125 
1126    groupname = block->group_names;
1127    for (i = 0; i < groups_shader; ++i) {
1128       const char *shader_suffix = ac_pc_shader_type_suffixes[i];
1129       unsigned shaderlen = strlen(shader_suffix);
1130       for (j = 0; j < groups_se; ++j) {
1131          for (k = 0; k < groups_instance; ++k) {
1132             strcpy(groupname, block->b->b->name);
1133             p = groupname + namelen;
1134 
1135             if (block->b->b->flags & AC_PC_BLOCK_SHADER) {
1136                strcpy(p, shader_suffix);
1137                p += shaderlen;
1138             }
1139 
1140             if (per_se_groups) {
1141                p += sprintf(p, "%d", j);
1142                if (per_instance_groups)
1143                   *p++ = '_';
1144             }
1145 
1146             if (per_instance_groups)
1147                p += sprintf(p, "%d", k);
1148 
1149             groupname += block->group_name_stride;
1150          }
1151       }
1152    }
1153 
1154    block->selector_name_stride = block->group_name_stride + 4;
1155    block->selector_names =
1156       MALLOC(block->num_groups * block->b->selectors * block->selector_name_stride);
1157    if (!block->selector_names)
1158       return false;
1159 
1160    groupname = block->group_names;
1161    p = block->selector_names;
1162    for (i = 0; i < block->num_groups; ++i) {
1163       for (j = 0; j < block->b->selectors; ++j) {
1164          sprintf(p, "%s_%03d", groupname, j);
1165          p += block->selector_name_stride;
1166       }
1167       groupname += block->group_name_stride;
1168    }
1169 
1170    return true;
1171 }
1172 
ac_init_perfcounters(const struct radeon_info * info,bool separate_se,bool separate_instance,struct ac_perfcounters * pc)1173 bool ac_init_perfcounters(const struct radeon_info *info,
1174                           bool separate_se,
1175                           bool separate_instance,
1176                           struct ac_perfcounters *pc)
1177 {
1178    const struct ac_pc_block_gfxdescr *blocks;
1179    unsigned num_blocks;
1180 
1181    switch (info->gfx_level) {
1182    case GFX7:
1183       blocks = groups_CIK;
1184       num_blocks = ARRAY_SIZE(groups_CIK);
1185       break;
1186    case GFX8:
1187       blocks = groups_VI;
1188       num_blocks = ARRAY_SIZE(groups_VI);
1189       break;
1190    case GFX9:
1191       blocks = groups_gfx9;
1192       num_blocks = ARRAY_SIZE(groups_gfx9);
1193       break;
1194    case GFX10:
1195    case GFX10_3:
1196       blocks = groups_gfx10;
1197       num_blocks = ARRAY_SIZE(groups_gfx10);
1198       break;
1199    case GFX11:
1200       blocks = groups_gfx11;
1201       num_blocks = ARRAY_SIZE(groups_gfx11);
1202       break;
1203    case GFX6:
1204    default:
1205       return false; /* not implemented */
1206    }
1207 
1208    pc->separate_se = separate_se;
1209    pc->separate_instance = separate_instance;
1210 
1211    pc->blocks = CALLOC(num_blocks, sizeof(struct ac_pc_block));
1212    if (!pc->blocks)
1213       return false;
1214    pc->num_blocks = num_blocks;
1215 
1216    for (unsigned i = 0; i < num_blocks; i++) {
1217       struct ac_pc_block *block = &pc->blocks[i];
1218 
1219       block->b = &blocks[i];
1220       block->num_instances = MAX2(1, block->b->instances);
1221 
1222       if (!strcmp(block->b->b->name, "CB") ||
1223           !strcmp(block->b->b->name, "DB") ||
1224           !strcmp(block->b->b->name, "RMI"))
1225          block->num_instances = info->max_se;
1226       else if (!strcmp(block->b->b->name, "TCC"))
1227          block->num_instances = info->max_tcc_blocks;
1228       else if (!strcmp(block->b->b->name, "IA"))
1229          block->num_instances = MAX2(1, info->max_se / 2);
1230       else if (!strcmp(block->b->b->name, "TA") ||
1231                !strcmp(block->b->b->name, "TCP") ||
1232                !strcmp(block->b->b->name, "TD")) {
1233          block->num_instances = MAX2(1, info->max_good_cu_per_sa);
1234       }
1235 
1236       if (info->gfx_level >= GFX10) {
1237          if (!strcmp(block->b->b->name, "TCP")) {
1238             block->num_global_instances = MAX2(1, info->num_cu_per_sh) * info->num_se * info->max_sa_per_se;
1239          } else if (!strcmp(block->b->b->name, "SQ")) {
1240             block->num_global_instances = block->num_instances * info->num_se;
1241          } else if (!strcmp(block->b->b->name, "GL1C") ||
1242                     !strcmp(block->b->b->name, "SQ_WGP")) {
1243             block->num_global_instances = block->num_instances * info->num_se * info->max_sa_per_se;
1244          } else if (!strcmp(block->b->b->name, "GL2C")) {
1245             block->num_instances = block->num_global_instances = info->num_tcc_blocks;
1246          }
1247       }
1248 
1249       if (ac_pc_block_has_per_instance_groups(pc, block)) {
1250          block->num_groups = block->num_instances;
1251       } else {
1252          block->num_groups = 1;
1253       }
1254 
1255       if (ac_pc_block_has_per_se_groups(pc, block))
1256          block->num_groups *= info->max_se;
1257       if (block->b->b->flags & AC_PC_BLOCK_SHADER)
1258          block->num_groups *= ARRAY_SIZE(ac_pc_shader_type_bits);
1259 
1260       pc->num_groups += block->num_groups;
1261    }
1262 
1263    return true;
1264 }
1265 
ac_destroy_perfcounters(struct ac_perfcounters * pc)1266 void ac_destroy_perfcounters(struct ac_perfcounters *pc)
1267 {
1268    if (!pc)
1269       return;
1270 
1271    for (unsigned i = 0; i < pc->num_blocks; ++i) {
1272       FREE(pc->blocks[i].group_names);
1273       FREE(pc->blocks[i].selector_names);
1274    }
1275    FREE(pc->blocks);
1276 }
1277 
ac_pc_get_block(const struct ac_perfcounters * pc,enum ac_pc_gpu_block gpu_block)1278 struct ac_pc_block *ac_pc_get_block(const struct ac_perfcounters *pc,
1279                                     enum ac_pc_gpu_block gpu_block)
1280 {
1281    for (unsigned i = 0; i < pc->num_blocks; i++) {
1282       struct ac_pc_block *block = &pc->blocks[i];
1283       if (block->b->b->gpu_block == gpu_block) {
1284          return block;
1285       }
1286    }
1287    return NULL;
1288 }
1289