/* xref: /aosp_15_r20/external/mesa3d/src/amd/common/ac_spm.c (revision 6104692788411f58d303aa86923a9ff6ecaded22) */
/*
 * Copyright 2021 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "ac_spm.h"

#include "util/bitscan.h"
#include "util/u_memory.h"
#include "ac_perfcounter.h"

13*61046927SAndroid Build Coastguard Worker /* SPM counters definition. */
14*61046927SAndroid Build Coastguard Worker /* GFX10+ */
15*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx10_num_l2_hits = {TCP, 0x9};
16*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx10_num_l2_misses = {TCP, 0x12};
17*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx10_num_scache_hits = {SQ, 0x14f};
18*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx10_num_scache_misses = {SQ, 0x150};
19*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx10_num_scache_misses_dup = {SQ, 0x151};
20*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx10_num_icache_hits = {SQ, 0x12c};
21*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx10_num_icache_misses = {SQ, 0x12d};
22*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx10_num_icache_misses_dup = {SQ, 0x12e};
23*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx10_num_gl1c_hits = {GL1C, 0xe};
24*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx10_num_gl1c_misses = {GL1C, 0x12};
25*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx10_num_gl2c_hits = {GL2C, 0x3};
26*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx10_num_gl2c_misses = {GL2C, 0x23};
27*61046927SAndroid Build Coastguard Worker 
28*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_create_info gfx10_spm_counters[] = {
29*61046927SAndroid Build Coastguard Worker    {&gfx10_num_l2_hits},
30*61046927SAndroid Build Coastguard Worker    {&gfx10_num_l2_misses},
31*61046927SAndroid Build Coastguard Worker    {&gfx10_num_scache_hits},
32*61046927SAndroid Build Coastguard Worker    {&gfx10_num_scache_misses},
33*61046927SAndroid Build Coastguard Worker    {&gfx10_num_scache_misses_dup},
34*61046927SAndroid Build Coastguard Worker    {&gfx10_num_icache_hits},
35*61046927SAndroid Build Coastguard Worker    {&gfx10_num_icache_misses},
36*61046927SAndroid Build Coastguard Worker    {&gfx10_num_icache_misses_dup},
37*61046927SAndroid Build Coastguard Worker    {&gfx10_num_gl1c_hits},
38*61046927SAndroid Build Coastguard Worker    {&gfx10_num_gl1c_misses},
39*61046927SAndroid Build Coastguard Worker    {&gfx10_num_gl2c_hits},
40*61046927SAndroid Build Coastguard Worker    {&gfx10_num_gl2c_misses},
41*61046927SAndroid Build Coastguard Worker };
42*61046927SAndroid Build Coastguard Worker 
43*61046927SAndroid Build Coastguard Worker /* GFX10.3+ */
44*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx103_num_gl2c_misses = {GL2C, 0x2b};
45*61046927SAndroid Build Coastguard Worker 
46*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_create_info gfx103_spm_counters[] = {
47*61046927SAndroid Build Coastguard Worker    {&gfx10_num_l2_hits},
48*61046927SAndroid Build Coastguard Worker    {&gfx10_num_l2_misses},
49*61046927SAndroid Build Coastguard Worker    {&gfx10_num_scache_hits},
50*61046927SAndroid Build Coastguard Worker    {&gfx10_num_scache_misses},
51*61046927SAndroid Build Coastguard Worker    {&gfx10_num_scache_misses_dup},
52*61046927SAndroid Build Coastguard Worker    {&gfx10_num_icache_hits},
53*61046927SAndroid Build Coastguard Worker    {&gfx10_num_icache_misses},
54*61046927SAndroid Build Coastguard Worker    {&gfx10_num_icache_misses_dup},
55*61046927SAndroid Build Coastguard Worker    {&gfx10_num_gl1c_hits},
56*61046927SAndroid Build Coastguard Worker    {&gfx10_num_gl1c_misses},
57*61046927SAndroid Build Coastguard Worker    {&gfx10_num_gl2c_hits},
58*61046927SAndroid Build Coastguard Worker    {&gfx103_num_gl2c_misses},
59*61046927SAndroid Build Coastguard Worker };
60*61046927SAndroid Build Coastguard Worker 
61*61046927SAndroid Build Coastguard Worker /* GFX11+ */
62*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx11_num_l2_misses = {TCP, 0x11};
63*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx11_num_scache_hits = {SQ_WGP, 0x126};
64*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx11_num_scache_misses = {SQ_WGP, 0x127};
65*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx11_num_scache_misses_dup = {SQ_WGP, 0x128};
66*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx11_num_icache_hits = {SQ_WGP, 0x10e};
67*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx11_num_icache_misses = {SQ_WGP, 0x10f};
68*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_descr gfx11_num_icache_misses_dup = {SQ_WGP, 0x110};
69*61046927SAndroid Build Coastguard Worker 
70*61046927SAndroid Build Coastguard Worker static struct ac_spm_counter_create_info gfx11_spm_counters[] = {
71*61046927SAndroid Build Coastguard Worker    {&gfx10_num_l2_hits},
72*61046927SAndroid Build Coastguard Worker    {&gfx11_num_l2_misses},
73*61046927SAndroid Build Coastguard Worker    {&gfx11_num_scache_hits},
74*61046927SAndroid Build Coastguard Worker    {&gfx11_num_scache_misses},
75*61046927SAndroid Build Coastguard Worker    {&gfx11_num_scache_misses_dup},
76*61046927SAndroid Build Coastguard Worker    {&gfx11_num_icache_hits},
77*61046927SAndroid Build Coastguard Worker    {&gfx11_num_icache_misses},
78*61046927SAndroid Build Coastguard Worker    {&gfx11_num_icache_misses_dup},
79*61046927SAndroid Build Coastguard Worker    {&gfx10_num_gl1c_hits},
80*61046927SAndroid Build Coastguard Worker    {&gfx10_num_gl1c_misses},
81*61046927SAndroid Build Coastguard Worker    {&gfx10_num_gl2c_hits},
82*61046927SAndroid Build Coastguard Worker    {&gfx103_num_gl2c_misses},
83*61046927SAndroid Build Coastguard Worker };
84*61046927SAndroid Build Coastguard Worker 
85*61046927SAndroid Build Coastguard Worker static struct ac_spm_block_select *
ac_spm_get_block_select(struct ac_spm * spm,const struct ac_pc_block * block)86*61046927SAndroid Build Coastguard Worker ac_spm_get_block_select(struct ac_spm *spm, const struct ac_pc_block *block)
87*61046927SAndroid Build Coastguard Worker {
88*61046927SAndroid Build Coastguard Worker    struct ac_spm_block_select *block_sel, *new_block_sel;
89*61046927SAndroid Build Coastguard Worker    uint32_t num_block_sel;
90*61046927SAndroid Build Coastguard Worker 
91*61046927SAndroid Build Coastguard Worker    for (uint32_t i = 0; i < spm->num_block_sel; i++) {
92*61046927SAndroid Build Coastguard Worker       if (spm->block_sel[i].b->b->b->gpu_block == block->b->b->gpu_block)
93*61046927SAndroid Build Coastguard Worker          return &spm->block_sel[i];
94*61046927SAndroid Build Coastguard Worker    }
95*61046927SAndroid Build Coastguard Worker 
96*61046927SAndroid Build Coastguard Worker    /* Allocate a new select block if it doesn't already exist. */
97*61046927SAndroid Build Coastguard Worker    num_block_sel = spm->num_block_sel + 1;
98*61046927SAndroid Build Coastguard Worker    block_sel = realloc(spm->block_sel, num_block_sel * sizeof(*block_sel));
99*61046927SAndroid Build Coastguard Worker    if (!block_sel)
100*61046927SAndroid Build Coastguard Worker       return NULL;
101*61046927SAndroid Build Coastguard Worker 
102*61046927SAndroid Build Coastguard Worker    spm->num_block_sel = num_block_sel;
103*61046927SAndroid Build Coastguard Worker    spm->block_sel = block_sel;
104*61046927SAndroid Build Coastguard Worker 
105*61046927SAndroid Build Coastguard Worker    /* Initialize the new select block. */
106*61046927SAndroid Build Coastguard Worker    new_block_sel = &spm->block_sel[spm->num_block_sel - 1];
107*61046927SAndroid Build Coastguard Worker    memset(new_block_sel, 0, sizeof(*new_block_sel));
108*61046927SAndroid Build Coastguard Worker 
109*61046927SAndroid Build Coastguard Worker    new_block_sel->b = block;
110*61046927SAndroid Build Coastguard Worker    new_block_sel->instances =
111*61046927SAndroid Build Coastguard Worker       calloc(block->num_global_instances, sizeof(*new_block_sel->instances));
112*61046927SAndroid Build Coastguard Worker    if (!new_block_sel->instances)
113*61046927SAndroid Build Coastguard Worker       return NULL;
114*61046927SAndroid Build Coastguard Worker    new_block_sel->num_instances = block->num_global_instances;
115*61046927SAndroid Build Coastguard Worker 
116*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < new_block_sel->num_instances; i++)
117*61046927SAndroid Build Coastguard Worker       new_block_sel->instances[i].num_counters = block->b->b->num_spm_counters;
118*61046927SAndroid Build Coastguard Worker 
119*61046927SAndroid Build Coastguard Worker    return new_block_sel;
120*61046927SAndroid Build Coastguard Worker }
121*61046927SAndroid Build Coastguard Worker 
/* Decomposition of a flat counter instance number into the indices used to
 * address one block instance.
 */
struct ac_spm_instance_mapping {
   uint32_t se_index;         /* SE index or 0 if global */
   uint32_t sa_index;         /* SA index or 0 if global or per-SE */
   uint32_t instance_index;   /* Instance index within the SE/SA (or global) */
};

128*61046927SAndroid Build Coastguard Worker static bool
ac_spm_init_instance_mapping(const struct radeon_info * info,const struct ac_pc_block * block,const struct ac_spm_counter_info * counter,struct ac_spm_instance_mapping * mapping)129*61046927SAndroid Build Coastguard Worker ac_spm_init_instance_mapping(const struct radeon_info *info,
130*61046927SAndroid Build Coastguard Worker                              const struct ac_pc_block *block,
131*61046927SAndroid Build Coastguard Worker                              const struct ac_spm_counter_info *counter,
132*61046927SAndroid Build Coastguard Worker                              struct ac_spm_instance_mapping *mapping)
133*61046927SAndroid Build Coastguard Worker {
134*61046927SAndroid Build Coastguard Worker    uint32_t instance_index = 0, se_index = 0, sa_index = 0;
135*61046927SAndroid Build Coastguard Worker 
136*61046927SAndroid Build Coastguard Worker    if (block->b->b->flags & AC_PC_BLOCK_SE) {
137*61046927SAndroid Build Coastguard Worker       if (block->b->b->gpu_block == SQ) {
138*61046927SAndroid Build Coastguard Worker          /* Per-SE blocks. */
139*61046927SAndroid Build Coastguard Worker          se_index = counter->instance / block->num_instances;
140*61046927SAndroid Build Coastguard Worker          instance_index = counter->instance % block->num_instances;
141*61046927SAndroid Build Coastguard Worker       } else {
142*61046927SAndroid Build Coastguard Worker          /* Per-SA blocks. */
143*61046927SAndroid Build Coastguard Worker          assert(block->b->b->gpu_block == GL1C ||
144*61046927SAndroid Build Coastguard Worker                 block->b->b->gpu_block == TCP ||
145*61046927SAndroid Build Coastguard Worker                 block->b->b->gpu_block == SQ_WGP);
146*61046927SAndroid Build Coastguard Worker          se_index = (counter->instance / block->num_instances) / info->max_sa_per_se;
147*61046927SAndroid Build Coastguard Worker          sa_index = (counter->instance / block->num_instances) % info->max_sa_per_se;
148*61046927SAndroid Build Coastguard Worker          instance_index = counter->instance % block->num_instances;
149*61046927SAndroid Build Coastguard Worker       }
150*61046927SAndroid Build Coastguard Worker    } else {
151*61046927SAndroid Build Coastguard Worker       /* Global blocks. */
152*61046927SAndroid Build Coastguard Worker       assert(block->b->b->gpu_block == GL2C);
153*61046927SAndroid Build Coastguard Worker       instance_index = counter->instance;
154*61046927SAndroid Build Coastguard Worker    }
155*61046927SAndroid Build Coastguard Worker 
156*61046927SAndroid Build Coastguard Worker    if (se_index >= info->num_se ||
157*61046927SAndroid Build Coastguard Worker        sa_index >= info->max_sa_per_se ||
158*61046927SAndroid Build Coastguard Worker        instance_index >= block->num_instances)
159*61046927SAndroid Build Coastguard Worker       return false;
160*61046927SAndroid Build Coastguard Worker 
161*61046927SAndroid Build Coastguard Worker    mapping->se_index = se_index;
162*61046927SAndroid Build Coastguard Worker    mapping->sa_index = sa_index;
163*61046927SAndroid Build Coastguard Worker    mapping->instance_index = instance_index;
164*61046927SAndroid Build Coastguard Worker 
165*61046927SAndroid Build Coastguard Worker    return true;
166*61046927SAndroid Build Coastguard Worker }
167*61046927SAndroid Build Coastguard Worker 
168*61046927SAndroid Build Coastguard Worker static void
ac_spm_init_muxsel(const struct radeon_info * info,const struct ac_pc_block * block,const struct ac_spm_instance_mapping * mapping,struct ac_spm_counter_info * counter,uint32_t spm_wire)169*61046927SAndroid Build Coastguard Worker ac_spm_init_muxsel(const struct radeon_info *info,
170*61046927SAndroid Build Coastguard Worker                    const struct ac_pc_block *block,
171*61046927SAndroid Build Coastguard Worker                    const struct ac_spm_instance_mapping *mapping,
172*61046927SAndroid Build Coastguard Worker                    struct ac_spm_counter_info *counter,
173*61046927SAndroid Build Coastguard Worker                    uint32_t spm_wire)
174*61046927SAndroid Build Coastguard Worker {
175*61046927SAndroid Build Coastguard Worker    const uint16_t counter_idx = 2 * spm_wire + (counter->is_even ? 0 : 1);
176*61046927SAndroid Build Coastguard Worker    union ac_spm_muxsel *muxsel = &counter->muxsel;
177*61046927SAndroid Build Coastguard Worker 
178*61046927SAndroid Build Coastguard Worker    if (info->gfx_level >= GFX11) {
179*61046927SAndroid Build Coastguard Worker       muxsel->gfx11.counter = counter_idx;
180*61046927SAndroid Build Coastguard Worker       muxsel->gfx11.block = block->b->b->spm_block_select;
181*61046927SAndroid Build Coastguard Worker       muxsel->gfx11.shader_array = mapping->sa_index;
182*61046927SAndroid Build Coastguard Worker       muxsel->gfx11.instance = mapping->instance_index;
183*61046927SAndroid Build Coastguard Worker    } else {
184*61046927SAndroid Build Coastguard Worker       muxsel->gfx10.counter = counter_idx;
185*61046927SAndroid Build Coastguard Worker       muxsel->gfx10.block = block->b->b->spm_block_select;
186*61046927SAndroid Build Coastguard Worker       muxsel->gfx10.shader_array = mapping->sa_index;
187*61046927SAndroid Build Coastguard Worker       muxsel->gfx10.instance = mapping->instance_index;
188*61046927SAndroid Build Coastguard Worker    }
189*61046927SAndroid Build Coastguard Worker }
190*61046927SAndroid Build Coastguard Worker 
191*61046927SAndroid Build Coastguard Worker static uint32_t
ac_spm_init_grbm_gfx_index(const struct ac_pc_block * block,const struct ac_spm_instance_mapping * mapping)192*61046927SAndroid Build Coastguard Worker ac_spm_init_grbm_gfx_index(const struct ac_pc_block *block,
193*61046927SAndroid Build Coastguard Worker                            const struct ac_spm_instance_mapping *mapping)
194*61046927SAndroid Build Coastguard Worker {
195*61046927SAndroid Build Coastguard Worker    uint32_t instance = mapping->instance_index;
196*61046927SAndroid Build Coastguard Worker    uint32_t grbm_gfx_index = 0;
197*61046927SAndroid Build Coastguard Worker 
198*61046927SAndroid Build Coastguard Worker    grbm_gfx_index |= S_030800_SE_INDEX(mapping->se_index) |
199*61046927SAndroid Build Coastguard Worker                      S_030800_SH_INDEX(mapping->sa_index);
200*61046927SAndroid Build Coastguard Worker 
201*61046927SAndroid Build Coastguard Worker    switch (block->b->b->gpu_block) {
202*61046927SAndroid Build Coastguard Worker    case GL2C:
203*61046927SAndroid Build Coastguard Worker       /* Global blocks. */
204*61046927SAndroid Build Coastguard Worker       grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
205*61046927SAndroid Build Coastguard Worker       break;
206*61046927SAndroid Build Coastguard Worker    case SQ:
207*61046927SAndroid Build Coastguard Worker       /* Per-SE blocks. */
208*61046927SAndroid Build Coastguard Worker       grbm_gfx_index |= S_030800_SH_BROADCAST_WRITES(1);
209*61046927SAndroid Build Coastguard Worker       break;
210*61046927SAndroid Build Coastguard Worker    default:
211*61046927SAndroid Build Coastguard Worker       /* Other blocks shouldn't broadcast. */
212*61046927SAndroid Build Coastguard Worker       break;
213*61046927SAndroid Build Coastguard Worker    }
214*61046927SAndroid Build Coastguard Worker 
215*61046927SAndroid Build Coastguard Worker    if (block->b->b->gpu_block == SQ_WGP) {
216*61046927SAndroid Build Coastguard Worker       union {
217*61046927SAndroid Build Coastguard Worker          struct {
218*61046927SAndroid Build Coastguard Worker             uint32_t block_index : 2; /* Block index withing WGP */
219*61046927SAndroid Build Coastguard Worker             uint32_t wgp_index : 3;
220*61046927SAndroid Build Coastguard Worker             uint32_t is_below_spi : 1; /* 0: lower WGP numbers, 1: higher WGP numbers */
221*61046927SAndroid Build Coastguard Worker             uint32_t reserved : 26;
222*61046927SAndroid Build Coastguard Worker          };
223*61046927SAndroid Build Coastguard Worker 
224*61046927SAndroid Build Coastguard Worker          uint32_t value;
225*61046927SAndroid Build Coastguard Worker       } instance_index = {0};
226*61046927SAndroid Build Coastguard Worker 
227*61046927SAndroid Build Coastguard Worker       const uint32_t num_wgp_above_spi = 4;
228*61046927SAndroid Build Coastguard Worker       const bool is_below_spi = mapping->instance_index >= num_wgp_above_spi;
229*61046927SAndroid Build Coastguard Worker 
230*61046927SAndroid Build Coastguard Worker       instance_index.wgp_index =
231*61046927SAndroid Build Coastguard Worker          is_below_spi ? (mapping->instance_index - num_wgp_above_spi) : mapping->instance_index;
232*61046927SAndroid Build Coastguard Worker       instance_index.is_below_spi = is_below_spi;
233*61046927SAndroid Build Coastguard Worker 
234*61046927SAndroid Build Coastguard Worker       instance = instance_index.value;
235*61046927SAndroid Build Coastguard Worker    }
236*61046927SAndroid Build Coastguard Worker 
237*61046927SAndroid Build Coastguard Worker    grbm_gfx_index |= S_030800_INSTANCE_INDEX(instance);
238*61046927SAndroid Build Coastguard Worker 
239*61046927SAndroid Build Coastguard Worker    return grbm_gfx_index;
240*61046927SAndroid Build Coastguard Worker }
241*61046927SAndroid Build Coastguard Worker 
242*61046927SAndroid Build Coastguard Worker static bool
ac_spm_map_counter(struct ac_spm * spm,struct ac_spm_block_select * block_sel,struct ac_spm_counter_info * counter,const struct ac_spm_instance_mapping * mapping,uint32_t * spm_wire)243*61046927SAndroid Build Coastguard Worker ac_spm_map_counter(struct ac_spm *spm, struct ac_spm_block_select *block_sel,
244*61046927SAndroid Build Coastguard Worker                    struct ac_spm_counter_info *counter,
245*61046927SAndroid Build Coastguard Worker                    const struct ac_spm_instance_mapping *mapping,
246*61046927SAndroid Build Coastguard Worker                    uint32_t *spm_wire)
247*61046927SAndroid Build Coastguard Worker {
248*61046927SAndroid Build Coastguard Worker    uint32_t instance = counter->instance;
249*61046927SAndroid Build Coastguard Worker 
250*61046927SAndroid Build Coastguard Worker    if (block_sel->b->b->b->gpu_block == SQ_WGP) {
251*61046927SAndroid Build Coastguard Worker       if (!spm->sq_wgp[instance].grbm_gfx_index) {
252*61046927SAndroid Build Coastguard Worker          spm->sq_wgp[instance].grbm_gfx_index =
253*61046927SAndroid Build Coastguard Worker             ac_spm_init_grbm_gfx_index(block_sel->b, mapping);
254*61046927SAndroid Build Coastguard Worker       }
255*61046927SAndroid Build Coastguard Worker 
256*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < ARRAY_SIZE(spm->sq_wgp[instance].counters); i++) {
257*61046927SAndroid Build Coastguard Worker          struct ac_spm_counter_select *cntr_sel = &spm->sq_wgp[instance].counters[i];
258*61046927SAndroid Build Coastguard Worker 
259*61046927SAndroid Build Coastguard Worker          if (i < spm->sq_wgp[instance].num_counters)
260*61046927SAndroid Build Coastguard Worker             continue;
261*61046927SAndroid Build Coastguard Worker 
262*61046927SAndroid Build Coastguard Worker          cntr_sel->sel0 |= S_036700_PERF_SEL(counter->event_id) |
263*61046927SAndroid Build Coastguard Worker                            S_036700_SPM_MODE(1) | /* 16-bit clamp */
264*61046927SAndroid Build Coastguard Worker                            S_036700_PERF_MODE(0);
265*61046927SAndroid Build Coastguard Worker 
266*61046927SAndroid Build Coastguard Worker          /* Each SQ_WQP modules (GFX11+) share one 32-bit accumulator/wire
267*61046927SAndroid Build Coastguard Worker           * per pair of selects.
268*61046927SAndroid Build Coastguard Worker           */
269*61046927SAndroid Build Coastguard Worker          cntr_sel->active |= 1 << (i % 2);
270*61046927SAndroid Build Coastguard Worker          *spm_wire = i / 2;
271*61046927SAndroid Build Coastguard Worker 
272*61046927SAndroid Build Coastguard Worker          if (cntr_sel->active & 0x1)
273*61046927SAndroid Build Coastguard Worker             counter->is_even = true;
274*61046927SAndroid Build Coastguard Worker 
275*61046927SAndroid Build Coastguard Worker          spm->sq_wgp[instance].num_counters++;
276*61046927SAndroid Build Coastguard Worker          return true;
277*61046927SAndroid Build Coastguard Worker       }
278*61046927SAndroid Build Coastguard Worker    } else if (block_sel->b->b->b->gpu_block == SQ) {
279*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < ARRAY_SIZE(spm->sqg[instance].counters); i++) {
280*61046927SAndroid Build Coastguard Worker          struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[i];
281*61046927SAndroid Build Coastguard Worker 
282*61046927SAndroid Build Coastguard Worker          if (i < spm->sqg[instance].num_counters)
283*61046927SAndroid Build Coastguard Worker             continue;
284*61046927SAndroid Build Coastguard Worker 
285*61046927SAndroid Build Coastguard Worker          /* SQ doesn't support 16-bit counters. */
286*61046927SAndroid Build Coastguard Worker          cntr_sel->sel0 |= S_036700_PERF_SEL(counter->event_id) |
287*61046927SAndroid Build Coastguard Worker                            S_036700_SPM_MODE(3) | /* 32-bit clamp */
288*61046927SAndroid Build Coastguard Worker                            S_036700_PERF_MODE(0);
289*61046927SAndroid Build Coastguard Worker          cntr_sel->active |= 0x3;
290*61046927SAndroid Build Coastguard Worker 
291*61046927SAndroid Build Coastguard Worker          /* 32-bits counter are always even. */
292*61046927SAndroid Build Coastguard Worker          counter->is_even = true;
293*61046927SAndroid Build Coastguard Worker 
294*61046927SAndroid Build Coastguard Worker          /* One wire per SQ module. */
295*61046927SAndroid Build Coastguard Worker          *spm_wire = i;
296*61046927SAndroid Build Coastguard Worker 
297*61046927SAndroid Build Coastguard Worker          spm->sqg[instance].num_counters++;
298*61046927SAndroid Build Coastguard Worker          return true;
299*61046927SAndroid Build Coastguard Worker       }
300*61046927SAndroid Build Coastguard Worker    } else {
301*61046927SAndroid Build Coastguard Worker       /* Generic blocks. */
302*61046927SAndroid Build Coastguard Worker       struct ac_spm_block_instance *block_instance =
303*61046927SAndroid Build Coastguard Worker          &block_sel->instances[instance];
304*61046927SAndroid Build Coastguard Worker 
305*61046927SAndroid Build Coastguard Worker       if (!block_instance->grbm_gfx_index) {
306*61046927SAndroid Build Coastguard Worker          block_instance->grbm_gfx_index =
307*61046927SAndroid Build Coastguard Worker             ac_spm_init_grbm_gfx_index(block_sel->b, mapping);
308*61046927SAndroid Build Coastguard Worker       }
309*61046927SAndroid Build Coastguard Worker 
310*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < block_instance->num_counters; i++) {
311*61046927SAndroid Build Coastguard Worker          struct ac_spm_counter_select *cntr_sel = &block_instance->counters[i];
312*61046927SAndroid Build Coastguard Worker          int index = ffs(~cntr_sel->active) - 1;
313*61046927SAndroid Build Coastguard Worker 
314*61046927SAndroid Build Coastguard Worker          switch (index) {
315*61046927SAndroid Build Coastguard Worker          case 0: /* use S_037004_PERF_SEL */
316*61046927SAndroid Build Coastguard Worker             cntr_sel->sel0 |= S_037004_PERF_SEL(counter->event_id) |
317*61046927SAndroid Build Coastguard Worker                               S_037004_CNTR_MODE(1) | /* 16-bit clamp */
318*61046927SAndroid Build Coastguard Worker                               S_037004_PERF_MODE(0); /* accum */
319*61046927SAndroid Build Coastguard Worker             break;
320*61046927SAndroid Build Coastguard Worker          case 1: /* use S_037004_PERF_SEL1 */
321*61046927SAndroid Build Coastguard Worker             cntr_sel->sel0 |= S_037004_PERF_SEL1(counter->event_id) |
322*61046927SAndroid Build Coastguard Worker                               S_037004_PERF_MODE1(0);
323*61046927SAndroid Build Coastguard Worker             break;
324*61046927SAndroid Build Coastguard Worker          case 2: /* use S_037004_PERF_SEL2 */
325*61046927SAndroid Build Coastguard Worker             cntr_sel->sel1 |= S_037008_PERF_SEL2(counter->event_id) |
326*61046927SAndroid Build Coastguard Worker                               S_037008_PERF_MODE2(0);
327*61046927SAndroid Build Coastguard Worker             break;
328*61046927SAndroid Build Coastguard Worker          case 3: /* use S_037004_PERF_SEL3 */
329*61046927SAndroid Build Coastguard Worker             cntr_sel->sel1 |= S_037008_PERF_SEL3(counter->event_id) |
330*61046927SAndroid Build Coastguard Worker                               S_037008_PERF_MODE3(0);
331*61046927SAndroid Build Coastguard Worker             break;
332*61046927SAndroid Build Coastguard Worker          default:
333*61046927SAndroid Build Coastguard Worker             return false;
334*61046927SAndroid Build Coastguard Worker          }
335*61046927SAndroid Build Coastguard Worker 
336*61046927SAndroid Build Coastguard Worker          /* Mark this 16-bit counter as used. */
337*61046927SAndroid Build Coastguard Worker          cntr_sel->active |= 1 << index;
338*61046927SAndroid Build Coastguard Worker 
339*61046927SAndroid Build Coastguard Worker          /* Determine if the counter is even or odd. */
340*61046927SAndroid Build Coastguard Worker          counter->is_even = !(index % 2);
341*61046927SAndroid Build Coastguard Worker 
342*61046927SAndroid Build Coastguard Worker          /* Determine the SPM wire (one wire holds two 16-bit counters). */
343*61046927SAndroid Build Coastguard Worker          *spm_wire = !!(index >= 2);
344*61046927SAndroid Build Coastguard Worker 
345*61046927SAndroid Build Coastguard Worker          return true;
346*61046927SAndroid Build Coastguard Worker       }
347*61046927SAndroid Build Coastguard Worker    }
348*61046927SAndroid Build Coastguard Worker 
349*61046927SAndroid Build Coastguard Worker    return false;
350*61046927SAndroid Build Coastguard Worker }
351*61046927SAndroid Build Coastguard Worker 
352*61046927SAndroid Build Coastguard Worker static bool
ac_spm_add_counter(const struct radeon_info * info,const struct ac_perfcounters * pc,struct ac_spm * spm,const struct ac_spm_counter_create_info * counter_info)353*61046927SAndroid Build Coastguard Worker ac_spm_add_counter(const struct radeon_info *info,
354*61046927SAndroid Build Coastguard Worker                    const struct ac_perfcounters *pc,
355*61046927SAndroid Build Coastguard Worker                    struct ac_spm *spm,
356*61046927SAndroid Build Coastguard Worker                    const struct ac_spm_counter_create_info *counter_info)
357*61046927SAndroid Build Coastguard Worker {
358*61046927SAndroid Build Coastguard Worker    struct ac_spm_instance_mapping instance_mapping = {0};
359*61046927SAndroid Build Coastguard Worker    struct ac_spm_counter_info *counter;
360*61046927SAndroid Build Coastguard Worker    struct ac_spm_block_select *block_sel;
361*61046927SAndroid Build Coastguard Worker    struct ac_pc_block *block;
362*61046927SAndroid Build Coastguard Worker    uint32_t spm_wire;
363*61046927SAndroid Build Coastguard Worker 
364*61046927SAndroid Build Coastguard Worker    /* Check if the GPU block is valid. */
365*61046927SAndroid Build Coastguard Worker    block = ac_pc_get_block(pc, counter_info->b->gpu_block);
366*61046927SAndroid Build Coastguard Worker    if (!block) {
367*61046927SAndroid Build Coastguard Worker       fprintf(stderr, "ac/spm: Invalid GPU block.\n");
368*61046927SAndroid Build Coastguard Worker       return false;
369*61046927SAndroid Build Coastguard Worker    }
370*61046927SAndroid Build Coastguard Worker 
371*61046927SAndroid Build Coastguard Worker    /* Check if the number of instances is valid. */
372*61046927SAndroid Build Coastguard Worker    if (counter_info->instance > block->num_global_instances - 1) {
373*61046927SAndroid Build Coastguard Worker       fprintf(stderr, "ac/spm: Invalid instance ID.\n");
374*61046927SAndroid Build Coastguard Worker       return false;
375*61046927SAndroid Build Coastguard Worker    }
376*61046927SAndroid Build Coastguard Worker 
377*61046927SAndroid Build Coastguard Worker    /* Check if the event ID is valid. */
378*61046927SAndroid Build Coastguard Worker    if (counter_info->b->event_id > block->b->selectors) {
379*61046927SAndroid Build Coastguard Worker       fprintf(stderr, "ac/spm: Invalid event ID.\n");
380*61046927SAndroid Build Coastguard Worker       return false;
381*61046927SAndroid Build Coastguard Worker    }
382*61046927SAndroid Build Coastguard Worker 
383*61046927SAndroid Build Coastguard Worker    counter = &spm->counters[spm->num_counters];
384*61046927SAndroid Build Coastguard Worker    spm->num_counters++;
385*61046927SAndroid Build Coastguard Worker 
386*61046927SAndroid Build Coastguard Worker    counter->gpu_block = counter_info->b->gpu_block;
387*61046927SAndroid Build Coastguard Worker    counter->event_id = counter_info->b->event_id;
388*61046927SAndroid Build Coastguard Worker    counter->instance = counter_info->instance;
389*61046927SAndroid Build Coastguard Worker 
390*61046927SAndroid Build Coastguard Worker    /* Get the select block used to configure the counter. */
391*61046927SAndroid Build Coastguard Worker    block_sel = ac_spm_get_block_select(spm, block);
392*61046927SAndroid Build Coastguard Worker    if (!block_sel)
393*61046927SAndroid Build Coastguard Worker       return false;
394*61046927SAndroid Build Coastguard Worker 
395*61046927SAndroid Build Coastguard Worker    /* Initialize instance mapping for the counter. */
396*61046927SAndroid Build Coastguard Worker    if (!ac_spm_init_instance_mapping(info, block, counter, &instance_mapping)) {
397*61046927SAndroid Build Coastguard Worker       fprintf(stderr, "ac/spm: Failed to initialize instance mapping.\n");
398*61046927SAndroid Build Coastguard Worker       return false;
399*61046927SAndroid Build Coastguard Worker    }
400*61046927SAndroid Build Coastguard Worker 
401*61046927SAndroid Build Coastguard Worker    /* Map the counter to the select block. */
402*61046927SAndroid Build Coastguard Worker    if (!ac_spm_map_counter(spm, block_sel, counter, &instance_mapping, &spm_wire)) {
403*61046927SAndroid Build Coastguard Worker       fprintf(stderr, "ac/spm: No free slots available!\n");
404*61046927SAndroid Build Coastguard Worker       return false;
405*61046927SAndroid Build Coastguard Worker    }
406*61046927SAndroid Build Coastguard Worker 
407*61046927SAndroid Build Coastguard Worker    /* Determine the counter segment type. */
408*61046927SAndroid Build Coastguard Worker    if (block->b->b->flags & AC_PC_BLOCK_SE) {
409*61046927SAndroid Build Coastguard Worker       counter->segment_type = instance_mapping.se_index;
410*61046927SAndroid Build Coastguard Worker    } else {
411*61046927SAndroid Build Coastguard Worker       counter->segment_type = AC_SPM_SEGMENT_TYPE_GLOBAL;
412*61046927SAndroid Build Coastguard Worker    }
413*61046927SAndroid Build Coastguard Worker 
414*61046927SAndroid Build Coastguard Worker    /* Configure the muxsel for SPM. */
415*61046927SAndroid Build Coastguard Worker    ac_spm_init_muxsel(info, block, &instance_mapping, counter, spm_wire);
416*61046927SAndroid Build Coastguard Worker 
417*61046927SAndroid Build Coastguard Worker    return true;
418*61046927SAndroid Build Coastguard Worker }
419*61046927SAndroid Build Coastguard Worker 
420*61046927SAndroid Build Coastguard Worker static void
ac_spm_fill_muxsel_ram(const struct radeon_info * info,struct ac_spm * spm,enum ac_spm_segment_type segment_type,uint32_t offset)421*61046927SAndroid Build Coastguard Worker ac_spm_fill_muxsel_ram(const struct radeon_info *info,
422*61046927SAndroid Build Coastguard Worker                        struct ac_spm *spm,
423*61046927SAndroid Build Coastguard Worker                        enum ac_spm_segment_type segment_type,
424*61046927SAndroid Build Coastguard Worker                        uint32_t offset)
425*61046927SAndroid Build Coastguard Worker {
426*61046927SAndroid Build Coastguard Worker    struct ac_spm_muxsel_line *mappings = spm->muxsel_lines[segment_type];
427*61046927SAndroid Build Coastguard Worker    uint32_t even_counter_idx = 0, even_line_idx = 0;
428*61046927SAndroid Build Coastguard Worker    uint32_t odd_counter_idx = 0, odd_line_idx = 1;
429*61046927SAndroid Build Coastguard Worker 
430*61046927SAndroid Build Coastguard Worker    /* Add the global timestamps first. */
431*61046927SAndroid Build Coastguard Worker    if (segment_type == AC_SPM_SEGMENT_TYPE_GLOBAL) {
432*61046927SAndroid Build Coastguard Worker       if (info->gfx_level >= GFX11) {
433*61046927SAndroid Build Coastguard Worker          mappings[even_line_idx].muxsel[even_counter_idx++].value = 0xf840;
434*61046927SAndroid Build Coastguard Worker          mappings[even_line_idx].muxsel[even_counter_idx++].value = 0xf841;
435*61046927SAndroid Build Coastguard Worker          mappings[even_line_idx].muxsel[even_counter_idx++].value = 0xf842;
436*61046927SAndroid Build Coastguard Worker          mappings[even_line_idx].muxsel[even_counter_idx++].value = 0xf843;
437*61046927SAndroid Build Coastguard Worker       } else {
438*61046927SAndroid Build Coastguard Worker          for (unsigned i = 0; i < 4; i++) {
439*61046927SAndroid Build Coastguard Worker             mappings[even_line_idx].muxsel[even_counter_idx++].value = 0xf0f0;
440*61046927SAndroid Build Coastguard Worker          }
441*61046927SAndroid Build Coastguard Worker       }
442*61046927SAndroid Build Coastguard Worker    }
443*61046927SAndroid Build Coastguard Worker 
444*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < spm->num_counters; i++) {
445*61046927SAndroid Build Coastguard Worker       struct ac_spm_counter_info *counter = &spm->counters[i];
446*61046927SAndroid Build Coastguard Worker 
447*61046927SAndroid Build Coastguard Worker       if (counter->segment_type != segment_type)
448*61046927SAndroid Build Coastguard Worker          continue;
449*61046927SAndroid Build Coastguard Worker 
450*61046927SAndroid Build Coastguard Worker       if (counter->is_even) {
451*61046927SAndroid Build Coastguard Worker          counter->offset =
452*61046927SAndroid Build Coastguard Worker             (offset + even_line_idx) * AC_SPM_NUM_COUNTER_PER_MUXSEL + even_counter_idx;
453*61046927SAndroid Build Coastguard Worker 
454*61046927SAndroid Build Coastguard Worker          mappings[even_line_idx].muxsel[even_counter_idx] = spm->counters[i].muxsel;
455*61046927SAndroid Build Coastguard Worker          if (++even_counter_idx == AC_SPM_NUM_COUNTER_PER_MUXSEL) {
456*61046927SAndroid Build Coastguard Worker             even_counter_idx = 0;
457*61046927SAndroid Build Coastguard Worker             even_line_idx += 2;
458*61046927SAndroid Build Coastguard Worker          }
459*61046927SAndroid Build Coastguard Worker       } else {
460*61046927SAndroid Build Coastguard Worker          counter->offset =
461*61046927SAndroid Build Coastguard Worker             (offset + odd_line_idx) * AC_SPM_NUM_COUNTER_PER_MUXSEL + odd_counter_idx;
462*61046927SAndroid Build Coastguard Worker 
463*61046927SAndroid Build Coastguard Worker          mappings[odd_line_idx].muxsel[odd_counter_idx] = spm->counters[i].muxsel;
464*61046927SAndroid Build Coastguard Worker          if (++odd_counter_idx == AC_SPM_NUM_COUNTER_PER_MUXSEL) {
465*61046927SAndroid Build Coastguard Worker             odd_counter_idx = 0;
466*61046927SAndroid Build Coastguard Worker             odd_line_idx += 2;
467*61046927SAndroid Build Coastguard Worker          }
468*61046927SAndroid Build Coastguard Worker       }
469*61046927SAndroid Build Coastguard Worker    }
470*61046927SAndroid Build Coastguard Worker }
471*61046927SAndroid Build Coastguard Worker 
ac_init_spm(const struct radeon_info * info,const struct ac_perfcounters * pc,struct ac_spm * spm)472*61046927SAndroid Build Coastguard Worker bool ac_init_spm(const struct radeon_info *info,
473*61046927SAndroid Build Coastguard Worker                  const struct ac_perfcounters *pc,
474*61046927SAndroid Build Coastguard Worker                  struct ac_spm *spm)
475*61046927SAndroid Build Coastguard Worker {
476*61046927SAndroid Build Coastguard Worker    const struct ac_spm_counter_create_info *create_info;
477*61046927SAndroid Build Coastguard Worker    unsigned create_info_count;
478*61046927SAndroid Build Coastguard Worker    unsigned num_counters = 0;
479*61046927SAndroid Build Coastguard Worker 
480*61046927SAndroid Build Coastguard Worker    switch (info->gfx_level) {
481*61046927SAndroid Build Coastguard Worker    case GFX10:
482*61046927SAndroid Build Coastguard Worker       create_info_count = ARRAY_SIZE(gfx10_spm_counters);
483*61046927SAndroid Build Coastguard Worker       create_info = gfx10_spm_counters;
484*61046927SAndroid Build Coastguard Worker       break;
485*61046927SAndroid Build Coastguard Worker    case GFX10_3:
486*61046927SAndroid Build Coastguard Worker       create_info_count = ARRAY_SIZE(gfx103_spm_counters);
487*61046927SAndroid Build Coastguard Worker       create_info = gfx103_spm_counters;
488*61046927SAndroid Build Coastguard Worker       break;
489*61046927SAndroid Build Coastguard Worker    case GFX11:
490*61046927SAndroid Build Coastguard Worker    case GFX11_5:
491*61046927SAndroid Build Coastguard Worker       create_info_count = ARRAY_SIZE(gfx11_spm_counters);
492*61046927SAndroid Build Coastguard Worker       create_info = gfx11_spm_counters;
493*61046927SAndroid Build Coastguard Worker       break;
494*61046927SAndroid Build Coastguard Worker    default:
495*61046927SAndroid Build Coastguard Worker       return false; /* not implemented */
496*61046927SAndroid Build Coastguard Worker    }
497*61046927SAndroid Build Coastguard Worker 
498*61046927SAndroid Build Coastguard Worker    /* Count the total number of counters. */
499*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < create_info_count; i++) {
500*61046927SAndroid Build Coastguard Worker       const struct ac_pc_block *block = ac_pc_get_block(pc, create_info[i].b->gpu_block);
501*61046927SAndroid Build Coastguard Worker 
502*61046927SAndroid Build Coastguard Worker       if (!block)
503*61046927SAndroid Build Coastguard Worker          return false;
504*61046927SAndroid Build Coastguard Worker 
505*61046927SAndroid Build Coastguard Worker       num_counters += block->num_global_instances;
506*61046927SAndroid Build Coastguard Worker    }
507*61046927SAndroid Build Coastguard Worker 
508*61046927SAndroid Build Coastguard Worker    spm->counters = CALLOC(num_counters, sizeof(*spm->counters));
509*61046927SAndroid Build Coastguard Worker    if (!spm->counters)
510*61046927SAndroid Build Coastguard Worker       return false;
511*61046927SAndroid Build Coastguard Worker 
512*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < create_info_count; i++) {
513*61046927SAndroid Build Coastguard Worker       const struct ac_pc_block *block = ac_pc_get_block(pc, create_info[i].b->gpu_block);
514*61046927SAndroid Build Coastguard Worker       struct ac_spm_counter_create_info counter = create_info[i];
515*61046927SAndroid Build Coastguard Worker 
516*61046927SAndroid Build Coastguard Worker       for (unsigned j = 0; j < block->num_global_instances; j++) {
517*61046927SAndroid Build Coastguard Worker          counter.instance = j;
518*61046927SAndroid Build Coastguard Worker 
519*61046927SAndroid Build Coastguard Worker          if (!ac_spm_add_counter(info, pc, spm, &counter)) {
520*61046927SAndroid Build Coastguard Worker             fprintf(stderr, "ac/spm: Failed to add SPM counter (%d).\n", i);
521*61046927SAndroid Build Coastguard Worker             return false;
522*61046927SAndroid Build Coastguard Worker          }
523*61046927SAndroid Build Coastguard Worker       }
524*61046927SAndroid Build Coastguard Worker    }
525*61046927SAndroid Build Coastguard Worker 
526*61046927SAndroid Build Coastguard Worker    /* Determine the segment size and create a muxsel ram for every segment. */
527*61046927SAndroid Build Coastguard Worker    for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
528*61046927SAndroid Build Coastguard Worker       unsigned num_even_counters = 0, num_odd_counters = 0;
529*61046927SAndroid Build Coastguard Worker 
530*61046927SAndroid Build Coastguard Worker       if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
531*61046927SAndroid Build Coastguard Worker          /* The global segment always start with a 64-bit timestamp. */
532*61046927SAndroid Build Coastguard Worker          num_even_counters += AC_SPM_GLOBAL_TIMESTAMP_COUNTERS;
533*61046927SAndroid Build Coastguard Worker       }
534*61046927SAndroid Build Coastguard Worker 
535*61046927SAndroid Build Coastguard Worker       /* Count the number of even/odd counters for this segment. */
536*61046927SAndroid Build Coastguard Worker       for (unsigned c = 0; c < spm->num_counters; c++) {
537*61046927SAndroid Build Coastguard Worker          struct ac_spm_counter_info *counter = &spm->counters[c];
538*61046927SAndroid Build Coastguard Worker 
539*61046927SAndroid Build Coastguard Worker          if (counter->segment_type != s)
540*61046927SAndroid Build Coastguard Worker             continue;
541*61046927SAndroid Build Coastguard Worker 
542*61046927SAndroid Build Coastguard Worker          if (counter->is_even) {
543*61046927SAndroid Build Coastguard Worker             num_even_counters++;
544*61046927SAndroid Build Coastguard Worker          } else {
545*61046927SAndroid Build Coastguard Worker             num_odd_counters++;
546*61046927SAndroid Build Coastguard Worker          }
547*61046927SAndroid Build Coastguard Worker       }
548*61046927SAndroid Build Coastguard Worker 
549*61046927SAndroid Build Coastguard Worker       /* Compute the number of lines. */
550*61046927SAndroid Build Coastguard Worker       unsigned even_lines =
551*61046927SAndroid Build Coastguard Worker          DIV_ROUND_UP(num_even_counters, AC_SPM_NUM_COUNTER_PER_MUXSEL);
552*61046927SAndroid Build Coastguard Worker       unsigned odd_lines =
553*61046927SAndroid Build Coastguard Worker          DIV_ROUND_UP(num_odd_counters, AC_SPM_NUM_COUNTER_PER_MUXSEL);
554*61046927SAndroid Build Coastguard Worker       unsigned num_lines = (even_lines > odd_lines) ? (2 * even_lines - 1) : (2 * odd_lines);
555*61046927SAndroid Build Coastguard Worker 
556*61046927SAndroid Build Coastguard Worker       spm->muxsel_lines[s] = CALLOC(num_lines, sizeof(*spm->muxsel_lines[s]));
557*61046927SAndroid Build Coastguard Worker       if (!spm->muxsel_lines[s])
558*61046927SAndroid Build Coastguard Worker          return false;
559*61046927SAndroid Build Coastguard Worker       spm->num_muxsel_lines[s] = num_lines;
560*61046927SAndroid Build Coastguard Worker    }
561*61046927SAndroid Build Coastguard Worker 
562*61046927SAndroid Build Coastguard Worker    /* Compute the maximum number of muxsel lines among all SEs. On GFX11,
563*61046927SAndroid Build Coastguard Worker     * there is only one SE segment size value and the highest value is used.
564*61046927SAndroid Build Coastguard Worker     */
565*61046927SAndroid Build Coastguard Worker    for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_GLOBAL; s++) {
566*61046927SAndroid Build Coastguard Worker       spm->max_se_muxsel_lines =
567*61046927SAndroid Build Coastguard Worker          MAX2(spm->num_muxsel_lines[s], spm->max_se_muxsel_lines);
568*61046927SAndroid Build Coastguard Worker    }
569*61046927SAndroid Build Coastguard Worker 
570*61046927SAndroid Build Coastguard Worker    /* RLC uses the following order: Global, SE0, SE1, SE2, SE3, SE4, SE5. */
571*61046927SAndroid Build Coastguard Worker    ac_spm_fill_muxsel_ram(info, spm, AC_SPM_SEGMENT_TYPE_GLOBAL, 0);
572*61046927SAndroid Build Coastguard Worker 
573*61046927SAndroid Build Coastguard Worker    const uint32_t num_global_lines = spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL];
574*61046927SAndroid Build Coastguard Worker 
575*61046927SAndroid Build Coastguard Worker    if (info->gfx_level >= GFX11) {
576*61046927SAndroid Build Coastguard Worker       /* On GFX11, RLC uses one segment size for every single SE. */
577*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < info->num_se; i++) {
578*61046927SAndroid Build Coastguard Worker          assert(i < AC_SPM_SEGMENT_TYPE_GLOBAL);
579*61046927SAndroid Build Coastguard Worker          uint32_t offset = num_global_lines + i * spm->max_se_muxsel_lines;
580*61046927SAndroid Build Coastguard Worker 
581*61046927SAndroid Build Coastguard Worker          ac_spm_fill_muxsel_ram(info, spm, i, offset);
582*61046927SAndroid Build Coastguard Worker       }
583*61046927SAndroid Build Coastguard Worker    } else {
584*61046927SAndroid Build Coastguard Worker       uint32_t offset = num_global_lines;
585*61046927SAndroid Build Coastguard Worker 
586*61046927SAndroid Build Coastguard Worker       for (unsigned i = 0; i < info->num_se; i++) {
587*61046927SAndroid Build Coastguard Worker          assert(i < AC_SPM_SEGMENT_TYPE_GLOBAL);
588*61046927SAndroid Build Coastguard Worker 
589*61046927SAndroid Build Coastguard Worker          ac_spm_fill_muxsel_ram(info, spm, i, offset);
590*61046927SAndroid Build Coastguard Worker 
591*61046927SAndroid Build Coastguard Worker          offset += spm->num_muxsel_lines[i];
592*61046927SAndroid Build Coastguard Worker       }
593*61046927SAndroid Build Coastguard Worker    }
594*61046927SAndroid Build Coastguard Worker 
595*61046927SAndroid Build Coastguard Worker    /* On GFX11, the data size written by the hw is in units of segment. */
596*61046927SAndroid Build Coastguard Worker    spm->ptr_granularity = info->gfx_level >= GFX11 ? 32 : 1;
597*61046927SAndroid Build Coastguard Worker 
598*61046927SAndroid Build Coastguard Worker    return true;
599*61046927SAndroid Build Coastguard Worker }
600*61046927SAndroid Build Coastguard Worker 
ac_destroy_spm(struct ac_spm * spm)601*61046927SAndroid Build Coastguard Worker void ac_destroy_spm(struct ac_spm *spm)
602*61046927SAndroid Build Coastguard Worker {
603*61046927SAndroid Build Coastguard Worker    for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
604*61046927SAndroid Build Coastguard Worker       FREE(spm->muxsel_lines[s]);
605*61046927SAndroid Build Coastguard Worker    }
606*61046927SAndroid Build Coastguard Worker 
607*61046927SAndroid Build Coastguard Worker    for (unsigned i = 0; i < spm->num_block_sel; i++) {
608*61046927SAndroid Build Coastguard Worker       FREE(spm->block_sel[i].instances);
609*61046927SAndroid Build Coastguard Worker    }
610*61046927SAndroid Build Coastguard Worker 
611*61046927SAndroid Build Coastguard Worker    FREE(spm->block_sel);
612*61046927SAndroid Build Coastguard Worker    FREE(spm->counters);
613*61046927SAndroid Build Coastguard Worker }
614*61046927SAndroid Build Coastguard Worker 
ac_spm_get_sample_size(const struct ac_spm * spm)615*61046927SAndroid Build Coastguard Worker static uint32_t ac_spm_get_sample_size(const struct ac_spm *spm)
616*61046927SAndroid Build Coastguard Worker {
617*61046927SAndroid Build Coastguard Worker    uint32_t sample_size = 0; /* in bytes */
618*61046927SAndroid Build Coastguard Worker 
619*61046927SAndroid Build Coastguard Worker    for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
620*61046927SAndroid Build Coastguard Worker       sample_size += spm->num_muxsel_lines[s] * AC_SPM_MUXSEL_LINE_SIZE * 4;
621*61046927SAndroid Build Coastguard Worker    }
622*61046927SAndroid Build Coastguard Worker 
623*61046927SAndroid Build Coastguard Worker    return sample_size;
624*61046927SAndroid Build Coastguard Worker }
625*61046927SAndroid Build Coastguard Worker 
ac_spm_get_num_samples(const struct ac_spm * spm)626*61046927SAndroid Build Coastguard Worker static uint32_t ac_spm_get_num_samples(const struct ac_spm *spm)
627*61046927SAndroid Build Coastguard Worker {
628*61046927SAndroid Build Coastguard Worker    uint32_t sample_size = ac_spm_get_sample_size(spm);
629*61046927SAndroid Build Coastguard Worker    uint32_t *ptr = (uint32_t *)spm->ptr;
630*61046927SAndroid Build Coastguard Worker    uint32_t data_size, num_lines_written;
631*61046927SAndroid Build Coastguard Worker    uint32_t num_samples = 0;
632*61046927SAndroid Build Coastguard Worker 
633*61046927SAndroid Build Coastguard Worker    /* Get the data size (in bytes) written by the hw to the ring buffer. */
634*61046927SAndroid Build Coastguard Worker    data_size = ptr[0] * spm->ptr_granularity;
635*61046927SAndroid Build Coastguard Worker 
636*61046927SAndroid Build Coastguard Worker    /* Compute the number of 256 bits (16 * 16-bits counters) lines written. */
637*61046927SAndroid Build Coastguard Worker    num_lines_written = data_size / (2 * AC_SPM_NUM_COUNTER_PER_MUXSEL);
638*61046927SAndroid Build Coastguard Worker 
639*61046927SAndroid Build Coastguard Worker    /* Check for overflow. */
640*61046927SAndroid Build Coastguard Worker    if (num_lines_written % (sample_size / 32)) {
641*61046927SAndroid Build Coastguard Worker       abort();
642*61046927SAndroid Build Coastguard Worker    } else {
643*61046927SAndroid Build Coastguard Worker       num_samples = num_lines_written / (sample_size / 32);
644*61046927SAndroid Build Coastguard Worker    }
645*61046927SAndroid Build Coastguard Worker 
646*61046927SAndroid Build Coastguard Worker    return num_samples;
647*61046927SAndroid Build Coastguard Worker }
648*61046927SAndroid Build Coastguard Worker 
ac_spm_get_trace(const struct ac_spm * spm,struct ac_spm_trace * trace)649*61046927SAndroid Build Coastguard Worker void ac_spm_get_trace(const struct ac_spm *spm, struct ac_spm_trace *trace)
650*61046927SAndroid Build Coastguard Worker {
651*61046927SAndroid Build Coastguard Worker    memset(trace, 0, sizeof(*trace));
652*61046927SAndroid Build Coastguard Worker 
653*61046927SAndroid Build Coastguard Worker    trace->ptr = spm->ptr;
654*61046927SAndroid Build Coastguard Worker    trace->sample_interval = spm->sample_interval;
655*61046927SAndroid Build Coastguard Worker    trace->num_counters = spm->num_counters;
656*61046927SAndroid Build Coastguard Worker    trace->counters = spm->counters;
657*61046927SAndroid Build Coastguard Worker    trace->sample_size_in_bytes = ac_spm_get_sample_size(spm);
658*61046927SAndroid Build Coastguard Worker    trace->num_samples = ac_spm_get_num_samples(spm);
659*61046927SAndroid Build Coastguard Worker }
660