/*
 * Copyright © 2021 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include <inttypes.h>

#include "radv_buffer.h"
#include "radv_cs.h"
#include "radv_spm.h"
#include "sid.h"

#define SPM_RING_BASE_ALIGN 32

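/* Allocate the SPM ring buffer: a CPU-visible VRAM BO that the RLC streams
 * counter samples into. The BO is kept resident for the lifetime of the
 * device and mapped once, so samples can be read back directly.
 */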
static bool
radv_spm_init_bo(struct radv_device *device)
{
   struct radeon_winsys *ws = device->ws;
   uint64_t size = 32 * 1024 * 1024; /* Default to 32MB. */
   uint16_t sample_interval = 4096;  /* Default to 4096 clk. */
   VkResult result;

   device->spm.buffer_size = size;
   device->spm.sample_interval = sample_interval;

   struct radeon_winsys_bo *bo = NULL;
   result = radv_bo_create(device, NULL, size, 4096, RADEON_DOMAIN_VRAM,
                           RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
                           RADV_BO_PRIORITY_SCRATCH, 0, true, &bo);
   device->spm.bo = bo;
   if (result != VK_SUCCESS)
      return false;

   result = ws->buffer_make_resident(ws, device->spm.bo, true);
   if (result != VK_SUCCESS)
      return false;

   device->spm.ptr = radv_buffer_map(ws, device->spm.bo);
   if (!device->spm.ptr)
      return false;

   return true;
}

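/* Program the counter select registers. On GFX11+, per-WGP SQ counters are
 * selected explicitly; SQG counters and the generic blocks are programmed on
 * all generations. GRBM broadcasting is narrowed to individual instances
 * while programming and restored to global broadcast at the end.
 */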
static void
radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
   struct ac_spm *spm = &device->spm;

   if (gfx_level >= GFX11) {
      for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sq_wgp); instance++) {
         uint32_t num_counters = spm->sq_wgp[instance].num_counters;

         if (!num_counters)
            continue;

         radeon_check_space(device->ws, cs, 3 + num_counters * 3);

         radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, spm->sq_wgp[instance].grbm_gfx_index);

         for (uint32_t b = 0; b < num_counters; b++) {
            const struct ac_spm_counter_select *cntr_sel = &spm->sq_wgp[instance].counters[b];
            uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;

            radeon_set_uconfig_perfctr_reg_seq(gfx_level, qf, cs, reg_base + b * 4, 1);
            radeon_emit(cs, cntr_sel->sel0);
         }
      }
   }

   for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) {
      uint32_t num_counters = spm->sqg[instance].num_counters;

      if (!num_counters)
         continue;

      radeon_check_space(device->ws, cs, 3 + num_counters * 3);

      radeon_set_uconfig_reg(
         cs, R_030800_GRBM_GFX_INDEX,
         S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1) | S_030800_SE_INDEX(instance));

      for (uint32_t b = 0; b < num_counters; b++) {
         const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b];
         uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;

         radeon_set_uconfig_perfctr_reg_seq(gfx_level, qf, cs, reg_base + b * 4, 1);
         radeon_emit(cs, cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK is GFX10 only. */
      }
   }

   for (uint32_t b = 0; b < spm->num_block_sel; b++) {
      struct ac_spm_block_select *block_sel = &spm->block_sel[b];
      struct ac_pc_block_base *regs = block_sel->b->b->b;

      for (unsigned i = 0; i < block_sel->num_instances; i++) {
         struct ac_spm_block_instance *block_instance = &block_sel->instances[i];

         radeon_check_space(device->ws, cs, 3 + (AC_SPM_MAX_COUNTER_PER_BLOCK * 6));

         radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);

         for (unsigned c = 0; c < block_instance->num_counters; c++) {
            const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];

            if (!cntr_sel->active)
               continue;

            radeon_set_uconfig_perfctr_reg_seq(gfx_level, qf, cs, regs->select0[c], 1);
            radeon_emit(cs, cntr_sel->sel0);

            radeon_set_uconfig_perfctr_reg_seq(gfx_level, qf, cs, regs->select1[c], 1);
            radeon_emit(cs, cntr_sel->sel1);
         }
      }
   }

   /* Restore global broadcasting. */
   radeon_set_uconfig_reg(
      cs, R_030800_GRBM_GFX_INDEX,
      S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1));
}

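/* Emit the whole SPM configuration: ring buffer address/size and sample
 * interval, per-segment muxsel line counts (the register layout differs
 * between GFX10 and GFX11), the muxsel RAM upload for each segment, and
 * finally the counter selects.
 */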
void
radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum radv_queue_family qf)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
   struct ac_spm *spm = &device->spm;
   uint64_t va = radv_buffer_get_va(spm->bo);
   uint64_t ring_size = spm->buffer_size;

   /* It's required that the ring VA and the size are correctly aligned. */
   assert(!(va & (SPM_RING_BASE_ALIGN - 1)));
   assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
   assert(spm->sample_interval >= 32);

   radeon_check_space(device->ws, cs, 27);

   /* Configure the SPM ring buffer. */
   radeon_set_uconfig_reg(cs, R_037200_RLC_SPM_PERFMON_CNTL,
                          S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
                             S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
   radeon_set_uconfig_reg(cs, R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
   radeon_set_uconfig_reg(cs, R_037208_RLC_SPM_PERFMON_RING_BASE_HI, S_037208_RING_BASE_HI(va >> 32));
   radeon_set_uconfig_reg(cs, R_03720C_RLC_SPM_PERFMON_RING_SIZE, ring_size);

   /* Configure the muxsel. */
   uint32_t total_muxsel_lines = 0;
   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
      total_muxsel_lines += spm->num_muxsel_lines[s];
   }

   radeon_set_uconfig_reg(cs, R_03726C_RLC_SPM_ACCUM_MODE, 0);

   if (pdev->info.gfx_level >= GFX11) {
      radeon_set_uconfig_reg(cs, R_03721C_RLC_SPM_PERFMON_SEGMENT_SIZE,
                             S_03721C_TOTAL_NUM_SEGMENT(total_muxsel_lines) |
                                S_03721C_GLOBAL_NUM_SEGMENT(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]) |
                                S_03721C_SE_NUM_SEGMENT(spm->max_se_muxsel_lines));

      radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_RING_WRPTR, 0);
   } else {
      radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
      radeon_set_uconfig_reg(cs, R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
                             S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) |
                                S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) |
                                S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) |
                                S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3]));
      radeon_set_uconfig_reg(cs, R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
                             S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
                                S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]));
   }

   /* Upload each muxsel ram to the RLC. */
   for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
      unsigned rlc_muxsel_addr, rlc_muxsel_data;
      unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1);

      if (!spm->num_muxsel_lines[s])
         continue;

      if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
         grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);

         rlc_muxsel_addr =
            gfx_level >= GFX11 ? R_037220_RLC_SPM_GLOBAL_MUXSEL_ADDR : R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
         rlc_muxsel_data =
            gfx_level >= GFX11 ? R_037224_RLC_SPM_GLOBAL_MUXSEL_DATA : R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
      } else {
         grbm_gfx_index |= S_030800_SE_INDEX(s);

         rlc_muxsel_addr = gfx_level >= GFX11 ? R_037228_RLC_SPM_SE_MUXSEL_ADDR : R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
         rlc_muxsel_data = gfx_level >= GFX11 ? R_03722C_RLC_SPM_SE_MUXSEL_DATA : R_037220_RLC_SPM_SE_MUXSEL_DATA;
      }

      radeon_check_space(device->ws, cs, 3 + spm->num_muxsel_lines[s] * (7 + AC_SPM_MUXSEL_LINE_SIZE));

      radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, grbm_gfx_index);

      for (unsigned l = 0; l < spm->num_muxsel_lines[s]; l++) {
         uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel;

         /* Select MUXSEL_ADDR to point to the next muxsel. */
         radeon_set_uconfig_perfctr_reg(gfx_level, qf, cs, rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE);

         /* Write the muxsel line configuration with MUXSEL_DATA. WR_ONE_ADDR
          * makes every dword land on the same MUXSEL_DATA register, which the
          * RLC auto-increments through the muxsel RAM.
          */
         radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + AC_SPM_MUXSEL_LINE_SIZE, 0));
         radeon_emit(cs, S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME) |
                            S_370_WR_ONE_ADDR(1));
         radeon_emit(cs, rlc_muxsel_data >> 2);
         radeon_emit(cs, 0);
         radeon_emit_array(cs, data, AC_SPM_MUXSEL_LINE_SIZE);
      }
   }

   /* Select SPM counters. */
   radv_emit_spm_counters(device, cs, qf);
}

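/* Initialize SPM. This requires the perfcounter blocks to have been
 * enumerated; it then builds the ac_spm state and allocates the output ring
 * buffer.
 */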
bool
radv_spm_init(struct radv_device *device)
{
   struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radeon_info *gpu_info = &pdev->info;
   struct ac_perfcounters *pc = &pdev->ac_perfcounters;

   /* We failed to initialize the performance counters. */
   if (!pc->blocks)
      return false;

   if (!ac_init_spm(gpu_info, pc, &device->spm))
      return false;

   if (!radv_spm_init_bo(device))
      return false;

   return true;
}

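/* Tear down SPM: evict and destroy the ring buffer BO, then release the
 * ac_spm state.
 */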
void
radv_spm_finish(struct radv_device *device)
{
   struct radeon_winsys *ws = device->ws;

   if (device->spm.bo) {
      ws->buffer_make_resident(ws, device->spm.bo, false);
      radv_bo_destroy(device, NULL, device->spm.bo);
   }

   ac_destroy_spm(&device->spm);
}