1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39
40 #include "vega10_enum.h"
41
42 #include "soc15_common.h"
43 #include "clearstate_gfx9.h"
44 #include "v9_structs.h"
45
46 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
47
48 #include "amdgpu_ras.h"
49
50 #include "amdgpu_ring_mux.h"
51 #include "gfx_v9_4.h"
52 #include "gfx_v9_0.h"
53 #include "gfx_v9_0_cleaner_shader.h"
54 #include "gfx_v9_4_2.h"
55
56 #include "asic_reg/pwr/pwr_10_0_offset.h"
57 #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
58 #include "asic_reg/gc/gc_9_0_default.h"
59
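/*
 * Ring bookkeeping for GFX9: only one hardware GFX ring is exposed, and
 * when the software ring mux (amdgpu_ring_mux.h above) is in use it
 * multiplexes two software GFX rings onto that single hardware ring.
 */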
60 #define GFX9_NUM_GFX_RINGS 1
61 #define GFX9_NUM_SW_GFX_RINGS 2
62 #define GFX9_MEC_HPD_SIZE 4096
63 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
64 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
65
66 #define mmGCEA_PROBE_MAP 0x070c
67 #define mmGCEA_PROBE_MAP_BASE_IDX 0
68
69 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
72 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
73 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
74 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
75
76 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
79 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
80 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
81 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
82
83 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
86 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
87 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
88 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
89
90 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
91 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
92 MODULE_FIRMWARE("amdgpu/raven_me.bin");
93 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
94 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
95 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
96
97 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
101 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
102 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
103 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
104
105 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
108 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
109 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
110 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
111 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
114 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
115
116 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
120 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
121
122 MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
123 MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
124 MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
125 MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
126 MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
127 MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");
128
129 MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
130 MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
131 MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");
132 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec.bin");
133 MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
134
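/*
 * The register offsets below are ASIC-specific (Arcturus TCP channel
 * steering, Renoir golden TSC counter) and are not part of the generic
 * gc_9_0 headers, so they are defined locally here.
 */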
135 #define mmTCP_CHAN_STEER_0_ARCT 0x0b03
136 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0
137 #define mmTCP_CHAN_STEER_1_ARCT 0x0b04
138 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0
139 #define mmTCP_CHAN_STEER_2_ARCT 0x0b09
140 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0
141 #define mmTCP_CHAN_STEER_3_ARCT 0x0b0a
142 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0
143 #define mmTCP_CHAN_STEER_4_ARCT 0x0b0b
144 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0
145 #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c
146 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0
147
148 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir 0x0025
149 #define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX 1
150 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026
151 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1
152
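/*
 * The register lists below are read back and recorded when the GFX IP
 * state is dumped (e.g. for a devcoredump after a hang): gc_reg_list_9
 * covers global CP/RLC/UTCL1 status, while gc_cp_reg_list_9 holds the HQD
 * registers captured per compute queue.
 */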
153 static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
154 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
155 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
156 SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
157 SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
158 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
159 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
160 SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
161 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
162 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
163 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
164 SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
165 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
166 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
167 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
168 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
169 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
170 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
171 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
172 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
173 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
174 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
175 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR),
176 SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
177 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
178 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
179 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
180 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
181 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
182 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
183 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
184 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
185 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
186 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
187 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
188 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
189 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
190 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
191 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
192 SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
193 SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
194 SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
195 SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
196 SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
197 SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
198 SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
199 SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
200 SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
201 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
202 SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
203 SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
204 SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
205 SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
206 SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
207 SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
208 SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
209 SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
210 SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
211 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
212 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
213 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
214 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
215 SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
216 SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
217 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
218 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
219 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
220 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
221 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
222 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
223 SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
224 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
225 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
226 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
227 SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
228 /* cp header registers */
229 SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
230 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
231 SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
232 SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
233 SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
234 /* SE status registers */
235 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
236 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
237 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
238 SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
239 };
240
241 static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
242 /* compute queue registers */
243 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
244 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
245 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
246 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
247 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
248 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
249 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
250 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
251 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
252 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
253 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
254 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
255 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
256 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
257 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
258 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
259 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
260 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
261 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
262 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
263 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
264 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
265 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
266 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
267 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
268 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
269 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
270 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
271 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
272 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
273 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
274 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
275 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
276 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
277 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
278 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
279 SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
280 };
281
282 enum ta_ras_gfx_subblock {
283 /*CPC*/
284 TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
285 TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
286 TA_RAS_BLOCK__GFX_CPC_UCODE,
287 TA_RAS_BLOCK__GFX_DC_STATE_ME1,
288 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
289 TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
290 TA_RAS_BLOCK__GFX_DC_STATE_ME2,
291 TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
292 TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
293 TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
294 /* CPF*/
295 TA_RAS_BLOCK__GFX_CPF_INDEX_START,
296 TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
297 TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
298 TA_RAS_BLOCK__GFX_CPF_TAG,
299 TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
300 /* CPG*/
301 TA_RAS_BLOCK__GFX_CPG_INDEX_START,
302 TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
303 TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
304 TA_RAS_BLOCK__GFX_CPG_TAG,
305 TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
306 /* GDS*/
307 TA_RAS_BLOCK__GFX_GDS_INDEX_START,
308 TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
309 TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
310 TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
311 TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
312 TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
313 TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
314 /* SPI*/
315 TA_RAS_BLOCK__GFX_SPI_SR_MEM,
316 /* SQ*/
317 TA_RAS_BLOCK__GFX_SQ_INDEX_START,
318 TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
319 TA_RAS_BLOCK__GFX_SQ_LDS_D,
320 TA_RAS_BLOCK__GFX_SQ_LDS_I,
321 TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
322 TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
323 /* SQC (3 ranges)*/
324 TA_RAS_BLOCK__GFX_SQC_INDEX_START,
325 /* SQC range 0*/
326 TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
327 TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
328 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
329 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
330 TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
331 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
332 TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
333 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
334 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
335 TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
336 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
337 /* SQC range 1*/
338 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
339 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
340 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
341 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
342 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
343 TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
344 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
345 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
346 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
347 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
348 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
349 TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
350 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
351 /* SQC range 2*/
352 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
353 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
354 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
355 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
356 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
357 TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
358 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
359 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
360 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
361 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
362 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
363 TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
364 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
365 TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
366 /* TA*/
367 TA_RAS_BLOCK__GFX_TA_INDEX_START,
368 TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
369 TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
370 TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
371 TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
372 TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
373 TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
374 /* TCA*/
375 TA_RAS_BLOCK__GFX_TCA_INDEX_START,
376 TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
377 TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
378 TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
379 /* TCC (5 sub-ranges)*/
380 TA_RAS_BLOCK__GFX_TCC_INDEX_START,
381 /* TCC range 0*/
382 TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
383 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
384 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
385 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
386 TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
387 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
388 TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
389 TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
390 TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
391 TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
392 /* TCC range 1*/
393 TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
394 TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
395 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
396 TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
397 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
398 /* TCC range 2*/
399 TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
400 TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
401 TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
402 TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
403 TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
404 TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
405 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
406 TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
407 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
408 TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
409 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
410 /* TCC range 3*/
411 TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
412 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
413 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
414 TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
415 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
416 /* TCC range 4*/
417 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
418 TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
419 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
420 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
421 TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
422 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
423 TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
424 /* TCI*/
425 TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
426 /* TCP*/
427 TA_RAS_BLOCK__GFX_TCP_INDEX_START,
428 TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
429 TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
430 TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
431 TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
432 TA_RAS_BLOCK__GFX_TCP_DB_RAM,
433 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
434 TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
435 TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
436 /* TD*/
437 TA_RAS_BLOCK__GFX_TD_INDEX_START,
438 TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
439 TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
440 TA_RAS_BLOCK__GFX_TD_CS_FIFO,
441 TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
442 /* EA (3 sub-ranges)*/
443 TA_RAS_BLOCK__GFX_EA_INDEX_START,
444 /* EA range 0*/
445 TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
446 TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
447 TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
448 TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
449 TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
450 TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
451 TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
452 TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
453 TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
454 TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
455 /* EA range 1*/
456 TA_RAS_BLOCK__GFX_EA_INDEX1_START,
457 TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
458 TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
459 TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
460 TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
461 TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
462 TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
463 TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
464 TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
465 /* EA range 2*/
466 TA_RAS_BLOCK__GFX_EA_INDEX2_START,
467 TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
468 TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
469 TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
470 TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
471 TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
472 TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
473 /* UTC VM L2 bank*/
474 TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
475 /* UTC VM walker*/
476 TA_RAS_BLOCK__UTC_VML2_WALKER,
477 /* UTC ATC L2 2MB cache*/
478 TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
479 /* UTC ATC L2 4KB cache*/
480 TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
481 TA_RAS_BLOCK__GFX_MAX
482 };
483
484 struct ras_gfx_subblock {
485 unsigned char *name;
486 int ta_subblock;
487 int hw_supported_error_type;
488 int sw_supported_error_type;
489 };
490
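/*
 * AMDGPU_RAS_SUB_BLOCK() packs, per GFX sub-block, which RAS error types
 * the hardware reports (args a..d -> bits 0..3 of hw_supported_error_type)
 * and which the software interface accepts (args e..h -> bits 1, 3, 0, 2
 * of sw_supported_error_type).  The bit positions correspond to the
 * AMDGPU_RAS_ERROR__PARITY / SINGLE_CORRECTABLE / MULTI_UNCORRECTABLE /
 * POISON flags from amdgpu_ras.h.
 */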
491 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \
492 [AMDGPU_RAS_BLOCK__##subblock] = { \
493 #subblock, \
494 TA_RAS_BLOCK__##subblock, \
495 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \
496 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \
497 }
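/*
 * For example, AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1)
 * initializes entry [AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH] to
 * { "GFX_CPC_SCRATCH", TA_RAS_BLOCK__GFX_CPC_SCRATCH, 0xe, 0x6 }.
 */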
498
499 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
500 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
501 AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
502 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
503 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
504 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
505 AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
506 AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
507 AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
508 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
509 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
510 AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
511 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
512 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
513 AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
514 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
515 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
516 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
517 0),
518 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
519 0),
520 AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
521 AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
522 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
523 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
524 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
525 AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
526 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
527 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
528 0, 0),
529 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
530 0),
531 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
532 0, 0),
533 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
534 0),
535 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
536 0, 0),
537 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
538 0),
539 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
540 1),
541 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
542 0, 0, 0),
543 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
544 0),
545 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
546 0),
547 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
548 0),
549 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
550 0),
551 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
552 0),
553 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
554 0, 0),
555 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
556 0),
557 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
558 0),
559 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
560 0, 0, 0),
561 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
562 0),
563 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
564 0),
565 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
566 0),
567 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
568 0),
569 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
570 0),
571 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
572 0, 0),
573 AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
574 0),
575 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
576 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
577 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
578 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
579 AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
580 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
581 AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
582 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
583 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
584 1),
585 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
586 1),
587 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
588 1),
589 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
590 0),
591 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
592 0),
593 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
594 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
595 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
596 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
597 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
598 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
599 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
600 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
601 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
602 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
603 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
604 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
605 0),
606 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
607 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
608 0),
609 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
610 0, 0),
611 AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
612 0),
613 AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
614 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
615 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
616 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
617 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
618 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
619 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
620 AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
621 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
622 AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
623 AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
624 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
625 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
626 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
627 AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
628 AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
629 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
630 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
631 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
632 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
633 AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
634 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
635 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
636 AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
637 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
638 AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
639 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
640 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
641 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
642 AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
643 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
644 AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
645 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
646 AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
647 };
648
649 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
650 {
651 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
652 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
653 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
654 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
655 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
656 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
657 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
658 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
659 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
660 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
661 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
662 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
663 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
664 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
665 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
666 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
667 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
668 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
669 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
670 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
671 };
672
673 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
674 {
675 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
676 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
677 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
678 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
679 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
680 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
681 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
682 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
683 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
684 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
685 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
686 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
687 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
688 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
689 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
690 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
691 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
692 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
693 };
694
695 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
696 {
697 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
698 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
699 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
700 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
701 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
702 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
703 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
704 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
705 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
706 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
707 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
708 };
709
710 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
711 {
712 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
713 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
714 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
715 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
716 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
717 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
718 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
719 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
720 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
721 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
722 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
723 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
724 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
725 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
726 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
727 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
728 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
729 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
730 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
731 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
732 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
733 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
734 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
735 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
736 };
737
738 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
739 {
740 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
741 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
742 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
743 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
744 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
745 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
746 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
747 };
748
749 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
750 {
751 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
752 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
753 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
754 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
755 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
756 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
757 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
758 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
759 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
760 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
761 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
762 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
763 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
764 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
765 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
766 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
767 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
768 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
769 SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
770 };
771
772 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
773 {
774 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
775 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
776 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
777 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
778 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
779 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
780 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
781 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
782 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
783 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
784 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
785 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
786 };
787
788 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
789 {
790 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
791 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
792 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
793 };
794
795 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
796 {
797 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
798 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
799 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
800 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
801 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
802 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
803 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
804 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
805 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
806 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
807 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
808 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
809 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
810 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
811 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
812 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
813 };
814
815 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
816 {
817 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
818 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
819 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
820 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
821 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
822 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
823 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
824 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
825 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
826 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
827 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
828 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
829 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
830 };
831
832 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
833 {
834 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
835 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
836 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
837 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
838 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
839 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
840 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
841 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
842 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
843 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
844 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
845 };
846
847 static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
848 {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
849 {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
850 };
851
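/*
 * Offsets of the RLC save/restore-list index control registers relative to
 * the first instance; the register addresses are not assumed to be
 * contiguous, so each offset is spelled out explicitly.
 */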
852 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
853 {
854 mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
855 mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
856 mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
857 mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
858 mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
859 mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
860 mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
861 mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
862 };
863
864 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
865 {
866 mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
867 mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
868 mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
869 mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
870 mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
871 mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
872 mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
873 mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
874 };
875
876 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
877 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
878 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
879 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
880
881 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
882 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
883 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
884 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
885 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
886 struct amdgpu_cu_info *cu_info);
887 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
888 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds);
889 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
890 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
891 void *ras_error_status);
892 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
893 void *inject_if, uint32_t instance_mask);
894 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);
895 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
896 unsigned int vmid);
897 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
898 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
899
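/*
 * KIQ packet helper: SET_RESOURCES hands the kernel interface queue the
 * bitmap of compute queues it may schedule plus the GPU address of the
 * cleaner shader.  One header dword plus seven payload dwords, matching
 * .set_resources_size = 8 in gfx_v9_0_kiq_pm4_funcs below.
 */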
900 static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
901 uint64_t queue_mask)
902 {
903 struct amdgpu_device *adev = kiq_ring->adev;
904 u64 shader_mc_addr;
905
906 /* Cleaner shader MC address */
907 shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
908
909 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
910 amdgpu_ring_write(kiq_ring,
911 PACKET3_SET_RESOURCES_VMID_MASK(0) |
912 /* vmid_mask:0 queue_type:0 (KIQ) */
913 PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
914 amdgpu_ring_write(kiq_ring,
915 lower_32_bits(queue_mask)); /* queue mask lo */
916 amdgpu_ring_write(kiq_ring,
917 upper_32_bits(queue_mask)); /* queue mask hi */
918 amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
919 amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
920 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
921 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
922 }
923
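/*
 * MAP_QUEUES asks the KIQ to bind a ring to a hardware queue slot
 * (me/pipe/queue plus doorbell) and tells the CP where the ring's MQD and
 * write-pointer poll address live.
 */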
924 static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
925 struct amdgpu_ring *ring)
926 {
927 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
928 uint64_t wptr_addr = ring->wptr_gpu_addr;
929 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
930
931 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
932 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
933 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
934 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
935 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
936 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
937 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
938 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
939 /*queue_type: normal compute queue */
940 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
941 /* alloc format: all_on_one_pipe */
942 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
943 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
944 /* num_queues: must be 1 */
945 PACKET3_MAP_QUEUES_NUM_QUEUES(1));
946 amdgpu_ring_write(kiq_ring,
947 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
948 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
949 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
950 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
951 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
952 }
953
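/*
 * UNMAP_QUEUES removes (or preempts) a queue identified by its doorbell.
 * For PREEMPT_QUEUES_NO_UNMAP the current write pointer is passed back so
 * the queue can later resume from where it was preempted.
 */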
954 static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
955 struct amdgpu_ring *ring,
956 enum amdgpu_unmap_queues_action action,
957 u64 gpu_addr, u64 seq)
958 {
959 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
960
961 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
962 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
963 PACKET3_UNMAP_QUEUES_ACTION(action) |
964 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
965 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
966 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
967 amdgpu_ring_write(kiq_ring,
968 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
969
970 if (action == PREEMPT_QUEUES_NO_UNMAP) {
971 amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
972 amdgpu_ring_write(kiq_ring, 0);
973 amdgpu_ring_write(kiq_ring, 0);
974
975 } else {
976 amdgpu_ring_write(kiq_ring, 0);
977 amdgpu_ring_write(kiq_ring, 0);
978 amdgpu_ring_write(kiq_ring, 0);
979 }
980 }
981
982 static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
983 struct amdgpu_ring *ring,
984 u64 addr,
985 u64 seq)
986 {
987 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
988
989 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
990 amdgpu_ring_write(kiq_ring,
991 PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
992 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
993 PACKET3_QUERY_STATUS_COMMAND(2));
994 /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
995 amdgpu_ring_write(kiq_ring,
996 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
997 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
998 amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
999 amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
1000 amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
1001 amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
1002 }
1003
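/*
 * INVALIDATE_TLBS flushes GPUVM translations for the given PASID,
 * optionally on all VM hubs, using the requested flush type.
 */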
1004 static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
1005 uint16_t pasid, uint32_t flush_type,
1006 bool all_hub)
1007 {
1008 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
1009 amdgpu_ring_write(kiq_ring,
1010 PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
1011 PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
1012 PACKET3_INVALIDATE_TLBS_PASID(pasid) |
1013 PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
1014 }
1015
1016
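/*
 * Per-queue reset path used when a single compute queue hangs: select the
 * queue through GRBM, request a dequeue plus an SPI queue reset, then poll
 * CP_HQD_ACTIVE until the HQD goes idle.
 */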
1017 static void gfx_v9_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type,
1018 uint32_t me_id, uint32_t pipe_id, uint32_t queue_id,
1019 uint32_t xcc_id, uint32_t vmid)
1020 {
1021 struct amdgpu_device *adev = kiq_ring->adev;
1022 unsigned i;
1023
1024 /* enter safe mode */
1025 amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);
1026 mutex_lock(&adev->srbm_mutex);
1027 soc15_grbm_select(adev, me_id, pipe_id, queue_id, 0, 0);
1028
1029 if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
1030 WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2);
1031 WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1);
1032 /* wait until the dequeue request takes effect */
1033 for (i = 0; i < adev->usec_timeout; i++) {
1034 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
1035 break;
1036 udelay(1);
1037 }
1038 if (i >= adev->usec_timeout)
1039 dev_err(adev->dev, "fail to wait on hqd deactive\n");
1040 } else {
1041 dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type);
1042 }
1043
1044 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
1045 mutex_unlock(&adev->srbm_mutex);
1046 /* exit safe mode */
1047 amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);
1048 }
1049
1050 static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
1051 .kiq_set_resources = gfx_v9_0_kiq_set_resources,
1052 .kiq_map_queues = gfx_v9_0_kiq_map_queues,
1053 .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
1054 .kiq_query_status = gfx_v9_0_kiq_query_status,
1055 .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
1056 .kiq_reset_hw_queue = gfx_v9_0_kiq_reset_hw_queue,
1057 .set_resources_size = 8,
1058 .map_queues_size = 7,
1059 .unmap_queues_size = 6,
1060 .query_status_size = 7,
1061 .invalidate_tlbs_size = 2,
1062 };
1063
1064 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
1065 {
1066 adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs;
1067 }
1068
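/*
 * Program the per-ASIC "golden" register overrides; which tables apply is
 * keyed off the GC IP version, with the common gc_9_x table applied last
 * except on Arcturus and Aldebaran.
 */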
1069 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
1070 {
1071 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1072 case IP_VERSION(9, 0, 1):
1073 soc15_program_register_sequence(adev,
1074 golden_settings_gc_9_0,
1075 ARRAY_SIZE(golden_settings_gc_9_0));
1076 soc15_program_register_sequence(adev,
1077 golden_settings_gc_9_0_vg10,
1078 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
1079 break;
1080 case IP_VERSION(9, 2, 1):
1081 soc15_program_register_sequence(adev,
1082 golden_settings_gc_9_2_1,
1083 ARRAY_SIZE(golden_settings_gc_9_2_1));
1084 soc15_program_register_sequence(adev,
1085 golden_settings_gc_9_2_1_vg12,
1086 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
1087 break;
1088 case IP_VERSION(9, 4, 0):
1089 soc15_program_register_sequence(adev,
1090 golden_settings_gc_9_0,
1091 ARRAY_SIZE(golden_settings_gc_9_0));
1092 soc15_program_register_sequence(adev,
1093 golden_settings_gc_9_0_vg20,
1094 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
1095 break;
1096 case IP_VERSION(9, 4, 1):
1097 soc15_program_register_sequence(adev,
1098 golden_settings_gc_9_4_1_arct,
1099 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
1100 break;
1101 case IP_VERSION(9, 2, 2):
1102 case IP_VERSION(9, 1, 0):
1103 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
1104 ARRAY_SIZE(golden_settings_gc_9_1));
1105 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1106 soc15_program_register_sequence(adev,
1107 golden_settings_gc_9_1_rv2,
1108 ARRAY_SIZE(golden_settings_gc_9_1_rv2));
1109 else
1110 soc15_program_register_sequence(adev,
1111 golden_settings_gc_9_1_rv1,
1112 ARRAY_SIZE(golden_settings_gc_9_1_rv1));
1113 break;
1114 case IP_VERSION(9, 3, 0):
1115 soc15_program_register_sequence(adev,
1116 golden_settings_gc_9_1_rn,
1117 ARRAY_SIZE(golden_settings_gc_9_1_rn));
1118 return; /* Renoir doesn't need the common golden settings */
1119 case IP_VERSION(9, 4, 2):
1120 gfx_v9_4_2_init_golden_registers(adev,
1121 adev->smuio.funcs->get_die_id(adev));
1122 break;
1123 default:
1124 break;
1125 }
1126
1127 if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1128 (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)))
1129 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
1130 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
1131 }
1132
1133 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
1134 bool wc, uint32_t reg, uint32_t val)
1135 {
1136 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
1137 amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
1138 WRITE_DATA_DST_SEL(0) |
1139 (wc ? WR_CONFIRM : 0));
1140 amdgpu_ring_write(ring, reg);
1141 amdgpu_ring_write(ring, 0);
1142 amdgpu_ring_write(ring, val);
1143 }
1144
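/*
 * Emit a WAIT_REG_MEM packet that stalls the selected engine until
 * (*addr & mask) == ref, where the address is either a register
 * (mem_space = 0) or a memory location (mem_space = 1); inv is the poll
 * interval.
 */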
1145 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
1146 int mem_space, int opt, uint32_t addr0,
1147 uint32_t addr1, uint32_t ref, uint32_t mask,
1148 uint32_t inv)
1149 {
1150 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
1151 amdgpu_ring_write(ring,
1152 /* memory (1) or register (0) */
1153 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
1154 WAIT_REG_MEM_OPERATION(opt) | /* wait */
1155 WAIT_REG_MEM_FUNCTION(3) | /* equal */
1156 WAIT_REG_MEM_ENGINE(eng_sel)));
1157
1158 if (mem_space)
1159 BUG_ON(addr0 & 0x3); /* Dword align */
1160 amdgpu_ring_write(ring, addr0);
1161 amdgpu_ring_write(ring, addr1);
1162 amdgpu_ring_write(ring, ref);
1163 amdgpu_ring_write(ring, mask);
1164 amdgpu_ring_write(ring, inv); /* poll interval */
1165 }
1166
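/*
 * Basic ring test: write 0xDEADBEEF to SCRATCH_REG0 through the ring and
 * poll the register until the value shows up (or the timeout expires).
 */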
1167 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
1168 {
1169 struct amdgpu_device *adev = ring->adev;
1170 uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1171 uint32_t tmp = 0;
1172 unsigned i;
1173 int r;
1174
1175 WREG32(scratch, 0xCAFEDEAD);
1176 r = amdgpu_ring_alloc(ring, 3);
1177 if (r)
1178 return r;
1179
1180 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1181 amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
1182 amdgpu_ring_write(ring, 0xDEADBEEF);
1183 amdgpu_ring_commit(ring);
1184
1185 for (i = 0; i < adev->usec_timeout; i++) {
1186 tmp = RREG32(scratch);
1187 if (tmp == 0xDEADBEEF)
1188 break;
1189 udelay(1);
1190 }
1191
1192 if (i >= adev->usec_timeout)
1193 r = -ETIMEDOUT;
1194 return r;
1195 }
1196
1197 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1198 {
1199 struct amdgpu_device *adev = ring->adev;
1200 struct amdgpu_ib ib;
1201 struct dma_fence *f = NULL;
1202
1203 unsigned index;
1204 uint64_t gpu_addr;
1205 uint32_t tmp;
1206 long r;
1207
1208 r = amdgpu_device_wb_get(adev, &index);
1209 if (r)
1210 return r;
1211
1212 gpu_addr = adev->wb.gpu_addr + (index * 4);
1213 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1214 memset(&ib, 0, sizeof(ib));
1215
1216 r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
1217 if (r)
1218 goto err1;
1219
1220 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1221 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1222 ib.ptr[2] = lower_32_bits(gpu_addr);
1223 ib.ptr[3] = upper_32_bits(gpu_addr);
1224 ib.ptr[4] = 0xDEADBEEF;
1225 ib.length_dw = 5;
1226
1227 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1228 if (r)
1229 goto err2;
1230
1231 r = dma_fence_wait_timeout(f, false, timeout);
1232 if (r == 0) {
1233 r = -ETIMEDOUT;
1234 goto err2;
1235 } else if (r < 0) {
1236 goto err2;
1237 }
1238
1239 tmp = adev->wb.wb[index];
1240 if (tmp == 0xDEADBEEF)
1241 r = 0;
1242 else
1243 r = -EINVAL;
1244
1245 err2:
1246 amdgpu_ib_free(&ib, NULL);
1247 dma_fence_put(f);
1248 err1:
1249 amdgpu_device_wb_free(adev, index);
1250 return r;
1251 }
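/*
 * Layout note (illustrative): the IB built above is a five-dword WRITE_DATA
 * packet assembled by hand:
 *
 *	ptr[0]  PACKET3(PACKET3_WRITE_DATA, 3)        packet header
 *	ptr[1]  WRITE_DATA_DST_SEL(5) | WR_CONFIRM    destination select
 *	ptr[2]  lower_32_bits(gpu_addr)               writeback slot, low bits
 *	ptr[3]  upper_32_bits(gpu_addr)               writeback slot, high bits
 *	ptr[4]  0xDEADBEEF                            payload
 *
 * DST_SEL(5) directs the write to memory rather than a register, so once the
 * fence signals the CPU can check the payload in adev->wb.wb[index].
 */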
1252
1253
1254 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1255 {
1256 amdgpu_ucode_release(&adev->gfx.pfp_fw);
1257 amdgpu_ucode_release(&adev->gfx.me_fw);
1258 amdgpu_ucode_release(&adev->gfx.ce_fw);
1259 amdgpu_ucode_release(&adev->gfx.rlc_fw);
1260 amdgpu_ucode_release(&adev->gfx.mec_fw);
1261 amdgpu_ucode_release(&adev->gfx.mec2_fw);
1262
1263 kfree(adev->gfx.rlc.register_list_format);
1264 }
1265
1266 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1267 {
1268 adev->gfx.me_fw_write_wait = false;
1269 adev->gfx.mec_fw_write_wait = false;
1270
1271 if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) &&
1272 (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) &&
1273 ((adev->gfx.mec_fw_version < 0x000001a5) ||
1274 (adev->gfx.mec_feature_version < 46) ||
1275 (adev->gfx.pfp_fw_version < 0x000000b7) ||
1276 (adev->gfx.pfp_feature_version < 46)))
1277 DRM_WARN_ONCE("CP firmware version too old, please update!");
1278
1279 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1280 case IP_VERSION(9, 0, 1):
1281 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1282 (adev->gfx.me_feature_version >= 42) &&
1283 (adev->gfx.pfp_fw_version >= 0x000000b1) &&
1284 (adev->gfx.pfp_feature_version >= 42))
1285 adev->gfx.me_fw_write_wait = true;
1286
1287 if ((adev->gfx.mec_fw_version >= 0x00000193) &&
1288 (adev->gfx.mec_feature_version >= 42))
1289 adev->gfx.mec_fw_write_wait = true;
1290 break;
1291 case IP_VERSION(9, 2, 1):
1292 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1293 (adev->gfx.me_feature_version >= 44) &&
1294 (adev->gfx.pfp_fw_version >= 0x000000b2) &&
1295 (adev->gfx.pfp_feature_version >= 44))
1296 adev->gfx.me_fw_write_wait = true;
1297
1298 if ((adev->gfx.mec_fw_version >= 0x00000196) &&
1299 (adev->gfx.mec_feature_version >= 44))
1300 adev->gfx.mec_fw_write_wait = true;
1301 break;
1302 case IP_VERSION(9, 4, 0):
1303 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1304 (adev->gfx.me_feature_version >= 44) &&
1305 (adev->gfx.pfp_fw_version >= 0x000000b2) &&
1306 (adev->gfx.pfp_feature_version >= 44))
1307 adev->gfx.me_fw_write_wait = true;
1308
1309 if ((adev->gfx.mec_fw_version >= 0x00000197) &&
1310 (adev->gfx.mec_feature_version >= 44))
1311 adev->gfx.mec_fw_write_wait = true;
1312 break;
1313 case IP_VERSION(9, 1, 0):
1314 case IP_VERSION(9, 2, 2):
1315 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1316 (adev->gfx.me_feature_version >= 42) &&
1317 (adev->gfx.pfp_fw_version >= 0x000000b1) &&
1318 (adev->gfx.pfp_feature_version >= 42))
1319 adev->gfx.me_fw_write_wait = true;
1320
1321 if ((adev->gfx.mec_fw_version >= 0x00000192) &&
1322 (adev->gfx.mec_feature_version >= 42))
1323 adev->gfx.mec_fw_write_wait = true;
1324 break;
1325 default:
1326 adev->gfx.me_fw_write_wait = true;
1327 adev->gfx.mec_fw_write_wait = true;
1328 break;
1329 }
1330 }
1331
1332 struct amdgpu_gfxoff_quirk {
1333 u16 chip_vendor;
1334 u16 chip_device;
1335 u16 subsys_vendor;
1336 u16 subsys_device;
1337 u8 revision;
1338 };
1339
1340 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1341 /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1342 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1343 /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1344 { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1345 /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1346 { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1347 /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
1348 { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
1349 /* https://bbs.openkylin.top/t/topic/171497 */
1350 { 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 },
1351 /* HP 705G4 DM with R5 2400G */
1352 { 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 },
1353 { 0, 0, 0, 0, 0 },
1354 };
1355
1356 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1357 {
1358 const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1359
1360 while (p && p->chip_device != 0) {
1361 if (pdev->vendor == p->chip_vendor &&
1362 pdev->device == p->chip_device &&
1363 pdev->subsystem_vendor == p->subsys_vendor &&
1364 pdev->subsystem_device == p->subsys_device &&
1365 pdev->revision == p->revision) {
1366 return true;
1367 }
1368 ++p;
1369 }
1370 return false;
1371 }
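/*
 * Example only: a quirk entry matches when all five PCI identifiers compare
 * equal, so a hypothetical new entry for a problematic board would look like
 * the made-up line below and must be placed before the all-zero terminator
 * in amdgpu_gfxoff_quirk_list:
 *
 *	{ 0x1002, 0x15dd, 0x1234, 0x5678, 0xc1 },
 *
 * The subsystem vendor/device pair (0x1234/0x5678) is fictitious and shown
 * purely to illustrate the format.
 */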
1372
1373 static bool is_raven_kicker(struct amdgpu_device *adev)
1374 {
1375 if (adev->pm.fw_version >= 0x41e2b)
1376 return true;
1377 else
1378 return false;
1379 }
1380
1381 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1382 {
1383 if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) &&
1384 (adev->gfx.me_fw_version >= 0x000000a5) &&
1385 (adev->gfx.me_feature_version >= 52))
1386 return true;
1387 else
1388 return false;
1389 }
1390
1391 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1392 {
1393 if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1394 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1395
1396 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1397 case IP_VERSION(9, 0, 1):
1398 case IP_VERSION(9, 2, 1):
1399 case IP_VERSION(9, 4, 0):
1400 break;
1401 case IP_VERSION(9, 2, 2):
1402 case IP_VERSION(9, 1, 0):
1403 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1404 (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1405 ((!is_raven_kicker(adev) &&
1406 adev->gfx.rlc_fw_version < 531) ||
1407 (adev->gfx.rlc_feature_version < 1) ||
1408 !adev->gfx.rlc.is_rlc_v2_1))
1409 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1410
1411 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1412 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1413 AMD_PG_SUPPORT_CP |
1414 AMD_PG_SUPPORT_RLC_SMU_HS;
1415 break;
1416 case IP_VERSION(9, 3, 0):
1417 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1418 adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1419 AMD_PG_SUPPORT_CP |
1420 AMD_PG_SUPPORT_RLC_SMU_HS;
1421 break;
1422 default:
1423 break;
1424 }
1425 }
1426
1427 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1428 char *chip_name)
1429 {
1430 int err;
1431
1432 err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
1433 AMDGPU_UCODE_REQUIRED,
1434 "amdgpu/%s_pfp.bin", chip_name);
1435 if (err)
1436 goto out;
1437 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP);
1438
1439 err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
1440 AMDGPU_UCODE_REQUIRED,
1441 "amdgpu/%s_me.bin", chip_name);
1442 if (err)
1443 goto out;
1444 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME);
1445
1446 err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
1447 AMDGPU_UCODE_REQUIRED,
1448 "amdgpu/%s_ce.bin", chip_name);
1449 if (err)
1450 goto out;
1451 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
1452
1453 out:
1454 if (err) {
1455 amdgpu_ucode_release(&adev->gfx.pfp_fw);
1456 amdgpu_ucode_release(&adev->gfx.me_fw);
1457 amdgpu_ucode_release(&adev->gfx.ce_fw);
1458 }
1459 return err;
1460 }
1461
1462 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1463 char *chip_name)
1464 {
1465 int err;
1466 const struct rlc_firmware_header_v2_0 *rlc_hdr;
1467 uint16_t version_major;
1468 uint16_t version_minor;
1469 uint32_t smu_version;
1470
1471 /*
1472 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
1473 * instead of picasso_rlc.bin.
1474 * Judgment method:
1475 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1476 * or revision >= 0xD8 && revision <= 0xDF
1477 * otherwise is PCO FP5
1478 */
1479 if (!strcmp(chip_name, "picasso") &&
1480 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1481 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1482 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1483 AMDGPU_UCODE_REQUIRED,
1484 "amdgpu/%s_rlc_am4.bin", chip_name);
1485 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1486 (smu_version >= 0x41e2b))
1487 /*
1488 * SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
1489 */
1490 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1491 AMDGPU_UCODE_REQUIRED,
1492 "amdgpu/%s_kicker_rlc.bin", chip_name);
1493 else
1494 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
1495 AMDGPU_UCODE_REQUIRED,
1496 "amdgpu/%s_rlc.bin", chip_name);
1497 if (err)
1498 goto out;
1499
1500 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1501 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1502 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1503 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
1504 out:
1505 if (err)
1506 amdgpu_ucode_release(&adev->gfx.rlc_fw);
1507
1508 return err;
1509 }
1510
1511 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1512 {
1513 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
1514 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
1515 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0))
1516 return false;
1517
1518 return true;
1519 }
1520
1521 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1522 char *chip_name)
1523 {
1524 int err;
1525
1526 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1527 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1528 AMDGPU_UCODE_REQUIRED,
1529 "amdgpu/%s_sjt_mec.bin", chip_name);
1530 else
1531 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
1532 AMDGPU_UCODE_REQUIRED,
1533 "amdgpu/%s_mec.bin", chip_name);
1534 if (err)
1535 goto out;
1536
1537 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1);
1538 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT);
1539
1540 if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1541 if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
1542 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1543 AMDGPU_UCODE_REQUIRED,
1544 "amdgpu/%s_sjt_mec2.bin", chip_name);
1545 else
1546 err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
1547 AMDGPU_UCODE_REQUIRED,
1548 "amdgpu/%s_mec2.bin", chip_name);
1549 if (!err) {
1550 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2);
1551 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT);
1552 } else {
1553 err = 0;
1554 amdgpu_ucode_release(&adev->gfx.mec2_fw);
1555 }
1556 } else {
1557 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1558 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1559 }
1560
1561 gfx_v9_0_check_if_need_gfxoff(adev);
1562 gfx_v9_0_check_fw_write_wait(adev);
1563
1564 out:
1565 if (err)
1566 amdgpu_ucode_release(&adev->gfx.mec_fw);
1567 return err;
1568 }
1569
1570 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1571 {
1572 char ucode_prefix[30];
1573 int r;
1574
1575 DRM_DEBUG("\n");
1576 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
1577
1578 /* No CPG in Arcturus */
1579 if (adev->gfx.num_gfx_rings) {
1580 r = gfx_v9_0_init_cp_gfx_microcode(adev, ucode_prefix);
1581 if (r)
1582 return r;
1583 }
1584
1585 r = gfx_v9_0_init_rlc_microcode(adev, ucode_prefix);
1586 if (r)
1587 return r;
1588
1589 r = gfx_v9_0_init_cp_compute_microcode(adev, ucode_prefix);
1590 if (r)
1591 return r;
1592
1593 return r;
1594 }
1595
1596 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1597 {
1598 u32 count = 0;
1599 const struct cs_section_def *sect = NULL;
1600 const struct cs_extent_def *ext = NULL;
1601
1602 /* begin clear state */
1603 count += 2;
1604 /* context control state */
1605 count += 3;
1606
1607 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1608 for (ext = sect->section; ext->extent != NULL; ++ext) {
1609 if (sect->id == SECT_CONTEXT)
1610 count += 2 + ext->reg_count;
1611 else
1612 return 0;
1613 }
1614 }
1615
1616 /* end clear state */
1617 count += 2;
1618 /* clear state */
1619 count += 2;
1620
1621 return count;
1622 }
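/*
 * Worked example (informal): for a cs_data layout with one SECT_CONTEXT
 * section containing two extents of 3 and 5 registers, the size computed
 * above is
 *
 *	2 (begin clear state) + 3 (context control)
 *	+ (2 + 3) + (2 + 5)   (one SET_CONTEXT_REG header pair per extent)
 *	+ 2 (end clear state) + 2 (clear state)
 *	= 21 dwords,
 *
 * which matches, dword for dword, what gfx_v9_0_get_csb_buffer() below
 * writes into the clear-state buffer.
 */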
1623
1624 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1625 volatile u32 *buffer)
1626 {
1627 u32 count = 0, i;
1628 const struct cs_section_def *sect = NULL;
1629 const struct cs_extent_def *ext = NULL;
1630
1631 if (adev->gfx.rlc.cs_data == NULL)
1632 return;
1633 if (buffer == NULL)
1634 return;
1635
1636 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1637 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1638
1639 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1640 buffer[count++] = cpu_to_le32(0x80000000);
1641 buffer[count++] = cpu_to_le32(0x80000000);
1642
1643 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1644 for (ext = sect->section; ext->extent != NULL; ++ext) {
1645 if (sect->id == SECT_CONTEXT) {
1646 buffer[count++] =
1647 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1648 buffer[count++] = cpu_to_le32(ext->reg_index -
1649 PACKET3_SET_CONTEXT_REG_START);
1650 for (i = 0; i < ext->reg_count; i++)
1651 buffer[count++] = cpu_to_le32(ext->extent[i]);
1652 } else {
1653 return;
1654 }
1655 }
1656 }
1657
1658 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1659 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1660
1661 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1662 buffer[count++] = cpu_to_le32(0);
1663 }
1664
1665 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1666 {
1667 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1668 uint32_t pg_always_on_cu_num = 2;
1669 uint32_t always_on_cu_num;
1670 uint32_t i, j, k;
1671 uint32_t mask, cu_bitmap, counter;
1672
1673 if (adev->flags & AMD_IS_APU)
1674 always_on_cu_num = 4;
1675 else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1))
1676 always_on_cu_num = 8;
1677 else
1678 always_on_cu_num = 12;
1679
1680 mutex_lock(&adev->grbm_idx_mutex);
1681 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1682 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1683 mask = 1;
1684 cu_bitmap = 0;
1685 counter = 0;
1686 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
1687
1688 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
1689 if (cu_info->bitmap[0][i][j] & mask) {
1690 if (counter == pg_always_on_cu_num)
1691 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1692 if (counter < always_on_cu_num)
1693 cu_bitmap |= mask;
1694 else
1695 break;
1696 counter++;
1697 }
1698 mask <<= 1;
1699 }
1700
1701 WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1702 cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1703 }
1704 }
1705 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1706 mutex_unlock(&adev->grbm_idx_mutex);
1707 }
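/*
 * Informal example of the walk above: with always_on_cu_num = 12 and
 * pg_always_on_cu_num = 2, the bitmap holding the first two present CUs in a
 * SH is latched into RLC_PG_ALWAYS_ON_CU_MASK, the first twelve present CUs
 * set bits in cu_bitmap, and the loop stops at the thirteenth present CU.
 * The final cu_bitmap is then written to RLC_LB_ALWAYS_ACTIVE_CU_MASK and
 * cached in cu_info->ao_cu_bitmap[i][j].
 */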
1708
1709 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1710 {
1711 uint32_t data;
1712
1713 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1714 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1715 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1716 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1717 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1718
1719 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1720 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1721
1722 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1723 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1724
1725 mutex_lock(&adev->grbm_idx_mutex);
1726 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH */
1727 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1728 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1729
1730 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1731 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1732 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1733 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1734 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1735
1736 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1737 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1738 data &= 0x0000FFFF;
1739 data |= 0x00C00000;
1740 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1741
1742 /*
1743 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1744 * programmed in gfx_v9_0_init_always_on_cu_mask()
1745 */
1746
1747 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1748 * but is used here as part of the RLC_LB_CNTL configuration */
1749 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1750 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1751 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1752 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1753 mutex_unlock(&adev->grbm_idx_mutex);
1754
1755 gfx_v9_0_init_always_on_cu_mask(adev);
1756 }
1757
1758 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1759 {
1760 uint32_t data;
1761
1762 /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1763 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1764 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1765 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1766 WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1767
1768 /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1769 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1770
1771 /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1772 WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1773
1774 mutex_lock(&adev->grbm_idx_mutex);
1775 /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH */
1776 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
1777 WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1778
1779 /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1780 data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1781 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1782 data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1783 WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1784
1785 /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1786 data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1787 data &= 0x0000FFFF;
1788 data |= 0x00C00000;
1789 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1790
1791 /*
1792 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1793 * programmed in gfx_v9_0_init_always_on_cu_mask()
1794 */
1795
1796 /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved,
1797 * but is used here as part of the RLC_LB_CNTL configuration */
1798 data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1799 data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1800 data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1801 WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1802 mutex_unlock(&adev->grbm_idx_mutex);
1803
1804 gfx_v9_0_init_always_on_cu_mask(adev);
1805 }
1806
1807 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1808 {
1809 WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1810 }
1811
1812 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1813 {
1814 if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1815 return 5;
1816 else
1817 return 4;
1818 }
1819
1820 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1821 {
1822 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1823
1824 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0];
1825 reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1826 reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1827 reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1828 reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1829 reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1830 reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1831 reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1832 adev->gfx.rlc.rlcg_reg_access_supported = true;
1833 }
1834
1835 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1836 {
1837 const struct cs_section_def *cs_data;
1838 int r;
1839
1840 adev->gfx.rlc.cs_data = gfx9_cs_data;
1841
1842 cs_data = adev->gfx.rlc.cs_data;
1843
1844 if (cs_data) {
1845 /* init clear state block */
1846 r = amdgpu_gfx_rlc_init_csb(adev);
1847 if (r)
1848 return r;
1849 }
1850
1851 if (adev->flags & AMD_IS_APU) {
1852 /* TODO: double check the cp_table_size for RV */
1853 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1854 r = amdgpu_gfx_rlc_init_cpt(adev);
1855 if (r)
1856 return r;
1857 }
1858
1859 return 0;
1860 }
1861
1862 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1863 {
1864 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1865 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1866 }
1867
1868 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1869 {
1870 int r;
1871 u32 *hpd;
1872 const __le32 *fw_data;
1873 unsigned fw_size;
1874 u32 *fw;
1875 size_t mec_hpd_size;
1876
1877 const struct gfx_firmware_header_v1_0 *mec_hdr;
1878
1879 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1880
1881 /* take ownership of the relevant compute queues */
1882 amdgpu_gfx_compute_queue_acquire(adev);
1883 mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1884 if (mec_hpd_size) {
1885 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1886 AMDGPU_GEM_DOMAIN_VRAM |
1887 AMDGPU_GEM_DOMAIN_GTT,
1888 &adev->gfx.mec.hpd_eop_obj,
1889 &adev->gfx.mec.hpd_eop_gpu_addr,
1890 (void **)&hpd);
1891 if (r) {
1892 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1893 gfx_v9_0_mec_fini(adev);
1894 return r;
1895 }
1896
1897 memset(hpd, 0, mec_hpd_size);
1898
1899 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1900 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1901 }
1902
1903 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1904
1905 fw_data = (const __le32 *)
1906 (adev->gfx.mec_fw->data +
1907 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1908 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1909
1910 r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1911 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1912 &adev->gfx.mec.mec_fw_obj,
1913 &adev->gfx.mec.mec_fw_gpu_addr,
1914 (void **)&fw);
1915 if (r) {
1916 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1917 gfx_v9_0_mec_fini(adev);
1918 return r;
1919 }
1920
1921 memcpy(fw, fw_data, fw_size);
1922
1923 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1924 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1925
1926 return 0;
1927 }
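/*
 * Sizing note (illustrative): the HPD EOP buffer allocated above is
 * num_compute_rings * GFX9_MEC_HPD_SIZE bytes; with, say, eight compute
 * rings that is 8 * 4096 = 32 KiB, created with PAGE_SIZE alignment by
 * amdgpu_bo_create_reserved().  The second BO simply holds a copy of the MEC
 * ucode in GTT so the firmware image is available at a GPU address.
 */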
1928
1929 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1930 {
1931 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1932 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1933 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1934 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1935 (SQ_IND_INDEX__FORCE_READ_MASK));
1936 return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1937 }
1938
1939 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1940 uint32_t wave, uint32_t thread,
1941 uint32_t regno, uint32_t num, uint32_t *out)
1942 {
1943 WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
1944 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1945 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1946 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1947 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1948 (SQ_IND_INDEX__FORCE_READ_MASK) |
1949 (SQ_IND_INDEX__AUTO_INCR_MASK));
1950 while (num--)
1951 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1952 }
1953
1954 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1955 {
1956 /* type 1 wave data */
1957 dst[(*no_fields)++] = 1;
1958 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1959 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1960 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1961 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1962 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1963 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1964 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1965 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1966 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1967 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1968 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1969 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1970 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1971 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1972 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
1973 }
1974
1975 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1976 uint32_t wave, uint32_t start,
1977 uint32_t size, uint32_t *dst)
1978 {
1979 wave_read_regs(
1980 adev, simd, wave, 0,
1981 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1982 }
1983
1984 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd,
1985 uint32_t wave, uint32_t thread,
1986 uint32_t start, uint32_t size,
1987 uint32_t *dst)
1988 {
1989 wave_read_regs(
1990 adev, simd, wave, thread,
1991 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1992 }
1993
1994 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1995 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
1996 {
1997 soc15_grbm_select(adev, me, pipe, q, vm, 0);
1998 }
1999
2000 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2001 .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2002 .select_se_sh = &gfx_v9_0_select_se_sh,
2003 .read_wave_data = &gfx_v9_0_read_wave_data,
2004 .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2005 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2006 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2007 };
2008
2009 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
2010 .ras_error_inject = &gfx_v9_0_ras_error_inject,
2011 .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2012 .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2013 };
2014
2015 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
2016 .ras_block = {
2017 .hw_ops = &gfx_v9_0_ras_ops,
2018 },
2019 };
2020
2021 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2022 {
2023 u32 gb_addr_config;
2024 int err;
2025
2026 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2027 case IP_VERSION(9, 0, 1):
2028 adev->gfx.config.max_hw_contexts = 8;
2029 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2030 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2031 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2032 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2033 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2034 break;
2035 case IP_VERSION(9, 2, 1):
2036 adev->gfx.config.max_hw_contexts = 8;
2037 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2038 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2039 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2040 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2041 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2042 DRM_INFO("fix gfx.config for vega12\n");
2043 break;
2044 case IP_VERSION(9, 4, 0):
2045 adev->gfx.ras = &gfx_v9_0_ras;
2046 adev->gfx.config.max_hw_contexts = 8;
2047 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2048 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2049 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2050 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2051 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2052 gb_addr_config &= ~0xf3e777ff;
2053 gb_addr_config |= 0x22014042;
2054 /* check vbios table if gpu info is not available */
2055 err = amdgpu_atomfirmware_get_gfx_info(adev);
2056 if (err)
2057 return err;
2058 break;
2059 case IP_VERSION(9, 2, 2):
2060 case IP_VERSION(9, 1, 0):
2061 adev->gfx.config.max_hw_contexts = 8;
2062 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2063 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2064 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2065 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2066 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2067 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2068 else
2069 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2070 break;
2071 case IP_VERSION(9, 4, 1):
2072 adev->gfx.ras = &gfx_v9_4_ras;
2073 adev->gfx.config.max_hw_contexts = 8;
2074 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2075 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2076 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2077 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2078 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2079 gb_addr_config &= ~0xf3e777ff;
2080 gb_addr_config |= 0x22014042;
2081 break;
2082 case IP_VERSION(9, 3, 0):
2083 adev->gfx.config.max_hw_contexts = 8;
2084 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2085 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2086 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2087 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2088 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2089 gb_addr_config &= ~0xf3e777ff;
2090 gb_addr_config |= 0x22010042;
2091 break;
2092 case IP_VERSION(9, 4, 2):
2093 adev->gfx.ras = &gfx_v9_4_2_ras;
2094 adev->gfx.config.max_hw_contexts = 8;
2095 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2096 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2097 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2098 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2099 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2100 gb_addr_config &= ~0xf3e777ff;
2101 gb_addr_config |= 0x22014042;
2102 /* check vbios table if gpu info is not available */
2103 err = amdgpu_atomfirmware_get_gfx_info(adev);
2104 if (err)
2105 return err;
2106 break;
2107 default:
2108 BUG();
2109 break;
2110 }
2111
2112 adev->gfx.config.gb_addr_config = gb_addr_config;
2113
2114 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2115 REG_GET_FIELD(
2116 adev->gfx.config.gb_addr_config,
2117 GB_ADDR_CONFIG,
2118 NUM_PIPES);
2119
2120 adev->gfx.config.max_tile_pipes =
2121 adev->gfx.config.gb_addr_config_fields.num_pipes;
2122
2123 adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2124 REG_GET_FIELD(
2125 adev->gfx.config.gb_addr_config,
2126 GB_ADDR_CONFIG,
2127 NUM_BANKS);
2128 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2129 REG_GET_FIELD(
2130 adev->gfx.config.gb_addr_config,
2131 GB_ADDR_CONFIG,
2132 MAX_COMPRESSED_FRAGS);
2133 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2134 REG_GET_FIELD(
2135 adev->gfx.config.gb_addr_config,
2136 GB_ADDR_CONFIG,
2137 NUM_RB_PER_SE);
2138 adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2139 REG_GET_FIELD(
2140 adev->gfx.config.gb_addr_config,
2141 GB_ADDR_CONFIG,
2142 NUM_SHADER_ENGINES);
2143 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2144 REG_GET_FIELD(
2145 adev->gfx.config.gb_addr_config,
2146 GB_ADDR_CONFIG,
2147 PIPE_INTERLEAVE_SIZE));
2148
2149 return 0;
2150 }
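/*
 * Decoding example (informal): each gb_addr_config_fields value above is
 * 1 << <field>, so a NUM_PIPES field that reads back as 2 is recorded as
 * num_pipes = 4, and a PIPE_INTERLEAVE_SIZE field of 0 yields an interleave
 * of 1 << (8 + 0) = 256 bytes.  The concrete field values depend on the ASIC
 * and on the gb_addr_config chosen in the switch above.
 */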
2151
2152 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2153 int mec, int pipe, int queue)
2154 {
2155 unsigned irq_type;
2156 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2157 unsigned int hw_prio;
2158
2159 ring = &adev->gfx.compute_ring[ring_id];
2160
2161 /* mec0 is me1 */
2162 ring->me = mec + 1;
2163 ring->pipe = pipe;
2164 ring->queue = queue;
2165
2166 ring->ring_obj = NULL;
2167 ring->use_doorbell = true;
2168 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2169 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2170 + (ring_id * GFX9_MEC_HPD_SIZE);
2171 ring->vm_hub = AMDGPU_GFXHUB(0);
2172 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2173
2174 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2175 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2176 + ring->pipe;
2177 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2178 AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
2179 /* type-2 packets are deprecated on MEC, use type-3 instead */
2180 return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2181 hw_prio, NULL);
2182 }
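/*
 * Worked example (informal): for ring_id 2 on mec 0, pipe 1, queue 0 the
 * code above yields ring->me = 1, a doorbell index of (mec_ring0 + 2) << 1,
 * an EOP buffer at hpd_eop_gpu_addr + 2 * GFX9_MEC_HPD_SIZE, and
 * irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + 0 * num_pipe_per_mec + 1,
 * i.e. the MEC1 pipe 1 EOP interrupt.
 */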
2183
2184 static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
2185 {
2186 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
2187 uint32_t *ptr;
2188 uint32_t inst;
2189
2190 ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
2191 if (!ptr) {
2192 DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
2193 adev->gfx.ip_dump_core = NULL;
2194 } else {
2195 adev->gfx.ip_dump_core = ptr;
2196 }
2197
2198 /* Allocate memory for compute queue registers for all the instances */
2199 reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
2200 inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
2201 adev->gfx.mec.num_queue_per_pipe;
2202
2203 ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
2204 if (!ptr) {
2205 DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
2206 adev->gfx.ip_dump_compute_queues = NULL;
2207 } else {
2208 adev->gfx.ip_dump_compute_queues = ptr;
2209 }
2210 }
2211
2212 static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block)
2213 {
2214 int i, j, k, r, ring_id;
2215 int xcc_id = 0;
2216 struct amdgpu_ring *ring;
2217 struct amdgpu_device *adev = ip_block->adev;
2218 unsigned int hw_prio;
2219
2220 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2221 case IP_VERSION(9, 0, 1):
2222 case IP_VERSION(9, 2, 1):
2223 case IP_VERSION(9, 4, 0):
2224 case IP_VERSION(9, 2, 2):
2225 case IP_VERSION(9, 1, 0):
2226 case IP_VERSION(9, 4, 1):
2227 case IP_VERSION(9, 3, 0):
2228 case IP_VERSION(9, 4, 2):
2229 adev->gfx.mec.num_mec = 2;
2230 break;
2231 default:
2232 adev->gfx.mec.num_mec = 1;
2233 break;
2234 }
2235
2236 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2237 case IP_VERSION(9, 4, 2):
2238 adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex;
2239 adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex);
2240 if (adev->gfx.mec_fw_version >= 88) {
2241 adev->gfx.enable_cleaner_shader = true;
2242 r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
2243 if (r) {
2244 adev->gfx.enable_cleaner_shader = false;
2245 dev_err(adev->dev, "Failed to initialize cleaner shader\n");
2246 }
2247 }
2248 break;
2249 default:
2250 adev->gfx.enable_cleaner_shader = false;
2251 break;
2252 }
2253
2254 adev->gfx.mec.num_pipe_per_mec = 4;
2255 adev->gfx.mec.num_queue_per_pipe = 8;
2256
2257 /* EOP Event */
2258 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2259 if (r)
2260 return r;
2261
2262 /* Bad opcode Event */
2263 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP,
2264 GFX_9_0__SRCID__CP_BAD_OPCODE_ERROR,
2265 &adev->gfx.bad_op_irq);
2266 if (r)
2267 return r;
2268
2269 /* Privileged reg */
2270 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2271 &adev->gfx.priv_reg_irq);
2272 if (r)
2273 return r;
2274
2275 /* Privileged inst */
2276 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2277 &adev->gfx.priv_inst_irq);
2278 if (r)
2279 return r;
2280
2281 /* ECC error */
2282 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2283 &adev->gfx.cp_ecc_error_irq);
2284 if (r)
2285 return r;
2286
2287 /* FUE error */
2288 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2289 &adev->gfx.cp_ecc_error_irq);
2290 if (r)
2291 return r;
2292
2293 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2294
2295 if (adev->gfx.rlc.funcs) {
2296 if (adev->gfx.rlc.funcs->init) {
2297 r = adev->gfx.rlc.funcs->init(adev);
2298 if (r) {
2299 dev_err(adev->dev, "Failed to init rlc BOs!\n");
2300 return r;
2301 }
2302 }
2303 }
2304
2305 r = gfx_v9_0_mec_init(adev);
2306 if (r) {
2307 DRM_ERROR("Failed to init MEC BOs!\n");
2308 return r;
2309 }
2310
2311 /* set up the gfx ring */
2312 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2313 ring = &adev->gfx.gfx_ring[i];
2314 ring->ring_obj = NULL;
2315 if (!i)
2316 sprintf(ring->name, "gfx");
2317 else
2318 sprintf(ring->name, "gfx_%d", i);
2319 ring->use_doorbell = true;
2320 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2321
2322 /* disable scheduler on the real ring */
2323 ring->no_scheduler = adev->gfx.mcbp;
2324 ring->vm_hub = AMDGPU_GFXHUB(0);
2325 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2326 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2327 AMDGPU_RING_PRIO_DEFAULT, NULL);
2328 if (r)
2329 return r;
2330 }
2331
2332 /* set up the software rings */
2333 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2334 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2335 ring = &adev->gfx.sw_gfx_ring[i];
2336 ring->ring_obj = NULL;
2337 sprintf(ring->name, amdgpu_sw_ring_name(i));
2338 ring->use_doorbell = true;
2339 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2340 ring->is_sw_ring = true;
2341 hw_prio = amdgpu_sw_ring_priority(i);
2342 ring->vm_hub = AMDGPU_GFXHUB(0);
2343 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2344 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
2345 NULL);
2346 if (r)
2347 return r;
2348 ring->wptr = 0;
2349 }
2350
2351 /* init the muxer and add software rings */
2352 r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
2353 GFX9_NUM_SW_GFX_RINGS);
2354 if (r) {
2355 DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
2356 return r;
2357 }
2358 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
2359 r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
2360 &adev->gfx.sw_gfx_ring[i]);
2361 if (r) {
2362 DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
2363 return r;
2364 }
2365 }
2366 }
2367
2368 /* set up the compute queues - allocate horizontally across pipes */
2369 ring_id = 0;
2370 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2371 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2372 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2373 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i,
2374 k, j))
2375 continue;
2376
2377 r = gfx_v9_0_compute_ring_init(adev,
2378 ring_id,
2379 i, k, j);
2380 if (r)
2381 return r;
2382
2383 ring_id++;
2384 }
2385 }
2386 }
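/*
 * Allocation order note (informal): for each queue index the pipes are
 * walked first, so the first enabled rings come out as
 * (mec 0, pipe 0, queue 0), (mec 0, pipe 1, queue 0), ... with any queue
 * not enabled in the MEC bitmap skipped by amdgpu_gfx_is_mec_queue_enabled().
 */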
2387
2388 /* TODO: Add queue reset mask when FW fully supports it */
2389 adev->gfx.gfx_supported_reset =
2390 amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]);
2391 adev->gfx.compute_supported_reset =
2392 amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]);
2393
2394 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0);
2395 if (r) {
2396 DRM_ERROR("Failed to init KIQ BOs!\n");
2397 return r;
2398 }
2399
2400 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
2401 if (r)
2402 return r;
2403
2404 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2405 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0);
2406 if (r)
2407 return r;
2408
2409 adev->gfx.ce_ram_size = 0x8000;
2410
2411 r = gfx_v9_0_gpu_early_init(adev);
2412 if (r)
2413 return r;
2414
2415 if (amdgpu_gfx_ras_sw_init(adev)) {
2416 dev_err(adev->dev, "Failed to initialize gfx ras block!\n");
2417 return -EINVAL;
2418 }
2419
2420 gfx_v9_0_alloc_ip_dump(adev);
2421
2422 r = amdgpu_gfx_sysfs_init(adev);
2423 if (r)
2424 return r;
2425
2426 return 0;
2427 }
2428
2429
2430 static int gfx_v9_0_sw_fini(struct amdgpu_ip_block *ip_block)
2431 {
2432 int i;
2433 struct amdgpu_device *adev = ip_block->adev;
2434
2435 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
2436 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
2437 amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
2438 amdgpu_ring_mux_fini(&adev->gfx.muxer);
2439 }
2440
2441 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2442 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2443 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2444 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2445
2446 amdgpu_gfx_mqd_sw_fini(adev, 0);
2447 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
2448 amdgpu_gfx_kiq_fini(adev, 0);
2449
2450 amdgpu_gfx_cleaner_shader_sw_fini(adev);
2451
2452 gfx_v9_0_mec_fini(adev);
2453 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2454 &adev->gfx.rlc.clear_state_gpu_addr,
2455 (void **)&adev->gfx.rlc.cs_ptr);
2456 if (adev->flags & AMD_IS_APU) {
2457 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2458 &adev->gfx.rlc.cp_table_gpu_addr,
2459 (void **)&adev->gfx.rlc.cp_table_ptr);
2460 }
2461 gfx_v9_0_free_microcode(adev);
2462
2463 amdgpu_gfx_sysfs_fini(adev);
2464
2465 kfree(adev->gfx.ip_dump_core);
2466 kfree(adev->gfx.ip_dump_compute_queues);
2467
2468 return 0;
2469 }
2470
2471
2472 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2473 {
2474 /* TODO */
2475 }
2476
2477 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2478 u32 instance, int xcc_id)
2479 {
2480 u32 data;
2481
2482 if (instance == 0xffffffff)
2483 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2484 else
2485 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2486
2487 if (se_num == 0xffffffff)
2488 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2489 else
2490 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2491
2492 if (sh_num == 0xffffffff)
2493 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2494 else
2495 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2496
2497 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2498 }
2499
2500 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2501 {
2502 u32 data, mask;
2503
2504 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2505 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2506
2507 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2508 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2509
2510 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2511 adev->gfx.config.max_sh_per_se);
2512
2513 return (~data) & mask;
2514 }
2515
2516 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2517 {
2518 int i, j;
2519 u32 data;
2520 u32 active_rbs = 0;
2521 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2522 adev->gfx.config.max_sh_per_se;
2523
2524 mutex_lock(&adev->grbm_idx_mutex);
2525 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2526 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2527 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2528 data = gfx_v9_0_get_rb_active_bitmap(adev);
2529 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2530 rb_bitmap_width_per_sh);
2531 }
2532 }
2533 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2534 mutex_unlock(&adev->grbm_idx_mutex);
2535
2536 adev->gfx.config.backend_enable_mask = active_rbs;
2537 adev->gfx.config.num_rbs = hweight32(active_rbs);
2538 }
2539
2540 static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev,
2541 uint32_t first_vmid,
2542 uint32_t last_vmid)
2543 {
2544 uint32_t data;
2545 uint32_t trap_config_vmid_mask = 0;
2546 int i;
2547
2548 /* Calculate trap config vmid mask */
2549 for (i = first_vmid; i < last_vmid; i++)
2550 trap_config_vmid_mask |= (1 << i);
2551
2552 data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG,
2553 VMID_SEL, trap_config_vmid_mask);
2554 data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG,
2555 TRAP_EN, 1);
2556 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data);
2557 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0);
2558
2559 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0);
2560 WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0);
2561 }
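/*
 * Mask example (informal): the loop above sets one bit per VMID in
 * [first_vmid, last_vmid), so first_vmid = 1 and last_vmid = 8 produce
 * trap_config_vmid_mask = 0xfe (bits 1 through 7); last_vmid itself is
 * excluded.
 */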
2562
2563 #define DEFAULT_SH_MEM_BASES (0x6000)
2564 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2565 {
2566 int i;
2567 uint32_t sh_mem_config;
2568 uint32_t sh_mem_bases;
2569
2570 /*
2571 * Configure apertures:
2572 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
2573 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
2574 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
2575 */
2576 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2577
2578 sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2579 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2580 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2581
2582 mutex_lock(&adev->srbm_mutex);
2583 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2584 soc15_grbm_select(adev, 0, 0, 0, i, 0);
2585 /* CP and shaders */
2586 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2587 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2588 }
2589 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2590 mutex_unlock(&adev->srbm_mutex);
2591
2592 /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2593 access. These should be enabled by FW for target VMIDs. */
2594 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2595 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2596 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2597 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2598 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2599 }
2600 }
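/*
 * Value note (informal): with DEFAULT_SH_MEM_BASES = 0x6000 the value
 * programmed above is 0x6000 | (0x6000 << 16) = 0x60006000, which places
 * both aperture base fields of SH_MEM_BASES at the 0x6000xxxx'xxxxxxxx
 * range described in the comment at the top of this function.  Every KFD
 * VMID receives the same configuration.
 */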
2601
2602 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2603 {
2604 int vmid;
2605
2606 /*
2607 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2608 * access. Compute VMIDs should be enabled by FW for target VMIDs,
2609 * the driver can enable them for graphics. VMID0 should maintain
2610 * access so that HWS firmware can save/restore entries.
2611 */
2612 for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2613 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2614 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2615 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2616 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2617 }
2618 }
2619
2620 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2621 {
2622 uint32_t tmp;
2623
2624 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
2625 case IP_VERSION(9, 4, 1):
2626 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2627 tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT,
2628 !READ_ONCE(adev->barrier_has_auto_waitcnt));
2629 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2630 break;
2631 default:
2632 break;
2633 }
2634 }
2635
2636 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2637 {
2638 u32 tmp;
2639 int i;
2640
2641 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2642
2643 gfx_v9_0_tiling_mode_table_init(adev);
2644
2645 if (adev->gfx.num_gfx_rings)
2646 gfx_v9_0_setup_rb(adev);
2647 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2648 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2649
2650 /* XXX SH_MEM regs */
2651 /* where to put LDS, scratch, GPUVM in FSA64 space */
2652 mutex_lock(&adev->srbm_mutex);
2653 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
2654 soc15_grbm_select(adev, 0, 0, 0, i, 0);
2655 /* CP and shaders */
2656 if (i == 0) {
2657 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2658 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2659 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2660 !!adev->gmc.noretry);
2661 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2662 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2663 } else {
2664 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2665 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2666 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2667 !!adev->gmc.noretry);
2668 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2669 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2670 (adev->gmc.private_aperture_start >> 48));
2671 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2672 (adev->gmc.shared_aperture_start >> 48));
2673 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2674 }
2675 }
2676 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
2677
2678 mutex_unlock(&adev->srbm_mutex);
2679
2680 gfx_v9_0_init_compute_vmid(adev);
2681 gfx_v9_0_init_gds_vmid(adev);
2682 gfx_v9_0_init_sq_config(adev);
2683 }
2684
2685 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2686 {
2687 u32 i, j, k;
2688 u32 mask;
2689
2690 mutex_lock(&adev->grbm_idx_mutex);
2691 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2692 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2693 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
2694 for (k = 0; k < adev->usec_timeout; k++) {
2695 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2696 break;
2697 udelay(1);
2698 }
2699 if (k == adev->usec_timeout) {
2700 amdgpu_gfx_select_se_sh(adev, 0xffffffff,
2701 0xffffffff, 0xffffffff, 0);
2702 mutex_unlock(&adev->grbm_idx_mutex);
2703 DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2704 i, j);
2705 return;
2706 }
2707 }
2708 }
2709 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
2710 mutex_unlock(&adev->grbm_idx_mutex);
2711
2712 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2713 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2714 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2715 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2716 for (k = 0; k < adev->usec_timeout; k++) {
2717 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2718 break;
2719 udelay(1);
2720 }
2721 }
2722
2723 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2724 bool enable)
2725 {
2726 u32 tmp;
2727
2728 /* These interrupts should be enabled to drive DS clock */
2729
2730 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2731
2732 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2733 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2734 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2735 if (adev->gfx.num_gfx_rings)
2736 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2737
2738 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2739 }
2740
2741 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2742 {
2743 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2744 /* csib */
2745 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2746 adev->gfx.rlc.clear_state_gpu_addr >> 32);
2747 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2748 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2749 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2750 adev->gfx.rlc.clear_state_size);
2751 }
2752
2753 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2754 int indirect_offset,
2755 int list_size,
2756 int *unique_indirect_regs,
2757 int unique_indirect_reg_count,
2758 int *indirect_start_offsets,
2759 int *indirect_start_offsets_count,
2760 int max_start_offsets_count)
2761 {
2762 int idx;
2763
2764 for (; indirect_offset < list_size; indirect_offset++) {
2765 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2766 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2767 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2768
2769 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2770 indirect_offset += 2;
2771
2772 /* look for the matching index */
2773 for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2774 if (unique_indirect_regs[idx] ==
2775 register_list_format[indirect_offset] ||
2776 !unique_indirect_regs[idx])
2777 break;
2778 }
2779
2780 BUG_ON(idx >= unique_indirect_reg_count);
2781
2782 if (!unique_indirect_regs[idx])
2783 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2784
2785 indirect_offset++;
2786 }
2787 }
2788 }
2789
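/*
 * Build and upload the RLC save/restore list: copy the register restore
 * table into SRM ARAM, stream the direct and indirect register list
 * into RLC GPM scratch, then program the list size, the starting
 * offsets and the unique indirect register index/data pairs.
 */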
2790 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2791 {
2792 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2793 int unique_indirect_reg_count = 0;
2794
2795 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2796 int indirect_start_offsets_count = 0;
2797
2798 int list_size = 0;
2799 int i = 0, j = 0;
2800 u32 tmp = 0;
2801
2802 u32 *register_list_format =
2803 kmemdup(adev->gfx.rlc.register_list_format,
2804 adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2805 if (!register_list_format)
2806 return -ENOMEM;
2807
2808 /* setup unique_indirect_regs array and indirect_start_offsets array */
2809 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2810 gfx_v9_1_parse_ind_reg_list(register_list_format,
2811 adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2812 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2813 unique_indirect_regs,
2814 unique_indirect_reg_count,
2815 indirect_start_offsets,
2816 &indirect_start_offsets_count,
2817 ARRAY_SIZE(indirect_start_offsets));
2818
2819 /* enable auto inc in case it is disabled */
2820 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2821 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2822 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2823
2824 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2825 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2826 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2827 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2828 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2829 adev->gfx.rlc.register_restore[i]);
2830
2831 /* load indirect register */
2832 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2833 adev->gfx.rlc.reg_list_format_start);
2834
2835 /* direct register portion */
2836 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2837 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2838 register_list_format[i]);
2839
2840 /* indirect register portion */
2841 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2842 if (register_list_format[i] == 0xFFFFFFFF) {
2843 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2844 continue;
2845 }
2846
2847 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2848 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2849
2850 for (j = 0; j < unique_indirect_reg_count; j++) {
2851 if (register_list_format[i] == unique_indirect_regs[j]) {
2852 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2853 break;
2854 }
2855 }
2856
2857 BUG_ON(j >= unique_indirect_reg_count);
2858
2859 i++;
2860 }
2861
2862 /* set save/restore list size */
2863 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2864 list_size = list_size >> 1;
2865 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2866 adev->gfx.rlc.reg_restore_list_size);
2867 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2868
2869 /* write the starting offsets to RLC scratch ram */
2870 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2871 adev->gfx.rlc.starting_offsets_start);
2872 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2873 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2874 indirect_start_offsets[i]);
2875
2876 /* load unique indirect regs*/
2877 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2878 if (unique_indirect_regs[i] != 0) {
2879 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2880 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2881 unique_indirect_regs[i] & 0x3FFFF);
2882
2883 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2884 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2885 unique_indirect_regs[i] >> 20);
2886 }
2887 }
2888
2889 kfree(register_list_format);
2890 return 0;
2891 }
2892
2893 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2894 {
2895 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2896 }
2897
2898 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2899 bool enable)
2900 {
2901 uint32_t data = 0;
2902 uint32_t default_data = 0;
2903
2904 default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2905 if (enable) {
2906 /* enable GFXIP control over CGPG */
2907 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2908 if (default_data != data)
2909 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2910
2911 /* update status */
2912 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2913 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2914 if (default_data != data)
2915 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2916 } else {
2917 /* restore GFXIP control over CGPG */
2918 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2919 if (default_data != data)
2920 WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2921 }
2922 }
2923
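/*
 * Program the static GFX power-gating parameters (idle poll count, RLC
 * power up/down delays, auto-PG idle threshold) and, except on GC
 * 9.3.0, hand CGPG control over to GFXIP.
 */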
2924 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2925 {
2926 uint32_t data = 0;
2927
2928 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2929 AMD_PG_SUPPORT_GFX_SMG |
2930 AMD_PG_SUPPORT_GFX_DMG)) {
2931 /* init IDLE_POLL_COUNT = 60 */
2932 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2933 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2934 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2935 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2936
2937 /* init RLC PG Delay */
2938 data = 0;
2939 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2940 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2941 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2942 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2943 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2944
2945 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2946 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2947 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2948 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2949
2950 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2951 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2952 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2953 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2954
2955 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2956 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2957
2958 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2959 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2960 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2961 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0))
2962 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2963 }
2964 }
2965
2966 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2967 bool enable)
2968 {
2969 uint32_t data = 0;
2970 uint32_t default_data = 0;
2971
2972 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2973 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2974 SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2975 enable ? 1 : 0);
2976 if (default_data != data)
2977 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2978 }
2979
2980 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2981 bool enable)
2982 {
2983 uint32_t data = 0;
2984 uint32_t default_data = 0;
2985
2986 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2987 data = REG_SET_FIELD(data, RLC_PG_CNTL,
2988 SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2989 enable ? 1 : 0);
2990 if (default_data != data)
2991 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2992 }
2993
2994 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2995 bool enable)
2996 {
2997 uint32_t data = 0;
2998 uint32_t default_data = 0;
2999
3000 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3001 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3002 CP_PG_DISABLE,
3003 enable ? 0 : 1);
3004 if (default_data != data)
3005 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3006 }
3007
3008 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
3009 bool enable)
3010 {
3011 uint32_t data, default_data;
3012
3013 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3014 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3015 GFX_POWER_GATING_ENABLE,
3016 enable ? 1 : 0);
3017 if (default_data != data)
3018 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3019 }
3020
3021 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
3022 bool enable)
3023 {
3024 uint32_t data, default_data;
3025
3026 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3027 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3028 GFX_PIPELINE_PG_ENABLE,
3029 enable ? 1 : 0);
3030 if (default_data != data)
3031 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3032
3033 if (!enable)
3034 /* read any GFX register to wake up GFX */
3035 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3036 }
3037
3038 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3039 bool enable)
3040 {
3041 uint32_t data, default_data;
3042
3043 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3044 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3045 STATIC_PER_CU_PG_ENABLE,
3046 enable ? 1 : 0);
3047 if (default_data != data)
3048 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3049 }
3050
3051 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3052 bool enable)
3053 {
3054 uint32_t data, default_data;
3055
3056 default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3057 data = REG_SET_FIELD(data, RLC_PG_CNTL,
3058 DYN_PER_CU_PG_ENABLE,
3059 enable ? 1 : 0);
3060 if (default_data != data)
3061 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3062 }
3063
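/*
 * One-time power-gating init: program the CSB, set up the RLC
 * save/restore machinery where available, and when any PG feature is
 * enabled point the RLC at the CP jump table and init GFX power gating.
 */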
3064 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3065 {
3066 gfx_v9_0_init_csb(adev);
3067
3068 /*
3069 * The RLC save/restore list is supported since RLC v2_1
3070 * and is required by the gfxoff feature.
3071 */
3072 if (adev->gfx.rlc.is_rlc_v2_1) {
3073 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
3074 IP_VERSION(9, 2, 1) ||
3075 (adev->apu_flags & AMD_APU_IS_RAVEN2))
3076 gfx_v9_1_init_rlc_save_restore_list(adev);
3077 gfx_v9_0_enable_save_restore_machine(adev);
3078 }
3079
3080 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3081 AMD_PG_SUPPORT_GFX_SMG |
3082 AMD_PG_SUPPORT_GFX_DMG |
3083 AMD_PG_SUPPORT_CP |
3084 AMD_PG_SUPPORT_GDS |
3085 AMD_PG_SUPPORT_RLC_SMU_HS)) {
3086 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3087 adev->gfx.rlc.cp_table_gpu_addr >> 8);
3088 gfx_v9_0_init_gfx_power_gating(adev);
3089 }
3090 }
3091
3092 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3093 {
3094 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3095 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3096 gfx_v9_0_wait_for_rlc_serdes(adev);
3097 }
3098
3099 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3100 {
3101 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3102 udelay(50);
3103 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3104 udelay(50);
3105 }
3106
3107 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3108 {
3109 #ifdef AMDGPU_RLC_DEBUG_RETRY
3110 u32 rlc_ucode_ver;
3111 #endif
3112
3113 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3114 udelay(50);
3115
3116 /* on APUs the CP interrupt is enabled only after the CP has been initialized */
3117 if (!(adev->flags & AMD_IS_APU)) {
3118 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3119 udelay(50);
3120 }
3121
3122 #ifdef AMDGPU_RLC_DEBUG_RETRY
3123 /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3124 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3125 if (rlc_ucode_ver == 0x108) {
3126 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3127 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3128 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3129 * default is 0x9C4 to create a 100us interval */
3130 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3131 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3132 * to disable the page fault retry interrupts, default is
3133 * 0x100 (256) */
3134 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3135 }
3136 #endif
3137 }
3138
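/*
 * Legacy (non-PSP) RLC microcode load: stream the GPM ucode image one
 * dword at a time through RLC_GPM_UCODE_ADDR/DATA and finish by writing
 * the firmware version to the address register.
 */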
3139 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3140 {
3141 const struct rlc_firmware_header_v2_0 *hdr;
3142 const __le32 *fw_data;
3143 unsigned i, fw_size;
3144
3145 if (!adev->gfx.rlc_fw)
3146 return -EINVAL;
3147
3148 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3149 amdgpu_ucode_print_rlc_hdr(&hdr->header);
3150
3151 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3152 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3153 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3154
3155 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3156 RLCG_UCODE_LOADING_START_ADDRESS);
3157 for (i = 0; i < fw_size; i++)
3158 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3159 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3160
3161 return 0;
3162 }
3163
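/*
 * Bring the RLC back up: stop it, disable CGCG/CGLS, re-init power
 * gating, load the RLC microcode when PSP is not doing the loading,
 * apply the per-ASIC LBPW setting, and restart the RLC.
 */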
3164 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3165 {
3166 int r;
3167
3168 if (amdgpu_sriov_vf(adev)) {
3169 gfx_v9_0_init_csb(adev);
3170 return 0;
3171 }
3172
3173 adev->gfx.rlc.funcs->stop(adev);
3174
3175 /* disable CG */
3176 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3177
3178 gfx_v9_0_init_pg(adev);
3179
3180 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3181 /* legacy rlc firmware loading */
3182 r = gfx_v9_0_rlc_load_microcode(adev);
3183 if (r)
3184 return r;
3185 }
3186
3187 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3188 case IP_VERSION(9, 2, 2):
3189 case IP_VERSION(9, 1, 0):
3190 gfx_v9_0_init_lbpw(adev);
3191 if (amdgpu_lbpw == 0)
3192 gfx_v9_0_enable_lbpw(adev, false);
3193 else
3194 gfx_v9_0_enable_lbpw(adev, true);
3195 break;
3196 case IP_VERSION(9, 4, 0):
3197 gfx_v9_4_init_lbpw(adev);
3198 if (amdgpu_lbpw > 0)
3199 gfx_v9_0_enable_lbpw(adev, true);
3200 else
3201 gfx_v9_0_enable_lbpw(adev, false);
3202 break;
3203 default:
3204 break;
3205 }
3206
3207 gfx_v9_0_update_spm_vmid_internal(adev, 0xf);
3208
3209 adev->gfx.rlc.funcs->start(adev);
3210
3211 return 0;
3212 }
3213
3214 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3215 {
3216 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3217
3218 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_INVALIDATE_ICACHE, enable ? 0 : 1);
3219 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_INVALIDATE_ICACHE, enable ? 0 : 1);
3220 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_INVALIDATE_ICACHE, enable ? 0 : 1);
3221 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE0_RESET, enable ? 0 : 1);
3222 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_PIPE1_RESET, enable ? 0 : 1);
3223 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE0_RESET, enable ? 0 : 1);
3224 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_PIPE1_RESET, enable ? 0 : 1);
3225 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE0_RESET, enable ? 0 : 1);
3226 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_PIPE1_RESET, enable ? 0 : 1);
3227 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3228 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3229 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3230 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3231 udelay(50);
3232 }
3233
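/*
 * Legacy (non-PSP) load of the PFP, CE and ME microcode images into
 * their CP ucode/RAM windows; the gfx CP is halted first.
 */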
3234 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3235 {
3236 const struct gfx_firmware_header_v1_0 *pfp_hdr;
3237 const struct gfx_firmware_header_v1_0 *ce_hdr;
3238 const struct gfx_firmware_header_v1_0 *me_hdr;
3239 const __le32 *fw_data;
3240 unsigned i, fw_size;
3241
3242 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3243 return -EINVAL;
3244
3245 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3246 adev->gfx.pfp_fw->data;
3247 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3248 adev->gfx.ce_fw->data;
3249 me_hdr = (const struct gfx_firmware_header_v1_0 *)
3250 adev->gfx.me_fw->data;
3251
3252 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3253 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3254 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3255
3256 gfx_v9_0_cp_gfx_enable(adev, false);
3257
3258 /* PFP */
3259 fw_data = (const __le32 *)
3260 (adev->gfx.pfp_fw->data +
3261 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3262 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3263 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3264 for (i = 0; i < fw_size; i++)
3265 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3266 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3267
3268 /* CE */
3269 fw_data = (const __le32 *)
3270 (adev->gfx.ce_fw->data +
3271 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3272 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3273 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3274 for (i = 0; i < fw_size; i++)
3275 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3276 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3277
3278 /* ME */
3279 fw_data = (const __le32 *)
3280 (adev->gfx.me_fw->data +
3281 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3282 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3283 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3284 for (i = 0; i < fw_size; i++)
3285 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3286 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3287
3288 return 0;
3289 }
3290
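/*
 * Initial gfx CP bring-up: program the context count, un-halt the CP
 * and submit the clear-state preamble packets on ring 0.  On an APU S3
 * resume that bypassed firmware, the CSB resubmit is skipped.
 */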
3291 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3292 {
3293 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3294 const struct cs_section_def *sect = NULL;
3295 const struct cs_extent_def *ext = NULL;
3296 int r, i, tmp;
3297
3298 /* init the CP */
3299 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3300 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3301
3302 gfx_v9_0_cp_gfx_enable(adev, true);
3303
3304 /* Limit this quirk to the gfx9 APU series; the gfx10/gfx11 APUs
3305 * have been confirmed not to need this update.
3306 */
3307 if (adev->flags & AMD_IS_APU &&
3308 adev->in_s3 && !pm_resume_via_firmware()) {
3309 DRM_INFO("Will skip the CSB packet resubmit\n");
3310 return 0;
3311 }
3312 r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3313 if (r) {
3314 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3315 return r;
3316 }
3317
3318 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3319 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3320
3321 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3322 amdgpu_ring_write(ring, 0x80000000);
3323 amdgpu_ring_write(ring, 0x80000000);
3324
3325 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3326 for (ext = sect->section; ext->extent != NULL; ++ext) {
3327 if (sect->id == SECT_CONTEXT) {
3328 amdgpu_ring_write(ring,
3329 PACKET3(PACKET3_SET_CONTEXT_REG,
3330 ext->reg_count));
3331 amdgpu_ring_write(ring,
3332 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3333 for (i = 0; i < ext->reg_count; i++)
3334 amdgpu_ring_write(ring, ext->extent[i]);
3335 }
3336 }
3337 }
3338
3339 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3340 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3341
3342 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3343 amdgpu_ring_write(ring, 0);
3344
3345 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3346 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3347 amdgpu_ring_write(ring, 0x8000);
3348 amdgpu_ring_write(ring, 0x8000);
3349
3350 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3351 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3352 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3353 amdgpu_ring_write(ring, tmp);
3354 amdgpu_ring_write(ring, 0);
3355
3356 amdgpu_ring_commit(ring);
3357
3358 return 0;
3359 }
3360
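/*
 * Program the gfx ring 0 hardware state: ring buffer size and base,
 * rptr/wptr writeback addresses and the doorbell range/control, then
 * start the ring via gfx_v9_0_cp_gfx_start().
 */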
3361 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3362 {
3363 struct amdgpu_ring *ring;
3364 u32 tmp;
3365 u32 rb_bufsz;
3366 u64 rb_addr, rptr_addr, wptr_gpu_addr;
3367
3368 /* Set the write pointer delay */
3369 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3370
3371 /* set the RB to use vmid 0 */
3372 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3373
3374 /* Set ring buffer size */
3375 ring = &adev->gfx.gfx_ring[0];
3376 rb_bufsz = order_base_2(ring->ring_size / 8);
3377 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3378 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3379 #ifdef __BIG_ENDIAN
3380 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3381 #endif
3382 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3383
3384 /* Initialize the ring buffer's write pointers */
3385 ring->wptr = 0;
3386 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3387 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3388
3389 /* set the wb address whether it's enabled or not */
3390 rptr_addr = ring->rptr_gpu_addr;
3391 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3392 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3393
3394 wptr_gpu_addr = ring->wptr_gpu_addr;
3395 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3396 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3397
3398 mdelay(1);
3399 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3400
3401 rb_addr = ring->gpu_addr >> 8;
3402 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3403 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3404
3405 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3406 if (ring->use_doorbell) {
3407 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3408 DOORBELL_OFFSET, ring->doorbell_index);
3409 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3410 DOORBELL_EN, 1);
3411 } else {
3412 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3413 }
3414 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3415
3416 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3417 DOORBELL_RANGE_LOWER, ring->doorbell_index);
3418 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3419
3420 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3421 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3422
3423
3424 /* start the ring */
3425 gfx_v9_0_cp_gfx_start(adev);
3426
3427 return 0;
3428 }
3429
3430 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3431 {
3432 if (enable) {
3433 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3434 } else {
3435 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3436 (CP_MEC_CNTL__MEC_INVALIDATE_ICACHE_MASK |
3437 CP_MEC_CNTL__MEC_ME1_PIPE0_RESET_MASK |
3438 CP_MEC_CNTL__MEC_ME1_PIPE1_RESET_MASK |
3439 CP_MEC_CNTL__MEC_ME1_PIPE2_RESET_MASK |
3440 CP_MEC_CNTL__MEC_ME1_PIPE3_RESET_MASK |
3441 CP_MEC_CNTL__MEC_ME2_PIPE0_RESET_MASK |
3442 CP_MEC_CNTL__MEC_ME2_PIPE1_RESET_MASK |
3443 CP_MEC_CNTL__MEC_ME1_HALT_MASK |
3444 CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3445 adev->gfx.kiq[0].ring.sched.ready = false;
3446 }
3447 udelay(50);
3448 }
3449
3450 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3451 {
3452 const struct gfx_firmware_header_v1_0 *mec_hdr;
3453 const __le32 *fw_data;
3454 unsigned i;
3455 u32 tmp;
3456
3457 if (!adev->gfx.mec_fw)
3458 return -EINVAL;
3459
3460 gfx_v9_0_cp_compute_enable(adev, false);
3461
3462 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3463 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3464
3465 fw_data = (const __le32 *)
3466 (adev->gfx.mec_fw->data +
3467 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3468 tmp = 0;
3469 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3470 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3471 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3472
3473 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3474 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3475 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3476 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3477
3478 /* MEC1 */
3479 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3480 mec_hdr->jt_offset);
3481 for (i = 0; i < mec_hdr->jt_size; i++)
3482 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3483 le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3484
3485 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3486 adev->gfx.mec_fw_version);
3487 /* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3488
3489 return 0;
3490 }
3491
3492 /* KIQ functions */
3493 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3494 {
3495 uint32_t tmp;
3496 struct amdgpu_device *adev = ring->adev;
3497
3498 /* tell RLC which is KIQ queue */
3499 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3500 tmp &= 0xffffff00;
3501 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3502 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80);
3503 }
3504
3505 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3506 {
3507 struct amdgpu_device *adev = ring->adev;
3508
3509 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3510 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3511 mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3512 mqd->cp_hqd_queue_priority =
3513 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3514 }
3515 }
3516 }
3517
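/*
 * Fill in the memory queue descriptor (MQD) for a compute or KIQ queue:
 * thread management masks, EOP buffer, doorbell control, queue base and
 * size, rptr/wptr writeback addresses and queue priority.  Only the KIQ
 * marks the queue active here; compute queues are activated later via
 * the KIQ map_queues packet.
 */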
3518 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3519 {
3520 struct amdgpu_device *adev = ring->adev;
3521 struct v9_mqd *mqd = ring->mqd_ptr;
3522 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3523 uint32_t tmp;
3524
3525 mqd->header = 0xC0310800;
3526 mqd->compute_pipelinestat_enable = 0x00000001;
3527 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3528 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3529 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3530 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3531 mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3532 mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3533 mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3534 mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3535 mqd->compute_misc_reserved = 0x00000003;
3536
3537 mqd->dynamic_cu_mask_addr_lo =
3538 lower_32_bits(ring->mqd_gpu_addr
3539 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3540 mqd->dynamic_cu_mask_addr_hi =
3541 upper_32_bits(ring->mqd_gpu_addr
3542 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3543
3544 eop_base_addr = ring->eop_gpu_addr >> 8;
3545 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3546 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3547
3548 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3549 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3550 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3551 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3552
3553 mqd->cp_hqd_eop_control = tmp;
3554
3555 /* enable doorbell? */
3556 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3557
3558 if (ring->use_doorbell) {
3559 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3560 DOORBELL_OFFSET, ring->doorbell_index);
3561 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3562 DOORBELL_EN, 1);
3563 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3564 DOORBELL_SOURCE, 0);
3565 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3566 DOORBELL_HIT, 0);
3567 } else {
3568 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3569 DOORBELL_EN, 0);
3570 }
3571
3572 mqd->cp_hqd_pq_doorbell_control = tmp;
3573
3574 /* disable the queue if it's active */
3575 ring->wptr = 0;
3576 mqd->cp_hqd_dequeue_request = 0;
3577 mqd->cp_hqd_pq_rptr = 0;
3578 mqd->cp_hqd_pq_wptr_lo = 0;
3579 mqd->cp_hqd_pq_wptr_hi = 0;
3580
3581 /* set the pointer to the MQD */
3582 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3583 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3584
3585 /* set MQD vmid to 0 */
3586 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3587 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3588 mqd->cp_mqd_control = tmp;
3589
3590 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3591 hqd_gpu_addr = ring->gpu_addr >> 8;
3592 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3593 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3594
3595 /* set up the HQD, this is similar to CP_RB0_CNTL */
3596 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3597 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3598 (order_base_2(ring->ring_size / 4) - 1));
3599 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3600 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
3601 #ifdef __BIG_ENDIAN
3602 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3603 #endif
3604 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3605 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3606 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3607 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3608 mqd->cp_hqd_pq_control = tmp;
3609
3610 /* set the wb address whether it's enabled or not */
3611 wb_gpu_addr = ring->rptr_gpu_addr;
3612 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3613 mqd->cp_hqd_pq_rptr_report_addr_hi =
3614 upper_32_bits(wb_gpu_addr) & 0xffff;
3615
3616 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3617 wb_gpu_addr = ring->wptr_gpu_addr;
3618 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3619 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3620
3621 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3622 ring->wptr = 0;
3623 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3624
3625 /* set the vmid for the queue */
3626 mqd->cp_hqd_vmid = 0;
3627
3628 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3629 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3630 mqd->cp_hqd_persistent_state = tmp;
3631
3632 /* set MIN_IB_AVAIL_SIZE */
3633 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3634 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3635 mqd->cp_hqd_ib_control = tmp;
3636
3637 /* set static priority for a queue/ring */
3638 gfx_v9_0_mqd_set_priority(ring, mqd);
3639 mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3640
3641 /* the map_queues packet doesn't need to activate the queue,
3642 * so only the KIQ needs to set this field.
3643 */
3644 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3645 mqd->cp_hqd_active = 1;
3646
3647 return 0;
3648 }
3649
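/*
 * Program the KIQ hardware queue descriptor (HQD) registers from the
 * MQD: EOP buffer, MQD base, queue base/control, writeback addresses
 * and the MEC doorbell range, then activate the queue.
 */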
3650 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3651 {
3652 struct amdgpu_device *adev = ring->adev;
3653 struct v9_mqd *mqd = ring->mqd_ptr;
3654 int j;
3655
3656 /* disable wptr polling */
3657 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3658
3659 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3660 mqd->cp_hqd_eop_base_addr_lo);
3661 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3662 mqd->cp_hqd_eop_base_addr_hi);
3663
3664 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3665 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3666 mqd->cp_hqd_eop_control);
3667
3668 /* enable doorbell? */
3669 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3670 mqd->cp_hqd_pq_doorbell_control);
3671
3672 /* disable the queue if it's active */
3673 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3674 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3675 for (j = 0; j < adev->usec_timeout; j++) {
3676 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3677 break;
3678 udelay(1);
3679 }
3680 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3681 mqd->cp_hqd_dequeue_request);
3682 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3683 mqd->cp_hqd_pq_rptr);
3684 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3685 mqd->cp_hqd_pq_wptr_lo);
3686 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3687 mqd->cp_hqd_pq_wptr_hi);
3688 }
3689
3690 /* set the pointer to the MQD */
3691 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3692 mqd->cp_mqd_base_addr_lo);
3693 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3694 mqd->cp_mqd_base_addr_hi);
3695
3696 /* set MQD vmid to 0 */
3697 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3698 mqd->cp_mqd_control);
3699
3700 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3701 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3702 mqd->cp_hqd_pq_base_lo);
3703 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3704 mqd->cp_hqd_pq_base_hi);
3705
3706 /* set up the HQD, this is similar to CP_RB0_CNTL */
3707 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3708 mqd->cp_hqd_pq_control);
3709
3710 /* set the wb address whether it's enabled or not */
3711 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3712 mqd->cp_hqd_pq_rptr_report_addr_lo);
3713 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3714 mqd->cp_hqd_pq_rptr_report_addr_hi);
3715
3716 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3717 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3718 mqd->cp_hqd_pq_wptr_poll_addr_lo);
3719 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3720 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3721
3722 /* enable the doorbell if requested */
3723 if (ring->use_doorbell) {
3724 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3725 (adev->doorbell_index.kiq * 2) << 2);
3726 /* If the GC has entered CGPG, ringing a doorbell beyond the first
3727 * page doesn't wake it up. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to
3728 * work around this issue; the change has to stay aligned with the
3729 * firmware update.
3730 */
3731 if (check_if_enlarge_doorbell_range(adev))
3732 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3733 (adev->doorbell.size - 4));
3734 else
3735 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3736 (adev->doorbell_index.userqueue_end * 2) << 2);
3737 }
3738
3739 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3740 mqd->cp_hqd_pq_doorbell_control);
3741
3742 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3743 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3744 mqd->cp_hqd_pq_wptr_lo);
3745 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3746 mqd->cp_hqd_pq_wptr_hi);
3747
3748 /* set the vmid for the queue */
3749 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3750
3751 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3752 mqd->cp_hqd_persistent_state);
3753
3754 /* activate the queue */
3755 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3756 mqd->cp_hqd_active);
3757
3758 if (ring->use_doorbell)
3759 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3760
3761 return 0;
3762 }
3763
3764 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3765 {
3766 struct amdgpu_device *adev = ring->adev;
3767 int j;
3768
3769 /* disable the queue if it's active */
3770 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3771
3772 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3773
3774 for (j = 0; j < adev->usec_timeout; j++) {
3775 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3776 break;
3777 udelay(1);
3778 }
3779
3780 if (j == adev->usec_timeout) {
3781 DRM_DEBUG("KIQ dequeue request failed.\n");
3782
3783 /* Manual disable if dequeue request times out */
3784 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3785 }
3786
3787 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3788 0);
3789 }
3790
3791 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3792 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3793 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3794 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3795 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3796 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3797 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3798 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3799
3800 return 0;
3801 }
3802
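/*
 * Initialize the KIQ queue.  During a GPU reset the saved MQD backup is
 * restored and only the HQD registers are reprogrammed; otherwise a
 * fresh MQD is generated, the HQD is programmed and the result is saved
 * as the backup for later resets.
 */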
3803 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3804 {
3805 struct amdgpu_device *adev = ring->adev;
3806 struct v9_mqd *mqd = ring->mqd_ptr;
3807 struct v9_mqd *tmp_mqd;
3808
3809 gfx_v9_0_kiq_setting(ring);
3810
3811 /* The GPU may be in a bad state during probe and the driver may
3812 * trigger a reset after loading the SMU; in that case the MQD has
3813 * not been initialized and needs to be re-initialized here.
3814 * Check mqd->cp_hqd_pq_control, since that value should never be 0.
3815 */
3816 tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup;
3817 if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3818 /* for GPU_RESET case , reset MQD to a clean status */
3819 if (adev->gfx.kiq[0].mqd_backup)
3820 memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation));
3821
3822 /* reset ring buffer */
3823 ring->wptr = 0;
3824 amdgpu_ring_clear_ring(ring);
3825
3826 mutex_lock(&adev->srbm_mutex);
3827 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3828 gfx_v9_0_kiq_init_register(ring);
3829 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3830 mutex_unlock(&adev->srbm_mutex);
3831 } else {
3832 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3833 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3834 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3835 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
3836 amdgpu_ring_clear_ring(ring);
3837 mutex_lock(&adev->srbm_mutex);
3838 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3839 gfx_v9_0_mqd_init(ring);
3840 gfx_v9_0_kiq_init_register(ring);
3841 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3842 mutex_unlock(&adev->srbm_mutex);
3843
3844 if (adev->gfx.kiq[0].mqd_backup)
3845 memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation));
3846 }
3847
3848 return 0;
3849 }
3850
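/*
 * Initialize a compute queue's MQD.  On first init a fresh MQD is built
 * and backed up; on reset/resume (or when restoring) the backup is
 * copied back and the ring buffer is cleared.  The queue itself is
 * mapped afterwards through the KIQ.
 */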
3851 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring, bool restore)
3852 {
3853 struct amdgpu_device *adev = ring->adev;
3854 struct v9_mqd *mqd = ring->mqd_ptr;
3855 int mqd_idx = ring - &adev->gfx.compute_ring[0];
3856 struct v9_mqd *tmp_mqd;
3857
3858 /* As with the KIQ init above, the driver needs to re-init the MQD if
3859 * mqd->cp_hqd_pq_control indicates it was never initialized.
3860 */
3861 tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3862
3863 if (!restore && (!tmp_mqd->cp_hqd_pq_control ||
3864 (!amdgpu_in_reset(adev) && !adev->in_suspend))) {
3865 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3866 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3867 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3868 mutex_lock(&adev->srbm_mutex);
3869 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
3870 gfx_v9_0_mqd_init(ring);
3871 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
3872 mutex_unlock(&adev->srbm_mutex);
3873
3874 if (adev->gfx.mec.mqd_backup[mqd_idx])
3875 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3876 } else {
3877 /* restore MQD to a clean status */
3878 if (adev->gfx.mec.mqd_backup[mqd_idx])
3879 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3880 /* reset ring buffer */
3881 ring->wptr = 0;
3882 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
3883 amdgpu_ring_clear_ring(ring);
3884 }
3885
3886 return 0;
3887 }
3888
3889 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3890 {
3891 struct amdgpu_ring *ring;
3892 int r;
3893
3894 ring = &adev->gfx.kiq[0].ring;
3895
3896 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3897 if (unlikely(r != 0))
3898 return r;
3899
3900 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3901 if (unlikely(r != 0)) {
3902 amdgpu_bo_unreserve(ring->mqd_obj);
3903 return r;
3904 }
3905
3906 gfx_v9_0_kiq_init_queue(ring);
3907 amdgpu_bo_kunmap(ring->mqd_obj);
3908 ring->mqd_ptr = NULL;
3909 amdgpu_bo_unreserve(ring->mqd_obj);
3910 return 0;
3911 }
3912
3913 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3914 {
3915 struct amdgpu_ring *ring = NULL;
3916 int r = 0, i;
3917
3918 gfx_v9_0_cp_compute_enable(adev, true);
3919
3920 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3921 ring = &adev->gfx.compute_ring[i];
3922
3923 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3924 if (unlikely(r != 0))
3925 goto done;
3926 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3927 if (!r) {
3928 r = gfx_v9_0_kcq_init_queue(ring, false);
3929 amdgpu_bo_kunmap(ring->mqd_obj);
3930 ring->mqd_ptr = NULL;
3931 }
3932 amdgpu_bo_unreserve(ring->mqd_obj);
3933 if (r)
3934 goto done;
3935 }
3936
3937 r = amdgpu_gfx_enable_kcq(adev, 0);
3938 done:
3939 return r;
3940 }
3941
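/*
 * Full CP bring-up: optionally load the CP microcode (non-PSP path),
 * halt the gfx and compute engines, resume the KIQ first, then the gfx
 * ring and the compute queues, run the ring tests and finally re-enable
 * the GUI idle interrupt.
 */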
3942 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3943 {
3944 int r, i;
3945 struct amdgpu_ring *ring;
3946
3947 if (!(adev->flags & AMD_IS_APU))
3948 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3949
3950 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3951 if (adev->gfx.num_gfx_rings) {
3952 /* legacy firmware loading */
3953 r = gfx_v9_0_cp_gfx_load_microcode(adev);
3954 if (r)
3955 return r;
3956 }
3957
3958 r = gfx_v9_0_cp_compute_load_microcode(adev);
3959 if (r)
3960 return r;
3961 }
3962
3963 if (adev->gfx.num_gfx_rings)
3964 gfx_v9_0_cp_gfx_enable(adev, false);
3965 gfx_v9_0_cp_compute_enable(adev, false);
3966
3967 r = gfx_v9_0_kiq_resume(adev);
3968 if (r)
3969 return r;
3970
3971 if (adev->gfx.num_gfx_rings) {
3972 r = gfx_v9_0_cp_gfx_resume(adev);
3973 if (r)
3974 return r;
3975 }
3976
3977 r = gfx_v9_0_kcq_resume(adev);
3978 if (r)
3979 return r;
3980
3981 if (adev->gfx.num_gfx_rings) {
3982 ring = &adev->gfx.gfx_ring[0];
3983 r = amdgpu_ring_test_helper(ring);
3984 if (r)
3985 return r;
3986 }
3987
3988 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3989 ring = &adev->gfx.compute_ring[i];
3990 amdgpu_ring_test_helper(ring);
3991 }
3992
3993 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3994
3995 return 0;
3996 }
3997
3998 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3999 {
4000 u32 tmp;
4001
4002 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) &&
4003 amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
4004 return;
4005
4006 tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
4007 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
4008 adev->df.hash_status.hash_64k);
4009 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
4010 adev->df.hash_status.hash_2m);
4011 tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
4012 adev->df.hash_status.hash_1g);
4013 WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
4014 }
4015
4016 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
4017 {
4018 if (adev->gfx.num_gfx_rings)
4019 gfx_v9_0_cp_gfx_enable(adev, enable);
4020 gfx_v9_0_cp_compute_enable(adev, enable);
4021 }
4022
4023 static int gfx_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
4024 {
4025 int r;
4026 struct amdgpu_device *adev = ip_block->adev;
4027
4028 amdgpu_gfx_cleaner_shader_init(adev, adev->gfx.cleaner_shader_size,
4029 adev->gfx.cleaner_shader_ptr);
4030
4031 if (!amdgpu_sriov_vf(adev))
4032 gfx_v9_0_init_golden_registers(adev);
4033
4034 gfx_v9_0_constants_init(adev);
4035
4036 gfx_v9_0_init_tcp_config(adev);
4037
4038 r = adev->gfx.rlc.funcs->resume(adev);
4039 if (r)
4040 return r;
4041
4042 r = gfx_v9_0_cp_resume(adev);
4043 if (r)
4044 return r;
4045
4046 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4047 gfx_v9_4_2_set_power_brake_sequence(adev);
4048
4049 return r;
4050 }
4051
4052 static int gfx_v9_0_hw_fini(struct amdgpu_ip_block *ip_block)
4053 {
4054 struct amdgpu_device *adev = ip_block->adev;
4055
4056 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4057 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4058 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4059 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4060 amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0);
4061
4062 /* with a RAS fatal error pending, DF freeze and KCQ disable would fail */
4063 if (!amdgpu_ras_intr_triggered())
4064 /* disable the KCQs to stop the CPC from touching memory that is no longer valid */
4065 amdgpu_gfx_disable_kcq(adev, 0);
4066
4067 if (amdgpu_sriov_vf(adev)) {
4068 gfx_v9_0_cp_gfx_enable(adev, false);
4069 /* must disable polling for SRIOV when the hw has finished, otherwise
4070 * the CPC engine may keep fetching the WB address, which is no longer
4071 * valid once the sw side has finished, and trigger a DMAR read error
4072 * on the hypervisor side.
4073 */
4074 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4075 return 0;
4076 }
4077
4078 /* Use the deinitialize sequence from CAIL when unbinding the device
4079 * from the driver, otherwise the KIQ hangs when binding it back.
4080 */
4081 if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4082 mutex_lock(&adev->srbm_mutex);
4083 soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me,
4084 adev->gfx.kiq[0].ring.pipe,
4085 adev->gfx.kiq[0].ring.queue, 0, 0);
4086 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring);
4087 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
4088 mutex_unlock(&adev->srbm_mutex);
4089 }
4090
4091 gfx_v9_0_cp_enable(adev, false);
4092
4093 /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4094 if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4095 (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) {
4096 dev_dbg(adev->dev, "Skipping RLC halt\n");
4097 return 0;
4098 }
4099
4100 adev->gfx.rlc.funcs->stop(adev);
4101 return 0;
4102 }
4103
4104 static int gfx_v9_0_suspend(struct amdgpu_ip_block *ip_block)
4105 {
4106 return gfx_v9_0_hw_fini(ip_block);
4107 }
4108
4109 static int gfx_v9_0_resume(struct amdgpu_ip_block *ip_block)
4110 {
4111 return gfx_v9_0_hw_init(ip_block);
4112 }
4113
4114 static bool gfx_v9_0_is_idle(void *handle)
4115 {
4116 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4117
4118 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4119 GRBM_STATUS, GUI_ACTIVE))
4120 return false;
4121 else
4122 return true;
4123 }
4124
4125 static int gfx_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
4126 {
4127 unsigned i;
4128 struct amdgpu_device *adev = ip_block->adev;
4129
4130 for (i = 0; i < adev->usec_timeout; i++) {
4131 if (gfx_v9_0_is_idle(adev))
4132 return 0;
4133 udelay(1);
4134 }
4135 return -ETIMEDOUT;
4136 }
4137
4138 static int gfx_v9_0_soft_reset(struct amdgpu_ip_block *ip_block)
4139 {
4140 u32 grbm_soft_reset = 0;
4141 u32 tmp;
4142 struct amdgpu_device *adev = ip_block->adev;
4143
4144 /* GRBM_STATUS */
4145 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4146 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4147 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4148 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4149 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4150 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4151 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4152 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4153 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4154 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4155 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4156 }
4157
4158 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4159 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4160 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4161 }
4162
4163 /* GRBM_STATUS2 */
4164 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4165 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4166 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4167 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4168
4169
4170 if (grbm_soft_reset) {
4171 /* stop the rlc */
4172 adev->gfx.rlc.funcs->stop(adev);
4173
4174 if (adev->gfx.num_gfx_rings)
4175 /* Disable GFX parsing/prefetching */
4176 gfx_v9_0_cp_gfx_enable(adev, false);
4177
4178 /* Disable MEC parsing/prefetching */
4179 gfx_v9_0_cp_compute_enable(adev, false);
4180
4181 if (grbm_soft_reset) {
4182 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4183 tmp |= grbm_soft_reset;
4184 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4185 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4186 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4187
4188 udelay(50);
4189
4190 tmp &= ~grbm_soft_reset;
4191 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4192 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4193 }
4194
4195 /* Wait a little for things to settle down */
4196 udelay(50);
4197 }
4198 return 0;
4199 }
4200
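/*
 * Read the 64-bit GPU clock counter through the KIQ: emit a COPY_DATA
 * packet that writes the value into a writeback slot, then poll the
 * fence with retries.  Used from gfx_v9_0_get_gpu_clock_counter() when
 * running as an SR-IOV guest.
 */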
4201 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4202 {
4203 signed long r, cnt = 0;
4204 unsigned long flags;
4205 uint32_t seq, reg_val_offs = 0;
4206 uint64_t value = 0;
4207 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
4208 struct amdgpu_ring *ring = &kiq->ring;
4209
4210 BUG_ON(!ring->funcs->emit_rreg);
4211
4212 spin_lock_irqsave(&kiq->ring_lock, flags);
4213 if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4214 pr_err("critical bug! too many kiq readers\n");
4215 goto failed_unlock;
4216 }
4217 amdgpu_ring_alloc(ring, 32);
4218 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4219 amdgpu_ring_write(ring, 9 | /* src: register*/
4220 (5 << 8) | /* dst: memory */
4221 (1 << 16) | /* count sel */
4222 (1 << 20)); /* write confirm */
4223 amdgpu_ring_write(ring, 0);
4224 amdgpu_ring_write(ring, 0);
4225 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4226 reg_val_offs * 4));
4227 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4228 reg_val_offs * 4));
4229 r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4230 if (r)
4231 goto failed_undo;
4232
4233 amdgpu_ring_commit(ring);
4234 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4235
4236 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4237
4238 /* don't wait any longer in the GPU reset case, because doing so may
4239 * block the gpu_recover() routine forever: e.g. when this virt_kiq
4240 * read is triggered from TTM, ttm_bo_lock_delayed_workqueue() will
4241 * never return while we keep waiting here, which makes
4242 * gpu_recover() hang.
4243 *
4244 * also don't wait any longer when called from IRQ context
4245 */
4246 if (r < 1 && (amdgpu_in_reset(adev)))
4247 goto failed_kiq_read;
4248
4249 might_sleep();
4250 while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4251 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4252 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4253 }
4254
4255 if (cnt > MAX_KIQ_REG_TRY)
4256 goto failed_kiq_read;
4257
4258 mb();
4259 value = (uint64_t)adev->wb.wb[reg_val_offs] |
4260 (uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4261 amdgpu_device_wb_free(adev, reg_val_offs);
4262 return value;
4263
4264 failed_undo:
4265 amdgpu_ring_undo(ring);
4266 failed_unlock:
4267 spin_unlock_irqrestore(&kiq->ring_lock, flags);
4268 failed_kiq_read:
4269 if (reg_val_offs)
4270 amdgpu_device_wb_free(adev, reg_val_offs);
4271 pr_err("failed to read gpu clock\n");
4272 return ~0;
4273 }
4274
4275 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4276 {
4277 uint64_t clock, clock_lo, clock_hi, hi_check;
4278
4279 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
4280 case IP_VERSION(9, 3, 0):
4281 preempt_disable();
4282 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4283 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4284 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4285 /* The SMUIO TSC clock runs at 100MHz, so the 32-bit low word carries
4286 * over roughly every 42 seconds.
4287 */
4288 if (hi_check != clock_hi) {
4289 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4290 clock_hi = hi_check;
4291 }
4292 preempt_enable();
4293 clock = clock_lo | (clock_hi << 32ULL);
4294 break;
4295 default:
4296 amdgpu_gfx_off_ctrl(adev, false);
4297 mutex_lock(&adev->gfx.gpu_clock_mutex);
4298 if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
4299 IP_VERSION(9, 0, 1) &&
4300 amdgpu_sriov_runtime(adev)) {
4301 clock = gfx_v9_0_kiq_read_clock(adev);
4302 } else {
4303 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4304 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4305 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4306 }
4307 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4308 amdgpu_gfx_off_ctrl(adev, true);
4309 break;
4310 }
4311 return clock;
4312 }
4313
4314 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4315 uint32_t vmid,
4316 uint32_t gds_base, uint32_t gds_size,
4317 uint32_t gws_base, uint32_t gws_size,
4318 uint32_t oa_base, uint32_t oa_size)
4319 {
4320 struct amdgpu_device *adev = ring->adev;
4321
4322 /* GDS Base */
4323 gfx_v9_0_write_data_to_reg(ring, 0, false,
4324 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4325 gds_base);
4326
4327 /* GDS Size */
4328 gfx_v9_0_write_data_to_reg(ring, 0, false,
4329 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4330 gds_size);
4331
4332 /* GWS */
4333 gfx_v9_0_write_data_to_reg(ring, 0, false,
4334 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4335 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4336
4337 /* OA */
4338 gfx_v9_0_write_data_to_reg(ring, 0, false,
4339 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4340 (1 << (oa_size + oa_base)) - (1 << oa_base));
4341 }
4342
4343 static const u32 vgpr_init_compute_shader[] =
4344 {
4345 0xb07c0000, 0xbe8000ff,
4346 0x000000f8, 0xbf110800,
4347 0x7e000280, 0x7e020280,
4348 0x7e040280, 0x7e060280,
4349 0x7e080280, 0x7e0a0280,
4350 0x7e0c0280, 0x7e0e0280,
4351 0x80808800, 0xbe803200,
4352 0xbf84fff5, 0xbf9c0000,
4353 0xd28c0001, 0x0001007f,
4354 0xd28d0001, 0x0002027e,
4355 0x10020288, 0xb8810904,
4356 0xb7814000, 0xd1196a01,
4357 0x00000301, 0xbe800087,
4358 0xbefc00c1, 0xd89c4000,
4359 0x00020201, 0xd89cc080,
4360 0x00040401, 0x320202ff,
4361 0x00000800, 0x80808100,
4362 0xbf84fff8, 0x7e020280,
4363 0xbf810000, 0x00000000,
4364 };
4365
4366 static const u32 sgpr_init_compute_shader[] =
4367 {
4368 0xb07c0000, 0xbe8000ff,
4369 0x0000005f, 0xbee50080,
4370 0xbe812c65, 0xbe822c65,
4371 0xbe832c65, 0xbe842c65,
4372 0xbe852c65, 0xb77c0005,
4373 0x80808500, 0xbf84fff8,
4374 0xbe800080, 0xbf810000,
4375 };
4376
4377 static const u32 vgpr_init_compute_shader_arcturus[] = {
4378 0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4379 0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4380 0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4381 0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4382 0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4383 0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4384 0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4385 0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4386 0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4387 0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4388 0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4389 0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4390 0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4391 0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4392 0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4393 0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4394 0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4395 0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4396 0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4397 0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4398 0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4399 0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4400 0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4401 0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4402 0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4403 0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4404 0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4405 0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4406 0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4407 0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4408 0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4409 0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4410 0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4411 0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4412 0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4413 0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4414 0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4415 0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4416 0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4417 0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4418 0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4419 0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4420 0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4421 0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4422 0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4423 0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4424 0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4425 0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4426 0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4427 0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4428 0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4429 0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4430 0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4431 0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4432 0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4433 0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4434 0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4435 0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4436 0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4437 0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4438 0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4439 0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4440 0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4441 0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4442 0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4443 0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4444 0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4445 0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4446 0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4447 0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4448 0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4449 0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4450 0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4451 0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4452 0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4453 0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4454 0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4455 0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4456 0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4457 0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4458 0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4459 0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4460 0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4461 0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4462 0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4463 0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4464 0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4465 0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4466 0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4467 0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4468 0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4469 0xbf84fff8, 0xbf810000,
4470 };
4471
4472 /* When the register arrays below are changed, please update gpr_reg_size
4473 and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
4474 so that all gfx9 ASICs are covered. */
4475 static const struct soc15_reg_entry vgpr_init_regs[] = {
4476 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4477 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4478 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4479 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4480 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4481 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4482 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4483 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4484 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4485 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4486 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4487 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4488 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4489 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4490 };
4491
4492 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4493 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4494 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4495 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4496 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4497 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4498 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */
4499 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4500 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4501 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4502 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4503 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4504 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4505 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4506 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4507 };
4508
4509 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4510 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4511 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4512 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4513 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4514 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4515 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4516 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4517 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4518 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4519 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4520 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4521 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4522 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4523 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4524 };
4525
4526 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4527 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4528 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4529 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4530 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4531 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4532 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4533 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4534 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4535 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4536 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4537 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4538 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4539 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4540 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4541 };
4542
4543 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4544 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4545 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4546 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4547 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4548 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4549 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4550 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4551 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4552 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4553 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4554 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4555 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4556 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4557 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4558 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4559 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4560 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4561 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4562 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4563 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4564 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4565 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4566 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4567 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4568 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4569 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4570 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4571 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4572 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4573 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4574 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4575 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4576 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4577 };
4578
4579 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4580 {
4581 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4582 int i, r;
4583
4584 /* only support when RAS is enabled */
4585 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4586 return 0;
4587
4588 r = amdgpu_ring_alloc(ring, 7);
4589 if (r) {
4590 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4591 ring->name, r);
4592 return r;
4593 }
4594
4595 WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4596 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4597
4598 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4599 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4600 PACKET3_DMA_DATA_DST_SEL(1) |
4601 PACKET3_DMA_DATA_SRC_SEL(2) |
4602 PACKET3_DMA_DATA_ENGINE(0)));
4603 amdgpu_ring_write(ring, 0);
4604 amdgpu_ring_write(ring, 0);
4605 amdgpu_ring_write(ring, 0);
4606 amdgpu_ring_write(ring, 0);
4607 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4608 adev->gds.gds_size);
4609
4610 amdgpu_ring_commit(ring);
4611
4612 for (i = 0; i < adev->usec_timeout; i++) {
4613 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4614 break;
4615 udelay(1);
4616 }
4617
4618 if (i >= adev->usec_timeout)
4619 r = -ETIMEDOUT;
4620
4621 WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4622
4623 return r;
4624 }
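
/*
 * Summary of gfx_v9_0_do_edc_gds_workarounds() above (descriptive only):
 * with GDS_VMID0 mapped to the full GDS aperture, a single DMA_DATA packet
 * writes a constant across the whole range so that the GDS ECC state is
 * initialized, and the driver then busy-waits (up to adev->usec_timeout
 * microseconds) for the CP read pointer to catch up with the write pointer
 * before restoring GDS_VMID0_SIZE to 0.
 */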
4625
4626 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4627 {
4628 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4629 struct amdgpu_ib ib;
4630 struct dma_fence *f = NULL;
4631 int r, i;
4632 unsigned total_size, vgpr_offset, sgpr_offset;
4633 u64 gpu_addr;
4634
4635 int compute_dim_x = adev->gfx.config.max_shader_engines *
4636 adev->gfx.config.max_cu_per_sh *
4637 adev->gfx.config.max_sh_per_se;
4638 int sgpr_work_group_size = 5;
4639 int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4640 int vgpr_init_shader_size;
4641 const u32 *vgpr_init_shader_ptr;
4642 const struct soc15_reg_entry *vgpr_init_regs_ptr;
4643
4644 /* only support when RAS is enabled */
4645 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4646 return 0;
4647
4648 /* bail if the compute ring is not ready */
4649 if (!ring->sched.ready)
4650 return 0;
4651
4652 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) {
4653 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4654 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4655 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4656 } else {
4657 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4658 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4659 vgpr_init_regs_ptr = vgpr_init_regs;
4660 }
4661
4662 total_size =
4663 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4664 total_size +=
4665 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4666 total_size +=
4667 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
4668 total_size = ALIGN(total_size, 256);
4669 vgpr_offset = total_size;
4670 total_size += ALIGN(vgpr_init_shader_size, 256);
4671 sgpr_offset = total_size;
4672 total_size += sizeof(sgpr_init_compute_shader);
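	/*
	 * Size accounting (illustrative breakdown of the constants above):
	 * each of the three dispatch sections emits, in dwords,
	 *   gpr_reg_size * 3  SET_SH_REG writes (header + offset + value),
	 *   4                 COMPUTE_PGM_LO/HI write,
	 *   5                 DISPATCH_DIRECT packet,
	 *   2                 CS partial flush EVENT_WRITE,
	 * hence (gpr_reg_size * 3 + 4 + 5 + 2) * 4 bytes per section,
	 * followed by the two shader binaries.
	 */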
4673
4674 /* allocate an indirect buffer to put the commands in */
4675 memset(&ib, 0, sizeof(ib));
4676 r = amdgpu_ib_get(adev, NULL, total_size,
4677 AMDGPU_IB_POOL_DIRECT, &ib);
4678 if (r) {
4679 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4680 return r;
4681 }
4682
4683 /* load the compute shaders */
4684 for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4685 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4686
4687 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4688 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4689
4690 /* init the ib length to 0 */
4691 ib.length_dw = 0;
4692
4693 /* VGPR */
4694 /* write the register state for the compute dispatch */
4695 for (i = 0; i < gpr_reg_size; i++) {
4696 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4697 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4698 - PACKET3_SET_SH_REG_START;
4699 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4700 }
4701 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4702 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4703 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4704 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4705 - PACKET3_SET_SH_REG_START;
4706 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4707 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4708
4709 /* write dispatch packet */
4710 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4711 ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4712 ib.ptr[ib.length_dw++] = 1; /* y */
4713 ib.ptr[ib.length_dw++] = 1; /* z */
4714 ib.ptr[ib.length_dw++] =
4715 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4716
4717 /* write CS partial flush packet */
4718 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4719 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4720
4721 /* SGPR1 */
4722 /* write the register state for the compute dispatch */
4723 for (i = 0; i < gpr_reg_size; i++) {
4724 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4725 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4726 - PACKET3_SET_SH_REG_START;
4727 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4728 }
4729 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4730 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4731 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4732 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4733 - PACKET3_SET_SH_REG_START;
4734 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4735 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4736
4737 /* write dispatch packet */
4738 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4739 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4740 ib.ptr[ib.length_dw++] = 1; /* y */
4741 ib.ptr[ib.length_dw++] = 1; /* z */
4742 ib.ptr[ib.length_dw++] =
4743 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4744
4745 /* write CS partial flush packet */
4746 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4747 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4748
4749 /* SGPR2 */
4750 /* write the register state for the compute dispatch */
4751 for (i = 0; i < gpr_reg_size; i++) {
4752 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4753 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4754 - PACKET3_SET_SH_REG_START;
4755 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4756 }
4757 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4758 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4759 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4760 ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4761 - PACKET3_SET_SH_REG_START;
4762 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4763 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4764
4765 /* write dispatch packet */
4766 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4767 ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4768 ib.ptr[ib.length_dw++] = 1; /* y */
4769 ib.ptr[ib.length_dw++] = 1; /* z */
4770 ib.ptr[ib.length_dw++] =
4771 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4772
4773 /* write CS partial flush packet */
4774 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4775 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4776
4777 /* schedule the ib on the ring */
4778 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4779 if (r) {
4780 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4781 goto fail;
4782 }
4783
4784 /* wait for the GPU to finish processing the IB */
4785 r = dma_fence_wait(f, false);
4786 if (r) {
4787 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4788 goto fail;
4789 }
4790
4791 fail:
4792 amdgpu_ib_free(&ib, NULL);
4793 dma_fence_put(f);
4794
4795 return r;
4796 }
4797
4798 static int gfx_v9_0_early_init(struct amdgpu_ip_block *ip_block)
4799 {
4800 struct amdgpu_device *adev = ip_block->adev;
4801
4802 adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
4803
4804 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
4805 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4806 adev->gfx.num_gfx_rings = 0;
4807 else
4808 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4809 adev->gfx.xcc_mask = 1;
4810 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4811 AMDGPU_MAX_COMPUTE_RINGS);
4812 gfx_v9_0_set_kiq_pm4_funcs(adev);
4813 gfx_v9_0_set_ring_funcs(adev);
4814 gfx_v9_0_set_irq_funcs(adev);
4815 gfx_v9_0_set_gds_init(adev);
4816 gfx_v9_0_set_rlc_funcs(adev);
4817
4818 /* init rlcg reg access ctrl */
4819 gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4820
4821 return gfx_v9_0_init_microcode(adev);
4822 }
4823
4824 static int gfx_v9_0_ecc_late_init(struct amdgpu_ip_block *ip_block)
4825 {
4826 struct amdgpu_device *adev = ip_block->adev;
4827 int r;
4828
4829 /*
4830 * Temporary workaround: on several cards the CP firmware fails to
4831 * update the read pointer while CPDMA is writing the GDS clearing
4832 * operation during the suspend/resume sequence, so limit this
4833 * operation to the cold boot sequence for now.
4834 */
4835 if ((!adev->in_suspend) &&
4836 (adev->gds.gds_size)) {
4837 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4838 if (r)
4839 return r;
4840 }
4841
4842 /* requires IBs so do in late init after IB pool is initialized */
4843 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4844 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4845 else
4846 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4847
4848 if (r)
4849 return r;
4850
4851 if (adev->gfx.ras &&
4852 adev->gfx.ras->enable_watchdog_timer)
4853 adev->gfx.ras->enable_watchdog_timer(adev);
4854
4855 return 0;
4856 }
4857
4858 static int gfx_v9_0_late_init(struct amdgpu_ip_block *ip_block)
4859 {
4860 struct amdgpu_device *adev = ip_block->adev;
4861 int r;
4862
4863 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4864 if (r)
4865 return r;
4866
4867 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4868 if (r)
4869 return r;
4870
4871 r = amdgpu_irq_get(adev, &adev->gfx.bad_op_irq, 0);
4872 if (r)
4873 return r;
4874
4875 r = gfx_v9_0_ecc_late_init(ip_block);
4876 if (r)
4877 return r;
4878
4879 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
4880 gfx_v9_4_2_debug_trap_config_init(adev,
4881 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4882 else
4883 gfx_v9_0_debug_trap_config_init(adev,
4884 adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID);
4885
4886 return 0;
4887 }
4888
4889 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4890 {
4891 uint32_t rlc_setting;
4892
4893 /* if RLC is not enabled, do nothing */
4894 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4895 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4896 return false;
4897
4898 return true;
4899 }
4900
4901 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id)
4902 {
4903 uint32_t data;
4904 unsigned i;
4905
4906 data = RLC_SAFE_MODE__CMD_MASK;
4907 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4908 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4909
4910 /* wait for RLC_SAFE_MODE */
4911 for (i = 0; i < adev->usec_timeout; i++) {
4912 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4913 break;
4914 udelay(1);
4915 }
4916 }
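
/*
 * Descriptive note on the safe-mode handshake above: RLC_SAFE_MODE is
 * written with the CMD bit plus a MESSAGE value of 1 (the enter request);
 * the RLC firmware clears the CMD bit once the request has been
 * acknowledged, which is why the loop polls until CMD reads back as 0.
 * gfx_v9_0_unset_safe_mode() below writes CMD with MESSAGE = 0 (the exit
 * request) and does not poll for completion.
 */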
4917
4918 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id)
4919 {
4920 uint32_t data;
4921
4922 data = RLC_SAFE_MODE__CMD_MASK;
4923 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4924 }
4925
4926 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4927 bool enable)
4928 {
4929 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
4930
4931 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4932 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4933 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4934 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4935 } else {
4936 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4937 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4938 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4939 }
4940
4941 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
4942 }
4943
4944 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4945 bool enable)
4946 {
4947 /* TODO: double check if we need to perform under safe mode */
4948 /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4949
4950 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4951 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4952 else
4953 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4954
4955 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4956 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4957 else
4958 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4959
4960 /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4961 }
4962
4963 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4964 bool enable)
4965 {
4966 uint32_t data, def;
4967
4968 /* It is disabled by HW by default */
4969 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4970 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4971 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4972
4973 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
4974 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4975
4976 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4977 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4978 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4979
4980 /* only for Vega10 & Raven1 */
4981 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4982
4983 if (def != data)
4984 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4985
4986 /* MGLS is a global flag to control all MGLS in GFX */
4987 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4988 /* 2 - RLC memory Light sleep */
4989 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4990 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4991 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4992 if (def != data)
4993 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4994 }
4995 /* 3 - CP memory Light sleep */
4996 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4997 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4998 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4999 if (def != data)
5000 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
5001 }
5002 }
5003 } else {
5004 /* 1 - MGCG_OVERRIDE */
5005 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5006
5007 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1))
5008 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
5009
5010 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
5011 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
5012 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
5013 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
5014
5015 if (def != data)
5016 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5017
5018 /* 2 - disable MGLS in RLC */
5019 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
5020 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5021 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5022 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
5023 }
5024
5025 /* 3 - disable MGLS in CP */
5026 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
5027 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5028 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5029 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
5030 }
5031 }
5032 }
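
/*
 * Descriptive note on the MGCG/MGLS sequence above: the bits in
 * RLC_CGTT_MGCG_OVERRIDE force the corresponding clocks on while they are
 * set, so enabling medium grain clock gating is done by clearing the
 * override bits (handing control back to the clock-gating hardware) and
 * disabling it is done by setting them again.  Memory light sleep (MGLS)
 * is controlled separately through the *_MEM_SLP_CNTL LS enable bits.
 */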
5033
5034 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
5035 bool enable)
5036 {
5037 uint32_t data, def;
5038
5039 if (!adev->gfx.num_gfx_rings)
5040 return;
5041
5042 /* Enable 3D CGCG/CGLS */
5043 if (enable) {
5044 /* write cmd to clear cgcg/cgls ov */
5045 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5046 /* unset CGCG override */
5047 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
5048 /* update CGCG and CGLS override bits */
5049 if (def != data)
5050 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5051
5052 /* enable 3Dcgcg FSM(0x0000363f) */
5053 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5054
5055 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5056 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5057 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5058 else
5059 data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
5060
5061 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5062 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5063 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5064 if (def != data)
5065 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5066
5067 /* set IDLE_POLL_COUNT(0x00900100) */
5068 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5069 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5070 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5071 if (def != data)
5072 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5073 } else {
5074 /* Disable CGCG/CGLS */
5075 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5076 /* disable cgcg, cgls should be disabled */
5077 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5078 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5079 /* disable cgcg and cgls in FSM */
5080 if (def != data)
5081 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5082 }
5083 }
5084
5085 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5086 bool enable)
5087 {
5088 uint32_t def, data;
5089
5090 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5091 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5092 /* unset CGCG override */
5093 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5094 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5095 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5096 else
5097 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5098 /* update CGCG and CGLS override bits */
5099 if (def != data)
5100 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5101
5102 /* enable cgcg FSM(0x0000363F) */
5103 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5104
5105 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1))
5106 data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5107 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5108 else
5109 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5110 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5111 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5112 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5113 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5114 if (def != data)
5115 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5116
5117 /* set IDLE_POLL_COUNT(0x00900100) */
5118 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5119 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5120 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5121 if (def != data)
5122 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5123 } else {
5124 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5125 /* reset CGCG/CGLS bits */
5126 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5127 /* disable cgcg and cgls in FSM */
5128 if (def != data)
5129 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5130 }
5131 }
5132
5133 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5134 bool enable)
5135 {
5136 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5137 if (enable) {
5138 /* CGCG/CGLS should be enabled after MGCG/MGLS
5139 * === MGCG + MGLS ===
5140 */
5141 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5142 /* === CGCG /CGLS for GFX 3D Only === */
5143 gfx_v9_0_update_3d_clock_gating(adev, enable);
5144 /* === CGCG + CGLS === */
5145 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5146 } else {
5147 /* CGCG/CGLS should be disabled before MGCG/MGLS
5148 * === CGCG + CGLS ===
5149 */
5150 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5151 /* === CGCG /CGLS for GFX 3D Only === */
5152 gfx_v9_0_update_3d_clock_gating(adev, enable);
5153 /* === MGCG + MGLS === */
5154 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5155 }
5156 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5157 return 0;
5158 }
5159
5160 static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev,
5161 unsigned int vmid)
5162 {
5163 u32 reg, data;
5164
5165 reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5166 if (amdgpu_sriov_is_pp_one_vf(adev))
5167 data = RREG32_NO_KIQ(reg);
5168 else
5169 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5170
5171 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5172 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5173
5174 if (amdgpu_sriov_is_pp_one_vf(adev))
5175 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5176 else
5177 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5178 }
5179
5180 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
5181 {
5182 amdgpu_gfx_off_ctrl(adev, false);
5183
5184 gfx_v9_0_update_spm_vmid_internal(adev, vmid);
5185
5186 amdgpu_gfx_off_ctrl(adev, true);
5187 }
5188
5189 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5190 uint32_t offset,
5191 struct soc15_reg_rlcg *entries, int arr_size)
5192 {
5193 int i;
5194 uint32_t reg;
5195
5196 if (!entries)
5197 return false;
5198
5199 for (i = 0; i < arr_size; i++) {
5200 const struct soc15_reg_rlcg *entry;
5201
5202 entry = &entries[i];
5203 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5204 if (offset == reg)
5205 return true;
5206 }
5207
5208 return false;
5209 }
5210
5211 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5212 {
5213 return gfx_v9_0_check_rlcg_range(adev, offset,
5214 (void *)rlcg_access_gc_9_0,
5215 ARRAY_SIZE(rlcg_access_gc_9_0));
5216 }
5217
5218 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5219 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5220 .set_safe_mode = gfx_v9_0_set_safe_mode,
5221 .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5222 .init = gfx_v9_0_rlc_init,
5223 .get_csb_size = gfx_v9_0_get_csb_size,
5224 .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5225 .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5226 .resume = gfx_v9_0_rlc_resume,
5227 .stop = gfx_v9_0_rlc_stop,
5228 .reset = gfx_v9_0_rlc_reset,
5229 .start = gfx_v9_0_rlc_start,
5230 .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5231 .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5232 };
5233
5234 static int gfx_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block,
5235 enum amd_powergating_state state)
5236 {
5237 struct amdgpu_device *adev = ip_block->adev;
5238 bool enable = (state == AMD_PG_STATE_GATE);
5239
5240 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5241 case IP_VERSION(9, 2, 2):
5242 case IP_VERSION(9, 1, 0):
5243 case IP_VERSION(9, 3, 0):
5244 if (!enable)
5245 amdgpu_gfx_off_ctrl(adev, false);
5246
5247 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5248 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5249 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5250 } else {
5251 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5252 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5253 }
5254
5255 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5256 gfx_v9_0_enable_cp_power_gating(adev, true);
5257 else
5258 gfx_v9_0_enable_cp_power_gating(adev, false);
5259
5260 /* update gfx cgpg state */
5261 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5262
5263 /* update mgcg state */
5264 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5265
5266 if (enable)
5267 amdgpu_gfx_off_ctrl(adev, true);
5268 break;
5269 case IP_VERSION(9, 2, 1):
5270 amdgpu_gfx_off_ctrl(adev, enable);
5271 break;
5272 default:
5273 break;
5274 }
5275
5276 return 0;
5277 }
5278
5279 static int gfx_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
5280 enum amd_clockgating_state state)
5281 {
5282 struct amdgpu_device *adev = ip_block->adev;
5283
5284 if (amdgpu_sriov_vf(adev))
5285 return 0;
5286
5287 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
5288 case IP_VERSION(9, 0, 1):
5289 case IP_VERSION(9, 2, 1):
5290 case IP_VERSION(9, 4, 0):
5291 case IP_VERSION(9, 2, 2):
5292 case IP_VERSION(9, 1, 0):
5293 case IP_VERSION(9, 4, 1):
5294 case IP_VERSION(9, 3, 0):
5295 case IP_VERSION(9, 4, 2):
5296 gfx_v9_0_update_gfx_clock_gating(adev,
5297 state == AMD_CG_STATE_GATE);
5298 break;
5299 default:
5300 break;
5301 }
5302 return 0;
5303 }
5304
5305 static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
5306 {
5307 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5308 int data;
5309
5310 if (amdgpu_sriov_vf(adev))
5311 *flags = 0;
5312
5313 /* AMD_CG_SUPPORT_GFX_MGCG */
5314 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5315 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5316 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5317
5318 /* AMD_CG_SUPPORT_GFX_CGCG */
5319 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5320 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5321 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5322
5323 /* AMD_CG_SUPPORT_GFX_CGLS */
5324 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5325 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5326
5327 /* AMD_CG_SUPPORT_GFX_RLC_LS */
5328 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5329 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5330 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5331
5332 /* AMD_CG_SUPPORT_GFX_CP_LS */
5333 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5334 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5335 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5336
5337 if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) {
5338 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5339 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5340 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5341 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5342
5343 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5344 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5345 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5346 }
5347 }
5348
5349 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5350 {
5351 return *ring->rptr_cpu_addr; /* gfx9 is 32bit rptr */
5352 }
5353
5354 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5355 {
5356 struct amdgpu_device *adev = ring->adev;
5357 u64 wptr;
5358
5359 /* XXX check if swapping is necessary on BE */
5360 if (ring->use_doorbell) {
5361 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5362 } else {
5363 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5364 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5365 }
5366
5367 return wptr;
5368 }
5369
5370 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5371 {
5372 struct amdgpu_device *adev = ring->adev;
5373
5374 if (ring->use_doorbell) {
5375 /* XXX check if swapping is necessary on BE */
5376 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5377 WDOORBELL64(ring->doorbell_index, ring->wptr);
5378 } else {
5379 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5380 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5381 }
5382 }
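
/*
 * Descriptive note on the gfx ring pointers above: when a doorbell is used,
 * the 64-bit wptr is mirrored into the writeback slot (so the CP can fetch
 * it) and then written to the doorbell to notify the CP; without a doorbell
 * the wptr is programmed through the CP_RB0_WPTR/_HI MMIO registers instead.
 */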
5383
5384 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5385 {
5386 struct amdgpu_device *adev = ring->adev;
5387 u32 ref_and_mask, reg_mem_engine;
5388 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5389
5390 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5391 switch (ring->me) {
5392 case 1:
5393 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5394 break;
5395 case 2:
5396 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5397 break;
5398 default:
5399 return;
5400 }
5401 reg_mem_engine = 0;
5402 } else {
5403 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5404 reg_mem_engine = 1; /* pfp */
5405 }
5406
5407 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5408 adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5409 adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5410 ref_and_mask, ref_and_mask, 0x20);
5411 }
5412
5413 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5414 struct amdgpu_job *job,
5415 struct amdgpu_ib *ib,
5416 uint32_t flags)
5417 {
5418 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5419 u32 header, control = 0;
5420
5421 if (ib->flags & AMDGPU_IB_FLAG_CE)
5422 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5423 else
5424 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5425
5426 control |= ib->length_dw | (vmid << 24);
5427
5428 if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
5429 control |= INDIRECT_BUFFER_PRE_ENB(1);
5430
5431 if (flags & AMDGPU_IB_PREEMPTED)
5432 control |= INDIRECT_BUFFER_PRE_RESUME(1);
5433
5434 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5435 gfx_v9_0_ring_emit_de_meta(ring,
5436 (!amdgpu_sriov_vf(ring->adev) &&
5437 flags & AMDGPU_IB_PREEMPTED) ?
5438 true : false,
5439 job->gds_size > 0 && job->gds_base != 0);
5440 }
5441
5442 amdgpu_ring_write(ring, header);
5443 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5444 amdgpu_ring_write(ring,
5445 #ifdef __BIG_ENDIAN
5446 (2 << 0) |
5447 #endif
5448 lower_32_bits(ib->gpu_addr));
5449 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5450 amdgpu_ring_ib_on_emit_cntl(ring);
5451 amdgpu_ring_write(ring, control);
5452 }
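
/*
 * The IB packet emitted above is header, address low (with a swap field on
 * big-endian builds), address high, and a control dword packing the IB
 * length with the vmid:  control = length_dw | (vmid << 24) | flags.
 * For example (hypothetical values), length_dw = 0x10 and vmid = 3 give a
 * control value of 0x03000010 before any preemption flags are ORed in.
 */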
5453
5454 static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring,
5455 unsigned offset)
5456 {
5457 u32 control = ring->ring[offset];
5458
5459 control |= INDIRECT_BUFFER_PRE_RESUME(1);
5460 ring->ring[offset] = control;
5461 }
5462
5463 static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring,
5464 unsigned offset)
5465 {
5466 struct amdgpu_device *adev = ring->adev;
5467 void *ce_payload_cpu_addr;
5468 uint64_t payload_offset, payload_size;
5469
5470 payload_size = sizeof(struct v9_ce_ib_state);
5471
5472 if (ring->is_mes_queue) {
5473 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5474 gfx[0].gfx_meta_data) +
5475 offsetof(struct v9_gfx_meta_data, ce_payload);
5476 ce_payload_cpu_addr =
5477 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5478 } else {
5479 payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5480 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5481 }
5482
5483 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5484 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size);
5485 } else {
5486 memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr,
5487 (ring->buf_mask + 1 - offset) << 2);
5488 payload_size -= (ring->buf_mask + 1 - offset) << 2;
5489 memcpy((void *)&ring->ring[0],
5490 ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5491 payload_size);
5492 }
5493 }
5494
5495 static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring,
5496 unsigned offset)
5497 {
5498 struct amdgpu_device *adev = ring->adev;
5499 void *de_payload_cpu_addr;
5500 uint64_t payload_offset, payload_size;
5501
5502 payload_size = sizeof(struct v9_de_ib_state);
5503
5504 if (ring->is_mes_queue) {
5505 payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5506 gfx[0].gfx_meta_data) +
5507 offsetof(struct v9_gfx_meta_data, de_payload);
5508 de_payload_cpu_addr =
5509 amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset);
5510 } else {
5511 payload_offset = offsetof(struct v9_gfx_meta_data, de_payload);
5512 de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset;
5513 }
5514
5515 ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status =
5516 IB_COMPLETION_STATUS_PREEMPTED;
5517
5518 if (offset + (payload_size >> 2) <= ring->buf_mask + 1) {
5519 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size);
5520 } else {
5521 memcpy((void *)&ring->ring[offset], de_payload_cpu_addr,
5522 (ring->buf_mask + 1 - offset) << 2);
5523 payload_size -= (ring->buf_mask + 1 - offset) << 2;
5524 memcpy((void *)&ring->ring[0],
5525 de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2),
5526 payload_size);
5527 }
5528 }
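
/*
 * Note on the split memcpy paths above: ring->buf_mask + 1 is the ring size
 * in dwords, so when offset + payload would run past the end of the ring
 * the copy is split into a tail chunk at [offset..buf_mask] and a head
 * chunk starting at index 0.  For example (hypothetical sizes), a 1024-dword
 * ring with offset 1020 and a 16-dword payload copies 4 dwords at the end
 * and the remaining 12 at the start.
 */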
5529
5530 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5531 struct amdgpu_job *job,
5532 struct amdgpu_ib *ib,
5533 uint32_t flags)
5534 {
5535 unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5536 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5537
5538 /* Currently, there is a high likelihood of a wave ID mismatch
5539 * between ME and GDS, leading to a hw deadlock, because ME generates
5540 * different wave IDs than the GDS expects. This situation happens
5541 * randomly when at least 5 compute pipes use GDS ordered append.
5542 * The wave IDs generated by ME are also wrong after suspend/resume.
5543 * Those are probably bugs somewhere else in the kernel driver.
5544 *
5545 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5546 * GDS to 0 for this ring (me/pipe).
5547 */
5548 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5549 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5550 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5551 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5552 }
5553
5554 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5555 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5556 amdgpu_ring_write(ring,
5557 #ifdef __BIG_ENDIAN
5558 (2 << 0) |
5559 #endif
5560 lower_32_bits(ib->gpu_addr));
5561 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5562 amdgpu_ring_write(ring, control);
5563 }
5564
5565 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5566 u64 seq, unsigned flags)
5567 {
5568 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5569 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5570 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5571 bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
5572 uint32_t dw2 = 0;
5573
5574 /* RELEASE_MEM - flush caches, send int */
5575 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5576
5577 if (writeback) {
5578 dw2 = EOP_TC_NC_ACTION_EN;
5579 } else {
5580 dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
5581 EOP_TC_MD_ACTION_EN;
5582 }
5583 dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5584 EVENT_INDEX(5);
5585 if (exec)
5586 dw2 |= EOP_EXEC;
5587
5588 amdgpu_ring_write(ring, dw2);
5589 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5590
5591 /*
5592 * the address must be Qword aligned for a 64-bit write, or Dword
5593 * aligned if only the low 32 bits of data are sent (data high is discarded)
5594 */
5595 if (write64bit)
5596 BUG_ON(addr & 0x7);
5597 else
5598 BUG_ON(addr & 0x3);
5599 amdgpu_ring_write(ring, lower_32_bits(addr));
5600 amdgpu_ring_write(ring, upper_32_bits(addr));
5601 amdgpu_ring_write(ring, lower_32_bits(seq));
5602 amdgpu_ring_write(ring, upper_32_bits(seq));
5603 amdgpu_ring_write(ring, 0);
5604 }
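
/*
 * Descriptive note on the RELEASE_MEM fence above: dw2 selects which caches
 * are flushed and invalidated, DATA_SEL picks a 32-bit (1) or 64-bit (2)
 * fence value write, and INT_SEL(2) requests an interrupt once the write has
 * been confirmed; hence the Dword/Qword alignment checks on the fence
 * address.
 */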
5605
5606 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5607 {
5608 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5609 uint32_t seq = ring->fence_drv.sync_seq;
5610 uint64_t addr = ring->fence_drv.gpu_addr;
5611
5612 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5613 lower_32_bits(addr), upper_32_bits(addr),
5614 seq, 0xffffffff, 4);
5615 }
5616
5617 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5618 unsigned vmid, uint64_t pd_addr)
5619 {
5620 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5621
5622 /* compute doesn't have PFP */
5623 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5624 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5625 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5626 amdgpu_ring_write(ring, 0x0);
5627 }
5628 }
5629
5630 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5631 {
5632 return *ring->rptr_cpu_addr; /* gfx9 hardware is 32bit rptr */
5633 }
5634
5635 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5636 {
5637 u64 wptr;
5638
5639 /* XXX check if swapping is necessary on BE */
5640 if (ring->use_doorbell)
5641 wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
5642 else
5643 BUG();
5644 return wptr;
5645 }
5646
5647 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5648 {
5649 struct amdgpu_device *adev = ring->adev;
5650
5651 /* XXX check if swapping is necessary on BE */
5652 if (ring->use_doorbell) {
5653 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
5654 WDOORBELL64(ring->doorbell_index, ring->wptr);
5655 } else {
5656 BUG(); /* only DOORBELL method supported on gfx9 now */
5657 }
5658 }
5659
5660 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5661 u64 seq, unsigned int flags)
5662 {
5663 struct amdgpu_device *adev = ring->adev;
5664
5665 /* we only allocate 32bit for each seq wb address */
5666 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5667
5668 /* write fence seq to the "addr" */
5669 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5670 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5671 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5672 amdgpu_ring_write(ring, lower_32_bits(addr));
5673 amdgpu_ring_write(ring, upper_32_bits(addr));
5674 amdgpu_ring_write(ring, lower_32_bits(seq));
5675
5676 if (flags & AMDGPU_FENCE_FLAG_INT) {
5677 /* set register to trigger INT */
5678 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5679 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5680 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5681 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5682 amdgpu_ring_write(ring, 0);
5683 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5684 }
5685 }
5686
5687 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5688 {
5689 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5690 amdgpu_ring_write(ring, 0);
5691 }
5692
5693 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
5694 {
5695 struct amdgpu_device *adev = ring->adev;
5696 struct v9_ce_ib_state ce_payload = {0};
5697 uint64_t offset, ce_payload_gpu_addr;
5698 void *ce_payload_cpu_addr;
5699 int cnt;
5700
5701 cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
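	/*
	 * Count arithmetic (descriptive): the WRITE_DATA packet is 1 header +
	 * 1 control + 2 address dwords + the payload, and the PM4 count field
	 * is the total packet length minus two, hence payload dwords + 4 - 2.
	 */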
5702
5703 if (ring->is_mes_queue) {
5704 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5705 gfx[0].gfx_meta_data) +
5706 offsetof(struct v9_gfx_meta_data, ce_payload);
5707 ce_payload_gpu_addr =
5708 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5709 ce_payload_cpu_addr =
5710 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5711 } else {
5712 offset = offsetof(struct v9_gfx_meta_data, ce_payload);
5713 ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5714 ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5715 }
5716
5717 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5718 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5719 WRITE_DATA_DST_SEL(8) |
5720 WR_CONFIRM) |
5721 WRITE_DATA_CACHE_POLICY(0));
5722 amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
5723 amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
5724
5725 amdgpu_ring_ib_on_emit_ce(ring);
5726
5727 if (resume)
5728 amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
5729 sizeof(ce_payload) >> 2);
5730 else
5731 amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
5732 sizeof(ce_payload) >> 2);
5733 }
5734
5735 static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
5736 {
5737 int i, r = 0;
5738 struct amdgpu_device *adev = ring->adev;
5739 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
5740 struct amdgpu_ring *kiq_ring = &kiq->ring;
5741 unsigned long flags;
5742
5743 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
5744 return -EINVAL;
5745
5746 spin_lock_irqsave(&kiq->ring_lock, flags);
5747
5748 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
5749 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5750 return -ENOMEM;
5751 }
5752
5753 /* assert preemption condition */
5754 amdgpu_ring_set_preempt_cond_exec(ring, false);
5755
5756 ring->trail_seq += 1;
5757 amdgpu_ring_alloc(ring, 13);
5758 gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
5759 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
5760
5761 /* assert IB preemption, emit the trailing fence */
5762 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
5763 ring->trail_fence_gpu_addr,
5764 ring->trail_seq);
5765
5766 amdgpu_ring_commit(kiq_ring);
5767 spin_unlock_irqrestore(&kiq->ring_lock, flags);
5768
5769 /* poll the trailing fence */
5770 for (i = 0; i < adev->usec_timeout; i++) {
5771 if (ring->trail_seq ==
5772 le32_to_cpu(*ring->trail_fence_cpu_addr))
5773 break;
5774 udelay(1);
5775 }
5776
5777 if (i >= adev->usec_timeout) {
5778 r = -EINVAL;
5779 DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
5780 }
5781
5782 /* reset the CP_VMID_PREEMPT after trailing fence */
5783 amdgpu_ring_emit_wreg(ring,
5784 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
5785 0x0);
5786 amdgpu_ring_commit(ring);
5787
5788 /* deassert preemption condition */
5789 amdgpu_ring_set_preempt_cond_exec(ring, true);
5790 return r;
5791 }
5792
5793 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds)
5794 {
5795 struct amdgpu_device *adev = ring->adev;
5796 struct v9_de_ib_state de_payload = {0};
5797 uint64_t offset, gds_addr, de_payload_gpu_addr;
5798 void *de_payload_cpu_addr;
5799 int cnt;
5800
5801 if (ring->is_mes_queue) {
5802 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5803 gfx[0].gfx_meta_data) +
5804 offsetof(struct v9_gfx_meta_data, de_payload);
5805 de_payload_gpu_addr =
5806 amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5807 de_payload_cpu_addr =
5808 amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
5809
5810 offset = offsetof(struct amdgpu_mes_ctx_meta_data,
5811 gfx[0].gds_backup) +
5812 offsetof(struct v9_gfx_meta_data, de_payload);
5813 gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
5814 } else {
5815 offset = offsetof(struct v9_gfx_meta_data, de_payload);
5816 de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
5817 de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
5818
5819 gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
5820 AMDGPU_CSA_SIZE - adev->gds.gds_size,
5821 PAGE_SIZE);
5822 }
5823
5824 if (usegds) {
5825 de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5826 de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5827 }
5828
5829 cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5830 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5831 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5832 WRITE_DATA_DST_SEL(8) |
5833 WR_CONFIRM) |
5834 WRITE_DATA_CACHE_POLICY(0));
5835 amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
5836 amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
5837
5838 amdgpu_ring_ib_on_emit_de(ring);
5839 if (resume)
5840 amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
5841 sizeof(de_payload) >> 2);
5842 else
5843 amdgpu_ring_write_multiple(ring, (void *)&de_payload,
5844 sizeof(de_payload) >> 2);
5845 }
5846
5847 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5848 bool secure)
5849 {
5850 uint32_t v = secure ? FRAME_TMZ : 0;
5851
5852 amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5853 amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5854 }
5855
5856 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5857 {
5858 uint32_t dw2 = 0;
5859
5860 gfx_v9_0_ring_emit_ce_meta(ring,
5861 (!amdgpu_sriov_vf(ring->adev) &&
5862 flags & AMDGPU_IB_PREEMPTED) ? true : false);
5863
5864 dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5865 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5866 /* set load_global_config & load_global_uconfig */
5867 dw2 |= 0x8001;
5868 /* set load_cs_sh_regs */
5869 dw2 |= 0x01000000;
5870 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5871 dw2 |= 0x10002;
5872
5873 /* set load_ce_ram if preamble presented */
5874 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5875 dw2 |= 0x10000000;
5876 } else {
5877 /* still load_ce_ram if the preamble is presented for the first time,
5878 * even though no context switch happens.
5879 */
5880 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5881 dw2 |= 0x10000000;
5882 }
5883
5884 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5885 amdgpu_ring_write(ring, dw2);
5886 amdgpu_ring_write(ring, 0);
5887 }
5888
5889 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
5890 uint64_t addr)
5891 {
5892 unsigned ret;
5893 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5894 amdgpu_ring_write(ring, lower_32_bits(addr));
5895 amdgpu_ring_write(ring, upper_32_bits(addr));
5896 /* discard following DWs if *cond_exec_gpu_addr==0 */
5897 amdgpu_ring_write(ring, 0);
5898 ret = ring->wptr & ring->buf_mask;
5899 /* patch dummy value later */
5900 amdgpu_ring_write(ring, 0);
5901 return ret;
5902 }
5903
5904 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5905 uint32_t reg_val_offs)
5906 {
5907 struct amdgpu_device *adev = ring->adev;
5908
5909 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5910 amdgpu_ring_write(ring, 0 | /* src: register*/
5911 (5 << 8) | /* dst: memory */
5912 (1 << 20)); /* write confirm */
5913 amdgpu_ring_write(ring, reg);
5914 amdgpu_ring_write(ring, 0);
5915 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5916 reg_val_offs * 4));
5917 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5918 reg_val_offs * 4));
5919 }
5920
5921 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5922 uint32_t val)
5923 {
5924 uint32_t cmd = 0;
5925
5926 switch (ring->funcs->type) {
5927 case AMDGPU_RING_TYPE_GFX:
5928 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5929 break;
5930 case AMDGPU_RING_TYPE_KIQ:
5931 cmd = (1 << 16); /* no inc addr */
5932 break;
5933 default:
5934 cmd = WR_CONFIRM;
5935 break;
5936 }
5937 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5938 amdgpu_ring_write(ring, cmd);
5939 amdgpu_ring_write(ring, reg);
5940 amdgpu_ring_write(ring, 0);
5941 amdgpu_ring_write(ring, val);
5942 }
5943
5944 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5945 uint32_t val, uint32_t mask)
5946 {
5947 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5948 }
5949
5950 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5951 uint32_t reg0, uint32_t reg1,
5952 uint32_t ref, uint32_t mask)
5953 {
5954 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5955 struct amdgpu_device *adev = ring->adev;
5956 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5957 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5958
5959 if (fw_version_ok)
5960 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5961 ref, mask, 0x20);
5962 else
5963 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5964 ref, mask);
5965 }
5966
5967 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5968 {
5969 struct amdgpu_device *adev = ring->adev;
5970 uint32_t value = 0;
5971
5972 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5973 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5974 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5975 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5976 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
5977 WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5978 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
5979 }
5980
5981 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5982 enum amdgpu_interrupt_state state)
5983 {
5984 switch (state) {
5985 case AMDGPU_IRQ_STATE_DISABLE:
5986 case AMDGPU_IRQ_STATE_ENABLE:
5987 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5988 TIME_STAMP_INT_ENABLE,
5989 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5990 break;
5991 default:
5992 break;
5993 }
5994 }
5995
5996 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5997 int me, int pipe,
5998 enum amdgpu_interrupt_state state)
5999 {
6000 u32 mec_int_cntl, mec_int_cntl_reg;
6001
6002 /*
6003 * amdgpu controls only the first MEC. That's why this function only
6004 * handles the setting of interrupts for this specific MEC. All other
6005 * pipes' interrupts are set by amdkfd.
6006 */
6007
6008 if (me == 1) {
6009 switch (pipe) {
6010 case 0:
6011 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
6012 break;
6013 case 1:
6014 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
6015 break;
6016 case 2:
6017 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
6018 break;
6019 case 3:
6020 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
6021 break;
6022 default:
6023 DRM_DEBUG("invalid pipe %d\n", pipe);
6024 return;
6025 }
6026 } else {
6027 DRM_DEBUG("invalid me %d\n", me);
6028 return;
6029 }
6030
6031 switch (state) {
6032 case AMDGPU_IRQ_STATE_DISABLE:
6033 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6034 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6035 TIME_STAMP_INT_ENABLE, 0);
6036 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6037 break;
6038 case AMDGPU_IRQ_STATE_ENABLE:
6039 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
6040 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6041 TIME_STAMP_INT_ENABLE, 1);
6042 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
6043 break;
6044 default:
6045 break;
6046 }
6047 }
6048
6049 static u32 gfx_v9_0_get_cpc_int_cntl(struct amdgpu_device *adev,
6050 int me, int pipe)
6051 {
6052 /*
6053 * amdgpu controls only the first MEC. That's why this function only
6054 * handles the setting of interrupts for this specific MEC. All other
6055 * pipes' interrupts are set by amdkfd.
6056 */
6057 if (me != 1)
6058 return 0;
6059
6060 switch (pipe) {
6061 case 0:
6062 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
6063 case 1:
6064 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
6065 case 2:
6066 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
6067 case 3:
6068 return SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
6069 default:
6070 return 0;
6071 }
6072 }
6073
6074 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6075 struct amdgpu_irq_src *source,
6076 unsigned type,
6077 enum amdgpu_interrupt_state state)
6078 {
6079 u32 cp_int_cntl_reg, cp_int_cntl;
6080 int i, j;
6081
6082 switch (state) {
6083 case AMDGPU_IRQ_STATE_DISABLE:
6084 case AMDGPU_IRQ_STATE_ENABLE:
6085 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6086 PRIV_REG_INT_ENABLE,
6087 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6088 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6089 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6090 /* MECs start at 1 */
6091 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6092
6093 if (cp_int_cntl_reg) {
6094 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6095 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6096 PRIV_REG_INT_ENABLE,
6097 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6098 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6099 }
6100 }
6101 }
6102 break;
6103 default:
6104 break;
6105 }
6106
6107 return 0;
6108 }
6109
6110 static int gfx_v9_0_set_bad_op_fault_state(struct amdgpu_device *adev,
6111 struct amdgpu_irq_src *source,
6112 unsigned type,
6113 enum amdgpu_interrupt_state state)
6114 {
6115 u32 cp_int_cntl_reg, cp_int_cntl;
6116 int i, j;
6117
6118 switch (state) {
6119 case AMDGPU_IRQ_STATE_DISABLE:
6120 case AMDGPU_IRQ_STATE_ENABLE:
6121 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6122 OPCODE_ERROR_INT_ENABLE,
6123 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6124 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
6125 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
6126 /* MECs start at 1 */
6127 cp_int_cntl_reg = gfx_v9_0_get_cpc_int_cntl(adev, i + 1, j);
6128
6129 if (cp_int_cntl_reg) {
6130 cp_int_cntl = RREG32_SOC15_IP(GC, cp_int_cntl_reg);
6131 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6132 OPCODE_ERROR_INT_ENABLE,
6133 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6134 WREG32_SOC15_IP(GC, cp_int_cntl_reg, cp_int_cntl);
6135 }
6136 }
6137 }
6138 break;
6139 default:
6140 break;
6141 }
6142
6143 return 0;
6144 }
6145
6146 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6147 struct amdgpu_irq_src *source,
6148 unsigned type,
6149 enum amdgpu_interrupt_state state)
6150 {
6151 switch (state) {
6152 case AMDGPU_IRQ_STATE_DISABLE:
6153 case AMDGPU_IRQ_STATE_ENABLE:
6154 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6155 PRIV_INSTR_INT_ENABLE,
6156 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
6157 break;
6158 default:
6159 break;
6160 }
6161
6162 return 0;
6163 }
6164
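/* Toggle the CP ECC error interrupt enable on a given ME/pipe INT_CNTL register */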
6165 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \
6166 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
6167 CP_ECC_ERROR_INT_ENABLE, 1)
6168
6169 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \
6170 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
6171 CP_ECC_ERROR_INT_ENABLE, 0)
6172
6173 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
6174 struct amdgpu_irq_src *source,
6175 unsigned type,
6176 enum amdgpu_interrupt_state state)
6177 {
6178 switch (state) {
6179 case AMDGPU_IRQ_STATE_DISABLE:
6180 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6181 CP_ECC_ERROR_INT_ENABLE, 0);
6182 DISABLE_ECC_ON_ME_PIPE(1, 0);
6183 DISABLE_ECC_ON_ME_PIPE(1, 1);
6184 DISABLE_ECC_ON_ME_PIPE(1, 2);
6185 DISABLE_ECC_ON_ME_PIPE(1, 3);
6186 break;
6187
6188 case AMDGPU_IRQ_STATE_ENABLE:
6189 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
6190 CP_ECC_ERROR_INT_ENABLE, 1);
6191 ENABLE_ECC_ON_ME_PIPE(1, 0);
6192 ENABLE_ECC_ON_ME_PIPE(1, 1);
6193 ENABLE_ECC_ON_ME_PIPE(1, 2);
6194 ENABLE_ECC_ON_ME_PIPE(1, 3);
6195 break;
6196 default:
6197 break;
6198 }
6199
6200 return 0;
6201 }
6202
6203
6204 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6205 struct amdgpu_irq_src *src,
6206 unsigned type,
6207 enum amdgpu_interrupt_state state)
6208 {
6209 switch (type) {
6210 case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
6211 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
6212 break;
6213 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6214 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6215 break;
6216 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6217 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6218 break;
6219 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6220 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6221 break;
6222 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6223 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6224 break;
6225 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6226 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6227 break;
6228 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6229 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6230 break;
6231 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6232 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6233 break;
6234 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6235 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6236 break;
6237 default:
6238 break;
6239 }
6240 return 0;
6241 }
6242
6243 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
6244 struct amdgpu_irq_src *source,
6245 struct amdgpu_iv_entry *entry)
6246 {
6247 int i;
6248 u8 me_id, pipe_id, queue_id;
6249 struct amdgpu_ring *ring;
6250
6251 DRM_DEBUG("IH: CP EOP\n");
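	/* ring_id encodes the source queue: bits [1:0] pipe, [3:2] me, [6:4] queue */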
6252 me_id = (entry->ring_id & 0x0c) >> 2;
6253 pipe_id = (entry->ring_id & 0x03) >> 0;
6254 queue_id = (entry->ring_id & 0x70) >> 4;
6255
6256 switch (me_id) {
6257 case 0:
6258 if (adev->gfx.num_gfx_rings) {
6259 if (!adev->gfx.mcbp) {
6260 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6261 } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
6262 /* Fence signals are handled on the software rings */
6263 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
6264 amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
6265 }
6266 }
6267 break;
6268 case 1:
6269 case 2:
6270 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6271 ring = &adev->gfx.compute_ring[i];
6272 /* Per-queue interrupt is supported for MEC starting from VI.
6273 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6274 */
6275 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6276 amdgpu_fence_process(ring);
6277 }
6278 break;
6279 }
6280 return 0;
6281 }
6282
6283 static void gfx_v9_0_fault(struct amdgpu_device *adev,
6284 struct amdgpu_iv_entry *entry)
6285 {
6286 u8 me_id, pipe_id, queue_id;
6287 struct amdgpu_ring *ring;
6288 int i;
6289
6290 me_id = (entry->ring_id & 0x0c) >> 2;
6291 pipe_id = (entry->ring_id & 0x03) >> 0;
6292 queue_id = (entry->ring_id & 0x70) >> 4;
6293
6294 switch (me_id) {
6295 case 0:
6296 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
6297 break;
6298 case 1:
6299 case 2:
6300 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6301 ring = &adev->gfx.compute_ring[i];
6302 if (ring->me == me_id && ring->pipe == pipe_id &&
6303 ring->queue == queue_id)
6304 drm_sched_fault(&ring->sched);
6305 }
6306 break;
6307 }
6308 }
6309
6310 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
6311 struct amdgpu_irq_src *source,
6312 struct amdgpu_iv_entry *entry)
6313 {
6314 DRM_ERROR("Illegal register access in command stream\n");
6315 gfx_v9_0_fault(adev, entry);
6316 return 0;
6317 }
6318
6319 static int gfx_v9_0_bad_op_irq(struct amdgpu_device *adev,
6320 struct amdgpu_irq_src *source,
6321 struct amdgpu_iv_entry *entry)
6322 {
6323 DRM_ERROR("Illegal opcode in command stream\n");
6324 gfx_v9_0_fault(adev, entry);
6325 return 0;
6326 }
6327
6328 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
6329 struct amdgpu_irq_src *source,
6330 struct amdgpu_iv_entry *entry)
6331 {
6332 DRM_ERROR("Illegal instruction in command stream\n");
6333 gfx_v9_0_fault(adev, entry);
6334 return 0;
6335 }
6336
6337
6338 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
6339 { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
6340 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
6341 SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
6342 },
6343 { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
6344 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
6345 SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
6346 },
6347 { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6348 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
6349 0, 0
6350 },
6351 { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6352 SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6353 0, 0
6354 },
6355 { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6356 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6357 SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6358 },
6359 { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6360 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6361 0, 0
6362 },
6363 { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6364 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6365 SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6366 },
6367 { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6368 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6369 SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6370 },
6371 { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6372 SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6373 0, 0
6374 },
6375 { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6376 SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6377 0, 0
6378 },
6379 { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6380 SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6381 0, 0
6382 },
6383 { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6384 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6385 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6386 },
6387 { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6388 SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6389 0, 0
6390 },
6391 { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6392 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6393 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6394 },
6395 { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6396 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6397 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6398 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6399 },
6400 { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6401 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6402 SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6403 0, 0
6404 },
6405 { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6406 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6407 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6408 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6409 },
6410 { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6411 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6412 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6413 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6414 },
6415 { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6416 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6417 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6418 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6419 },
6420 { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6421 SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6422 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6423 SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6424 },
6425 { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6426 SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6427 0, 0
6428 },
6429 { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6430 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6431 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6432 },
6433 { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6434 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6435 0, 0
6436 },
6437 { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6438 SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6439 0, 0
6440 },
6441 { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6442 SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6443 0, 0
6444 },
6445 { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6446 SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6447 0, 0
6448 },
6449 { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6450 SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6451 0, 0
6452 },
6453 { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6454 SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6455 0, 0
6456 },
6457 { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6458 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6459 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6460 },
6461 { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6462 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6463 SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6464 },
6465 { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6466 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6467 SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6468 },
6469 { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6470 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6471 SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6472 },
6473 { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6474 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6475 SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6476 },
6477 { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6478 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6479 0, 0
6480 },
6481 { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6482 SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6483 0, 0
6484 },
6485 { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6486 SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6487 0, 0
6488 },
6489 { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6490 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6491 0, 0
6492 },
6493 { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6494 SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6495 0, 0
6496 },
6497 { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6498 SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6499 0, 0
6500 },
6501 { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6502 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6503 0, 0
6504 },
6505 { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6506 SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6507 0, 0
6508 },
6509 { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6510 SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6511 0, 0
6512 },
6513 { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6514 SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6515 0, 0
6516 },
6517 { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6518 SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6519 0, 0
6520 },
6521 { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6522 SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6523 0, 0
6524 },
6525 { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6526 SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6527 0, 0
6528 },
6529 { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6530 SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6531 0, 0
6532 },
6533 { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6534 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6535 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6536 },
6537 { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6538 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6539 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6540 },
6541 { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6542 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6543 0, 0
6544 },
6545 { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6546 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6547 0, 0
6548 },
6549 { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6550 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6551 0, 0
6552 },
6553 { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6554 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6555 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6556 },
6557 { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6558 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6559 SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6560 },
6561 { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6562 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6563 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6564 },
6565 { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6566 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6567 SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6568 },
6569 { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6570 SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6571 0, 0
6572 },
6573 { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6574 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6575 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6576 },
6577 { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6578 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6579 SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6580 },
6581 { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6582 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6583 SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6584 },
6585 { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6586 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6587 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6588 },
6589 { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6590 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6591 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6592 },
6593 { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6594 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6595 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6596 },
6597 { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6598 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6599 SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6600 },
6601 { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6602 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6603 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6604 },
6605 { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6606 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6607 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6608 },
6609 { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6610 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6611 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6612 },
6613 { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6614 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6615 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6616 },
6617 { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6618 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6619 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6620 },
6621 { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6622 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6623 SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6624 },
6625 { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6626 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6627 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6628 },
6629 { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6630 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6631 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6632 },
6633 { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6634 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6635 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6636 },
6637 { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6638 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6639 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6640 },
6641 { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6642 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6643 0, 0
6644 },
6645 { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6646 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6647 0, 0
6648 },
6649 { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6650 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6651 0, 0
6652 },
6653 { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6654 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6655 0, 0
6656 },
6657 { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6658 SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6659 0, 0
6660 },
6661 { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6662 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6663 SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6664 },
6665 { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6666 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6667 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6668 },
6669 { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6670 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6671 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6672 },
6673 { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6674 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6675 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6676 },
6677 { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6678 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6679 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6680 },
6681 { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6682 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6683 0, 0
6684 },
6685 { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6686 SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6687 0, 0
6688 },
6689 { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6690 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6691 0, 0
6692 },
6693 { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6694 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6695 0, 0
6696 },
6697 { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6698 SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6699 0, 0
6700 },
6701 { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6702 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6703 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6704 },
6705 { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6706 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6707 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6708 },
6709 { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6710 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6711 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6712 },
6713 { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6714 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6715 SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6716 },
6717 { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6718 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6719 SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6720 },
6721 { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6722 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6723 0, 0
6724 },
6725 { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6726 SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6727 0, 0
6728 },
6729 { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6730 SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6731 0, 0
6732 },
6733 { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6734 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6735 0, 0
6736 },
6737 { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6738 SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6739 0, 0
6740 },
6741 { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6742 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6743 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6744 },
6745 { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6746 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6747 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6748 },
6749 { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6750 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6751 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6752 },
6753 { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6754 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6755 0, 0
6756 },
6757 { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6758 SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6759 0, 0
6760 },
6761 { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6762 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6763 0, 0
6764 },
6765 { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6766 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6767 0, 0
6768 },
6769 { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6770 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6771 0, 0
6772 },
6773 { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6774 SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6775 0, 0
6776 }
6777 };
6778
6779 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6780 void *inject_if, uint32_t instance_mask)
6781 {
6782 struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6783 int ret;
6784 struct ta_ras_trigger_error_input block_info = { 0 };
6785
6786 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6787 return -EINVAL;
6788
6789 if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6790 return -EINVAL;
6791
6792 if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6793 return -EPERM;
6794
6795 if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6796 info->head.type)) {
6797 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6798 ras_gfx_subblocks[info->head.sub_block_index].name,
6799 info->head.type);
6800 return -EPERM;
6801 }
6802
6803 if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6804 info->head.type)) {
6805 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6806 ras_gfx_subblocks[info->head.sub_block_index].name,
6807 info->head.type);
6808 return -EPERM;
6809 }
6810
6811 block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6812 block_info.sub_block_index =
6813 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6814 block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6815 block_info.address = info->address;
6816 block_info.value = info->value;
6817
6818 mutex_lock(&adev->grbm_idx_mutex);
6819 ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask);
6820 mutex_unlock(&adev->grbm_idx_mutex);
6821
6822 return ret;
6823 }
6824
6825 static const char * const vml2_mems[] = {
6826 "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6827 "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6828 "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6829 "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6830 "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6831 "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6832 "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6833 "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6834 "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6835 "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6836 "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6837 "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6838 "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6839 "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6840 "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6841 "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6842 };
6843
6844 static const char * const vml2_walker_mems[] = {
6845 "UTC_VML2_CACHE_PDE0_MEM0",
6846 "UTC_VML2_CACHE_PDE0_MEM1",
6847 "UTC_VML2_CACHE_PDE1_MEM0",
6848 "UTC_VML2_CACHE_PDE1_MEM1",
6849 "UTC_VML2_CACHE_PDE2_MEM0",
6850 "UTC_VML2_CACHE_PDE2_MEM1",
6851 "UTC_VML2_RDIF_LOG_FIFO",
6852 };
6853
6854 static const char * const atc_l2_cache_2m_mems[] = {
6855 "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6856 "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6857 "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6858 "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6859 };
6860
6861 static const char * const atc_l2_cache_4k_mems[] = {
6862 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6863 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6864 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6865 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6866 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6867 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6868 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6869 "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6870 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6871 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6872 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6873 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6874 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6875 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6876 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6877 "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6878 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6879 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6880 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6881 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6882 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6883 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6884 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6885 "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6886 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6887 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6888 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6889 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6890 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6891 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6892 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6893 "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6894 };
6895
6896 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6897 struct ras_err_data *err_data)
6898 {
6899 uint32_t i, data;
6900 uint32_t sec_count, ded_count;
6901
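	/* reset the index registers and zero the EDC counters before sampling each instance */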
6902 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6903 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6904 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6905 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6906 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6907 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6908 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6909 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6910
6911 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6912 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6913 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6914
6915 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6916 if (sec_count) {
6917 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6918 "SEC %d\n", i, vml2_mems[i], sec_count);
6919 err_data->ce_count += sec_count;
6920 }
6921
6922 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6923 if (ded_count) {
6924 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6925 "DED %d\n", i, vml2_mems[i], ded_count);
6926 err_data->ue_count += ded_count;
6927 }
6928 }
6929
6930 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6931 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6932 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6933
6934 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6935 SEC_COUNT);
6936 if (sec_count) {
6937 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6938 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6939 err_data->ce_count += sec_count;
6940 }
6941
6942 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6943 DED_COUNT);
6944 if (ded_count) {
6945 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6946 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6947 err_data->ue_count += ded_count;
6948 }
6949 }
6950
6951 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6952 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6953 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6954
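		/* SEC count is in bits [14:13] of ATC_L2_CACHE_2M_EDC_CNT */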
6955 sec_count = (data & 0x00006000L) >> 0xd;
6956 if (sec_count) {
6957 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6958 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6959 sec_count);
6960 err_data->ce_count += sec_count;
6961 }
6962 }
6963
6964 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6965 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6966 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6967
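		/* SEC count in bits [14:13], DED count in bits [16:15] of ATC_L2_CACHE_4K_EDC_CNT */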
6968 sec_count = (data & 0x00006000L) >> 0xd;
6969 if (sec_count) {
6970 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6971 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6972 sec_count);
6973 err_data->ce_count += sec_count;
6974 }
6975
6976 ded_count = (data & 0x00018000L) >> 0xf;
6977 if (ded_count) {
6978 dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6979 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6980 ded_count);
6981 err_data->ue_count += ded_count;
6982 }
6983 }
6984
6985 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6986 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6987 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6988 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6989
6990 return 0;
6991 }
6992
6993 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6994 const struct soc15_reg_entry *reg,
6995 uint32_t se_id, uint32_t inst_id, uint32_t value,
6996 uint32_t *sec_count, uint32_t *ded_count)
6997 {
6998 uint32_t i;
6999 uint32_t sec_cnt, ded_cnt;
7000
7001 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
7002 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
7003 gfx_v9_0_ras_fields[i].seg != reg->seg ||
7004 gfx_v9_0_ras_fields[i].inst != reg->inst)
7005 continue;
7006
7007 sec_cnt = (value &
7008 gfx_v9_0_ras_fields[i].sec_count_mask) >>
7009 gfx_v9_0_ras_fields[i].sec_count_shift;
7010 if (sec_cnt) {
7011 dev_info(adev->dev, "GFX SubBlock %s, "
7012 "Instance[%d][%d], SEC %d\n",
7013 gfx_v9_0_ras_fields[i].name,
7014 se_id, inst_id,
7015 sec_cnt);
7016 *sec_count += sec_cnt;
7017 }
7018
7019 ded_cnt = (value &
7020 gfx_v9_0_ras_fields[i].ded_count_mask) >>
7021 gfx_v9_0_ras_fields[i].ded_count_shift;
7022 if (ded_cnt) {
7023 dev_info(adev->dev, "GFX SubBlock %s, "
7024 "Instance[%d][%d], DED %d\n",
7025 gfx_v9_0_ras_fields[i].name,
7026 se_id, inst_id,
7027 ded_cnt);
7028 *ded_count += ded_cnt;
7029 }
7030 }
7031
7032 return 0;
7033 }
7034
7035 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
7036 {
7037 int i, j, k;
7038
7039 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7040 return;
7041
7042 /* read back registers to clear the counters */
7043 mutex_lock(&adev->grbm_idx_mutex);
7044 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7045 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7046 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7047 amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
7048 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7049 }
7050 }
7051 }
7052 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
7053 mutex_unlock(&adev->grbm_idx_mutex);
7054
7055 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7056 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
7057 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7058 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
7059 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7060 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
7061 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7062 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
7063
7064 for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
7065 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
7066 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
7067 }
7068
7069 for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
7070 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
7071 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
7072 }
7073
7074 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
7075 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
7076 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
7077 }
7078
7079 for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
7080 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
7081 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
7082 }
7083
7084 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
7085 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
7086 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
7087 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
7088 }
7089
7090 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
7091 void *ras_error_status)
7092 {
7093 struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
7094 uint32_t sec_count = 0, ded_count = 0;
7095 uint32_t i, j, k;
7096 uint32_t reg_value;
7097
7098 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
7099 return;
7100
7101 err_data->ue_count = 0;
7102 err_data->ce_count = 0;
7103
7104 mutex_lock(&adev->grbm_idx_mutex);
7105
7106 for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
7107 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
7108 for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
7109 amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
7110 reg_value =
7111 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
7112 if (reg_value)
7113 gfx_v9_0_ras_error_count(adev,
7114 &gfx_v9_0_edc_counter_regs[i],
7115 j, k, reg_value,
7116 &sec_count, &ded_count);
7117 }
7118 }
7119 }
7120
7121 err_data->ce_count += sec_count;
7122 err_data->ue_count += ded_count;
7123
7124 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7125 mutex_unlock(&adev->grbm_idx_mutex);
7126
7127 gfx_v9_0_query_utc_edc_status(adev, err_data);
7128 }
7129
7130 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
7131 {
7132 const unsigned int cp_coher_cntl =
7133 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
7134 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
7135 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
7136 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
7137 PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
7138
7139 /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
7140 amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
7141 amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
7142 amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
7143 amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
7144 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
7145 amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
7146 amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
7147 }
7148
7149 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
7150 uint32_t pipe, bool enable)
7151 {
7152 struct amdgpu_device *adev = ring->adev;
7153 uint32_t val;
7154 uint32_t wcl_cs_reg;
7155
7156 /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */
7157 val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
7158
7159 switch (pipe) {
7160 case 0:
7161 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
7162 break;
7163 case 1:
7164 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
7165 break;
7166 case 2:
7167 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
7168 break;
7169 case 3:
7170 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
7171 break;
7172 default:
7173 DRM_DEBUG("invalid pipe %d\n", pipe);
7174 return;
7175 }
7176
7177 amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
7178
7179 }
7180 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
7181 {
7182 struct amdgpu_device *adev = ring->adev;
7183 uint32_t val;
7184 int i;
7185
7186
7187 /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to limit
7188 * the number of gfx waves. Setting the low 5 bits (0x1f) makes sure gfx only
7189 * gets around 25% of GPU resources.
7190 */
7191 val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
7192 amdgpu_ring_emit_wreg(ring,
7193 SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
7194 val);
7195
7196 /* Restrict waves for normal/low priority compute queues as well
7197 * to get best QoS for high priority compute jobs.
7198 *
7199 * amdgpu controls only 1st ME(0-3 CS pipes).
7200 */
7201 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
7202 if (i != ring->pipe)
7203 gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
7204
7205 }
7206 }
7207
7208 static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
7209 {
7210 /* Header itself is a NOP packet */
7211 if (num_nop == 1) {
7212 amdgpu_ring_write(ring, ring->funcs->nop);
7213 return;
7214 }
7215
7216 /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time */
7217 amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
7218
7219 /* Header is at index 0, followed by num_nop - 1 NOP packets */
7220 amdgpu_ring_insert_nop(ring, num_nop - 1);
7221 }
7222
7223 static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid)
7224 {
7225 struct amdgpu_device *adev = ring->adev;
7226 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7227 struct amdgpu_ring *kiq_ring = &kiq->ring;
7228 unsigned long flags;
7229 u32 tmp;
7230 int r;
7231
7232 if (amdgpu_sriov_vf(adev))
7233 return -EINVAL;
7234
7235 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7236 return -EINVAL;
7237
7238 spin_lock_irqsave(&kiq->ring_lock, flags);
7239
7240 if (amdgpu_ring_alloc(kiq_ring, 5)) {
7241 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7242 return -ENOMEM;
7243 }
7244
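	/* request a CP reset of all queues owned by this VMID */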
7245 tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid);
7246 gfx_v9_0_ring_emit_wreg(kiq_ring,
7247 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp);
7248 amdgpu_ring_commit(kiq_ring);
7249
7250 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7251
7252 r = amdgpu_ring_test_ring(kiq_ring);
7253 if (r)
7254 return r;
7255
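	/* on the hung ring: emit a fence for the last seq, wait for CP_VMID_RESET to read back zero, then clear it */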
7256 if (amdgpu_ring_alloc(ring, 7 + 7 + 5))
7257 return -ENOMEM;
7258 gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
7259 ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC);
7260 gfx_v9_0_ring_emit_reg_wait(ring,
7261 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff);
7262 gfx_v9_0_ring_emit_wreg(ring,
7263 SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0);
7264
7265 return amdgpu_ring_test_ring(ring);
7266 }
7267
7268 static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring,
7269 unsigned int vmid)
7270 {
7271 struct amdgpu_device *adev = ring->adev;
7272 struct amdgpu_kiq *kiq = &adev->gfx.kiq[0];
7273 struct amdgpu_ring *kiq_ring = &kiq->ring;
7274 unsigned long flags;
7275 int i, r;
7276
7277 if (amdgpu_sriov_vf(adev))
7278 return -EINVAL;
7279
7280 if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
7281 return -EINVAL;
7282
7283 spin_lock_irqsave(&kiq->ring_lock, flags);
7284
7285 if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
7286 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7287 return -ENOMEM;
7288 }
7289
7290 kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES,
7291 0, 0);
7292 amdgpu_ring_commit(kiq_ring);
7293
7294 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7295
7296 r = amdgpu_ring_test_ring(kiq_ring);
7297 if (r)
7298 return r;
7299
7300 /* make sure dequeue is complete */
7301 amdgpu_gfx_rlc_enter_safe_mode(adev, 0);
7302 mutex_lock(&adev->srbm_mutex);
7303 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0);
7304 for (i = 0; i < adev->usec_timeout; i++) {
7305 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
7306 break;
7307 udelay(1);
7308 }
7309 if (i >= adev->usec_timeout)
7310 r = -ETIMEDOUT;
7311 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7312 mutex_unlock(&adev->srbm_mutex);
7313 amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
7314 if (r) {
7315 dev_err(adev->dev, "failed to wait for HQD deactivation\n");
7316 return r;
7317 }
7318
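/* Reinitialize the MQD for this queue before it is remapped through the KIQ */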
7319 r = amdgpu_bo_reserve(ring->mqd_obj, false);
7320 if (unlikely(r != 0)) {
7321 dev_err(adev->dev, "failed to reserve mqd_obj\n");
7322 return r;
7323 }
7324 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
7325 if (!r) {
7326 r = gfx_v9_0_kcq_init_queue(ring, true);
7327 amdgpu_bo_kunmap(ring->mqd_obj);
7328 ring->mqd_ptr = NULL;
7329 }
7330 amdgpu_bo_unreserve(ring->mqd_obj);
7331 if (r) {
7332 dev_err(adev->dev, "failed to reinit the kcq mqd\n");
7333 return r;
7334 }
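/* Remap the queue via the KIQ */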
7335 spin_lock_irqsave(&kiq->ring_lock, flags);
7336 r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size);
7337 if (r) {
7338 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7339 return -ENOMEM;
7340 }
7341 kiq->pmf->kiq_map_queues(kiq_ring, ring);
7342 amdgpu_ring_commit(kiq_ring);
7343 spin_unlock_irqrestore(&kiq->ring_lock, flags);
7344 r = amdgpu_ring_test_ring(kiq_ring);
7345 if (r) {
7346 DRM_ERROR("failed to remap queue\n");
7347 return r;
7348 }
7349 return amdgpu_ring_test_ring(ring);
7350 }
7351
7352 static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
7353 {
7354 struct amdgpu_device *adev = ip_block->adev;
7355 uint32_t i, j, k, reg, index = 0;
7356 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7357
7358 if (!adev->gfx.ip_dump_core)
7359 return;
7360
7361 for (i = 0; i < reg_count; i++)
7362 drm_printf(p, "%-50s \t 0x%08x\n",
7363 gc_reg_list_9[i].reg_name,
7364 adev->gfx.ip_dump_core[i]);
7365
7366 /* print compute queue registers for all instances */
7367 if (!adev->gfx.ip_dump_compute_queues)
7368 return;
7369
7370 reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7371 drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
7372 adev->gfx.mec.num_mec,
7373 adev->gfx.mec.num_pipe_per_mec,
7374 adev->gfx.mec.num_queue_per_pipe);
7375
7376 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7377 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7378 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7379 drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
7380 for (reg = 0; reg < reg_count; reg++) {
7381 drm_printf(p, "%-50s \t 0x%08x\n",
7382 gc_cp_reg_list_9[reg].reg_name,
7383 adev->gfx.ip_dump_compute_queues[index + reg]);
7384 }
7385 index += reg_count;
7386 }
7387 }
7388 }
7389
7390 }
7391
7392 static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block)
7393 {
7394 struct amdgpu_device *adev = ip_block->adev;
7395 uint32_t i, j, k, reg, index = 0;
7396 uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
7397
7398 if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
7399 return;
7400
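/* Keep the GFX block out of GFXOFF while its registers are read */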
7401 amdgpu_gfx_off_ctrl(adev, false);
7402 for (i = 0; i < reg_count; i++)
7403 adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
7404 amdgpu_gfx_off_ctrl(adev, true);
7405
7406 /* dump compute queue registers for all instances */
7407 if (!adev->gfx.ip_dump_compute_queues)
7408 return;
7409
7410 reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
7411 amdgpu_gfx_off_ctrl(adev, false);
7412 mutex_lock(&adev->srbm_mutex);
7413 for (i = 0; i < adev->gfx.mec.num_mec; i++) {
7414 for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
7415 for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
7416 /* ME0 is for GFX so start from 1 for CP */
7417 soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
7418
7419 for (reg = 0; reg < reg_count; reg++) {
7420 adev->gfx.ip_dump_compute_queues[index + reg] =
7421 RREG32(SOC15_REG_ENTRY_OFFSET(
7422 gc_cp_reg_list_9[reg]));
7423 }
7424 index += reg_count;
7425 }
7426 }
7427 }
7428 soc15_grbm_select(adev, 0, 0, 0, 0, 0);
7429 mutex_unlock(&adev->srbm_mutex);
7430 amdgpu_gfx_off_ctrl(adev, true);
7431
7432 }
7433
7434 static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
7435 {
7436 /* Emit the cleaner shader */
7437 amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
7438 amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
7439 }
7440
7441 static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring)
7442 {
7443 struct amdgpu_device *adev = ring->adev;
7444 struct amdgpu_ip_block *gfx_block =
7445 amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
7446
7447 amdgpu_gfx_enforce_isolation_ring_begin_use(ring);
7448
7449 /* Raven and PCO APUs seem to have stability issues
7450 * with compute, gfxoff and gfx PG. Disable gfx PG during
7451 * submission and re-enable it afterwards.
7452 */
7453 if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
7454 gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_UNGATE);
7455 }
7456
7457 static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring)
7458 {
7459 struct amdgpu_device *adev = ring->adev;
7460 struct amdgpu_ip_block *gfx_block =
7461 amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
7462
7463 /* Raven and PCO APUs seem to have stability issues
7464 * with compute, gfxoff and gfx PG. Disable gfx PG during
7465 * submission and re-enable it afterwards.
7466 */
7467 if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0))
7468 gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_GATE);
7469
7470 amdgpu_gfx_enforce_isolation_ring_end_use(ring);
7471 }
7472
7473 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
7474 .name = "gfx_v9_0",
7475 .early_init = gfx_v9_0_early_init,
7476 .late_init = gfx_v9_0_late_init,
7477 .sw_init = gfx_v9_0_sw_init,
7478 .sw_fini = gfx_v9_0_sw_fini,
7479 .hw_init = gfx_v9_0_hw_init,
7480 .hw_fini = gfx_v9_0_hw_fini,
7481 .suspend = gfx_v9_0_suspend,
7482 .resume = gfx_v9_0_resume,
7483 .is_idle = gfx_v9_0_is_idle,
7484 .wait_for_idle = gfx_v9_0_wait_for_idle,
7485 .soft_reset = gfx_v9_0_soft_reset,
7486 .set_clockgating_state = gfx_v9_0_set_clockgating_state,
7487 .set_powergating_state = gfx_v9_0_set_powergating_state,
7488 .get_clockgating_state = gfx_v9_0_get_clockgating_state,
7489 .dump_ip_state = gfx_v9_ip_dump,
7490 .print_ip_state = gfx_v9_ip_print,
7491 };
7492
7493 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
7494 .type = AMDGPU_RING_TYPE_GFX,
7495 .align_mask = 0xff,
7496 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7497 .support_64bit_ptrs = true,
7498 .secure_submission_supported = true,
7499 .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
7500 .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
7501 .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
7502 .emit_frame_size = /* at most 242 dwords in total if 16 IBs */
7503 5 + /* COND_EXEC */
7504 7 + /* PIPELINE_SYNC */
7505 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7506 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7507 2 + /* VM_FLUSH */
7508 8 + /* FENCE for VM_FLUSH */
7509 20 + /* GDS switch */
7510 4 + /* double SWITCH_BUFFER,
7511 * the first COND_EXEC jumps to the place just
7512 * prior to this double SWITCH_BUFFER */
7513 5 + /* COND_EXEC */
7514 7 + /* HDP_flush */
7515 4 + /* VGT_flush */
7516 14 + /* CE_META */
7517 31 + /* DE_META */
7518 3 + /* CNTX_CTRL */
7519 5 + /* HDP_INVL */
7520 8 + 8 + /* FENCE x2 */
7521 2 + /* SWITCH_BUFFER */
7522 7 + /* gfx_v9_0_emit_mem_sync */
7523 2, /* gfx_v9_0_ring_emit_cleaner_shader */
7524 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
7525 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7526 .emit_fence = gfx_v9_0_ring_emit_fence,
7527 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7528 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7529 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7530 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7531 .test_ring = gfx_v9_0_ring_test_ring,
7532 .insert_nop = gfx_v9_ring_insert_nop,
7533 .pad_ib = amdgpu_ring_generic_pad_ib,
7534 .emit_switch_buffer = gfx_v9_ring_emit_sb,
7535 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7536 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7537 .preempt_ib = gfx_v9_0_ring_preempt_ib,
7538 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7539 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7540 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7541 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7542 .soft_recovery = gfx_v9_0_ring_soft_recovery,
7543 .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7544 .reset = gfx_v9_0_reset_kgq,
7545 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7546 .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7547 .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7548 };
7549
7550 static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
7551 .type = AMDGPU_RING_TYPE_GFX,
7552 .align_mask = 0xff,
7553 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7554 .support_64bit_ptrs = true,
7555 .secure_submission_supported = true,
7556 .get_rptr = amdgpu_sw_ring_get_rptr_gfx,
7557 .get_wptr = amdgpu_sw_ring_get_wptr_gfx,
7558 .set_wptr = amdgpu_sw_ring_set_wptr_gfx,
7559 .emit_frame_size = /* at most 242 dwords in total if 16 IBs */
7560 5 + /* COND_EXEC */
7561 7 + /* PIPELINE_SYNC */
7562 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7563 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7564 2 + /* VM_FLUSH */
7565 8 + /* FENCE for VM_FLUSH */
7566 20 + /* GDS switch */
7567 4 + /* double SWITCH_BUFFER,
7568 * the first COND_EXEC jumps to the place just
7569 * prior to this double SWITCH_BUFFER
7570 */
7571 5 + /* COND_EXEC */
7572 7 + /* HDP_flush */
7573 4 + /* VGT_flush */
7574 14 + /* CE_META */
7575 31 + /* DE_META */
7576 3 + /* CNTX_CTRL */
7577 5 + /* HDP_INVL */
7578 8 + 8 + /* FENCE x2 */
7579 2 + /* SWITCH_BUFFER */
7580 7 + /* gfx_v9_0_emit_mem_sync */
7581 2, /* gfx_v9_0_ring_emit_cleaner_shader */
7582 .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
7583 .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
7584 .emit_fence = gfx_v9_0_ring_emit_fence,
7585 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7586 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7587 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7588 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7589 .test_ring = gfx_v9_0_ring_test_ring,
7590 .test_ib = gfx_v9_0_ring_test_ib,
7591 .insert_nop = gfx_v9_ring_insert_nop,
7592 .pad_ib = amdgpu_ring_generic_pad_ib,
7593 .emit_switch_buffer = gfx_v9_ring_emit_sb,
7594 .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
7595 .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
7596 .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
7597 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7598 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7599 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7600 .soft_recovery = gfx_v9_0_ring_soft_recovery,
7601 .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7602 .patch_cntl = gfx_v9_0_ring_patch_cntl,
7603 .patch_de = gfx_v9_0_ring_patch_de_meta,
7604 .patch_ce = gfx_v9_0_ring_patch_ce_meta,
7605 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7606 .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use,
7607 .end_use = amdgpu_gfx_enforce_isolation_ring_end_use,
7608 };
7609
7610 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
7611 .type = AMDGPU_RING_TYPE_COMPUTE,
7612 .align_mask = 0xff,
7613 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7614 .support_64bit_ptrs = true,
7615 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7616 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7617 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7618 .emit_frame_size =
7619 20 + /* gfx_v9_0_ring_emit_gds_switch */
7620 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7621 5 + /* hdp invalidate */
7622 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7623 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7624 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7625 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7626 7 + /* gfx_v9_0_emit_mem_sync */
7627 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
7628 15 + /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
7629 2, /* gfx_v9_0_ring_emit_cleaner_shader */
7630 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7631 .emit_ib = gfx_v9_0_ring_emit_ib_compute,
7632 .emit_fence = gfx_v9_0_ring_emit_fence,
7633 .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
7634 .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
7635 .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
7636 .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
7637 .test_ring = gfx_v9_0_ring_test_ring,
7638 .test_ib = gfx_v9_0_ring_test_ib,
7639 .insert_nop = gfx_v9_ring_insert_nop,
7640 .pad_ib = amdgpu_ring_generic_pad_ib,
7641 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7642 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7643 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7644 .soft_recovery = gfx_v9_0_ring_soft_recovery,
7645 .emit_mem_sync = gfx_v9_0_emit_mem_sync,
7646 .emit_wave_limit = gfx_v9_0_emit_wave_limit,
7647 .reset = gfx_v9_0_reset_kcq,
7648 .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader,
7649 .begin_use = gfx_v9_0_ring_begin_use_compute,
7650 .end_use = gfx_v9_0_ring_end_use_compute,
7651 };
7652
7653 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
7654 .type = AMDGPU_RING_TYPE_KIQ,
7655 .align_mask = 0xff,
7656 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
7657 .support_64bit_ptrs = true,
7658 .get_rptr = gfx_v9_0_ring_get_rptr_compute,
7659 .get_wptr = gfx_v9_0_ring_get_wptr_compute,
7660 .set_wptr = gfx_v9_0_ring_set_wptr_compute,
7661 .emit_frame_size =
7662 20 + /* gfx_v9_0_ring_emit_gds_switch */
7663 7 + /* gfx_v9_0_ring_emit_hdp_flush */
7664 5 + /* hdp invalidate */
7665 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
7666 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
7667 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
7668 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
7669 .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
7670 .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
7671 .test_ring = gfx_v9_0_ring_test_ring,
7672 .insert_nop = amdgpu_ring_insert_nop,
7673 .pad_ib = amdgpu_ring_generic_pad_ib,
7674 .emit_rreg = gfx_v9_0_ring_emit_rreg,
7675 .emit_wreg = gfx_v9_0_ring_emit_wreg,
7676 .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
7677 .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
7678 };
7679
7680 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
7681 {
7682 int i;
7683
7684 adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7685
7686 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7687 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7688
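/* Software gfx rings are only used when mid command buffer preemption (MCBP) is enabled */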
7689 if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) {
7690 for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
7691 adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
7692 }
7693
7694 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7695 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7696 }
7697
7698 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7699 .set = gfx_v9_0_set_eop_interrupt_state,
7700 .process = gfx_v9_0_eop_irq,
7701 };
7702
7703 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7704 .set = gfx_v9_0_set_priv_reg_fault_state,
7705 .process = gfx_v9_0_priv_reg_irq,
7706 };
7707
7708 static const struct amdgpu_irq_src_funcs gfx_v9_0_bad_op_irq_funcs = {
7709 .set = gfx_v9_0_set_bad_op_fault_state,
7710 .process = gfx_v9_0_bad_op_irq,
7711 };
7712
7713 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7714 .set = gfx_v9_0_set_priv_inst_fault_state,
7715 .process = gfx_v9_0_priv_inst_irq,
7716 };
7717
7718 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7719 .set = gfx_v9_0_set_cp_ecc_error_state,
7720 .process = amdgpu_gfx_cp_ecc_error_irq,
7721 };
7722
7723
7724 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7725 {
7726 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7727 adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7728
7729 adev->gfx.priv_reg_irq.num_types = 1;
7730 adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7731
7732 adev->gfx.bad_op_irq.num_types = 1;
7733 adev->gfx.bad_op_irq.funcs = &gfx_v9_0_bad_op_irq_funcs;
7734
7735 adev->gfx.priv_inst_irq.num_types = 1;
7736 adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7737
7738 adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7739 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7740 }
7741
7742 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7743 {
7744 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7745 case IP_VERSION(9, 0, 1):
7746 case IP_VERSION(9, 2, 1):
7747 case IP_VERSION(9, 4, 0):
7748 case IP_VERSION(9, 2, 2):
7749 case IP_VERSION(9, 1, 0):
7750 case IP_VERSION(9, 4, 1):
7751 case IP_VERSION(9, 3, 0):
7752 case IP_VERSION(9, 4, 2):
7753 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7754 break;
7755 default:
7756 break;
7757 }
7758 }
7759
7760 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7761 {
7762 /* init asic gds info */
7763 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7764 case IP_VERSION(9, 0, 1):
7765 case IP_VERSION(9, 2, 1):
7766 case IP_VERSION(9, 4, 0):
7767 adev->gds.gds_size = 0x10000;
7768 break;
7769 case IP_VERSION(9, 2, 2):
7770 case IP_VERSION(9, 1, 0):
7771 case IP_VERSION(9, 4, 1):
7772 adev->gds.gds_size = 0x1000;
7773 break;
7774 case IP_VERSION(9, 4, 2):
7775 /* Aldebaran removed all of the GDS internal memory;
7776 * the kernel only supports GWS opcodes such as
7777 * barrier and semaphore. */
7778 adev->gds.gds_size = 0;
7779 break;
7780 default:
7781 adev->gds.gds_size = 0x10000;
7782 break;
7783 }
7784
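/* Maximum wave ID that is allowed to use GDS, per GC revision */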
7785 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
7786 case IP_VERSION(9, 0, 1):
7787 case IP_VERSION(9, 4, 0):
7788 adev->gds.gds_compute_max_wave_id = 0x7ff;
7789 break;
7790 case IP_VERSION(9, 2, 1):
7791 adev->gds.gds_compute_max_wave_id = 0x27f;
7792 break;
7793 case IP_VERSION(9, 2, 2):
7794 case IP_VERSION(9, 1, 0):
7795 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
7796 adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
7797 else
7798 adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
7799 break;
7800 case IP_VERSION(9, 4, 1):
7801 adev->gds.gds_compute_max_wave_id = 0xfff;
7802 break;
7803 case IP_VERSION(9, 4, 2):
7804 /* deprecated for Aldebaran, no usage at all */
7805 adev->gds.gds_compute_max_wave_id = 0;
7806 break;
7807 default:
7808 /* this really depends on the chip */
7809 adev->gds.gds_compute_max_wave_id = 0x7ff;
7810 break;
7811 }
7812
7813 adev->gds.gws_size = 64;
7814 adev->gds.oa_size = 16;
7815 }
7816
7817 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7818 u32 bitmap)
7819 {
7820 u32 data;
7821
7822 if (!bitmap)
7823 return;
7824
7825 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7826 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7827
7828 WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
7829 }
7830
7831 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7832 {
7833 u32 data, mask;
7834
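/* Combine the fused-off CUs with any user-disabled CUs */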
7835 data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
7836 data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
7837
7838 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7839 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7840
7841 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
7842
7843 return (~data) & mask;
7844 }
7845
7846 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
7847 struct amdgpu_cu_info *cu_info)
7848 {
7849 int i, j, k, counter, active_cu_number = 0;
7850 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7851 unsigned disable_masks[4 * 4];
7852
7853 if (!adev || !cu_info)
7854 return -EINVAL;
7855
7856 /*
7857 * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
7858 */
7859 if (adev->gfx.config.max_shader_engines *
7860 adev->gfx.config.max_sh_per_se > 16)
7861 return -EINVAL;
7862
7863 amdgpu_gfx_parse_disable_cu(disable_masks,
7864 adev->gfx.config.max_shader_engines,
7865 adev->gfx.config.max_sh_per_se);
7866
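/* For each SE/SH: apply the user-requested inactive CU mask, then read back the active CU bitmap */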
7867 mutex_lock(&adev->grbm_idx_mutex);
7868 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7869 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7870 mask = 1;
7871 ao_bitmap = 0;
7872 counter = 0;
7873 amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
7874 gfx_v9_0_set_user_cu_inactive_bitmap(
7875 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
7876 bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
7877
7878 /*
7879 * The bitmap (and ao_cu_bitmap) in the cu_info structure is a
7880 * 4x4 array, which is suitable for Vega ASICs with their
7881 * 4*2 SE/SH layout.
7882 * For Arcturus, however, the SE/SH layout changed to 8*1.
7883 * To minimize the impact, we map it onto the current bitmap
7884 * array as below:
7885 * SE4,SH0 --> bitmap[0][1]
7886 * SE5,SH0 --> bitmap[1][1]
7887 * SE6,SH0 --> bitmap[2][1]
7888 * SE7,SH0 --> bitmap[3][1]
7889 */
7890 cu_info->bitmap[0][i % 4][j + i / 4] = bitmap;
7891
7892 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
7893 if (bitmap & mask) {
7894 if (counter < adev->gfx.config.max_cu_per_sh)
7895 ao_bitmap |= mask;
7896 counter++;
7897 }
7898 mask <<= 1;
7899 }
7900 active_cu_number += counter;
7901 if (i < 2 && j < 2)
7902 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
7903 cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
7904 }
7905 }
7906 amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
7907 mutex_unlock(&adev->grbm_idx_mutex);
7908
7909 cu_info->number = active_cu_number;
7910 cu_info->ao_cu_mask = ao_cu_mask;
7911 cu_info->simd_per_cu = NUM_SIMD_PER_CU;
7912
7913 return 0;
7914 }
7915
7916 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
7917 {
7918 .type = AMD_IP_BLOCK_TYPE_GFX,
7919 .major = 9,
7920 .minor = 0,
7921 .rev = 0,
7922 .funcs = &gfx_v9_0_ip_funcs,
7923 };
7924