xref: /aosp_15_r20/external/coreboot/src/northbridge/intel/gm45/raminit_receive_enable_calibration.c (revision b9411a12aaaa7e1e6a6fb7c5e057f44ee179a49c)
/* SPDX-License-Identifier: GPL-2.0-only */

#include <stdint.h>
#include <device/mmio.h>
#include <console/console.h>
#include "gm45.h"

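/*
 * Per-group receive-enable registers (layout read off the macros below,
 * hardware meaning only inferred from how this file uses them): CxRECy
 * sits at MCHBAR 0x14a0 + 0x100 * channel, one 32-bit register per
 * byte-lane group, group 0 at the highest of the four addresses.  Bits
 * 28-31 hold T, 24-26 P, 22-23 PH and 20-21 PM; the L/H fields at bits
 * 0-1 and 16-17 are used to program the group's byte-lane mapping.  The
 * per-channel coarse value C lives in CxDRT3, bits 7-10.  The carry
 * hierarchy implied by normalize_rec_timing() is P -> T -> PH -> C.
 */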
#define CxRECy_MCHBAR(x, y)	(0x14a0 + ((x) * 0x0100) + ((3 - (y)) * 4))
#define CxRECy_SHIFT_L		0
#define CxRECy_MASK_L		(3 << CxRECy_SHIFT_L)
#define CxRECy_SHIFT_H		16
#define CxRECy_MASK_H		(3 << CxRECy_SHIFT_H)
#define CxRECy_T_SHIFT		28
#define CxRECy_T_MASK		(0xf << CxRECy_T_SHIFT)
#define CxRECy_T(t)		(((t) << CxRECy_T_SHIFT) & CxRECy_T_MASK)
#define CxRECy_P_SHIFT		24
#define CxRECy_P_MASK		(0x7 << CxRECy_P_SHIFT)
#define CxRECy_P(p)		(((p) << CxRECy_P_SHIFT) & CxRECy_P_MASK)
#define CxRECy_PH_SHIFT		22
#define CxRECy_PH_MASK		(0x3 << CxRECy_PH_SHIFT)
#define CxRECy_PH(p)		(((p) << CxRECy_PH_SHIFT) & CxRECy_PH_MASK)
#define CxRECy_PM_SHIFT		20
#define CxRECy_PM_MASK		(0x3 << CxRECy_PM_SHIFT)
#define CxRECy_PM(p)		(((p) << CxRECy_PM_SHIFT) & CxRECy_PM_MASK)
#define CxRECy_TIMING_MASK	(CxRECy_T_MASK | CxRECy_P_MASK | \
				 CxRECy_PH_MASK | CxRECy_PM_MASK)

#define CxDRT3_C_SHIFT	7
#define CxDRT3_C_MASK	(0xf << CxDRT3_C_SHIFT)
#define CxDRT3_C(c)	(((c) << CxDRT3_C_SHIFT) & CxDRT3_C_MASK)
/* group to byte-lane mapping: (cardF X group X 2 per group) */
static const char bytelane_map[2][4][2] = {
/* A,B,C */{ { 0, 1 }, { 2, 3 }, { 4, 5 }, { 6, 7 } },
/*     F */{ { 0, 2 }, { 1, 3 }, { 4, 6 }, { 5, 7 } },
};

#define PH_BOUND	4
#define PH_STEP		2
#define PM_BOUND	3
#define C_BOUND	16
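/*
 * Receive-enable delay state for one byte-lane group.  'c' is the
 * per-channel coarse value written to CxDRT3 (apparently whole clocks;
 * it starts at CAS + 1), while 'ph', 't', 'p' and 'pre' map to the
 * CxRECy PH, T, P and PM fields respectively (see program_timing()).
 * 't_bound' and 'p_bound' depend on memory clock and DDR type and stay
 * constant after initialization.  The physical unit of each step is
 * hardware-specific and not spelled out here.
 */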
typedef struct {
	int c;
	int pre;
	int ph;
	int t;
	const int t_bound;
	int p;
	const int p_bound;
} rec_timing_t;
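/*
 * Bring all fields back into their bounds by carrying overflow into the
 * next coarser field (P -> T -> PH -> C).  Worked example with made-up
 * values: for t_bound = 12, p_bound = 1 and a timing of c = 2, ph = 3,
 * t = 11, p = 1, the first loop carries p into t (t = 12, p = 0), the
 * third carries t into ph (ph = 3 + PH_STEP = 5, t = 0) and the fifth
 * carries ph into c (c = 3, ph = 1).  The same delay is thus expressed
 * with every field inside its bound; a resulting c outside
 * [0, C_BOUND) is fatal.
 */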
static void normalize_rec_timing(rec_timing_t *const timing)
{
	while (timing->p >= timing->p_bound) {
		timing->t++;
		timing->p -= timing->p_bound;
	}
	while (timing->p < 0) {
		timing->t--;
		timing->p += timing->p_bound;
	}
	while (timing->t >= timing->t_bound) {
		timing->ph += PH_STEP;
		timing->t -= timing->t_bound;
	}
	while (timing->t < 0) {
		timing->ph -= PH_STEP;
		timing->t += timing->t_bound;
	}
	while (timing->ph >= PH_BOUND) {
		timing->c++;
		timing->ph -= PH_BOUND;
	}
	while (timing->ph < 0) {
		timing->c--;
		timing->ph += PH_BOUND;
	}
	if (timing->c < 0 || timing->c >= C_BOUND)
		die("Timing under-/overflow during "
			"receive-enable calibration.\n");
}

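/*
 * Step helpers.  One C unit appears to be a full clock (the initial
 * value is CAS + 1); PH counts in units of PH_STEP, with
 * PH_BOUND / PH_STEP == 2 half-clock steps per clock.  So a half
 * backstep moves by half a clock, a quarter step by half of t_bound
 * T units (plus half of p_bound if t_bound is odd), and the smallest
 * step by a single P unit.  Overflow is folded back by
 * normalize_rec_timing(), which program_timing() always calls first.
 */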
static void rec_full_backstep(rec_timing_t *const timing)
{
	timing->c--;
}
static void rec_half_backstep(rec_timing_t *const timing)
{
	timing->ph -= PH_STEP;
}
static void rec_quarter_step(rec_timing_t *const timing)
{
	timing->t += (timing->t_bound) >> 1;
	timing->p += (timing->t_bound & 1) * (timing->p_bound >> 1);
}
static void rec_quarter_backstep(rec_timing_t *const timing)
{
	timing->t -= (timing->t_bound) >> 1;
	timing->p -= (timing->t_bound & 1) * (timing->p_bound >> 1);
}
static void rec_smallest_step(rec_timing_t *const timing)
{
	timing->p++;
}

static void program_timing(int channel, int group,
			   rec_timing_t timings[][4])
{
	rec_timing_t *const timing = &timings[channel][group];

	normalize_rec_timing(timing);

	/* C value is per channel. */
	unsigned int mchbar = CxDRT3_MCHBAR(channel);
	mchbar_clrsetbits32(mchbar, CxDRT3_C_MASK, CxDRT3_C(timing->c));

	/* All other per group. */
	mchbar = CxRECy_MCHBAR(channel, group);
	u32 reg = mchbar_read32(mchbar);
	reg &= ~CxRECy_TIMING_MASK;
	reg |= CxRECy_T(timing->t) | CxRECy_P(timing->p) |
		CxRECy_PH(timing->ph) | CxRECy_PM(timing->pre);
	mchbar_write32(mchbar, reg);
}

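/*
 * Sample the current DQS level for one byte lane: toggle bit 9 of the
 * per-channel register at 0x14f0/0x15f0 (presumably re-arming the DQS
 * sampler), issue a dummy read from rank 0 of the channel, then return
 * bit 30 of the per-lane status register at 0x14b0+.  A non-zero return
 * means DQS was sampled high.  These register semantics are inferred
 * from usage, not from documentation.
 */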
static int read_dqs_level(const int channel, const int lane)
{
	unsigned int mchbar = 0x14f0 + (channel * 0x0100);
	mchbar_clrbits32(mchbar, 1 << 9);
	mchbar_setbits32(mchbar, 1 << 9);

	/* Read from this channel. */
	read32p(raminit_get_rank_addr(channel, 0));

	mchbar = 0x14b0 + (channel * 0x0100) + ((7 - lane) * 4);
	return mchbar_read32(mchbar) & (1 << 30);
}

static void find_dqs_low(const int channel, const int group,
			 rec_timing_t timings[][4], const char lane_map[][2])
{
	/* Look for DQS low, using quarter steps. */
	while (read_dqs_level(channel, lane_map[group][0]) ||
			read_dqs_level(channel, lane_map[group][1])) {
		rec_quarter_step(&timings[channel][group]);
		program_timing(channel, group, timings);
	}
}
static void find_dqs_high(const int channel, const int group,
			  rec_timing_t timings[][4], const char lane_map[][2])
{
	/* Look for _any_ DQS high, using quarter steps. */
	while (!read_dqs_level(channel, lane_map[group][0]) &&
			!read_dqs_level(channel, lane_map[group][1])) {
		rec_quarter_step(&timings[channel][group]);
		program_timing(channel, group, timings);
	}
}
static void find_dqs_edge_lowhigh(const int channel, const int group,
				  rec_timing_t timings[][4],
				  const char lane_map[][2])
{
	/* Advance beyond previous high to low transition. */
	timings[channel][group].t += 2;
	program_timing(channel, group, timings);

	/* Coarsely look for DQS high. */
	find_dqs_high(channel, group, timings, lane_map);

	/* Go back and perform finer search. */
	rec_quarter_backstep(&timings[channel][group]);
	program_timing(channel, group, timings);
	while (!read_dqs_level(channel, lane_map[group][0]) ||
			!read_dqs_level(channel, lane_map[group][1])) {
		rec_smallest_step(&timings[channel][group]);
		program_timing(channel, group, timings);
	}
}
static void find_preamble(const int channel, const int group,
			  rec_timing_t timings[][4], const char lane_map[][2])
{
	/* Look for DQS low, backstepping. */
	while (read_dqs_level(channel, lane_map[group][0]) ||
			read_dqs_level(channel, lane_map[group][1])) {
		rec_full_backstep(&timings[channel][group]);
		program_timing(channel, group, timings);
	}
}

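/*
 * Receive-enable calibration proper.  Each byte-lane group of each
 * populated channel starts at CAS + 1 clocks.  The code then searches
 * for DQS low and the following low-to-high edge (quarter steps, then
 * single P steps), advances another quarter clock, backsteps whole
 * clocks to find the preamble, locates the edge once more and finally
 * backs off half a clock.  Raw card F DIMMs use an override lane
 * mapping and get one extra T step.  At the end, the smallest C of the
 * four groups becomes the channel's C value and each group's
 * difference from it is programmed into the group's PM field (the
 * 'pre' member).
 */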
static void receive_enable_calibration(const int ddr_type,
				       const timings_t *const timings,
				       const dimminfo_t *const dimms)
{
	/* Override group to byte-lane mapping for raw card type F DIMMS. */
	static const char over_bytelane_map[2][4][2] = {
	/* A,B,C */{ { 0, 1 }, { 2, 3 }, { 4, 5 }, { 6, 7 } },
	/*     F */{ { 0, 0 }, { 3, 3 }, { 6, 6 }, { 5, 5 } },
	};

	const int cardF[] = {
		dimms[0].card_type == 0xf,
		dimms[1].card_type == 0xf,
	};

	const unsigned int t_bound =
		(timings->mem_clock == MEM_CLOCK_1067MT) ? 9
		: (ddr_type == DDR3) ? 12 : 15;
	const unsigned int p_bound =
		(timings->mem_clock == MEM_CLOCK_1067MT) ? 8 : 1;

	rec_timing_t rec_timings[2][4] = {
		{
			{ timings->CAS + 1, 0, 0, 0, t_bound, 0, p_bound },
			{ timings->CAS + 1, 0, 0, 0, t_bound, 0, p_bound },
			{ timings->CAS + 1, 0, 0, 0, t_bound, 0, p_bound },
			{ timings->CAS + 1, 0, 0, 0, t_bound, 0, p_bound }
		}, {
			{ timings->CAS + 1, 0, 0, 0, t_bound, 0, p_bound },
			{ timings->CAS + 1, 0, 0, 0, t_bound, 0, p_bound },
			{ timings->CAS + 1, 0, 0, 0, t_bound, 0, p_bound },
			{ timings->CAS + 1, 0, 0, 0, t_bound, 0, p_bound }
		}
	};

	int ch, group;
	FOR_EACH_POPULATED_CHANNEL(dimms, ch) {
		const char (*const map)[2] = over_bytelane_map[cardF[ch]];
		for (group = 0; group < 4; ++group) {
			program_timing(ch, group, rec_timings);
			find_dqs_low(ch, group, rec_timings, map);
			find_dqs_edge_lowhigh(ch, group, rec_timings, map);

			rec_quarter_step(&rec_timings[ch][group]);
			program_timing(ch, group, rec_timings);
			find_preamble(ch, group, rec_timings, map);
			find_dqs_edge_lowhigh(ch, group, rec_timings, map);
			rec_half_backstep(&rec_timings[ch][group]);
			normalize_rec_timing(&rec_timings[ch][group]);
			if (cardF[ch]) {
				rec_timings[ch][group].t++;
				program_timing(ch, group, rec_timings);
			}
		}
		int c_min = C_BOUND;
		for (group = 0; group < 4; ++group) {
			if (rec_timings[ch][group].c < c_min)
				c_min = rec_timings[ch][group].c;
		}
		for (group = 0; group < 4; ++group) {
			rec_timings[ch][group].pre =
				rec_timings[ch][group].c - c_min;
			rec_timings[ch][group].c = c_min;
			program_timing(ch, group, rec_timings);
			printk(RAM_DEBUG, "Final timings for ");
			printk(BIOS_DEBUG, "group %d, ch %d: %d.%d.%d.%d.%d\n",
			       group, ch,
			       rec_timings[ch][group].c,
			       rec_timings[ch][group].pre,
			       rec_timings[ch][group].ph,
			       rec_timings[ch][group].t,
			       rec_timings[ch][group].p);
		}
	}
}

void raminit_receive_enable_calibration(const int ddr_type,
					const timings_t *const timings,
					const dimminfo_t *const dimms)
{
	int ch;

	/* Setup group to byte-lane mapping. */
	FOR_EACH_POPULATED_CHANNEL(dimms, ch) {
		const char (*const map)[2] =
			bytelane_map[dimms[ch].card_type == 0xf];
		unsigned int group;
		for (group = 0; group < 4; ++group) {
			const unsigned int mchbar = CxRECy_MCHBAR(ch, group);
			u32 reg = mchbar_read32(mchbar);
			reg &= ~((3 << 16) | (1 << 8) | 3);
			reg |= (map[group][0] - group);
			reg |= (map[group][1] - group - 1) << 16;
			mchbar_write32(mchbar, reg);
		}
	}

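	/*
	 * Bit 31 of MCHBAR 0x12a4/0x13a4 is set for the duration of the
	 * calibration and cleared again afterwards; bit 9 of 0x14f0/0x15f0
	 * is the bit that read_dqs_level() toggles to sample the DQS
	 * level.  The exact hardware meaning of these bits is not
	 * documented here.
	 */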
	mchbar_setbits32(0x12a4, 1 << 31);
	mchbar_setbits32(0x13a4, 1 << 31);
	mchbar_clrsetbits32(0x14f0, 3 << 9, 1 << 9);
	mchbar_clrsetbits32(0x15f0, 3 << 9, 1 << 9);

	receive_enable_calibration(ddr_type, timings, dimms);

	mchbar_clrbits32(0x12a4, 1 << 31);
	mchbar_clrbits32(0x13a4, 1 << 31);
	raminit_reset_readwrite_pointers();
}