/* xref: /aosp_15_r20/external/coreboot/src/northbridge/intel/gm45/raminit_read_write_training.c (revision b9411a12aaaa7e1e6a6fb7c5e057f44ee179a49c) */
/* SPDX-License-Identifier: GPL-2.0-only */

#include <stdint.h>
#include <device/mmio.h>
#include <pc80/mc146818rtc.h>
#include <console/console.h>
#include "gm45.h"

/* A bunch of rank start addresses to run a training test over,
   at most one address per rank of a channel. */
typedef struct {
	u32 addr[RANKS_PER_CHANNEL];	/* physical test addresses */
	unsigned int count;		/* number of valid entries in addr[] */
} address_bunch_t;
13 
/* Read Training. */

/* Per-byte-lane read timing register of a channel. Byte lanes are laid
   out in reverse order: lane 0 has the highest register address. */
#define CxRDTy_MCHBAR(ch, bl)	(0x14b0 + ((ch) * 0x0100) + ((7 - (bl)) * 4))
/* Coarse delay step t: 4-bit field. */
#define CxRDTy_T_SHIFT		20
#define CxRDTy_T_MASK		(0xf << CxRDTy_T_SHIFT)
#define CxRDTy_T(t)		(((t) << CxRDTy_T_SHIFT) & CxRDTy_T_MASK)
/* Fine delay step p: 3-bit field (p carries into t every 8 steps,
   see READ_TIMING_P_BOUND below). */
#define CxRDTy_P_SHIFT		16
#define CxRDTy_P_MASK		(0x7 << CxRDTy_P_SHIFT)
#define CxRDTy_P(p)		(((p) << CxRDTy_P_SHIFT) & CxRDTy_P_MASK)
/* Test pattern for read training, indexed by (byte offset >> 3):
   each word covers one 8-byte chunk of the 320-byte test window. */
static const u32 read_training_schedule[] = {
	0xfefefefe, 0x7f7f7f7f, 0xbebebebe, 0xdfdfdfdf,
	0xeeeeeeee, 0xf7f7f7f7, 0xfafafafa, 0xfdfdfdfd,
	0x00000000, 0x81818181, 0x40404040, 0x21212121,
	0x10101010, 0x09090909, 0x04040404, 0x03030303,
	0x10101010, 0x11111111, 0xeeeeeeee, 0xefefefef,
	0x10101010, 0x11111111, 0xeeeeeeee, 0xefefefef,
	0x10101010, 0xefefefef, 0x10101010, 0xefefefef,
	0x10101010, 0xefefefef, 0x10101010, 0xefefefef,
	0x00000000, 0xffffffff, 0x00000000, 0xffffffff,
	0x00000000, 0xffffffff, 0x00000000, 0x00000000,
};
/* Fine steps (p) carry into coarse steps (t) every READ_TIMING_P_BOUND;
   t is limited to [0, READ_TIMING_T_BOUND). */
#define READ_TIMING_P_SHIFT	3
#define READ_TIMING_P_BOUND	(1 << READ_TIMING_P_SHIFT)
#define READ_TIMING_T_BOUND	14
/* A read delay: coarse step t plus fine step p
   (0 <= p < READ_TIMING_P_BOUND after normalization). */
typedef struct {
	int t;	/* coarse timing step */
	int p;	/* fine timing step */
} read_timing_t;
print_read_timing(const int msg_lvl,const char * const msg,const int lane,const int channel,const read_timing_t * const timing)41 static void print_read_timing(const int msg_lvl, const char *const msg,
42 			      const int lane, const int channel,
43 			      const read_timing_t *const timing)
44 {
45 	printk(msg_lvl, "%sbyte lane %d, ch %d: %d.%d\n",
46 	       msg, lane, channel, timing->t, timing->p);
47 }
48 
/*
 * Bring a read timing into canonical form: carry fine steps (p) into
 * coarse steps (t) until 0 <= p < READ_TIMING_P_BOUND, then clamp t
 * into [0, READ_TIMING_T_BOUND).
 *
 * Returns 0 if the timing was representable, -1 if it had to be
 * clamped (under- or overflow).
 */
static int normalize_read_timing(read_timing_t *const timing)
{
	for (; timing->p >= READ_TIMING_P_BOUND; timing->p -= READ_TIMING_P_BOUND)
		++timing->t;
	for (; timing->p < 0; timing->p += READ_TIMING_P_BOUND)
		--timing->t;

	if (timing->t < 0) {
		printk(BIOS_WARNING,
		       "Timing underflow during read training.\n");
		timing->t = 0;
		timing->p = 0;
		return -1;
	}
	if (timing->t >= READ_TIMING_T_BOUND) {
		printk(BIOS_WARNING,
		       "Timing overflow during read training.\n");
		timing->t = READ_TIMING_T_BOUND - 1;
		timing->p = READ_TIMING_P_BOUND - 1;
		return -1;
	}
	return 0;
}
/*
 * Normalize the given timing and program it into the channel's
 * per-lane read timing register.
 *
 * Returns -1 (register untouched) if the timing had to be clamped,
 * 0 otherwise.
 */
static int program_read_timing(const int ch, const int lane,
			       read_timing_t *const timing)
{
	if (normalize_read_timing(timing) < 0)
		return -1;

	const u32 cleared = mchbar_read32(CxRDTy_MCHBAR(ch, lane)) &
			    ~(CxRDTy_T_MASK | CxRDTy_P_MASK);
	mchbar_write32(CxRDTy_MCHBAR(ch, lane),
		       cleared | CxRDTy_T(timing->t) | CxRDTy_P(timing->p));

	return 0;
}
87 /* Returns 1 on success, 0 on failure. */
read_training_test(const int channel,const int lane,const address_bunch_t * const addresses)88 static int read_training_test(const int channel, const int lane,
89 			      const address_bunch_t *const addresses)
90 {
91 	int i;
92 
93 	const int lane_offset = lane & 4;
94 	const int lane_mask = 0xff << ((lane & ~4) << 3);
95 
96 	for (i = 0; i < addresses->count; ++i) {
97 		unsigned int offset;
98 		for (offset = lane_offset; offset < 320; offset += 8) {
99 			const u32 read = read32p(addresses->addr[i] + offset);
100 			const u32 good = read_training_schedule[offset >> 3];
101 			if ((read & lane_mask) != (good & lane_mask))
102 				return 0;
103 		}
104 	}
105 	return 1;
106 }
/*
 * Find the lowest working read timing for one byte lane.
 *
 * Advances t from the caller-provided start until the pattern test
 * passes, then steps one t back and advances p until it passes again.
 * `lower` is both the starting point and the result.
 *
 * Returns 0 on success, -1 on timing overflow before a working
 * setting was found.
 */
static int read_training_find_lower(const int channel, const int lane,
				    const address_bunch_t *const addresses,
				    read_timing_t *const lower)
{
	/* Coarse search for good t. */
	/* Return value ignored: the caller starts at t = p = 0, in range. */
	program_read_timing(channel, lane, lower);
	while (!read_training_test(channel, lane, addresses)) {
		++lower->t;
		if (program_read_timing(channel, lane, lower) < 0)
			return -1;
	}

	/* Step back, then fine search for good p. */
	if (lower->t <= 0)
		/* Can't step back, zero is good. */
		return 0;

	--lower->t;
	/* Cannot fail: t was > 0 before the decrement. */
	program_read_timing(channel, lane, lower);
	while (!read_training_test(channel, lane, addresses)) {
		++lower->p;
		if (program_read_timing(channel, lane, lower) < 0)
			return -1;
	}

	return 0;
}
/*
 * Find the first failing read timing above `upper` for one byte lane.
 *
 * Verifies that `upper` (set by the caller to lower bound + 1t) still
 * passes, advances t until the test fails, steps one t back, then
 * advances p until it fails. `upper` holds the result.
 *
 * Returns 0 on success, -1 on timing overflow or if `upper` does not
 * pass to begin with.
 */
static int read_training_find_upper(const int channel, const int lane,
				    const address_bunch_t *const addresses,
				    read_timing_t *const upper)
{
	if (program_read_timing(channel, lane, upper) < 0)
		return -1;
	if (!read_training_test(channel, lane, addresses)) {
		printk(BIOS_WARNING,
		       "Read training failure: limits too narrow.\n");
		return -1;
	}
	/* Coarse search for bad t. */
	do {
		++upper->t;
		if (program_read_timing(channel, lane, upper) < 0)
			return -1;
	} while (read_training_test(channel, lane, addresses));
	/* Fine search for bad p. */
	--upper->t;
	/* Cannot fail: t was just incremented past this value. */
	program_read_timing(channel, lane, upper);
	while (read_training_test(channel, lane, addresses)) {
		++upper->p;
		if (program_read_timing(channel, lane, upper) < 0)
			return -1;
	}

	return 0;
}
/*
 * Read training for one byte lane of one channel: find the lower and
 * upper bounds of the working timing window and program their mean.
 * Dies if no lower bound can be found; an upper-bound overflow is
 * tolerated (the clamped maximum is used).
 */
static void read_training_per_lane(const int channel, const int lane,
				   const address_bunch_t *const addresses)
{
	read_timing_t lower, upper;

	/* Set bits 25/26 of the lane's timing register — presumably an
	   enable; the S3 restore path sets the same bits. TODO confirm. */
	mchbar_setbits32(CxRDTy_MCHBAR(channel, lane), 3 << 25);

	/*** Search lower bound. ***/

	/* Start at zero. */
	lower.t = 0;
	lower.p = 0;
	if (read_training_find_lower(channel, lane, addresses, &lower) < 0)
		die("Read training failure: lower bound.\n");
	print_read_timing(RAM_DEBUG, "Lower bound for ", lane, channel, &lower);

	/*** Search upper bound. ***/

	/* Start at lower + 1t. */
	upper.t = lower.t + 1;
	upper.p = lower.p;
	if (read_training_find_upper(channel, lane, addresses, &upper) < 0)
		/* Overflow on upper edge is not fatal. */
		printk(BIOS_WARNING, "Read training failure: upper bound.\n");
	print_read_timing(RAM_DEBUG, "Upper bound for ", lane, channel, &upper);

	/*** Calculate and program mean value. ***/

	/* Flatten both bounds into fine steps, average, and split back
	   into t.p form. */
	lower.p += lower.t << READ_TIMING_P_SHIFT;
	upper.p += upper.t << READ_TIMING_P_SHIFT;
	const int mean_p = (lower.p + upper.p) >> 1;
	/* lower becomes the mean value. */
	lower.t = mean_p >> READ_TIMING_P_SHIFT;
	lower.p = mean_p & (READ_TIMING_P_BOUND - 1);
	program_read_timing(channel, lane, &lower);
	printk(RAM_DEBUG, "Final timings for ");
	/* NOTE(review): prefix prints at RAM_DEBUG but the values print at
	   BIOS_DEBUG, so the two halves can appear at different log levels. */
	print_read_timing(BIOS_DEBUG, "", lane, channel, &lower);
}
/*
 * Read training for all populated channels: write the test pattern to
 * each populated rank of a channel, then train all eight byte lanes.
 */
static void perform_read_training(const dimminfo_t *const dimms)
{
	int ch, i;

	FOR_EACH_POPULATED_CHANNEL(dimms, ch) {
		/* Collect one test address per populated rank. */
		address_bunch_t addresses = { { 0, }, 0 };
		FOR_EACH_POPULATED_RANK_IN_CHANNEL(dimms, ch, i)
			addresses.addr[addresses.count++] =
				raminit_get_rank_addr(ch, i);

		for (i = 0; i < addresses.count; ++i) {
			/* Write test pattern. */
			unsigned int offset;
			for (offset = 0; offset < 320; offset += 4)
				write32p(addresses.addr[i] + offset,
					read_training_schedule[offset >> 3]);
		}

		/* Train each of the eight byte lanes. */
		for (i = 0; i < 8; ++i)
			read_training_per_lane(ch, i, &addresses);
	}
}
read_training_store_results(void)222 static void read_training_store_results(void)
223 {
224 	u8 bytes[TOTAL_CHANNELS * 8];
225 	int ch, i;
226 
227 	/* Store one timing pair in one byte each. */
228 	FOR_EACH_CHANNEL(ch) {
229 		for (i = 0; i < 8; ++i) {
230 			const u32 bl_reg = mchbar_read32(CxRDTy_MCHBAR(ch, i));
231 			bytes[(ch * 8) + i] =
232 				(((bl_reg & CxRDTy_T_MASK) >> CxRDTy_T_SHIFT)
233 				 << 4) |
234 				((bl_reg & CxRDTy_P_MASK) >> CxRDTy_P_SHIFT);
235 		}
236 	}
237 
238 	/* Store everything in CMOS above 128 bytes. */
239 	for (i = 0; i < (TOTAL_CHANNELS * 8); ++i)
240 		cmos_write(bytes[i], CMOS_READ_TRAINING + i);
241 }
read_training_restore_results(void)242 static void read_training_restore_results(void)
243 {
244 	u8 bytes[TOTAL_CHANNELS * 8];
245 	int ch, i;
246 
247 	/* Read from CMOS. */
248 	for (i = 0; i < (TOTAL_CHANNELS * 8); ++i)
249 		bytes[i] = cmos_read(CMOS_READ_TRAINING + i);
250 
251 	/* Program restored results. */
252 	FOR_EACH_CHANNEL(ch) {
253 		for (i = 0; i < 8; ++i) {
254 			const int t = bytes[(ch * 8) + i] >> 4;
255 			const int p = bytes[(ch * 8) + i] & 7;
256 			u32 bl_reg = mchbar_read32(CxRDTy_MCHBAR(ch, i));
257 			bl_reg &= ~(CxRDTy_T_MASK | CxRDTy_P_MASK);
258 			bl_reg |= (3 << 25) | CxRDTy_T(t) | CxRDTy_P(p);
259 			mchbar_write32(CxRDTy_MCHBAR(ch, i), bl_reg);
260 			printk(BIOS_DEBUG, "Restored timings for byte lane "
261 			       "%d on channel %d: %d.%d\n", i, ch, t, p);
262 		}
263 	}
264 }
/* Read-training entry point: train and persist the results on a cold
   boot; restore the persisted results on S3 resume. */
void raminit_read_training(const dimminfo_t *const dimms, const int s3resume)
{
	if (s3resume) {
		read_training_restore_results();
	} else {
		perform_read_training(dimms);
		read_training_store_results();
	}
	raminit_reset_readwrite_pointers();
}
275 
/* Write Training. */

/* Fields of the per-group write timing registers
   (CxWRTy_MCHBAR() itself is defined elsewhere). */
/* Coarse delay step t: 4-bit field. */
#define CxWRTy_T_SHIFT		28
#define CxWRTy_T_MASK		(0xf << CxWRTy_T_SHIFT)
#define CxWRTy_T(t)		(((t) << CxWRTy_T_SHIFT) & CxWRTy_T_MASK)
/* Fine delay step p: 3-bit field. */
#define CxWRTy_P_SHIFT		24
#define CxWRTy_P_MASK		(0x7 << CxWRTy_P_SHIFT)
#define CxWRTy_P(p)		(((p) << CxWRTy_P_SHIFT) & CxWRTy_P_MASK)
/* Coarsest step f: 2-bit field, carries t overflow. */
#define CxWRTy_F_SHIFT		18
#define CxWRTy_F_MASK		(0x3 << CxWRTy_F_SHIFT)
#define CxWRTy_F(f)		(((f) << CxWRTy_F_SHIFT) & CxWRTy_F_MASK)
/* 2-bit D field, chosen from t (see program_write_timing()). */
#define CxWRTy_D_SHIFT		16
#define CxWRTy_D_MASK		(0x3 << CxWRTy_D_SHIFT)
#define CxWRTy_BELOW_D		(0x3 << CxWRTy_D_SHIFT)
#define CxWRTy_ABOVE_D		(0x1 << CxWRTy_D_SHIFT)
/* Test pattern for write training, indexed by (byte offset >> 3):
   each word covers one 8-byte chunk of the 640-byte test window. */
static const u32 write_training_schedule[] = {
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
};
/* for raw card types A, B and C: MEM_CLOCK_1067MT? X group X lower/upper */
static const u32 write_training_bytelane_masks_abc[2][4][2] = {
	{ /* clock < MEM_CLOCK_1067MT */
		{ 0xffffffff, 0x00000000 }, { 0x00000000, 0x00000000 },
		{ 0x00000000, 0xffffffff }, { 0x00000000, 0x00000000 },
	},
	{ /* clock == MEM_CLOCK_1067MT */
		{ 0x0000ffff, 0x00000000 }, { 0xffff0000, 0x00000000 },
		{ 0x00000000, 0x0000ffff }, { 0x00000000, 0xffff0000 },
	},
};
/* for raw card type F: group X lower/upper */
static const u32 write_training_bytelane_masks_f[4][2] = {
	{ 0xff00ff00, 0x00000000 }, { 0x00ff00ff, 0x00000000 },
	{ 0x00000000, 0xff00ff00 }, { 0x00000000, 0x00ff00ff },
};
/* p carries into t every WRITE_TIMING_P_BOUND steps, t carries into f
   every t_bound (11 or 12, clock-dependent) steps, and f is limited
   to [0, WRITE_TIMING_F_BOUND). */
#define WRITE_TIMING_P_SHIFT	3
#define WRITE_TIMING_P_BOUND	(1 << WRITE_TIMING_P_SHIFT)
#define WRITE_TIMING_F_BOUND	4
/* A write delay: coarsest step f, coarse step t and fine step p.
   t_bound is the per-instance carry limit of t (clock-dependent). */
typedef struct {
	int f;			/* coarsest timing step */
	int t;			/* coarse timing step */
	const int t_bound;	/* t carries into f at this bound */
	int p;			/* fine timing step */
} write_timing_t;
print_write_timing(const int msg_lvl,const char * const msg,const int group,const int channel,const write_timing_t * const timing)337 static void print_write_timing(const int msg_lvl, const char *const msg,
338 			       const int group, const int channel,
339 			       const write_timing_t *const timing)
340 {
341 	printk(msg_lvl, "%sgroup %d, ch %d: %d.%d.%d\n",
342 	       msg, group, channel, timing->f, timing->t, timing->p);
343 }
344 
/*
 * Bring a write timing into canonical form: first carry p into t
 * until 0 <= p < WRITE_TIMING_P_BOUND, then carry t into f until
 * 0 <= t < t_bound, finally clamp f into [0, WRITE_TIMING_F_BOUND).
 *
 * Returns 0 if the timing was representable, -1 if it had to be
 * clamped (under- or overflow).
 */
static int normalize_write_timing(write_timing_t *const timing)
{
	while (timing->p >= WRITE_TIMING_P_BOUND) {
		timing->t++;
		timing->p -= WRITE_TIMING_P_BOUND;
	}
	while (timing->p < 0) {
		timing->t--;
		timing->p += WRITE_TIMING_P_BOUND;
	}
	while (timing->t >= timing->t_bound) {
		timing->f++;
		timing->t -= timing->t_bound;
	}
	while (timing->t < 0) {
		timing->f--;
		timing->t += timing->t_bound;
	}
	if (timing->f < 0) {
		printk(BIOS_WARNING,
		       "Timing underflow during write training.\n");
		timing->f = 0;
		timing->t = 0;
		timing->p = 0;
		return -1;
	} else if (timing->f >= WRITE_TIMING_F_BOUND) {
		printk(BIOS_WARNING,
		       "Timing overflow during write training.\n");
		timing->f = WRITE_TIMING_F_BOUND - 1;
		timing->t = timing->t_bound - 1;
		timing->p = WRITE_TIMING_P_BOUND - 1;
		return -1;
	}
	return 0;
}
/*
 * Normalize the given timing and program it into the channel's
 * per-group write timing register, deriving the D field from t.
 *
 * Returns -1 (register untouched) if the timing had to be clamped,
 * 0 otherwise.
 */
static int program_write_timing(const int ch, const int group,
				write_timing_t *const timing, int memclk1067)
{
	/* t thresholds for the D field: MEM_CLOCK_1067MT? X lower/upper */
	const u32 d_bounds[2][2] = { { 1, 6 }, { 2, 9 } };

	if (normalize_write_timing(timing) < 0)
		return -1;

	const int f = timing->f;
	const int t = timing->t;
	/* At 1067MT, p values from 9.4 up to (not incl.) 10.4 are forced
	   to p == 4 — presumably a settings hole; TODO confirm against
	   chipset docs. */
	const int p = (memclk1067 && (((t ==  9) && (timing->p >= 4)) ||
				       ((t == 10) && (timing->p < 4))))
		? 4 : timing->p;
	/* D marks whether t lies below, inside or above the window
	   given by d_bounds. */
	const int d =
		(t <= d_bounds[memclk1067][0]) ? CxWRTy_BELOW_D :
		((t >  d_bounds[memclk1067][1]) ? CxWRTy_ABOVE_D : 0);

	u32 reg = mchbar_read32(CxWRTy_MCHBAR(ch, group));
	reg &= ~(CxWRTy_T_MASK | CxWRTy_P_MASK | CxWRTy_F_MASK);
	reg &= ~CxWRTy_D_MASK;
	reg |= CxWRTy_T(t) | CxWRTy_P(p) | CxWRTy_F(f) | d;
	mchbar_write32(CxWRTy_MCHBAR(ch, group), reg);

	return 0;
}
/* Returns 1 on success, 0 on failure. */
/*
 * Test the currently programmed write timings: write the 640-byte
 * pattern to every address in the bunch (each schedule word twice per
 * 8-byte chunk) and read it back through the given lower/upper
 * byte-lane masks. MCHBAR 0x0220/0x0218 are temporarily modified and
 * restored on every exit path.
 */
static int write_training_test(const address_bunch_t *const addresses,
			       const u32 *const masks)
{
	int i, ret = 0;

	/* Save current state, then set bits 31:28 of 0x0220 and bit 4 of
	   0x0218 for the duration of the test. */
	const u32 mmarb0 = mchbar_read32(0x0220);
	const u8  wrcctl = mchbar_read8(0x0218);
	mchbar_setbits32(0x0220, 0xf << 28);
	mchbar_setbits8(0x0218,  0x1 <<  4);

	for (i = 0; i < addresses->count; ++i) {
		const unsigned int addr = addresses->addr[i];
		unsigned int off;
		/* Write the pattern: one schedule word duplicated into both
		   halves of each 8-byte chunk. */
		for (off = 0; off < 640; off += 8) {
			const u32 pattern = write_training_schedule[off >> 3];
			write32p(addr + off, pattern);
			write32p(addr + off + 4, pattern);
		}

		/* Set bit 0 of MCHBAR 0x78 — presumably flushes the writes
		   before reading back; TODO confirm. */
		mchbar_setbits8(0x78, 1);

		/* Read back; masks[0]/masks[1] select the byte lanes under
		   test in the lower/upper word of each chunk. */
		for (off = 0; off < 640; off += 8) {
			const u32 good = write_training_schedule[off >> 3];
			const u32 read1 = read32p(addr + off);
			if ((read1 & masks[0]) != (good & masks[0]))
				goto _bad_timing_out;
			const u32 read2 = read32p(addr + off + 4);
			if ((read2 & masks[1]) != (good & masks[1]))
				goto _bad_timing_out;
		}
	}
	ret = 1;

_bad_timing_out:
	/* Restore the saved register state. */
	mchbar_write32(0x0220, mmarb0);
	mchbar_write8(0x0218, wrcctl);

	return ret;
}
/*
 * Find the lowest working write timing for one group.
 *
 * Advances t from the caller-provided start until the pattern test
 * passes, then steps one t back and advances p until it passes again.
 * `lower` is both the starting point and the result.
 *
 * Returns 0 on success, -1 on timing overflow before a working
 * setting was found.
 */
static int write_training_find_lower(const int ch, const int group,
				     const address_bunch_t *const addresses,
				     const u32 masks[][2], const int memclk1067,
				     write_timing_t *const lower)
{
	/* Return value ignored: an out-of-range start (f may be -1) is
	   clamped to the minimum and the search proceeds from there. */
	program_write_timing(ch, group, lower, memclk1067);
	/* Coarse search for good t. */
	while (!write_training_test(addresses, masks[group])) {
		++lower->t;
		if (program_write_timing(ch, group, lower, memclk1067) < 0)
			return -1;
	}
	/* Step back, then fine search for good p. */
	if ((lower->f <= 0) && (lower->t <= 0))
		/* Can't step back, zero is good. */
		return 0;

	--lower->t;
	program_write_timing(ch, group, lower, memclk1067);
	while (!write_training_test(addresses, masks[group])) {
		++lower->p;
		if (program_write_timing(ch, group, lower, memclk1067) < 0)
			return -1;
	}

	return 0;
}
/*
 * Find the first failing write timing above `upper` for one group.
 *
 * Verifies that `upper` (set by the caller to lower bound + 3t) still
 * passes, advances t until the test fails, steps one t back, then
 * advances p until it fails. `upper` holds the result.
 *
 * Returns 0 on success, -1 on timing overflow or if `upper` does not
 * pass to begin with.
 */
static int write_training_find_upper(const int ch, const int group,
				     const address_bunch_t *const addresses,
				     const u32 masks[][2], const int memclk1067,
				     write_timing_t *const upper)
{
	if (program_write_timing(ch, group, upper, memclk1067) < 0)
		return -1;
	if (!write_training_test(addresses, masks[group])) {
		printk(BIOS_WARNING,
		       "Write training failure; limits too narrow.\n");
		return -1;
	}
	/* Coarse search for bad t. */
	while (write_training_test(addresses, masks[group])) {
		++upper->t;
		if (program_write_timing(ch, group, upper, memclk1067) < 0)
			return -1;
	}
	/* Fine search for bad p. */
	--upper->t;
	/* Cannot fail: t was just incremented past this value. */
	program_write_timing(ch, group, upper, memclk1067);
	while (write_training_test(addresses, masks[group])) {
		++upper->p;
		if (program_write_timing(ch, group, upper, memclk1067) < 0)
			return -1;
	}

	return 0;
}
/*
 * Write training for one byte-lane group of one channel: find the
 * lower and upper bounds of the working timing window and program
 * their mean. Dies if no lower bound can be found; an upper-bound
 * overflow is tolerated (the clamped maximum is used).
 */
static void write_training_per_group(const int ch, const int group,
				     const address_bunch_t *const addresses,
				     const u32 masks[][2], const int memclk1067)
{
	const int t_bound = memclk1067 ? 12 : 11;
	write_timing_t lower = { 0, 0, t_bound, 0 },
		       upper = { 0, 0, t_bound, 0 };

	/*** Search lower bound. ***/

	/* Start at -1f from current values. */
	/* NOTE(review): t/p/f are read from bits 15:12/10:8/3:2 here,
	   not from the CxWRTy_T/P/F field positions (31:28/26:24/19:18)
	   used when writing — presumably a second copy of the timing in
	   the same register; TODO confirm. */
	const u32 reg = mchbar_read32(CxWRTy_MCHBAR(ch, group));
	lower.t =  (reg >> 12) & 0xf;
	lower.p =  (reg >>  8) & 0x7;
	lower.f = ((reg >>  2) & 0x3) - 1;

	if (write_training_find_lower(ch, group, addresses,
				      masks, memclk1067, &lower) < 0)
		die("Write training failure: lower bound.\n");
	print_write_timing(RAM_DEBUG, "Lower bound for ", group, ch, &lower);

	/*** Search upper bound. ***/

	/* Start at lower + 3t. */
	upper.t = lower.t + 3;
	upper.p = lower.p;
	upper.f = lower.f;

	if (write_training_find_upper(ch, group, addresses,
				      masks, memclk1067, &upper) < 0)
		printk(BIOS_WARNING, "Write training failure: upper bound.\n");
	print_write_timing(RAM_DEBUG, "Upper bound for ", group, ch, &upper);

	/*** Calculate and program mean value. ***/

	/* Flatten both bounds into fine steps, average, and split back
	   into f.t.p form. */
	lower.t += lower.f * lower.t_bound;
	lower.p += lower.t << WRITE_TIMING_P_SHIFT;
	upper.t += upper.f * upper.t_bound;
	upper.p += upper.t << WRITE_TIMING_P_SHIFT;
	/* lower becomes the mean value. */
	const int mean_p = (lower.p + upper.p) >> 1;
	lower.f = mean_p / (lower.t_bound << WRITE_TIMING_P_SHIFT);
	lower.t = (mean_p >> WRITE_TIMING_P_SHIFT) % lower.t_bound;
	lower.p = mean_p & (WRITE_TIMING_P_BOUND - 1);
	program_write_timing(ch, group, &lower, memclk1067);
	printk(RAM_DEBUG, "Final timings for ");
	/* NOTE(review): prefix prints at RAM_DEBUG but the values print at
	   BIOS_DEBUG, so the two halves can appear at different log levels. */
	print_write_timing(BIOS_DEBUG, "", group, ch, &lower);
}
/*
 * Write training for all populated channels.
 *
 * If both channels carry the same raw-card class (both type F, or
 * both A/B/C), all ranks are gathered into one list and trained
 * through channel A's registers in a single pass; otherwise each
 * channel is trained separately.
 */
static void perform_write_training(const int memclk1067,
				   const dimminfo_t *const dimms)
{
	/* Per-channel flag: DIMM is raw card type F. */
	const int cardF[] = { dimms[0].card_type == 0xf,
			      dimms[1].card_type == 0xf };
	int ch, r, group;

	address_bunch_t addr[2] = { { { 0, }, 0 }, { { 0, }, 0 }, };
	/* Add check if channel A is populated, i.e. if cardF[0] is valid.
	 * Otherwise we would write channel A registers when DIMM in channel B
	 * is of raw card type A, B or C (cardF[1] == 0) even if channel A is
	 * not populated.
	 * Needs raw card type A, B or C for testing. */
	if ((dimms[0].card_type != 0) && (cardF[0] == cardF[1])) {
		/* Common path for both channels. */
		FOR_EACH_POPULATED_RANK(dimms, ch, r)
			addr[0].addr[addr[0].count++] =
				raminit_get_rank_addr(ch, r);
	} else {
		/* Separate address list per channel. */
		FOR_EACH_POPULATED_RANK(dimms, ch, r)
			addr[ch].addr[addr[ch].count++] =
				raminit_get_rank_addr(ch, r);
	}

	FOR_EACH_CHANNEL(ch) if (addr[ch].count > 0) {
		/* Byte-lane masks depend on raw card type (and clock for
		   types A/B/C). */
		const u32 (*const masks)[2] = (!cardF[ch])
			? write_training_bytelane_masks_abc[memclk1067]
			: write_training_bytelane_masks_f;
		for (group = 0; group < 4; ++group) {
			/* Skip groups with no lanes to test. */
			if (!masks[group][0] && !masks[group][1])
				continue;
			write_training_per_group(
				ch, group, &addr[ch], masks, memclk1067);
		}
	}
}
write_training_store_results(void)586 static void write_training_store_results(void)
587 {
588 	u8 bytes[TOTAL_CHANNELS * 4 * 2]; /* two bytes per group */
589 	int ch, i;
590 
591 	/* Store one T/P pair in one, F in the other byte. */
592 	/* We could save six bytes by putting all F values in two bytes. */
593 	FOR_EACH_CHANNEL(ch) {
594 		for (i = 0; i < 4; ++i) {
595 			const u32 reg = mchbar_read32(CxWRTy_MCHBAR(ch, i));
596 			bytes[(ch * 8) + (i * 2)] =
597 				(((reg & CxWRTy_T_MASK)
598 				  >> CxWRTy_T_SHIFT) << 4) |
599 				((reg & CxWRTy_P_MASK) >> CxWRTy_P_SHIFT);
600 			bytes[(ch * 8) + (i * 2) + 1] =
601 				((reg & CxWRTy_F_MASK) >> CxWRTy_F_SHIFT);
602 		}
603 	}
604 
605 	/* Store everything in CMOS above 128 bytes. */
606 	for (i = 0; i < (TOTAL_CHANNELS * 4 * 2); ++i)
607 		cmos_write(bytes[i], CMOS_WRITE_TRAINING + i);
608 }
write_training_restore_results(const int memclk1067)609 static void write_training_restore_results(const int memclk1067)
610 {
611 	const int t_bound = memclk1067 ? 12 : 11;
612 
613 	u8 bytes[TOTAL_CHANNELS * 4 * 2]; /* two bytes per group */
614 	int ch, i;
615 
616 	/* Read from CMOS. */
617 	for (i = 0; i < (TOTAL_CHANNELS * 4 * 2); ++i)
618 		bytes[i] = cmos_read(CMOS_WRITE_TRAINING + i);
619 
620 	/* Program with original program_write_timing(). */
621 	FOR_EACH_CHANNEL(ch) {
622 		for (i = 0; i < 4; ++i) {
623 			write_timing_t timing = { 0, 0, t_bound, 0 };
624 			timing.f = bytes[(ch * 8) + (i * 2) + 1] & 3;
625 			timing.t = bytes[(ch * 8) + (i * 2)] >> 4;
626 			timing.p = bytes[(ch * 8) + (i * 2)] & 7;
627 			program_write_timing(ch, i, &timing, memclk1067);
628 			printk(BIOS_DEBUG, "Restored timings for group %d "
629 					   "on channel %d: %d.%d.%d\n",
630 			       i, ch, timing.f, timing.t, timing.p);
631 		}
632 	}
633 }
raminit_write_training(const mem_clock_t ddr3clock,const dimminfo_t * const dimms,const int s3resume)634 void raminit_write_training(const mem_clock_t ddr3clock,
635 			    const dimminfo_t *const dimms,
636 			    const int s3resume)
637 {
638 	const int memclk1067 = ddr3clock == MEM_CLOCK_1067MT;
639 
640 	if (!s3resume) {
641 		perform_write_training(memclk1067, dimms);
642 		write_training_store_results();
643 	} else {
644 		write_training_restore_results(memclk1067);
645 	}
646 	raminit_reset_readwrite_pointers();
647 }
648