1 /* SPDX-License-Identifier: GPL-2.0-only */
2
3 #include <stdint.h>
4 #include <device/mmio.h>
5 #include <pc80/mc146818rtc.h>
6 #include <console/console.h>
7 #include "gm45.h"
8
/* A set of rank base addresses (one per populated rank of a channel)
   used as targets for the training test patterns. */
typedef struct {
	u32 addr[RANKS_PER_CHANNEL];	/* base address of each rank */
	unsigned int count;		/* number of valid entries in addr[] */
} address_bunch_t;
13
/* Read Training. */
/* Per-channel, per-byte-lane read timing register; lanes are laid out
   in reverse order within a channel's 0x100-byte register block. */
#define CxRDTy_MCHBAR(ch, bl) (0x14b0 + ((ch) * 0x0100) + ((7 - (bl)) * 4))
#define CxRDTy_T_SHIFT 20
#define CxRDTy_T_MASK (0xf << CxRDTy_T_SHIFT)
#define CxRDTy_T(t) (((t) << CxRDTy_T_SHIFT) & CxRDTy_T_MASK)
#define CxRDTy_P_SHIFT 16
#define CxRDTy_P_MASK (0x7 << CxRDTy_P_SHIFT)
#define CxRDTy_P(p) (((p) << CxRDTy_P_SHIFT) & CxRDTy_P_MASK)
/* Test pattern for read training: 40 dwords == 320 bytes, indexed by
   (byte offset >> 3) during verification. */
static const u32 read_training_schedule[] = {
	0xfefefefe, 0x7f7f7f7f, 0xbebebebe, 0xdfdfdfdf,
	0xeeeeeeee, 0xf7f7f7f7, 0xfafafafa, 0xfdfdfdfd,
	0x00000000, 0x81818181, 0x40404040, 0x21212121,
	0x10101010, 0x09090909, 0x04040404, 0x03030303,
	0x10101010, 0x11111111, 0xeeeeeeee, 0xefefefef,
	0x10101010, 0x11111111, 0xeeeeeeee, 0xefefefef,
	0x10101010, 0xefefefef, 0x10101010, 0xefefefef,
	0x10101010, 0xefefefef, 0x10101010, 0xefefefef,
	0x00000000, 0xffffffff, 0x00000000, 0xffffffff,
	0x00000000, 0xffffffff, 0x00000000, 0x00000000,
};
#define READ_TIMING_P_SHIFT 3
#define READ_TIMING_P_BOUND (1 << READ_TIMING_P_SHIFT)
#define READ_TIMING_T_BOUND 14
/* A read timing value: t coarse steps plus p fine steps, where one t
   equals READ_TIMING_P_BOUND (8) p steps. */
typedef struct {
	int t;	/* coarse delay, 0 <= t < READ_TIMING_T_BOUND when normalized */
	int p;	/* fine delay (eighths of t), 0 <= p < READ_TIMING_P_BOUND */
} read_timing_t;
print_read_timing(const int msg_lvl,const char * const msg,const int lane,const int channel,const read_timing_t * const timing)41 static void print_read_timing(const int msg_lvl, const char *const msg,
42 const int lane, const int channel,
43 const read_timing_t *const timing)
44 {
45 printk(msg_lvl, "%sbyte lane %d, ch %d: %d.%d\n",
46 msg, lane, channel, timing->t, timing->p);
47 }
48
/*
 * Bring a read timing into canonical form: carry p overflow/underflow
 * into t, then clamp t into [0, READ_TIMING_T_BOUND).
 *
 * Returns 0 if the timing was representable, -1 if it had to be
 * saturated at either end (a warning is logged in that case).
 */
static int normalize_read_timing(read_timing_t *const timing)
{
	/* Propagate fine-step carries into the coarse field. */
	for (; timing->p >= READ_TIMING_P_BOUND; timing->p -= READ_TIMING_P_BOUND)
		++timing->t;
	for (; timing->p < 0; timing->p += READ_TIMING_P_BOUND)
		--timing->t;

	if (timing->t < 0) {
		printk(BIOS_WARNING,
		       "Timing underflow during read training.\n");
		timing->t = 0;
		timing->p = 0;
		return -1;
	}
	if (timing->t >= READ_TIMING_T_BOUND) {
		printk(BIOS_WARNING,
		       "Timing overflow during read training.\n");
		timing->t = READ_TIMING_T_BOUND - 1;
		timing->p = READ_TIMING_P_BOUND - 1;
		return -1;
	}
	return 0;
}
/*
 * Normalize the given timing and program it into the byte lane's read
 * timing register.
 *
 * Returns 0 on success. Returns -1 WITHOUT touching the register if
 * the timing over-/underflowed (timing is left clamped by the
 * normalization).
 */
static int program_read_timing(const int ch, const int lane,
			       read_timing_t *const timing)
{
	if (normalize_read_timing(timing) < 0)
		return -1;

	u32 reg = mchbar_read32(CxRDTy_MCHBAR(ch, lane));
	reg &= ~(CxRDTy_T_MASK | CxRDTy_P_MASK);
	reg |= CxRDTy_T(timing->t) | CxRDTy_P(timing->p);
	mchbar_write32(CxRDTy_MCHBAR(ch, lane), reg);

	return 0;
}
87 /* Returns 1 on success, 0 on failure. */
read_training_test(const int channel,const int lane,const address_bunch_t * const addresses)88 static int read_training_test(const int channel, const int lane,
89 const address_bunch_t *const addresses)
90 {
91 int i;
92
93 const int lane_offset = lane & 4;
94 const int lane_mask = 0xff << ((lane & ~4) << 3);
95
96 for (i = 0; i < addresses->count; ++i) {
97 unsigned int offset;
98 for (offset = lane_offset; offset < 320; offset += 8) {
99 const u32 read = read32p(addresses->addr[i] + offset);
100 const u32 good = read_training_schedule[offset >> 3];
101 if ((read & lane_mask) != (good & lane_mask))
102 return 0;
103 }
104 }
105 return 1;
106 }
/*
 * Search the lowest working read timing, starting from *lower (which
 * is both input and result).
 *
 * First increments t until the test passes, then steps back one t and
 * increments p until the test passes again. Returns 0 on success, -1
 * if the timing range was exhausted.
 */
static int read_training_find_lower(const int channel, const int lane,
				    const address_bunch_t *const addresses,
				    read_timing_t *const lower)
{
	/* Coarse search for good t. */
	program_read_timing(channel, lane, lower);
	while (!read_training_test(channel, lane, addresses)) {
		++lower->t;
		if (program_read_timing(channel, lane, lower) < 0)
			return -1;
	}

	/* Step back, then fine search for good p. */
	if (lower->t <= 0)
		/* Can't step back, zero is good. */
		return 0;

	--lower->t;
	program_read_timing(channel, lane, lower);
	while (!read_training_test(channel, lane, addresses)) {
		++lower->p;
		if (program_read_timing(channel, lane, lower) < 0)
			return -1;
	}

	return 0;
}
/*
 * Search the first failing read timing above *upper, which must start
 * at a known-good value (the caller passes lower + 1t).
 *
 * Increments t until the test fails, steps back one t, then increments
 * p until the test fails again; *upper then holds the first failing
 * timing on that path. Returns 0 on success, -1 if the starting value
 * already fails or the range was exhausted.
 */
static int read_training_find_upper(const int channel, const int lane,
				    const address_bunch_t *const addresses,
				    read_timing_t *const upper)
{
	if (program_read_timing(channel, lane, upper) < 0)
		return -1;
	if (!read_training_test(channel, lane, addresses)) {
		printk(BIOS_WARNING,
		       "Read training failure: limits too narrow.\n");
		return -1;
	}
	/* Coarse search for bad t. */
	do {
		++upper->t;
		if (program_read_timing(channel, lane, upper) < 0)
			return -1;
	} while (read_training_test(channel, lane, addresses));
	/* Fine search for bad p. */
	--upper->t;
	program_read_timing(channel, lane, upper);
	while (read_training_test(channel, lane, addresses)) {
		++upper->p;
		if (program_read_timing(channel, lane, upper) < 0)
			return -1;
	}

	return 0;
}
/*
 * Train one byte lane: find the lower and upper bound of the working
 * timing window and program the mean of both. A lower-bound failure is
 * fatal (die); an upper-bound failure only warns, since saturating at
 * the top still leaves a usable window.
 */
static void read_training_per_lane(const int channel, const int lane,
				   const address_bunch_t *const addresses)
{
	read_timing_t lower, upper;

	/* NOTE(review): presumably enables the per-lane timing override
	   (bits 25/26 of the CxRDTy register) — confirm against MCH docs. */
	mchbar_setbits32(CxRDTy_MCHBAR(channel, lane), 3 << 25);

	/*** Search lower bound. ***/

	/* Start at zero. */
	lower.t = 0;
	lower.p = 0;
	if (read_training_find_lower(channel, lane, addresses, &lower) < 0)
		die("Read training failure: lower bound.\n");
	print_read_timing(RAM_DEBUG, "Lower bound for ", lane, channel, &lower);

	/*** Search upper bound. ***/

	/* Start at lower + 1t. */
	upper.t = lower.t + 1;
	upper.p = lower.p;
	if (read_training_find_upper(channel, lane, addresses, &upper) < 0)
		/* Overflow on upper edge is not fatal. */
		printk(BIOS_WARNING, "Read training failure: upper bound.\n");
	print_read_timing(RAM_DEBUG, "Upper bound for ", lane, channel, &upper);

	/*** Calculate and program mean value. ***/

	/* Convert both bounds to fine (p) units, average, then split the
	   mean back into t.p form. */
	lower.p += lower.t << READ_TIMING_P_SHIFT;
	upper.p += upper.t << READ_TIMING_P_SHIFT;
	const int mean_p = (lower.p + upper.p) >> 1;
	/* lower becomes the mean value. */
	lower.t = mean_p >> READ_TIMING_P_SHIFT;
	lower.p = mean_p & (READ_TIMING_P_BOUND - 1);
	program_read_timing(channel, lane, &lower);
	/* NOTE(review): the prefix uses RAM_DEBUG while the values use
	   BIOS_DEBUG, so the prefix can be suppressed independently —
	   looks unintentional; confirm before changing. */
	printk(RAM_DEBUG, "Final timings for ");
	print_read_timing(BIOS_DEBUG, "", lane, channel, &lower);
}
/*
 * Run read training for every populated channel: write the test
 * pattern to each populated rank, then train all eight byte lanes.
 */
static void perform_read_training(const dimminfo_t *const dimms)
{
	int ch, i;

	FOR_EACH_POPULATED_CHANNEL(dimms, ch) {
		/* Collect the base address of each populated rank. */
		address_bunch_t addresses = { { 0, }, 0 };
		FOR_EACH_POPULATED_RANK_IN_CHANNEL(dimms, ch, i)
			addresses.addr[addresses.count++] =
				raminit_get_rank_addr(ch, i);

		for (i = 0; i < addresses.count; ++i) {
			/* Write test pattern (320 bytes, one schedule
			   entry per 8-byte step, repeated per dword). */
			unsigned int offset;
			for (offset = 0; offset < 320; offset += 4)
				write32p(addresses.addr[i] + offset,
					 read_training_schedule[offset >> 3]);
		}

		for (i = 0; i < 8; ++i)
			read_training_per_lane(ch, i, &addresses);
	}
}
/*
 * Save the trained read timings to CMOS so they can be restored on S3
 * resume without re-training. One byte per lane: t in the high nibble,
 * p in the low bits.
 */
static void read_training_store_results(void)
{
	u8 bytes[TOTAL_CHANNELS * 8];
	int ch, i;

	/* Store one timing pair in one byte each. */
	FOR_EACH_CHANNEL(ch) {
		for (i = 0; i < 8; ++i) {
			const u32 bl_reg = mchbar_read32(CxRDTy_MCHBAR(ch, i));
			bytes[(ch * 8) + i] =
				(((bl_reg & CxRDTy_T_MASK) >> CxRDTy_T_SHIFT)
				 << 4) |
				((bl_reg & CxRDTy_P_MASK) >> CxRDTy_P_SHIFT);
		}
	}

	/* Store everything in CMOS above 128 bytes. */
	for (i = 0; i < (TOTAL_CHANNELS * 8); ++i)
		cmos_write(bytes[i], CMOS_READ_TRAINING + i);
}
/*
 * Restore the read timings saved by read_training_store_results()
 * from CMOS and program them, setting the same override bits (3 << 25)
 * that training sets.
 */
static void read_training_restore_results(void)
{
	u8 bytes[TOTAL_CHANNELS * 8];
	int ch, i;

	/* Read from CMOS. */
	for (i = 0; i < (TOTAL_CHANNELS * 8); ++i)
		bytes[i] = cmos_read(CMOS_READ_TRAINING + i);

	/* Program restored results. */
	FOR_EACH_CHANNEL(ch) {
		for (i = 0; i < 8; ++i) {
			/* Unpack t (high nibble) and p (low three bits). */
			const int t = bytes[(ch * 8) + i] >> 4;
			const int p = bytes[(ch * 8) + i] & 7;
			u32 bl_reg = mchbar_read32(CxRDTy_MCHBAR(ch, i));
			bl_reg &= ~(CxRDTy_T_MASK | CxRDTy_P_MASK);
			bl_reg |= (3 << 25) | CxRDTy_T(t) | CxRDTy_P(p);
			mchbar_write32(CxRDTy_MCHBAR(ch, i), bl_reg);
			printk(BIOS_DEBUG, "Restored timings for byte lane "
			       "%d on channel %d: %d.%d\n", i, ch, t, p);
		}
	}
}
/* Entry point for read training: run full training and persist the
   results, or restore previously saved timings on S3 resume. */
void raminit_read_training(const dimminfo_t *const dimms, const int s3resume)
{
	if (s3resume) {
		read_training_restore_results();
	} else {
		perform_read_training(dimms);
		read_training_store_results();
	}
	raminit_reset_readwrite_pointers();
}
275
/* Write Training. */
/* Field layout of the per-group write timing register (written by
   program_write_timing()). */
#define CxWRTy_T_SHIFT 28
#define CxWRTy_T_MASK (0xf << CxWRTy_T_SHIFT)
#define CxWRTy_T(t) (((t) << CxWRTy_T_SHIFT) & CxWRTy_T_MASK)
#define CxWRTy_P_SHIFT 24
#define CxWRTy_P_MASK (0x7 << CxWRTy_P_SHIFT)
#define CxWRTy_P(p) (((p) << CxWRTy_P_SHIFT) & CxWRTy_P_MASK)
#define CxWRTy_F_SHIFT 18
#define CxWRTy_F_MASK (0x3 << CxWRTy_F_SHIFT)
#define CxWRTy_F(f) (((f) << CxWRTy_F_SHIFT) & CxWRTy_F_MASK)
#define CxWRTy_D_SHIFT 16
#define CxWRTy_D_MASK (0x3 << CxWRTy_D_SHIFT)
#define CxWRTy_BELOW_D (0x3 << CxWRTy_D_SHIFT)
#define CxWRTy_ABOVE_D (0x1 << CxWRTy_D_SHIFT)
/* Test pattern for write training: 80 dwords == 640 bytes, indexed by
   (byte offset >> 3); each entry is written to both dwords of an
   8-byte step. */
static const u32 write_training_schedule[] = {
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
};
/* for raw card types A, B and C: MEM_CLOCK_1067MT? X group X lower/upper */
static const u32 write_training_bytelane_masks_abc[2][4][2] = {
	{ /* clock < MEM_CLOCK_1067MT */
		{ 0xffffffff, 0x00000000 }, { 0x00000000, 0x00000000 },
		{ 0x00000000, 0xffffffff }, { 0x00000000, 0x00000000 },
	},
	{ /* clock == MEM_CLOCK_1067MT */
		{ 0x0000ffff, 0x00000000 }, { 0xffff0000, 0x00000000 },
		{ 0x00000000, 0x0000ffff }, { 0x00000000, 0xffff0000 },
	},
};
/* for raw card type F: group X lower/upper */
static const u32 write_training_bytelane_masks_f[4][2] = {
	{ 0xff00ff00, 0x00000000 }, { 0x00ff00ff, 0x00000000 },
	{ 0x00000000, 0xff00ff00 }, { 0x00000000, 0x00ff00ff },
};
#define WRITE_TIMING_P_SHIFT 3
#define WRITE_TIMING_P_BOUND (1 << WRITE_TIMING_P_SHIFT)
#define WRITE_TIMING_F_BOUND 4
/* A write timing value with three granularities: f (coarsest), t
   (wraps at t_bound: 12 at 1067MT, 11 otherwise) and p (eighths of t). */
typedef struct {
	int f;
	int t;
	const int t_bound;	/* modulus for t; fixed per memory clock */
	int p;
} write_timing_t;
print_write_timing(const int msg_lvl,const char * const msg,const int group,const int channel,const write_timing_t * const timing)337 static void print_write_timing(const int msg_lvl, const char *const msg,
338 const int group, const int channel,
339 const write_timing_t *const timing)
340 {
341 printk(msg_lvl, "%sgroup %d, ch %d: %d.%d.%d\n",
342 msg, group, channel, timing->f, timing->t, timing->p);
343 }
344
/*
 * Bring a write timing into canonical form: carry p overflow/underflow
 * into t, then t overflow/underflow into f, then clamp f into
 * [0, WRITE_TIMING_F_BOUND).
 *
 * Returns 0 if the timing was representable, -1 if it had to be
 * saturated at either end (a warning is logged in that case).
 */
static int normalize_write_timing(write_timing_t *const timing)
{
	/* Propagate fine-step carries into t, then t carries into f. */
	for (; timing->p >= WRITE_TIMING_P_BOUND; timing->p -= WRITE_TIMING_P_BOUND)
		++timing->t;
	for (; timing->p < 0; timing->p += WRITE_TIMING_P_BOUND)
		--timing->t;
	for (; timing->t >= timing->t_bound; timing->t -= timing->t_bound)
		++timing->f;
	for (; timing->t < 0; timing->t += timing->t_bound)
		--timing->f;

	if (timing->f < 0) {
		printk(BIOS_WARNING,
		       "Timing underflow during write training.\n");
		timing->f = 0;
		timing->t = 0;
		timing->p = 0;
		return -1;
	}
	if (timing->f >= WRITE_TIMING_F_BOUND) {
		printk(BIOS_WARNING,
		       "Timing overflow during write training.\n");
		timing->f = WRITE_TIMING_F_BOUND - 1;
		timing->t = timing->t_bound - 1;
		timing->p = WRITE_TIMING_P_BOUND - 1;
		return -1;
	}
	return 0;
}
/*
 * Normalize the given timing and program it into the group's write
 * timing register, deriving the D field from t.
 *
 * Returns 0 on success. Returns -1 WITHOUT touching the register if
 * the timing over-/underflowed (timing is left clamped).
 */
static int program_write_timing(const int ch, const int group,
				write_timing_t *const timing, int memclk1067)
{
	/* t thresholds selecting the D field:
	   MEM_CLOCK_1067MT? X lower/upper */
	const u32 d_bounds[2][2] = { { 1, 6 }, { 2, 9 } };

	if (normalize_write_timing(timing) < 0)
		return -1;

	const int f = timing->f;
	const int t = timing->t;
	/* At 1067MT, p is pinned to 4 around the t == 9/10 boundary;
	   presumably a hardware restriction — TODO confirm against docs. */
	const int p = (memclk1067 && (((t == 9) && (timing->p >= 4)) ||
				      ((t == 10) && (timing->p < 4))))
		? 4 : timing->p;
	/* D depends on where t sits relative to the per-clock bounds. */
	const int d =
		(t <= d_bounds[memclk1067][0]) ? CxWRTy_BELOW_D :
		((t > d_bounds[memclk1067][1]) ? CxWRTy_ABOVE_D : 0);

	u32 reg = mchbar_read32(CxWRTy_MCHBAR(ch, group));
	reg &= ~(CxWRTy_T_MASK | CxWRTy_P_MASK | CxWRTy_F_MASK);
	reg &= ~CxWRTy_D_MASK;
	reg |= CxWRTy_T(t) | CxWRTy_P(p) | CxWRTy_F(f) | d;
	mchbar_write32(CxWRTy_MCHBAR(ch, group), reg);

	return 0;
}
/* Returns 1 on success, 0 on failure. */
/*
 * Write the 640-byte test pattern to every collected rank address and
 * read it back, comparing only the byte lanes selected by masks[0]
 * (lower dword of each 8-byte step) and masks[1] (upper dword).
 */
static int write_training_test(const address_bunch_t *const addresses,
			       const u32 *const masks)
{
	int i, ret = 0;

	/* Save MCHBAR 0x0220/0x0218 and set test bits for the duration
	   of the test; both are restored on every exit path below.
	   NOTE(review): the exact meaning of these bits is not visible
	   here — confirm against the MCH documentation. */
	const u32 mmarb0 = mchbar_read32(0x0220);
	const u8 wrcctl = mchbar_read8(0x0218);
	mchbar_setbits32(0x0220, 0xf << 28);
	mchbar_setbits8(0x0218, 0x1 << 4);

	for (i = 0; i < addresses->count; ++i) {
		const unsigned int addr = addresses->addr[i];
		unsigned int off;
		/* Write the pattern, one schedule entry per 8-byte step. */
		for (off = 0; off < 640; off += 8) {
			const u32 pattern = write_training_schedule[off >> 3];
			write32p(addr + off, pattern);
			write32p(addr + off + 4, pattern);
		}

		/* NOTE(review): presumably flushes/commits the posted
		   writes before reading back — confirm. */
		mchbar_setbits8(0x78, 1);

		for (off = 0; off < 640; off += 8) {
			const u32 good = write_training_schedule[off >> 3];
			const u32 read1 = read32p(addr + off);
			if ((read1 & masks[0]) != (good & masks[0]))
				goto _bad_timing_out;
			const u32 read2 = read32p(addr + off + 4);
			if ((read2 & masks[1]) != (good & masks[1]))
				goto _bad_timing_out;
		}
	}
	ret = 1;

_bad_timing_out:
	/* Restore the saved register contents. */
	mchbar_write32(0x0220, mmarb0);
	mchbar_write8(0x0218, wrcctl);

	return ret;
}
/*
 * Search the lowest working write timing, starting from *lower (which
 * is both input and result).
 *
 * First increments t until the test passes, then steps back one t and
 * increments p until the test passes again. Returns 0 on success, -1
 * if the timing range was exhausted.
 */
static int write_training_find_lower(const int ch, const int group,
				     const address_bunch_t *const addresses,
				     const u32 masks[][2], const int memclk1067,
				     write_timing_t *const lower)
{
	program_write_timing(ch, group, lower, memclk1067);
	/* Coarse search for good t. */
	while (!write_training_test(addresses, masks[group])) {
		++lower->t;
		if (program_write_timing(ch, group, lower, memclk1067) < 0)
			return -1;
	}
	/* Step back, then fine search for good p. */
	if ((lower->f <= 0) && (lower->t <= 0))
		/* Can't step back, zero is good. */
		return 0;

	--lower->t;
	program_write_timing(ch, group, lower, memclk1067);
	while (!write_training_test(addresses, masks[group])) {
		++lower->p;
		if (program_write_timing(ch, group, lower, memclk1067) < 0)
			return -1;
	}

	return 0;
}
/*
 * Search the first failing write timing above *upper, which must start
 * at a known-good value (the caller passes lower + 3t).
 *
 * Increments t until the test fails, steps back one t, then increments
 * p until the test fails again; *upper then holds the first failing
 * timing on that path. Returns 0 on success, -1 if the starting value
 * already fails or the range was exhausted.
 */
static int write_training_find_upper(const int ch, const int group,
				     const address_bunch_t *const addresses,
				     const u32 masks[][2], const int memclk1067,
				     write_timing_t *const upper)
{
	if (program_write_timing(ch, group, upper, memclk1067) < 0)
		return -1;
	if (!write_training_test(addresses, masks[group])) {
		printk(BIOS_WARNING,
		       "Write training failure; limits too narrow.\n");
		return -1;
	}
	/* Coarse search for bad t. (The first iteration re-tests the
	   starting value already verified above.) */
	while (write_training_test(addresses, masks[group])) {
		++upper->t;
		if (program_write_timing(ch, group, upper, memclk1067) < 0)
			return -1;
	}
	/* Fine search for bad p. */
	--upper->t;
	program_write_timing(ch, group, upper, memclk1067);
	while (write_training_test(addresses, masks[group])) {
		++upper->p;
		if (program_write_timing(ch, group, upper, memclk1067) < 0)
			return -1;
	}

	return 0;
}
/*
 * Train one byte-lane group: find the lower and upper bound of the
 * working timing window and program the mean of both. A lower-bound
 * failure is fatal (die); an upper-bound failure only warns.
 */
static void write_training_per_group(const int ch, const int group,
				     const address_bunch_t *const addresses,
				     const u32 masks[][2], const int memclk1067)
{
	const int t_bound = memclk1067 ? 12 : 11;
	write_timing_t lower = { 0, 0, t_bound, 0 },
		       upper = { 0, 0, t_bound, 0 };

	/*** Search lower bound. ***/

	/* Start at -1f from current values.
	   NOTE(review): the bit positions read here (12/8/2) differ from
	   the CxWRTy_*_SHIFT positions written by program_write_timing();
	   presumably a separate current-value field — confirm. */
	const u32 reg = mchbar_read32(CxWRTy_MCHBAR(ch, group));
	lower.t = (reg >> 12) & 0xf;
	lower.p = (reg >> 8) & 0x7;
	lower.f = ((reg >> 2) & 0x3) - 1;

	if (write_training_find_lower(ch, group, addresses,
				      masks, memclk1067, &lower) < 0)
		die("Write training failure: lower bound.\n");
	print_write_timing(RAM_DEBUG, "Lower bound for ", group, ch, &lower);

	/*** Search upper bound. ***/

	/* Start at lower + 3t. */
	upper.t = lower.t + 3;
	upper.p = lower.p;
	upper.f = lower.f;

	if (write_training_find_upper(ch, group, addresses,
				      masks, memclk1067, &upper) < 0)
		printk(BIOS_WARNING, "Write training failure: upper bound.\n");
	print_write_timing(RAM_DEBUG, "Upper bound for ", group, ch, &upper);

	/*** Calculate and program mean value. ***/

	/* Flatten both bounds to fine (p) units, average, then split the
	   mean back into f.t.p form. */
	lower.t += lower.f * lower.t_bound;
	lower.p += lower.t << WRITE_TIMING_P_SHIFT;
	upper.t += upper.f * upper.t_bound;
	upper.p += upper.t << WRITE_TIMING_P_SHIFT;
	/* lower becomes the mean value. */
	const int mean_p = (lower.p + upper.p) >> 1;
	lower.f = mean_p / (lower.t_bound << WRITE_TIMING_P_SHIFT);
	lower.t = (mean_p >> WRITE_TIMING_P_SHIFT) % lower.t_bound;
	lower.p = mean_p & (WRITE_TIMING_P_BOUND - 1);
	program_write_timing(ch, group, &lower, memclk1067);
	printk(RAM_DEBUG, "Final timings for ");
	print_write_timing(BIOS_DEBUG, "", group, ch, &lower);
}
/*
 * Run write training for all populated channels. When both channels
 * carry the same raw-card class (F vs. A/B/C), all ranks are trained
 * with a common address set; otherwise each channel uses only its own
 * ranks. The byte-lane masks select which lanes each of the four
 * groups covers, depending on raw card type and memory clock.
 */
static void perform_write_training(const int memclk1067,
				   const dimminfo_t *const dimms)
{
	/* Whether each channel's DIMM is raw card type F. */
	const int cardF[] = { dimms[0].card_type == 0xf,
			      dimms[1].card_type == 0xf };
	int ch, r, group;

	address_bunch_t addr[2] = { { { 0, }, 0 }, { { 0, }, 0 }, };
	/* Add check if channel A is populated, i.e. if cardF[0] is valid.
	 * Otherwise we would write channel A registers when DIMM in channel B
	 * is of raw card type A, B or C (cardF[1] == 0) even if channel A is
	 * not populated.
	 * Needs raw card type A, B or C for testing. */
	if ((dimms[0].card_type != 0) && (cardF[0] == cardF[1])) {
		/* Common path for both channels. */
		FOR_EACH_POPULATED_RANK(dimms, ch, r)
			addr[0].addr[addr[0].count++] =
				raminit_get_rank_addr(ch, r);
	} else {
		FOR_EACH_POPULATED_RANK(dimms, ch, r)
			addr[ch].addr[addr[ch].count++] =
				raminit_get_rank_addr(ch, r);
	}

	FOR_EACH_CHANNEL(ch) if (addr[ch].count > 0) {
		const u32 (*const masks)[2] = (!cardF[ch])
			? write_training_bytelane_masks_abc[memclk1067]
			: write_training_bytelane_masks_f;
		for (group = 0; group < 4; ++group) {
			/* Skip groups with no byte lanes to test. */
			if (!masks[group][0] && !masks[group][1])
				continue;
			write_training_per_group(
				ch, group, &addr[ch], masks, memclk1067);
		}
	}
}
/*
 * Save the trained write timings to CMOS so they can be restored on S3
 * resume without re-training.
 */
static void write_training_store_results(void)
{
	u8 bytes[TOTAL_CHANNELS * 4 * 2]; /* two bytes per group */
	int ch, i;

	/* Store one T/P pair in one, F in the other byte. */
	/* We could save six bytes by putting all F values in two bytes. */
	FOR_EACH_CHANNEL(ch) {
		for (i = 0; i < 4; ++i) {
			const u32 reg = mchbar_read32(CxWRTy_MCHBAR(ch, i));
			/* t in the high nibble, p in the low bits. */
			bytes[(ch * 8) + (i * 2)] =
				(((reg & CxWRTy_T_MASK)
				  >> CxWRTy_T_SHIFT) << 4) |
				((reg & CxWRTy_P_MASK) >> CxWRTy_P_SHIFT);
			bytes[(ch * 8) + (i * 2) + 1] =
				((reg & CxWRTy_F_MASK) >> CxWRTy_F_SHIFT);
		}
	}

	/* Store everything in CMOS above 128 bytes. */
	for (i = 0; i < (TOTAL_CHANNELS * 4 * 2); ++i)
		cmos_write(bytes[i], CMOS_WRITE_TRAINING + i);
}
/*
 * Restore the write timings saved by write_training_store_results()
 * from CMOS and program them through program_write_timing(), so the
 * same normalization and D-field derivation applies as in training.
 */
static void write_training_restore_results(const int memclk1067)
{
	const int t_bound = memclk1067 ? 12 : 11;

	u8 bytes[TOTAL_CHANNELS * 4 * 2]; /* two bytes per group */
	int ch, i;

	/* Read from CMOS. */
	for (i = 0; i < (TOTAL_CHANNELS * 4 * 2); ++i)
		bytes[i] = cmos_read(CMOS_WRITE_TRAINING + i);

	/* Program with original program_write_timing(). */
	FOR_EACH_CHANNEL(ch) {
		for (i = 0; i < 4; ++i) {
			write_timing_t timing = { 0, 0, t_bound, 0 };
			/* Unpack f, t (high nibble), p (low three bits). */
			timing.f = bytes[(ch * 8) + (i * 2) + 1] & 3;
			timing.t = bytes[(ch * 8) + (i * 2)] >> 4;
			timing.p = bytes[(ch * 8) + (i * 2)] & 7;
			program_write_timing(ch, i, &timing, memclk1067);
			printk(BIOS_DEBUG, "Restored timings for group %d "
			       "on channel %d: %d.%d.%d\n",
			       i, ch, timing.f, timing.t, timing.p);
		}
	}
}
raminit_write_training(const mem_clock_t ddr3clock,const dimminfo_t * const dimms,const int s3resume)634 void raminit_write_training(const mem_clock_t ddr3clock,
635 const dimminfo_t *const dimms,
636 const int s3resume)
637 {
638 const int memclk1067 = ddr3clock == MEM_CLOCK_1067MT;
639
640 if (!s3resume) {
641 perform_write_training(memclk1067, dimms);
642 write_training_store_results();
643 } else {
644 write_training_restore_results(memclk1067);
645 }
646 raminit_reset_readwrite_pointers();
647 }
648