/* SPDX-License-Identifier: GPL-2.0-or-later */

/*
 * Intel Pentium L2 Cache initialization.
 * This code was developed by reverse engineering
 * the BIOS. Where the code accesses documented
 * registers I have added comments as best I can.
 * Some undocumented registers on the Pentium II are
 * used so some of the documentation is incomplete.
 *
 * References:
 * Intel Architecture Software Developer's Manual
 * Volume 3B: System Programming Guide, Part 2 (#253669)
 * Appendix B.9
 */

/* This code is ported from coreboot v1.
 * The L2 cache initialization sequence here applies only to SECC/SECC2 P6
 * family CPUs with Klamath (63x), Deschutes (65x) and Katmai (67x) cores.
 * It is not required for Coppermine (68x) and Tualatin (6bx) cores.
 * It is currently not known whether Celerons with the Mendocino (66x) core
 * require this special initialization.
 * Covington-core Celerons do not have L2 cache.
 */

#include <console/console.h>
#include <cpu/cpu.h>
#include <cpu/intel/l2_cache.h>
#include <cpu/x86/cache.h>
#include <cpu/x86/msr.h>
#include <stdint.h>

/* Latency Tables */
struct latency_entry {
	u8 key;
	u8 value;
};
/*
 * Latency maps for Deschutes and Katmai.
 * No such mapping is available for Klamath.
 *
 * Cache latency to
 * be written to L2 --------++++
 * control register         ||||
 *    0000 00 xx -----> 000 cccc 0
 *    ||||    ||
 *    ||||    00 66MHz
 *    ||||    10 100MHz
 *    ||||    01 133MHz (Katmai "B" only)
 *    ++++------ CPU frequency multiplier
 *
 *    0000 2x
 *    0001 3x
 *    0010 4x
 *    0011 5x
 *    0100 2.5x
 *    0101 3.5x
 *    0110 4.5x
 *    0111 5.5x
 *    1000 6x
 *    1001 7x
 *    1010 8x
 *    1011 Reserved
 *    1100 6.5x
 *    1101 7.5x
 *    1110 1.5x
 *    1111 2x
 */
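/*
 * Worked example (taken from the tables below): key 0x12 = multiplier code
 * 0001 (3x) with FSB code 10 (100 MHz); latency_650_t0 maps it to value
 * 0x06, i.e. latency code 3 (0x06 >> 1) in the BBL_CR_CTL3[4:1] format
 * written back by calculate_l2_latency() below.
 */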
static const struct latency_entry latency_650_t0[] = {
	{0x10, 0x02}, {0x50, 0x02}, {0x20, 0x04}, {0x60, 0x06},
	{0x00, 0x08}, {0x40, 0x0C}, {0x12, 0x06}, {0x52, 0x0A},
	{0x22, 0x0E}, {0x62, 0x10}, {0x02, 0x10}, {0xFF, 0x00}
};

static const struct latency_entry latency_650_t1[] = {
	{0x12, 0x14}, {0x52, 0x16}, {0x22, 0x16}, {0x62, 0x16},
	{0xFF, 0x00}
};

static const struct latency_entry latency_670_t0[] = {
	{0x60, 0x06}, {0x00, 0x08}, {0x12, 0x06}, {0x52, 0x0A},
	{0x22, 0x0E}, {0x62, 0x10}, {0x02, 0x10}, {0x42, 0x02},
	{0x11, 0x0E}, {0x51, 0x0C}, {0x21, 0x02}, {0x61, 0x10},
	{0x01, 0x10}, {0x41, 0x02}, {0xFF, 0x00}
};

static const struct latency_entry latency_670_t1[] = {
	{0x22, 0x18}, {0x62, 0x18}, {0x02, 0x1A}, {0x11, 0x18},
	{0xFF, 0x00}
};

static const struct latency_entry latency_670_t2[] = {
	{0x22, 0x12}, {0x62, 0x14}, {0x02, 0x16}, {0x42, 0x1E},
	{0x11, 0x12}, {0x51, 0x16}, {0x21, 0x1E}, {0x61, 0x14},
	{0x01, 0x16}, {0x41, 0x1E}, {0xFF, 0x00}
};

/* Latency tables for 650 model/type */
static const struct latency_entry *latency_650[] = {
	latency_650_t0, latency_650_t1, latency_650_t1
};

/* Latency tables for 670 model/type */
static const struct latency_entry *latency_670[] = {
	latency_670_t0, latency_670_t1, latency_670_t2
};

int calculate_l2_latency(void)
{
	u32 eax, l, signature;
	const struct latency_entry *latency_table, *le;
	msr_t msr;

	/* First, attempt to get cache latency value from
	   IA32_PLATFORM_ID[56:53]. (L2 Cache Latency Read)
	 */
	msr = rdmsr(IA32_PLATFORM_ID);

	printk(BIOS_DEBUG, "rdmsr(IA32_PLATFORM_ID) = %x:%x\n", msr.hi, msr.lo);

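	/* (msr.hi >> 20) & 0x1e keeps IA32_PLATFORM_ID bits [56:53]
	 * (msr.hi bits [24:21]), already shifted into the BBL_CR_CTL3[4:1]
	 * latency field position.
	 */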
	l = (msr.hi >> 20) & 0x1e;

	if (l == 0) {
		/* If latency value isn't available from
		   IA32_PLATFORM_ID[56:53], read it from
		   L2 control register 0 for lookup from
		   tables. */
		int t, a;

		/* The raw code is read from L2 register 0, bits [7:4]. */
		a = read_l2(0);
		if (a < 0)
			return -1;

		a &= 0xf0;

		if ((a & 0x20) == 0)
			t = 0;
		else if (a == 0x20)
			t = 1;
		else if (a == 0x30)
			t = 2;
		else
			return -1;

		printk(BIOS_DEBUG, "L2 latency type = %x\n", t);

		/* Get CPUID family/model */
		signature = cpuid_eax(1) & 0xfff0;

		/* Read EBL_CR_POWERON */
		msr = rdmsr(EBL_CR_POWERON);
		/* Get clock multiplier and FSB frequency.
		 * Multiplier is in [25:22].
		 * FSB is in [19:18] in Katmai, [19] in Deschutes ([18] is zero
		 * for them).
		 */
		eax = msr.lo >> 18;
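		/* After the shift, the FSB code (EBL_CR_POWERON[19:18]) sits
		 * in eax[1:0] and the multiplier code ([25:22]) in eax[7:4],
		 * matching the key layout of the latency tables above.
		 */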
		if (signature == 0x650) {
			eax &= 0xf2;
			latency_table = latency_650[t];
		} else if (signature == 0x670) {
			eax &= 0xf3;
			latency_table = latency_670[t];
		} else
			return -1;

		/* Search table for matching entry */
		for (le = latency_table; le->key != eax; le++) {
			/* Fail if we get to the end of the table */
			if (le->key == 0xff) {
				printk(BIOS_DEBUG,
				       "Could not find key %02x in latency table\n",
				       eax);
				return -1;
			}
		}

		l = le->value;
	}

	printk(BIOS_DEBUG, "L2 Cache latency is %d\n", l / 2);

	/* Writes the calculated latency in BBL_CR_CTL3[4:1]. */
	msr = rdmsr(BBL_CR_CTL3);
	msr.lo &= 0xffffffe1;
	msr.lo |= l;
	wrmsr(BBL_CR_CTL3, msr);

	return 0;
}

/* Setup address, data_high:data_low into the L2
 * control registers and then issue command with correct cache way
 */
int signal_l2(u32 address, u32 data_high, u32 data_low, int way, u8 command)
{
	int i;
	msr_t msr;

	/* Write L2 Address to BBL_CR_ADDR */
	msr.lo = address;
	msr.hi = 0;
	wrmsr(BBL_CR_ADDR, msr);

	/* Write data to BBL_CR_D{0..3} */
	msr.lo = data_low;
	msr.hi = data_high;
	for (i = BBL_CR_D0; i <= BBL_CR_D3; i++)
		wrmsr(i, msr);

	/* Put the command and way into BBL_CR_CTL */
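	/* The mask 0xfffffce0 clears the command field in bits [4:0] and the
	 * way field in bits [9:8] before the new values are ORed in.
	 */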
	msr = rdmsr(BBL_CR_CTL);
	msr.lo = (msr.lo & 0xfffffce0) | command | (way << 8);
	wrmsr(BBL_CR_CTL, msr);

	/* Trigger L2 controller */
	msr.lo = 0;
	msr.hi = 0;
	wrmsr(BBL_CR_TRIG, msr);

	/* Poll the controller to see when done */
	for (i = 0; i < 0x100; i++) {
		/* Read BBL_CR_BUSY */
		msr = rdmsr(BBL_CR_BUSY);
		/* If not busy then return */
		if ((msr.lo & 1) == 0)
			return 0;
	}

	/* Return timeout code */
	return -1;
}

/* Read the L2 Cache controller register at given address */
int read_l2(u32 address)
{
	msr_t msr;

	/* Send a L2 Control Register Read to L2 controller */
	if (signal_l2(address << 5, 0, 0, 0, L2CMD_CR) != 0)
		return -1;

	/* If OK then get the result from BBL_CR_ADDR */
	msr = rdmsr(BBL_CR_ADDR);
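	/* The register contents appear to come back in the upper bits of
	 * BBL_CR_ADDR; 0x15 is 21 decimal, which lines up with the bit-21
	 * data field used by write_l2() below.
	 */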
	return (msr.lo >> 0x15);
}

/* Write data into the L2 controller register at address */
int write_l2(u32 address, u32 data)
{
	int v1, v2, i;

	v1 = read_l2(0);
	if (v1 < 0)
		return -1;

	v2 = read_l2(2);
	if (v2 < 0)
		return -1;

	if ((v1 & 0x20) == 0) {
		v2 &= 0x3;
		v2++;
	} else
		v2 &= 0x7;

	/* This write has to be replicated to a number of places; it is not
	 * clear why.
	 */

	for (i = 0; i < v2; i++) {
		u32 data1, data2;
		// Bits legend
		// data1   = ffffffff
		// data2   = 000000dc
		// address = 00aaaaaa
		// Final address signaled:
		// 000fffff fff000c0 000dcaaa aaa00000
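		// Worked example (illustrative values, not from real
		// hardware): address = 2, data = 0xAA, i = 1 gives
		// data1 = 0x15420800 and a final signalled address of
		// (2 << 5) | data1 = 0x15420840, i.e. f = 0xAA in bits
		// [28:21], c = 1 in bits 17 and 11, d = 0 in bit 12, and
		// the register address in bits [10:5].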
		data1 = data & 0xff;
		data1 = data1 << 21;
		data2 = (i << 11) & 0x1800;
		data1 |= data2;
		data2 <<= 6;
		data2 &= 0x20000;
		data1 |= data2;

		/* Signal L2 controller */
		if (signal_l2((address << 5) | data1, 0, 0, 0, 3))
			return -1;
	}
	return 0;
}

/* Write data_high:data_low into the cache at address1. Test address2
 * to see if the same data is returned. Return 0 if the data matches,
 * the lower 16 bits of the mismatched data on a mismatch, and -1
 * on error.
 */
int test_l2_address_alias(u32 address1, u32 address2,
			  u32 data_high, u32 data_low)
{
	int d;
	msr_t msr;

	/* Tag Write with Data Write for L2 */
	if (signal_l2(address1, data_high, data_low, 0, L2CMD_TWW))
		return -1;

	/* Tag Read with Data Read for L2 */
	if (signal_l2(address2, 0, 0, 0, L2CMD_TRR))
		return -1;

	/* Read data from BBL_CR_D[0-3] */
	for (d = BBL_CR_D0; d <= BBL_CR_D3; d++) {
		msr = rdmsr(d);
		if (msr.lo != data_low || msr.hi != data_high)
			return (msr.lo & 0xffff);
	}

	return 0;
}

/* Calculates the L2 cache size.
 *
 * Reference: Intel(R) 64 and IA-32 Architectures Software Developer's Manual
 *            Volume 3B: System Programming Guide, Part 2, Intel pub. 253669,
 *            pg. B-172.
 *
 */
int calculate_l2_cache_size(void)
{
	int v;
	msr_t msr;
	u32 cache_setting;
	u32 address, size, eax, bblcr3;

	v = read_l2(0);
	if (v < 0)
		return -1;
	if ((v & 0x20) == 0) {
		msr = rdmsr(BBL_CR_CTL3);
		bblcr3 = msr.lo & ~BBLCR3_L2_SIZE;
		/*
		 * Successively write each possible per-bank cache size into
		 * BBL_CR_CTL3[17:13], starting from 256KB (00001) up to 4MB
		 * (10000), and read back the last value written and accepted
		 * by the cache.
		 *
		 * No idea why these bits are writable at all.
		 */
		for (cache_setting = BBLCR3_L2_SIZE_256K;
		     cache_setting <= BBLCR3_L2_SIZE_4M; cache_setting <<= 1) {
			eax = bblcr3 | cache_setting;
			msr.lo = eax;
			wrmsr(BBL_CR_CTL3, msr);
			msr = rdmsr(BBL_CR_CTL3);

			/* Value not accepted */
			if (msr.lo != eax)
				break;
		}

		/* Backtrack to the last value that worked... */
		cache_setting >>= 1;

		/* and write it into BBL_CR_CTL3 */
		msr.lo &= ~BBLCR3_L2_SIZE;
		msr.lo |= (cache_setting & BBLCR3_L2_SIZE);

		wrmsr(BBL_CR_CTL3, msr);

		printk(BIOS_DEBUG, "Maximum cache mask is %x\n", cache_setting);

		/* For now, BBL_CR_CTL3 holds the highest cache "size" that the
		 * register will accept. Now we ping the cache and see where
		 * it wraps.
		 */

		/* Write aaaaaaaa:aaaaaaaa to address 0 in the L2 cache.
		 * If this "alias test" returns nonzero, the cache cannot be
		 * written properly and we have a problem.
		 */
		v = test_l2_address_alias(0, 0, 0xaaaaaaaa, 0xaaaaaaaa);
		if (v != 0)
			return -1;

		/* Start with 32K wrap point (256KB actually) */
		size = 1;
		address = 0x8000;

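		/* Each pass writes a pattern at `address` and reads it back
		 * at address 0; if the same data comes back, the cache has
		 * wrapped and the loop stops, with the size derived from the
		 * wrap point.
		 */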
		while (1) {
			v = test_l2_address_alias(address, 0, 0x55555555,
						  0x55555555);
			// Write failed.
			if (v < 0)
				return -1;
			// It wraps here.
			else if (v == 0)
				break;

			size <<= 1;
			address <<= 1;

			if (address > 0x200000)
				return -1;
		}

		/* Mask size */
		size &= 0x3e;

		/* Shift to [17:13] */
		size <<= 12;

		/* Set this into BBL_CR_CTL3 */
		msr = rdmsr(BBL_CR_CTL3);
		msr.lo &= ~BBLCR3_L2_SIZE;
		msr.lo |= size;
		wrmsr(BBL_CR_CTL3, msr);

		printk(BIOS_DEBUG, "L2 Cache Mask is %x\n", size);

		/* Shift to [6:2] */
		size >>= 11;

		v = read_l2(2);

		if (v < 0)
			return -1;

		printk(BIOS_DEBUG, "L2(2): %x ", v);

		v &= 0x3;

		/* Shift size right by v */
		size >>= v;

		/* Or in this size */
		v |= size;

		printk(BIOS_DEBUG, "-> %x\n", v);

		if (write_l2(2, v) != 0)
			return -1;
	} else {
		// Some cache size information is available from L2 registers.
		// Work from there.
		int b, c;

		v = read_l2(2);

		printk(BIOS_DEBUG, "L2(2) = %x\n", v);

		if (v < 0)
			return -1;

		// L2 register 2 bitmap: cc---bbb
		b = v & 0x7;
		c = v >> 6;

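		// Note: * binds tighter than <<, so this shifts 1 left by
		// the product c * b.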
		v = 1 << c * b;

		v &= 0xf;

		printk(BIOS_DEBUG, "Calculated a = %x\n", v);

		if (v == 0)
			return -1;

		/* Shift to 17:14 */
		v <<= 14;

		/* Write this size into BBL_CR_CTL3 */
		msr = rdmsr(BBL_CR_CTL3);
		msr.lo &= ~BBLCR3_L2_SIZE;
		msr.lo |= v;
		wrmsr(BBL_CR_CTL3, msr);
	}

	return 0;
}

// L2 physical address range can be found from L2 control register 3,
// bits [2:0].
int calculate_l2_physical_address_range(void)
{
	int r0, r3;
	msr_t msr;

	r3 = read_l2(3);
	if (r3 < 0)
		return -1;

	r0 = read_l2(0);
	if (r0 < 0)
		return -1;

	if (r0 & 0x20)
		r3 = 0x7;
	else
		r3 &= 0x7;

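	/* r3 ranges from 0 to 7, so this reports (1 << r3) * 512 MB,
	 * i.e. anywhere from 512MB up to 64GB.
	 */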
	printk(BIOS_DEBUG, "L2 Physical Address Range is %dM\n",
	       (1 << r3) * 512);

	/* Shift into [22:20] to be saved into BBL_CR_CTL3. */
	r3 = r3 << 20;

	msr = rdmsr(BBL_CR_CTL3);
	msr.lo &= ~BBLCR3_L2_PHYSICAL_RANGE;
	msr.lo |= r3;
	wrmsr(BBL_CR_CTL3, msr);

	return 0;
}

int set_l2_ecc(void)
{
	u32 eax;
	const u32 data1 = 0xaa55aa55;
	const u32 data2 = 0xaaaaaaaa;
	msr_t msr;

	/* Set User Supplied ECC in BBL_CR_CTL */
	msr = rdmsr(BBL_CR_CTL);
	msr.lo |= BBLCR3_L2_SUPPLIED_ECC;
	wrmsr(BBL_CR_CTL, msr);

	/* Write a value into the L2 Data ECC register BBL_CR_DECC */
	msr.lo = data1;
	msr.hi = 0;
	wrmsr(BBL_CR_DECC, msr);

	if (test_l2_address_alias(0, 0, data2, data2) < 0)
		return -1;

	/* Read back ECC from BBL_CR_DECC */
	msr = rdmsr(BBL_CR_DECC);
	eax = msr.lo;

	if (eax == data1) {
		printk(BIOS_DEBUG, "L2 ECC Checking is enabled\n");

		/* Set ECC Check Enable in BBL_CR_CTL3 */
		msr = rdmsr(BBL_CR_CTL3);
		msr.lo |= BBLCR3_L2_ECC_CHECK_ENABLE;
		wrmsr(BBL_CR_CTL3, msr);
	}

	/* Clear User Supplied ECC in BBL_CR_CTL */
	msr = rdmsr(BBL_CR_CTL);
	msr.lo &= ~BBLCR3_L2_SUPPLIED_ECC;
	wrmsr(BBL_CR_CTL, msr);

	return 0;
}

/*
 * This is the function called from CPU initialization
 * driver to set up P6 family L2 cache.
 */

int p6_configure_l2_cache(void)
{
	msr_t msr, bblctl3;
	unsigned int eax;
	u16 signature;
	int cache_size, bank;
	int result, calc_eax;
	int v, a;

	int badclk1, badclk2, clkratio;
	int crctl3_or;

	printk(BIOS_INFO, "Configuring L2 cache... ");

	/* Read BBL_CR_CTL3 */
	bblctl3 = rdmsr(BBL_CR_CTL3);
	/* If bit 23 (L2 Hardware disable) is set then done */
	/* These would be Covington core Celerons with no L2 cache */
	if (bblctl3.lo & BBLCR3_L2_NOT_PRESENT) {
		printk(BIOS_INFO, "hardware disabled\n");
		return 0;
	}

	signature = cpuid_eax(1) & 0xfff0;

	/* Klamath-specific bit settings for certain
	   preliminary checks.
	 */
	if (signature == 0x630) {
		clkratio = 0x1c00000;
		badclk2 = 0x1000000;
		crctl3_or = 0x44000;
	} else {
		clkratio = 0x3c00000;
		badclk2 = 0x3000000;
		crctl3_or = 0x40000;
	}
	badclk1 = 0xc00000;

	/* Read EBL_CR_POWERON */
	msr = rdmsr(EBL_CR_POWERON);
	eax = msr.lo;
	/* Mask out [22-25] Clock frequency ratio */
	eax &= clkratio;
	if (eax == badclk1 || eax == badclk2) {
		printk(BIOS_ERR, "Incorrect clock frequency ratio %x\n", eax);
		return -1;
	}

	disable_cache();

	/* Mask out from BBL_CR_CTL3:
	 * [0] L2 Configured
	 * [5] ECC Check Enable
	 * [6] Address Parity Check Enable
	 * [7] CRTN Parity Check Enable
	 * [8] L2 Enabled
	 * [12:11] Number of L2 banks
	 * [17:13] Cache size per bank
	 * [18] (Set below)
	 * [22:20] L2 Physical Address Range Support
	 */
	bblctl3.lo &= 0xff88061e;
	/* Set:
	 * [17:13] = 00010 = 512Kbyte Cache size per bank (63x)
	 * [17:13] = 00000 = 128Kbyte Cache size per bank (all others)
	 * [18] Cache state error checking enable
	 */
	bblctl3.lo |= crctl3_or;

	/* Write BBL_CR_CTL3 */
	wrmsr(BBL_CR_CTL3, bblctl3);

	if (signature != 0x630) {
		eax = bblctl3.lo;

		/* Set the l2 latency in BBL_CR_CTL3 */
		if (calculate_l2_latency() != 0)
			goto bad;

		/* Read the new latency values back */
		bblctl3 = rdmsr(BBL_CR_CTL3);
		calc_eax = bblctl3.lo;

		/* Write back the original default value */
		bblctl3.lo = eax;
		wrmsr(BBL_CR_CTL3, bblctl3);

		/* Write BBL_CR_CTL3[27:26] (reserved??) to bits [1:0] of L2
		 * register 4. Apparently all other bits must be preserved,
		 * hence this code.
		 */

		v = (calc_eax >> 26) & 0x3;

		printk(BIOS_DEBUG, "write_l2(4, %x)\n", v);

		a = read_l2(4);
		if (a >= 0) {
			a &= 0xfffc;
			a |= v;
			a = write_l2(4, a);
			/* a now contains result code from write_l2() */
		}
		if (a != 0)
			goto bad;

		/* Restore the correct latency value into BBL_CR_CTL3 */
		bblctl3.lo = calc_eax;
		wrmsr(BBL_CR_CTL3, bblctl3);
	} /* ! 63x CPU */

	/* Read L2 register 0 */
	v = read_l2(0);

	/* If L2(0)[5] set (and can be read properly), enable CRTN and address
	 * parity
	 */
	if (v >= 0 && (v & 0x20)) {
		bblctl3 = rdmsr(BBL_CR_CTL3);
		bblctl3.lo |= (BBLCR3_L2_ADDR_PARITY_ENABLE |
			       BBLCR3_L2_CRTN_PARITY_ENABLE);
		wrmsr(BBL_CR_CTL3, bblctl3);
	}

	/* If something goes wrong at L2 ECC setup, cache ECC
	 * will just remain disabled.
	 */
	set_l2_ecc();

	if (calculate_l2_physical_address_range() != 0) {
		printk(BIOS_ERR,
		       "Failed to calculate L2 physical address range\n");
		goto bad;
	}

	if (calculate_l2_cache_size() != 0) {
		printk(BIOS_ERR, "Failed to calculate L2 cache size\n");
		goto bad;
	}

	/* Turn on cache. Only L1 is active at this time. */
	enable_cache();

	/* Get the calculated cache size from BBL_CR_CTL3[17:13] */
	bblctl3 = rdmsr(BBL_CR_CTL3);
	cache_size = (bblctl3.lo & BBLCR3_L2_SIZE);
	if (cache_size == 0)
		cache_size = 0x1000;
	cache_size = cache_size << 3;
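	/* cache_size now appears to be the per-bank size of one way in
	 * bytes: e.g. a 256KB per-bank setting ([17:13] = 00001, i.e.
	 * 0x2000) becomes 0x10000 = 64KB, and the loop below then tags
	 * every 32-byte line in each of the 4 ways.
	 */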

	/* TODO: Cache size above is per bank. We're supposed to get
	 * the number of banks from BBL_CR_CTL3[12:11].
	 * Confirm that this still provides the correct answer.
	 */
	bank = (bblctl3.lo >> 11) & 0x3;
	if (bank == 0)
		bank = 1;

	printk(BIOS_INFO, "size %dK... ", cache_size * bank * 4 / 1024);

	/* Write to all cache lines to initialize */

	while (cache_size > 0) {
		/* Each cache line is 32 bytes. */
		cache_size -= 32;

		/* Update each way */

		/* We're supposed to get L2 associativity from
		 * BBL_CR_CTL3[10:9]. But this code only applies to certain
		 * members of the P6 processor family and since all P6
		 * processors have 4-way L2 cache, we can safely assume
		 * 4 way for all cache operations.
		 */

		for (v = 0; v < 4; v++) {
			/* Send Tag Write w/Data Write (TWW) to L2 controller
			 * MESI = Invalid
			 */
			if (signal_l2(cache_size, 0, 0, v, L2CMD_TWW
				      | L2CMD_MESI_I) != 0) {
				printk(BIOS_ERR,
				       "Failed on signal_l2(%x, %x)\n",
				       cache_size, v);
				goto bad;
			}
		}
	}
	printk(BIOS_DEBUG, "L2 Cache lines initialized\n");

	/* Disable cache */
	disable_cache();

	/* Set L2 cache configured in BBL_CR_CTL3 */
	bblctl3 = rdmsr(BBL_CR_CTL3);
	bblctl3.lo |= BBLCR3_L2_CONFIGURED;
	wrmsr(BBL_CR_CTL3, bblctl3);

	/* Invalidate cache and discard unsaved writes */
	asm volatile ("invd");

	/* Write 0 to L2 control register 5 */
	if (write_l2(5, 0) != 0) {
		printk(BIOS_ERR, "write_l2(5, 0) failed\n");
		goto done;
	}

	bblctl3 = rdmsr(BBL_CR_CTL3);
	if (signature == 0x650) {
		/* Change the L2 latency to 0101 then back to
		 * original value. I don't know why this is needed - dpd
		 */
		eax = bblctl3.lo;
		bblctl3.lo &= ~BBLCR3_L2_LATENCY;
		bblctl3.lo |= 0x0a;
		wrmsr(BBL_CR_CTL3, bblctl3);
		bblctl3.lo = eax;
		wrmsr(BBL_CR_CTL3, bblctl3);
	}

	/* Enable L2 in BBL_CR_CTL3 */
	bblctl3.lo |= BBLCR3_L2_ENABLED;
	wrmsr(BBL_CR_CTL3, bblctl3);

	/* Turn on cache. Both L1 and L2 are now active. Wahoo! */
done:
	result = 0;
	goto out;
bad:
	result = -1;
out:
	printk(BIOS_INFO, "done.\n");
	return result;
}