xref: /aosp_15_r20/external/coreboot/src/cpu/intel/slot_1/l2_cache.c (revision b9411a12aaaa7e1e6a6fb7c5e057f44ee179a49c)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 
3 /*
4  * Intel Pentium L2 Cache initialization.
5  * This code was developed by reverse engineering
6  * the BIOS. Where the code accesses documented
7  * registers I have added comments as best I can.
8  * Some undocumented registers on the Pentium II are
9  * used so some of the documentation is incomplete.
10  *
11  * References:
12  * Intel Architecture Software Developer's Manual
13  * Volume 3B: System Programming Guide, Part 2 (#253669)
14  * Appendix B.9
15  */
16 
/* This code is ported from coreboot v1.
 * The L2 cache initialization sequence here only applies to SECC/SECC2 P6
 * family CPUs with Klamath (63x), Deschutes (65x) and Katmai (67x) cores.
 * It is not required for Coppermine (68x) and Tualatin (6bx) cores.
 * It is currently not known whether Celerons with the Mendocino (66x) core
 * require the special initialization.
 * Covington-core Celerons do not have an L2 cache.
 */
25 
26 #include <console/console.h>
27 #include <cpu/cpu.h>
28 #include <cpu/intel/l2_cache.h>
29 #include <cpu/x86/cache.h>
30 #include <cpu/x86/msr.h>
31 #include <stdint.h>
32 
33 /* Latency Tables */
34 struct latency_entry {
35 	u8 key;
36 	u8 value;
37 };
38 /*
39 Latency maps for Deschutes and Katmai.
40 No such mapping is available for Klamath.
41 
42 Cache latency to
43 be written to L2 -----++++
44 control register      ||||
45 0000 xx 00 -----> 000 cccc 0
46 ||||    00 66MHz
47 ||||    10 100MHz
48 ||||    01 133MHz (Katmai "B" only)
49 ++++------ CPU frequency multiplier
50 
51 0000 2x
52 0001 3x
53 0010 4x
54 0011 5x
55 0100 2.5x
56 0101 3.5x
57 0110 4.5x
58 0111 5.5x
59 1000 6x
60 1001 7x
61 1010 8x
62 1011 Reserved
63 1100 6.5x
64 1101 7.5x
65 1110 1.5x
66 1111 2x
67 
68 */
69 static const struct latency_entry latency_650_t0[] = {
70 	{0x10, 0x02}, {0x50, 0x02}, {0x20, 0x04}, {0x60, 0x06},
71 	{0x00, 0x08}, {0x40, 0x0C}, {0x12, 0x06}, {0x52, 0x0A},
72 	{0x22, 0x0E}, {0x62, 0x10}, {0x02, 0x10}, {0xFF, 0x00}
73 };
74 
75 static const struct latency_entry latency_650_t1[] = {
76 	{0x12, 0x14}, {0x52, 0x16}, {0x22, 0x16}, {0x62, 0x16},
77 	{0xFF, 0x00}
78 };
79 
80 static const struct latency_entry latency_670_t0[] = {
81 	{0x60, 0x06}, {0x00, 0x08}, {0x12, 0x06}, {0x52, 0x0A},
82 	{0x22, 0x0E}, {0x62, 0x10}, {0x02, 0x10}, {0x42, 0x02},
83 	{0x11, 0x0E}, {0x51, 0x0C}, {0x21, 0x02}, {0x61, 0x10},
84 	{0x01, 0x10}, {0x41, 0x02}, {0xFF, 0x00}
85 };
86 
87 static const struct latency_entry latency_670_t1[] = {
88 	{0x22, 0x18}, {0x62, 0x18}, {0x02, 0x1A}, {0x11, 0x18},
89 	{0xFF, 0x00}
90 };
91 
92 static const struct latency_entry latency_670_t2[] = {
93 	{0x22, 0x12}, {0x62, 0x14}, {0x02, 0x16}, {0x42, 0x1E},
94 	{0x11, 0x12}, {0x51, 0x16}, {0x21, 0x1E}, {0x61, 0x14},
95 	{0x01, 0x16}, {0x41, 0x1E}, {0xFF, 0x00}
96 };
97 
98 /* Latency tables for 650 model/type */
99 static const struct latency_entry *latency_650[] = {
100 	latency_650_t0, latency_650_t1, latency_650_t1
101 };
102 
103 /* Latency tables for 670 model/type */
104 static const struct latency_entry *latency_670[] = {
105 	latency_670_t0, latency_670_t1, latency_670_t2
106 };
107 
calculate_l2_latency(void)108 int calculate_l2_latency(void)
109 {
110 	u32 eax, l, signature;
111 	const struct latency_entry *latency_table, *le;
112 	msr_t msr;
113 
114 	/* First, attempt to get cache latency value from
115 	   IA32_PLATFORM_ID[56:53]. (L2 Cache Latency Read)
116 	 */
117 	msr = rdmsr(IA32_PLATFORM_ID);
118 
119 	printk(BIOS_DEBUG, "rdmsr(IA32_PLATFORM_ID) = %x:%x\n", msr.hi, msr.lo);
120 
121 	l = (msr.hi >> 20) & 0x1e;
122 
123 	if (l == 0) {
124 		/* If latency value isn't available from
125 		   IA32_PLATFORM_ID[56:53], read it from
126 		   L2 control register 0 for lookup from
127 		   tables. */
128 		int t, a;
129 
130 		/* The raw code is read from L2 register 0, bits [7:4]. */
131 		a = read_l2(0);
132 		if (a < 0)
133 			return -1;
134 
135 		a &= 0xf0;
136 
137 		if ((a & 0x20) == 0)
138 			t = 0;
139 		else if (a == 0x20)
140 			t = 1;
141 		else if (a == 0x30)
142 			t = 2;
143 		else
144 			return -1;
145 
146 		printk(BIOS_DEBUG, "L2 latency type = %x\n", t);
147 
148 		/* Get CPUID family/model */
149 		signature = cpuid_eax(1) & 0xfff0;
150 
151 		/* Read EBL_CR_POWERON */
152 		msr = rdmsr(EBL_CR_POWERON);
153 		/* Get clock multiplier and FSB frequency.
154 		 * Multiplier is in [25:22].
155 		 * FSB is in [19:18] in Katmai, [19] in Deschutes ([18] is zero
156 		 * for them).
157 		 */
158 		eax = msr.lo >> 18;
159 		if (signature == 0x650) {
160 			eax &= ~0xf2;
161 			latency_table = latency_650[t];
162 		} else if (signature == 0x670) {
163 			eax &= ~0xf3;
164 			latency_table = latency_670[t];
165 		} else
166 			return -1;
167 
168 		/* Search table for matching entry */
169 		for (le = latency_table; le->key != eax; le++) {
170 			/* Fail if we get to the end of the table */
171 			if (le->key == 0xff) {
172 				printk(BIOS_DEBUG,
173 				   "Could not find key %02x in latency table\n",
174 				   eax);
175 				return -1;
176 			}
177 		}
178 
179 		l = le->value;
180 	}
181 
182 	printk(BIOS_DEBUG, "L2 Cache latency is %d\n", l / 2);
183 
184 	/* Writes the calculated latency in BBL_CR_CTL3[4:1]. */
185 	msr = rdmsr(BBL_CR_CTL3);
186 	msr.lo &= 0xffffffe1;
187 	msr.lo |= l;
188 	wrmsr(BBL_CR_CTL3, msr);
189 
190 	return 0;
191 }
192 
193 /* Setup address, data_high:data_low into the L2
194  * control registers and then issue command with correct cache way
195  */
signal_l2(u32 address,u32 data_high,u32 data_low,int way,u8 command)196 int signal_l2(u32 address, u32 data_high, u32 data_low, int way, u8 command)
197 {
198 	int i;
199 	msr_t msr;
200 
201 	/* Write L2 Address to BBL_CR_ADDR */
202 	msr.lo = address;
203 	msr.hi = 0;
204 	wrmsr(BBL_CR_ADDR, msr);
205 
206 	/* Write data to BBL_CR_D{0..3} */
207 	msr.lo = data_low;
208 	msr.hi = data_high;
209 	for (i = BBL_CR_D0; i <= BBL_CR_D3; i++)
210 		wrmsr(i, msr);
211 
212 	/* Put the command and way into BBL_CR_CTL */
213 	msr = rdmsr(BBL_CR_CTL);
214 	msr.lo = (msr.lo & 0xfffffce0) | command | (way << 8);
215 	wrmsr(BBL_CR_CTL, msr);
216 
217 	/* Trigger L2 controller */
218 	msr.lo = 0;
219 	msr.hi = 0;
220 	wrmsr(BBL_CR_TRIG, msr);
221 
222 	/* Poll the controller to see when done */
223 	for (i = 0; i < 0x100; i++) {
224 		/* Read BBL_CR_BUSY */
225 		msr = rdmsr(BBL_CR_BUSY);
226 		/* If not busy then return */
227 		if ((msr.lo & 1) == 0)
228 			return 0;
229 	}
230 
231 	/* Return timeout code */
232 	return -1;
233 }
234 
235 /* Read the L2 Cache controller register at given address */
read_l2(u32 address)236 int read_l2(u32 address)
237 {
238 	msr_t msr;
239 
240 	/* Send a L2 Control Register Read to L2 controller */
241 	if (signal_l2(address << 5, 0, 0, 0, L2CMD_CR) != 0)
242 		return -1;
243 
244 	/* If OK then get the result from BBL_CR_ADDR */
245 	msr = rdmsr(BBL_CR_ADDR);
246 	return (msr.lo >> 0x15);
247 }
248 
249 /* Write data into the L2 controller register at address */
write_l2(u32 address,u32 data)250 int write_l2(u32 address, u32 data)
251 {
252 	int v1, v2, i;
253 
254 	v1 = read_l2(0);
255 	if (v1 < 0)
256 		return -1;
257 
258 	v2 = read_l2(2);
259 	if (v2 < 0)
260 		return -1;
261 
262 	if ((v1 & 0x20) == 0) {
263 		v2 &= 0x3;
264 		v2++;
265 	} else
266 		v2 &= 0x7;
267 
268 	/* This write has to be replicated to a number of places. Not sure what.
269 	 */
270 
271 	for (i = 0; i < v2; i++) {
272 		u32 data1, data2;
273 		// Bits legend
274 		// data1   = ffffffff
275 		// data2   = 000000dc
276 		// address = 00aaaaaa
277 		// Final address signaled:
278 		// 000fffff fff000c0 000dcaaa aaa00000
279 		data1 = data & 0xff;
280 		data1 = data1 << 21;
281 		data2 = (i << 11) & 0x1800;
282 		data1 |= data2;
283 		data2 <<= 6;
284 		data2 &= 0x20000;
285 		data1 |= data2;
286 
287 		/* Signal L2 controller */
288 		if (signal_l2((address << 5) | data1, 0, 0, 0, 3))
289 			return -1;
290 	}
291 	return 0;
292 }
293 
294 /* Write data_high:data_low into the cache at address1. Test address2
295  * to see if the same data is returned. Return 0 if the data matches.
296  * return lower 16 bits if mismatched data if mismatch. Return -1
297  * on error
298  */
test_l2_address_alias(u32 address1,u32 address2,u32 data_high,u32 data_low)299 int test_l2_address_alias(u32 address1, u32 address2,
300 				 u32 data_high, u32 data_low)
301 {
302 	int d;
303 	msr_t msr;
304 
305 	/* Tag Write with Data Write for L2 */
306 	if (signal_l2(address1, data_high, data_low, 0, L2CMD_TWW))
307 		return -1;
308 
309 	/* Tag Read with Data Read for L2 */
310 	if (signal_l2(address2, 0, 0, 0, L2CMD_TRR))
311 		return -1;
312 
313 	/* Read data from BBL_CR_D[0-3] */
314 	for (d = BBL_CR_D0; d <= BBL_CR_D3; d++) {
315 		msr = rdmsr(d);
316 		if (msr.lo != data_low || msr.hi != data_high)
317 			return (msr.lo & 0xffff);
318 	}
319 
320 	return 0;
321 }
322 
323 /* Calculates the L2 cache size.
324  *
325  * Reference: Intel(R) 64 and IA-32 Architectures Software Developer's Manual
326  *            Volume 3B: System Programming Guide, Part 2, Intel pub. 253669,
327  *            pg. B-172.
328  *
329  */
calculate_l2_cache_size(void)330 int calculate_l2_cache_size(void)
331 {
332 	int v;
333 	msr_t msr;
334 	u32 cache_setting;
335 	u32 address, size, eax, bblcr3;
336 
337 	v = read_l2(0);
338 	if (v < 0)
339 		return -1;
340 	if ((v & 0x20) == 0) {
341 		msr = rdmsr(BBL_CR_CTL3);
342 		bblcr3 = msr.lo & ~BBLCR3_L2_SIZE;
343 		/*
344 		 * Successively write in all the possible cache size per bank
345 		 * into BBL_CR_CTL3[17:13], starting from 256KB (00001) to 4MB
346 		 * (10000), and read the last value written and accepted by the
347 		 * cache.
348 		 *
349 		 * No idea why these bits are writable at all.
350 		 */
351 		for (cache_setting = BBLCR3_L2_SIZE_256K;
352 		     cache_setting <= BBLCR3_L2_SIZE_4M; cache_setting <<= 1) {
353 			eax = bblcr3 | cache_setting;
354 			msr.lo = eax;
355 			wrmsr(BBL_CR_CTL3, msr);
356 			msr = rdmsr(BBL_CR_CTL3);
357 
358 			/* Value not accepted */
359 			if (msr.lo != eax)
360 				break;
361 		}
362 
363 		/* Backtrack to the last value that worked... */
364 		cache_setting >>= 1;
365 
366 		/* and write it into BBL_CR_CTL3 */
367 		msr.lo &= ~BBLCR3_L2_SIZE;
368 		msr.lo |= (cache_setting & BBLCR3_L2_SIZE);
369 
370 		wrmsr(BBL_CR_CTL3, msr);
371 
372 		printk(BIOS_DEBUG, "Maximum cache mask is %x\n", cache_setting);
373 
374 		/* For now, BBL_CR_CTL3 has the highest cache "size" that
375 		 * register will accept. Now we'll ping the cache and see where
376 		 * it wraps.
377 		 */
378 
379 		/* Write aaaaaaaa:aaaaaaaa to address 0 in the l2 cache.
380 		 * If this "alias test" returns an "address", it means the
381 		 * cache cannot be written to properly, and we have a problem.
382 		 */
383 		v = test_l2_address_alias(0, 0, 0xaaaaaaaa, 0xaaaaaaaa);
384 		if (v != 0)
385 			return -1;
386 
387 		/* Start with 32K wrap point (256KB actually) */
388 		size = 1;
389 		address = 0x8000;
390 
391 		while (1) {
392 			v = test_l2_address_alias(address, 0, 0x55555555,
393 						  0x55555555);
394 			// Write failed.
395 			if (v < 0)
396 				return -1;
397 			// It wraps here.
398 			else if (v == 0)
399 				break;
400 
401 			size <<= 1;
402 			address <<= 1;
403 
404 			if (address > 0x200000)
405 				return -1;
406 		}
407 
408 		/* Mask size */
409 		size &= 0x3e;
410 
411 		/* Shift to [17:13] */
412 		size <<= 12;
413 
414 		/* Set this into BBL_CR_CTL3 */
415 		msr = rdmsr(BBL_CR_CTL3);
416 		msr.lo &= ~BBLCR3_L2_SIZE;
417 		msr.lo |= size;
418 		wrmsr(BBL_CR_CTL3, msr);
419 
420 		printk(BIOS_DEBUG, "L2 Cache Mask is %x\n", size);
421 
422 		/* Shift to [6:2] */
423 		size >>= 11;
424 
425 		v = read_l2(2);
426 
427 		if (v < 0)
428 			return -1;
429 
430 		printk(BIOS_DEBUG, "L2(2): %x ", v);
431 
432 		v &= 0x3;
433 
434 		/* Shift size right by v */
435 		size >>= v;
436 
437 		/* Or in this size */
438 		v |= size;
439 
440 		printk(BIOS_DEBUG, "-> %x\n", v);
441 
442 		if (write_l2(2, v) != 0)
443 			return -1;
444 	} else {
445 		// Some cache size information is available from L2 registers.
446 		// Work from there.
447 		int b, c;
448 
449 		v = read_l2(2);
450 
451 		printk(BIOS_DEBUG, "L2(2) = %x\n", v);
452 
453 		if (v < 0)
454 			return -1;
455 
456 		// L2 register 2 bitmap: cc---bbb
457 		b = v & 0x7;
458 		c = v >> 6;
459 
460 		v = 1 << c * b;
461 
462 		v &= 0xf;
463 
464 		printk(BIOS_DEBUG, "Calculated a = %x\n", v);
465 
466 		if (v == 0)
467 			return -1;
468 
469 		/* Shift to 17:14 */
470 		v <<= 14;
471 
472 		/* Write this size into BBL_CR_CTL3 */
473 		msr = rdmsr(BBL_CR_CTL3);
474 		msr.lo &= ~BBLCR3_L2_SIZE;
475 		msr.lo |= v;
476 		wrmsr(BBL_CR_CTL3, msr);
477 	}
478 
479 	return 0;
480 }
481 
482 // L2 physical address range can be found from L2 control register 3,
483 // bits [2:0].
calculate_l2_physical_address_range(void)484 int calculate_l2_physical_address_range(void)
485 {
486 	int r0, r3;
487 	msr_t msr;
488 
489 	r3 = read_l2(3);
490 	if (r3 < 0)
491 		return -1;
492 
493 	r0 = read_l2(0);
494 	if (r0 < 0)
495 		return -1;
496 
497 	if (r0 & 0x20)
498 		r3 = 0x7;
499 	else
500 		r3 &= 0x7;
501 
502 	printk(BIOS_DEBUG, "L2 Physical Address Range is %dM\n",
503 		(1 << r3) * 512);
504 
505 	/* Shift into [22:20] to be saved into BBL_CR_CTL3. */
506 	r3 = r3 << 20;
507 
508 	msr = rdmsr(BBL_CR_CTL3);
509 	msr.lo &= ~BBLCR3_L2_PHYSICAL_RANGE;
510 	msr.lo |= r3;
511 	wrmsr(BBL_CR_CTL3, msr);
512 
513 	return 0;
514 }
515 
set_l2_ecc(void)516 int set_l2_ecc(void)
517 {
518 	u32 eax;
519 	const u32 data1 = 0xaa55aa55;
520 	const u32 data2 = 0xaaaaaaaa;
521 	msr_t msr;
522 
523 	/* Set User Supplied ECC in BBL_CR_CTL */
524 	msr = rdmsr(BBL_CR_CTL);
525 	msr.lo |= BBLCR3_L2_SUPPLIED_ECC;
526 	wrmsr(BBL_CR_CTL, msr);
527 
528 	/* Write a value into the L2 Data ECC register BBL_CR_DECC */
529 	msr.lo = data1;
530 	msr.hi = 0;
531 	wrmsr(BBL_CR_DECC, msr);
532 
533 	if (test_l2_address_alias(0, 0, data2, data2) < 0)
534 		return -1;
535 
536 	/* Read back ECC from BBL_CR_DECC */
537 	msr = rdmsr(BBL_CR_DECC);
538 	eax = msr.lo;
539 
540 	if (eax == data1) {
541 		printk(BIOS_DEBUG, "L2 ECC Checking is enabled\n");
542 
543 		/* Set ECC Check Enable in BBL_CR_CTL3 */
544 		msr = rdmsr(BBL_CR_CTL3);
545 		msr.lo |= BBLCR3_L2_ECC_CHECK_ENABLE;
546 		wrmsr(BBL_CR_CTL3, msr);
547 	}
548 
549 	/* Clear User Supplied ECC in BBL_CR_CTL */
550 	msr = rdmsr(BBL_CR_CTL);
551 	msr.lo &= ~BBLCR3_L2_SUPPLIED_ECC;
552 	wrmsr(BBL_CR_CTL, msr);
553 
554 	return 0;
555 }
556 
557 /*
558  * This is the function called from CPU initialization
559  * driver to set up P6 family L2 cache.
560  */
561 
p6_configure_l2_cache(void)562 int p6_configure_l2_cache(void)
563 {
564 	msr_t msr, bblctl3;
565 	unsigned int eax;
566 	u16 signature;
567 	int cache_size, bank;
568 	int result, calc_eax;
569 	int v, a;
570 
571 	int badclk1, badclk2, clkratio;
572 	int crctl3_or;
573 
574 	printk(BIOS_INFO, "Configuring L2 cache... ");
575 
576 	/* Read BBL_CR_CTL3 */
577 	bblctl3 = rdmsr(BBL_CR_CTL3);
578 	/* If bit 23 (L2 Hardware disable) is set then done */
579 	/* These would be Covington core Celerons with no L2 cache */
580 	if (bblctl3.lo & BBLCR3_L2_NOT_PRESENT) {
581 		printk(BIOS_INFO, "hardware disabled\n");
582 		return 0;
583 	}
584 
585 	signature = cpuid_eax(1) & 0xfff0;
586 
587 	/* Klamath-specific bit settings for certain
588 	   preliminary checks.
589 	 */
590 	if (signature == 0x630) {
591 		clkratio = 0x1c00000;
592 		badclk2 = 0x1000000;
593 		crctl3_or = 0x44000;
594 	} else {
595 		clkratio = 0x3c00000;
596 		badclk2 = 0x3000000;
597 		crctl3_or = 0x40000;
598 	}
599 	badclk1 = 0xc00000;
600 
601 	/* Read EBL_CR_POWERON */
602 	msr = rdmsr(EBL_CR_POWERON);
603 	eax = msr.lo;
604 	/* Mask out [22-25] Clock frequency ratio */
605 	eax &= clkratio;
606 	if (eax == badclk1 || eax == badclk2) {
607 		printk(BIOS_ERR, "Incorrect clock frequency ratio %x\n", eax);
608 		return -1;
609 	}
610 
611 	disable_cache();
612 
613 	/* Mask out from BBL_CR_CTL3:
614 	 * [0] L2 Configured
615 	 * [5] ECC Check Enable
616 	 * [6] Address Parity Check Enable
617 	 * [7] CRTN Parity Check Enable
618 	 * [8] L2 Enabled
619 	 * [12:11] Number of L2 banks
620 	 * [17:13] Cache size per bank
621 	 * [18] (Set below)
622 	 * [22:20] L2 Physical Address Range Support
623 	 */
624 	bblctl3.lo &= 0xff88061e;
625 	/* Set:
626 	 * [17:13] = 00010 = 512Kbyte Cache size per bank (63x)
627 	 * [17:13] = 00000 = 128Kbyte Cache size per bank (all others)
628 	 * [18] Cache state error checking enable
629 	 */
630 	bblctl3.lo |= crctl3_or;
631 
632 	/* Write BBL_CR_CTL3 */
633 	wrmsr(BBL_CR_CTL3, bblctl3);
634 
635 	if (signature != 0x630) {
636 		eax = bblctl3.lo;
637 
638 		/* Set the l2 latency in BBL_CR_CTL3 */
639 		if (calculate_l2_latency() != 0)
640 			goto bad;
641 
642 		/* Read the new latency values back */
643 		bblctl3 = rdmsr(BBL_CR_CTL3);
644 		calc_eax = bblctl3.lo;
645 
646 		/* Write back the original default value */
647 		bblctl3.lo = eax;
648 		wrmsr(BBL_CR_CTL3, bblctl3);
649 
650 		/* Write BBL_CR_CTL3[27:26] (reserved??) to bits [1:0] of L2
651 		 * register 4.  Apparently all other bits must be preserved,
652 		 * hence these code.
653 		 */
654 
655 		v = (calc_eax >> 26) & 0x3;
656 
657 		printk(BIOS_DEBUG, "write_l2(4, %x)\n", v);
658 
659 		a = read_l2(4);
660 		if (a >= 0) {
661 			a &= 0xfffc;
662 			a |= v;
663 			a = write_l2(4, a);
664 			/* a now contains result code from write_l2() */
665 		}
666 		if (a != 0)
667 			goto bad;
668 
669 		/* Restore the correct latency value into BBL_CR_CTL3 */
670 		bblctl3.lo = calc_eax;
671 		wrmsr(BBL_CR_CTL3, bblctl3);
672 	} /* ! 63x CPU */
673 
674 	/* Read L2 register 0 */
675 	v = read_l2(0);
676 
677 	/* If L2(0)[5] set (and can be read properly), enable CRTN and address
678 	 * parity
679 	 */
680 	if (v >= 0 && (v & 0x20)) {
681 		bblctl3 = rdmsr(BBL_CR_CTL3);
682 		bblctl3.lo |= (BBLCR3_L2_ADDR_PARITY_ENABLE |
683 			       BBLCR3_L2_CRTN_PARITY_ENABLE);
684 		wrmsr(BBL_CR_CTL3, bblctl3);
685 	}
686 
687 	/* If something goes wrong at L2 ECC setup, cache ECC
688 	 * will just remain disabled.
689 	 */
690 	set_l2_ecc();
691 
692 	if (calculate_l2_physical_address_range() != 0) {
693 		printk(BIOS_ERR,
694 			"Failed to calculate L2 physical address range");
695 		goto bad;
696 	}
697 
698 	if (calculate_l2_cache_size() != 0) {
699 		printk(BIOS_ERR, "Failed to calculate L2 cache size");
700 		goto bad;
701 	}
702 
703 	/* Turn on cache. Only L1 is active at this time. */
704 	enable_cache();
705 
706 	/* Get the calculated cache size from BBL_CR_CTL3[17:13] */
707 	bblctl3 = rdmsr(BBL_CR_CTL3);
708 	cache_size = (bblctl3.lo & BBLCR3_L2_SIZE);
709 	if (cache_size == 0)
710 		cache_size = 0x1000;
711 	cache_size = cache_size << 3;
712 
713 	/* TODO: Cache size above is per bank. We're supposed to get
714 	 * the number of banks from BBL_CR_CTL3[12:11].
715 	 * Confirm that this still provides the correct answer.
716 	 */
717 	bank = (bblctl3.lo >> 11) & 0x3;
718 	if (bank == 0)
719 		bank = 1;
720 
721 	printk(BIOS_INFO, "size %dK... ", cache_size * bank * 4 / 1024);
722 
723 	/* Write to all cache lines to initialize */
724 
725 	while (cache_size > 0) {
726 		/* Each cache line is 32 bytes. */
727 		cache_size -= 32;
728 
729 		/* Update each way */
730 
731 		/* We're supposed to get L2 associativity from
732 		 * BBL_CR_CTL3[10:9].  But this code only applies to certain
733 		 * members of the P6 processor family and since all P6
734 		 * processors have 4-way L2 cache, we can safely assume
735 		 * 4 way for all cache operations.
736 		 */
737 
738 		for (v = 0; v < 4; v++) {
739 			/* Send Tag Write w/Data Write (TWW) to L2 controller
740 			 * MESI = Invalid
741 			 */
742 			if (signal_l2(cache_size, 0, 0, v, L2CMD_TWW
743 				| L2CMD_MESI_I) != 0) {
744 				printk(BIOS_ERR,
745 					"Failed on signal_l2(%x, %x)\n",
746 				       cache_size, v);
747 				goto bad;
748 			}
749 		}
750 	}
751 	printk(BIOS_DEBUG, "L2 Cache lines initialized\n");
752 
753 	/* Disable cache */
754 	disable_cache();
755 
756 	/* Set L2 cache configured in BBL_CR_CTL3 */
757 	bblctl3 = rdmsr(BBL_CR_CTL3);
758 	bblctl3.lo |= BBLCR3_L2_CONFIGURED;
759 	wrmsr(BBL_CR_CTL3, bblctl3);
760 
761 	/* Invalidate cache and discard unsaved writes */
762 	asm volatile ("invd");
763 
764 	/* Write 0 to L2 control register 5 */
765 	if (write_l2(5, 0) != 0) {
766 		printk(BIOS_ERR, "write_l2(5, 0) failed\n");
767 		goto done;
768 	}
769 
770 	bblctl3 = rdmsr(BBL_CR_CTL3);
771 	if (signature == 0x650) {
772 		/* Change the L2 latency to 0101 then back to
773 		 * original value. I don't know why this is needed - dpd
774 		 */
775 		eax = bblctl3.lo;
776 		bblctl3.lo &= ~BBLCR3_L2_LATENCY;
777 		bblctl3.lo |= 0x0a;
778 		wrmsr(BBL_CR_CTL3, bblctl3);
779 		bblctl3.lo = eax;
780 		wrmsr(BBL_CR_CTL3, bblctl3);
781 	}
782 
783 	/* Enable L2 in BBL_CR_CTL3 */
784 	bblctl3.lo |= BBLCR3_L2_ENABLED;
785 	wrmsr(BBL_CR_CTL3, bblctl3);
786 
787 	/* Turn on cache. Both L1 and L2 are now active. Wahoo! */
788 done:
789 	result = 0;
790 	goto out;
791 bad:
792 	result = -1;
793 out:
794 	printk(BIOS_INFO, "done.\n");
795 	return result;
796 }
797