xref: /aosp_15_r20/external/coreboot/src/northbridge/intel/ironlake/raminit.c (revision b9411a12aaaa7e1e6a6fb7c5e057f44ee179a49c)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 
3 #include <console/console.h>
4 #include <commonlib/helpers.h>
5 #include <string.h>
6 #include <arch/io.h>
7 #include <device/mmio.h>
8 #include <device/pci_ops.h>
9 #include <device/smbus_host.h>
10 #include <cpu/x86/msr.h>
11 #include <cpu/x86/cache.h>
12 #include <cbmem.h>
13 #include <cf9_reset.h>
14 #include <option.h>
15 #include <device/pci_def.h>
16 #include <device/device.h>
17 #include <halt.h>
18 #include <spd.h>
19 #include <timestamp.h>
20 #include <cpu/x86/mtrr.h>
21 #include <cpu/intel/speedstep.h>
22 #include <cpu/intel/turbo.h>
23 #include <mrc_cache.h>
24 #include <southbridge/intel/ibexpeak/me.h>
25 #include <southbridge/intel/common/pmbase.h>
26 #include <delay.h>
27 #include <types.h>
28 
29 #include "chip.h"
30 #include "ironlake.h"
31 #include "raminit.h"
32 #include "raminit_tables.h"
33 
34 #define NORTHBRIDGE PCI_DEV(0, 0, 0)
35 #define SOUTHBRIDGE PCI_DEV(0, 0x1f, 0)
36 #define GMA PCI_DEV(0, 0x2, 0x0)
37 
/*
 * Iteration helpers. Each expands to nested for-loops over variables
 * named channel, slot and rank, which must already be declared in the
 * calling function (and, for the POPULATED variants, a local
 * "struct raminfo *info" must be in scope).
 */
#define FOR_ALL_RANKS					   \
  for (channel = 0; channel < NUM_CHANNELS; channel++)	   \
    for (slot = 0; slot < NUM_SLOTS; slot++)		   \
      for (rank = 0; rank < NUM_RANKS; rank++)

/* Same, but only visits ranks marked populated in info. */
#define FOR_POPULATED_RANKS				   \
  for (channel = 0; channel < NUM_CHANNELS; channel++)	   \
    for (slot = 0; slot < NUM_SLOTS; slot++)		   \
      for (rank = 0; rank < NUM_RANKS; rank++)		   \
	if (info->populated_ranks[channel][slot][rank])

/* As FOR_POPULATED_RANKS, but walks channels in descending order. */
#define FOR_POPULATED_RANKS_BACKWARDS				\
  for (channel = NUM_CHANNELS - 1; channel >= 0; channel--)	\
    for (slot = 0; slot < NUM_SLOTS; slot++)			\
      for (rank = 0; rank < NUM_RANKS; rank++)			\
	if (info->populated_ranks[channel][slot][rank])
54 
55 #include <lib.h>		/* Prototypes */
56 
57 typedef struct _u128 {
58 	u64 lo;
59 	u64 hi;
60 } u128;
61 
read128(u32 addr,u64 * out)62 static void read128(u32 addr, u64 * out)
63 {
64 	u128 ret;
65 	u128 stor;
66 	asm volatile ("movdqu %%xmm0, %0\n"
67 		      "movdqa (%2), %%xmm0\n"
68 		      "movdqu %%xmm0, %1\n"
69 		      "movdqu %0, %%xmm0":"+m" (stor), "=m"(ret):"r"(addr));
70 	out[0] = ret.lo;
71 	out[1] = ret.hi;
72 }
73 
74 /*
75  * Ironlake memory I/O timings are located in scan chains, accessible
76  * through MCHBAR register groups. Each channel has a scan chain, and
77  * there's a global scan chain too. Each chain is broken into smaller
78  * sections of N bits, where N <= 32. Each section allows reading and
79  * writing a certain parameter. Each section contains N - 2 data bits
80  * and two additional bits: a Mask bit, and a Halt bit.
81  */
82 
83 /* OK */
/*
 * Write a value into one section of the global scan chain (see the
 * block comment above).
 *
 * @param val  data bits to store
 * @param addr section address within the chain
 * @param bits number of data bits in the section
 * @param flag value of the extra flag bit placed above the data bits
 */
static void write_1d0(u32 val, u16 addr, int bits, int flag)
{
	const u32 busy = 1 << 23;

	mchbar_write32(0x1d0, 0);
	while (mchbar_read32(0x1d0) & busy)
		;
	mchbar_write32(0x1d4, (val & ((1 << bits) - 1)) | 2 << bits | flag << bits);
	mchbar_write32(0x1d0, 1 << 30 | addr);
	while (mchbar_read32(0x1d0) & busy)
		;
}
94 
95 /* OK */
/*
 * Read one section of the global scan chain.
 *
 * @param addr  section address within the chain
 * @param split number of data bits in the section
 * @return the low @split bits read back from the chain
 */
static u16 read_1d0(u16 addr, int split)
{
	u32 val;
	mchbar_write32(0x1d0, 0);
	while (mchbar_read32(0x1d0) & (1 << 23))
		;
	/* 1U << 31: left-shifting signed 1 into the sign bit is UB. */
	mchbar_write32(0x1d0, 1U << 31 | (((mchbar_read8(0x246) >> 2) & 3) + 0x361 - addr));
	while (mchbar_read32(0x1d0) & (1 << 23))
		;
	val = mchbar_read32(0x1d8);
	/* Two dummy writes to section 0x33d restore the chain state. */
	write_1d0(0, 0x33d, 0, 0);
	write_1d0(0, 0x33d, 0, 0);
	val &= ((1 << split) - 1);
	return val;
}
112 
sfence(void)113 static void sfence(void)
114 {
115 	asm volatile ("sfence");
116 }
117 
get_lane_offset(int slot,int rank,int lane)118 static inline u16 get_lane_offset(int slot, int rank, int lane)
119 {
120 	return 0x124 * lane + ((lane & 4) ? 0x23e : 0) + 11 * rank + 22 * slot -
121 	    0x452 * (lane == 8);
122 }
123 
get_timing_register_addr(int lane,int tm,int slot,int rank)124 static inline u16 get_timing_register_addr(int lane, int tm, int slot, int rank)
125 {
126 	const u16 offs[] = { 0x1d, 0xa8, 0xe6, 0x5c };
127 	return get_lane_offset(slot, rank, lane) + offs[(tm + 3) % 4];
128 }
129 
/*
 * Debug pass-through: returns in unchanged. line is the call site,
 * used only when the trace output is enabled during debugging.
 */
static u32 gav_real(int line, u32 in)
{
	(void)line;
	return in;
}

#define gav(x) gav_real(__LINE__, (x))
137 
/*
 * Global allocation of timings_car: scratch storage for per-setting
 * timing bounds. NOTE(review): presumably lives in cache-as-RAM during
 * romstage — confirm against the linker placement.
 */
timing_bounds_t timings_car[64];
140 
141 /* OK */
142 static u16
read_500(struct raminfo * info,int channel,u16 addr,int split)143 read_500(struct raminfo *info, int channel, u16 addr, int split)
144 {
145 	u32 val;
146 	info->last_500_command[channel] = 1 << 31;
147 	mchbar_write32(0x500 + (channel << 10), 0);
148 	while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23))
149 		;
150 	mchbar_write32(0x500 + (channel << 10),
151 		1 << 31 | (((mchbar_read8(0x246 + (channel << 10)) >> 2) & 3) + 0xb88 - addr));
152 	while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23))
153 		;
154 	val = mchbar_read32(0x508 + (channel << 10));
155 	return val & ((1 << split) - 1);
156 }
157 
158 /* OK */
159 static void
write_500(struct raminfo * info,int channel,u32 val,u16 addr,int bits,int flag)160 write_500(struct raminfo *info, int channel, u32 val, u16 addr, int bits,
161 	  int flag)
162 {
163 	if (info->last_500_command[channel] == 1 << 31) {
164 		info->last_500_command[channel] = 1 << 30;
165 		write_500(info, channel, 0, 0xb61, 0, 0);
166 	}
167 	mchbar_write32(0x500 + (channel << 10), 0);
168 	while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23))
169 		;
170 	mchbar_write32(0x504 + (channel << 10),
171 		(val & ((1 << bits) - 1)) | 2 << bits | flag << bits);
172 	mchbar_write32(0x500 + (channel << 10), 1 << 30 | addr);
173 	while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23))
174 		;
175 }
176 
/* Read-modify-write one per-channel chain section: new = (old & and) | or. */
static void rmw_500(struct raminfo *info, int channel, u16 addr, int bits, u32 and, u32 or)
{
	const u32 masked = read_500(info, channel, addr, bits) & and;

	write_500(info, channel, masked | or, addr, bits, 1);
}
182 
rw_test(int rank)183 static int rw_test(int rank)
184 {
185 	const u32 mask = 0xf00fc33c;
186 	int ok = 0xff;
187 	int i;
188 	for (i = 0; i < 64; i++)
189 		write32p((rank << 28) | (i << 2), 0);
190 	sfence();
191 	for (i = 0; i < 64; i++)
192 		gav(read32p((rank << 28) | (i << 2)));
193 	sfence();
194 	for (i = 0; i < 32; i++) {
195 		u32 pat = (((mask >> i) & 1) ? 0xffffffff : 0);
196 		write32p((rank << 28) | (i << 3), pat);
197 		write32p((rank << 28) | (i << 3) | 4, pat);
198 	}
199 	sfence();
200 	for (i = 0; i < 32; i++) {
201 		u8 pat = (((mask >> i) & 1) ? 0xff : 0);
202 		int j;
203 		u32 val;
204 		gav(val = read32p((rank << 28) | (i << 3)));
205 		for (j = 0; j < 4; j++)
206 			if (((val >> (j * 8)) & 0xff) != pat)
207 				ok &= ~(1 << j);
208 		gav(val = read32p((rank << 28) | (i << 3) | 4));
209 		for (j = 0; j < 4; j++)
210 			if (((val >> (j * 8)) & 0xff) != pat)
211 				ok &= ~(16 << j);
212 	}
213 	sfence();
214 	for (i = 0; i < 64; i++)
215 		write32p((rank << 28) | (i << 2), 0);
216 	sfence();
217 	for (i = 0; i < 64; i++)
218 		gav(read32p((rank << 28) | (i << 2)));
219 
220 	return ok;
221 }
222 
223 static void
program_timings(struct raminfo * info,u16 base,int channel,int slot,int rank)224 program_timings(struct raminfo *info, u16 base, int channel, int slot, int rank)
225 {
226 	int lane;
227 	for (lane = 0; lane < 8; lane++) {
228 		write_500(info, channel,
229 			  base +
230 			  info->training.
231 			  lane_timings[2][channel][slot][rank][lane],
232 			  get_timing_register_addr(lane, 2, slot, rank), 9, 0);
233 		write_500(info, channel,
234 			  base +
235 			  info->training.
236 			  lane_timings[3][channel][slot][rank][lane],
237 			  get_timing_register_addr(lane, 3, slot, rank), 9, 0);
238 	}
239 }
240 
/* Program the 0x26c/0x268/0x2b9 register group of a channel with interval si. */
static void write_26c(int channel, u16 si)
{
	const u32 ch = channel << 10;

	mchbar_write32(0x26c + ch, 0x03243f35);
	mchbar_write32(0x268 + ch, 0xcfc00000 | si << 9);
	mchbar_write16(0x2b9 + ch, si);
}
247 
toggle_1d0_142_5ff(void)248 static void toggle_1d0_142_5ff(void)
249 {
250 	u32 reg32 = gav(read_1d0(0x142, 3));
251 	if (reg32 & (1 << 1))
252 		write_1d0(0, 0x142, 3, 1);
253 
254 	mchbar_write8(0x5ff, 0);
255 	mchbar_write8(0x5ff, 1 << 7);
256 	if (reg32 & (1 << 1))
257 		write_1d0(0x2, 0x142, 3, 1);
258 }
259 
get_580(int channel,u8 addr)260 static u32 get_580(int channel, u8 addr)
261 {
262 	u32 ret;
263 	toggle_1d0_142_5ff();
264 	mchbar_write32(0x580 + (channel << 10), 0x8493c012 | addr);
265 	mchbar_setbits8(0x580 + (channel << 10), 1 << 0);
266 	while (!((ret = mchbar_read32(0x580 + (channel << 10))) & (1 << 16)))
267 		;
268 	mchbar_clrbits8(0x580 + (channel << 10), 1 << 0);
269 	return ret;
270 }
271 
272 #define RANK_SHIFT 28
273 #define CHANNEL_SHIFT 10
274 
/*
 * Per-rank timing setup sequence: rewrite the trained lane timings in a
 * specific order with 0x580 samples in between, then re-read timing
 * register 2 and derive register 3 from it. The exact ordering of the
 * register accesses matters — do not reorder.
 */
static void seq9(struct raminfo *info, int channel, int slot, int rank)
{
	int i, lane;

	/* Write timing registers 1 and 2 for every lane. */
	for (i = 0; i < 2; i++)
		for (lane = 0; lane < 8; lane++)
			write_500(info, channel,
				  info->training.lane_timings[i + 1][channel][slot][rank][lane],
				  get_timing_register_addr(lane, i + 1, slot, rank),
				  9, 0);

	write_1d0(1, 0x103, 6, 1);

	/* Write timing register 0 for every lane. */
	for (lane = 0; lane < 8; lane++)
		write_500(info, channel,
			  info->training.lane_timings[0][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 0, slot, rank), 9, 0);

	/* Rewrite 1 and 2, sampling the 0x580 interface after each pass. */
	for (i = 0; i < 2; i++) {
		for (lane = 0; lane < 8; lane++)
			write_500(info, channel,
				  info->training.lane_timings[i + 1][channel][slot][rank][lane],
				  get_timing_register_addr(lane, i + 1, slot, rank),
				  9, 0);
		gav(get_580(channel, ((i + 1) << 2) | (rank << 5)));
	}

	toggle_1d0_142_5ff();
	write_1d0(0x2, 0x142, 3, 1);

	/* Read back register 2; register 3 tracks it with a 0x20 offset. */
	for (lane = 0; lane < 8; lane++) {
		info->training.lane_timings[2][channel][slot][rank][lane] =
		    read_500(info, channel,
			     get_timing_register_addr(lane, 2, slot, rank), 9);
		info->training.lane_timings[3][channel][slot][rank][lane] =
		    info->training.lane_timings[2][channel][slot][rank][lane] +
		    0x20;
	}
}
324 
/*
 * Count the populated ranks on one channel.
 *
 * NOTE(review): the inner loop bounds the rank index with NUM_SLOTS
 * rather than NUM_RANKS, so ranks at index >= NUM_SLOTS are never
 * counted. This matches the upstream code but looks suspicious —
 * confirm whether higher rank indices can ever be populated.
 */
static int count_ranks_in_channel(struct raminfo *info, int channel)
{
	int slot, rank;
	int res = 0;
	for (slot = 0; slot < NUM_SLOTS; slot++)
		for (rank = 0; rank < NUM_SLOTS; rank++)
			res += info->populated_ranks[channel][slot][rank];
	return res;
}
334 
/*
 * Configure a single rank: run the per-rank setup sequence, program
 * its timings and, unless resuming from S3, exercise it with the
 * read/write test three times.
 */
static void
config_rank(struct raminfo *info, int s3resume, int channel, int slot, int rank)
{
	/*
	 * Rank index within the flat address map used by rw_test():
	 * channel 0's ranks sit above all of channel 1's.
	 */
	int test_rank = rank;
	if (channel == 0)
		test_rank += count_ranks_in_channel(info, 1);

	write_1d0(0, 0x178, 7, 1);
	seq9(info, channel, slot, rank);
	program_timings(info, 0x80, channel, slot, rank);

	if (!s3resume)
		gav(rw_test(test_rank));
	program_timings(info, 0x00, channel, slot, rank);
	if (!s3resume) {
		gav(rw_test(test_rank));
		gav(rw_test(test_rank));
	}
	write_1d0(0, 0x142, 3, 1);
	write_1d0(0, 0x103, 6, 1);

	gav(get_580(channel, 0xc | (rank << 5)));
	gav(read_1d0(0x142, 3));

	mchbar_write8(0x5ff, 0);
	mchbar_write8(0x5ff, 1 << 7);
}
364 
/*
 * Set or clear (val is reduced to its lowest bit) bit position bit in
 * the 0x4cf, 0x659 and 0x697 chain sections of a channel.
 */
static void set_4cf(struct raminfo *info, int channel, u8 bit, u8 val)
{
	static const u16 regs[] = { 0x4cf, 0x659, 0x697 };
	int i;

	val &= 1;
	for (i = 0; i < ARRAY_SIZE(regs); i++)
		rmw_500(info, channel, regs[i], 4, ~(1 << bit), val << bit);
}
373 
set_334(int zero)374 static void set_334(int zero)
375 {
376 	int j, k, channel;
377 	const u32 val3[] = { 0x2a2b2a2b, 0x26272627, 0x2e2f2e2f, 0x2a2b };
378 	u32 vd8[2][16];
379 
380 	for (channel = 0; channel < NUM_CHANNELS; channel++) {
381 		for (j = 0; j < 4; j++) {
382 			u32 a = (j == 1) ? 0x29292929 : 0x31313131;
383 			u32 lmask = (j == 3) ? 0xffff : 0xffffffff;
384 			u16 c;
385 			if ((j == 0 || j == 3) && zero)
386 				c = 0;
387 			else if (j == 3)
388 				c = 0x5f;
389 			else
390 				c = 0x5f5f;
391 
392 			for (k = 0; k < 2; k++) {
393 				mchbar_write32(0x138 + 8 * k, channel << 26 | j << 24);
394 				gav(vd8[1][(channel << 3) | (j << 1) | k] =
395 				    mchbar_read32(0x138 + 8 * k));
396 				gav(vd8[0][(channel << 3) | (j << 1) | k] =
397 				    mchbar_read32(0x13c + 8 * k));
398 			}
399 
400 			mchbar_write32(0x334 + (channel << 10) + j * 0x44, zero ? 0 : val3[j]);
401 			mchbar_write32(0x32c + (channel << 10) + j * 0x44,
402 					zero ? 0 : 0x18191819 & lmask);
403 			mchbar_write16(0x34a + (channel << 10) + j * 0x44, c);
404 			mchbar_write32(0x33c + (channel << 10) + j * 0x44,
405 					zero ? 0 : a & lmask);
406 			mchbar_write32(0x344 + (channel << 10) + j * 0x44,
407 					zero ? 0 : a & lmask);
408 		}
409 	}
410 
411 	mchbar_setbits32(0x130, 1 << 0);
412 	while (mchbar_read8(0x130) & 1)
413 		;
414 }
415 
/* Read-modify-write a global chain section: new = (old & and) | or. */
static void rmw_1d0(u16 addr, u32 and, u32 or, int split)
{
	const u32 old = read_1d0(addr, split);

	write_1d0((old & and) | or, addr, split, 1);
}
422 
/* Index of the most-significant set bit of val, or -1 if val is 0. */
static int find_highest_bit_set(u16 val)
{
	int bit;

	for (bit = 15; bit >= 0; bit--)
		if (val & (1 << bit))
			return bit;
	return -1;
}
431 
/* Index of the least-significant set bit of val, or -1 if val is 0. */
static int find_lowest_bit_set32(u32 val)
{
	int bit;

	for (bit = 0; bit < 32; bit++)
		/* 1U: (1 << 31) on a signed int is undefined behavior. */
		if (val & (1U << bit))
			return bit;
	return -1;
}
440 
/*
 * Byte offsets into the DDR3 SPD image used by this raminit.
 * NOTE(review): names follow the JEDEC DDR3 SPD layout — verify
 * against the SPD annex when changing.
 */
enum {
	DEVICE_TYPE = 2,
	MODULE_TYPE = 3,	/* low nibble holds the module form factor */
	DENSITY = 4,
	RANKS_AND_DQ = 7,
	MEMORY_BUS_WIDTH = 8,
	TIMEBASE_DIVIDEND = 10,	/* medium timebase, dividend */
	TIMEBASE_DIVISOR = 11,	/* medium timebase, divisor */
	CYCLETIME = 12,		/* tCKmin in timebase units */

	CAS_LATENCIES_LSB = 14,	/* supported CAS latency bitmap, low byte */
	CAS_LATENCIES_MSB = 15,
	CAS_LATENCY_TIME = 16,	/* tAAmin in timebase units */
	THERMAL_AND_REFRESH = 31,
	REFERENCE_RAW_CARD_USED = 62,
	RANK1_ADDRESS_MAPPING = 63	/* bit 0: rank 1 address mirroring */
};
458 
/*
 * Derive the common DRAM clock and CAS latency from the SPDs of all
 * populated DIMMs, clamped to the controller's supported range, and
 * store the result in info->clock_speed_index and info->cas_latency.
 */
static void calculate_timings(struct raminfo *info)
{
	unsigned int cycletime;
	unsigned int cas_latency_time;
	unsigned int supported_cas_latencies;
	unsigned int channel, slot;
	unsigned int clock_speed_index;
	unsigned int min_cas_latency;
	unsigned int cas_latency;
	unsigned int max_clock_index;

	/*
	 * Find common CAS latency. The SPD bitmap has CL4 at bit 0; the
	 * "* 2" shift lines it up with find_highest_bit_set() + 3 below
	 * (bit 1 -> CL4).
	 */
	supported_cas_latencies = 0x3fe;
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			if (info->populated_ranks[channel][slot][0])
				supported_cas_latencies &=
				    2 * (info->spd[channel][slot][CAS_LATENCIES_LSB] |
					 (info->spd[channel][slot][CAS_LATENCIES_MSB] << 8));

	max_clock_index = MIN(3, info->max_supported_clock_speed_index);

	cycletime = min_cycletime[max_clock_index];
	cas_latency_time = min_cas_latency_time[max_clock_index];

	/* Raise cycletime/CAS time to the slowest populated DIMM's values. */
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			if (info->populated_ranks[channel][slot][0]) {
				unsigned int timebase;
				timebase =
				    1000 * info->spd[channel][slot][TIMEBASE_DIVIDEND] /
				    info->spd[channel][slot][TIMEBASE_DIVISOR];
				cycletime =
				    MAX(cycletime,
					timebase * info->spd[channel][slot][CYCLETIME]);
				cas_latency_time =
				    MAX(cas_latency_time,
					timebase * info->spd[channel][slot][CAS_LATENCY_TIME]);
			}
	if (cycletime > min_cycletime[0])
		die("RAM init: Decoded SPD DRAM freq is slower than the controller minimum!");
	/* Pick the fastest controller clock no faster than cycletime. */
	for (clock_speed_index = 0; clock_speed_index < 3; clock_speed_index++) {
		if (cycletime == min_cycletime[clock_speed_index])
			break;
		if (cycletime > min_cycletime[clock_speed_index]) {
			clock_speed_index--;
			cycletime = min_cycletime[clock_speed_index];
			break;
		}
	}
	/* Smallest CAS latency that satisfies cas_latency_time at this clock. */
	min_cas_latency = DIV_ROUND_UP(cas_latency_time, cycletime);
	cas_latency = 0;
	while (supported_cas_latencies) {
		cas_latency = find_highest_bit_set(supported_cas_latencies) + 3;
		if (cas_latency <= min_cas_latency)
			break;
		supported_cas_latencies &=
		    ~(1 << find_highest_bit_set(supported_cas_latencies));
	}

	/* No exact fit: drop one clock step when possible. */
	if (cas_latency != min_cas_latency && clock_speed_index)
		clock_speed_index--;

	if (cas_latency * min_cycletime[clock_speed_index] > 20000)
		die("Couldn't configure DRAM");
	info->clock_speed_index = clock_speed_index;
	info->cas_latency = cas_latency;
}
536 
/*
 * Program the initial (pre-training) per-lane timing registers and the
 * per-rank and per-channel base delay registers from the lookup tables,
 * keyed by silicon revision, clock speed index and module geometry.
 */
static void program_base_timings(struct raminfo *info)
{
	unsigned int channel;
	unsigned int slot, rank, lane;
	unsigned int extended_silicon_revision;
	int i;

	/* Revision 0 with any registered DIMM is treated as revision 4. */
	extended_silicon_revision = info->silicon_revision;
	if (info->silicon_revision == 0)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				if ((info->spd[channel][slot][MODULE_TYPE] & 0xF) == 3)
					extended_silicon_revision = 4;

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		for (slot = 0; slot < NUM_SLOTS; slot++)
			/* NOTE(review): rank is bounded by NUM_SLOTS, not
			   NUM_RANKS; matches upstream — confirm intent. */
			for (rank = 0; rank < NUM_SLOTS; rank++) {
				int card_timing_2;
				if (!info->populated_ranks[channel][slot][rank])
					continue;

				for (lane = 0; lane < 9; lane++) {
					int tm_reg;
					int card_timing;

					/* Extra delay for registered DIMMs
					   using reference raw card 3 or 5. */
					card_timing = 0;
					if ((info->spd[channel][slot][MODULE_TYPE] & 0xF) == 3) {
						int reference_card;
						reference_card =
						    info->spd[channel][slot][REFERENCE_RAW_CARD_USED] & 0x1f;
						if (reference_card == 3)
							card_timing =
							    u16_ffd1188[0][lane][info->clock_speed_index];
						if (reference_card == 5)
							card_timing =
							    u16_ffd1188[1][lane][info->clock_speed_index];
					}

					info->training.lane_timings[0][channel][slot][rank][lane] =
					    u8_FFFD1218[info->clock_speed_index];
					info->training.lane_timings[1][channel][slot][rank][lane] = 256;

					/* Registers 2 and 3 start from the same table-derived sum. */
					for (tm_reg = 2; tm_reg < 4; tm_reg++)
						info->training.lane_timings[tm_reg][channel][slot][rank][lane]
						    = u8_FFFD1240[channel][extended_silicon_revision]
						    [lane][2 * slot + rank][info->clock_speed_index]
						    + info->max4048[channel]
						    + u8_FFFD0C78[channel][extended_silicon_revision]
						    [info->mode4030[channel]][slot][rank]
						    [info->clock_speed_index]
						    + card_timing;
					for (tm_reg = 0; tm_reg < 4; tm_reg++)
						write_500(info, channel,
							  info->training.lane_timings[tm_reg][channel][slot][rank][lane],
							  get_timing_register_addr(lane, tm_reg, slot, rank),
							  9, 0);
				}

				/* Per-rank raw-card correction, revision 4 only
				   and only when ranks 0 and 2 are not both populated. */
				card_timing_2 = 0;
				if (!(extended_silicon_revision != 4
				      || (info->populated_ranks_mask[channel] & 5) == 5)) {
					if ((info->spd[channel][slot][REFERENCE_RAW_CARD_USED] & 0x1F) == 3)
						card_timing_2 =
						    u16_FFFE0EB8[0][info->clock_speed_index];
					if ((info->spd[channel][slot][REFERENCE_RAW_CARD_USED] & 0x1F) == 5)
						card_timing_2 =
						    u16_FFFE0EB8[1][info->clock_speed_index];
				}

				for (i = 0; i < 3; i++)
					write_500(info, channel,
						  (card_timing_2 + info->max4048[channel]
						   + u8_FFFD0EF8[channel][extended_silicon_revision]
						   [info->mode4030[channel]][info->clock_speed_index]),
						  u16_fffd0c50[i][slot][rank], 8, 1);
				write_500(info, channel,
					  (info->max4048[channel] +
					   u8_FFFD0C78[channel][extended_silicon_revision]
					   [info->mode4030[channel]][slot][rank]
					   [info->clock_speed_index]),
					  u16_fffd0c70[slot][rank], 7, 1);
			}
		if (!info->populated_ranks_mask[channel])
			continue;
		/* Per-channel base delays. */
		for (i = 0; i < 3; i++)
			write_500(info, channel,
				  (info->max4048[channel] + info->avg4044[channel]
				   + u8_FFFD17E0[channel][extended_silicon_revision]
				   [info->mode4030[channel]][info->clock_speed_index]),
				  u16_fffd0c68[i], 8, 1);
	}
}
683 
/* The time of one full clock cycle in ps. */
static unsigned int cycle_ps(struct raminfo *info)
{
	return halfcycle_ps(info) * 2;
}
689 
/* Frequency in 0.1 MHz units. */
static unsigned int frequency_01(struct raminfo *info)
{
	return (frequency_11(info) * 100) / 9;
}
695 
/* Convert picoseconds to half-cycles (integer division truncates). */
static unsigned int ps_to_halfcycles(struct raminfo *info, unsigned int ps)
{
	return frequency_11(info) * 2 * ps / 900000;
}
700 
/* Convert nanoseconds to full cycles (integer division truncates). */
static unsigned int ns_to_cycles(struct raminfo *info, unsigned int ns)
{
	return frequency_11(info) * ns / 900;
}
705 
/*
 * Compute secondary delay parameters (some_delay_*, mode4030, avg4044,
 * max4048, max_slots_used_in_channel) from the primary timings and the
 * module population, and program the per-channel 0x244 registers.
 * Exact variable purposes are undocumented upstream ("some_delay");
 * only arithmetic relations visible here are described.
 */
static void compute_derived_timings(struct raminfo *info)
{
	unsigned int channel, slot, rank;
	int extended_silicon_revision;
	int some_delay_1_ps;
	int some_delay_2_ps;
	int some_delay_2_halfcycles_ceil;
	int some_delay_2_halfcycles_floor;
	int some_delay_3_ps;
	int some_delay_3_ps_rounded;
	int some_delay_1_cycle_ceil;
	int some_delay_1_cycle_floor;

	some_delay_3_ps_rounded = 0;
	/* Revision 0 with any registered DIMM is treated as revision 4. */
	extended_silicon_revision = info->silicon_revision;
	if (!info->silicon_revision)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				if ((info->spd[channel][slot][MODULE_TYPE] & 0xF) == 3)
					extended_silicon_revision = 4;
	if (info->board_lane_delay[7] < 5)
		info->board_lane_delay[7] = 5;
	/* revision_flag_1 is only set for silicon rev 0/1 with revision >= 16. */
	info->revision_flag_1 = 2;
	if (info->silicon_revision == 2 || info->silicon_revision == 3)
		info->revision_flag_1 = 0;
	if (info->revision < 16)
		info->revision_flag_1 = 0;

	if (info->revision < 8)
		info->revision_flag_1 = 0;
	if (info->revision >= 8 && (info->silicon_revision == 0
				    || info->silicon_revision == 1))
		some_delay_2_ps = 735;
	else
		some_delay_2_ps = 750;

	if (info->revision >= 0x10 && (info->silicon_revision == 0
				       || info->silicon_revision == 1))
		some_delay_1_ps = 3929;
	else
		some_delay_1_ps = 3490;

	/* Floor/ceil of some_delay_1 in full cycles. */
	some_delay_1_cycle_floor = some_delay_1_ps / cycle_ps(info);
	some_delay_1_cycle_ceil = some_delay_1_ps / cycle_ps(info);
	if (some_delay_1_ps % cycle_ps(info))
		some_delay_1_cycle_ceil++;
	else
		some_delay_1_cycle_floor--;
	info->some_delay_1_cycle_floor = some_delay_1_cycle_floor;
	if (info->revision_flag_1)
		some_delay_2_ps = halfcycle_ps(info) >> 6;
	some_delay_2_ps +=
	    MAX(some_delay_1_ps - 30,
		2 * halfcycle_ps(info) * (some_delay_1_cycle_ceil - 1) + 1000) +
	    375;
	/* Distance from some_delay_2 up to the next half-cycle boundary. */
	some_delay_3_ps =
	    halfcycle_ps(info) - some_delay_2_ps % halfcycle_ps(info);
	if (info->revision_flag_1) {
		if (some_delay_3_ps >= 150) {
			/* Round down to a multiple of halfcycle_ps/64. */
			const int some_delay_3_halfcycles =
			    (some_delay_3_ps << 6) / halfcycle_ps(info);
			some_delay_3_ps_rounded =
			    halfcycle_ps(info) * some_delay_3_halfcycles >> 6;
		}
	}
	some_delay_2_halfcycles_ceil =
	    (some_delay_2_ps + halfcycle_ps(info) - 1) / halfcycle_ps(info) -
	    2 * (some_delay_1_cycle_ceil - 1);
	if (info->revision_flag_1 && some_delay_3_ps < 150)
		some_delay_2_halfcycles_ceil++;
	some_delay_2_halfcycles_floor = some_delay_2_halfcycles_ceil;
	if (info->revision < 0x10)
		some_delay_2_halfcycles_floor =
		    some_delay_2_halfcycles_ceil - 1;
	if (!info->revision_flag_1)
		some_delay_2_halfcycles_floor++;
	/* FIXME: this variable is unused. Should it be used? */
	(void)some_delay_2_halfcycles_floor;
	info->some_delay_2_halfcycles_ceil = some_delay_2_halfcycles_ceil;
	info->some_delay_3_ps_rounded = some_delay_3_ps_rounded;
	/* Both slots used on either channel? */
	if ((info->populated_ranks[0][0][0] && info->populated_ranks[0][1][0])
	    || (info->populated_ranks[1][0][0]
		&& info->populated_ranks[1][1][0]))
		info->max_slots_used_in_channel = 2;
	else
		info->max_slots_used_in_channel = 1;
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		mchbar_write32(0x244 + (channel << 10),
			((info->revision < 8) ? 1 : 0x200) |
			((2 - info->max_slots_used_in_channel) << 17) |
			(channel << 21) |
			(info->some_delay_1_cycle_floor << 18) | 0x9510);
	if (info->max_slots_used_in_channel == 1) {
		info->mode4030[0] = (count_ranks_in_channel(info, 0) == 2);
		info->mode4030[1] = (count_ranks_in_channel(info, 1) == 2);
	} else {
		/* 2 if 1 or 2 ranks */
		info->mode4030[0] = ((count_ranks_in_channel(info, 0) == 1)
				     || (count_ranks_in_channel(info, 0) == 2)) ? 2 : 3;
		info->mode4030[1] = ((count_ranks_in_channel(info, 1) == 1)
				     || (count_ranks_in_channel(info, 1) == 2)) ? 2 : 3;
	}
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		int max_of_unk;
		int min_of_unk_2;

		int i, count;
		int sum;

		if (!info->populated_ranks_mask[channel])
			continue;

		max_of_unk = 0;
		min_of_unk_2 = 32767;

		sum = 0;
		count = 0;
		for (i = 0; i < 3; i++) {
			int unk1;
			if (info->revision < 8)
				unk1 = u8_FFFD1891[0][channel][info->clock_speed_index][i];
			else if (!(info->revision >= 0x10
				   || info->revision_flag_1))
				unk1 = u8_FFFD1891[1][channel][info->clock_speed_index][i];
			else
				unk1 = 0;
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					int a = 0;
					int b = 0;

					if (!info->populated_ranks[channel][slot][rank])
						continue;
					/* Raw card 3/5 table corrections,
					   extended revision 4 only. */
					if (extended_silicon_revision == 4
					    && (info->populated_ranks_mask[channel] & 5) != 5) {
						if ((info->spd[channel][slot][REFERENCE_RAW_CARD_USED] & 0x1F) == 3) {
							a = u16_ffd1178[0][info->clock_speed_index];
							b = u16_fe0eb8[0][info->clock_speed_index];
						} else
						    if ((info->spd[channel][slot][REFERENCE_RAW_CARD_USED] & 0x1F) == 5) {
							a = u16_ffd1178[1][info->clock_speed_index];
							b = u16_fe0eb8[1][info->clock_speed_index];
						}
					}
					min_of_unk_2 = MIN(min_of_unk_2, a);
					min_of_unk_2 = MIN(min_of_unk_2, b);
					/* avg4044 averages "a" over rank-0 entries only. */
					if (rank == 0) {
						sum += a;
						count++;
					}
					{
						int t;
						t = b + u8_FFFD0EF8[channel][extended_silicon_revision]
						    [info->mode4030[channel]][info->clock_speed_index];
						if (unk1 >= t)
							max_of_unk =
							    MAX(max_of_unk, unk1 - t);
					}
				}
			{
				int t = u8_FFFD17E0[channel][extended_silicon_revision]
				    [info->mode4030[channel]][info->clock_speed_index]
				    + min_of_unk_2;
				if (unk1 >= t)
					max_of_unk = MAX(max_of_unk, unk1 - t);
			}
		}

		if (count == 0)
			die("No memory ranks found for channel %u\n", channel);

		info->avg4044[channel] = sum / count;
		info->max4048[channel] = max_of_unk;
	}
}
913 
/*
 * Issue a JEDEC mode-register command to one rank: place addr3 on the
 * command selectors of both channels (MCHBAR 0x271/0x671), then issue
 * a dummy read into the rank's address window so that value lands on
 * the address lines; finally restore the selectors and read once more.
 */
static void jedec_read(struct raminfo *info,
		       int channel, int slot, int rank,
		       int total_rank, u8 addr3, unsigned int value)
{
	/* Handle mirrored mapping: odd ranks on DIMMs with rank-1 address
	   mirroring need their swapped address/bank bits swapped back. */
	if ((rank & 1) && (info->spd[channel][slot][RANK1_ADDRESS_MAPPING] & 1)) {
		addr3 = (addr3 & 0xCF) | ((addr3 & 0x10) << 1) | ((addr3 >> 1) & 0x10);
		value = (value & ~0x1f8) | ((value >> 1) & 0xa8) | ((value & 0xa8) << 1);
	}

	mchbar_clrsetbits8(0x271, 0x1f << 1, addr3);
	mchbar_clrsetbits8(0x671, 0x1f << 1, addr3);

	read32p((value << 3) | (total_rank << 28));

	mchbar_clrsetbits8(0x271, 0x1f << 1, 1 << 1);
	mchbar_clrsetbits8(0x671, 0x1f << 1, 1 << 1);

	read32p(total_rank << 28);
}
934 
/* DDR3 MR1 field encodings (pre-shifted bit values). */
enum {
	MR1_RZQ12 = 512,	/* Rtt_nom = RZQ/12 */
	MR1_RZQ2 = 64,		/* Rtt_nom = RZQ/2 */
	MR1_RZQ4 = 4,		/* Rtt_nom = RZQ/4 */
	MR1_ODS34OHM = 2	/* output driver strength 34 ohm */
};

/* DDR3 MR0 field encodings. */
enum {
	MR0_BT_INTERLEAVED = 8,	/* interleaved burst type */
	MR0_DLL_RESET_ON = 256	/* trigger DLL reset */
};

/* DDR3 MR2 dynamic ODT (Rtt_WR) encodings. */
enum {
	MR2_RTT_WR_DISABLED = 0,
	MR2_RZQ2 = 1 << 10	/* Rtt_WR = RZQ/2 */
};
951 
/*
 * Issue the JEDEC initialization sequence (MR2, MR3, MR1, MR0 — in that
 * order) to every populated rank, deriving write recovery, ODT and DLL
 * settings from board delays, silicon revision and module population.
 */
static void jedec_init(struct raminfo *info)
{
	int write_recovery;
	int channel, slot, rank;
	int total_rank;
	int dll_on;
	int self_refresh_temperature;
	int auto_self_refresh;

	auto_self_refresh = 1;
	self_refresh_temperature = 1;
	/* MR0 write recovery derived from board lane delay 3. */
	if (info->board_lane_delay[3] <= 10) {
		if (info->board_lane_delay[3] <= 8)
			write_recovery = info->board_lane_delay[3] - 4;
		else
			write_recovery = 5;
	} else {
		write_recovery = 6;
	}
	/* ASR/SRT only stay enabled if every populated DIMM supports them. */
	FOR_POPULATED_RANKS {
		auto_self_refresh &=
		    (info->spd[channel][slot][THERMAL_AND_REFRESH] >> 2) & 1;
		self_refresh_temperature &=
		    info->spd[channel][slot][THERMAL_AND_REFRESH] & 1;
	}
	if (auto_self_refresh == 1)
		self_refresh_temperature = 0;

	dll_on = ((info->silicon_revision != 2 && info->silicon_revision != 3)
		  || (info->populated_ranks[0][0][0]
		      && info->populated_ranks[0][1][0])
		  || (info->populated_ranks[1][0][0]
		      && info->populated_ranks[1][1][0]));

	total_rank = 0;

	for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) {
		int rtt, rtt_wr = MR2_RTT_WR_DISABLED;
		int rzq_reg58e;

		/* Select Rtt_nom/Rtt_WR per silicon revision and population. */
		if (info->silicon_revision == 2 || info->silicon_revision == 3) {
			rzq_reg58e = 64;
			rtt = MR1_RZQ2;
			if (info->clock_speed_index != 0) {
				rzq_reg58e = 4;
				if (info->populated_ranks_mask[channel] == 3)
					rtt = MR1_RZQ4;
			}
		} else {
			if ((info->populated_ranks_mask[channel] & 5) == 5) {
				rtt = MR1_RZQ12;
				rzq_reg58e = 64;
				rtt_wr = MR2_RZQ2;
			} else {
				rzq_reg58e = 4;
				rtt = MR1_RZQ4;
			}
		}

		mchbar_write16(0x588 + (channel << 10), 0);
		mchbar_write16(0x58a + (channel << 10), 4);
		mchbar_write16(0x58c + (channel << 10), rtt | MR1_ODS34OHM);
		mchbar_write16(0x58e + (channel << 10), rzq_reg58e | 0x82);
		mchbar_write16(0x590 + (channel << 10), 0x1282);

		/* MR2 (0x28), MR3 (0x38), MR1 (0x18), MR0 (6) per rank. */
		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_RANKS; rank++)
				if (info->populated_ranks[channel][slot][rank]) {
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x28,
						   rtt_wr | (info->clock_speed_index << 3)
						   | (auto_self_refresh << 6) |
						   (self_refresh_temperature << 7));
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x38, 0);
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x18,
						   rtt | MR1_ODS34OHM);
					jedec_read(info, channel, slot, rank,
						   total_rank, 6,
						   (dll_on << 12) |
						   (write_recovery << 9)
						   | ((info->cas_latency - 4) << 4)
						   | MR0_BT_INTERLEAVED |
						   MR0_DLL_RESET_ON);
					total_rank++;
				}
	}
}
1044 
/*
 * Program the per-rank address-map registers (MCHBAR 0x200/0x208 range)
 * and compute channel totals plus the interleaved/non-interleaved split.
 * With pre_jedec set, every populated rank is temporarily mapped as a
 * fixed 256 MB so the JEDEC init sequence can address it.
 */
static void program_modules_memory_map(struct raminfo *info, int pre_jedec)
{
	unsigned int channel, slot, rank;
	unsigned int total_mb[2] = { 0, 0 };	/* total memory per channel in MB */
	unsigned int channel_0_non_interleaved;

	FOR_ALL_RANKS {
		if (info->populated_ranks[channel][slot][rank]) {
			/* 256 MB scaled up by density, halved for x16 parts. */
			total_mb[channel] +=
			    pre_jedec ? 256 : (256 << info->
					       density[channel][slot] >> info->
					       is_x16_module[channel][slot]);
			/* Rank attributes: width bit, density field, enable (0x80). */
			mchbar_write8(0x208 + rank + 2 * slot + (channel << 10),
				(pre_jedec ? (1 | ((1 + 1) << 1)) :
				(info->is_x16_module[channel][slot] |
				((info->density[channel][slot] + 1) << 1))) |
				0x80);
		}
		/* Running channel upper bound for this rank, in 64 MB units. */
		mchbar_write16(0x200 + (channel << 10) + 4 * slot + 2 * rank,
			total_mb[channel] >> 6);
	}

	info->total_memory_mb = total_mb[0] + total_mb[1];

	/* Interleaving covers twice the smaller channel; the rest is linear. */
	info->interleaved_part_mb =
	    pre_jedec ? 0 : 2 * MIN(total_mb[0], total_mb[1]);
	info->non_interleaved_part_mb =
	    total_mb[0] + total_mb[1] - info->interleaved_part_mb;
	channel_0_non_interleaved = total_mb[0] - info->interleaved_part_mb / 2;
	mchbar_write32(0x100, channel_0_non_interleaved | info->non_interleaved_part_mb << 16);
	if (!pre_jedec)
		mchbar_write16(0x104, info->interleaved_part_mb);
}
1078 
/*
 * Program board- and frequency-dependent timing registers: per-channel
 * latency fields derived from CAS latency, clock speed index and the
 * board_lane_delay table, plus a preliminary memory map and SAD rules.
 * Most register encodings here are undocumented; comments below only
 * describe what the code visibly computes.
 */
static void program_board_delay(struct raminfo *info)
{
	int cas_latency_shift;
	int some_delay_ns;
	int some_delay_3_half_cycles;

	unsigned int channel, i;
	int high_multiplier;
	int lane_3_delay;
	int cas_latency_derived;

	high_multiplier = 0;
	some_delay_ns = 200;
	some_delay_3_half_cycles = 4;
	/* Older silicon revisions (0/1) get an extra CAS-latency offset. */
	cas_latency_shift = info->silicon_revision == 0
	    || info->silicon_revision == 1 ? 1 : 0;
	if (info->revision < 8) {
		some_delay_ns = 600;
		cas_latency_shift = 0;
	}
	{
		int speed_bit;
		speed_bit =
		    ((info->clock_speed_index > 1
		      || (info->silicon_revision != 2
			  && info->silicon_revision != 3))) ^ (info->revision >=
							       0x10);
		write_500(info, 0, speed_bit | ((!info->use_ecc) << 1), 0x60e,
			  3, 1);
		write_500(info, 1, speed_bit | ((!info->use_ecc) << 1), 0x60e,
			  3, 1);
		if (info->revision >= 0x10 && info->clock_speed_index <= 1
		    && (info->silicon_revision == 2
			|| info->silicon_revision == 3))
			rmw_1d0(0x116, 5, 2, 4);
	}
	mchbar_write32(0x120, 1 << (info->max_slots_used_in_channel + 28) | 0x188e7f9f);

	/* frequency_01() is in 0.1-MHz units; round up to whole MHz. */
	mchbar_write8(0x124, info->board_lane_delay[4] + (frequency_01(info) + 999) / 1000);
	mchbar_write16(0x125, 0x1360);
	mchbar_write8(0x127, 0x40);
	if (info->fsb_frequency < frequency_11(info) / 2) {
		unsigned int some_delay_2_half_cycles;
		high_multiplier = 1;
		some_delay_2_half_cycles = ps_to_halfcycles(info,
							    ((3 *
							      fsbcycle_ps(info))
							     >> 1) +
							    (halfcycle_ps(info)
							     *
							     reg178_min[info->
									clock_speed_index]
							     >> 6)
							    +
							    4 *
							    halfcycle_ps(info)
							    + 2230);
		some_delay_3_half_cycles =
		    MIN((some_delay_2_half_cycles +
			 (frequency_11(info) * 2) * (28 -
						     some_delay_2_half_cycles) /
			 (frequency_11(info) * 2 -
			  4 * (info->fsb_frequency))) >> 3, 7);
	}
	if (mchbar_read8(0x2ca9) & 1)
		some_delay_3_half_cycles = 3;
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		mchbar_setbits32(0x220 + (channel << 10), 0x18001117);
		/* Packed latency fields derived from CL / speed / slot count. */
		mchbar_write32(0x224 + (channel << 10),
			(info->max_slots_used_in_channel - 1) |
			(info->cas_latency - 5 - info->clock_speed_index)
			<< 21 | (info->max_slots_used_in_channel +
			info->cas_latency - cas_latency_shift - 4) << 16 |
			(info->cas_latency - cas_latency_shift - 4) << 26 |
			(info->cas_latency - info->clock_speed_index +
			info->max_slots_used_in_channel - 6) << 8);
		mchbar_write32(0x228 + (channel << 10), info->max_slots_used_in_channel);
		mchbar_write8(0x239 + (channel << 10), 32);
		mchbar_write32(0x248 + (channel << 10), high_multiplier << 24 |
			some_delay_3_half_cycles << 25 | 0x840000);
		mchbar_write32(0x278 + (channel << 10), 0xc362042);
		mchbar_write32(0x27c + (channel << 10), 0x8b000062);
		mchbar_write32(0x24c + (channel << 10),
			(!!info->clock_speed_index) << 17 |
			((2 + info->clock_speed_index -
			(!!info->clock_speed_index))) << 12 | 0x10200);

		mchbar_write8(0x267 + (channel << 10), 4);
		mchbar_write16(0x272 + (channel << 10), 0x155);
		mchbar_clrsetbits32(0x2bc + (channel << 10), 0xffffff, 0x707070);

		/* One "empty" bit per unpopulated rank of this channel. */
		write_500(info, channel,
			  ((!info->populated_ranks[channel][1][1])
			   | (!info->populated_ranks[channel][1][0] << 1)
			   | (!info->populated_ranks[channel][0][1] << 2)
			   | (!info->populated_ranks[channel][0][0] << 3)),
			  0x4c9, 4, 1);
	}

	mchbar_write8(0x2c4, (1 + (info->clock_speed_index != 0)) << 6 | 0xc);
	{
		u8 freq_divisor = 2;
		if (info->fsb_frequency == frequency_11(info))
			freq_divisor = 3;
		else if (2 * info->fsb_frequency < 3 * (frequency_11(info) / 2))
			freq_divisor = 1;
		else
			freq_divisor = 2;
		mchbar_write32(0x2c0, freq_divisor << 11 | 0x6009c400);
	}

	/* Clamp board_lane_delay[3] into {<=8 verbatim, 10, 12}. */
	if (info->board_lane_delay[3] <= 10) {
		if (info->board_lane_delay[3] <= 8)
			lane_3_delay = info->board_lane_delay[3];
		else
			lane_3_delay = 10;
	} else {
		lane_3_delay = 12;
	}
	cas_latency_derived = info->cas_latency - info->clock_speed_index + 2;
	if (info->clock_speed_index > 1)
		cas_latency_derived++;
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		mchbar_write32(0x240 + (channel << 10),
			((info->clock_speed_index == 0) * 0x11000) |
			0x1002100 | (2 + info->clock_speed_index) << 4 |
			(info->cas_latency - 3));
		write_500(info, channel, (info->clock_speed_index << 1) | 1,
			  0x609, 6, 1);
		write_500(info, channel,
			  info->clock_speed_index + 2 * info->cas_latency - 7,
			  0x601, 6, 1);

		/* Board lane delays packed into the per-channel 0x250-0x25c
		   timing registers (field meanings undocumented). */
		mchbar_write32(0x250 + (channel << 10),
			(lane_3_delay + info->clock_speed_index + 9) << 6 |
			info->board_lane_delay[7] << 2 |
			info->board_lane_delay[4] << 16 |
			info->board_lane_delay[1] << 25 |
			info->board_lane_delay[1] << 29 | 1);
		mchbar_write32(0x254 + (channel << 10),
			info->board_lane_delay[1] >> 3 |
			(info->board_lane_delay[8] + 4 * info->use_ecc) << 6 |
			0x80 | info->board_lane_delay[6] << 1 |
			info->board_lane_delay[2] << 28 |
			cas_latency_derived << 16 | 0x4700000);
		mchbar_write32(0x258 + (channel << 10),
			(info->board_lane_delay[5] + info->clock_speed_index + 9) << 12 |
			(info->clock_speed_index - info->cas_latency + 12) << 8 |
			info->board_lane_delay[2] << 17 |
			info->board_lane_delay[4] << 24 | 0x47);
		mchbar_write32(0x25c + (channel << 10),
			info->board_lane_delay[1] << 1 |
			info->board_lane_delay[0] << 8 | 0x1da50000);
		mchbar_write8(0x264 + (channel << 10), 0xff);
		mchbar_write8(0x5f8 + (channel << 10), cas_latency_shift << 3 | info->use_ecc);
	}

	/* Temporary 256 MB/rank map for the JEDEC init sequence. */
	program_modules_memory_map(info, 1);

	mchbar_clrsetbits16(0x610, 0xfe3c,
		MIN(ns_to_cycles(info, some_delay_ns) / 2, 127) << 9 | 0x3c);
	mchbar_setbits16(0x612, 1 << 8);
	mchbar_setbits16(0x214, 0x3e00);
	/* Preliminary SAD rules; rewritten later by program_total_memory_map(). */
	for (i = 0; i < 8; i++) {
		pci_write_config32(QPI_SAD, SAD_DRAM_RULE(i),
			       (info->total_memory_mb - 64) | !i | 2);
		pci_write_config32(QPI_SAD, SAD_INTERLEAVE_LIST(i), 0);
	}
}
1248 
1249 #define DEFAULT_PCI_MMIO_SIZE 2048
1250 
/*
 * Compute and program the system address map: TOM, TOLUD, TOUUD, the
 * above-4G remap window, graphics stolen memory (IGD/GTT), TSEG, and
 * the QPI SAD DRAM rules.  All sizes/addresses are handled in MB units
 * until written to the registers.
 */
static void program_total_memory_map(struct raminfo *info)
{
	unsigned int tom, tolud, touud;
	unsigned int quickpath_reserved;
	unsigned int remap_base;
	unsigned int uma_base_igd;
	unsigned int uma_base_gtt;
	unsigned int mmio_size;
	int memory_remap;
	unsigned int memory_map[8];
	int i;
	unsigned int current_limit;
	unsigned int tseg_base;
	int uma_size_igd = 0, uma_size_gtt = 0;

	memset(memory_map, 0, sizeof(memory_map));

	if (info->uma_enabled) {
		u16 t = pci_read_config16(NORTHBRIDGE, GGC);
		gav(t);
		/* GTT stolen sizes in MB, indexed by GGC bits 11:8. */
		const int uma_sizes_gtt[16] =
		    { 0, 1, 0, 2, 0, 0, 0, 0, 0, 2, 3, 4, 42, 42, 42, 42 };
		/* Igd memory */
		const int uma_sizes_igd[16] = {
			0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352,
			    256, 512
		};

		uma_size_igd = uma_sizes_igd[(t >> 4) & 0xF];
		uma_size_gtt = uma_sizes_gtt[(t >> 8) & 0xF];
	}

	mmio_size = DEFAULT_PCI_MMIO_SIZE;

	tom = info->total_memory_mb;
	/* Exactly 4 GiB installed: clamp TOM to 4032 MB
	   (NOTE(review): reason for the 64 MB cut is inferred — confirm). */
	if (tom == 4096)
		tom = 4032;
	touud = ALIGN_DOWN(tom - info->memory_reserved_for_heci_mb, 64);
	tolud = ALIGN_DOWN(MIN(4096 - mmio_size + ALIGN_UP(uma_size_igd + uma_size_gtt, 64)
			      , touud), 64);
	memory_remap = 0;
	/* Remap memory shadowed by the MMIO hole to above 4 GiB. */
	if (touud - tolud > 64) {
		memory_remap = 1;
		remap_base = MAX(4096, touud);
		touud = touud - tolud + 4096;
	}
	if (touud > 4096)
		memory_map[2] = touud | 1;
	quickpath_reserved = 0;

	u32 t = pci_read_config32(QPI_SAD, 0x68);

	gav(t);

	/* NOTE(review): QPI SAD reg 0x68 seems to describe a reserved
	   region: bit 11 enables it, bits 31:20 encode its size. */
	if (t & 0x800) {
		u32 shift = t >> 20;
		if (shift == 0)
			die("Quickpath value is 0\n");
		quickpath_reserved = (u32)1 << find_lowest_bit_set32(shift);
	}

	if (memory_remap)
		touud -= quickpath_reserved;

	/* Carve graphics stolen memory and TSEG out below TOLUD. */
	uma_base_igd = tolud - uma_size_igd;
	uma_base_gtt = uma_base_igd - uma_size_gtt;
	tseg_base = ALIGN_DOWN(uma_base_gtt, 64) - (CONFIG_SMM_TSEG_SIZE >> 20);
	if (!memory_remap)
		tseg_base -= quickpath_reserved;
	tseg_base = ALIGN_DOWN(tseg_base, 8);

	pci_write_config16(NORTHBRIDGE, TOLUD, tolud << 4);
	pci_write_config16(NORTHBRIDGE, TOM, tom >> 6);
	if (memory_remap) {
		pci_write_config16(NORTHBRIDGE, REMAPBASE, remap_base >> 6);
		pci_write_config16(NORTHBRIDGE, REMAPLIMIT, (touud - 64) >> 6);
	}
	pci_write_config16(NORTHBRIDGE, TOUUD, touud);

	if (info->uma_enabled) {
		pci_write_config32(NORTHBRIDGE, IGD_BASE, uma_base_igd << 20);
		pci_write_config32(NORTHBRIDGE, GTT_BASE, uma_base_gtt << 20);
	}
	pci_write_config32(NORTHBRIDGE, TSEG, tseg_base << 20);

	current_limit = 0;
	memory_map[0] = ALIGN_DOWN(uma_base_gtt, 64) | 1;
	memory_map[1] = 4096;
	/* SAD DRAM rules must have monotonically non-decreasing limits. */
	for (i = 0; i < ARRAY_SIZE(memory_map); i++) {
		current_limit = MAX(current_limit, memory_map[i] & ~1);
		pci_write_config32(QPI_SAD, SAD_DRAM_RULE(i),
			       (memory_map[i] & 1) | ALIGN_DOWN(current_limit -
								1, 64) | 2);
		pci_write_config32(QPI_SAD, SAD_INTERLEAVE_LIST(i), 0);
	}
}
1347 
/*
 * Read northbridge capability and revision registers and derive the
 * silicon revision, the maximum supported clock speed index, and whether
 * integrated-graphics UMA is enabled.
 */
static void collect_system_info(struct raminfo *info)
{
	u32 capid0[3];
	int i;
	unsigned int channel;

	for (i = 0; i < 3; i++) {
		capid0[i] = pci_read_config32(NORTHBRIDGE, CAPID0 | (i << 2));
		printk(BIOS_DEBUG, "CAPID0[%d] = 0x%08x\n", i, capid0[i]);
	}
	info->revision = pci_read_config8(NORTHBRIDGE, PCI_REVISION_ID);
	printk(BIOS_DEBUG, "Revision ID: 0x%x\n", info->revision);
	printk(BIOS_DEBUG, "Device ID: 0x%x\n", pci_read_config16(NORTHBRIDGE, PCI_DEVICE_ID));

	info->max_supported_clock_speed_index = (~capid0[1] & 7);

	/* UMA is off if fused out (CAPID0[1] bit 11) or the IGD function
	   is disabled in DEVEN (bit 3). */
	if ((capid0[1] >> 11) & 1)
		info->uma_enabled = 0;
	else
		gav(info->uma_enabled =
		    pci_read_config8(NORTHBRIDGE, DEVEN) & 8);
	/* Unrecognised: [0000:fffd3d2d] 37f81.37f82 ! CPUID: eax: 00000001; ecx: 00000e00 => 00020655.00010800.029ae3ff.bfebfbff */
	info->silicon_revision = 0;

	/* NOTE(review): CAPID0 bit decode into silicon revisions below is
	   taken as-is from the original MRC trace; no public docs. */
	if (capid0[2] & 2) {
		info->silicon_revision = 0;
		info->max_supported_clock_speed_index = 2;
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			if (info->populated_ranks[channel][0][0]
			    && (info->spd[channel][0][MODULE_TYPE] & 0xf) ==
			    3) {
				/* SPD module type 3 is SO-DIMM. */
				info->silicon_revision = 2;
				info->max_supported_clock_speed_index = 1;
			}
	} else {
		switch (((capid0[2] >> 18) & 1) + 2 * ((capid0[1] >> 3) & 1)) {
		case 1:
		case 2:
			info->silicon_revision = 3;
			break;
		case 3:
			info->silicon_revision = 0;
			break;
		case 0:
			info->silicon_revision = 2;
			break;
		}
		/* Device ID overrides the fuse-derived revision. */
		switch (pci_read_config16(NORTHBRIDGE, PCI_DEVICE_ID)) {
		case 0x40:
			info->silicon_revision = 0;
			break;
		case 0x48:
			info->silicon_revision = 1;
			break;
		}
	}
}
1405 
/*
 * Replay cached training results (from the MRC cache) into the hardware:
 * all four per-lane timing sets plus registers 0x178 and 0x10b.
 * Only done on revision >= 8 parts.
 */
static void write_training_data(struct raminfo *info)
{
	int tm, channel, slot, rank, lane;
	if (info->revision < 8)
		return;

	/* 4 timing sets x channels x slots x ranks x 9 lanes (8 data + ECC). */
	for (tm = 0; tm < 4; tm++)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					for (lane = 0; lane < 9; lane++)
						write_500(info, channel,
							  info->
							  cached_training->
							  lane_timings[tm]
							  [channel][slot][rank]
							  [lane],
							  get_timing_register_addr
							  (lane, tm, slot,
							   rank), 9, 0);
	write_1d0(info->cached_training->reg_178, 0x178, 7, 1);
	write_1d0(info->cached_training->reg_10b, 0x10b, 6, 1);
}
1429 
/*
 * Dump the live per-lane timing registers to the RAM_SPEW console,
 * each followed by the corresponding cached training value in
 * parentheses: "live (cached)".
 */
static void dump_timings(struct raminfo *info)
{
	int channel, slot, rank, lane, i;
	printk(RAM_SPEW, "Timings:\n");
	FOR_POPULATED_RANKS {
		printk(RAM_SPEW, "channel %d, slot %d, rank %d\n", channel,
		       slot, rank);
		for (lane = 0; lane < 9; lane++) {
			printk(RAM_SPEW, "lane %d: ", lane);
			for (i = 0; i < 4; i++) {
				printk(RAM_SPEW, "%x (%x) ",
				       read_500(info, channel,
						get_timing_register_addr
						(lane, i, slot, rank),
						9),
				       info->training.
				       lane_timings[i][channel][slot][rank]
				       [lane]);
			}
			printk(RAM_SPEW, "\n");
		}
	}
	printk(RAM_SPEW, "[178] = %x (%x)\n", read_1d0(0x178, 7),
	       info->training.reg_178);
	printk(RAM_SPEW, "[10b] = %x (%x)\n", read_1d0(0x10b, 6),
	       info->training.reg_10b);
}
1457 
1458 /* Read timings and other registers that need to be restored verbatim and
1459    put them to CBMEM.
1460  */
/*
 * Read back everything that must be restored verbatim on a later boot /
 * S3 resume -- the four per-lane timing sets, registers 0x178/0x10b,
 * per-channel 0x274/0x265 fields and a few misc MCHBAR registers --
 * and stash it in the MRC cache.
 */
static void save_timings(struct raminfo *info)
{
	struct ram_training train;
	int channel, slot, rank, lane, i;

	train = info->training;
	/* Re-read the current per-lane timings from the hardware. */
	FOR_POPULATED_RANKS for (lane = 0; lane < 9; lane++)
		for (i = 0; i < 4; i++)
			train.lane_timings[i][channel][slot][rank][lane] =
			    read_500(info, channel,
				     get_timing_register_addr(lane, i, slot,
							      rank), 9);
	train.reg_178 = read_1d0(0x178, 7);
	train.reg_10b = read_1d0(0x10b, 6);

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		u32 reg32;
		reg32 = mchbar_read32((channel << 10) + 0x274);
		/* Split 0x274 into its high/low halves; 0x265 keeps bits 15:8. */
		train.reg274265[channel][0] = reg32 >> 16;
		train.reg274265[channel][1] = reg32 & 0xffff;
		train.reg274265[channel][2] = mchbar_read16((channel << 10) + 0x265) >> 8;
	}
	train.reg2ca9_bit0 = mchbar_read8(0x2ca9) & 1;
	train.reg_6dc = mchbar_read32(0x6dc);
	train.reg_6e8 = mchbar_read32(0x6e8);

	printk(RAM_SPEW, "[6dc] = %x\n", train.reg_6dc);
	printk(RAM_SPEW, "[6e8] = %x\n", train.reg_6e8);

	/* Save the MRC S3 restore data to cbmem */
	mrc_cache_stash_data(MRC_TRAINING_DATA, MRC_CACHE_VERSION,
			&train, sizeof(train));
}
1494 
get_cached_training(void)1495 static const struct ram_training *get_cached_training(void)
1496 {
1497 	return mrc_cache_current_mmap_leak(MRC_TRAINING_DATA,
1498 					   MRC_CACHE_VERSION,
1499 					   NULL);
1500 }
1501 
have_match_ranks(struct raminfo * info,int channel,int ranks)1502 static int have_match_ranks(struct raminfo *info, int channel, int ranks)
1503 {
1504 	int ranks_in_channel;
1505 	ranks_in_channel = info->populated_ranks[channel][0][0]
1506 	    + info->populated_ranks[channel][0][1]
1507 	    + info->populated_ranks[channel][1][0]
1508 	    + info->populated_ranks[channel][1][1];
1509 
1510 	/* empty channel */
1511 	if (ranks_in_channel == 0)
1512 		return 1;
1513 
1514 	if (ranks_in_channel != ranks)
1515 		return 0;
1516 	/* single slot */
1517 	if (info->populated_ranks[channel][0][0] !=
1518 	    info->populated_ranks[channel][1][0])
1519 		return 1;
1520 	if (info->populated_ranks[channel][0][1] !=
1521 	    info->populated_ranks[channel][1][1])
1522 		return 1;
1523 	if (info->is_x16_module[channel][0] != info->is_x16_module[channel][1])
1524 		return 0;
1525 	if (info->density[channel][0] != info->density[channel][1])
1526 		return 0;
1527 	return 1;
1528 }
1529 
read_4090(struct raminfo * info)1530 static void read_4090(struct raminfo *info)
1531 {
1532 	int i, channel, slot, rank, lane;
1533 	for (i = 0; i < 2; i++)
1534 		for (slot = 0; slot < NUM_SLOTS; slot++)
1535 			for (rank = 0; rank < NUM_RANKS; rank++)
1536 				for (lane = 0; lane < 9; lane++)
1537 					info->training.
1538 					    lane_timings[0][i][slot][rank][lane]
1539 					    = 32;
1540 
1541 	for (i = 1; i < 4; i++)
1542 		for (channel = 0; channel < NUM_CHANNELS; channel++)
1543 			for (slot = 0; slot < NUM_SLOTS; slot++)
1544 				for (rank = 0; rank < NUM_RANKS; rank++)
1545 					for (lane = 0; lane < 9; lane++) {
1546 						info->training.
1547 						    lane_timings[i][channel]
1548 						    [slot][rank][lane] =
1549 						    read_500(info, channel,
1550 							     get_timing_register_addr
1551 							     (lane, i, slot,
1552 							      rank), 9)
1553 						    + (i == 1) * 11;	// !!!!
1554 					}
1555 }
1556 
/*
 * Expected 32-bit pattern for the first memory test at 8-byte offset
 * 'addr'.  Offsets repeat with period 480; the block index selects a
 * walking-ones byte pattern, and a bit drawn from the inversion-mask
 * table (optionally XOR'ed with 'flip') decides whether the whole word
 * is complemented.
 */
static u32 get_etalon2(int flip, u32 addr)
{
	const u16 invmask[] = {
		0xaaaa, 0x6db6, 0x4924, 0xeeee, 0xcccc, 0x8888, 0x7bde, 0x739c,
		0x6318, 0x4210, 0xefbe, 0xcf3c, 0x8e38, 0x0c30, 0x0820
	};
	const u32 block = addr / 480;
	const u32 offset = addr % 480;
	const u32 bit = offset & 0xf;
	const u32 parity = (offset >> 4) & 1;
	const u32 row = offset >> 5;
	u32 pattern = block ? 0x1010101 << (block - 1) : 0;

	if (flip ^ (((invmask[row] >> bit) ^ parity) & 1))
		pattern = ~pattern;

	return pattern;
}
1579 
/* Invalidate variable MTRR 3 (used as a temporary test-window mapping)
   by clearing both its base and mask MSRs. */
static void disable_cache_region(void)
{
	msr_t msr = {.lo = 0, .hi = 0 };

	wrmsr(MTRR_PHYS_BASE(3), msr);
	wrmsr(MTRR_PHYS_MASK(3), msr);
}
1587 
/*
 * Map 'size' bytes at 'base' as write-protect cacheable via variable
 * MTRR 3, so that test reads are cached while writes still reach DRAM.
 * NOTE(review): ALIGN_DOWN(size + 4096, 4096) rounds page-multiple sizes
 * up by an extra page -- appears intentional, confirm against callers.
 */
static void enable_cache_region(unsigned int base, unsigned int size)
{
	msr_t msr;
	msr.lo = base | MTRR_TYPE_WRPROT;
	msr.hi = 0;
	wrmsr(MTRR_PHYS_BASE(3), msr);
	/* MTRR_DEF_TYPE_EN is bit 11, which in PHYS_MASK is the valid bit. */
	msr.lo = ((~(ALIGN_DOWN(size + 4096, 4096) - 1) | MTRR_DEF_TYPE_EN)
		  & 0xffffffff);
	msr.hi = 0x0000000f;
	wrmsr(MTRR_PHYS_MASK(3), msr);
}
1599 
/* Evict a physical region from the cache, one 64-byte line at a time.
   The region length is rounded the same way enable_cache_region() maps it. */
static void flush_cache(u32 start, u32 size)
{
	u32 addr;
	const u32 limit = start + ALIGN_DOWN(size + 4096, 4096);

	for (addr = start; addr < limit; addr += 64)
		clflush((void *)(uintptr_t)addr);
}
1609 
/* Write 0x01 to northbridge config register 0xc0.
   NOTE(review): presumably clears latched DRAM test/error status --
   inferred from its use between test-pattern writes; confirm. */
static void clear_errors(void)
{
	pci_write_config8(NORTHBRIDGE, 0xc0, 0x01);
}
1614 
write_testing(struct raminfo * info,int totalrank,int flip)1615 static void write_testing(struct raminfo *info, int totalrank, int flip)
1616 {
1617 	int nwrites = 0;
1618 	/* in 8-byte units.  */
1619 	u32 offset;
1620 	u8 *base;
1621 
1622 	base = (u8 *)(uintptr_t)(totalrank << 28);
1623 	for (offset = 0; offset < 9 * 480; offset += 2) {
1624 		write32(base + offset * 8, get_etalon2(flip, offset));
1625 		write32(base + offset * 8 + 4, get_etalon2(flip, offset));
1626 		write32(base + offset * 8 + 8, get_etalon2(flip, offset + 1));
1627 		write32(base + offset * 8 + 12, get_etalon2(flip, offset + 1));
1628 		nwrites += 4;
1629 		if (nwrites >= 320) {
1630 			clear_errors();
1631 			nwrites = 0;
1632 		}
1633 	}
1634 }
1635 
/*
 * Read back the pattern written by write_testing() for the given rank
 * and return a bitmask of byte lanes that saw at least one mismatch
 * (bit n set => lane n failed).  The window is temporarily cached to
 * speed up the readback.
 */
static u8 check_testing(struct raminfo *info, u8 total_rank, int flip)
{
	u8 failmask = 0;
	int i;
	int comp1, comp2, comp3;
	u32 failxor[2] = { 0, 0 };	/* XOR of mismatches: lanes 0-3 / 4-7 */

	enable_cache_region((total_rank << 28), 1728 * 5 * 4);

	/* Stop early once every lane has already failed. */
	for (comp3 = 0; comp3 < 9 && failmask != 0xff; comp3++) {
		for (comp1 = 0; comp1 < 4; comp1++)
			for (comp2 = 0; comp2 < 60; comp2++) {
				u32 re[4];
				u32 curroffset =
				    comp3 * 8 * 60 + 2 * comp1 + 8 * comp2;
				/* 128-bit read: two 8-byte pattern units. */
				read128((total_rank << 28) | (curroffset << 3),
					(u64 *)re);
				failxor[0] |=
				    get_etalon2(flip, curroffset) ^ re[0];
				failxor[1] |=
				    get_etalon2(flip, curroffset) ^ re[1];
				failxor[0] |=
				    get_etalon2(flip, curroffset | 1) ^ re[2];
				failxor[1] |=
				    get_etalon2(flip, curroffset | 1) ^ re[3];
			}
		/* Any nonzero byte in the accumulator fails its lane. */
		for (i = 0; i < 8; i++)
			if ((0xff << (8 * (i % 4))) & failxor[i / 4])
				failmask |= 1 << i;
	}
	disable_cache_region();
	flush_cache((total_rank << 28), 1728 * 5 * 4);
	return failmask;
}
1670 
/* Primary seed table for the type-2 test-pattern generator (get_etalon()). */
const u32 seed1[0x18] = {
	0x3a9d5ab5, 0x576cb65b, 0x555773b6, 0x2ab772ee,
	0x555556ee, 0x3a9d5ab5, 0x576cb65b, 0x555773b6,
	0x2ab772ee, 0x555556ee, 0x5155a555, 0x5155a555,
	0x5155a555, 0x5155a555, 0x3a9d5ab5, 0x576cb65b,
	0x555773b6, 0x2ab772ee, 0x555556ee, 0x55d6b4a5,
	0x366d6b3a, 0x2ae5ddbb, 0x3b9ddbb7, 0x55d6b4a5,
};
1679 
/*
 * Secondary seed lookup for get_etalon(): 'a' selects one of five seed
 * words (values >= 10 skip one table slot before dividing by 5), and a
 * nonzero 'b' complements the result.
 */
static u32 get_seed2(int a, int b)
{
	const u32 seed2[5] = {
		0x55555555, 0x33333333, 0x2e555a55, 0x55555555,
		0x5b6db6db,
	};
	const u32 word = seed2[(a + (a >= 10)) / 5];

	if (b)
		return ~word;
	return word;
}
1690 
/*
 * Derive a 5-bit shift amount for the pattern generator: start from
 * comp2 and subtract one when the seed3 bit selected by (comp5, x&7)
 * is set, wrapping modulo 32.
 */
static int make_shift(int comp2, int comp5, int x)
{
	static const u8 seed3[32] = {
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x38, 0x1c, 0x3c, 0x18, 0x38, 0x38,
		0x38, 0x38, 0x38, 0x38, 0x0f, 0x0f, 0x0f, 0x0f,
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
	};
	const int decrement = (seed3[comp5] >> (x & 7)) & 1;

	return (comp2 - decrement) & 0x1f;
}
1702 
/*
 * Expected 32-bit pattern for the type-2 memory test at byte address
 * 'addr'.  The address is split into bit-fields that index the seed
 * tables; 'flip' inverts the two single-bit pattern components.
 */
static u32 get_etalon(int flip, u32 addr)
{
	u32 mask_byte = 0;
	int comp1 = (addr >> 1) & 1;
	int comp2 = (addr >> 3) & 0x1f;
	int comp3 = (addr >> 8) & 0xf;
	int comp4 = (addr >> 12) & 0xf;
	int comp5 = (addr >> 16) & 0x1f;
	u32 mask_bit = ~(0x10001 << comp3);
	u32 part1;
	u32 part2;
	int byte;

	/* Single-bit components, placed at bit comp3 and comp3 + 16. */
	part2 =
	    ((seed1[comp5] >>
	      make_shift(comp2, comp5,
			 (comp3 >> 3) | (comp1 << 2) | 2)) & 1) ^ flip;
	part1 =
	    ((seed1[comp5] >>
	      make_shift(comp2, comp5,
			 (comp3 >> 3) | (comp1 << 2) | 0)) & 1) ^ flip;

	/* Byte-granular background mask derived from the secondary seed. */
	for (byte = 0; byte < 4; byte++)
		if ((get_seed2(comp5, comp4) >>
		     make_shift(comp2, comp5, (byte | (comp1 << 2)))) & 1)
			mask_byte |= 0xff << (8 * byte);

	return (mask_bit & mask_byte) | (part1 << comp3) | (part2 <<
							    (comp3 + 16));
}
1733 
1734 static void
write_testing_type2(struct raminfo * info,u8 totalrank,u8 region,u8 block,char flip)1735 write_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
1736 		    char flip)
1737 {
1738 	int i;
1739 	for (i = 0; i < 2048; i++)
1740 		write32p((totalrank << 28) | (region << 25) | (block << 16) |
1741 			 (i << 2), get_etalon(flip, (block << 16) | (i << 2)));
1742 }
1743 
/*
 * Verify one 8 KiB block written by write_testing_type2() against
 * get_etalon() and return a per-byte-lane fail mask (bit n set =>
 * byte lane n mismatched at least once).
 */
static u8
check_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
		    char flip)
{
	u8 failmask = 0;
	u32 failxor[2];	/* XOR of mismatches: even / odd dword columns */
	int i;
	int comp1, comp2, comp3;

	failxor[0] = 0;
	failxor[1] = 0;

	/* Cache the whole rank window (128 MiB) while checking. */
	enable_cache_region(totalrank << 28, 134217728);
	for (comp3 = 0; comp3 < 2 && failmask != 0xff; comp3++) {
		for (comp1 = 0; comp1 < 16; comp1++)
			for (comp2 = 0; comp2 < 64; comp2++) {
				u32 addr =
				    (totalrank << 28) | (region << 25) | (block
									  << 16)
				    | (comp3 << 12) | (comp2 << 6) | (comp1 <<
								      2);
				failxor[comp1 & 1] |=
				    read32p(addr) ^ get_etalon(flip, addr);
			}
		/* Any nonzero byte in the accumulator fails its lane. */
		for (i = 0; i < 8; i++)
			if ((0xff << (8 * (i % 4))) & failxor[i / 4])
				failmask |= 1 << i;
	}
	disable_cache_region();
	flush_cache((totalrank << 28) | (region << 25) | (block << 16), 16384);
	return failmask;
}
1776 
/* Return 1 iff all eight values are at least 'bound'. */
static int check_bounded(unsigned short *vals, u16 bound)
{
	int idx;

	for (idx = 0; idx < 8; idx++) {
		if (vals[idx] < bound)
			return 0;
	}
	return 1;
}
1786 
/* Per-lane state of the timing-window search driven by do_fsm(). */
enum state {
	BEFORE_USABLE = 0, AT_USABLE = 1, AT_MARGIN = 2, COMPLETE = 3
};
1790 
validate_state(enum state * in)1791 static int validate_state(enum state *in)
1792 {
1793 	int i;
1794 	for (i = 0; i < 8; i++)
1795 		if (in[i] != COMPLETE)
1796 			return 0;
1797 	return 1;
1798 }
1799 
1800 static void
do_fsm(enum state * state,u16 * counter,u8 fail_mask,int margin,int uplimit,u8 * res_low,u8 * res_high,u8 val)1801 do_fsm(enum state *state, u16 *counter,
1802 	u8 fail_mask, int margin, int uplimit,
1803 	u8 *res_low, u8 *res_high, u8 val)
1804 {
1805 	int lane;
1806 
1807 	for (lane = 0; lane < 8; lane++) {
1808 		int is_fail = (fail_mask >> lane) & 1;
1809 		switch (state[lane]) {
1810 		case BEFORE_USABLE:
1811 			if (!is_fail) {
1812 				counter[lane] = 1;
1813 				state[lane] = AT_USABLE;
1814 				break;
1815 			}
1816 			counter[lane] = 0;
1817 			state[lane] = BEFORE_USABLE;
1818 			break;
1819 		case AT_USABLE:
1820 			if (!is_fail) {
1821 				++counter[lane];
1822 				if (counter[lane] >= margin) {
1823 					state[lane] = AT_MARGIN;
1824 					res_low[lane] = val - margin + 1;
1825 					break;
1826 				}
1827 				state[lane] = 1;
1828 				break;
1829 			}
1830 			counter[lane] = 0;
1831 			state[lane] = BEFORE_USABLE;
1832 			break;
1833 		case AT_MARGIN:
1834 			if (is_fail) {
1835 				state[lane] = COMPLETE;
1836 				res_high[lane] = val - 1;
1837 			} else {
1838 				counter[lane]++;
1839 				state[lane] = AT_MARGIN;
1840 				if (val == uplimit) {
1841 					state[lane] = COMPLETE;
1842 					res_high[lane] = uplimit;
1843 				}
1844 			}
1845 			break;
1846 		case COMPLETE:
1847 			break;
1848 		}
1849 	}
1850 }
1851 
1852 static void
train_ram_at_178(struct raminfo * info,u8 channel,int slot,int rank,u8 total_rank,u8 reg_178,int first_run,int niter,timing_bounds_t * timings)1853 train_ram_at_178(struct raminfo *info, u8 channel, int slot, int rank,
1854 		 u8 total_rank, u8 reg_178, int first_run, int niter,
1855 		 timing_bounds_t * timings)
1856 {
1857 	int lane;
1858 	enum state state[8];
1859 	u16 count[8];
1860 	u8 lower_usable[8];
1861 	u8 upper_usable[8];
1862 	unsigned short num_successfully_checked[8];
1863 	u8 reg1b3;
1864 	int i;
1865 
1866 	for (i = 0; i < 8; i++)
1867 		state[i] = BEFORE_USABLE;
1868 
1869 	if (!first_run) {
1870 		int is_all_ok = 1;
1871 		for (lane = 0; lane < 8; lane++)
1872 			if (timings[reg_178][channel][slot][rank][lane].
1873 			    smallest ==
1874 			    timings[reg_178][channel][slot][rank][lane].
1875 			    largest) {
1876 				timings[reg_178][channel][slot][rank][lane].
1877 				    smallest = 0;
1878 				timings[reg_178][channel][slot][rank][lane].
1879 				    largest = 0;
1880 				is_all_ok = 0;
1881 			}
1882 		if (is_all_ok) {
1883 			for (i = 0; i < 8; i++)
1884 				state[i] = COMPLETE;
1885 		}
1886 	}
1887 
1888 	for (reg1b3 = 0; reg1b3 < 0x30 && !validate_state(state); reg1b3++) {
1889 		u8 failmask = 0;
1890 		write_1d0(reg1b3 ^ 32, 0x1b3, 6, 1);
1891 		write_1d0(reg1b3 ^ 32, 0x1a3, 6, 1);
1892 		failmask = check_testing(info, total_rank, 0);
1893 		mchbar_setbits32(0xfb0, 3 << 16);
1894 		do_fsm(state, count, failmask, 5, 47, lower_usable,
1895 		       upper_usable, reg1b3);
1896 	}
1897 
1898 	if (reg1b3) {
1899 		write_1d0(0, 0x1b3, 6, 1);
1900 		write_1d0(0, 0x1a3, 6, 1);
1901 		for (lane = 0; lane < 8; lane++) {
1902 			if (state[lane] == COMPLETE) {
1903 				timings[reg_178][channel][slot][rank][lane].
1904 				    smallest =
1905 				    lower_usable[lane] +
1906 				    (info->training.
1907 				     lane_timings[0][channel][slot][rank][lane]
1908 				     & 0x3F) - 32;
1909 				timings[reg_178][channel][slot][rank][lane].
1910 				    largest =
1911 				    upper_usable[lane] +
1912 				    (info->training.
1913 				     lane_timings[0][channel][slot][rank][lane]
1914 				     & 0x3F) - 32;
1915 			}
1916 		}
1917 	}
1918 
1919 	if (!first_run) {
1920 		for (lane = 0; lane < 8; lane++)
1921 			if (state[lane] == COMPLETE) {
1922 				write_500(info, channel,
1923 					  timings[reg_178][channel][slot][rank]
1924 					  [lane].smallest,
1925 					  get_timing_register_addr(lane, 0,
1926 								   slot, rank),
1927 					  9, 1);
1928 				write_500(info, channel,
1929 					  timings[reg_178][channel][slot][rank]
1930 					  [lane].smallest +
1931 					  info->training.
1932 					  lane_timings[1][channel][slot][rank]
1933 					  [lane]
1934 					  -
1935 					  info->training.
1936 					  lane_timings[0][channel][slot][rank]
1937 					  [lane], get_timing_register_addr(lane,
1938 									   1,
1939 									   slot,
1940 									   rank),
1941 					  9, 1);
1942 				num_successfully_checked[lane] = 0;
1943 			} else
1944 				num_successfully_checked[lane] = -1;
1945 
1946 		do {
1947 			u8 failmask = 0;
1948 			for (i = 0; i < niter; i++) {
1949 				if (failmask == 0xFF)
1950 					break;
1951 				failmask |=
1952 				    check_testing_type2(info, total_rank, 2, i,
1953 							0);
1954 				failmask |=
1955 				    check_testing_type2(info, total_rank, 3, i,
1956 							1);
1957 			}
1958 			mchbar_setbits32(0xfb0, 3 << 16);
1959 			for (lane = 0; lane < 8; lane++)
1960 				if (num_successfully_checked[lane] != 0xffff) {
1961 					if ((1 << lane) & failmask) {
1962 						if (timings[reg_178][channel]
1963 						    [slot][rank][lane].
1964 						    largest <=
1965 						    timings[reg_178][channel]
1966 						    [slot][rank][lane].smallest)
1967 							num_successfully_checked
1968 							    [lane] = -1;
1969 						else {
1970 							num_successfully_checked
1971 							    [lane] = 0;
1972 							timings[reg_178]
1973 							    [channel][slot]
1974 							    [rank][lane].
1975 							    smallest++;
1976 							write_500(info, channel,
1977 								  timings
1978 								  [reg_178]
1979 								  [channel]
1980 								  [slot][rank]
1981 								  [lane].
1982 								  smallest,
1983 								  get_timing_register_addr
1984 								  (lane, 0,
1985 								   slot, rank),
1986 								  9, 1);
1987 							write_500(info, channel,
1988 								  timings
1989 								  [reg_178]
1990 								  [channel]
1991 								  [slot][rank]
1992 								  [lane].
1993 								  smallest +
1994 								  info->
1995 								  training.
1996 								  lane_timings
1997 								  [1][channel]
1998 								  [slot][rank]
1999 								  [lane]
2000 								  -
2001 								  info->
2002 								  training.
2003 								  lane_timings
2004 								  [0][channel]
2005 								  [slot][rank]
2006 								  [lane],
2007 								  get_timing_register_addr
2008 								  (lane, 1,
2009 								   slot, rank),
2010 								  9, 1);
2011 						}
2012 					} else
2013 						num_successfully_checked[lane]
2014 							++;
2015 				}
2016 		}
2017 		while (!check_bounded(num_successfully_checked, 2))
2018 			;
2019 
2020 		for (lane = 0; lane < 8; lane++)
2021 			if (state[lane] == COMPLETE) {
2022 				write_500(info, channel,
2023 					  timings[reg_178][channel][slot][rank]
2024 					  [lane].largest,
2025 					  get_timing_register_addr(lane, 0,
2026 								   slot, rank),
2027 					  9, 1);
2028 				write_500(info, channel,
2029 					  timings[reg_178][channel][slot][rank]
2030 					  [lane].largest +
2031 					  info->training.
2032 					  lane_timings[1][channel][slot][rank]
2033 					  [lane]
2034 					  -
2035 					  info->training.
2036 					  lane_timings[0][channel][slot][rank]
2037 					  [lane], get_timing_register_addr(lane,
2038 									   1,
2039 									   slot,
2040 									   rank),
2041 					  9, 1);
2042 				num_successfully_checked[lane] = 0;
2043 			} else
2044 				num_successfully_checked[lane] = -1;
2045 
2046 		do {
2047 			int failmask = 0;
2048 			for (i = 0; i < niter; i++) {
2049 				if (failmask == 0xFF)
2050 					break;
2051 				failmask |=
2052 				    check_testing_type2(info, total_rank, 2, i,
2053 							0);
2054 				failmask |=
2055 				    check_testing_type2(info, total_rank, 3, i,
2056 							1);
2057 			}
2058 
2059 			mchbar_setbits32(0xfb0, 3 << 16);
2060 			for (lane = 0; lane < 8; lane++) {
2061 				if (num_successfully_checked[lane] != 0xffff) {
2062 					if ((1 << lane) & failmask) {
2063 						if (timings[reg_178][channel]
2064 						    [slot][rank][lane].
2065 						    largest <=
2066 						    timings[reg_178][channel]
2067 						    [slot][rank][lane].
2068 						    smallest) {
2069 							num_successfully_checked
2070 							    [lane] = -1;
2071 						} else {
2072 							num_successfully_checked
2073 							    [lane] = 0;
2074 							timings[reg_178]
2075 							    [channel][slot]
2076 							    [rank][lane].
2077 							    largest--;
2078 							write_500(info, channel,
2079 								  timings
2080 								  [reg_178]
2081 								  [channel]
2082 								  [slot][rank]
2083 								  [lane].
2084 								  largest,
2085 								  get_timing_register_addr
2086 								  (lane, 0,
2087 								   slot, rank),
2088 								  9, 1);
2089 							write_500(info, channel,
2090 								  timings
2091 								  [reg_178]
2092 								  [channel]
2093 								  [slot][rank]
2094 								  [lane].
2095 								  largest +
2096 								  info->
2097 								  training.
2098 								  lane_timings
2099 								  [1][channel]
2100 								  [slot][rank]
2101 								  [lane]
2102 								  -
2103 								  info->
2104 								  training.
2105 								  lane_timings
2106 								  [0][channel]
2107 								  [slot][rank]
2108 								  [lane],
2109 								  get_timing_register_addr
2110 								  (lane, 1,
2111 								   slot, rank),
2112 								  9, 1);
2113 						}
2114 					} else
2115 						num_successfully_checked[lane]
2116 							++;
2117 				}
2118 			}
2119 		}
2120 		while (!check_bounded(num_successfully_checked, 3))
2121 			;
2122 
2123 		for (lane = 0; lane < 8; lane++) {
2124 			write_500(info, channel,
2125 				  info->training.
2126 				  lane_timings[0][channel][slot][rank][lane],
2127 				  get_timing_register_addr(lane, 0, slot, rank),
2128 				  9, 1);
2129 			write_500(info, channel,
2130 				  info->training.
2131 				  lane_timings[1][channel][slot][rank][lane],
2132 				  get_timing_register_addr(lane, 1, slot, rank),
2133 				  9, 1);
2134 			if (timings[reg_178][channel][slot][rank][lane].
2135 			    largest <=
2136 			    timings[reg_178][channel][slot][rank][lane].
2137 			    smallest) {
2138 				timings[reg_178][channel][slot][rank][lane].
2139 				    largest = 0;
2140 				timings[reg_178][channel][slot][rank][lane].
2141 				    smallest = 0;
2142 			}
2143 		}
2144 	}
2145 }
2146 
/*
 * Set the 6-bit mode field at DDR I/O register 0x10b and compensate every
 * populated lane (including the ECC lane, hence lane < 9) by shifting its
 * type-0 timing register by the clock-dependent lut16 offset, so the
 * effective timing point is preserved across the mode change.
 */
static void set_10b(struct raminfo *info, u8 val)
{
	int channel, slot, rank, lane;

	/* Nothing to do if the register already holds the requested value. */
	if (read_1d0(0x10b, 6) == val)
		return;

	write_1d0(val, 0x10b, 6, 1);

	FOR_POPULATED_RANKS_BACKWARDS {
		for (lane = 0; lane < 9; lane++) {
			u16 tval = read_500(info, channel,
					    get_timing_register_addr(lane, 0,
								     slot,
								     rank), 9);
			if (val == 1) {
				/* Subtract the offset, clamping at zero. */
				if (tval >= lut16[info->clock_speed_index])
					tval -= lut16[info->clock_speed_index];
				else
					tval = 0;
			} else {
				tval += lut16[info->clock_speed_index];
			}
			write_500(info, channel, tval,
				  get_timing_register_addr(lane, 0, slot,
							   rank), 9, 1);
		}
	}
}
2175 
set_ecc(int onoff)2176 static void set_ecc(int onoff)
2177 {
2178 	int channel;
2179 	for (channel = 0; channel < NUM_CHANNELS; channel++) {
2180 		u8 t;
2181 		t = mchbar_read8((channel << 10) + 0x5f8);
2182 		if (onoff)
2183 			t |= 1;
2184 		else
2185 			t &= ~1;
2186 		mchbar_write8((channel << 10) + 0x5f8, t);
2187 	}
2188 }
2189 
/*
 * Program DDR I/O register 0x178 (7-bit field).  The caller-facing value
 * is remapped: inputs >= 31 map to 0..(val-31), smaller inputs map to the
 * mirrored range 63-val; the result is then doubled before being written.
 */
static void set_178(u8 val)
{
	u8 mapped;

	if (val < 31)
		mapped = 63 - val;
	else
		mapped = val - 31;

	write_1d0(2 * mapped, 0x178, 7, 1);
}
2199 
2200 static void
write_500_timings_type(struct raminfo * info,int channel,int slot,int rank,int type)2201 write_500_timings_type(struct raminfo *info, int channel, int slot, int rank,
2202 		       int type)
2203 {
2204 	int lane;
2205 
2206 	for (lane = 0; lane < 8; lane++)
2207 		write_500(info, channel,
2208 			  info->training.
2209 			  lane_timings[type][channel][slot][rank][lane],
2210 			  get_timing_register_addr(lane, type, slot, rank), 9,
2211 			  0);
2212 }
2213 
/*
 * Sweep the 6-bit timing offset at DDR I/O register 0x1bb (values applied
 * XOR 32) for one rank, using write/check test patterns and the per-lane
 * state machine (do_fsm) to find each lane's usable window.  The timing-3
 * register is then re-centered within that window (optionally biased on
 * non-zero silicon revisions), and the discovered bounds and base offset
 * are recorded in info->training for later cache validation.
 * Dies if any lane never reaches a usable window.
 */
static void
try_timing_offsets(struct raminfo *info, int channel,
		   int slot, int rank, int totalrank)
{
	u16 count[8];
	enum state state[8];
	u8 lower_usable[8], upper_usable[8];
	int lane;
	int i;
	int flip = 1;
	int timing_offset;

	/* All lanes start below their usable window. */
	for (i = 0; i < 8; i++)
		state[i] = BEFORE_USABLE;

	memset(count, 0, sizeof(count));

	/* Park timing register 3 at the stored timing-2 value + 32 while
	   sweeping, so the 0x1bb offset (centered at 32) scans around it. */
	for (lane = 0; lane < 8; lane++)
		write_500(info, channel,
			  info->training.
			  lane_timings[2][channel][slot][rank][lane] + 32,
			  get_timing_register_addr(lane, 3, slot, rank), 9, 1);

	/* Sweep all 64 offsets, stopping early once every lane's window has
	   been fully characterized. */
	for (timing_offset = 0; !validate_state(state) && timing_offset < 64;
	     timing_offset++) {
		u8 failmask;
		write_1d0(timing_offset ^ 32, 0x1bb, 6, 1);
		failmask = 0;
		/* Test twice with alternating pattern polarity; bail out of
		   the inner loop once every lane has already failed. */
		for (i = 0; i < 2 && failmask != 0xff; i++) {
			flip = !flip;
			write_testing(info, totalrank, flip);
			failmask |= check_testing(info, totalrank, flip);
		}
		do_fsm(state, count, failmask, 10, 63, lower_usable,
		       upper_usable, timing_offset);
	}
	/* Reset the sweep offset register. */
	write_1d0(0, 0x1bb, 6, 1);
	dump_timings(info);
	if (!validate_state(state))
		die("Couldn't discover DRAM timings (1)\n");

	for (lane = 0; lane < 8; lane++) {
		u8 bias = 0;

		if (info->silicon_revision) {
			int usable_length;

			/* On newer silicon, bias the center point downward by
			   up to 2 when the usable window is >= 20 units. */
			usable_length = upper_usable[lane] - lower_usable[lane];
			if (usable_length >= 20) {
				bias = usable_length / 2 - 10;
				if (bias >= 2)
					bias = 2;
			}
		}
		/* Final timing 3 = stored timing 2 + window midpoint - bias. */
		write_500(info, channel,
			  info->training.
			  lane_timings[2][channel][slot][rank][lane] +
			  (upper_usable[lane] + lower_usable[lane]) / 2 - bias,
			  get_timing_register_addr(lane, 3, slot, rank), 9, 1);
		/* Record absolute window bounds and the base offset so
		   try_cached_training() can re-validate them on later boots. */
		info->training.timing2_bounds[channel][slot][rank][lane][0] =
		    info->training.lane_timings[2][channel][slot][rank][lane] +
		    lower_usable[lane];
		info->training.timing2_bounds[channel][slot][rank][lane][1] =
		    info->training.lane_timings[2][channel][slot][rank][lane] +
		    upper_usable[lane];
		info->training.timing2_offset[channel][slot][rank][lane] =
		    info->training.lane_timings[2][channel][slot][rank][lane];
	}
}
2283 
/*
 * Pick the final timing value for one lane as a weighted average of the
 * usable-window midpoints measured at center_178 and at center_178 +/- span.
 * The weight/span defaults are overridden for specific silicon revision,
 * channel, slot and lane combinations (quirks, presumably from vendor
 * tuning — exact rationale undocumented).  The result is then nudged so
 * that, when possible, at least ~10 units of margin remain on each side of
 * the center window.  Dies if no sampled window is wide enough (>= 5).
 */
static u8
choose_training(struct raminfo *info, int channel, int slot, int rank,
		int lane, timing_bounds_t * timings, u8 center_178)
{
	u16 central_weight;
	u16 side_weight;
	unsigned int sum = 0, count = 0;
	u8 span;
	u8 lower_margin, upper_margin;
	u8 reg_178;
	u8 result;

	/* Default: equal weighting, sample points 12 apart. */
	span = 12;
	central_weight = 20;
	side_weight = 20;
	if (info->silicon_revision == 1 && channel == 1) {
		central_weight = 5;
		side_weight = 20;
		/* Widen the span when channel 1 has an asymmetric rank
		   population (rank 0 xor rank 2 populated). */
		if ((info->
		     populated_ranks_mask[1] ^ (info->
						populated_ranks_mask[1] >> 2)) &
		    1)
			span = 18;
	}
	if ((info->populated_ranks_mask[0] & 5) == 5) {
		central_weight = 20;
		side_weight = 20;
	}
	/* Per-lane quirks for fast clocks with both channel-0 ranks
	   populated, slot 1. */
	if (info->clock_speed_index >= 2
	    && (info->populated_ranks_mask[0] & 5) == 5 && slot == 1) {
		if (info->silicon_revision == 1) {
			switch (channel) {
			case 0:
				if (lane == 1) {
					central_weight = 10;
					side_weight = 20;
				}
				break;
			case 1:
				if (lane == 6) {
					side_weight = 5;
					central_weight = 20;
				}
				break;
			}
		}
		if (info->silicon_revision == 0 && channel == 0 && lane == 0) {
			side_weight = 5;
			central_weight = 20;
		}
	}
	/* Accumulate weight * (largest + smallest) over the three sample
	   points; windows narrower than 5 are ignored.  NOTE: u8 loop
	   arithmetic — center_178 +/- span is evaluated modulo 256. */
	for (reg_178 = center_178 - span; reg_178 <= center_178 + span;
	     reg_178 += span) {
		u8 smallest;
		u8 largest;
		largest = timings[reg_178][channel][slot][rank][lane].largest;
		smallest = timings[reg_178][channel][slot][rank][lane].smallest;
		if (largest - smallest + 1 >= 5) {
			unsigned int weight;
			if (reg_178 == center_178)
				weight = central_weight;
			else
				weight = side_weight;
			sum += weight * (largest + smallest);
			count += weight;
		}
	}
	dump_timings(info);
	if (count == 0)
		die("Couldn't discover DRAM timings (2)\n");
	/* Weighted average of window midpoints: sum holds weighted
	   (largest + smallest), so divide by 2 * count. */
	result = sum / (2 * count);
	lower_margin =
	    result - timings[center_178][channel][slot][rank][lane].smallest;
	upper_margin =
	    timings[center_178][channel][slot][rank][lane].largest - result;
	/* Rebalance toward whichever side has less than 10 units left. */
	if (upper_margin < 10 && lower_margin > 10)
		result -= MIN(lower_margin - 10, 10 - upper_margin);
	if (upper_margin > 10 && lower_margin < 10)
		result += MIN(upper_margin - 10, 10 - lower_margin);
	return result;
}
2365 
2366 #define STANDARD_MIN_MARGIN 5
2367 
/*
 * Choose the center value for register 0x178 from the swept timing data.
 * For each sampled reg178 value, the worst-case (minimum) per-lane margin
 * across all populated ranks is computed; values with margin above
 * STANDARD_MIN_MARGIN contribute to a weighted average that becomes the
 * returned center.  A second pass finds the highest margin threshold that
 * still yields a >= 0x21-wide contiguous usable range, recording the
 * (inset) smallest/largest bounds in info->training.
 * Dies if no sampled value has sufficient margin.
 */
static u8 choose_reg178(struct raminfo *info, timing_bounds_t * timings)
{
	u16 margin[64];
	int lane, rank, slot, channel;
	u8 reg178;
	int count = 0, sum = 0;

	for (reg178 = reg178_min[info->clock_speed_index];
	     reg178 < reg178_max[info->clock_speed_index];
	     reg178 += reg178_step[info->clock_speed_index]) {
		/* -1 wraps to 0xffff on u16: start at "no limit" and take
		   the minimum over all lanes/ranks below. */
		margin[reg178] = -1;
		FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
			int curmargin =
			    timings[reg178][channel][slot][rank][lane].largest -
			    timings[reg178][channel][slot][rank][lane].
			    smallest + 1;
			if (curmargin < margin[reg178])
				margin[reg178] = curmargin;
		}
		if (margin[reg178] >= STANDARD_MIN_MARGIN) {
			/* Weight each candidate by how far its worst-case
			   margin exceeds the minimum acceptable margin. */
			u16 weight;
			weight = margin[reg178] - STANDARD_MIN_MARGIN;
			sum += weight * reg178;
			count += weight;
		}
	}
	dump_timings(info);
	if (count == 0)
		die("Couldn't discover DRAM timings (3)\n");

	u8 threshold;

	/* Find the highest threshold (30 down to 5) for which the total
	   usable reg178 range is at least 0x21 wide; as a side effect,
	   record the inset smallest/largest bounds for that threshold. */
	for (threshold = 30; threshold >= 5; threshold--) {
		int usable_length = 0;
		int smallest_fount = 0;	/* sic: "found" */
		for (reg178 = reg178_min[info->clock_speed_index];
		     reg178 < reg178_max[info->clock_speed_index];
		     reg178 += reg178_step[info->clock_speed_index])
			if (margin[reg178] >= threshold) {
				usable_length +=
				    reg178_step[info->clock_speed_index];
				/* Largest bound: last passing value minus
				   two steps (inset for safety). */
				info->training.reg178_largest =
				    reg178 -
				    2 * reg178_step[info->clock_speed_index];

				if (!smallest_fount) {
					/* Smallest bound: first passing value
					   plus one step. */
					smallest_fount = 1;
					info->training.reg178_smallest =
					    reg178 +
					    reg178_step[info->
							clock_speed_index];
				}
			}
		if (usable_length >= 0x21)
			break;
	}

	/* Weighted average of the acceptable candidates. */
	return sum / count;
}
2427 
check_cached_sanity(struct raminfo * info)2428 static int check_cached_sanity(struct raminfo *info)
2429 {
2430 	int lane;
2431 	int slot, rank;
2432 	int channel;
2433 
2434 	if (!info->cached_training)
2435 		return 0;
2436 
2437 	for (channel = 0; channel < NUM_CHANNELS; channel++)
2438 		for (slot = 0; slot < NUM_SLOTS; slot++)
2439 			for (rank = 0; rank < NUM_RANKS; rank++)
2440 				for (lane = 0; lane < 8 + info->use_ecc; lane++) {
2441 					u16 cached_value, estimation_value;
2442 					cached_value =
2443 					    info->cached_training->
2444 					    lane_timings[1][channel][slot][rank]
2445 					    [lane];
2446 					if (cached_value >= 0x18
2447 					    && cached_value <= 0x1E7) {
2448 						estimation_value =
2449 						    info->training.
2450 						    lane_timings[1][channel]
2451 						    [slot][rank][lane];
2452 						if (estimation_value <
2453 						    cached_value - 24)
2454 							return 0;
2455 						if (estimation_value >
2456 						    cached_value + 24)
2457 							return 0;
2458 					}
2459 				}
2460 	return 1;
2461 }
2462 
/*
 * Attempt to reuse training results cached from a previous boot instead of
 * running the full (slow) training.  After a plausibility check, the cached
 * bounds are programmed into the hardware and verified: at each bound the
 * memory test must pass, and just outside each bound it must fail.  Any
 * mismatch falls through to the `fail` path, which restores the hardware
 * timings and reports failure so the caller runs full training.
 * Returns 1 when the cached training was validated and applied, else 0.
 */
static int try_cached_training(struct raminfo *info)
{
	u8 saved_243[2];
	u8 tm;

	int channel, slot, rank, lane;
	int flip = 1;
	int i, j;

	if (!check_cached_sanity(info))
		return 0;

	/* Adopt cached reg178 values and timing tables as the working set. */
	info->training.reg178_center = info->cached_training->reg178_center;
	info->training.reg178_smallest = info->cached_training->reg178_smallest;
	info->training.reg178_largest = info->cached_training->reg178_largest;
	memcpy(&info->training.timing_bounds,
	       &info->cached_training->timing_bounds,
	       sizeof(info->training.timing_bounds));
	memcpy(&info->training.timing_offset,
	       &info->cached_training->timing_offset,
	       sizeof(info->training.timing_offset));

	/* Enter test mode: set 0x142, force bit 1 in MCHBAR 0x243/0x643
	   (saved for restoration), disable ECC during verification. */
	write_1d0(2, 0x142, 3, 1);
	saved_243[0] = mchbar_read8(0x243);
	saved_243[1] = mchbar_read8(0x643);
	mchbar_write8(0x243, saved_243[0] | 2);
	mchbar_write8(0x643, saved_243[1] | 2);
	set_ecc(0);
	pci_write_config16(NORTHBRIDGE, 0xc8, 3);
	if (read_1d0(0x10b, 6) & 1)
		set_10b(info, 0);
	/* tm == 0 verifies at reg178_smallest, tm == 1 at reg178_largest. */
	for (tm = 0; tm < 2; tm++) {
		int totalrank;

		set_178(tm ? info->cached_training->reg178_largest : info->
			cached_training->reg178_smallest);

		totalrank = 0;
		/* Check timing ranges. With i == 0 we check smallest one and with
		   i == 1 the largest bound. With j == 0 we check that on the bound
		   it still works whereas with j == 1 we check that just outside of
		   bound we fail.
		 */
		FOR_POPULATED_RANKS_BACKWARDS {
			for (i = 0; i < 2; i++) {
				for (lane = 0; lane < 8; lane++) {
					/* Program the cached timing-2 bound
					   under test into timing reg 3. */
					write_500(info, channel,
						  info->cached_training->
						  timing2_bounds[channel][slot]
						  [rank][lane][i],
						  get_timing_register_addr(lane,
									   3,
									   slot,
									   rank),
						  9, 1);

					/* Only restore the timing-2 base once
					   (on the first pass). */
					if (!i)
						write_500(info, channel,
							  info->
							  cached_training->
							  timing2_offset
							  [channel][slot][rank]
							  [lane],
							  get_timing_register_addr
							  (lane, 2, slot, rank),
							  9, 1);
					/* Timing 0 = cached smallest (i==0) or
					   largest (i==1) bound. */
					write_500(info, channel,
						  i ? info->cached_training->
						  timing_bounds[tm][channel]
						  [slot][rank][lane].
						  largest : info->
						  cached_training->
						  timing_bounds[tm][channel]
						  [slot][rank][lane].smallest,
						  get_timing_register_addr(lane,
									   0,
									   slot,
									   rank),
						  9, 1);
					/* Timing 1 = same bound plus the
					   cached timing offset, rebased by
					   -64. */
					write_500(info, channel,
						  info->cached_training->
						  timing_offset[channel][slot]
						  [rank][lane] +
						  (i ? info->cached_training->
						   timing_bounds[tm][channel]
						   [slot][rank][lane].
						   largest : info->
						   cached_training->
						   timing_bounds[tm][channel]
						   [slot][rank][lane].
						   smallest) - 64,
						  get_timing_register_addr(lane,
									   1,
									   slot,
									   rank),
						  9, 1);
				}
				for (j = 0; j < 2; j++) {
					u8 failmask;
					u8 expected_failmask;
					char reg1b3;

					/* Nudge registers 0x1bb/0x1b3/0x1a3
					   by +/-(4 or 5) around the bound; the
					   sign depends on which bound (i) and
					   which side (j) is being probed. */
					reg1b3 = (j == 1) + 4;
					reg1b3 =
					    j == i ? reg1b3 : (-reg1b3) & 0x3f;
					write_1d0(reg1b3, 0x1bb, 6, 1);
					write_1d0(reg1b3, 0x1b3, 6, 1);
					write_1d0(reg1b3, 0x1a3, 6, 1);

					flip = !flip;
					write_testing(info, totalrank, flip);
					failmask =
					    check_testing(info, totalrank,
							  flip);
					/* On the bound (j==0) all lanes must
					   pass; outside it (j==1) all lanes
					   must fail. */
					expected_failmask =
					    j == 0 ? 0x00 : 0xff;
					if (failmask != expected_failmask)
						goto fail;
				}
			}
			totalrank++;
		}
	}

	/* Validation passed: apply the cached center and commit. */
	set_178(info->cached_training->reg178_center);
	if (info->use_ecc)
		set_ecc(1);
	write_training_data(info);
	/* 322 == 0x142: undo the earlier write_1d0(2, 0x142, ...). */
	write_1d0(0, 322, 3, 1);
	info->training = *info->cached_training;

	write_1d0(0, 0x1bb, 6, 1);
	write_1d0(0, 0x1b3, 6, 1);
	write_1d0(0, 0x1a3, 6, 1);
	mchbar_write8(0x243, saved_243[0]);
	mchbar_write8(0x643, saved_243[1]);

	return 1;

fail:
	/* Restore the hardware timing registers from info->training before
	   reporting failure, so full training starts from a clean state. */
	FOR_POPULATED_RANKS {
		write_500_timings_type(info, channel, slot, rank, 1);
		write_500_timings_type(info, channel, slot, rank, 2);
		write_500_timings_type(info, channel, slot, rank, 3);
	}

	write_1d0(0, 0x1bb, 6, 1);
	write_1d0(0, 0x1b3, 6, 1);
	write_1d0(0, 0x1a3, 6, 1);
	mchbar_write8(0x243, saved_243[0]);
	mchbar_write8(0x643, saved_243[1]);

	return 0;
}
2617 
/*
 * Full DRAM read/write training sequence:
 *   1. Write test patterns for every populated rank.
 *   2. Coarse sweep: train all ranks at every reg178 sample point
 *      (first_run = 1) to fill the timings table.
 *   3. Pick reg178_center from the sweep and save per-rank bounds.
 *   4. Fine pass: re-train (first_run = 0) around the chosen center,
 *      with a wider span for channel 1 on silicon revision 1 with
 *      asymmetric rank population.
 *   5. Program the final per-lane timings (choose_training) and the
 *      timing-offset windows (try_timing_offsets).
 * ECC is disabled for the duration; MCHBAR 0x243/0x643 are saved/restored.
 */
static void do_ram_training(struct raminfo *info)
{
	u8 saved_243[2];
	int totalrank = 0;
	u8 reg_178;
	int niter;

	timing_bounds_t *timings = timings_car;
	int lane, rank, slot, channel;
	u8 reg178_center;

	write_1d0(2, 0x142, 3, 1);
	saved_243[0] = mchbar_read8(0x243);
	saved_243[1] = mchbar_read8(0x643);
	mchbar_write8(0x243, saved_243[0] | 2);
	mchbar_write8(0x643, saved_243[1] | 2);
	/* More test iterations at higher clock speeds. */
	switch (info->clock_speed_index) {
	case 0:
		niter = 5;
		break;
	case 1:
		niter = 10;
		break;
	default:
		niter = 19;
		break;
	}
	set_ecc(0);

	/* Step 1: lay down the test patterns for every populated rank. */
	FOR_POPULATED_RANKS_BACKWARDS {
		int i;

		write_500_timings_type(info, channel, slot, rank, 0);

		write_testing(info, totalrank, 0);
		for (i = 0; i < niter; i++) {
			write_testing_type2(info, totalrank, 2, i, 0);
			write_testing_type2(info, totalrank, 3, i, 1);
		}
		pci_write_config8(NORTHBRIDGE, 0xc0, 0x01);
		totalrank++;
	}

	/* Clear the portion of the timings table that the sweep will use. */
	if (reg178_min[info->clock_speed_index] <
	    reg178_max[info->clock_speed_index])
		memset(timings[reg178_min[info->clock_speed_index]], 0,
		       sizeof(timings[0]) *
		       (reg178_max[info->clock_speed_index] -
			reg178_min[info->clock_speed_index]));
	/* Step 2: coarse sweep over the clock-dependent reg178 range. */
	for (reg_178 = reg178_min[info->clock_speed_index];
	     reg_178 < reg178_max[info->clock_speed_index];
	     reg_178 += reg178_step[info->clock_speed_index]) {
		totalrank = 0;
		set_178(reg_178);
		for (channel = NUM_CHANNELS - 1; channel >= 0; channel--)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					/* Zero this rank's 8 lane entries
					   (8 x 2 bytes = 16). */
					memset(&timings[reg_178][channel][slot]
					       [rank][0].smallest, 0, 16);
					if (info->
					    populated_ranks[channel][slot]
					    [rank]) {
						train_ram_at_178(info, channel,
								 slot, rank,
								 totalrank,
								 reg_178, 1,
								 niter,
								 timings);
						totalrank++;
					}
				}
	}

	/* Step 3: derive the center point and the smallest/largest bounds. */
	reg178_center = choose_reg178(info, timings);

	/* Save the windows measured at the smallest/largest bounds, and the
	   timing-1 minus timing-0 delta (rebased by +64) per lane. */
	FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
		info->training.timing_bounds[0][channel][slot][rank][lane].
		    smallest =
		    timings[info->training.
			    reg178_smallest][channel][slot][rank][lane].
		    smallest;
		info->training.timing_bounds[0][channel][slot][rank][lane].
		    largest =
		    timings[info->training.
			    reg178_smallest][channel][slot][rank][lane].largest;
		info->training.timing_bounds[1][channel][slot][rank][lane].
		    smallest =
		    timings[info->training.
			    reg178_largest][channel][slot][rank][lane].smallest;
		info->training.timing_bounds[1][channel][slot][rank][lane].
		    largest =
		    timings[info->training.
			    reg178_largest][channel][slot][rank][lane].largest;
		info->training.timing_offset[channel][slot][rank][lane] =
		    info->training.lane_timings[1][channel][slot][rank][lane]
		    -
		    info->training.lane_timings[0][channel][slot][rank][lane] +
		    64;
	}

	/* Step 4: fine pass around the center.  Silicon revision 1 with an
	   asymmetric channel-1 rank population trains channel 1 with a wider
	   span (+/-18) than channel 0 (+/-12); otherwise all channels use
	   +/-12. */
	if (info->silicon_revision == 1
	    && (info->
		populated_ranks_mask[1] ^ (info->
					   populated_ranks_mask[1] >> 2)) & 1) {
		int ranks_after_channel1;

		totalrank = 0;
		for (reg_178 = reg178_center - 18;
		     reg_178 <= reg178_center + 18; reg_178 += 18) {
			totalrank = 0;
			set_178(reg_178);
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					if (info->
					    populated_ranks[1][slot][rank]) {
						train_ram_at_178(info, 1, slot,
								 rank,
								 totalrank,
								 reg_178, 0,
								 niter,
								 timings);
						totalrank++;
					}
				}
		}
		ranks_after_channel1 = totalrank;

		for (reg_178 = reg178_center - 12;
		     reg_178 <= reg178_center + 12; reg_178 += 12) {
			/* Channel-0 rank numbering continues after
			   channel 1's ranks. */
			totalrank = ranks_after_channel1;
			set_178(reg_178);
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					if (info->
					    populated_ranks[0][slot][rank]) {
						train_ram_at_178(info, 0, slot,
								 rank,
								 totalrank,
								 reg_178, 0,
								 niter,
								 timings);
						totalrank++;
					}
		}
	} else {
		for (reg_178 = reg178_center - 12;
		     reg_178 <= reg178_center + 12; reg_178 += 12) {
			totalrank = 0;
			set_178(reg_178);
			FOR_POPULATED_RANKS_BACKWARDS {
				train_ram_at_178(info, channel, slot, rank,
						 totalrank, reg_178, 0, niter,
						 timings);
				totalrank++;
			}
		}
	}

	/* Step 5: program the final per-lane timings at the chosen center. */
	set_178(reg178_center);
	FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
		u16 tm0;

		tm0 =
		    choose_training(info, channel, slot, rank, lane, timings,
				    reg178_center);
		write_500(info, channel, tm0,
			  get_timing_register_addr(lane, 0, slot, rank), 9, 1);
		/* Timing 1 tracks timing 0 by the stored lane delta. */
		write_500(info, channel,
			  tm0 +
			  info->training.
			  lane_timings[1][channel][slot][rank][lane] -
			  info->training.
			  lane_timings[0][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 1, slot, rank), 9, 1);
	}

	totalrank = 0;
	FOR_POPULATED_RANKS_BACKWARDS {
		try_timing_offsets(info, channel, slot, rank, totalrank);
		totalrank++;
	}
	mchbar_write8(0x243, saved_243[0]);
	mchbar_write8(0x643, saved_243[1]);
	write_1d0(0, 0x142, 3, 1);
	info->training.reg178_center = reg178_center;
}
2804 
ram_training(struct raminfo * info)2805 static void ram_training(struct raminfo *info)
2806 {
2807 	u16 saved_fc4;
2808 
2809 	saved_fc4 = mchbar_read16(0xfc4);
2810 	mchbar_write16(0xfc4, 0xffff);
2811 
2812 	if (info->revision >= 8)
2813 		read_4090(info);
2814 
2815 	if (!try_cached_training(info))
2816 		do_ram_training(info);
2817 	if ((info->silicon_revision == 2 || info->silicon_revision == 3)
2818 	    && info->clock_speed_index < 2)
2819 		set_10b(info, 1);
2820 	mchbar_write16(0xfc4, saved_fc4);
2821 }
2822 
get_max_timing(struct raminfo * info,int channel)2823 u16 get_max_timing(struct raminfo *info, int channel)
2824 {
2825 	int slot, rank, lane;
2826 	u16 ret = 0;
2827 
2828 	if ((mchbar_read8(0x2ca8) >> 2) < 1)
2829 		return 384;
2830 
2831 	if (info->revision < 8)
2832 		return 256;
2833 
2834 	for (slot = 0; slot < NUM_SLOTS; slot++)
2835 		for (rank = 0; rank < NUM_RANKS; rank++)
2836 			if (info->populated_ranks[channel][slot][rank])
2837 				for (lane = 0; lane < 8 + info->use_ecc; lane++)
2838 					ret = MAX(ret, read_500(info, channel,
2839 								get_timing_register_addr
2840 								(lane, 0, slot,
2841 								 rank), 9));
2842 	return ret;
2843 }
2844 
/* Early DMI link and related GPIO setup.  The register values appear to
   be replayed from vendor firmware traces (gav() logs the reads); exact
   register semantics are undocumented. */
static void dmi_setup(void)
{
	gav(dmibar_read8(0x254));
	dmibar_write8(0x254, 1 << 0);
	dmibar_write16(0x1b8, 0x18f2);
	mchbar_clrsetbits16(0x48, ~0, 1 << 1);

	dmibar_setbits32(0xd68, 1 << 27);

	/* Clear bits 18 and 20, set bit 22 of the GPIO register at 0x38. */
	outl((gav(inl(DEFAULT_GPIOBASE | 0x38)) & ~0x140000) | 0x400000,
	     DEFAULT_GPIOBASE | 0x38);
	gav(inb(DEFAULT_GPIOBASE | 0xe));	// = 0xfdcaff6e
}
2858 
/*
 * Early chipset initialization: detect and recover from interrupted boots,
 * set up DMI, and program graphics stolen-memory (GGC) plus IGD-related
 * registers when the internal graphics device is enabled.
 */
void chipset_init(const int s3resume)
{
	u8 x2ca8;
	u16 ggc;
	u8 gfxsize;

	/* MCHBAR 0x2ca8 is used as a boot-progress scratchpad elsewhere in
	   this file.  A leftover value here means the previous raminit did
	   not complete cleanly, so reset to get back to a known state. */
	x2ca8 = mchbar_read8(0x2ca8);
	if ((x2ca8 & 1) || (x2ca8 == 8 && !s3resume)) {
		printk(BIOS_DEBUG, "soft reset detected, rebooting properly\n");
		mchbar_write8(0x2ca8, 0);
		system_reset();
	}

	dmi_setup();

	mchbar_write16(0x1170, 0xa880);
	mchbar_write8(0x11c1, 1 << 0);
	mchbar_write16(0x1170, 0xb880);
	mchbar_clrsetbits8(0x1210, ~0, 0x84);

	gfxsize = get_uint_option("gfx_uma_size", 0);	/* 0 for 32MB */

	/* Graphics control: stolen-memory size field derived from the
	   gfx_uma_size option. */
	ggc = 0xb00 | ((gfxsize + 5) << 4);

	/* Written once with bit 1 set, then rewritten without it below —
	   NOTE(review): presumably temporarily disabling IGD VGA decode
	   during setup; confirm against the GGC register definition. */
	pci_write_config16(NORTHBRIDGE, GGC, ggc | 2);

	u16 deven;
	deven = pci_read_config16(NORTHBRIDGE, DEVEN);	// = 0x3

	/* Extra programming only when the IGD function is enabled in DEVEN. */
	if (deven & 8) {
		mchbar_write8(0x2c30, 1 << 5);
		pci_read_config8(NORTHBRIDGE, 0x8);	// = 0x18
		mchbar_setbits16(0x2c30, 1 << 9);
		mchbar_write16(0x2c32, 0x434);
		mchbar_clrsetbits32(0x2c44, ~0, 0x1053687);
		pci_read_config8(GMA, MSAC);	// = 0x2
		pci_write_config8(GMA, MSAC, 0x2);
		/* Reads before writes mirror the vendor-firmware access
		   pattern (read discarded). */
		RCBA8(0x2318);
		RCBA8(0x2318) = 0x47;
		RCBA8(0x2320);
		RCBA8(0x2320) = 0xfc;
	}

	mchbar_clrsetbits32(0x30, ~0, 0x40);

	pci_write_config16(NORTHBRIDGE, GGC, ggc);
	gav(RCBA32(0x3428));
	RCBA32(0x3428) = 0x1d;
}
2908 
get_bits_420(const u32 reg32)2909 static u8 get_bits_420(const u32 reg32)
2910 {
2911 	u8 val = 0;
2912 	val |= (reg32 >> 4) & (1 << 0);
2913 	val |= (reg32 >> 2) & (1 << 1);
2914 	val |= (reg32 >> 0) & (1 << 2);
2915 	return val;
2916 }
2917 
raminit(const int s3resume,const u8 * spd_addrmap)2918 void raminit(const int s3resume, const u8 *spd_addrmap)
2919 {
2920 	unsigned int channel, slot, lane, rank;
2921 	struct raminfo info;
2922 	u8 x2ca8;
2923 	int cbmem_wasnot_inited;
2924 
2925 	x2ca8 = mchbar_read8(0x2ca8);
2926 
2927 	printk(RAM_DEBUG, "Scratchpad MCHBAR8(0x2ca8): 0x%04x\n", x2ca8);
2928 
2929 	memset(&info, 0x5a, sizeof(info));
2930 
2931 	info.last_500_command[0] = 0;
2932 	info.last_500_command[1] = 0;
2933 
2934 	info.board_lane_delay[0] = 0x14;
2935 	info.board_lane_delay[1] = 0x07;
2936 	info.board_lane_delay[2] = 0x07;
2937 	info.board_lane_delay[3] = 0x08;
2938 	info.board_lane_delay[4] = 0x56;
2939 	info.board_lane_delay[5] = 0x04;
2940 	info.board_lane_delay[6] = 0x04;
2941 	info.board_lane_delay[7] = 0x05;
2942 	info.board_lane_delay[8] = 0x10;
2943 
2944 	info.training.reg_178 = 0;
2945 	info.training.reg_10b = 0;
2946 
2947 	/* Wait for some bit, maybe TXT clear. */
2948 	while (!(read8((u8 *)0xfed40000) & (1 << 7)))
2949 		;
2950 
2951 	/* Wait for ME to be ready */
2952 	intel_early_me_init();
2953 	info.memory_reserved_for_heci_mb = intel_early_me_uma_size();
2954 
2955 	/* before SPD */
2956 	timestamp_add_now(101);
2957 
2958 	if (!s3resume || 1) {	// possible error
2959 		memset(&info.populated_ranks, 0, sizeof(info.populated_ranks));
2960 
2961 		info.use_ecc = 1;
2962 		for (channel = 0; channel < NUM_CHANNELS; channel++)
2963 			for (slot = 0; slot < NUM_SLOTS; slot++) {
2964 				int v;
2965 				int try;
2966 				int addr;
2967 				const u8 useful_addresses[] = {
2968 					DEVICE_TYPE,
2969 					MODULE_TYPE,
2970 					DENSITY,
2971 					RANKS_AND_DQ,
2972 					MEMORY_BUS_WIDTH,
2973 					TIMEBASE_DIVIDEND,
2974 					TIMEBASE_DIVISOR,
2975 					CYCLETIME,
2976 					CAS_LATENCIES_LSB,
2977 					CAS_LATENCIES_MSB,
2978 					CAS_LATENCY_TIME,
2979 					0x11, 0x12, 0x13, 0x14, 0x15,
2980 					0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b,
2981 					    0x1c, 0x1d,
2982 					THERMAL_AND_REFRESH,
2983 					0x20,
2984 					REFERENCE_RAW_CARD_USED,
2985 					RANK1_ADDRESS_MAPPING,
2986 					0x75, 0x76, 0x77, 0x78,
2987 					0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e,
2988 					    0x7f, 0x80, 0x81, 0x82, 0x83, 0x84,
2989 					    0x85, 0x86, 0x87, 0x88,
2990 					0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
2991 					    0x8f, 0x90, 0x91, 0x92, 0x93, 0x94,
2992 					    0x95
2993 				};
2994 				if (!spd_addrmap[2 * channel + slot])
2995 					continue;
2996 				for (try = 0; try < 5; try++) {
2997 					v = smbus_read_byte(spd_addrmap[2 * channel + slot],
2998 							    DEVICE_TYPE);
2999 					if (v >= 0)
3000 						break;
3001 				}
3002 				if (v < 0)
3003 					continue;
3004 				for (addr = 0;
3005 				     addr <
3006 				     ARRAY_SIZE(useful_addresses); addr++)
3007 					gav(info.
3008 					    spd[channel][0][useful_addresses
3009 							    [addr]] =
3010 					    smbus_read_byte(spd_addrmap[2 * channel + slot],
3011 							    useful_addresses
3012 							    [addr]));
3013 				if (info.spd[channel][0][DEVICE_TYPE] != 11)
3014 					die("Only DDR3 is supported");
3015 
3016 				v = info.spd[channel][0][RANKS_AND_DQ];
3017 				info.populated_ranks[channel][0][0] = 1;
3018 				info.populated_ranks[channel][0][1] =
3019 				    ((v >> 3) & 7);
3020 				if (((v >> 3) & 7) > 1)
3021 					die("At most 2 ranks are supported");
3022 				if ((v & 7) == 0 || (v & 7) > 2)
3023 					die("Only x8 and x16 modules are supported");
3024 				if ((info.
3025 				     spd[channel][slot][MODULE_TYPE] & 0xF) != 2
3026 				    && (info.
3027 					spd[channel][slot][MODULE_TYPE] & 0xF)
3028 				    != 3)
3029 					die("Registered memory is not supported");
3030 				info.is_x16_module[channel][0] = (v & 7) - 1;
3031 				info.density[channel][slot] =
3032 				    info.spd[channel][slot][DENSITY] & 0xF;
3033 				if (!
3034 				    (info.
3035 				     spd[channel][slot][MEMORY_BUS_WIDTH] &
3036 				     0x18))
3037 					info.use_ecc = 0;
3038 			}
3039 
3040 		gav(0x55);
3041 
3042 		for (channel = 0; channel < NUM_CHANNELS; channel++) {
3043 			int v = 0;
3044 			for (slot = 0; slot < NUM_SLOTS; slot++)
3045 				for (rank = 0; rank < NUM_RANKS; rank++)
3046 					v |= info.
3047 					    populated_ranks[channel][slot][rank]
3048 					    << (2 * slot + rank);
3049 			info.populated_ranks_mask[channel] = v;
3050 		}
3051 
3052 		gav(0x55);
3053 
3054 		gav(pci_read_config32(NORTHBRIDGE, CAPID0 + 4));
3055 	}
3056 
3057 	/* after SPD  */
3058 	timestamp_add_now(102);
3059 
3060 	mchbar_clrbits8(0x2ca8, 1 << 1 | 1 << 0);
3061 
3062 	collect_system_info(&info);
3063 	calculate_timings(&info);
3064 
3065 	if (!s3resume) {
3066 		u8 reg8 = pci_read_config8(SOUTHBRIDGE, GEN_PMCON_2);
3067 		if (x2ca8 == 0 && (reg8 & 0x80)) {
3068 			/* Don't enable S4-assertion stretch. Makes trouble on roda/rk9.
3069 			   reg8 = pci_read_config8(PCI_DEV(0, 0x1f, 0), 0xa4);
3070 			   pci_write_config8(PCI_DEV(0, 0x1f, 0), 0xa4, reg8 | 0x08);
3071 			 */
3072 
3073 			/* Clear bit7. */
3074 
3075 			pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
3076 				   (reg8 & ~(1 << 7)));
3077 
3078 			printk(BIOS_INFO,
3079 			       "Interrupted RAM init, reset required.\n");
3080 			system_reset();
3081 		}
3082 	}
3083 
3084 	if (!s3resume && x2ca8 == 0)
3085 		pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
3086 			      pci_read_config8(SOUTHBRIDGE, GEN_PMCON_2) | 0x80);
3087 
3088 	compute_derived_timings(&info);
3089 
3090 	early_quickpath_init(&info, x2ca8);
3091 
3092 	info.cached_training = get_cached_training();
3093 
3094 	if (x2ca8 == 0)
3095 		late_quickpath_init(&info, s3resume);
3096 
3097 	mchbar_setbits32(0x2c80, 1 << 24);
3098 	mchbar_write32(0x1804, mchbar_read32(0x1c04) & ~(1 << 27));
3099 
3100 	mchbar_read8(0x2ca8);	// !!!!
3101 
3102 	if (x2ca8 == 0) {
3103 		mchbar_clrbits8(0x2ca8, 3);
3104 		mchbar_write8(0x2ca8, mchbar_read8(0x2ca8) + 4);	// "+" or  "|"?
3105 		/* This issues a CPU reset without resetting the platform */
3106 		printk(BIOS_DEBUG, "Issuing a CPU reset\n");
3107 		/* Write back the S3 state to PM1_CNT to let the reset CPU
3108 		   know it also needs to take the s3 path. */
3109 		if (s3resume)
3110 			write_pmbase32(PM1_CNT, read_pmbase32(PM1_CNT)
3111 				       | (SLP_TYP_S3 << 10));
3112 		mchbar_setbits32(0x1af0, 1 << 4);
3113 		halt();
3114 	}
3115 
3116 	mchbar_clrbits8(0x2ca8, 0);	// !!!!
3117 
3118 	mchbar_clrbits32(0x2c80, 1 << 24);
3119 
3120 	pci_write_config32(QPI_NON_CORE, MAX_RTIDS, 0x20220);
3121 
3122 	{
3123 		u8 x2c20 = (mchbar_read16(0x2c20) >> 8) & 3;
3124 		u16 x2c10 = mchbar_read16(0x2c10);
3125 		u16 value = mchbar_read16(0x2c00);
3126 		if (x2c20 == 0 && (x2c10 & 0x300) == 0)
3127 			value |= (1 << 7);
3128 		else
3129 			value &= ~(1 << 0);
3130 
3131 		mchbar_write16(0x2c00, value);
3132 	}
3133 
3134 	udelay(1000);	// !!!!
3135 
3136 	write_1d0(0, 0x33d, 0, 0);
3137 	write_500(&info, 0, 0, 0xb61, 0, 0);
3138 	write_500(&info, 1, 0, 0xb61, 0, 0);
3139 	mchbar_write32(0x1a30, 0);
3140 	mchbar_write32(0x1a34, 0);
3141 	mchbar_write16(0x614, 0xb5b | (info.populated_ranks[1][0][0] * 0x404) |
3142 		(info.populated_ranks[0][0][0] * 0xa0));
3143 	mchbar_write16(0x616, 0x26a);
3144 	mchbar_write32(0x134, 0x856000);
3145 	mchbar_write32(0x160, 0x5ffffff);
3146 	mchbar_clrsetbits32(0x114, ~0, 0xc2024440);	// !!!!
3147 	mchbar_clrsetbits32(0x118, ~0, 0x4);	// !!!!
3148 	for (channel = 0; channel < NUM_CHANNELS; channel++)
3149 		mchbar_write32(0x260 + (channel << 10), 0x30809ff |
3150 			(info.populated_ranks_mask[channel] & 3) << 20);
3151 	for (channel = 0; channel < NUM_CHANNELS; channel++) {
3152 		mchbar_write16(0x31c + (channel << 10), 0x101);
3153 		mchbar_write16(0x360 + (channel << 10), 0x909);
3154 		mchbar_write16(0x3a4 + (channel << 10), 0x101);
3155 		mchbar_write16(0x3e8 + (channel << 10), 0x101);
3156 		mchbar_write32(0x320 + (channel << 10), 0x29002900);
3157 		mchbar_write32(0x324 + (channel << 10), 0);
3158 		mchbar_write32(0x368 + (channel << 10), 0x32003200);
3159 		mchbar_write16(0x352 + (channel << 10), 0x505);
3160 		mchbar_write16(0x354 + (channel << 10), 0x3c3c);
3161 		mchbar_write16(0x356 + (channel << 10), 0x1040);
3162 		mchbar_write16(0x39a + (channel << 10), 0x73e4);
3163 		mchbar_write16(0x3de + (channel << 10), 0x77ed);
3164 		mchbar_write16(0x422 + (channel << 10), 0x1040);
3165 	}
3166 
3167 	write_1d0(0x4, 0x151, 4, 1);
3168 	write_1d0(0, 0x142, 3, 1);
3169 	rdmsr(0x1ac);	// !!!!
3170 	write_500(&info, 1, 1, 0x6b3, 4, 1);
3171 	write_500(&info, 1, 1, 0x6cf, 4, 1);
3172 
3173 	rmw_1d0(0x21c, 0x38, 0, 6);
3174 
3175 	write_1d0(((!info.populated_ranks[1][0][0]) << 1) | ((!info.
3176 							      populated_ranks[0]
3177 							      [0][0]) << 0),
3178 		  0x1d1, 3, 1);
3179 	for (channel = 0; channel < NUM_CHANNELS; channel++) {
3180 		mchbar_write16(0x38e + (channel << 10), 0x5f5f);
3181 		mchbar_write16(0x3d2 + (channel << 10), 0x5f5f);
3182 	}
3183 
3184 	set_334(0);
3185 
3186 	program_base_timings(&info);
3187 
3188 	mchbar_setbits8(0x5ff, 1 << 7);
3189 
3190 	write_1d0(0x2, 0x1d5, 2, 1);
3191 	write_1d0(0x20, 0x166, 7, 1);
3192 	write_1d0(0x0, 0xeb, 3, 1);
3193 	write_1d0(0x0, 0xf3, 6, 1);
3194 
3195 	for (channel = 0; channel < NUM_CHANNELS; channel++) {
3196 		u8 a = 0;
3197 		if (info.populated_ranks[channel][0][1] && info.clock_speed_index > 1)
3198 			a = 3;
3199 		if (info.silicon_revision == 0 || info.silicon_revision == 1)
3200 			a = 3;
3201 
3202 		for (lane = 0; lane < 9; lane++) {
3203 			const u16 addr = 0x125 + get_lane_offset(0, 0, lane);
3204 			rmw_500(&info, channel, addr, 6, 0xf, a);
3205 		}
3206 	}
3207 
3208 	if (s3resume) {
3209 		if (!info.cached_training) {
3210 			u32 reg32;
3211 			printk(BIOS_ERR,
3212 			       "Couldn't find training data. Rebooting\n");
3213 			reg32 = inl(DEFAULT_PMBASE + 0x04);
3214 			outl(reg32 & ~(7 << 10), DEFAULT_PMBASE + 0x04);
3215 			full_reset();
3216 		}
3217 		int tm;
3218 		info.training = *info.cached_training;
3219 		for (tm = 0; tm < 4; tm++)
3220 			for (channel = 0; channel < NUM_CHANNELS; channel++)
3221 				for (slot = 0; slot < NUM_SLOTS; slot++)
3222 					for (rank = 0; rank < NUM_RANKS; rank++)
3223 						for (lane = 0; lane < 9; lane++)
3224 							write_500(&info,
3225 								  channel,
3226 								  info.training.
3227 								  lane_timings
3228 								  [tm][channel]
3229 								  [slot][rank]
3230 								  [lane],
3231 								  get_timing_register_addr
3232 								  (lane, tm,
3233 								   slot, rank),
3234 								  9, 0);
3235 		write_1d0(info.cached_training->reg_178, 0x178, 7, 1);
3236 		write_1d0(info.cached_training->reg_10b, 0x10b, 6, 1);
3237 	}
3238 
3239 	mchbar_clrsetbits32(0x1f4, ~0, 1 << 17);	// !!!!
3240 	mchbar_write32(0x1f0, 0x1d000200);
3241 	mchbar_setbits8(0x1f0, 1 << 0);
3242 	while (mchbar_read8(0x1f0) & 1)
3243 		;
3244 
3245 	program_board_delay(&info);
3246 
3247 	mchbar_write8(0x5ff, 0);
3248 	mchbar_write8(0x5ff, 1 << 7);
3249 	mchbar_write8(0x5f4, 1 << 0);
3250 
3251 	mchbar_clrbits32(0x130, 1 << 1);	// | 2 when ?
3252 	while (mchbar_read32(0x130) & 1)
3253 		;
3254 
3255 	rmw_1d0(0x14b, 0x47, 0x30, 7);
3256 	rmw_1d0(0xd6,  0x38, 7, 6);
3257 	rmw_1d0(0x328, 0x38, 7, 6);
3258 
3259 	for (channel = 0; channel < NUM_CHANNELS; channel++)
3260 		set_4cf(&info, channel, 1, 0);
3261 
3262 	rmw_1d0(0x116, 0xe,  0, 4);
3263 	rmw_1d0(0xae,  0x3e, 0, 6);
3264 	rmw_1d0(0x300, 0x3e, 0, 6);
3265 	mchbar_clrbits16(0x356, 1 << 15);
3266 	mchbar_clrbits16(0x756, 1 << 15);
3267 	mchbar_clrbits32(0x140, 7 << 24);
3268 	mchbar_clrbits32(0x138, 7 << 24);
3269 	mchbar_write32(0x130, 0x31111301);
3270 	/* Wait until REG130b0 is 1.  */
3271 	while (mchbar_read32(0x130) & 1)
3272 		;
3273 
3274 	u8 value_a1;
3275 	{
3276 		const u8 val_xa1 = get_bits_420(read_1d0(0xa1, 6));	// = 0x1cf4040 // !!!!
3277 		const u8 val_2f3 = get_bits_420(read_1d0(0x2f3, 6));	// = 0x10a4040 // !!!!
3278 		value_a1 = val_xa1;
3279 		rmw_1d0(0x320, 0x38, val_2f3, 6);
3280 		rmw_1d0(0x14b, 0x78, val_xa1, 7);
3281 		rmw_1d0(0xce,  0x38, val_xa1, 6);
3282 	}
3283 
3284 	for (channel = 0; channel < NUM_CHANNELS; channel++)
3285 		set_4cf(&info, channel, 1, 1);
3286 
3287 	rmw_1d0(0x116, 0xe, 1, 4);	// = 0x4040432 // !!!!
3288 	{
3289 		if ((mchbar_read32(0x144) & 0x1f) < 0x13)
3290 			value_a1 += 2;
3291 		else
3292 			value_a1 += 1;
3293 
3294 		if (value_a1 > 7)
3295 			value_a1 = 7;
3296 
3297 		write_1d0(2, 0xae, 6, 1);
3298 		write_1d0(2, 0x300, 6, 1);
3299 		write_1d0(value_a1, 0x121, 3, 1);
3300 		rmw_1d0(0xd6,  0x38, 4, 6);
3301 		rmw_1d0(0x328, 0x38, 4, 6);
3302 	}
3303 
3304 	for (channel = 0; channel < NUM_CHANNELS; channel++)
3305 		set_4cf(&info, channel, 2, 0);
3306 
3307 	mchbar_write32(0x130, 0x11111301 | info.populated_ranks[1][0][0] << 30 |
3308 		info.populated_ranks[0][0][0] << 29);
3309 	while (mchbar_read8(0x130) & 1)
3310 		;
3311 
3312 	{
3313 		const u8 val_xa1 = get_bits_420(read_1d0(0xa1, 6));
3314 		read_1d0(0x2f3, 6);		// = 0x10a4054 // !!!!
3315 		rmw_1d0(0x21c, 0x38, 0, 6);
3316 		rmw_1d0(0x14b, 0x78, val_xa1, 7);
3317 	}
3318 
3319 	for (channel = 0; channel < NUM_CHANNELS; channel++)
3320 		set_4cf(&info, channel, 2, 1);
3321 
3322 	set_334(1);
3323 
3324 	mchbar_write8(0x1e8, 1 << 2);
3325 
3326 	for (channel = 0; channel < NUM_CHANNELS; channel++) {
3327 		write_500(&info, channel,
3328 			  0x3 & ~(info.populated_ranks_mask[channel]), 0x6b7, 2,
3329 			  1);
3330 		write_500(&info, channel, 0x3, 0x69b, 2, 1);
3331 	}
3332 	mchbar_clrsetbits32(0x2d0, ~0xff0c01ff, 0x200000);
3333 	mchbar_write16(0x6c0, 0x14a0);
3334 	mchbar_clrsetbits32(0x6d0, ~0xff0000ff, 0x8000);
3335 	mchbar_write16(0x232, 1 << 3);
3336 	/* 0x40004 or 0 depending on ? */
3337 	mchbar_clrsetbits32(0x234, 0x40004, 0x40004);
3338 	mchbar_clrsetbits32(0x34, 0x7, 5);
3339 	mchbar_write32(0x128, 0x2150d05);
3340 	mchbar_write8(0x12c, 0x1f);
3341 	mchbar_write8(0x12d, 0x56);
3342 	mchbar_write8(0x12e, 0x31);
3343 	mchbar_write8(0x12f, 0);
3344 	mchbar_write8(0x271, 1 << 1);
3345 	mchbar_write8(0x671, 1 << 1);
3346 	mchbar_write8(0x1e8, 1 << 2);
3347 	for (channel = 0; channel < NUM_CHANNELS; channel++)
3348 		mchbar_write32(0x294 + (channel << 10),
3349 			(info.populated_ranks_mask[channel] & 3) << 16);
3350 	mchbar_clrsetbits32(0x134, ~0xfc01ffff, 0x10000);
3351 	mchbar_clrsetbits32(0x134, ~0xfc85ffff, 0x850000);
3352 	for (channel = 0; channel < NUM_CHANNELS; channel++)
3353 		mchbar_clrsetbits32(0x260 + (channel << 10), 0xf << 20, 1 << 27 |
3354 			(info.populated_ranks_mask[channel] & 3) << 20);
3355 
3356 	if (!s3resume)
3357 		jedec_init(&info);
3358 
3359 	int totalrank = 0;
3360 	for (channel = 0; channel < NUM_CHANNELS; channel++)
3361 		for (slot = 0; slot < NUM_SLOTS; slot++)
3362 			for (rank = 0; rank < NUM_RANKS; rank++)
3363 				if (info.populated_ranks[channel][slot][rank]) {
3364 					jedec_read(&info, channel, slot, rank,
3365 						   totalrank, 0xa, 0x400);
3366 					totalrank++;
3367 				}
3368 
3369 	mchbar_write8(0x12c, 0x9f);
3370 
3371 	mchbar_clrsetbits8(0x271, 0x3e, 0x0e);
3372 	mchbar_clrsetbits8(0x671, 0x3e, 0x0e);
3373 
3374 	if (!s3resume) {
3375 		for (channel = 0; channel < NUM_CHANNELS; channel++) {
3376 			mchbar_write32(0x294 + (channel << 10),
3377 				(info.populated_ranks_mask[channel] & 3) << 16);
3378 			mchbar_write16(0x298 + (channel << 10),
3379 				info.populated_ranks[channel][0][0] |
3380 				info.populated_ranks[channel][0][1] << 5);
3381 			mchbar_write32(0x29c + (channel << 10), 0x77a);
3382 		}
3383 		mchbar_clrsetbits32(0x2c0, ~0, 0x6009cc00);	// !!!!
3384 
3385 		{
3386 			u8 a, b;
3387 			a = mchbar_read8(0x243);
3388 			b = mchbar_read8(0x643);
3389 			mchbar_write8(0x243, a | 2);
3390 			mchbar_write8(0x643, b | 2);
3391 		}
3392 
3393 		write_1d0(7, 0x19b, 3, 1);
3394 		write_1d0(7, 0x1c0, 3, 1);
3395 		write_1d0(4, 0x1c6, 4, 1);
3396 		write_1d0(4, 0x1cc, 4, 1);
3397 		rmw_1d0(0x151, 0xf, 0x4, 4);
3398 		mchbar_write32(0x584, 0xfffff);
3399 		mchbar_write32(0x984, 0xfffff);
3400 
3401 		for (channel = 0; channel < NUM_CHANNELS; channel++)
3402 			for (slot = 0; slot < NUM_SLOTS; slot++)
3403 				for (rank = 0; rank < NUM_RANKS; rank++)
3404 					if (info.
3405 					    populated_ranks[channel][slot]
3406 					    [rank])
3407 						config_rank(&info, s3resume,
3408 							    channel, slot,
3409 							    rank);
3410 
3411 		mchbar_write8(0x243, 1);
3412 		mchbar_write8(0x643, 1);
3413 	}
3414 
3415 	/* was == 1 but is common */
3416 	pci_write_config16(NORTHBRIDGE, 0xc8, 3);
3417 	write_26c(0, 0x820);
3418 	write_26c(1, 0x820);
3419 	mchbar_setbits32(0x130, 1 << 1);
3420 	/* end */
3421 
3422 	if (s3resume) {
3423 		for (channel = 0; channel < NUM_CHANNELS; channel++) {
3424 			mchbar_write32(0x294 + (channel << 10),
3425 				(info.populated_ranks_mask[channel] & 3) << 16);
3426 			mchbar_write16(0x298 + (channel << 10),
3427 				info.populated_ranks[channel][0][0] |
3428 				info.populated_ranks[channel][0][1] << 5);
3429 			mchbar_write32(0x29c + (channel << 10), 0x77a);
3430 		}
3431 		mchbar_clrsetbits32(0x2c0, ~0, 0x6009cc00);	// !!!!
3432 	}
3433 
3434 	mchbar_clrbits32(0xfa4, 1 << 24 | 1 << 1);
3435 	mchbar_write32(0xfb0, 0x2000e019);
3436 
3437 	/* Before training. */
3438 	timestamp_add_now(103);
3439 
3440 	if (!s3resume)
3441 		ram_training(&info);
3442 
3443 	/* After training. */
3444 	timestamp_add_now(104);
3445 
3446 	dump_timings(&info);
3447 
3448 	program_modules_memory_map(&info, 0);
3449 	program_total_memory_map(&info);
3450 
3451 	if (info.non_interleaved_part_mb != 0 && info.interleaved_part_mb != 0)
3452 		mchbar_write8(0x111, 0 << 2 | 1 << 5 | 1 << 6 | 0 << 7);
3453 	else if (have_match_ranks(&info, 0, 4) && have_match_ranks(&info, 1, 4))
3454 		mchbar_write8(0x111, 3 << 2 | 1 << 5 | 0 << 6 | 1 << 7);
3455 	else if (have_match_ranks(&info, 0, 2) && have_match_ranks(&info, 1, 2))
3456 		mchbar_write8(0x111, 3 << 2 | 1 << 5 | 0 << 6 | 0 << 7);
3457 	else
3458 		mchbar_write8(0x111, 3 << 2 | 1 << 5 | 1 << 6 | 0 << 7);
3459 
3460 	mchbar_clrbits32(0xfac, 1 << 31);
3461 	mchbar_write32(0xfb4, 0x4800);
3462 	mchbar_write32(0xfb8, (info.revision < 8) ? 0x20 : 0x0);
3463 	mchbar_write32(0xe94, 0x7ffff);
3464 	mchbar_write32(0xfc0, 0x80002040);
3465 	mchbar_write32(0xfc4, 0x701246);
3466 	mchbar_clrbits8(0xfc8, 0x70);
3467 	mchbar_setbits32(0xe5c, 1 << 24);
3468 	mchbar_clrsetbits32(0x1a70, 3 << 20, 2 << 20);
3469 	mchbar_write32(0x50, 0x700b0);
3470 	mchbar_write32(0x3c, 0x10);
3471 	mchbar_clrsetbits8(0x1aa8, 0x3f, 0xa);
3472 	mchbar_setbits8(0xff4, 1 << 1);
3473 	mchbar_clrsetbits32(0xff8, 0xe008, 0x1020);
3474 
3475 	mchbar_write32(0xd00, IOMMU_BASE2 | 1);
3476 	mchbar_write32(0xd40, IOMMU_BASE1 | 1);
3477 	mchbar_write32(0xdc0, IOMMU_BASE4 | 1);
3478 
3479 	write32p(IOMMU_BASE1 | 0xffc, 0x80000000);
3480 	write32p(IOMMU_BASE2 | 0xffc, 0xc0000000);
3481 	write32p(IOMMU_BASE4 | 0xffc, 0x80000000);
3482 
3483 	{
3484 		u32 eax;
3485 
3486 		eax = info.fsb_frequency / 9;
3487 		mchbar_clrsetbits32(0xfcc, 0x3ffff,
3488 			(eax * 0x280) | (eax * 0x5000) | eax | 0x40000);
3489 		mchbar_write32(0x20, 0x33001);
3490 	}
3491 
3492 	for (channel = 0; channel < NUM_CHANNELS; channel++) {
3493 		mchbar_clrbits32(0x220 + (channel << 10), 0x7770);
3494 		if (info.max_slots_used_in_channel == 1)
3495 			mchbar_setbits16(0x237 + (channel << 10), 0x0201);
3496 		else
3497 			mchbar_clrbits16(0x237 + (channel << 10), 0x0201);
3498 
3499 		mchbar_setbits8(0x241 + (channel << 10), 1 << 0);
3500 
3501 		if (info.clock_speed_index <= 1 && (info.silicon_revision == 2
3502 			|| info.silicon_revision == 3))
3503 			mchbar_setbits32(0x248 + (channel << 10), 0x00102000);
3504 		else
3505 			mchbar_clrbits32(0x248 + (channel << 10), 0x00102000);
3506 	}
3507 
3508 	mchbar_setbits32(0x115, 1 << 24);
3509 
3510 	{
3511 		u8 al;
3512 		al = 0xd;
3513 		if (!(info.silicon_revision == 0 || info.silicon_revision == 1))
3514 			al += 2;
3515 		al |= ((1 << (info.max_slots_used_in_channel - 1)) - 1) << 4;
3516 		mchbar_write32(0x210, al << 16 | 0x20);
3517 	}
3518 
3519 	for (channel = 0; channel < NUM_CHANNELS; channel++) {
3520 		mchbar_write32(0x288 + (channel << 10), 0x70605040);
3521 		mchbar_write32(0x28c + (channel << 10), 0xfffec080);
3522 		mchbar_write32(0x290 + (channel << 10), 0x282091c |
3523 			(info.max_slots_used_in_channel - 1) << 0x16);
3524 	}
3525 	u32 reg1c;
3526 	pci_read_config32(NORTHBRIDGE, 0x40);	// = DEFAULT_EPBAR | 0x001 // OK
3527 	reg1c = epbar_read32(EPVC1RCAP);	// = 0x8001 // OK
3528 	pci_read_config32(NORTHBRIDGE, 0x40);	// = DEFAULT_EPBAR | 0x001 // OK
3529 	epbar_write32(EPVC1RCAP, reg1c);	// OK
3530 	mchbar_read8(0xe08);	// = 0x0
3531 	pci_read_config32(NORTHBRIDGE, 0xe4);	// = 0x316126
3532 	mchbar_setbits8(0x1210, 1 << 1);
3533 	mchbar_write32(0x1200, 0x8800440);
3534 	mchbar_write32(0x1204, 0x53ff0453);
3535 	mchbar_write32(0x1208, 0x19002043);
3536 	mchbar_write16(0x1214, 0x320);
3537 
3538 	if (info.revision == 0x10 || info.revision == 0x11) {
3539 		mchbar_write16(0x1214, 0x220);
3540 		mchbar_setbits8(0x1210, 1 << 6);
3541 	}
3542 
3543 	mchbar_setbits8(0x1214, 1 << 2);
3544 	mchbar_write8(0x120c, 1);
3545 	mchbar_write8(0x1218, 3);
3546 	mchbar_write8(0x121a, 3);
3547 	mchbar_write8(0x121c, 3);
3548 	mchbar_write16(0xc14, 0);
3549 	mchbar_write16(0xc20, 0);
3550 	mchbar_write32(0x1c, 0);
3551 
3552 	/* revision dependent here.  */
3553 
3554 	mchbar_setbits16(0x1230, 0x1f07);
3555 
3556 	if (info.uma_enabled)
3557 		mchbar_setbits32(0x11f4, 1 << 28);
3558 
3559 	mchbar_setbits16(0x1230, 1 << 15);
3560 	mchbar_setbits8(0x1214, 1 << 0);
3561 
3562 	u8 bl, ebpb;
3563 	u16 reg_1020;
3564 
3565 	reg_1020 = mchbar_read32(0x1020);	// = 0x6c733c  // OK
3566 	mchbar_write8(0x1070, 1);
3567 
3568 	mchbar_write32(0x1000, 0x100);
3569 	mchbar_write8(0x1007, 0);
3570 
3571 	if (reg_1020 != 0) {
3572 		mchbar_write16(0x1018, 0);
3573 		bl = reg_1020 >> 8;
3574 		ebpb = reg_1020 & 0xff;
3575 	} else {
3576 		ebpb = 0;
3577 		bl = 8;
3578 	}
3579 
3580 	rdmsr(0x1a2);
3581 
3582 	mchbar_write32(0x1014, 0xffffffff);
3583 
3584 	mchbar_write32(0x1010, ((((ebpb + 0x7d) << 7) / bl) & 0xff) * !!reg_1020);
3585 
3586 	mchbar_write8(0x101c, 0xb8);
3587 
3588 	mchbar_clrsetbits8(0x123e, 0xf0, 0x60);
3589 	if (reg_1020 != 0) {
3590 		mchbar_clrsetbits32(0x123c, 0xf << 20, 0x6 << 20);
3591 		mchbar_write8(0x101c, 0xb8);
3592 	}
3593 
3594 	const u64 heci_uma_addr =
3595 	    ((u64)
3596 	     ((((u64)pci_read_config16(NORTHBRIDGE, TOM)) << 6) -
3597 	      info.memory_reserved_for_heci_mb)) << 20;
3598 
3599 	setup_heci_uma(heci_uma_addr, info.memory_reserved_for_heci_mb);
3600 
3601 	if (info.uma_enabled) {
3602 		u16 ax;
3603 		mchbar_setbits32(0x11b0, 1 << 14);
3604 		mchbar_setbits32(0x11b4, 1 << 14);
3605 		mchbar_setbits16(0x1190, 1 << 14);
3606 
3607 		ax = mchbar_read16(0x1190) & 0xf00;	// = 0x480a  // OK
3608 		mchbar_write16(0x1170, ax | (mchbar_read16(0x1170) & 0x107f) | 0x4080);
3609 		mchbar_setbits16(0x1170, 1 << 12);
3610 
3611 		udelay(1000);
3612 
3613 		u16 ecx;
3614 		for (ecx = 0xffff; ecx && (mchbar_read16(0x1170) & (1 << 12)); ecx--)
3615 			;
3616 		mchbar_clrbits16(0x1190, 1 << 14);
3617 	}
3618 
3619 	pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
3620 		      pci_read_config8(SOUTHBRIDGE, GEN_PMCON_2) & ~0x80);
3621 	udelay(10000);
3622 	mchbar_write16(0x2ca8, 1 << 3);
3623 
3624 	udelay(1000);
3625 	dump_timings(&info);
3626 	cbmem_wasnot_inited = cbmem_recovery(s3resume);
3627 
3628 	if (!s3resume)
3629 		save_timings(&info);
3630 	if (s3resume && cbmem_wasnot_inited) {
3631 		printk(BIOS_ERR, "Failed S3 resume.\n");
3632 		ram_check_nodie(1 * MiB);
3633 
3634 		/* Failed S3 resume, reset to come up cleanly */
3635 		full_reset();
3636 	}
3637 }
3638