1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2
3 #include <console/console.h>
4 #include <commonlib/helpers.h>
5 #include <string.h>
6 #include <arch/io.h>
7 #include <device/mmio.h>
8 #include <device/pci_ops.h>
9 #include <device/smbus_host.h>
10 #include <cpu/x86/msr.h>
11 #include <cpu/x86/cache.h>
12 #include <cbmem.h>
13 #include <cf9_reset.h>
14 #include <option.h>
15 #include <device/pci_def.h>
16 #include <device/device.h>
17 #include <halt.h>
18 #include <spd.h>
19 #include <timestamp.h>
20 #include <cpu/x86/mtrr.h>
21 #include <cpu/intel/speedstep.h>
22 #include <cpu/intel/turbo.h>
23 #include <mrc_cache.h>
24 #include <southbridge/intel/ibexpeak/me.h>
25 #include <southbridge/intel/common/pmbase.h>
26 #include <delay.h>
27 #include <types.h>
28
29 #include "chip.h"
30 #include "ironlake.h"
31 #include "raminit.h"
32 #include "raminit_tables.h"
33
34 #define NORTHBRIDGE PCI_DEV(0, 0, 0)
35 #define SOUTHBRIDGE PCI_DEV(0, 0x1f, 0)
36 #define GMA PCI_DEV(0, 0x2, 0x0)
37
38 #define FOR_ALL_RANKS \
39 for (channel = 0; channel < NUM_CHANNELS; channel++) \
40 for (slot = 0; slot < NUM_SLOTS; slot++) \
41 for (rank = 0; rank < NUM_RANKS; rank++)
42
43 #define FOR_POPULATED_RANKS \
44 for (channel = 0; channel < NUM_CHANNELS; channel++) \
45 for (slot = 0; slot < NUM_SLOTS; slot++) \
46 for (rank = 0; rank < NUM_RANKS; rank++) \
47 if (info->populated_ranks[channel][slot][rank])
48
49 #define FOR_POPULATED_RANKS_BACKWARDS \
50 for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) \
51 for (slot = 0; slot < NUM_SLOTS; slot++) \
52 for (rank = 0; rank < NUM_RANKS; rank++) \
53 if (info->populated_ranks[channel][slot][rank])
54
55 #include <lib.h> /* Prototypes */
56
/* 128-bit value as two 64-bit halves; matches the in-memory layout of an
   SSE register dumped with movdqu (lo qword first). */
typedef struct _u128 {
	u64 lo;
	u64 hi;
} u128;
61
/*
 * Read 16 bytes from physical address @addr with a single SSE movdqa and
 * store them into out[0] (low qword) and out[1] (high qword). The
 * caller's xmm0 is spilled to @stor around the access and restored
 * afterwards, so the register state is preserved.
 * NOTE(review): movdqa faults on unaligned addresses — assumes @addr is
 * 16-byte aligned; confirm at call sites.
 */
static void read128(u32 addr, u64 * out)
{
	u128 ret;
	u128 stor;
	asm volatile ("movdqu %%xmm0, %0\n"
		      "movdqa (%2), %%xmm0\n"
		      "movdqu %%xmm0, %1\n"
		      "movdqu %0, %%xmm0":"+m" (stor), "=m"(ret):"r"(addr));
	out[0] = ret.lo;
	out[1] = ret.hi;
}
73
74 /*
75 * Ironlake memory I/O timings are located in scan chains, accessible
76 * through MCHBAR register groups. Each channel has a scan chain, and
77 * there's a global scan chain too. Each chain is broken into smaller
78 * sections of N bits, where N <= 32. Each section allows reading and
79 * writing a certain parameter. Each section contains N - 2 data bits
80 * and two additional bits: a Mask bit, and a Halt bit.
81 */
82
/* OK */
/*
 * Write @val to section @addr of the global scan chain (see block
 * comment above). The payload in 0x1d4 is the low @bits data bits of
 * @val plus the control field (2 << bits | flag << bits); writing
 * 1 << 30 | addr to 0x1d0 starts the transfer. Bit 23 of 0x1d0 is
 * polled as the busy flag both before and after issuing the command.
 */
static void write_1d0(u32 val, u16 addr, int bits, int flag)
{
	mchbar_write32(0x1d0, 0);
	while (mchbar_read32(0x1d0) & (1 << 23))
		;
	mchbar_write32(0x1d4, (val & ((1 << bits) - 1)) | 2 << bits | flag << bits);
	mchbar_write32(0x1d0, 1 << 30 | addr);
	while (mchbar_read32(0x1d0) & (1 << 23))
		;
}
94
/* OK */
/*
 * Read the low @split bits from a global scan-chain section. The
 * effective index written to 0x1d0 is (0x361 - addr) biased by a 2-bit
 * field of MCHBAR 0x246; the raw value is then fetched from 0x1d8.
 * Two dummy writes to section 0x33d follow the read — presumably to
 * step/settle the chain; their purpose is not documented.
 */
static u16 read_1d0(u16 addr, int split)
{
	u32 val;
	mchbar_write32(0x1d0, 0);
	while (mchbar_read32(0x1d0) & (1 << 23))
		;
	mchbar_write32(0x1d0, 1 << 31 | (((mchbar_read8(0x246) >> 2) & 3) + 0x361 - addr));
	while (mchbar_read32(0x1d0) & (1 << 23))
		;
	val = mchbar_read32(0x1d8);
	write_1d0(0, 0x33d, 0, 0);
	write_1d0(0, 0x33d, 0, 0);
	val &= ((1 << split) - 1);
	// printk (BIOS_ERR, "R1D0C [%x] => %x\n", addr, val);
	return val;
}
112
/* Store fence: make all preceding stores globally visible before the
   memory-test sequence continues. */
static void sfence(void)
{
	asm volatile ("sfence");
}
117
/*
 * Compute the scan-chain base offset of one byte lane for the given
 * slot/rank. Each lane is 0x124 apart; lanes with bit 2 set (4-7)
 * carry an extra 0x23e bias, and lane 8 (ECC) gets a 0x452 correction
 * subtracted. Rank and slot contribute strides of 11 and 22.
 */
static inline u16 get_lane_offset(int slot, int rank, int lane)
{
	u16 offset = 0x124 * lane;

	if (lane & 4)
		offset += 0x23e;
	if (lane == 8)
		offset -= 0x452;

	offset += 11 * rank;
	offset += 22 * slot;
	return offset;
}
123
/*
 * Return the scan-chain address of timing register @tm (0-3) for one
 * lane/slot/rank: the lane's base offset plus a per-register constant.
 * The (tm + 3) % 4 rotation maps tm 0,1,2,3 onto offsets
 * 0x5c, 0x1d, 0xa8, 0xe6 respectively.
 */
static inline u16 get_timing_register_addr(int lane, int tm, int slot, int rank)
{
	static const u16 tm_offset[4] = { 0x1d, 0xa8, 0xe6, 0x5c };

	return get_lane_offset(slot, rank, lane) + tm_offset[(tm + 3) % 4];
}
129
/*
 * Debug pass-through used by the gav() macro: returns @in unchanged.
 * The commented-out printk can be re-enabled to trace read values
 * together with the source line that produced them.
 */
static u32 gav_real(int line, u32 in)
{
	(void)line;	/* only used by the disabled trace below */
	// printk (BIOS_DEBUG, "%d: GAV: %x\n", line, in);
	return in;
}
135
136 #define gav(x) gav_real(__LINE__, (x))
137
/* Global allocation of timings_car */
/* Per-setting timing-bound scratch storage used during training; the
   name suggests it lives in cache-as-RAM while DRAM is still being
   brought up — confirm against the linker placement. */
timing_bounds_t timings_car[64];
140
/* OK */
/*
 * Read the low @split bits of per-channel scan-chain section @addr.
 * The per-channel register group lives at MCHBAR 0x500/0x508 +
 * (channel << 10); the effective index is (0xb88 - addr) biased by a
 * 2-bit field of 0x246. Records 1 << 31 in last_500_command[] so the
 * next write_500() knows the previous operation was a read.
 */
static u16
read_500(struct raminfo *info, int channel, u16 addr, int split)
{
	u32 val;
	info->last_500_command[channel] = 1 << 31;
	mchbar_write32(0x500 + (channel << 10), 0);
	while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23))
		;
	mchbar_write32(0x500 + (channel << 10),
		1 << 31 | (((mchbar_read8(0x246 + (channel << 10)) >> 2) & 3) + 0xb88 - addr));
	while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23))
		;
	val = mchbar_read32(0x508 + (channel << 10));
	return val & ((1 << split) - 1);
}
157
/* OK */
/*
 * Write @val (@bits data bits plus the 2 << bits | flag << bits control
 * field) to per-channel scan-chain section @addr. If the most recent
 * 0x500 operation on this channel was a read (last_500_command ==
 * 1 << 31), a dummy write to section 0xb61 is issued first to put the
 * chain back into write mode.
 */
static void
write_500(struct raminfo *info, int channel, u32 val, u16 addr, int bits,
	  int flag)
{
	if (info->last_500_command[channel] == 1 << 31) {
		info->last_500_command[channel] = 1 << 30;
		write_500(info, channel, 0, 0xb61, 0, 0);
	}
	mchbar_write32(0x500 + (channel << 10), 0);
	while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23))
		;
	mchbar_write32(0x504 + (channel << 10),
		(val & ((1 << bits) - 1)) | 2 << bits | flag << bits);
	mchbar_write32(0x500 + (channel << 10), 1 << 30 | addr);
	while (mchbar_read32(0x500 + (channel << 10)) & (1 << 23))
		;
}
176
/* Read-modify-write a per-channel scan-chain section: keep the bits
   selected by @and, OR in @or, and write back with the flag bit set. */
static void rmw_500(struct raminfo *info, int channel, u16 addr, int bits, u32 and, u32 or)
{
	const u32 val = read_500(info, channel, addr, bits) & and;
	write_500(info, channel, val | or, addr, bits, 1);
}
182
/*
 * Simple read/write data-lane test of the 256 MiB window at
 * (rank << 28). After a clearing pass, each of 32 dword pairs is
 * written with all-ones or all-zeros according to the corresponding bit
 * of a fixed test mask; the data is read back and a bit of @ok is
 * cleared for every mismatching byte lane (bits 0-3: bytes of the even
 * dword, bits 4-7: bytes of the odd dword). The window is zeroed and
 * read once more at the end. Returns the 8-bit lane-OK mask
 * (0xff = all byte lanes passed).
 */
static int rw_test(int rank)
{
	const u32 mask = 0xf00fc33c;
	int ok = 0xff;
	int i;
	for (i = 0; i < 64; i++)
		write32p((rank << 28) | (i << 2), 0);
	sfence();
	for (i = 0; i < 64; i++)
		gav(read32p((rank << 28) | (i << 2)));
	sfence();
	for (i = 0; i < 32; i++) {
		u32 pat = (((mask >> i) & 1) ? 0xffffffff : 0);
		write32p((rank << 28) | (i << 3), pat);
		write32p((rank << 28) | (i << 3) | 4, pat);
	}
	sfence();
	for (i = 0; i < 32; i++) {
		u8 pat = (((mask >> i) & 1) ? 0xff : 0);
		int j;
		u32 val;
		gav(val = read32p((rank << 28) | (i << 3)));
		for (j = 0; j < 4; j++)
			if (((val >> (j * 8)) & 0xff) != pat)
				ok &= ~(1 << j);
		gav(val = read32p((rank << 28) | (i << 3) | 4));
		for (j = 0; j < 4; j++)
			if (((val >> (j * 8)) & 0xff) != pat)
				ok &= ~(16 << j);
	}
	sfence();
	for (i = 0; i < 64; i++)
		write32p((rank << 28) | (i << 2), 0);
	sfence();
	for (i = 0; i < 64; i++)
		gav(read32p((rank << 28) | (i << 2)));

	return ok;
}
222
/*
 * Program timing registers 2 and 3 for all eight data lanes of the
 * given rank, adding @base to the values recorded during training in
 * info->training.lane_timings.
 */
static void
program_timings(struct raminfo *info, u16 base, int channel, int slot, int rank)
{
	int lane;
	for (lane = 0; lane < 8; lane++) {
		write_500(info, channel,
			  base +
			  info->training.
			  lane_timings[2][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 2, slot, rank), 9, 0);
		write_500(info, channel,
			  base +
			  info->training.
			  lane_timings[3][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 3, slot, rank), 9, 0);
	}
}
240
/*
 * Program the per-channel 0x26c/0x268/0x2b9 register group with @si
 * (placed into bits 9+ of 0x268 and written verbatim to 0x2b9). The
 * fixed constants' meaning is undocumented.
 */
static void write_26c(int channel, u16 si)
{
	mchbar_write32(0x26c + (channel << 10), 0x03243f35);
	mchbar_write32(0x268 + (channel << 10), 0xcfc00000 | si << 9);
	mchbar_write16(0x2b9 + (channel << 10), si);
}
247
/*
 * Pulse MCHBAR 0x5ff (write 0, then bit 7) with scan-chain section
 * 0x142 temporarily adjusted: if bit 1 of 0x142 was set, it is cleared
 * before the pulse and restored (value 0x2) afterwards.
 */
static void toggle_1d0_142_5ff(void)
{
	u32 reg32 = gav(read_1d0(0x142, 3));
	if (reg32 & (1 << 1))
		write_1d0(0, 0x142, 3, 1);

	mchbar_write8(0x5ff, 0);
	mchbar_write8(0x5ff, 1 << 7);
	if (reg32 & (1 << 1))
		write_1d0(0x2, 0x142, 3, 1);
}
259
/*
 * Run one operation in the per-channel 0x580 register: load command
 * 0x8493c012 | @addr, start it by setting bit 0, busy-wait for
 * completion (bit 16), clear the start bit and return the final
 * register value.
 */
static u32 get_580(int channel, u8 addr)
{
	u32 ret;
	toggle_1d0_142_5ff();
	mchbar_write32(0x580 + (channel << 10), 0x8493c012 | addr);
	mchbar_setbits8(0x580 + (channel << 10), 1 << 0);
	while (!((ret = mchbar_read32(0x580 + (channel << 10))) & (1 << 16)))
		;
	mchbar_clrbits8(0x580 + (channel << 10), 1 << 0);
	return ret;
}
271
272 #define RANK_SHIFT 28
273 #define CHANNEL_SHIFT 10
274
/*
 * Training sequence for one rank: replay the recorded lane timings for
 * timing registers 1 and 2, assert scan-chain section 0x103, replay
 * timing register 0, then rewrite registers 1 and 2 again, each time
 * triggering a 0x580 operation. Afterwards read timing register 2 back
 * for every lane into lane_timings[2] and derive lane_timings[3] as
 * that value plus 0x20.
 */
static void seq9(struct raminfo *info, int channel, int slot, int rank)
{
	int i, lane;

	for (i = 0; i < 2; i++)
		for (lane = 0; lane < 8; lane++)
			write_500(info, channel,
				  info->training.lane_timings[i +
							      1][channel][slot]
				  [rank][lane], get_timing_register_addr(lane,
									 i + 1,
									 slot,
									 rank),
				  9, 0);

	write_1d0(1, 0x103, 6, 1);
	for (lane = 0; lane < 8; lane++)
		write_500(info, channel,
			  info->training.
			  lane_timings[0][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 0, slot, rank), 9, 0);

	for (i = 0; i < 2; i++) {
		for (lane = 0; lane < 8; lane++)
			write_500(info, channel,
				  info->training.lane_timings[i +
							      1][channel][slot]
				  [rank][lane], get_timing_register_addr(lane,
									 i + 1,
									 slot,
									 rank),
				  9, 0);
		gav(get_580(channel, ((i + 1) << 2) | (rank << 5)));
	}

	toggle_1d0_142_5ff();
	write_1d0(0x2, 0x142, 3, 1);

	for (lane = 0; lane < 8; lane++) {
		// printk (BIOS_ERR, "before: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
		info->training.lane_timings[2][channel][slot][rank][lane] =
		    read_500(info, channel,
			     get_timing_register_addr(lane, 2, slot, rank), 9);
		//printk (BIOS_ERR, "after: %x\n", info->training.lane_timings[2][channel][slot][rank][lane]);
		info->training.lane_timings[3][channel][slot][rank][lane] =
		    info->training.lane_timings[2][channel][slot][rank][lane] +
		    0x20;
	}
}
324
count_ranks_in_channel(struct raminfo * info,int channel)325 static int count_ranks_in_channel(struct raminfo *info, int channel)
326 {
327 int slot, rank;
328 int res = 0;
329 for (slot = 0; slot < NUM_SLOTS; slot++)
330 for (rank = 0; rank < NUM_SLOTS; rank++)
331 res += info->populated_ranks[channel][slot][rank];
332 return res;
333 }
334
/*
 * Bring up one populated rank: run the seq9 training sequence, program
 * its lane timings with a 0x80 offset, run the read/write test once,
 * reprogram at offset 0 and test twice more (all tests are skipped on
 * S3 resume), then clear scan-chain sections 0x142/0x103, trigger a
 * 0x580 operation and pulse 0x5ff. For channel 0 the rank's test
 * window is shifted up by the number of ranks populated on channel 1
 * (rank + add addresses the rank's mapped 256 MiB window).
 */
static void
config_rank(struct raminfo *info, int s3resume, int channel, int slot, int rank)
{
	int add;

	write_1d0(0, 0x178, 7, 1);
	seq9(info, channel, slot, rank);
	program_timings(info, 0x80, channel, slot, rank);

	if (channel == 0)
		add = count_ranks_in_channel(info, 1);
	else
		add = 0;
	if (!s3resume)
		gav(rw_test(rank + add));
	program_timings(info, 0x00, channel, slot, rank);
	if (!s3resume)
		gav(rw_test(rank + add));
	if (!s3resume)
		gav(rw_test(rank + add));
	write_1d0(0, 0x142, 3, 1);
	write_1d0(0, 0x103, 6, 1);

	gav(get_580(channel, 0xc | (rank << 5)));
	gav(read_1d0(0x142, 3));

	mchbar_write8(0x5ff, 0);
	mchbar_write8(0x5ff, 1 << 7);
}
364
/*
 * Set or clear bit @bit (according to the low bit of @val) in each of
 * the per-channel 4-bit scan-chain sections 0x4cf, 0x659 and 0x697,
 * leaving all other bits untouched.
 */
static void set_4cf(struct raminfo *info, int channel, u8 bit, u8 val)
{
	static const u16 regs[] = { 0x4cf, 0x659, 0x697 };
	const u8 bitval = val & 1;
	int i;

	for (i = 0; i < ARRAY_SIZE(regs); i++)
		rmw_500(info, channel, regs[i], 4, ~(1 << bit), bitval << bit);
}
373
/*
 * Program the four per-channel register groups at 0x334/0x32c/0x34a/
 * 0x33c/0x344 (one group per j = 0..3, stride 0x44) either with fixed
 * pattern values or, when @zero is set, mostly with zeros (group 3 only
 * uses the low 16 bits, hence lmask). Before each group is written,
 * 0x138/0x13c are read back into vd8[][]; the values are captured via
 * gav() but not otherwise used here. Finally sets bit 0 of 0x130 and
 * busy-waits for the hardware to clear it.
 */
static void set_334(int zero)
{
	int j, k, channel;
	const u32 val3[] = { 0x2a2b2a2b, 0x26272627, 0x2e2f2e2f, 0x2a2b };
	u32 vd8[2][16];

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		for (j = 0; j < 4; j++) {
			u32 a = (j == 1) ? 0x29292929 : 0x31313131;
			u32 lmask = (j == 3) ? 0xffff : 0xffffffff;
			u16 c;
			if ((j == 0 || j == 3) && zero)
				c = 0;
			else if (j == 3)
				c = 0x5f;
			else
				c = 0x5f5f;

			for (k = 0; k < 2; k++) {
				mchbar_write32(0x138 + 8 * k, channel << 26 | j << 24);
				gav(vd8[1][(channel << 3) | (j << 1) | k] =
				    mchbar_read32(0x138 + 8 * k));
				gav(vd8[0][(channel << 3) | (j << 1) | k] =
				    mchbar_read32(0x13c + 8 * k));
			}

			mchbar_write32(0x334 + (channel << 10) + j * 0x44, zero ? 0 : val3[j]);
			mchbar_write32(0x32c + (channel << 10) + j * 0x44,
				zero ? 0 : 0x18191819 & lmask);
			mchbar_write16(0x34a + (channel << 10) + j * 0x44, c);
			mchbar_write32(0x33c + (channel << 10) + j * 0x44,
				zero ? 0 : a & lmask);
			mchbar_write32(0x344 + (channel << 10) + j * 0x44,
				zero ? 0 : a & lmask);
		}
	}

	mchbar_setbits32(0x130, 1 << 0);
	while (mchbar_read8(0x130) & 1)
		;
}
415
/* Read-modify-write a global scan-chain section: keep the bits selected
   by @and, OR in @or, write back with the flag bit set. */
static void rmw_1d0(u16 addr, u32 and, u32 or, int split)
{
	const u32 cur = read_1d0(addr, split);

	write_1d0((cur & and) | or, addr, split, 1);
}
422
/* Return the index (0-15) of the most significant set bit of @val, or
   -1 if @val is zero. */
static int find_highest_bit_set(u16 val)
{
	int bit = 15;

	while (bit >= 0) {
		if (val & (1 << bit))
			return bit;
		bit--;
	}
	return -1;
}
431
/*
 * Return the index (0-31) of the least significant set bit of @val, or
 * -1 if @val is zero.
 *
 * Fixed: the probe mask is now an unsigned literal — `1 << 31` on a
 * 32-bit int shifts into the sign bit, which is undefined behaviour in
 * C; `1u << i` is well-defined for the full 0-31 range and yields the
 * same bit pattern.
 */
static int find_lowest_bit_set32(u32 val)
{
	int i;
	for (i = 0; i < 32; i++)
		if (val & (1u << i))
			return i;
	return -1;
}
440
/* Byte offsets of interest within the modules' (DDR3) SPD data. */
enum {
	DEVICE_TYPE = 2,
	MODULE_TYPE = 3,
	DENSITY = 4,
	RANKS_AND_DQ = 7,
	MEMORY_BUS_WIDTH = 8,
	TIMEBASE_DIVIDEND = 10,
	TIMEBASE_DIVISOR = 11,
	CYCLETIME = 12,

	CAS_LATENCIES_LSB = 14,
	CAS_LATENCIES_MSB = 15,
	CAS_LATENCY_TIME = 16,
	THERMAL_AND_REFRESH = 31,
	REFERENCE_RAW_CARD_USED = 62,
	RANK1_ADDRESS_MAPPING = 63
};
458
/*
 * Derive the common DRAM operating point from the SPDs of all populated
 * slots: intersect the supported CAS-latency bitmasks, take the slowest
 * (largest) cycle time and CAS-latency time across modules, clamp the
 * clock to the controller's min_cycletime[] table, then pick the lowest
 * usable CAS latency. Dies if the modules are slower than the
 * controller minimum or no workable CL/clock pair exists. Results are
 * stored in info->clock_speed_index and info->cas_latency.
 */
static void calculate_timings(struct raminfo *info)
{
	unsigned int cycletime;
	unsigned int cas_latency_time;
	unsigned int supported_cas_latencies;
	unsigned int channel, slot;
	unsigned int clock_speed_index;
	unsigned int min_cas_latency;
	unsigned int cas_latency;
	unsigned int max_clock_index;

	/* Find common CAS latency */
	supported_cas_latencies = 0x3fe;
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			if (info->populated_ranks[channel][slot][0])
				supported_cas_latencies &=
				    2 *
				    (info->
				     spd[channel][slot][CAS_LATENCIES_LSB] |
				     (info->
				      spd[channel][slot][CAS_LATENCIES_MSB] <<
				      8));

	max_clock_index = MIN(3, info->max_supported_clock_speed_index);

	cycletime = min_cycletime[max_clock_index];
	cas_latency_time = min_cas_latency_time[max_clock_index];

	/* Raise cycletime/CL time to the slowest module's requirements;
	   timebase is the SPD medium timebase in ps. */
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			if (info->populated_ranks[channel][slot][0]) {
				unsigned int timebase;
				timebase =
				    1000 *
				    info->
				    spd[channel][slot][TIMEBASE_DIVIDEND] /
				    info->spd[channel][slot][TIMEBASE_DIVISOR];
				cycletime =
				    MAX(cycletime,
					timebase *
					info->spd[channel][slot][CYCLETIME]);
				cas_latency_time =
				    MAX(cas_latency_time,
					timebase *
					info->
					spd[channel][slot][CAS_LATENCY_TIME]);
			}
	if (cycletime > min_cycletime[0])
		die("RAM init: Decoded SPD DRAM freq is slower than the controller minimum!");
	/* Snap cycletime down to the nearest supported controller clock. */
	for (clock_speed_index = 0; clock_speed_index < 3; clock_speed_index++) {
		if (cycletime == min_cycletime[clock_speed_index])
			break;
		if (cycletime > min_cycletime[clock_speed_index]) {
			clock_speed_index--;
			cycletime = min_cycletime[clock_speed_index];
			break;
		}
	}
	min_cas_latency = DIV_ROUND_UP(cas_latency_time, cycletime);
	cas_latency = 0;
	/* Strip unusably-high CLs until one at or below the minimum
	   required CL is found (bit i of the mask means CL = i + 3). */
	while (supported_cas_latencies) {
		cas_latency = find_highest_bit_set(supported_cas_latencies) + 3;
		if (cas_latency <= min_cas_latency)
			break;
		supported_cas_latencies &=
		    ~(1 << find_highest_bit_set(supported_cas_latencies));
	}

	if (cas_latency != min_cas_latency && clock_speed_index)
		clock_speed_index--;

	if (cas_latency * min_cycletime[clock_speed_index] > 20000)
		die("Couldn't configure DRAM");
	info->clock_speed_index = clock_speed_index;
	info->cas_latency = cas_latency;
}
536
/*
 * Program the initial per-lane timing registers (tm 0-3) for every
 * populated rank from the lookup tables, then the per-rank
 * u16_fffd0c50/u16_fffd0c70 groups and the per-channel u16_fffd0c68
 * group. Modules with MODULE_TYPE 3 on silicon revision 0 are treated
 * as extended revision 4 and receive raw-card-specific corrections
 * (reference raw cards 3 and 5).
 */
static void program_base_timings(struct raminfo *info)
{
	unsigned int channel;
	unsigned int slot, rank, lane;
	unsigned int extended_silicon_revision;
	int i;

	extended_silicon_revision = info->silicon_revision;
	if (info->silicon_revision == 0)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				if ((info->
				     spd[channel][slot][MODULE_TYPE] & 0xF) ==
				    3)
					extended_silicon_revision = 4;

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		for (slot = 0; slot < NUM_SLOTS; slot++)
			/* NOTE(review): bound looks like it should be
			   NUM_RANKS; the constants are equal here — confirm. */
			for (rank = 0; rank < NUM_SLOTS; rank++) {
				int card_timing_2;
				if (!info->populated_ranks[channel][slot][rank])
					continue;

				/* 8 data lanes plus the ECC lane. */
				for (lane = 0; lane < 9; lane++) {
					int tm_reg;
					int card_timing;

					card_timing = 0;
					if ((info->
					     spd[channel][slot][MODULE_TYPE] &
					     0xF) == 3) {
						int reference_card;
						reference_card =
						    info->
						    spd[channel][slot]
						    [REFERENCE_RAW_CARD_USED] &
						    0x1f;
						if (reference_card == 3)
							card_timing =
							    u16_ffd1188[0][lane]
							    [info->
							     clock_speed_index];
						if (reference_card == 5)
							card_timing =
							    u16_ffd1188[1][lane]
							    [info->
							     clock_speed_index];
					}

					info->training.
					    lane_timings[0][channel][slot][rank]
					    [lane] =
					    u8_FFFD1218[info->
							clock_speed_index];
					info->training.
					    lane_timings[1][channel][slot][rank]
					    [lane] = 256;

					/* tm 2 and 3 start from the table value
					   plus channel aggregates and the
					   raw-card correction. */
					for (tm_reg = 2; tm_reg < 4; tm_reg++)
						info->training.
						    lane_timings[tm_reg]
						    [channel][slot][rank][lane]
						    =
						    u8_FFFD1240[channel]
						    [extended_silicon_revision]
						    [lane][2 * slot +
							   rank][info->
								 clock_speed_index]
						    + info->max4048[channel]
						    +
						    u8_FFFD0C78[channel]
						    [extended_silicon_revision]
						    [info->
						     mode4030[channel]][slot]
						    [rank][info->
							   clock_speed_index]
						    + card_timing;
					for (tm_reg = 0; tm_reg < 4; tm_reg++)
						write_500(info, channel,
							  info->training.
							  lane_timings[tm_reg]
							  [channel][slot][rank]
							  [lane],
							  get_timing_register_addr
							  (lane, tm_reg, slot,
							   rank), 9, 0);
				}

				card_timing_2 = 0;
				if (!(extended_silicon_revision != 4
				      || (info->
					  populated_ranks_mask[channel] & 5) ==
				      5)) {
					if ((info->
					     spd[channel][slot]
					     [REFERENCE_RAW_CARD_USED] & 0x1F)
					    == 3)
						card_timing_2 =
						    u16_FFFE0EB8[0][info->
								    clock_speed_index];
					if ((info->
					     spd[channel][slot]
					     [REFERENCE_RAW_CARD_USED] & 0x1F)
					    == 5)
						card_timing_2 =
						    u16_FFFE0EB8[1][info->
								    clock_speed_index];
				}

				for (i = 0; i < 3; i++)
					write_500(info, channel,
						  (card_timing_2 +
						   info->max4048[channel]
						   +
						   u8_FFFD0EF8[channel]
						   [extended_silicon_revision]
						   [info->
						    mode4030[channel]][info->
								       clock_speed_index]),
						  u16_fffd0c50[i][slot][rank],
						  8, 1);
				write_500(info, channel,
					  (info->max4048[channel] +
					   u8_FFFD0C78[channel]
					   [extended_silicon_revision][info->
									mode4030
									[channel]]
					   [slot][rank][info->
							clock_speed_index]),
					  u16_fffd0c70[slot][rank], 7, 1);
			}
		if (!info->populated_ranks_mask[channel])
			continue;
		for (i = 0; i < 3; i++)
			write_500(info, channel,
				  (info->max4048[channel] +
				   info->avg4044[channel]
				   +
				   u8_FFFD17E0[channel]
				   [extended_silicon_revision][info->
							       mode4030
							       [channel]][info->
									  clock_speed_index]),
				  u16_fffd0c68[i], 8, 1);
	}
}
683
/* The time of one clock cycle in ps (two half-cycles). */
static unsigned int cycle_ps(struct raminfo *info)
{
	const unsigned int half = halfcycle_ps(info);

	return half + half;
}
689
/* Frequency in 0.1 MHz units (frequency_11() is in 1/9 MHz steps). */
static unsigned int frequency_01(struct raminfo *info)
{
	const unsigned int freq11 = frequency_11(info);

	return freq11 * 100 / 9;
}
695
/* Convert a duration in ps to a count of memory half-cycles (truncating;
   the multiply-before-divide order is kept to preserve rounding). */
static unsigned int ps_to_halfcycles(struct raminfo *info, unsigned int ps)
{
	const unsigned int twice_freq11 = frequency_11(info) * 2;

	return twice_freq11 * ps / 900000;
}
700
/* Convert a duration in ns to a count of memory cycles (truncating;
   the multiply-before-divide order is kept to preserve rounding). */
static unsigned int ns_to_cycles(struct raminfo *info, unsigned int ns)
{
	const unsigned int freq11 = frequency_11(info);

	return freq11 * ns / 900;
}
705
/*
 * Compute board- and silicon-dependent derived delays: some_delay_1/2/3
 * (in ps and half-cycles), the per-channel mode4030 values, the 0x244
 * channel configuration registers, and the per-channel avg4044/max4048
 * aggregates consumed later by program_base_timings(). The exact
 * hardware meaning of the individual delays is undocumented; the
 * arithmetic follows the reference implementation.
 */
static void compute_derived_timings(struct raminfo *info)
{
	unsigned int channel, slot, rank;
	int extended_silicon_revision;
	int some_delay_1_ps;
	int some_delay_2_ps;
	int some_delay_2_halfcycles_ceil;
	int some_delay_2_halfcycles_floor;
	int some_delay_3_ps;
	int some_delay_3_ps_rounded;
	int some_delay_1_cycle_ceil;
	int some_delay_1_cycle_floor;

	some_delay_3_ps_rounded = 0;
	extended_silicon_revision = info->silicon_revision;
	/* MODULE_TYPE 3 on revision-0 silicon is treated as extended
	   revision 4, same as in program_base_timings(). */
	if (!info->silicon_revision)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				if ((info->
				     spd[channel][slot][MODULE_TYPE] & 0xF) ==
				    3)
					extended_silicon_revision = 4;
	if (info->board_lane_delay[7] < 5)
		info->board_lane_delay[7] = 5;
	info->revision_flag_1 = 2;
	if (info->silicon_revision == 2 || info->silicon_revision == 3)
		info->revision_flag_1 = 0;
	if (info->revision < 16)
		info->revision_flag_1 = 0;

	if (info->revision < 8)
		info->revision_flag_1 = 0;
	if (info->revision >= 8 && (info->silicon_revision == 0
				    || info->silicon_revision == 1))
		some_delay_2_ps = 735;
	else
		some_delay_2_ps = 750;

	if (info->revision >= 0x10 && (info->silicon_revision == 0
				       || info->silicon_revision == 1))
		some_delay_1_ps = 3929;
	else
		some_delay_1_ps = 3490;

	/* Floor/ceil of some_delay_1 in whole cycles; note the floor is
	   decremented when the division is exact. */
	some_delay_1_cycle_floor = some_delay_1_ps / cycle_ps(info);
	some_delay_1_cycle_ceil = some_delay_1_ps / cycle_ps(info);
	if (some_delay_1_ps % cycle_ps(info))
		some_delay_1_cycle_ceil++;
	else
		some_delay_1_cycle_floor--;
	info->some_delay_1_cycle_floor = some_delay_1_cycle_floor;
	if (info->revision_flag_1)
		some_delay_2_ps = halfcycle_ps(info) >> 6;
	some_delay_2_ps +=
	    MAX(some_delay_1_ps - 30,
		2 * halfcycle_ps(info) * (some_delay_1_cycle_ceil - 1) + 1000) +
	    375;
	some_delay_3_ps =
	    halfcycle_ps(info) - some_delay_2_ps % halfcycle_ps(info);
	if (info->revision_flag_1) {
		if (some_delay_3_ps >= 150) {
			/* Round delay 3 to 1/64 half-cycle granularity. */
			const int some_delay_3_halfcycles =
			    (some_delay_3_ps << 6) / halfcycle_ps(info);
			some_delay_3_ps_rounded =
			    halfcycle_ps(info) * some_delay_3_halfcycles >> 6;
		}
	}
	some_delay_2_halfcycles_ceil =
	    (some_delay_2_ps + halfcycle_ps(info) - 1) / halfcycle_ps(info) -
	    2 * (some_delay_1_cycle_ceil - 1);
	if (info->revision_flag_1 && some_delay_3_ps < 150)
		some_delay_2_halfcycles_ceil++;
	some_delay_2_halfcycles_floor = some_delay_2_halfcycles_ceil;
	if (info->revision < 0x10)
		some_delay_2_halfcycles_floor =
		    some_delay_2_halfcycles_ceil - 1;
	if (!info->revision_flag_1)
		some_delay_2_halfcycles_floor++;
	/* FIXME: this variable is unused. Should it be used? */
	(void)some_delay_2_halfcycles_floor;
	info->some_delay_2_halfcycles_ceil = some_delay_2_halfcycles_ceil;
	info->some_delay_3_ps_rounded = some_delay_3_ps_rounded;
	/* Two slots on the same channel => both slots count as used. */
	if ((info->populated_ranks[0][0][0] && info->populated_ranks[0][1][0])
	    || (info->populated_ranks[1][0][0]
		&& info->populated_ranks[1][1][0]))
		info->max_slots_used_in_channel = 2;
	else
		info->max_slots_used_in_channel = 1;
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		mchbar_write32(0x244 + (channel << 10),
			       ((info->revision < 8) ? 1 : 0x200) |
			       ((2 - info->max_slots_used_in_channel) << 17) |
			       (channel << 21) |
			       (info->some_delay_1_cycle_floor << 18) | 0x9510);
	if (info->max_slots_used_in_channel == 1) {
		info->mode4030[0] = (count_ranks_in_channel(info, 0) == 2);
		info->mode4030[1] = (count_ranks_in_channel(info, 1) == 2);
	} else {
		info->mode4030[0] = ((count_ranks_in_channel(info, 0) == 1) || (count_ranks_in_channel(info, 0) == 2)) ? 2 : 3;	/* 2 if 1 or 2 ranks */
		info->mode4030[1] = ((count_ranks_in_channel(info, 1) == 1)
				     || (count_ranks_in_channel(info, 1) ==
					 2)) ? 2 : 3;
	}
	/* Per-channel aggregates: avg4044 averages the slot-0-rank 'a'
	   corrections, max4048 tracks the largest unk1-vs-threshold slack. */
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		int max_of_unk;
		int min_of_unk_2;

		int i, count;
		int sum;

		if (!info->populated_ranks_mask[channel])
			continue;

		max_of_unk = 0;
		min_of_unk_2 = 32767;

		sum = 0;
		count = 0;
		for (i = 0; i < 3; i++) {
			int unk1;
			if (info->revision < 8)
				unk1 =
				    u8_FFFD1891[0][channel][info->
							    clock_speed_index]
				    [i];
			else if (!
				 (info->revision >= 0x10
				  || info->revision_flag_1))
				unk1 =
				    u8_FFFD1891[1][channel][info->
							    clock_speed_index]
				    [i];
			else
				unk1 = 0;
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					int a = 0;
					int b = 0;

					if (!info->
					    populated_ranks[channel][slot]
					    [rank])
						continue;
					/* Raw-card corrections apply only on
					   extended revision 4 without both
					   'outer' ranks populated. */
					if (extended_silicon_revision == 4
					    && (info->
						populated_ranks_mask[channel] &
						5) != 5) {
						if ((info->
						     spd[channel][slot]
						     [REFERENCE_RAW_CARD_USED] &
						     0x1F) == 3) {
							a = u16_ffd1178[0]
							    [info->
							     clock_speed_index];
							b = u16_fe0eb8[0][info->
									  clock_speed_index];
						} else
						    if ((info->
							 spd[channel][slot]
							 [REFERENCE_RAW_CARD_USED]
							 & 0x1F) == 5) {
							a = u16_ffd1178[1]
							    [info->
							     clock_speed_index];
							b = u16_fe0eb8[1][info->
									  clock_speed_index];
						}
					}
					min_of_unk_2 = MIN(min_of_unk_2, a);
					min_of_unk_2 = MIN(min_of_unk_2, b);
					if (rank == 0) {
						sum += a;
						count++;
					}
					{
						int t;
						t = b +
						    u8_FFFD0EF8[channel]
						    [extended_silicon_revision]
						    [info->
						     mode4030[channel]][info->
									clock_speed_index];
						if (unk1 >= t)
							max_of_unk =
							    MAX(max_of_unk,
								unk1 - t);
					}
				}
			{
				int t =
				    u8_FFFD17E0[channel]
				    [extended_silicon_revision][info->
								mode4030
								[channel]]
				    [info->clock_speed_index] + min_of_unk_2;
				if (unk1 >= t)
					max_of_unk = MAX(max_of_unk, unk1 - t);
			}
		}

		if (count == 0)
			die("No memory ranks found for channel %u\n", channel);

		info->avg4044[channel] = sum / count;
		info->max4048[channel] = max_of_unk;
	}
}
913
/*
 * Issue a JEDEC mode-register command to one rank by driving @addr3
 * through MCHBAR 0x271/0x671 (presumably the bank-address field —
 * confirm) and performing a dummy read at an address that encodes
 * @value within the rank's 256 MiB window (total_rank << 28). For odd
 * ranks with mirrored address mapping (SPD byte 63 bit 0), the relevant
 * address and bank bits are swapped first per the DDR3 mirroring rules.
 * A second dummy read with @addr3 = 1 follows.
 */
static void jedec_read(struct raminfo *info,
		       int channel, int slot, int rank,
		       int total_rank, u8 addr3, unsigned int value)
{
	/* Handle mirrored mapping. */
	if ((rank & 1) && (info->spd[channel][slot][RANK1_ADDRESS_MAPPING] & 1)) {
		addr3 = (addr3 & 0xCF) | ((addr3 & 0x10) << 1) | ((addr3 >> 1) & 0x10);
		value = (value & ~0x1f8) | ((value >> 1) & 0xa8) | ((value & 0xa8) << 1);
	}

	mchbar_clrsetbits8(0x271, 0x1f << 1, addr3);
	mchbar_clrsetbits8(0x671, 0x1f << 1, addr3);

	read32p((value << 3) | (total_rank << 28));

	mchbar_clrsetbits8(0x271, 0x1f << 1, 1 << 1);
	mchbar_clrsetbits8(0x671, 0x1f << 1, 1 << 1);

	read32p(total_rank << 28);
}
934
/* DDR3 MR1 field encodings: Rtt_nom termination strength (RZQ/n) and
   output driver strength. */
enum {
	MR1_RZQ12 = 512,
	MR1_RZQ2 = 64,
	MR1_RZQ4 = 4,
	MR1_ODS34OHM = 2
};

/* DDR3 MR0 field encodings: burst type and DLL reset. */
enum {
	MR0_BT_INTERLEAVED = 8,
	MR0_DLL_RESET_ON = 256
};

/* DDR3 MR2 field encodings: dynamic ODT (Rtt_WR). */
enum {
	MR2_RTT_WR_DISABLED = 0,
	MR2_RZQ2 = 1 << 10
};
951
/*
 * Run JEDEC initialization on every populated rank: derive the write
 * recovery value from board_lane_delay[3], AND together the ASR and SRT
 * capability bits from each module's SPD thermal byte, choose
 * Rtt_nom/Rtt_wr and the 0x58e RZQ value from population and silicon
 * revision, program the per-channel 0x588-0x590 staging registers, then
 * issue the four mode-register commands to each rank via jedec_read()
 * (addr3 values 0x28/0x38/0x18/6 — apparently MR2, MR3, MR1 and finally
 * MR0 with DLL reset; confirm against the chipset documentation).
 */
static void jedec_init(struct raminfo *info)
{
	int write_recovery;
	int channel, slot, rank;
	int total_rank;
	int dll_on;
	int self_refresh_temperature;
	int auto_self_refresh;

	auto_self_refresh = 1;
	self_refresh_temperature = 1;
	if (info->board_lane_delay[3] <= 10) {
		if (info->board_lane_delay[3] <= 8)
			write_recovery = info->board_lane_delay[3] - 4;
		else
			write_recovery = 5;
	} else {
		write_recovery = 6;
	}
	FOR_POPULATED_RANKS {
		auto_self_refresh &=
		    (info->spd[channel][slot][THERMAL_AND_REFRESH] >> 2) & 1;
		self_refresh_temperature &=
		    info->spd[channel][slot][THERMAL_AND_REFRESH] & 1;
	}
	if (auto_self_refresh == 1)
		self_refresh_temperature = 0;

	dll_on = ((info->silicon_revision != 2 && info->silicon_revision != 3)
		  || (info->populated_ranks[0][0][0]
		      && info->populated_ranks[0][1][0])
		  || (info->populated_ranks[1][0][0]
		      && info->populated_ranks[1][1][0]));

	total_rank = 0;

	for (channel = NUM_CHANNELS - 1; channel >= 0; channel--) {
		int rtt, rtt_wr = MR2_RTT_WR_DISABLED;
		int rzq_reg58e;

		if (info->silicon_revision == 2 || info->silicon_revision == 3) {
			rzq_reg58e = 64;
			rtt = MR1_RZQ2;
			if (info->clock_speed_index != 0) {
				rzq_reg58e = 4;
				if (info->populated_ranks_mask[channel] == 3)
					rtt = MR1_RZQ4;
			}
		} else {
			if ((info->populated_ranks_mask[channel] & 5) == 5) {
				rtt = MR1_RZQ12;
				rzq_reg58e = 64;
				rtt_wr = MR2_RZQ2;
			} else {
				rzq_reg58e = 4;
				rtt = MR1_RZQ4;
			}
		}

		mchbar_write16(0x588 + (channel << 10), 0);
		mchbar_write16(0x58a + (channel << 10), 4);
		mchbar_write16(0x58c + (channel << 10), rtt | MR1_ODS34OHM);
		mchbar_write16(0x58e + (channel << 10), rzq_reg58e | 0x82);
		mchbar_write16(0x590 + (channel << 10), 0x1282);

		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_RANKS; rank++)
				if (info->populated_ranks[channel][slot][rank]) {
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x28,
						   rtt_wr | (info->
							     clock_speed_index
							     << 3)
						   | (auto_self_refresh << 6) |
						   (self_refresh_temperature <<
						    7));
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x38, 0);
					jedec_read(info, channel, slot, rank,
						   total_rank, 0x18,
						   rtt | MR1_ODS34OHM);
					jedec_read(info, channel, slot, rank,
						   total_rank, 6,
						   (dll_on << 12) |
						   (write_recovery << 9)
						   | ((info->cas_latency - 4) <<
						      4) | MR0_BT_INTERLEAVED |
						   MR0_DLL_RESET_ON);
					total_rank++;
				}
	}
}
1044
/*
 * Program the DIMM address-map registers: per-rank size/geometry at
 * 0x208 and the cumulative channel size at 0x200 (in 64 MiB units,
 * hence the >> 6), then the channel-interleave split at 0x100/0x104.
 * With @pre_jedec set, every populated rank is mapped as a fixed
 * 256 MiB — presumably so JEDEC init can address each rank before the
 * real sizes are decoded. Also fills in info->total_memory_mb and the
 * interleaved/non-interleaved split fields.
 */
static void program_modules_memory_map(struct raminfo *info, int pre_jedec)
{
	unsigned int channel, slot, rank;
	unsigned int total_mb[2] = { 0, 0 };	/* total memory per channel in MB */
	unsigned int channel_0_non_interleaved;

	FOR_ALL_RANKS {
		if (info->populated_ranks[channel][slot][rank]) {
			total_mb[channel] +=
			    pre_jedec ? 256 : (256 << info->
					       density[channel][slot] >> info->
					       is_x16_module[channel][slot]);
			mchbar_write8(0x208 + rank + 2 * slot + (channel << 10),
				(pre_jedec ? (1 | ((1 + 1) << 1)) :
				(info->is_x16_module[channel][slot] |
				((info->density[channel][slot] + 1) << 1))) |
				0x80);
		}
		mchbar_write16(0x200 + (channel << 10) + 4 * slot + 2 * rank,
			total_mb[channel] >> 6);
	}

	info->total_memory_mb = total_mb[0] + total_mb[1];

	info->interleaved_part_mb =
	    pre_jedec ? 0 : 2 * MIN(total_mb[0], total_mb[1]);
	info->non_interleaved_part_mb =
	    total_mb[0] + total_mb[1] - info->interleaved_part_mb;
	channel_0_non_interleaved = total_mb[0] - info->interleaved_part_mb / 2;
	mchbar_write32(0x100, channel_0_non_interleaved | info->non_interleaved_part_mb << 16);
	if (!pre_jedec)
		mchbar_write16(0x104, info->interleaved_part_mb);
}
1078
/*
 * Program board- and frequency-dependent delay registers in MCHBAR.
 *
 * Derives several delay values from the silicon/stepping revision, the
 * configured CAS latency, the clock speed index and the board lane delay
 * table, then writes them to per-channel MCHBAR registers.  The exact
 * meaning of most register fields is undocumented (register addresses and
 * bit layouts reverse-engineered from vendor firmware); only the derivation
 * logic visible here is described.
 */
static void program_board_delay(struct raminfo *info)
{
	int cas_latency_shift;
	int some_delay_ns;
	int some_delay_3_half_cycles;

	unsigned int channel, i;
	int high_multiplier;
	int lane_3_delay;
	int cas_latency_derived;

	high_multiplier = 0;
	some_delay_ns = 200;
	some_delay_3_half_cycles = 4;
	/* Steppings 0/1 get an extra CAS-latency adjustment ... */
	cas_latency_shift = info->silicon_revision == 0
	    || info->silicon_revision == 1 ? 1 : 0;
	/* ... unless the PCI revision is old, which also needs a longer delay. */
	if (info->revision < 8) {
		some_delay_ns = 600;
		cas_latency_shift = 0;
	}
	{
		int speed_bit;
		speed_bit =
		    ((info->clock_speed_index > 1
		      || (info->silicon_revision != 2
			  && info->silicon_revision != 3))) ^ (info->revision >=
							       0x10);
		write_500(info, 0, speed_bit | ((!info->use_ecc) << 1), 0x60e,
			  3, 1);
		write_500(info, 1, speed_bit | ((!info->use_ecc) << 1), 0x60e,
			  3, 1);
		if (info->revision >= 0x10 && info->clock_speed_index <= 1
		    && (info->silicon_revision == 2
			|| info->silicon_revision == 3))
			rmw_1d0(0x116, 5, 2, 4);
	}
	mchbar_write32(0x120, 1 << (info->max_slots_used_in_channel + 28) | 0x188e7f9f);

	mchbar_write8(0x124, info->board_lane_delay[4] + (frequency_01(info) + 999) / 1000);
	mchbar_write16(0x125, 0x1360);
	mchbar_write8(0x127, 0x40);
	/* Slow FSB relative to memory clock: raise the multiplier and
	   recompute the half-cycle delay from the picosecond budget. */
	if (info->fsb_frequency < frequency_11(info) / 2) {
		unsigned int some_delay_2_half_cycles;
		high_multiplier = 1;
		some_delay_2_half_cycles = ps_to_halfcycles(info,
							    ((3 *
							      fsbcycle_ps(info))
							     >> 1) +
							    (halfcycle_ps(info)
							     *
							     reg178_min[info->
									clock_speed_index]
							     >> 6)
							    +
							    4 *
							    halfcycle_ps(info)
							    + 2230);
		some_delay_3_half_cycles =
		    MIN((some_delay_2_half_cycles +
			 (frequency_11(info) * 2) * (28 -
						     some_delay_2_half_cycles) /
			 (frequency_11(info) * 2 -
			  4 * (info->fsb_frequency))) >> 3, 7);
	}
	if (mchbar_read8(0x2ca9) & 1)
		some_delay_3_half_cycles = 3;
	/* First per-channel pass: timing fields derived from CAS latency,
	   clock speed index and slot count. */
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		mchbar_setbits32(0x220 + (channel << 10), 0x18001117);
		mchbar_write32(0x224 + (channel << 10),
			(info->max_slots_used_in_channel - 1) |
			(info->cas_latency - 5 - info->clock_speed_index)
			<< 21 | (info->max_slots_used_in_channel +
			info->cas_latency - cas_latency_shift - 4) << 16 |
			(info->cas_latency - cas_latency_shift - 4) << 26 |
			(info->cas_latency - info->clock_speed_index +
			info->max_slots_used_in_channel - 6) << 8);
		mchbar_write32(0x228 + (channel << 10), info->max_slots_used_in_channel);
		mchbar_write8(0x239 + (channel << 10), 32);
		mchbar_write32(0x248 + (channel << 10), high_multiplier << 24 |
			some_delay_3_half_cycles << 25 | 0x840000);
		mchbar_write32(0x278 + (channel << 10), 0xc362042);
		mchbar_write32(0x27c + (channel << 10), 0x8b000062);
		mchbar_write32(0x24c + (channel << 10),
			(!!info->clock_speed_index) << 17 |
			((2 + info->clock_speed_index -
			(!!info->clock_speed_index))) << 12 | 0x10200);

		mchbar_write8(0x267 + (channel << 10), 4);
		mchbar_write16(0x272 + (channel << 10), 0x155);
		mchbar_clrsetbits32(0x2bc + (channel << 10), 0xffffff, 0x707070);

		/* Bitmap of *unpopulated* ranks for this channel. */
		write_500(info, channel,
			  ((!info->populated_ranks[channel][1][1])
			   | (!info->populated_ranks[channel][1][0] << 1)
			   | (!info->populated_ranks[channel][0][1] << 2)
			   | (!info->populated_ranks[channel][0][0] << 3)),
			  0x4c9, 4, 1);
	}

	mchbar_write8(0x2c4, (1 + (info->clock_speed_index != 0)) << 6 | 0xc);
	{
		/* FSB-to-memory clock ratio selector. */
		u8 freq_divisor = 2;
		if (info->fsb_frequency == frequency_11(info))
			freq_divisor = 3;
		else if (2 * info->fsb_frequency < 3 * (frequency_11(info) / 2))
			freq_divisor = 1;
		else
			freq_divisor = 2;
		mchbar_write32(0x2c0, freq_divisor << 11 | 0x6009c400);
	}

	/* Clamp board lane-3 delay into the set {0..8, 10, 12}. */
	if (info->board_lane_delay[3] <= 10) {
		if (info->board_lane_delay[3] <= 8)
			lane_3_delay = info->board_lane_delay[3];
		else
			lane_3_delay = 10;
	} else {
		lane_3_delay = 12;
	}
	cas_latency_derived = info->cas_latency - info->clock_speed_index + 2;
	if (info->clock_speed_index > 1)
		cas_latency_derived++;
	/* Second per-channel pass: board lane-delay dependent registers. */
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		mchbar_write32(0x240 + (channel << 10),
			((info->clock_speed_index == 0) * 0x11000) |
			0x1002100 | (2 + info->clock_speed_index) << 4 |
			(info->cas_latency - 3));
		write_500(info, channel, (info->clock_speed_index << 1) | 1,
			  0x609, 6, 1);
		write_500(info, channel,
			  info->clock_speed_index + 2 * info->cas_latency - 7,
			  0x601, 6, 1);

		mchbar_write32(0x250 + (channel << 10),
			(lane_3_delay + info->clock_speed_index + 9) << 6 |
			info->board_lane_delay[7] << 2 |
			info->board_lane_delay[4] << 16 |
			info->board_lane_delay[1] << 25 |
			info->board_lane_delay[1] << 29 | 1);
		mchbar_write32(0x254 + (channel << 10),
			info->board_lane_delay[1] >> 3 |
			(info->board_lane_delay[8] + 4 * info->use_ecc) << 6 |
			0x80 | info->board_lane_delay[6] << 1 |
			info->board_lane_delay[2] << 28 |
			cas_latency_derived << 16 | 0x4700000);
		mchbar_write32(0x258 + (channel << 10),
			(info->board_lane_delay[5] + info->clock_speed_index + 9) << 12 |
			(info->clock_speed_index - info->cas_latency + 12) << 8 |
			info->board_lane_delay[2] << 17 |
			info->board_lane_delay[4] << 24 | 0x47);
		mchbar_write32(0x25c + (channel << 10),
			info->board_lane_delay[1] << 1 |
			info->board_lane_delay[0] << 8 | 0x1da50000);
		mchbar_write8(0x264 + (channel << 10), 0xff);
		mchbar_write8(0x5f8 + (channel << 10), cas_latency_shift << 3 | info->use_ecc);
	}

	/* Pre-JEDEC placeholder memory map. */
	program_modules_memory_map(info, 1);

	mchbar_clrsetbits16(0x610, 0xfe3c,
		MIN(ns_to_cycles(info, some_delay_ns) / 2, 127) << 9 | 0x3c);
	mchbar_setbits16(0x612, 1 << 8);
	mchbar_setbits16(0x214, 0x3e00);
	/* Preliminary SAD DRAM rules: one rule covering everything below
	   (total - 64 MB), no interleaving. */
	for (i = 0; i < 8; i++) {
		pci_write_config32(QPI_SAD, SAD_DRAM_RULE(i),
				   (info->total_memory_mb - 64) | !i | 2);
		pci_write_config32(QPI_SAD, SAD_INTERLEAVE_LIST(i), 0);
	}
}
1248
1249 #define DEFAULT_PCI_MMIO_SIZE 2048
1250
/*
 * Compute and program the system memory map: TOM/TOLUD/TOUUD, the
 * optional above-4G remap window, UMA (IGD + GTT) stolen memory, the
 * TSEG SMM region, and the final QPI SAD DRAM rules.
 *
 * All quantities in this function are in MiB unless noted otherwise.
 */
static void program_total_memory_map(struct raminfo *info)
{
	unsigned int tom, tolud, touud;
	unsigned int quickpath_reserved;
	unsigned int remap_base;
	unsigned int uma_base_igd;
	unsigned int uma_base_gtt;
	unsigned int mmio_size;
	int memory_remap;
	unsigned int memory_map[8];
	int i;
	unsigned int current_limit;
	unsigned int tseg_base;
	int uma_size_igd = 0, uma_size_gtt = 0;

	memset(memory_map, 0, sizeof(memory_map));

	if (info->uma_enabled) {
		/* Decode IGD/GTT stolen sizes from the GGC register. */
		u16 t = pci_read_config16(NORTHBRIDGE, GGC);
		gav(t);
		const int uma_sizes_gtt[16] =
		    { 0, 1, 0, 2, 0, 0, 0, 0, 0, 2, 3, 4, 42, 42, 42, 42 };
		/* Igd memory */
		const int uma_sizes_igd[16] = {
			0, 0, 0, 0, 0, 32, 48, 64, 128, 256, 96, 160, 224, 352,
			256, 512
		};

		uma_size_igd = uma_sizes_igd[(t >> 4) & 0xF];
		uma_size_gtt = uma_sizes_gtt[(t >> 8) & 0xF];
	}

	mmio_size = DEFAULT_PCI_MMIO_SIZE;

	tom = info->total_memory_mb;
	/* Exactly 4 GiB installed: carve out 64 MiB at the top. */
	if (tom == 4096)
		tom = 4032;
	/* Top of usable DRAM, after the ME/HECI reservation, 64 MiB aligned. */
	touud = ALIGN_DOWN(tom - info->memory_reserved_for_heci_mb, 64);
	tolud = ALIGN_DOWN(MIN(4096 - mmio_size + ALIGN_UP(uma_size_igd + uma_size_gtt, 64)
			      , touud), 64);
	memory_remap = 0;
	/* Remap the DRAM hidden by the MMIO hole to above 4 GiB. */
	if (touud - tolud > 64) {
		memory_remap = 1;
		remap_base = MAX(4096, touud);
		touud = touud - tolud + 4096;
	}
	if (touud > 4096)
		memory_map[2] = touud | 1;
	quickpath_reserved = 0;

	u32 t = pci_read_config32(QPI_SAD, 0x68);

	gav(t);

	if (t & 0x800) {
		u32 shift = t >> 20;
		if (shift == 0)
			die("Quickpath value is 0\n");
		quickpath_reserved = (u32)1 << find_lowest_bit_set32(shift);
	}

	if (memory_remap)
		touud -= quickpath_reserved;

	/* UMA regions are stacked directly below TOLUD; TSEG below them. */
	uma_base_igd = tolud - uma_size_igd;
	uma_base_gtt = uma_base_igd - uma_size_gtt;
	tseg_base = ALIGN_DOWN(uma_base_gtt, 64) - (CONFIG_SMM_TSEG_SIZE >> 20);
	if (!memory_remap)
		tseg_base -= quickpath_reserved;
	tseg_base = ALIGN_DOWN(tseg_base, 8);

	pci_write_config16(NORTHBRIDGE, TOLUD, tolud << 4);
	pci_write_config16(NORTHBRIDGE, TOM, tom >> 6);
	if (memory_remap) {
		pci_write_config16(NORTHBRIDGE, REMAPBASE, remap_base >> 6);
		pci_write_config16(NORTHBRIDGE, REMAPLIMIT, (touud - 64) >> 6);
	}
	pci_write_config16(NORTHBRIDGE, TOUUD, touud);

	if (info->uma_enabled) {
		pci_write_config32(NORTHBRIDGE, IGD_BASE, uma_base_igd << 20);
		pci_write_config32(NORTHBRIDGE, GTT_BASE, uma_base_gtt << 20);
	}
	pci_write_config32(NORTHBRIDGE, TSEG, tseg_base << 20);

	/* Final SAD DRAM rules: bit 0 of a map entry marks the rule valid. */
	current_limit = 0;
	memory_map[0] = ALIGN_DOWN(uma_base_gtt, 64) | 1;
	memory_map[1] = 4096;
	for (i = 0; i < ARRAY_SIZE(memory_map); i++) {
		current_limit = MAX(current_limit, memory_map[i] & ~1);
		pci_write_config32(QPI_SAD, SAD_DRAM_RULE(i),
				   (memory_map[i] & 1) | ALIGN_DOWN(current_limit -
								    1, 64) | 2);
		pci_write_config32(QPI_SAD, SAD_INTERLEAVE_LIST(i), 0);
	}
}
1347
/*
 * Read chipset capability and revision registers and derive the silicon
 * revision and the maximum supported memory clock speed index.
 *
 * Consumes CAPID0[0..2], the PCI revision/device IDs and (for the
 * capid0[2] bit-1 case) the SPD module type of the installed DIMMs.
 */
static void collect_system_info(struct raminfo *info)
{
	u32 capid0[3];
	int i;
	unsigned int channel;

	for (i = 0; i < 3; i++) {
		capid0[i] = pci_read_config32(NORTHBRIDGE, CAPID0 | (i << 2));
		printk(BIOS_DEBUG, "CAPID0[%d] = 0x%08x\n", i, capid0[i]);
	}
	info->revision = pci_read_config8(NORTHBRIDGE, PCI_REVISION_ID);
	printk(BIOS_DEBUG, "Revision ID: 0x%x\n", info->revision);
	printk(BIOS_DEBUG, "Device ID: 0x%x\n", pci_read_config16(NORTHBRIDGE, PCI_DEVICE_ID));

	/* Fused clock cap: inverted low 3 bits of CAPID0[1]. */
	info->max_supported_clock_speed_index = (~capid0[1] & 7);

	/* UMA is available only when not fused off and the IGD is enabled. */
	if ((capid0[1] >> 11) & 1)
		info->uma_enabled = 0;
	else
		gav(info->uma_enabled =
		    pci_read_config8(NORTHBRIDGE, DEVEN) & 8);
	/* Unrecognised: [0000:fffd3d2d] 37f81.37f82 ! CPUID: eax: 00000001; ecx: 00000e00 => 00020655.00010800.029ae3ff.bfebfbff */
	info->silicon_revision = 0;

	if (capid0[2] & 2) {
		info->silicon_revision = 0;
		info->max_supported_clock_speed_index = 2;
		/* SO-DIMM (module type 3) in slot 0 further caps the clock. */
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			if (info->populated_ranks[channel][0][0]
			    && (info->spd[channel][0][MODULE_TYPE] & 0xf) ==
			    3) {
				info->silicon_revision = 2;
				info->max_supported_clock_speed_index = 1;
			}
	} else {
		switch (((capid0[2] >> 18) & 1) + 2 * ((capid0[1] >> 3) & 1)) {
		case 1:
		case 2:
			info->silicon_revision = 3;
			break;
		case 3:
			info->silicon_revision = 0;
			break;
		case 0:
			info->silicon_revision = 2;
			break;
		}
		/* Device ID overrides the CAPID-derived value. */
		switch (pci_read_config16(NORTHBRIDGE, PCI_DEVICE_ID)) {
		case 0x40:
			info->silicon_revision = 0;
			break;
		case 0x48:
			info->silicon_revision = 1;
			break;
		}
	}
}
1405
/*
 * Replay cached training results (lane timings for all four timing types,
 * plus registers 0x178 and 0x10b) from info->cached_training back into the
 * hardware.  Skipped on old steppings (revision < 8).
 */
static void write_training_data(struct raminfo *info)
{
	int tm, channel, slot, rank, lane;
	if (info->revision < 8)
		return;

	for (tm = 0; tm < 4; tm++)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					for (lane = 0; lane < 9; lane++)
						write_500(info, channel,
							  info->
							  cached_training->
							  lane_timings[tm]
							  [channel][slot][rank]
							  [lane],
							  get_timing_register_addr
							  (lane, tm, slot,
							   rank), 9, 0);
	write_1d0(info->cached_training->reg_178, 0x178, 7, 1);
	write_1d0(info->cached_training->reg_10b, 0x10b, 6, 1);
}
1429
/*
 * Debug dump: for every populated rank, print each lane's four timing
 * registers as read back from hardware alongside the value recorded in
 * info->training, then the 0x178 and 0x10b registers.  RAM_SPEW only.
 */
static void dump_timings(struct raminfo *info)
{
	int channel, slot, rank, lane, i;
	printk(RAM_SPEW, "Timings:\n");
	FOR_POPULATED_RANKS {
		printk(RAM_SPEW, "channel %d, slot %d, rank %d\n", channel,
		       slot, rank);
		for (lane = 0; lane < 9; lane++) {
			printk(RAM_SPEW, "lane %d: ", lane);
			for (i = 0; i < 4; i++) {
				/* "hardware (cached)" per timing type. */
				printk(RAM_SPEW, "%x (%x) ",
				       read_500(info, channel,
						get_timing_register_addr
						(lane, i, slot, rank),
						9),
				       info->training.
				       lane_timings[i][channel][slot][rank]
				       [lane]);
			}
			printk(RAM_SPEW, "\n");
		}
	}
	printk(RAM_SPEW, "[178] = %x (%x)\n", read_1d0(0x178, 7),
	       info->training.reg_178);
	printk(RAM_SPEW, "[10b] = %x (%x)\n", read_1d0(0x10b, 6),
	       info->training.reg_10b);
}
1457
/* Read the timings and other registers that need to be restored verbatim,
   and save them to CBMEM.
 */
/*
 * Snapshot the current training state (per-lane timings read back from
 * hardware, registers 0x178/0x10b, the per-channel 0x274/0x265 values and
 * registers 0x6dc/0x6e8) and stash it in the MRC cache for S3 resume.
 */
static void save_timings(struct raminfo *info)
{
	struct ram_training train;
	int channel, slot, rank, lane, i;

	/* Start from the in-memory copy, then overwrite the timings with
	   what the hardware currently holds. */
	train = info->training;
	FOR_POPULATED_RANKS for (lane = 0; lane < 9; lane++)
		for (i = 0; i < 4; i++)
			train.lane_timings[i][channel][slot][rank][lane] =
			    read_500(info, channel,
				     get_timing_register_addr(lane, i, slot,
							      rank), 9);
	train.reg_178 = read_1d0(0x178, 7);
	train.reg_10b = read_1d0(0x10b, 6);

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		u32 reg32;
		reg32 = mchbar_read32((channel << 10) + 0x274);
		train.reg274265[channel][0] = reg32 >> 16;
		train.reg274265[channel][1] = reg32 & 0xffff;
		train.reg274265[channel][2] = mchbar_read16((channel << 10) + 0x265) >> 8;
	}
	train.reg2ca9_bit0 = mchbar_read8(0x2ca9) & 1;
	train.reg_6dc = mchbar_read32(0x6dc);
	train.reg_6e8 = mchbar_read32(0x6e8);

	printk(RAM_SPEW, "[6dc] = %x\n", train.reg_6dc);
	printk(RAM_SPEW, "[6e8] = %x\n", train.reg_6e8);

	/* Save the MRC S3 restore data to cbmem */
	mrc_cache_stash_data(MRC_TRAINING_DATA, MRC_CACHE_VERSION,
			     &train, sizeof(train));
}
1494
/* Map previously saved training data from the MRC cache (for S3 resume). */
static const struct ram_training *get_cached_training(void)
{
	return mrc_cache_current_mmap_leak(MRC_TRAINING_DATA,
					   MRC_CACHE_VERSION,
					   NULL);
}
1501
have_match_ranks(struct raminfo * info,int channel,int ranks)1502 static int have_match_ranks(struct raminfo *info, int channel, int ranks)
1503 {
1504 int ranks_in_channel;
1505 ranks_in_channel = info->populated_ranks[channel][0][0]
1506 + info->populated_ranks[channel][0][1]
1507 + info->populated_ranks[channel][1][0]
1508 + info->populated_ranks[channel][1][1];
1509
1510 /* empty channel */
1511 if (ranks_in_channel == 0)
1512 return 1;
1513
1514 if (ranks_in_channel != ranks)
1515 return 0;
1516 /* single slot */
1517 if (info->populated_ranks[channel][0][0] !=
1518 info->populated_ranks[channel][1][0])
1519 return 1;
1520 if (info->populated_ranks[channel][0][1] !=
1521 info->populated_ranks[channel][1][1])
1522 return 1;
1523 if (info->is_x16_module[channel][0] != info->is_x16_module[channel][1])
1524 return 0;
1525 if (info->density[channel][0] != info->density[channel][1])
1526 return 0;
1527 return 1;
1528 }
1529
/*
 * Initialize info->training.lane_timings from hardware: type-0 timings are
 * set to the fixed midpoint 32, types 1..3 are read back from the timing
 * registers (with a +11 fixup on type 1 whose origin is unclear — note the
 * original "!!!!" marker).
 */
static void read_4090(struct raminfo *info)
{
	int i, channel, slot, rank, lane;
	for (i = 0; i < 2; i++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_RANKS; rank++)
				for (lane = 0; lane < 9; lane++)
					info->training.
					    lane_timings[0][i][slot][rank][lane]
					    = 32;

	for (i = 1; i < 4; i++)
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					for (lane = 0; lane < 9; lane++) {
						info->training.
						    lane_timings[i][channel]
						    [slot][rank][lane] =
						    read_500(info, channel,
							     get_timing_register_addr
							     (lane, i, slot,
							      rank), 9)
						    + (i == 1) * 11;	// !!!!
					}
}
1556
/*
 * Expected 32-bit data word for offset @addr (in 8-byte units) of the
 * first memory test pattern.  The pattern repeats every 480 offsets;
 * within a period the word is all-zeros or all-ones depending on one bit
 * picked from a fixed inversion-mask table, and later periods use a
 * walking 0x01010101 byte pattern instead of zeros.
 *
 * @param flip Non-zero inverts the whole pattern.
 * @param addr Test offset in 8-byte units.
 * @return The expected 32-bit word.
 */
static u32 get_etalon2(int flip, u32 addr)
{
	/* static const: the table is built once, not on every call. */
	static const u16 invmask[] = {
		0xaaaa, 0x6db6, 0x4924, 0xeeee, 0xcccc, 0x8888, 0x7bde, 0x739c,
		0x6318, 0x4210, 0xefbe, 0xcf3c, 0x8e38, 0x0c30, 0x0820
	};
	u32 ret;
	u32 comp4 = addr / 480;	/* which 480-offset period we are in */
	addr %= 480;
	u32 comp1 = addr & 0xf;		/* bit index into the mask row */
	u32 comp2 = (addr >> 4) & 1;	/* extra inversion bit */
	u32 comp3 = addr >> 5;		/* mask row selector (0..14) */

	if (comp4)
		ret = 0x1010101 << (comp4 - 1);
	else
		ret = 0;
	if (flip ^ (((invmask[comp3] >> comp1) ^ comp2) & 1))
		ret = ~ret;

	return ret;
}
1579
/* Tear down the RAM-test cache window by clearing MTRR 3 base and mask. */
static void disable_cache_region(void)
{
	msr_t msr = {.lo = 0, .hi = 0 };

	wrmsr(MTRR_PHYS_BASE(3), msr);
	wrmsr(MTRR_PHYS_MASK(3), msr);
}
1587
/*
 * Point MTRR 3 at @base with write-protect caching so the memory test can
 * read test regions through the cache.  @size is in bytes; note the mask
 * computation rounds it up by a full page (size + 4096 aligned down).
 */
static void enable_cache_region(unsigned int base, unsigned int size)
{
	msr_t msr;
	msr.lo = base | MTRR_TYPE_WRPROT;
	msr.hi = 0;
	wrmsr(MTRR_PHYS_BASE(3), msr);
	msr.lo = ((~(ALIGN_DOWN(size + 4096, 4096) - 1) | MTRR_DEF_TYPE_EN)
		  & 0xffffffff);
	msr.hi = 0x0000000f;	/* mask bits above 4 GiB (36-bit phys) */
	wrmsr(MTRR_PHYS_MASK(3), msr);
}
1599
/*
 * Flush the cache lines covering [start, start + size) with clflush.
 * Uses the same page-rounded size as enable_cache_region(); steps by the
 * 64-byte cache-line size.
 */
static void flush_cache(u32 start, u32 size)
{
	u32 end;
	u32 addr;

	end = start + (ALIGN_DOWN(size + 4096, 4096));
	for (addr = start; addr < end; addr += 64)
		clflush((void *)(uintptr_t)addr);
}
1609
/* Write 1 to northbridge config register 0xc0; presumably clears latched
   memory-test error status — register is undocumented here. */
static void clear_errors(void)
{
	pci_write_config8(NORTHBRIDGE, 0xc0, 0x01);
}
1614
write_testing(struct raminfo * info,int totalrank,int flip)1615 static void write_testing(struct raminfo *info, int totalrank, int flip)
1616 {
1617 int nwrites = 0;
1618 /* in 8-byte units. */
1619 u32 offset;
1620 u8 *base;
1621
1622 base = (u8 *)(uintptr_t)(totalrank << 28);
1623 for (offset = 0; offset < 9 * 480; offset += 2) {
1624 write32(base + offset * 8, get_etalon2(flip, offset));
1625 write32(base + offset * 8 + 4, get_etalon2(flip, offset));
1626 write32(base + offset * 8 + 8, get_etalon2(flip, offset + 1));
1627 write32(base + offset * 8 + 12, get_etalon2(flip, offset + 1));
1628 nwrites += 4;
1629 if (nwrites >= 320) {
1630 clear_errors();
1631 nwrites = 0;
1632 }
1633 }
1634 }
1635
/*
 * Verify the pattern written by write_testing() for rank @total_rank.
 *
 * Reads the test window back in 16-byte chunks (via the SSE read128
 * helper), XORs against the expected get_etalon2() values and converts any
 * stuck bits into a per-lane failure bitmap.
 *
 * @return Bitmask with bit N set when byte lane N saw at least one error.
 */
static u8 check_testing(struct raminfo *info, u8 total_rank, int flip)
{
	u8 failmask = 0;
	int i;
	int comp1, comp2, comp3;
	u32 failxor[2] = { 0, 0 };	/* accumulated error bits, low/high word */

	enable_cache_region((total_rank << 28), 1728 * 5 * 4);

	/* Stop early once every lane has already failed. */
	for (comp3 = 0; comp3 < 9 && failmask != 0xff; comp3++) {
		for (comp1 = 0; comp1 < 4; comp1++)
			for (comp2 = 0; comp2 < 60; comp2++) {
				u32 re[4];
				u32 curroffset =
				    comp3 * 8 * 60 + 2 * comp1 + 8 * comp2;
				read128((total_rank << 28) | (curroffset << 3),
					(u64 *)re);
				failxor[0] |=
				    get_etalon2(flip, curroffset) ^ re[0];
				failxor[1] |=
				    get_etalon2(flip, curroffset) ^ re[1];
				failxor[0] |=
				    get_etalon2(flip, curroffset | 1) ^ re[2];
				failxor[1] |=
				    get_etalon2(flip, curroffset | 1) ^ re[3];
			}
		/* Fold the XOR words into one failure bit per byte lane. */
		for (i = 0; i < 8; i++)
			if ((0xff << (8 * (i % 4))) & failxor[i / 4])
				failmask |= 1 << i;
	}
	disable_cache_region();
	flush_cache((total_rank << 28), 1728 * 5 * 4);
	return failmask;
}
1670
/* Bit-source table for the type-2 test pattern generator (get_etalon). */
const u32 seed1[0x18] = {
	0x3a9d5ab5, 0x576cb65b, 0x555773b6, 0x2ab772ee,
	0x555556ee, 0x3a9d5ab5, 0x576cb65b, 0x555773b6,
	0x2ab772ee, 0x555556ee, 0x5155a555, 0x5155a555,
	0x5155a555, 0x5155a555, 0x3a9d5ab5, 0x576cb65b,
	0x555773b6, 0x2ab772ee, 0x555556ee, 0x55d6b4a5,
	0x366d6b3a, 0x2ae5ddbb, 0x3b9ddbb7, 0x55d6b4a5,
};
1679
/*
 * Byte-mask seed for the type-2 test pattern.
 *
 * @param a Row selector; rows >= 10 are shifted up by one before the
 *          divide-by-5 bucketing.
 * @param b Non-zero inverts the seed.
 * @return The (possibly inverted) 32-bit seed.
 */
static u32 get_seed2(int a, int b)
{
	/* static const: the table is built once, not on every call. */
	static const u32 seed2[5] = {
		0x55555555, 0x33333333, 0x2e555a55, 0x55555555,
		0x5b6db6db,
	};
	u32 r;
	r = seed2[(a + (a >= 10)) / 5];
	return b ? ~r : r;
}
1690
/*
 * Derive the bit shift used by the type-2 pattern generator: start from
 * @comp2 and subtract one when bit (x & 7) of seed3[comp5] is set, modulo
 * 32.
 *
 * @param comp2 Base shift amount.
 * @param comp5 Row selector into seed3 (0..31).
 * @param x     Bit selector; only the low 3 bits are used.
 * @return Shift amount in the range 0..31.
 */
static int make_shift(int comp2, int comp5, int x)
{
	/* static const: the table is built once, not on every call. */
	static const u8 seed3[32] = {
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x38, 0x1c, 0x3c, 0x18, 0x38, 0x38,
		0x38, 0x38, 0x38, 0x38, 0x0f, 0x0f, 0x0f, 0x0f,
		0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
	};

	return (comp2 - ((seed3[comp5] >> (x & 7)) & 1)) & 0x1f;
}
1702
/*
 * Expected 32-bit data word at byte address @addr for the type-2 memory
 * test.  The word is assembled from three sources: a per-byte mask driven
 * by get_seed2()/make_shift(), and two single bits from the seed1 table
 * placed at positions comp3 and comp3+16.  @flip inverts those two bits.
 */
static u32 get_etalon(int flip, u32 addr)
{
	u32 mask_byte = 0;
	/* Address decomposition into independent pattern selectors. */
	int comp1 = (addr >> 1) & 1;
	int comp2 = (addr >> 3) & 0x1f;
	int comp3 = (addr >> 8) & 0xf;
	int comp4 = (addr >> 12) & 0xf;
	int comp5 = (addr >> 16) & 0x1f;
	u32 mask_bit = ~(0x10001 << comp3);
	u32 part1;
	u32 part2;
	int byte;

	part2 =
	    ((seed1[comp5] >>
	      make_shift(comp2, comp5,
			 (comp3 >> 3) | (comp1 << 2) | 2)) & 1) ^ flip;
	part1 =
	    ((seed1[comp5] >>
	      make_shift(comp2, comp5,
			 (comp3 >> 3) | (comp1 << 2) | 0)) & 1) ^ flip;

	/* Build the per-byte mask: each byte is all-ones or all-zeros. */
	for (byte = 0; byte < 4; byte++)
		if ((get_seed2(comp5, comp4) >>
		     make_shift(comp2, comp5, (byte | (comp1 << 2)))) & 1)
			mask_byte |= 0xff << (8 * byte);

	return (mask_bit & mask_byte) | (part1 << comp3) | (part2 <<
							    (comp3 + 16));
}
1733
1734 static void
write_testing_type2(struct raminfo * info,u8 totalrank,u8 region,u8 block,char flip)1735 write_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
1736 char flip)
1737 {
1738 int i;
1739 for (i = 0; i < 2048; i++)
1740 write32p((totalrank << 28) | (region << 25) | (block << 16) |
1741 (i << 2), get_etalon(flip, (block << 16) | (i << 2)));
1742 }
1743
1744 static u8
check_testing_type2(struct raminfo * info,u8 totalrank,u8 region,u8 block,char flip)1745 check_testing_type2(struct raminfo *info, u8 totalrank, u8 region, u8 block,
1746 char flip)
1747 {
1748 u8 failmask = 0;
1749 u32 failxor[2];
1750 int i;
1751 int comp1, comp2, comp3;
1752
1753 failxor[0] = 0;
1754 failxor[1] = 0;
1755
1756 enable_cache_region(totalrank << 28, 134217728);
1757 for (comp3 = 0; comp3 < 2 && failmask != 0xff; comp3++) {
1758 for (comp1 = 0; comp1 < 16; comp1++)
1759 for (comp2 = 0; comp2 < 64; comp2++) {
1760 u32 addr =
1761 (totalrank << 28) | (region << 25) | (block
1762 << 16)
1763 | (comp3 << 12) | (comp2 << 6) | (comp1 <<
1764 2);
1765 failxor[comp1 & 1] |=
1766 read32p(addr) ^ get_etalon(flip, addr);
1767 }
1768 for (i = 0; i < 8; i++)
1769 if ((0xff << (8 * (i % 4))) & failxor[i / 4])
1770 failmask |= 1 << i;
1771 }
1772 disable_cache_region();
1773 flush_cache((totalrank << 28) | (region << 25) | (block << 16), 16384);
1774 return failmask;
1775 }
1776
/* Return 1 when all eight entries of @vals are >= @bound, 0 otherwise. */
static int check_bounded(unsigned short *vals, u16 bound)
{
	int lane;

	for (lane = 0; lane < 8; lane++) {
		if (vals[lane] < bound)
			return 0;
	}

	return 1;
}
1786
/* Per-lane search state used by do_fsm() during timing margin scans. */
enum state {
	BEFORE_USABLE = 0, AT_USABLE = 1, AT_MARGIN = 2, COMPLETE = 3
};
1790
validate_state(enum state * in)1791 static int validate_state(enum state *in)
1792 {
1793 int i;
1794 for (i = 0; i < 8; i++)
1795 if (in[i] != COMPLETE)
1796 return 0;
1797 return 1;
1798 }
1799
1800 static void
do_fsm(enum state * state,u16 * counter,u8 fail_mask,int margin,int uplimit,u8 * res_low,u8 * res_high,u8 val)1801 do_fsm(enum state *state, u16 *counter,
1802 u8 fail_mask, int margin, int uplimit,
1803 u8 *res_low, u8 *res_high, u8 val)
1804 {
1805 int lane;
1806
1807 for (lane = 0; lane < 8; lane++) {
1808 int is_fail = (fail_mask >> lane) & 1;
1809 switch (state[lane]) {
1810 case BEFORE_USABLE:
1811 if (!is_fail) {
1812 counter[lane] = 1;
1813 state[lane] = AT_USABLE;
1814 break;
1815 }
1816 counter[lane] = 0;
1817 state[lane] = BEFORE_USABLE;
1818 break;
1819 case AT_USABLE:
1820 if (!is_fail) {
1821 ++counter[lane];
1822 if (counter[lane] >= margin) {
1823 state[lane] = AT_MARGIN;
1824 res_low[lane] = val - margin + 1;
1825 break;
1826 }
1827 state[lane] = 1;
1828 break;
1829 }
1830 counter[lane] = 0;
1831 state[lane] = BEFORE_USABLE;
1832 break;
1833 case AT_MARGIN:
1834 if (is_fail) {
1835 state[lane] = COMPLETE;
1836 res_high[lane] = val - 1;
1837 } else {
1838 counter[lane]++;
1839 state[lane] = AT_MARGIN;
1840 if (val == uplimit) {
1841 state[lane] = COMPLETE;
1842 res_high[lane] = uplimit;
1843 }
1844 }
1845 break;
1846 case COMPLETE:
1847 break;
1848 }
1849 }
1850 }
1851
/*
 * Train lane timings for one rank at a given value of register 0x178.
 *
 * Phase 1: sweep registers 0x1b3/0x1a3 over 0..0x2f, run check_testing()
 * at each step and let do_fsm() find each lane's usable window (margin 5,
 * upper limit 47).  The window is recorded into @timings relative to the
 * cached type-0 lane timing.
 *
 * Phase 2 (only when !first_run): validate both window edges by writing
 * the candidate type-0/type-1 timings and repeating the type-2 memory
 * test until every lane survives 2 (lower edge) resp. 3 (upper edge)
 * consecutive clean rounds, shrinking the window on failures; finally the
 * original cached timings are restored and degenerate windows are zeroed.
 */
static void
train_ram_at_178(struct raminfo *info, u8 channel, int slot, int rank,
		 u8 total_rank, u8 reg_178, int first_run, int niter,
		 timing_bounds_t * timings)
{
	int lane;
	enum state state[8];
	u16 count[8];
	u8 lower_usable[8];
	u8 upper_usable[8];
	unsigned short num_successfully_checked[8];
	u8 reg1b3;
	int i;

	for (i = 0; i < 8; i++)
		state[i] = BEFORE_USABLE;

	/* On re-runs, skip the sweep entirely when every lane already has
	   a non-degenerate window; degenerate windows are reset to 0. */
	if (!first_run) {
		int is_all_ok = 1;
		for (lane = 0; lane < 8; lane++)
			if (timings[reg_178][channel][slot][rank][lane].
			    smallest ==
			    timings[reg_178][channel][slot][rank][lane].
			    largest) {
				timings[reg_178][channel][slot][rank][lane].
				    smallest = 0;
				timings[reg_178][channel][slot][rank][lane].
				    largest = 0;
				is_all_ok = 0;
			}
		if (is_all_ok) {
			for (i = 0; i < 8; i++)
				state[i] = COMPLETE;
		}
	}

	/* Phase 1: sweep 0x1b3/0x1a3 and track per-lane pass windows. */
	for (reg1b3 = 0; reg1b3 < 0x30 && !validate_state(state); reg1b3++) {
		u8 failmask = 0;
		write_1d0(reg1b3 ^ 32, 0x1b3, 6, 1);
		write_1d0(reg1b3 ^ 32, 0x1a3, 6, 1);
		failmask = check_testing(info, total_rank, 0);
		mchbar_setbits32(0xfb0, 3 << 16);
		do_fsm(state, count, failmask, 5, 47, lower_usable,
		       upper_usable, reg1b3);
	}

	if (reg1b3) {
		write_1d0(0, 0x1b3, 6, 1);
		write_1d0(0, 0x1a3, 6, 1);
		/* Record windows relative to the cached type-0 timing
		   (low 6 bits, centered on 32). */
		for (lane = 0; lane < 8; lane++) {
			if (state[lane] == COMPLETE) {
				timings[reg_178][channel][slot][rank][lane].
				    smallest =
				    lower_usable[lane] +
				    (info->training.
				     lane_timings[0][channel][slot][rank][lane]
				     & 0x3F) - 32;
				timings[reg_178][channel][slot][rank][lane].
				    largest =
				    upper_usable[lane] +
				    (info->training.
				     lane_timings[0][channel][slot][rank][lane]
				     & 0x3F) - 32;
			}
		}
	}

	if (!first_run) {
		/* Phase 2a: validate the lower edge.  Program each
		   completed lane's smallest timing (type 1 keeps its
		   offset from type 0); -1 (0xffff) marks lanes to skip. */
		for (lane = 0; lane < 8; lane++)
			if (state[lane] == COMPLETE) {
				write_500(info, channel,
					  timings[reg_178][channel][slot][rank]
					  [lane].smallest,
					  get_timing_register_addr(lane, 0,
								   slot, rank),
					  9, 1);
				write_500(info, channel,
					  timings[reg_178][channel][slot][rank]
					  [lane].smallest +
					  info->training.
					  lane_timings[1][channel][slot][rank]
					  [lane]
					  -
					  info->training.
					  lane_timings[0][channel][slot][rank]
					  [lane], get_timing_register_addr(lane,
									   1,
									   slot,
									   rank),
					  9, 1);
				num_successfully_checked[lane] = 0;
			} else
				num_successfully_checked[lane] = -1;

		/* Retest until every lane passes 2 consecutive rounds;
		   a failing lane bumps its smallest timing and restarts. */
		do {
			u8 failmask = 0;
			for (i = 0; i < niter; i++) {
				if (failmask == 0xFF)
					break;
				failmask |=
				    check_testing_type2(info, total_rank, 2, i,
							0);
				failmask |=
				    check_testing_type2(info, total_rank, 3, i,
							1);
			}
			mchbar_setbits32(0xfb0, 3 << 16);
			for (lane = 0; lane < 8; lane++)
				if (num_successfully_checked[lane] != 0xffff) {
					if ((1 << lane) & failmask) {
						/* Window collapsed: give up
						   on this lane. */
						if (timings[reg_178][channel]
						    [slot][rank][lane].
						    largest <=
						    timings[reg_178][channel]
						    [slot][rank][lane].smallest)
							num_successfully_checked
							    [lane] = -1;
						else {
							num_successfully_checked
							    [lane] = 0;
							timings[reg_178]
							    [channel][slot]
							    [rank][lane].
							    smallest++;
							write_500(info, channel,
								  timings
								  [reg_178]
								  [channel]
								  [slot][rank]
								  [lane].
								  smallest,
								  get_timing_register_addr
								  (lane, 0,
								   slot, rank),
								  9, 1);
							write_500(info, channel,
								  timings
								  [reg_178]
								  [channel]
								  [slot][rank]
								  [lane].
								  smallest +
								  info->
								  training.
								  lane_timings
								  [1][channel]
								  [slot][rank]
								  [lane]
								  -
								  info->
								  training.
								  lane_timings
								  [0][channel]
								  [slot][rank]
								  [lane],
								  get_timing_register_addr
								  (lane, 1,
								   slot, rank),
								  9, 1);
						}
					} else
						num_successfully_checked[lane]
						    ++;
				}
		}
		while (!check_bounded(num_successfully_checked, 2))
			;

		/* Phase 2b: validate the upper edge the same way. */
		for (lane = 0; lane < 8; lane++)
			if (state[lane] == COMPLETE) {
				write_500(info, channel,
					  timings[reg_178][channel][slot][rank]
					  [lane].largest,
					  get_timing_register_addr(lane, 0,
								   slot, rank),
					  9, 1);
				write_500(info, channel,
					  timings[reg_178][channel][slot][rank]
					  [lane].largest +
					  info->training.
					  lane_timings[1][channel][slot][rank]
					  [lane]
					  -
					  info->training.
					  lane_timings[0][channel][slot][rank]
					  [lane], get_timing_register_addr(lane,
									   1,
									   slot,
									   rank),
					  9, 1);
				num_successfully_checked[lane] = 0;
			} else
				num_successfully_checked[lane] = -1;

		/* Retest until every lane passes 3 consecutive rounds;
		   a failing lane shrinks its largest timing. */
		do {
			int failmask = 0;
			for (i = 0; i < niter; i++) {
				if (failmask == 0xFF)
					break;
				failmask |=
				    check_testing_type2(info, total_rank, 2, i,
							0);
				failmask |=
				    check_testing_type2(info, total_rank, 3, i,
							1);
			}

			mchbar_setbits32(0xfb0, 3 << 16);
			for (lane = 0; lane < 8; lane++) {
				if (num_successfully_checked[lane] != 0xffff) {
					if ((1 << lane) & failmask) {
						if (timings[reg_178][channel]
						    [slot][rank][lane].
						    largest <=
						    timings[reg_178][channel]
						    [slot][rank][lane].
						    smallest) {
							num_successfully_checked
							    [lane] = -1;
						} else {
							num_successfully_checked
							    [lane] = 0;
							timings[reg_178]
							    [channel][slot]
							    [rank][lane].
							    largest--;
							write_500(info, channel,
								  timings
								  [reg_178]
								  [channel]
								  [slot][rank]
								  [lane].
								  largest,
								  get_timing_register_addr
								  (lane, 0,
								   slot, rank),
								  9, 1);
							write_500(info, channel,
								  timings
								  [reg_178]
								  [channel]
								  [slot][rank]
								  [lane].
								  largest +
								  info->
								  training.
								  lane_timings
								  [1][channel]
								  [slot][rank]
								  [lane]
								  -
								  info->
								  training.
								  lane_timings
								  [0][channel]
								  [slot][rank]
								  [lane],
								  get_timing_register_addr
								  (lane, 1,
								   slot, rank),
								  9, 1);
						}
					} else
						num_successfully_checked[lane]
						    ++;
				}
			}
		}
		while (!check_bounded(num_successfully_checked, 3))
			;

		/* Restore the cached timings and zero out any window that
		   ended up degenerate. */
		for (lane = 0; lane < 8; lane++) {
			write_500(info, channel,
				  info->training.
				  lane_timings[0][channel][slot][rank][lane],
				  get_timing_register_addr(lane, 0, slot, rank),
				  9, 1);
			write_500(info, channel,
				  info->training.
				  lane_timings[1][channel][slot][rank][lane],
				  get_timing_register_addr(lane, 1, slot, rank),
				  9, 1);
			if (timings[reg_178][channel][slot][rank][lane].
			    largest <=
			    timings[reg_178][channel][slot][rank][lane].
			    smallest) {
				timings[reg_178][channel][slot][rank][lane].
				    largest = 0;
				timings[reg_178][channel][slot][rank][lane].
				    smallest = 0;
			}
		}
	}
}
2146
/*
 * Set register 0x10b to @val and compensate every populated rank's type-0
 * lane timing by the clock-speed-dependent lut16 offset (subtracted,
 * clamped at 0, when val == 1; added otherwise).  No-op when the register
 * already holds @val.
 */
static void set_10b(struct raminfo *info, u8 val)
{
	int channel;
	int slot, rank;
	int lane;

	if (read_1d0(0x10b, 6) == val)
		return;

	write_1d0(val, 0x10b, 6, 1);

	FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 9; lane++) {
		u16 reg_500;
		reg_500 = read_500(info, channel,
				   get_timing_register_addr(lane, 0, slot,
							    rank), 9);
		if (val == 1) {
			/* Clamp at zero instead of underflowing. */
			if (lut16[info->clock_speed_index] <= reg_500)
				reg_500 -= lut16[info->clock_speed_index];
			else
				reg_500 = 0;
		} else {
			reg_500 += lut16[info->clock_speed_index];
		}
		write_500(info, channel, reg_500,
			  get_timing_register_addr(lane, 0, slot, rank), 9, 1);
	}
}
2175
set_ecc(int onoff)2176 static void set_ecc(int onoff)
2177 {
2178 int channel;
2179 for (channel = 0; channel < NUM_CHANNELS; channel++) {
2180 u8 t;
2181 t = mchbar_read8((channel << 10) + 0x5f8);
2182 if (onoff)
2183 t |= 1;
2184 else
2185 t &= ~1;
2186 mchbar_write8((channel << 10) + 0x5f8, t);
2187 }
2188 }
2189
/* Translate 'val' into the encoding expected by 1d0-register 0x178
   (values >= 31 map to 0.., smaller ones mirror down from 63) and
   program it. */
static void set_178(u8 val)
{
	u8 encoded;

	if (val >= 31)
		encoded = val - 31;
	else
		encoded = 63 - val;

	write_1d0(2 * encoded, 0x178, 7, 1);
}
2199
2200 static void
write_500_timings_type(struct raminfo * info,int channel,int slot,int rank,int type)2201 write_500_timings_type(struct raminfo *info, int channel, int slot, int rank,
2202 int type)
2203 {
2204 int lane;
2205
2206 for (lane = 0; lane < 8; lane++)
2207 write_500(info, channel,
2208 info->training.
2209 lane_timings[type][channel][slot][rank][lane],
2210 get_timing_register_addr(lane, type, slot, rank), 9,
2211 0);
2212 }
2213
/*
 * Sweep the 0x1bb timing offset over 0..63 for one rank, running the
 * write/check memory test at each step and feeding the per-lane
 * pass/fail mask into the FSM to discover each lane's usable window
 * of timing-2 values.  The centered (optionally biased) result is
 * programmed and the discovered bounds recorded in info->training.
 * Dies if no usable window is found.
 */
static void
try_timing_offsets(struct raminfo *info, int channel,
		   int slot, int rank, int totalrank)
{
	u16 count[8];
	enum state state[8];
	u8 lower_usable[8], upper_usable[8];
	int lane;
	int i;
	int flip = 1;
	int timing_offset;

	for (i = 0; i < 8; i++)
		state[i] = BEFORE_USABLE;

	memset(count, 0, sizeof(count));

	/* Start the timing-3 registers 32 steps above the timing-2 base. */
	for (lane = 0; lane < 8; lane++)
		write_500(info, channel,
			  info->training.
			  lane_timings[2][channel][slot][rank][lane] + 32,
			  get_timing_register_addr(lane, 3, slot, rank), 9, 1);

	for (timing_offset = 0; !validate_state(state) && timing_offset < 64;
	     timing_offset++) {
		u8 failmask;
		write_1d0(timing_offset ^ 32, 0x1bb, 6, 1);
		failmask = 0;
		/* Run the test twice with alternating data patterns; stop
		   early once every lane has already failed. */
		for (i = 0; i < 2 && failmask != 0xff; i++) {
			flip = !flip;
			write_testing(info, totalrank, flip);
			failmask |= check_testing(info, totalrank, flip);
		}
		do_fsm(state, count, failmask, 10, 63, lower_usable,
		       upper_usable, timing_offset);
	}
	write_1d0(0, 0x1bb, 6, 1);
	dump_timings(info);
	if (!validate_state(state))
		die("Couldn't discover DRAM timings (1)\n");

	for (lane = 0; lane < 8; lane++) {
		u8 bias = 0;

		if (info->silicon_revision) {
			int usable_length;

			usable_length = upper_usable[lane] - lower_usable[lane];
			/* Wide windows (>= 20 steps) get a small downward
			   bias, capped at 2. */
			if (usable_length >= 20) {
				bias = usable_length / 2 - 10;
				if (bias >= 2)
					bias = 2;
			}
		}
		/* Program the (biased) window midpoint as final timing 3. */
		write_500(info, channel,
			  info->training.
			  lane_timings[2][channel][slot][rank][lane] +
			  (upper_usable[lane] + lower_usable[lane]) / 2 - bias,
			  get_timing_register_addr(lane, 3, slot, rank), 9, 1);
		info->training.timing2_bounds[channel][slot][rank][lane][0] =
		    info->training.lane_timings[2][channel][slot][rank][lane] +
		    lower_usable[lane];
		info->training.timing2_bounds[channel][slot][rank][lane][1] =
		    info->training.lane_timings[2][channel][slot][rank][lane] +
		    upper_usable[lane];
		info->training.timing2_offset[channel][slot][rank][lane] =
		    info->training.lane_timings[2][channel][slot][rank][lane];
	}
}
2283
/*
 * Pick the final timing value for one lane from the usable windows
 * measured at reg_178 = center - span, center, and center + span,
 * using a weighted average of the window midpoints.  Weights and span
 * are tuned per silicon revision / population / channel / lane quirk.
 * The result is then nudged so that roughly 10 steps of margin remain
 * on each side of the center window where possible.  Dies if none of
 * the three windows is wide enough to use.
 */
static u8
choose_training(struct raminfo *info, int channel, int slot, int rank,
		int lane, timing_bounds_t * timings, u8 center_178)
{
	u16 central_weight;
	u16 side_weight;
	unsigned int sum = 0, count = 0;
	u8 span;
	u8 lower_margin, upper_margin;
	u8 reg_178;
	u8 result;

	/* Defaults; the quirk cases below override them. */
	span = 12;
	central_weight = 20;
	side_weight = 20;
	if (info->silicon_revision == 1 && channel == 1) {
		central_weight = 5;
		side_weight = 20;
		/* Asymmetric channel-1 population: widen the sweep. */
		if ((info->
		     populated_ranks_mask[1] ^ (info->
						populated_ranks_mask[1] >> 2)) &
		    1)
			span = 18;
	}
	if ((info->populated_ranks_mask[0] & 5) == 5) {
		central_weight = 20;
		side_weight = 20;
	}
	if (info->clock_speed_index >= 2
	    && (info->populated_ranks_mask[0] & 5) == 5 && slot == 1) {
		if (info->silicon_revision == 1) {
			switch (channel) {
			case 0:
				if (lane == 1) {
					central_weight = 10;
					side_weight = 20;
				}
				break;
			case 1:
				if (lane == 6) {
					side_weight = 5;
					central_weight = 20;
				}
				break;
			}
		}
		if (info->silicon_revision == 0 && channel == 0 && lane == 0) {
			side_weight = 5;
			central_weight = 20;
		}
	}
	/* Weighted average over the three sample points; windows narrower
	   than 5 steps are ignored.  sum accumulates weight * 2*midpoint. */
	for (reg_178 = center_178 - span; reg_178 <= center_178 + span;
	     reg_178 += span) {
		u8 smallest;
		u8 largest;
		largest = timings[reg_178][channel][slot][rank][lane].largest;
		smallest = timings[reg_178][channel][slot][rank][lane].smallest;
		if (largest - smallest + 1 >= 5) {
			unsigned int weight;
			if (reg_178 == center_178)
				weight = central_weight;
			else
				weight = side_weight;
			sum += weight * (largest + smallest);
			count += weight;
		}
	}
	dump_timings(info);
	if (count == 0)
		die("Couldn't discover DRAM timings (2)\n");
	result = sum / (2 * count);
	/* Re-center towards 10 steps of margin on the narrow side of the
	   center window. */
	lower_margin =
	    result - timings[center_178][channel][slot][rank][lane].smallest;
	upper_margin =
	    timings[center_178][channel][slot][rank][lane].largest - result;
	if (upper_margin < 10 && lower_margin > 10)
		result -= MIN(lower_margin - 10, 10 - upper_margin);
	if (upper_margin > 10 && lower_margin < 10)
		result += MIN(upper_margin - 10, 10 - lower_margin);
	return result;
}
2365
2366 #define STANDARD_MIN_MARGIN 5
2367
/*
 * Choose the global reg178 center value.  For each candidate setting
 * in the clock-speed-dependent range, margin[] records the worst-case
 * (smallest) usable window across all populated lanes; candidates with
 * margin above STANDARD_MIN_MARGIN contribute to a weighted average,
 * which is returned.  Also records a conservative [smallest, largest]
 * usable reg178 range in info->training.  Dies if no candidate has
 * enough margin.
 */
static u8 choose_reg178(struct raminfo *info, timing_bounds_t * timings)
{
	u16 margin[64];
	int lane, rank, slot, channel;
	u8 reg178;
	int count = 0, sum = 0;

	for (reg178 = reg178_min[info->clock_speed_index];
	     reg178 < reg178_max[info->clock_speed_index];
	     reg178 += reg178_step[info->clock_speed_index]) {
		/* u16 sentinel: -1 wraps to 0xffff (maximum). */
		margin[reg178] = -1;
		FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
			int curmargin =
			    timings[reg178][channel][slot][rank][lane].largest -
			    timings[reg178][channel][slot][rank][lane].
			    smallest + 1;
			if (curmargin < margin[reg178])
				margin[reg178] = curmargin;
		}
		if (margin[reg178] >= STANDARD_MIN_MARGIN) {
			u16 weight;
			weight = margin[reg178] - STANDARD_MIN_MARGIN;
			sum += weight * reg178;
			count += weight;
		}
	}
	dump_timings(info);
	if (count == 0)
		die("Couldn't discover DRAM timings (3)\n");

	u8 threshold;

	/* Lower the margin threshold until the qualifying settings span
	   at least 0x21 steps; reg178_largest/smallest get conservative
	   bounds (pulled in by 2 resp. 1 step sizes). */
	for (threshold = 30; threshold >= 5; threshold--) {
		int usable_length = 0;
		int smallest_fount = 0;	/* "found" — first qualifying entry */
		for (reg178 = reg178_min[info->clock_speed_index];
		     reg178 < reg178_max[info->clock_speed_index];
		     reg178 += reg178_step[info->clock_speed_index])
			if (margin[reg178] >= threshold) {
				usable_length +=
				    reg178_step[info->clock_speed_index];
				info->training.reg178_largest =
				    reg178 -
				    2 * reg178_step[info->clock_speed_index];

				if (!smallest_fount) {
					smallest_fount = 1;
					info->training.reg178_smallest =
					    reg178 +
					    reg178_step[info->
							clock_speed_index];
				}
			}
		if (usable_length >= 0x21)
			break;
	}

	return sum / count;
}
2427
check_cached_sanity(struct raminfo * info)2428 static int check_cached_sanity(struct raminfo *info)
2429 {
2430 int lane;
2431 int slot, rank;
2432 int channel;
2433
2434 if (!info->cached_training)
2435 return 0;
2436
2437 for (channel = 0; channel < NUM_CHANNELS; channel++)
2438 for (slot = 0; slot < NUM_SLOTS; slot++)
2439 for (rank = 0; rank < NUM_RANKS; rank++)
2440 for (lane = 0; lane < 8 + info->use_ecc; lane++) {
2441 u16 cached_value, estimation_value;
2442 cached_value =
2443 info->cached_training->
2444 lane_timings[1][channel][slot][rank]
2445 [lane];
2446 if (cached_value >= 0x18
2447 && cached_value <= 0x1E7) {
2448 estimation_value =
2449 info->training.
2450 lane_timings[1][channel]
2451 [slot][rank][lane];
2452 if (estimation_value <
2453 cached_value - 24)
2454 return 0;
2455 if (estimation_value >
2456 cached_value + 24)
2457 return 0;
2458 }
2459 }
2460 return 1;
2461 }
2462
/*
 * Attempt to reuse training results cached from a previous boot.
 * After a plausibility check, the cached timing bounds are verified in
 * hardware: for both cached reg178 extremes (tm) and both ends of each
 * lane's window (i), the memory test must pass exactly on the bound
 * (j == 0) and fail just outside of it (j == 1).  On success the
 * cached training is adopted and 1 is returned; on any mismatch the
 * timing registers are restored and 0 is returned so that a full
 * training run happens instead.
 */
static int try_cached_training(struct raminfo *info)
{
	u8 saved_243[2];
	u8 tm;

	int channel, slot, rank, lane;
	int flip = 1;
	int i, j;

	if (!check_cached_sanity(info))
		return 0;

	/* Tentatively adopt the cached reg178 values and bounds. */
	info->training.reg178_center = info->cached_training->reg178_center;
	info->training.reg178_smallest = info->cached_training->reg178_smallest;
	info->training.reg178_largest = info->cached_training->reg178_largest;
	memcpy(&info->training.timing_bounds,
	       &info->cached_training->timing_bounds,
	       sizeof(info->training.timing_bounds));
	memcpy(&info->training.timing_offset,
	       &info->cached_training->timing_offset,
	       sizeof(info->training.timing_offset));

	/* Enter test mode: save 0x243/0x643 for restoration on exit. */
	write_1d0(2, 0x142, 3, 1);
	saved_243[0] = mchbar_read8(0x243);
	saved_243[1] = mchbar_read8(0x643);
	mchbar_write8(0x243, saved_243[0] | 2);
	mchbar_write8(0x643, saved_243[1] | 2);
	set_ecc(0);
	pci_write_config16(NORTHBRIDGE, 0xc8, 3);
	if (read_1d0(0x10b, 6) & 1)
		set_10b(info, 0);
	for (tm = 0; tm < 2; tm++) {
		int totalrank;

		set_178(tm ? info->cached_training->reg178_largest : info->
			cached_training->reg178_smallest);

		totalrank = 0;
		/* Check timing ranges. With i == 0 we check smallest one and with
		   i == 1 the largest bound. With j == 0 we check that on the bound
		   it still works whereas with j == 1 we check that just outside of
		   bound we fail.
		 */
		FOR_POPULATED_RANKS_BACKWARDS {
			for (i = 0; i < 2; i++) {
				/* Program the cached bound being tested into
				   the timing registers of every lane. */
				for (lane = 0; lane < 8; lane++) {
					write_500(info, channel,
						  info->cached_training->
						  timing2_bounds[channel][slot]
						  [rank][lane][i],
						  get_timing_register_addr(lane,
									   3,
									   slot,
									   rank),
						  9, 1);

					if (!i)
						write_500(info, channel,
							  info->
							  cached_training->
							  timing2_offset
							  [channel][slot][rank]
							  [lane],
							  get_timing_register_addr
							  (lane, 2, slot, rank),
							  9, 1);
					write_500(info, channel,
						  i ? info->cached_training->
						  timing_bounds[tm][channel]
						  [slot][rank][lane].
						  largest : info->
						  cached_training->
						  timing_bounds[tm][channel]
						  [slot][rank][lane].smallest,
						  get_timing_register_addr(lane,
									   0,
									   slot,
									   rank),
						  9, 1);
					write_500(info, channel,
						  info->cached_training->
						  timing_offset[channel][slot]
						  [rank][lane] +
						  (i ? info->cached_training->
						   timing_bounds[tm][channel]
						   [slot][rank][lane].
						   largest : info->
						   cached_training->
						   timing_bounds[tm][channel]
						   [slot][rank][lane].
						   smallest) - 64,
						  get_timing_register_addr(lane,
									   1,
									   slot,
									   rank),
						  9, 1);
				}
				for (j = 0; j < 2; j++) {
					u8 failmask;
					u8 expected_failmask;
					char reg1b3;

					/* j == i keeps the offset on the bound;
					   otherwise push just past it. */
					reg1b3 = (j == 1) + 4;
					reg1b3 =
					    j == i ? reg1b3 : (-reg1b3) & 0x3f;
					write_1d0(reg1b3, 0x1bb, 6, 1);
					write_1d0(reg1b3, 0x1b3, 6, 1);
					write_1d0(reg1b3, 0x1a3, 6, 1);

					flip = !flip;
					write_testing(info, totalrank, flip);
					failmask =
					    check_testing(info, totalrank,
							  flip);
					/* On the bound: all lanes pass.
					   Outside: all lanes must fail. */
					expected_failmask =
					    j == 0 ? 0x00 : 0xff;
					if (failmask != expected_failmask)
						goto fail;
				}
			}
			totalrank++;
		}
	}

	/* Verified: commit the cached training. */
	set_178(info->cached_training->reg178_center);
	if (info->use_ecc)
		set_ecc(1);
	write_training_data(info);
	write_1d0(0, 322, 3, 1);
	info->training = *info->cached_training;

	write_1d0(0, 0x1bb, 6, 1);
	write_1d0(0, 0x1b3, 6, 1);
	write_1d0(0, 0x1a3, 6, 1);
	mchbar_write8(0x243, saved_243[0]);
	mchbar_write8(0x643, saved_243[1]);

	return 1;

fail:
	/* Verification failed: restore sane lane timings and registers so
	   the caller can run full training from a clean state. */
	FOR_POPULATED_RANKS {
		write_500_timings_type(info, channel, slot, rank, 1);
		write_500_timings_type(info, channel, slot, rank, 2);
		write_500_timings_type(info, channel, slot, rank, 3);
	}

	write_1d0(0, 0x1bb, 6, 1);
	write_1d0(0, 0x1b3, 6, 1);
	write_1d0(0, 0x1a3, 6, 1);
	mchbar_write8(0x243, saved_243[0]);
	mchbar_write8(0x643, saved_243[1]);

	return 0;
}
2617
/*
 * Full DRAM training.  Sweeps reg_178 across its clock-speed-dependent
 * range, training every populated rank at each setting into the large
 * timings[] scratch table (in cache-as-RAM), then chooses the best
 * reg_178 center, refines around it, and programs the final per-lane
 * timings via choose_training()/try_timing_offsets().
 */
static void do_ram_training(struct raminfo *info)
{
	u8 saved_243[2];
	int totalrank = 0;
	u8 reg_178;
	int niter;

	timing_bounds_t *timings = timings_car;
	int lane, rank, slot, channel;
	u8 reg178_center;

	write_1d0(2, 0x142, 3, 1);
	saved_243[0] = mchbar_read8(0x243);
	saved_243[1] = mchbar_read8(0x643);
	mchbar_write8(0x243, saved_243[0] | 2);
	mchbar_write8(0x643, saved_243[1] | 2);
	/* More test iterations at higher clock speeds. */
	switch (info->clock_speed_index) {
	case 0:
		niter = 5;
		break;
	case 1:
		niter = 10;
		break;
	default:
		niter = 19;
		break;
	}
	set_ecc(0);

	/* Seed every populated rank with its type-0 timings and write the
	   test patterns it will be checked against. */
	FOR_POPULATED_RANKS_BACKWARDS {
		int i;

		write_500_timings_type(info, channel, slot, rank, 0);

		write_testing(info, totalrank, 0);
		for (i = 0; i < niter; i++) {
			write_testing_type2(info, totalrank, 2, i, 0);
			write_testing_type2(info, totalrank, 3, i, 1);
		}
		pci_write_config8(NORTHBRIDGE, 0xc0, 0x01);
		totalrank++;
	}

	/* Clear the scratch table for the range about to be swept. */
	if (reg178_min[info->clock_speed_index] <
	    reg178_max[info->clock_speed_index])
		memset(timings[reg178_min[info->clock_speed_index]], 0,
		       sizeof(timings[0]) *
		       (reg178_max[info->clock_speed_index] -
			reg178_min[info->clock_speed_index]));
	/* Coarse pass: sweep the whole reg_178 range. */
	for (reg_178 = reg178_min[info->clock_speed_index];
	     reg_178 < reg178_max[info->clock_speed_index];
	     reg_178 += reg178_step[info->clock_speed_index]) {
		totalrank = 0;
		set_178(reg_178);
		for (channel = NUM_CHANNELS - 1; channel >= 0; channel--)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					memset(&timings[reg_178][channel][slot]
					       [rank][0].smallest, 0, 16);
					if (info->
					    populated_ranks[channel][slot]
					    [rank]) {
						train_ram_at_178(info, channel,
								 slot, rank,
								 totalrank,
								 reg_178, 1,
								 niter,
								 timings);
						totalrank++;
					}
				}
	}

	reg178_center = choose_reg178(info, timings);

	/* Record the windows measured at the reg178 extremes (used later
	   by the cached-training verification) and the per-lane offsets. */
	FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
		info->training.timing_bounds[0][channel][slot][rank][lane].
		    smallest =
		    timings[info->training.
			    reg178_smallest][channel][slot][rank][lane].
		    smallest;
		info->training.timing_bounds[0][channel][slot][rank][lane].
		    largest =
		    timings[info->training.
			    reg178_smallest][channel][slot][rank][lane].largest;
		info->training.timing_bounds[1][channel][slot][rank][lane].
		    smallest =
		    timings[info->training.
			    reg178_largest][channel][slot][rank][lane].smallest;
		info->training.timing_bounds[1][channel][slot][rank][lane].
		    largest =
		    timings[info->training.
			    reg178_largest][channel][slot][rank][lane].largest;
		info->training.timing_offset[channel][slot][rank][lane] =
		    info->training.lane_timings[1][channel][slot][rank][lane]
		    -
		    info->training.lane_timings[0][channel][slot][rank][lane] +
		    64;
	}

	/* Fine pass around the chosen center.  Revision-1 silicon with an
	   asymmetric channel-1 population sweeps channel 1 wider (+/-18)
	   than channel 0 (+/-12); otherwise both use +/-12. */
	if (info->silicon_revision == 1
	    && (info->
		populated_ranks_mask[1] ^ (info->
					   populated_ranks_mask[1] >> 2)) & 1) {
		int ranks_after_channel1;

		totalrank = 0;
		for (reg_178 = reg178_center - 18;
		     reg_178 <= reg178_center + 18; reg_178 += 18) {
			totalrank = 0;
			set_178(reg_178);
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++) {
					if (info->
					    populated_ranks[1][slot][rank]) {
						train_ram_at_178(info, 1, slot,
								 rank,
								 totalrank,
								 reg_178, 0,
								 niter,
								 timings);
						totalrank++;
					}
				}
		}
		ranks_after_channel1 = totalrank;

		for (reg_178 = reg178_center - 12;
		     reg_178 <= reg178_center + 12; reg_178 += 12) {
			totalrank = ranks_after_channel1;
			set_178(reg_178);
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					if (info->
					    populated_ranks[0][slot][rank]) {
						train_ram_at_178(info, 0, slot,
								 rank,
								 totalrank,
								 reg_178, 0,
								 niter,
								 timings);
						totalrank++;
					}
		}
	} else {
		for (reg_178 = reg178_center - 12;
		     reg_178 <= reg178_center + 12; reg_178 += 12) {
			totalrank = 0;
			set_178(reg_178);
			FOR_POPULATED_RANKS_BACKWARDS {
				train_ram_at_178(info, channel, slot, rank,
						 totalrank, reg_178, 0, niter,
						 timings);
				totalrank++;
			}
		}
	}

	/* Apply the chosen center and program the final per-lane timings;
	   timing 1 keeps its original distance from timing 0. */
	set_178(reg178_center);
	FOR_POPULATED_RANKS_BACKWARDS for (lane = 0; lane < 8; lane++) {
		u16 tm0;

		tm0 =
		    choose_training(info, channel, slot, rank, lane, timings,
				    reg178_center);
		write_500(info, channel, tm0,
			  get_timing_register_addr(lane, 0, slot, rank), 9, 1);
		write_500(info, channel,
			  tm0 +
			  info->training.
			  lane_timings[1][channel][slot][rank][lane] -
			  info->training.
			  lane_timings[0][channel][slot][rank][lane],
			  get_timing_register_addr(lane, 1, slot, rank), 9, 1);
	}

	totalrank = 0;
	FOR_POPULATED_RANKS_BACKWARDS {
		try_timing_offsets(info, channel, slot, rank, totalrank);
		totalrank++;
	}
	mchbar_write8(0x243, saved_243[0]);
	mchbar_write8(0x643, saved_243[1]);
	write_1d0(0, 0x142, 3, 1);
	info->training.reg178_center = reg178_center;
}
2804
ram_training(struct raminfo * info)2805 static void ram_training(struct raminfo *info)
2806 {
2807 u16 saved_fc4;
2808
2809 saved_fc4 = mchbar_read16(0xfc4);
2810 mchbar_write16(0xfc4, 0xffff);
2811
2812 if (info->revision >= 8)
2813 read_4090(info);
2814
2815 if (!try_cached_training(info))
2816 do_ram_training(info);
2817 if ((info->silicon_revision == 2 || info->silicon_revision == 3)
2818 && info->clock_speed_index < 2)
2819 set_10b(info, 1);
2820 mchbar_write16(0xfc4, saved_fc4);
2821 }
2822
get_max_timing(struct raminfo * info,int channel)2823 u16 get_max_timing(struct raminfo *info, int channel)
2824 {
2825 int slot, rank, lane;
2826 u16 ret = 0;
2827
2828 if ((mchbar_read8(0x2ca8) >> 2) < 1)
2829 return 384;
2830
2831 if (info->revision < 8)
2832 return 256;
2833
2834 for (slot = 0; slot < NUM_SLOTS; slot++)
2835 for (rank = 0; rank < NUM_RANKS; rank++)
2836 if (info->populated_ranks[channel][slot][rank])
2837 for (lane = 0; lane < 8 + info->use_ecc; lane++)
2838 ret = MAX(ret, read_500(info, channel,
2839 get_timing_register_addr
2840 (lane, 0, slot,
2841 rank), 9));
2842 return ret;
2843 }
2844
/*
 * Minimal DMI link setup.  The register meanings are not publicly
 * documented; the values replicate what vendor firmware programs
 * (gav() logs the reads for comparison against vendor traces).
 */
static void dmi_setup(void)
{
	gav(dmibar_read8(0x254));
	dmibar_write8(0x254, 1 << 0);
	dmibar_write16(0x1b8, 0x18f2);
	mchbar_clrsetbits16(0x48, ~0, 1 << 1);

	dmibar_setbits32(0xd68, 1 << 27);

	/* GPIO tweak: clear bits 18/20, set bit 22 — purpose unknown,
	   matches vendor firmware.  TODO confirm against board schematics. */
	outl((gav(inl(DEFAULT_GPIOBASE | 0x38)) & ~0x140000) | 0x400000,
	     DEFAULT_GPIOBASE | 0x38);
	gav(inb(DEFAULT_GPIOBASE | 0xe));	// = 0xfdcaff6e
}
2858
/*
 * Early chipset bring-up.  Detects an interrupted previous boot via
 * the MCHBAR 0x2ca8 scratchpad and reboots cleanly if so, sets up the
 * DMI link, programs the graphics stolen-memory size (GGC) from the
 * "gfx_uma_size" option, and — when the IGD is enabled in DEVEN —
 * a handful of graphics-related registers.
 */
void chipset_init(const int s3resume)
{
	u8 x2ca8;
	u16 ggc;
	u8 gfxsize;

	x2ca8 = mchbar_read8(0x2ca8);
	/* Scratchpad bit 0 set, or value 8 outside an S3 resume, means the
	   previous boot did not complete: clear it and do a full reset. */
	if ((x2ca8 & 1) || (x2ca8 == 8 && !s3resume)) {
		printk(BIOS_DEBUG, "soft reset detected, rebooting properly\n");
		mchbar_write8(0x2ca8, 0);
		system_reset();
	}

	dmi_setup();

	mchbar_write16(0x1170, 0xa880);
	mchbar_write8(0x11c1, 1 << 0);
	mchbar_write16(0x1170, 0xb880);
	mchbar_clrsetbits8(0x1210, ~0, 0x84);

	gfxsize = get_uint_option("gfx_uma_size", 0);	/* 0 for 32MB */

	ggc = 0xb00 | ((gfxsize + 5) << 4);

	/* Program GGC with bit 1 (VGA disable) set first; it is cleared
	   again at the end of this function. */
	pci_write_config16(NORTHBRIDGE, GGC, ggc | 2);

	u16 deven;
	deven = pci_read_config16(NORTHBRIDGE, DEVEN);	// = 0x3

	if (deven & 8) {
		/* Internal graphics device enabled. */
		mchbar_write8(0x2c30, 1 << 5);
		pci_read_config8(NORTHBRIDGE, 0x8);	// = 0x18
		mchbar_setbits16(0x2c30, 1 << 9);
		mchbar_write16(0x2c32, 0x434);
		mchbar_clrsetbits32(0x2c44, ~0, 0x1053687);
		pci_read_config8(GMA, MSAC);	// = 0x2
		pci_write_config8(GMA, MSAC, 0x2);
		RCBA8(0x2318);
		RCBA8(0x2318) = 0x47;
		RCBA8(0x2320);
		RCBA8(0x2320) = 0xfc;
	}

	mchbar_clrsetbits32(0x30, ~0, 0x40);

	pci_write_config16(NORTHBRIDGE, GGC, ggc);
	gav(RCBA32(0x3428));
	RCBA32(0x3428) = 0x1d;
}
2908
get_bits_420(const u32 reg32)2909 static u8 get_bits_420(const u32 reg32)
2910 {
2911 u8 val = 0;
2912 val |= (reg32 >> 4) & (1 << 0);
2913 val |= (reg32 >> 2) & (1 << 1);
2914 val |= (reg32 >> 0) & (1 << 2);
2915 return val;
2916 }
2917
raminit(const int s3resume,const u8 * spd_addrmap)2918 void raminit(const int s3resume, const u8 *spd_addrmap)
2919 {
2920 unsigned int channel, slot, lane, rank;
2921 struct raminfo info;
2922 u8 x2ca8;
2923 int cbmem_wasnot_inited;
2924
2925 x2ca8 = mchbar_read8(0x2ca8);
2926
2927 printk(RAM_DEBUG, "Scratchpad MCHBAR8(0x2ca8): 0x%04x\n", x2ca8);
2928
2929 memset(&info, 0x5a, sizeof(info));
2930
2931 info.last_500_command[0] = 0;
2932 info.last_500_command[1] = 0;
2933
2934 info.board_lane_delay[0] = 0x14;
2935 info.board_lane_delay[1] = 0x07;
2936 info.board_lane_delay[2] = 0x07;
2937 info.board_lane_delay[3] = 0x08;
2938 info.board_lane_delay[4] = 0x56;
2939 info.board_lane_delay[5] = 0x04;
2940 info.board_lane_delay[6] = 0x04;
2941 info.board_lane_delay[7] = 0x05;
2942 info.board_lane_delay[8] = 0x10;
2943
2944 info.training.reg_178 = 0;
2945 info.training.reg_10b = 0;
2946
2947 /* Wait for some bit, maybe TXT clear. */
2948 while (!(read8((u8 *)0xfed40000) & (1 << 7)))
2949 ;
2950
2951 /* Wait for ME to be ready */
2952 intel_early_me_init();
2953 info.memory_reserved_for_heci_mb = intel_early_me_uma_size();
2954
2955 /* before SPD */
2956 timestamp_add_now(101);
2957
2958 if (!s3resume || 1) { // possible error
2959 memset(&info.populated_ranks, 0, sizeof(info.populated_ranks));
2960
2961 info.use_ecc = 1;
2962 for (channel = 0; channel < NUM_CHANNELS; channel++)
2963 for (slot = 0; slot < NUM_SLOTS; slot++) {
2964 int v;
2965 int try;
2966 int addr;
2967 const u8 useful_addresses[] = {
2968 DEVICE_TYPE,
2969 MODULE_TYPE,
2970 DENSITY,
2971 RANKS_AND_DQ,
2972 MEMORY_BUS_WIDTH,
2973 TIMEBASE_DIVIDEND,
2974 TIMEBASE_DIVISOR,
2975 CYCLETIME,
2976 CAS_LATENCIES_LSB,
2977 CAS_LATENCIES_MSB,
2978 CAS_LATENCY_TIME,
2979 0x11, 0x12, 0x13, 0x14, 0x15,
2980 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b,
2981 0x1c, 0x1d,
2982 THERMAL_AND_REFRESH,
2983 0x20,
2984 REFERENCE_RAW_CARD_USED,
2985 RANK1_ADDRESS_MAPPING,
2986 0x75, 0x76, 0x77, 0x78,
2987 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e,
2988 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84,
2989 0x85, 0x86, 0x87, 0x88,
2990 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
2991 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94,
2992 0x95
2993 };
2994 if (!spd_addrmap[2 * channel + slot])
2995 continue;
2996 for (try = 0; try < 5; try++) {
2997 v = smbus_read_byte(spd_addrmap[2 * channel + slot],
2998 DEVICE_TYPE);
2999 if (v >= 0)
3000 break;
3001 }
3002 if (v < 0)
3003 continue;
3004 for (addr = 0;
3005 addr <
3006 ARRAY_SIZE(useful_addresses); addr++)
3007 gav(info.
3008 spd[channel][0][useful_addresses
3009 [addr]] =
3010 smbus_read_byte(spd_addrmap[2 * channel + slot],
3011 useful_addresses
3012 [addr]));
3013 if (info.spd[channel][0][DEVICE_TYPE] != 11)
3014 die("Only DDR3 is supported");
3015
3016 v = info.spd[channel][0][RANKS_AND_DQ];
3017 info.populated_ranks[channel][0][0] = 1;
3018 info.populated_ranks[channel][0][1] =
3019 ((v >> 3) & 7);
3020 if (((v >> 3) & 7) > 1)
3021 die("At most 2 ranks are supported");
3022 if ((v & 7) == 0 || (v & 7) > 2)
3023 die("Only x8 and x16 modules are supported");
3024 if ((info.
3025 spd[channel][slot][MODULE_TYPE] & 0xF) != 2
3026 && (info.
3027 spd[channel][slot][MODULE_TYPE] & 0xF)
3028 != 3)
3029 die("Registered memory is not supported");
3030 info.is_x16_module[channel][0] = (v & 7) - 1;
3031 info.density[channel][slot] =
3032 info.spd[channel][slot][DENSITY] & 0xF;
3033 if (!
3034 (info.
3035 spd[channel][slot][MEMORY_BUS_WIDTH] &
3036 0x18))
3037 info.use_ecc = 0;
3038 }
3039
3040 gav(0x55);
3041
3042 for (channel = 0; channel < NUM_CHANNELS; channel++) {
3043 int v = 0;
3044 for (slot = 0; slot < NUM_SLOTS; slot++)
3045 for (rank = 0; rank < NUM_RANKS; rank++)
3046 v |= info.
3047 populated_ranks[channel][slot][rank]
3048 << (2 * slot + rank);
3049 info.populated_ranks_mask[channel] = v;
3050 }
3051
3052 gav(0x55);
3053
3054 gav(pci_read_config32(NORTHBRIDGE, CAPID0 + 4));
3055 }
3056
3057 /* after SPD */
3058 timestamp_add_now(102);
3059
3060 mchbar_clrbits8(0x2ca8, 1 << 1 | 1 << 0);
3061
3062 collect_system_info(&info);
3063 calculate_timings(&info);
3064
3065 if (!s3resume) {
3066 u8 reg8 = pci_read_config8(SOUTHBRIDGE, GEN_PMCON_2);
3067 if (x2ca8 == 0 && (reg8 & 0x80)) {
3068 /* Don't enable S4-assertion stretch. Makes trouble on roda/rk9.
3069 reg8 = pci_read_config8(PCI_DEV(0, 0x1f, 0), 0xa4);
3070 pci_write_config8(PCI_DEV(0, 0x1f, 0), 0xa4, reg8 | 0x08);
3071 */
3072
3073 /* Clear bit7. */
3074
3075 pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
3076 (reg8 & ~(1 << 7)));
3077
3078 printk(BIOS_INFO,
3079 "Interrupted RAM init, reset required.\n");
3080 system_reset();
3081 }
3082 }
3083
3084 if (!s3resume && x2ca8 == 0)
3085 pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
3086 pci_read_config8(SOUTHBRIDGE, GEN_PMCON_2) | 0x80);
3087
3088 compute_derived_timings(&info);
3089
3090 early_quickpath_init(&info, x2ca8);
3091
3092 info.cached_training = get_cached_training();
3093
3094 if (x2ca8 == 0)
3095 late_quickpath_init(&info, s3resume);
3096
3097 mchbar_setbits32(0x2c80, 1 << 24);
3098 mchbar_write32(0x1804, mchbar_read32(0x1c04) & ~(1 << 27));
3099
3100 mchbar_read8(0x2ca8); // !!!!
3101
3102 if (x2ca8 == 0) {
3103 mchbar_clrbits8(0x2ca8, 3);
3104 mchbar_write8(0x2ca8, mchbar_read8(0x2ca8) + 4); // "+" or "|"?
3105 /* This issues a CPU reset without resetting the platform */
3106 printk(BIOS_DEBUG, "Issuing a CPU reset\n");
3107 /* Write back the S3 state to PM1_CNT to let the reset CPU
3108 know it also needs to take the s3 path. */
3109 if (s3resume)
3110 write_pmbase32(PM1_CNT, read_pmbase32(PM1_CNT)
3111 | (SLP_TYP_S3 << 10));
3112 mchbar_setbits32(0x1af0, 1 << 4);
3113 halt();
3114 }
3115
3116 mchbar_clrbits8(0x2ca8, 0); // !!!!
3117
3118 mchbar_clrbits32(0x2c80, 1 << 24);
3119
3120 pci_write_config32(QPI_NON_CORE, MAX_RTIDS, 0x20220);
3121
3122 {
3123 u8 x2c20 = (mchbar_read16(0x2c20) >> 8) & 3;
3124 u16 x2c10 = mchbar_read16(0x2c10);
3125 u16 value = mchbar_read16(0x2c00);
3126 if (x2c20 == 0 && (x2c10 & 0x300) == 0)
3127 value |= (1 << 7);
3128 else
3129 value &= ~(1 << 0);
3130
3131 mchbar_write16(0x2c00, value);
3132 }
3133
3134 udelay(1000); // !!!!
3135
3136 write_1d0(0, 0x33d, 0, 0);
3137 write_500(&info, 0, 0, 0xb61, 0, 0);
3138 write_500(&info, 1, 0, 0xb61, 0, 0);
3139 mchbar_write32(0x1a30, 0);
3140 mchbar_write32(0x1a34, 0);
3141 mchbar_write16(0x614, 0xb5b | (info.populated_ranks[1][0][0] * 0x404) |
3142 (info.populated_ranks[0][0][0] * 0xa0));
3143 mchbar_write16(0x616, 0x26a);
3144 mchbar_write32(0x134, 0x856000);
3145 mchbar_write32(0x160, 0x5ffffff);
3146 mchbar_clrsetbits32(0x114, ~0, 0xc2024440); // !!!!
3147 mchbar_clrsetbits32(0x118, ~0, 0x4); // !!!!
3148 for (channel = 0; channel < NUM_CHANNELS; channel++)
3149 mchbar_write32(0x260 + (channel << 10), 0x30809ff |
3150 (info.populated_ranks_mask[channel] & 3) << 20);
3151 for (channel = 0; channel < NUM_CHANNELS; channel++) {
3152 mchbar_write16(0x31c + (channel << 10), 0x101);
3153 mchbar_write16(0x360 + (channel << 10), 0x909);
3154 mchbar_write16(0x3a4 + (channel << 10), 0x101);
3155 mchbar_write16(0x3e8 + (channel << 10), 0x101);
3156 mchbar_write32(0x320 + (channel << 10), 0x29002900);
3157 mchbar_write32(0x324 + (channel << 10), 0);
3158 mchbar_write32(0x368 + (channel << 10), 0x32003200);
3159 mchbar_write16(0x352 + (channel << 10), 0x505);
3160 mchbar_write16(0x354 + (channel << 10), 0x3c3c);
3161 mchbar_write16(0x356 + (channel << 10), 0x1040);
3162 mchbar_write16(0x39a + (channel << 10), 0x73e4);
3163 mchbar_write16(0x3de + (channel << 10), 0x77ed);
3164 mchbar_write16(0x422 + (channel << 10), 0x1040);
3165 }
3166
3167 write_1d0(0x4, 0x151, 4, 1);
3168 write_1d0(0, 0x142, 3, 1);
3169 rdmsr(0x1ac); // !!!!
3170 write_500(&info, 1, 1, 0x6b3, 4, 1);
3171 write_500(&info, 1, 1, 0x6cf, 4, 1);
3172
3173 rmw_1d0(0x21c, 0x38, 0, 6);
3174
3175 write_1d0(((!info.populated_ranks[1][0][0]) << 1) | ((!info.
3176 populated_ranks[0]
3177 [0][0]) << 0),
3178 0x1d1, 3, 1);
3179 for (channel = 0; channel < NUM_CHANNELS; channel++) {
3180 mchbar_write16(0x38e + (channel << 10), 0x5f5f);
3181 mchbar_write16(0x3d2 + (channel << 10), 0x5f5f);
3182 }
3183
3184 set_334(0);
3185
3186 program_base_timings(&info);
3187
3188 mchbar_setbits8(0x5ff, 1 << 7);
3189
3190 write_1d0(0x2, 0x1d5, 2, 1);
3191 write_1d0(0x20, 0x166, 7, 1);
3192 write_1d0(0x0, 0xeb, 3, 1);
3193 write_1d0(0x0, 0xf3, 6, 1);
3194
3195 for (channel = 0; channel < NUM_CHANNELS; channel++) {
3196 u8 a = 0;
3197 if (info.populated_ranks[channel][0][1] && info.clock_speed_index > 1)
3198 a = 3;
3199 if (info.silicon_revision == 0 || info.silicon_revision == 1)
3200 a = 3;
3201
3202 for (lane = 0; lane < 9; lane++) {
			/*
			 * Per-lane write into the 0x125 register group
			 * (4-bit field at bit offset 6).  The value 'a' is
			 * computed earlier, outside this view; register
			 * semantics are undocumented vendor magic.
			 */
			const u16 addr = 0x125 + get_lane_offset(0, 0, lane);
			rmw_500(&info, channel, addr, 6, 0xf, a);
		}
	}

	if (s3resume) {
		/*
		 * On S3 resume the training results must come from the
		 * MRC cache; without them we cannot restore the memory
		 * controller, so clear the sleep-type field in PM1_CNT
		 * (bits 12:10 at PMBASE+0x04) and do a full reset to come
		 * back up via a cold boot path.
		 */
		if (!info.cached_training) {
			u32 reg32;
			printk(BIOS_ERR,
			       "Couldn't find training data. Rebooting\n");
			reg32 = inl(DEFAULT_PMBASE + 0x04);
			outl(reg32 & ~(7 << 10), DEFAULT_PMBASE + 0x04);
			full_reset();
		}
		int tm;
		/*
		 * Replay the cached training: copy the whole training
		 * struct, then write every lane timing (4 timing register
		 * sets x channel x slot x rank x 9 lanes) back into the
		 * 0x500-range training registers.
		 */
		info.training = *info.cached_training;
		for (tm = 0; tm < 4; tm++)
			for (channel = 0; channel < NUM_CHANNELS; channel++)
				for (slot = 0; slot < NUM_SLOTS; slot++)
					for (rank = 0; rank < NUM_RANKS; rank++)
						for (lane = 0; lane < 9; lane++)
							write_500(&info,
								  channel,
								  info.training.
								  lane_timings
								  [tm][channel]
								  [slot][rank]
								  [lane],
								  get_timing_register_addr
								  (lane, tm,
								   slot, rank),
								  9, 0);
		/* Restore the two cached global training registers. */
		write_1d0(info.cached_training->reg_178, 0x178, 7, 1);
		write_1d0(info.cached_training->reg_10b, 0x10b, 6, 1);
	}

	/*
	 * Kick an internal MCH command via 0x1f0/0x1f4 and wait for the
	 * busy bit (bit 0 of 0x1f0) to self-clear.  Command encoding is
	 * undocumented; values come from a vendor BIOS trace.
	 */
	mchbar_clrsetbits32(0x1f4, ~0, 1 << 17); // !!!!
	mchbar_write32(0x1f0, 0x1d000200);
	mchbar_setbits8(0x1f0, 1 << 0);
	while (mchbar_read8(0x1f0) & 1)
		;

	program_board_delay(&info);

	mchbar_write8(0x5ff, 0);
	mchbar_write8(0x5ff, 1 << 7);
	mchbar_write8(0x5f4, 1 << 0);

	/* Start another self-clearing operation on register 0x130. */
	mchbar_clrbits32(0x130, 1 << 1); // | 2 when ?
	while (mchbar_read32(0x130) & 1)
		;

	rmw_1d0(0x14b, 0x47, 0x30, 7);
	rmw_1d0(0xd6, 0x38, 7, 6);
	rmw_1d0(0x328, 0x38, 7, 6);

	for (channel = 0; channel < NUM_CHANNELS; channel++)
		set_4cf(&info, channel, 1, 0);

	rmw_1d0(0x116, 0xe, 0, 4);
	rmw_1d0(0xae, 0x3e, 0, 6);
	rmw_1d0(0x300, 0x3e, 0, 6);
	mchbar_clrbits16(0x356, 1 << 15);
	mchbar_clrbits16(0x756, 1 << 15);
	mchbar_clrbits32(0x140, 7 << 24);
	mchbar_clrbits32(0x138, 7 << 24);
	mchbar_write32(0x130, 0x31111301);
	/* Wait until REG130b0 is 1. */
	while (mchbar_read32(0x130) & 1)
		;

	/*
	 * Sample bits 4, 2, 0 of registers 0xa1/0x2f3 (via get_bits_420)
	 * and propagate them into several related registers.  value_a1 is
	 * kept for the adjustment step below.
	 */
	u8 value_a1;
	{
		const u8 val_xa1 = get_bits_420(read_1d0(0xa1, 6)); // = 0x1cf4040 // !!!!
		const u8 val_2f3 = get_bits_420(read_1d0(0x2f3, 6)); // = 0x10a4040 // !!!!
		value_a1 = val_xa1;
		rmw_1d0(0x320, 0x38, val_2f3, 6);
		rmw_1d0(0x14b, 0x78, val_xa1, 7);
		rmw_1d0(0xce, 0x38, val_xa1, 6);
	}

	for (channel = 0; channel < NUM_CHANNELS; channel++)
		set_4cf(&info, channel, 1, 1);

	rmw_1d0(0x116, 0xe, 1, 4); // = 0x4040432 // !!!!
	{
		/*
		 * Bump value_a1 depending on the low 5 bits of MCHBAR
		 * 0x144, saturating at 7 (it is written into a 3-bit
		 * field of register 0x121).
		 */
		if ((mchbar_read32(0x144) & 0x1f) < 0x13)
			value_a1 += 2;
		else
			value_a1 += 1;

		if (value_a1 > 7)
			value_a1 = 7;

		write_1d0(2, 0xae, 6, 1);
		write_1d0(2, 0x300, 6, 1);
		write_1d0(value_a1, 0x121, 3, 1);
		rmw_1d0(0xd6, 0x38, 4, 6);
		rmw_1d0(0x328, 0x38, 4, 6);
	}

	for (channel = 0; channel < NUM_CHANNELS; channel++)
		set_4cf(&info, channel, 2, 0);

	/*
	 * Another 0x130 command; bits 29/30 flag whether slot 0 / rank 0
	 * of channels 0 and 1 are populated.  Poll until bit 0 clears.
	 */
	mchbar_write32(0x130, 0x11111301 | info.populated_ranks[1][0][0] << 30 |
		info.populated_ranks[0][0][0] << 29);
	while (mchbar_read8(0x130) & 1)
		;

	{
		const u8 val_xa1 = get_bits_420(read_1d0(0xa1, 6));
		/* Read of 0x2f3 appears to be for side effect only. */
		read_1d0(0x2f3, 6); // = 0x10a4054 // !!!!
		rmw_1d0(0x21c, 0x38, 0, 6);
		rmw_1d0(0x14b, 0x78, val_xa1, 7);
	}

	for (channel = 0; channel < NUM_CHANNELS; channel++)
		set_4cf(&info, channel, 2, 1);

	set_334(1);

	mchbar_write8(0x1e8, 1 << 2);

	/*
	 * Per-channel: 0x6b7 gets the complement of the populated-rank
	 * mask (2-bit field), 0x69b gets all-ones.
	 */
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		write_500(&info, channel,
			  0x3 & ~(info.populated_ranks_mask[channel]), 0x6b7, 2,
			  1);
		write_500(&info, channel, 0x3, 0x69b, 2, 1);
	}
	/*
	 * Fixed controller configuration values taken from a vendor BIOS
	 * trace; individual field meanings are not documented.
	 */
	mchbar_clrsetbits32(0x2d0, ~0xff0c01ff, 0x200000);
	mchbar_write16(0x6c0, 0x14a0);
	mchbar_clrsetbits32(0x6d0, ~0xff0000ff, 0x8000);
	mchbar_write16(0x232, 1 << 3);
	/* 0x40004 or 0 depending on ? */
	mchbar_clrsetbits32(0x234, 0x40004, 0x40004);
	mchbar_clrsetbits32(0x34, 0x7, 5);
	mchbar_write32(0x128, 0x2150d05);
	mchbar_write8(0x12c, 0x1f);
	mchbar_write8(0x12d, 0x56);
	mchbar_write8(0x12e, 0x31);
	mchbar_write8(0x12f, 0);
	mchbar_write8(0x271, 1 << 1);
	mchbar_write8(0x671, 1 << 1);
	mchbar_write8(0x1e8, 1 << 2);
	/* Publish the populated-rank mask per channel (0x294, bits 17:16). */
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		mchbar_write32(0x294 + (channel << 10),
			       (info.populated_ranks_mask[channel] & 3) << 16);
	mchbar_clrsetbits32(0x134, ~0xfc01ffff, 0x10000);
	mchbar_clrsetbits32(0x134, ~0xfc85ffff, 0x850000);
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		mchbar_clrsetbits32(0x260 + (channel << 10), 0xf << 20, 1 << 27 |
				(info.populated_ranks_mask[channel] & 3) << 20);

	/* On a cold boot the DIMMs still need JEDEC initialization. */
	if (!s3resume)
		jedec_init(&info);

	/* One JEDEC read per populated rank, with a running rank index. */
	int totalrank = 0;
	for (channel = 0; channel < NUM_CHANNELS; channel++)
		for (slot = 0; slot < NUM_SLOTS; slot++)
			for (rank = 0; rank < NUM_RANKS; rank++)
				if (info.populated_ranks[channel][slot][rank]) {
					jedec_read(&info, channel, slot, rank,
						   totalrank, 0xa, 0x400);
					totalrank++;
				}

	mchbar_write8(0x12c, 0x9f);

	mchbar_clrsetbits8(0x271, 0x3e, 0x0e);
	mchbar_clrsetbits8(0x671, 0x3e, 0x0e);

	if (!s3resume) {
		/*
		 * Cold boot only: per-channel population registers (same
		 * 0x294/0x298/0x29c programming is repeated for the S3
		 * path further below).
		 */
		for (channel = 0; channel < NUM_CHANNELS; channel++) {
			mchbar_write32(0x294 + (channel << 10),
				       (info.populated_ranks_mask[channel] & 3) << 16);
			mchbar_write16(0x298 + (channel << 10),
				       info.populated_ranks[channel][0][0] |
				       info.populated_ranks[channel][0][1] << 5);
			mchbar_write32(0x29c + (channel << 10), 0x77a);
		}
		mchbar_clrsetbits32(0x2c0, ~0, 0x6009cc00); // !!!!

		/* Set bit 1 of 0x243/0x643; restored to 1 after config_rank. */
		{
			u8 a, b;
			a = mchbar_read8(0x243);
			b = mchbar_read8(0x643);
			mchbar_write8(0x243, a | 2);
			mchbar_write8(0x643, b | 2);
		}

		write_1d0(7, 0x19b, 3, 1);
		write_1d0(7, 0x1c0, 3, 1);
		write_1d0(4, 0x1c6, 4, 1);
		write_1d0(4, 0x1cc, 4, 1);
		rmw_1d0(0x151, 0xf, 0x4, 4);
		mchbar_write32(0x584, 0xfffff);
		mchbar_write32(0x984, 0xfffff);

		/* Run per-rank configuration for every populated rank. */
		for (channel = 0; channel < NUM_CHANNELS; channel++)
			for (slot = 0; slot < NUM_SLOTS; slot++)
				for (rank = 0; rank < NUM_RANKS; rank++)
					if (info.
					    populated_ranks[channel][slot]
					    [rank])
						config_rank(&info, s3resume,
							    channel, slot,
							    rank);

		mchbar_write8(0x243, 1);
		mchbar_write8(0x643, 1);
	}

	/* was == 1 but is common */
	pci_write_config16(NORTHBRIDGE, 0xc8, 3);
	write_26c(0, 0x820);
	write_26c(1, 0x820);
	mchbar_setbits32(0x130, 1 << 1);
	/* end */

	if (s3resume) {
		/* S3 path: same channel-population programming as above. */
		for (channel = 0; channel < NUM_CHANNELS; channel++) {
			mchbar_write32(0x294 + (channel << 10),
				       (info.populated_ranks_mask[channel] & 3) << 16);
			mchbar_write16(0x298 + (channel << 10),
				       info.populated_ranks[channel][0][0] |
				       info.populated_ranks[channel][0][1] << 5);
			mchbar_write32(0x29c + (channel << 10), 0x77a);
		}
		mchbar_clrsetbits32(0x2c0, ~0, 0x6009cc00); // !!!!
	}

	mchbar_clrbits32(0xfa4, 1 << 24 | 1 << 1);
	mchbar_write32(0xfb0, 0x2000e019);

	/* Before training. */
	timestamp_add_now(103);

	/* Full memory training only on cold boot; S3 replayed the cache. */
	if (!s3resume)
		ram_training(&info);

	/* After training. */
	timestamp_add_now(104);

	dump_timings(&info);

	program_modules_memory_map(&info, 0);
	program_total_memory_map(&info);

	/*
	 * Select the interleaving mode in register 0x111 based on whether
	 * both interleaved and non-interleaved regions exist and on how
	 * the channels' rank sizes match.
	 */
	if (info.non_interleaved_part_mb != 0 && info.interleaved_part_mb != 0)
		mchbar_write8(0x111, 0 << 2 | 1 << 5 | 1 << 6 | 0 << 7);
	else if (have_match_ranks(&info, 0, 4) && have_match_ranks(&info, 1, 4))
		mchbar_write8(0x111, 3 << 2 | 1 << 5 | 0 << 6 | 1 << 7);
	else if (have_match_ranks(&info, 0, 2) && have_match_ranks(&info, 1, 2))
		mchbar_write8(0x111, 3 << 2 | 1 << 5 | 0 << 6 | 0 << 7);
	else
		mchbar_write8(0x111, 3 << 2 | 1 << 5 | 1 << 6 | 0 << 7);

	/* More fixed post-training configuration (trace-derived values). */
	mchbar_clrbits32(0xfac, 1 << 31);
	mchbar_write32(0xfb4, 0x4800);
	mchbar_write32(0xfb8, (info.revision < 8) ? 0x20 : 0x0);
	mchbar_write32(0xe94, 0x7ffff);
	mchbar_write32(0xfc0, 0x80002040);
	mchbar_write32(0xfc4, 0x701246);
	mchbar_clrbits8(0xfc8, 0x70);
	mchbar_setbits32(0xe5c, 1 << 24);
	mchbar_clrsetbits32(0x1a70, 3 << 20, 2 << 20);
	mchbar_write32(0x50, 0x700b0);
	mchbar_write32(0x3c, 0x10);
	mchbar_clrsetbits8(0x1aa8, 0x3f, 0xa);
	mchbar_setbits8(0xff4, 1 << 1);
	mchbar_clrsetbits32(0xff8, 0xe008, 0x1020);

	/* Program the IOMMU base registers (bit 0 = enable). */
	mchbar_write32(0xd00, IOMMU_BASE2 | 1);
	mchbar_write32(0xd40, IOMMU_BASE1 | 1);
	mchbar_write32(0xdc0, IOMMU_BASE4 | 1);

	write32p(IOMMU_BASE1 | 0xffc, 0x80000000);
	write32p(IOMMU_BASE2 | 0xffc, 0xc0000000);
	write32p(IOMMU_BASE4 | 0xffc, 0x80000000);

	{
		/* FSB-frequency-derived field packing for register 0xfcc. */
		u32 eax;

		eax = info.fsb_frequency / 9;
		mchbar_clrsetbits32(0xfcc, 0x3ffff,
				    (eax * 0x280) | (eax * 0x5000) | eax | 0x40000);
		mchbar_write32(0x20, 0x33001);
	}

	/* Per-channel power/clock tuning depending on slot count, clock
	   speed and silicon revision. */
	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		mchbar_clrbits32(0x220 + (channel << 10), 0x7770);
		if (info.max_slots_used_in_channel == 1)
			mchbar_setbits16(0x237 + (channel << 10), 0x0201);
		else
			mchbar_clrbits16(0x237 + (channel << 10), 0x0201);

		mchbar_setbits8(0x241 + (channel << 10), 1 << 0);

		if (info.clock_speed_index <= 1 && (info.silicon_revision == 2
						    || info.silicon_revision == 3))
			mchbar_setbits32(0x248 + (channel << 10), 0x00102000);
		else
			mchbar_clrbits32(0x248 + (channel << 10), 0x00102000);
	}

	mchbar_setbits32(0x115, 1 << 24);

	{
		/*
		 * Build the 0x210 value: base 0xd (+2 on silicon revision
		 * >= 2), with a slot-count-derived mask in bits 7:4.
		 */
		u8 al;
		al = 0xd;
		if (!(info.silicon_revision == 0 || info.silicon_revision == 1))
			al += 2;
		al |= ((1 << (info.max_slots_used_in_channel - 1)) - 1) << 4;
		mchbar_write32(0x210, al << 16 | 0x20);
	}

	for (channel = 0; channel < NUM_CHANNELS; channel++) {
		mchbar_write32(0x288 + (channel << 10), 0x70605040);
		mchbar_write32(0x28c + (channel << 10), 0xfffec080);
		mchbar_write32(0x290 + (channel << 10), 0x282091c |
			       (info.max_slots_used_in_channel - 1) << 0x16);
	}
	/*
	 * Read/write-back of the egress port VC1 capability; the bare
	 * pci_read_config32 calls mirror the vendor BIOS access trace.
	 */
	u32 reg1c;
	pci_read_config32(NORTHBRIDGE, 0x40); // = DEFAULT_EPBAR | 0x001 // OK
	reg1c = epbar_read32(EPVC1RCAP); // = 0x8001 // OK
	pci_read_config32(NORTHBRIDGE, 0x40); // = DEFAULT_EPBAR | 0x001 // OK
	epbar_write32(EPVC1RCAP, reg1c); // OK
	mchbar_read8(0xe08); // = 0x0
	pci_read_config32(NORTHBRIDGE, 0xe4); // = 0x316126
	mchbar_setbits8(0x1210, 1 << 1);
	mchbar_write32(0x1200, 0x8800440);
	mchbar_write32(0x1204, 0x53ff0453);
	mchbar_write32(0x1208, 0x19002043);
	mchbar_write16(0x1214, 0x320);

	/* Revisions 0x10/0x11 get a different 0x1214 value. */
	if (info.revision == 0x10 || info.revision == 0x11) {
		mchbar_write16(0x1214, 0x220);
		mchbar_setbits8(0x1210, 1 << 6);
	}

	mchbar_setbits8(0x1214, 1 << 2);
	mchbar_write8(0x120c, 1);
	mchbar_write8(0x1218, 3);
	mchbar_write8(0x121a, 3);
	mchbar_write8(0x121c, 3);
	mchbar_write16(0xc14, 0);
	mchbar_write16(0xc20, 0);
	mchbar_write32(0x1c, 0);

	/* revision dependent here. */

	mchbar_setbits16(0x1230, 0x1f07);

	if (info.uma_enabled)
		mchbar_setbits32(0x11f4, 1 << 28);

	mchbar_setbits16(0x1230, 1 << 15);
	mchbar_setbits8(0x1214, 1 << 0);

	/*
	 * 0x1020 packs two bytes read back below: bl (high byte) and
	 * ebpb (low byte); when it reads 0, fall back to bl=8/ebpb=0
	 * and skip the 0x1018 clear.  Used to derive the 0x1010 value.
	 */
	u8 bl, ebpb;
	u16 reg_1020;

	reg_1020 = mchbar_read32(0x1020); // = 0x6c733c // OK
	mchbar_write8(0x1070, 1);

	mchbar_write32(0x1000, 0x100);
	mchbar_write8(0x1007, 0);

	if (reg_1020 != 0) {
		mchbar_write16(0x1018, 0);
		bl = reg_1020 >> 8;
		ebpb = reg_1020 & 0xff;
	} else {
		ebpb = 0;
		bl = 8;
	}

	/* MSR 0x1a2 (temperature target) read; value unused, mirrors trace. */
	rdmsr(0x1a2);

	mchbar_write32(0x1014, 0xffffffff);

	/* Result multiplied by !!reg_1020, i.e. forced to 0 when 0x1020 was 0. */
	mchbar_write32(0x1010, ((((ebpb + 0x7d) << 7) / bl) & 0xff) * !!reg_1020);

	mchbar_write8(0x101c, 0xb8);

	mchbar_clrsetbits8(0x123e, 0xf0, 0x60);
	if (reg_1020 != 0) {
		mchbar_clrsetbits32(0x123c, 0xf << 20, 0x6 << 20);
		mchbar_write8(0x101c, 0xb8);
	}

	/*
	 * HECI (ME) UMA base: TOM is shifted into MiB units (<<6 implies
	 * the register counts 64 MiB chunks — assumption from the math),
	 * the ME-reserved MiB are subtracted, and <<20 converts to a
	 * byte address.
	 */
	const u64 heci_uma_addr =
	    ((u64)
	     ((((u64)pci_read_config16(NORTHBRIDGE, TOM)) << 6) -
	      info.memory_reserved_for_heci_mb)) << 20;

	setup_heci_uma(heci_uma_addr, info.memory_reserved_for_heci_mb);

	if (info.uma_enabled) {
		/*
		 * Integrated-graphics UMA handshake: set bit 12 of 0x1170
		 * and poll (bounded by a 0xffff countdown) for it to
		 * self-clear.
		 */
		u16 ax;
		mchbar_setbits32(0x11b0, 1 << 14);
		mchbar_setbits32(0x11b4, 1 << 14);
		mchbar_setbits16(0x1190, 1 << 14);

		ax = mchbar_read16(0x1190) & 0xf00; // = 0x480a // OK
		mchbar_write16(0x1170, ax | (mchbar_read16(0x1170) & 0x107f) | 0x4080);
		mchbar_setbits16(0x1170, 1 << 12);

		udelay(1000);

		u16 ecx;
		for (ecx = 0xffff; ecx && (mchbar_read16(0x1170) & (1 << 12)); ecx--)
			;
		mchbar_clrbits16(0x1190, 1 << 14);
	}

	/* Clear bit 7 of GEN_PMCON_2 in the southbridge. */
	pci_write_config8(SOUTHBRIDGE, GEN_PMCON_2,
			  pci_read_config8(SOUTHBRIDGE, GEN_PMCON_2) & ~0x80);
	udelay(10000);
	mchbar_write16(0x2ca8, 1 << 3);

	udelay(1000);
	dump_timings(&info);
	/* Nonzero return means CBMEM had to be re-created from scratch. */
	cbmem_wasnot_inited = cbmem_recovery(s3resume);

	if (!s3resume)
		save_timings(&info);
	if (s3resume && cbmem_wasnot_inited) {
		/* CBMEM (and thus the OS resume context) is gone: the S3
		   image cannot be resumed, so sanity-check RAM and reset. */
		printk(BIOS_ERR, "Failed S3 resume.\n");
		ram_check_nodie(1 * MiB);

		/* Failed S3 resume, reset to come up cleanly */
		full_reset();
	}
}
3638