/* SPDX-License-Identifier: GPL-2.0-or-later */

#include <device/mmio.h>
#include <console/console.h>
#include <delay.h>
#include <string.h>
#include <types.h>
#include "raminit.h"
#include "x4x.h"

static void print_dll_setting(const struct dll_setting *dll_setting, u8 default_verbose)
{
	u8 debug_level = default_verbose ? BIOS_DEBUG : RAM_DEBUG;

	printk(debug_level, "%d.%d.%d.%d:%d.%d\n", dll_setting->coarse,
	       dll_setting->clk_delay, dll_setting->tap,
	       dll_setting->pi, dll_setting->db_en,
	       dll_setting->db_sel);
}

struct db_limit {
	u8 tap0;
	u8 tap1;
	u8 pi0;
	u8 pi1;
};

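/*
 * Assumption: db_en/db_sel presumably enable and select an extra delay buffer
 * stage on the DQ/DQS signal. The limits below mark the tap/pi points, per
 * memory clock, at which that buffer has to be reconfigured.
 */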
static void set_db(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
{
	struct db_limit limit;

	switch (s->selected_timings.mem_clk) {
	default:
	case MEM_CLOCK_800MHz:
		limit.tap0 = 3;
		limit.tap1 = 10;
		limit.pi0 = 2;
		limit.pi1 = 3;
		break;
	case MEM_CLOCK_1066MHz:
		limit.tap0 = 2;
		limit.tap1 = 8;
		limit.pi0 = 6;
		limit.pi1 = 7;
		break;
	case MEM_CLOCK_1333MHz:
		limit.tap0 = 3;
		limit.tap1 = 11;
		/* TO CHECK: Might be reversed, since this makes little sense */
		limit.pi0 = 6;
		limit.pi1 = 4;
		break;
	}

	if (dq_dqs_setting->tap < limit.tap0) {
		dq_dqs_setting->db_en = 1;
		dq_dqs_setting->db_sel = 1;
	} else if ((dq_dqs_setting->tap == limit.tap0)
			&& (dq_dqs_setting->pi < limit.pi0)) {
		dq_dqs_setting->db_en = 1;
		dq_dqs_setting->db_sel = 1;
	} else if (dq_dqs_setting->tap < limit.tap1) {
		dq_dqs_setting->db_en = 0;
		dq_dqs_setting->db_sel = 0;
	} else if ((dq_dqs_setting->tap == limit.tap1)
			&& (dq_dqs_setting->pi < limit.pi1)) {
		dq_dqs_setting->db_en = 0;
		dq_dqs_setting->db_sel = 0;
	} else {
		dq_dqs_setting->db_en = 1;
		dq_dqs_setting->db_sel = 0;
	}
}

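/* Maximum DQ/DQS tap value, indexed by memory clock: 800, 1066, 1333 MHz */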
static const u8 max_tap[3] = {12, 10, 13};

static enum cb_err increment_dq_dqs(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
{
	u8 max_tap_val = max_tap[s->selected_timings.mem_clk - MEM_CLOCK_800MHz];

	if (dq_dqs_setting->pi < 6) {
		dq_dqs_setting->pi += 1;
	} else if (dq_dqs_setting->tap < max_tap_val) {
		dq_dqs_setting->pi = 0;
		dq_dqs_setting->tap += 1;
	} else if (dq_dqs_setting->clk_delay < 2) {
		dq_dqs_setting->pi = 0;
		dq_dqs_setting->tap = 0;
		dq_dqs_setting->clk_delay += 1;
	} else if (dq_dqs_setting->coarse < 1) {
		dq_dqs_setting->pi = 0;
		dq_dqs_setting->tap = 0;
		dq_dqs_setting->clk_delay -= 1;
		dq_dqs_setting->coarse += 1;
	} else {
		return CB_ERR;
	}
	set_db(s, dq_dqs_setting);
	return CB_SUCCESS;
}

static enum cb_err decrement_dq_dqs(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
{
	u8 max_tap_val = max_tap[s->selected_timings.mem_clk - MEM_CLOCK_800MHz];

	if (dq_dqs_setting->pi > 0) {
		dq_dqs_setting->pi -= 1;
	} else if (dq_dqs_setting->tap > 0) {
		dq_dqs_setting->pi = 6;
		dq_dqs_setting->tap -= 1;
	} else if (dq_dqs_setting->clk_delay > 0) {
		dq_dqs_setting->pi = 6;
		dq_dqs_setting->tap = max_tap_val;
		dq_dqs_setting->clk_delay -= 1;
	} else if (dq_dqs_setting->coarse > 0) {
		dq_dqs_setting->pi = 6;
		dq_dqs_setting->tap = max_tap_val;
		dq_dqs_setting->clk_delay += 1;
		dq_dqs_setting->coarse -= 1;
	} else {
		return CB_ERR;
	}
	set_db(s, dq_dqs_setting);
	return CB_SUCCESS;
}

#define WT_PATTERN_SIZE 80

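/*
 * Assumption: these patterns are presumably chosen to stress both
 * simultaneous (all bits toggling) and isolated single-bit transitions
 * on the DQ lines.
 */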
static const u32 write_training_schedule[WT_PATTERN_SIZE] = {
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
};

enum training_modes {
	SUCCEEDING = 0,
	FAILING = 1
};

static u8 test_dq_aligned(const struct sysinfo *s, const u8 channel)
{
	u32 address;
	int rank, lane;
	u8 count, count1;
	u8 data[8];
	u8 lane_error = 0;

	FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
		address = test_address(channel, rank);
		for (count = 0; count < WT_PATTERN_SIZE; count++) {
			for (count1 = 0; count1 < WT_PATTERN_SIZE; count1++) {
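				/*
				 * Undocumented: writing 1 to MCHBAR offset 0xf90
				 * every 16 writes presumably flushes a write
				 * buffer so the pattern actually reaches DRAM
				 * (assumption).
				 */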
				if ((count1 % 16) == 0)
					mchbar_write32(0xf90, 1);
				const u32 pattern = write_training_schedule[count1];
				write32p(address + 8 * count1, pattern);
				write32p(address + 8 * count1 + 4, pattern);
			}

			const u32 good = write_training_schedule[count];
			write32(&data[0], read32p(address + 8 * count));
			write32(&data[4], read32p(address + 8 * count + 4));
			FOR_EACH_BYTELANE(lane) {
				u8 expected = (good >> ((lane % 4) * 8)) & 0xff;
				if (data[lane] != expected)
					lane_error |= 1 << lane;
			}
		}
	}
	return lane_error;
}

#define CONSISTENCY 10

/*
 * This function finds either failing or succeeding writes by increasing DQ.
 * Once it has found a failing or succeeding setting, it increases DQ another
 * CONSISTENCY (10) times to make sure the result is consistent.
 * This is probably done because lanes cannot be trained independently of
 * each other.
 */
static enum cb_err find_dq_limit(const struct sysinfo *s, const u8 channel,
				 struct dll_setting dq_setting[TOTAL_BYTELANES],
				 u8 dq_lim[TOTAL_BYTELANES],
				 const enum training_modes expected_result)
{
	enum cb_err status = CB_SUCCESS;
	int lane;
	u8 test_result;
	u8 pass_count[TOTAL_BYTELANES];
	u8 success_mask = 0xff;

	printk(RAM_DEBUG, "Looking for %s writes on channel %d\n",
	       expected_result == FAILING ? "failing" : "succeeding", channel);
	memset(pass_count, 0, sizeof(pass_count));

	while (success_mask) {
		test_result = test_dq_aligned(s, channel);
		FOR_EACH_BYTELANE(lane) {
			if (((test_result >> lane) & 1) != expected_result) {
				status = increment_dq_dqs(s, &dq_setting[lane]);
				dqset(channel, lane, &dq_setting[lane]);
				dq_lim[lane]++;
			} else if (pass_count[lane] < CONSISTENCY) {
				status = increment_dq_dqs(s, &dq_setting[lane]);
				dqset(channel, lane, &dq_setting[lane]);
				dq_lim[lane]++;
				pass_count[lane]++;
			} else if (pass_count[lane] == CONSISTENCY) {
				success_mask &= ~(1 << lane);
			}
			if (status == CB_ERR) {
				printk(BIOS_CRIT,
				       "Could not find a case of %s writes on CH%d, lane %d\n",
				       expected_result == FAILING ? "failing"
				       : "succeeding", channel, lane);
				return CB_ERR;
			}
		}
	}
	return CB_SUCCESS;
}

/*
 * This attempts to find the ideal delay for DQ to account for the skew between
 * the DQ and the DQS signal.
 * The training works this way:
 * - start from the DQS delay values (DQ is always later than DQS)
 * - increment the DQ delay until a succeeding write is found on all bytelanes,
 *   on all ranks on a channel, and save these values
 * - again increment the DQ delay until writes start to fail on all bytelanes
 *   and save that value
 * - use the mean between the saved succeeding and failing values
 * - note: bytelanes cannot be trained independently, so the delays need to be
 *   adjusted and tested for all of them at the same time
 */
enum cb_err do_write_training(struct sysinfo *s)
{
	int i;
	u8 channel, lane;
	u8 dq_lower[TOTAL_BYTELANES];
	u8 dq_upper[TOTAL_BYTELANES];
	struct dll_setting dq_setting[TOTAL_BYTELANES];

	printk(BIOS_DEBUG, "Starting DQ write training\n");

	FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
		printk(BIOS_DEBUG, "Doing DQ write training on CH%d\n", channel);

		/* Start all lanes at DQS values */
		FOR_EACH_BYTELANE(lane) {
			dqset(channel, lane, &s->dqs_settings[channel][lane]);
			s->dq_settings[channel][lane] = s->dqs_settings[channel][lane];
		}
		memset(dq_lower, 0, sizeof(dq_lower));
		/* Start from DQS settings */
		memcpy(dq_setting, s->dqs_settings[channel], sizeof(dq_setting));

		if (find_dq_limit(s, channel, dq_setting, dq_lower, SUCCEEDING)) {
			printk(BIOS_CRIT, "Could not find working lower limit DQ setting\n");
			return CB_ERR;
		}

		memcpy(dq_upper, dq_lower, sizeof(dq_lower));

		if (find_dq_limit(s, channel, dq_setting, dq_upper, FAILING)) {
			printk(BIOS_WARNING, "Could not find failing upper limit DQ setting\n");
			return CB_ERR;
		}

		FOR_EACH_BYTELANE(lane) {
			dq_lower[lane] -= CONSISTENCY - 1;
			dq_upper[lane] -= CONSISTENCY - 1;
			u8 dq_center = (dq_upper[lane] + dq_lower[lane]) / 2;

			printk(RAM_DEBUG,
			       "Centered value for DQ DLL: ch%d, lane %d, #steps = %d\n",
			       channel, lane, dq_center);
			for (i = 0; i < dq_center; i++) {
				/* Should never happen */
				if (increment_dq_dqs(s, &s->dq_settings[channel][lane])
						== CB_ERR)
					printk(BIOS_ERR,
					       "Huh? write training overflowed!!\n");
			}
		}

		/* Reset DQ DLL settings and increment with centered value */
		printk(BIOS_DEBUG, "Final DQ timings on CH%d\n", channel);
		FOR_EACH_BYTELANE(lane) {
			printk(BIOS_DEBUG, "\tlane%d: ", lane);
			print_dll_setting(&s->dq_settings[channel][lane], 1);
			dqset(channel, lane, &s->dq_settings[channel][lane]);
		}
	}
	printk(BIOS_DEBUG, "Done DQ write training\n");
	return CB_SUCCESS;
}

#define RT_PATTERN_SIZE 40

static const u32 read_training_schedule[RT_PATTERN_SIZE] = {
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe
};

static enum cb_err rt_increment_dqs(struct rt_dqs_setting *setting)
{
	if (setting->pi < 7) {
		setting->pi++;
	} else if (setting->tap < 14) {
		setting->pi = 0;
		setting->tap++;
	} else {
		return CB_ERR;
	}
	return CB_SUCCESS;
}

static u8 test_dqs_aligned(const struct sysinfo *s, const u8 channel)
{
	int i, rank, lane;
	volatile u8 data[8];
	u32 address;
	u8 bytelane_error = 0;

	FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
		address = test_address(channel, rank);
		for (i = 0; i < RT_PATTERN_SIZE; i++) {
			const u32 good = read_training_schedule[i];
			write32(&data[0], read32p(address + i * 8));
			write32(&data[4], read32p(address + i * 8 + 4));

			FOR_EACH_BYTELANE(lane) {
				if (data[lane] != (good & 0xff))
					bytelane_error |= 1 << lane;
			}
		}
	}
	return bytelane_error;
}

static enum cb_err rt_find_dqs_limit(struct sysinfo *s, u8 channel,
				     struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES],
				     u8 dqs_lim[TOTAL_BYTELANES],
				     const enum training_modes expected_result)
{
	int lane;
	u8 test_result;
	enum cb_err status = CB_SUCCESS;

	FOR_EACH_BYTELANE(lane)
		rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);

	while (status == CB_SUCCESS) {
		test_result = test_dqs_aligned(s, channel);
		if (test_result == (expected_result == SUCCEEDING ? 0 : 0xff))
			return CB_SUCCESS;
		FOR_EACH_BYTELANE(lane) {
			if (((test_result >> lane) & 1) != expected_result) {
				status = rt_increment_dqs(&dqs_setting[lane]);
				dqs_lim[lane]++;
				rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);
			}
		}
	}

	if (expected_result == SUCCEEDING) {
		printk(BIOS_CRIT, "Could not find RT DQS setting\n");
		return CB_ERR;
	} else {
		printk(RAM_DEBUG, "Read succeeded over all DQS settings, continuing\n");
		return CB_SUCCESS;
	}
}

#define RT_LOOPS 3

/*
 * This attempts to find the ideal delay for DQS on reads (rx).
 * The training works this way:
 * - start from the lowest possible delay (0) on all bytelanes
 * - increment the DQS rx delays until a succeeding read is found on all
 *   bytelanes, on all ranks on a channel, and save these values
 * - again increment the DQS rx delay until reads start to fail on all bytelanes
 *   and save that value
 * - use the mean between the saved succeeding and failing values
 * - note0: bytelanes cannot be trained independently, so the delays need to be
 *   adjusted and tested for all of them at the same time
 * - note1: at this stage all ranks effectively use rank 0's rt_dqs settings,
 *   but later on their respective settings are used (TODO where is the
 *   'switch' register??). So the results are programmed for all ranks at the
 *   end of the training. Programming all ranks instead of only the populated
 *   ranks seems to be required, most likely because the signals can't really
 *   be generated separately.
 */
enum cb_err do_read_training(struct sysinfo *s)
{
	int loop, channel, i, lane, rank;
	u32 address, content;
	u8 dqs_lower[TOTAL_BYTELANES];
	u8 dqs_upper[TOTAL_BYTELANES];
	struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES];
	u16 saved_dqs_center[TOTAL_CHANNELS][TOTAL_BYTELANES];

	memset(saved_dqs_center, 0, sizeof(saved_dqs_center));

	printk(BIOS_DEBUG, "Starting DQS read training\n");

	for (loop = 0; loop < RT_LOOPS; loop++) {
		FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
			printk(RAM_DEBUG, "Doing DQS read training on CH%d\n",
			       channel);

			/* Write pattern to strobe address */
			FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
				address = test_address(channel, rank);
				for (i = 0; i < RT_PATTERN_SIZE; i++) {
					content = read_training_schedule[i];
					write32p(address + 8 * i, content);
					write32p(address + 8 * i + 4, content);
				}
			}

			memset(dqs_lower, 0, sizeof(dqs_lower));
			memset(&dqs_setting, 0, sizeof(dqs_setting));
			if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_lower,
					      SUCCEEDING)) {
				printk(BIOS_CRIT,
				       "Could not find working lower limit DQS setting\n");
				return CB_ERR;
			}

			FOR_EACH_BYTELANE(lane)
				dqs_upper[lane] = dqs_lower[lane];

			if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_upper,
					      FAILING)) {
				printk(BIOS_CRIT,
				       "Could not find failing upper limit DQS setting\n");
				return CB_ERR;
			}

			printk(RAM_DEBUG, "Centered values, loop %d:\n", loop);
			FOR_EACH_BYTELANE(lane) {
				u8 center = (dqs_lower[lane] + dqs_upper[lane]) / 2;
				printk(RAM_DEBUG, "\t lane%d: #%d\n", lane, center);
				saved_dqs_center[channel][lane] += center;
			}
		} /* END FOR_EACH_POPULATED_CHANNEL */
	} /* end RT_LOOPS */

	memset(s->rt_dqs, 0, sizeof(s->rt_dqs));

	FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
		printk(BIOS_DEBUG, "Final timings on CH%d:\n", channel);
		FOR_EACH_BYTELANE(lane) {
			saved_dqs_center[channel][lane] /= RT_LOOPS;
			while (saved_dqs_center[channel][lane]--) {
				if (rt_increment_dqs(&s->rt_dqs[channel][lane])
						== CB_ERR)
					/* Should never happen */
					printk(BIOS_ERR,
					       "Huh? read training overflowed!!\n");
			}
			/* Later on separate settings for each rank are used so program
			   all of them */
			FOR_EACH_RANK_IN_CHANNEL(rank)
				rt_set_dqs(channel, lane, rank,
					   &s->rt_dqs[channel][lane]);
			printk(BIOS_DEBUG, "\tlane%d: %d.%d\n",
			       lane, s->rt_dqs[channel][lane].tap,
			       s->rt_dqs[channel][lane].pi);
		}
	}
	printk(BIOS_DEBUG, "Done DQS read training\n");
	return CB_SUCCESS;
}

/* Enable write leveling on selected rank and disable output on other ranks */
static void set_rank_write_level(struct sysinfo *s, u8 channel, u8 config,
				 u8 config_rank, u8 target_rank, int wl_enable)
{
	u32 emrs1;

	/* Shifted left by 2 bits later, so u8 can be used to reduce size */
	static const u8 emrs1_lut[8][4][4] = { /* [Config][Leveling Rank][Rank] */
		{ /* Config 0: 2R2R */
			{0x11, 0x00, 0x91, 0x00},
			{0x00, 0x11, 0x91, 0x00},
			{0x91, 0x00, 0x11, 0x00},
			{0x91, 0x00, 0x00, 0x11}
		},
		{ /* Config 1: 2R1R */
			{0x11, 0x00, 0x91, 0x00},
			{0x00, 0x11, 0x91, 0x00},
			{0x91, 0x00, 0x11, 0x00},
			{0x00, 0x00, 0x00, 0x00}
		},
		{ /* Config 2: 1R2R */
			{0x11, 0x00, 0x91, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x91, 0x00, 0x11, 0x00},
			{0x91, 0x00, 0x00, 0x11}
		},
		{ /* Config 3: 1R1R */
			{0x11, 0x00, 0x91, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x91, 0x00, 0x11, 0x00},
			{0x00, 0x00, 0x00, 0x00}
		},
		{ /* Config 4: 2R0R */
			{0x11, 0x00, 0x00, 0x00},
			{0x00, 0x11, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00}
		},
		{ /* Config 5: 0R2R */
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x11, 0x00},
			{0x00, 0x00, 0x00, 0x11}
		},
		{ /* Config 6: 1R0R */
			{0x11, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00}
		},
		{ /* Config 7: 0R1R */
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x00, 0x00},
			{0x00, 0x00, 0x11, 0x00},
			{0x00, 0x00, 0x00, 0x00}
		}
	};

	if (wl_enable) {
		printk(RAM_DEBUG, "Entering WL mode\n");
		printk(RAM_DEBUG, "Using WL ODT values\n");
		emrs1 = emrs1_lut[config][target_rank][config_rank];
	} else {
		printk(RAM_DEBUG, "Exiting WL mode\n");
		emrs1 = ddr3_emrs1_rtt_nom_config[s->dimm_config[channel]][config_rank];
	}
	printk(RAM_DEBUG, "Setting ODT for rank%d to ", config_rank);
	switch (emrs1) {
	case 0:
		printk(RAM_DEBUG, "High-Z\n");
		break;
	case 0x11:
		printk(RAM_DEBUG, "40 Ohm\n");
		break;
	case 0x81:
		printk(RAM_DEBUG, "30 Ohm\n");
		break;
	case 0x80:
		printk(RAM_DEBUG, "20 Ohm\n");
		break;
	case 0x10:
		printk(RAM_DEBUG, "120 Ohm\n");
		break;
	case 0x01:
		printk(RAM_DEBUG, "60 Ohm\n");
		break;
	default:
		printk(BIOS_WARNING, "ODT value Undefined!\n");
		break;
	}

	emrs1 <<= 2;
	/* Set output drive strength to 34 Ohm during write levelling */
	emrs1 |= (1 << 1);

	if (wl_enable && (target_rank != config_rank)) {
		printk(RAM_DEBUG, "Disabling output for rank%d\n", config_rank);
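		/* DDR3 MR1 bit A12 (Qoff): disable this rank's output drivers */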
		emrs1 |= (1 << 12);
	}
	if (wl_enable && (target_rank == config_rank)) {
		printk(RAM_DEBUG, "Enabling WL for rank%d\n", config_rank);
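		/* DDR3 MR1 bit A7: put this rank into write leveling mode */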
		emrs1 |= (1 << 7);
	}
	send_jedec_cmd(s, config_rank, channel, EMRS1_CMD, emrs1);
}

#define N_SAMPLES 5

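/*
 * Sample the level seen on each DQ lane N_SAMPLES times right after a write
 * burst; high_found counts how often each lane sampled high. Presumably the
 * level is the DQS (tx) strobe fed back over DQ during write leveling
 * (assumption based on the description above search_write_leveling).
 */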
static void sample_dq(const struct sysinfo *s, u8 channel, u8 rank,
		      u8 high_found[8])
{
	u32 address = test_address(channel, rank);
	int samples, lane;

	memset(high_found, 0, TOTAL_BYTELANES * sizeof(high_found[0]));
	for (samples = 0; samples < N_SAMPLES; samples++) {
		write32p(address, 0x12341234);
		write32p(address + 4, 0x12341234);
		udelay(5);
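		/*
		 * Undocumented: bit 7 of the per-lane register at MCHBAR
		 * 0x561 + 0x400 * channel + 4 * lane presumably reflects the
		 * DQ level sampled during write leveling (assumption).
		 */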
		FOR_EACH_BYTELANE(lane) {
			u8 dq_high = (mchbar_read8(0x561 + 0x400 * channel
					+ (lane * 4)) >> 7) & 1;
			high_found[lane] += dq_high;
		}
	}
}

static enum cb_err increment_to_dqs_edge(struct sysinfo *s, u8 channel, u8 rank)
{
	int lane;
	u8 saved_24d;
	struct dll_setting dqs_setting[TOTAL_BYTELANES];
	u8 bytelane_ok = 0;
	u8 dq_sample[TOTAL_BYTELANES];

	memcpy(dqs_setting, s->dqs_settings[channel], sizeof(dqs_setting));
	FOR_EACH_BYTELANE(lane)
		dqsset(channel, lane, &dqs_setting[lane]);

	saved_24d = mchbar_read8(0x24d + 0x400 * channel);

	/* Loop 0: Find DQ sample low, by decreasing */
	while (bytelane_ok != 0xff) {
		sample_dq(s, channel, rank, dq_sample);
		FOR_EACH_BYTELANE(lane) {
			if (bytelane_ok & (1 << lane))
				continue;

			printk(RAM_SPEW, "%d, %d, %02d, %d, lane%d sample: %d\n",
			       dqs_setting[lane].coarse,
			       dqs_setting[lane].clk_delay,
			       dqs_setting[lane].tap,
			       dqs_setting[lane].pi,
			       lane,
			       dq_sample[lane]);

			if (dq_sample[lane] == 0) {
				bytelane_ok |= (1 << lane);
			} else if (decrement_dq_dqs(s, &dqs_setting[lane])) {
				printk(BIOS_EMERG,
				       "DQS setting channel%d, lane %d reached a minimum!\n",
				       channel, lane);
				return CB_ERR;
			}
			dqsset(channel, lane, &dqs_setting[lane]);
		}
	}

	printk(RAM_DEBUG, "DQS settings on PASS #0:\n");
	FOR_EACH_BYTELANE(lane) {
		printk(RAM_DEBUG, "lane %d: ", lane);
		print_dll_setting(&dqs_setting[lane], 0);
	}

	/* Loop 1: Find DQ sample high, by increasing */
	bytelane_ok = 0;
	while (bytelane_ok != 0xff) {
		sample_dq(s, channel, rank, dq_sample);
		FOR_EACH_BYTELANE(lane) {
			if (bytelane_ok & (1 << lane))
				continue;

			printk(RAM_SPEW, "%d, %d, %02d, %d, lane%d sample: %d\n",
			       dqs_setting[lane].coarse,
			       dqs_setting[lane].clk_delay,
			       dqs_setting[lane].tap,
			       dqs_setting[lane].pi,
			       lane,
			       dq_sample[lane]);

			if (dq_sample[lane] == N_SAMPLES) {
				bytelane_ok |= (1 << lane);
			} else if (increment_dq_dqs(s, &dqs_setting[lane])) {
				printk(BIOS_EMERG,
				       "DQS setting channel%d, lane %d reached a maximum!\n",
				       channel, lane);
				return CB_ERR;
			}
			dqsset(channel, lane, &dqs_setting[lane]);
		}
	}

	printk(RAM_DEBUG, "DQS settings on PASS #1:\n");
	FOR_EACH_BYTELANE(lane) {
		printk(RAM_DEBUG, "lane %d: ", lane);
		print_dll_setting(&dqs_setting[lane], 0);
	}

	printk(BIOS_DEBUG, "final WL DQS settings on CH%d\n", channel);
	FOR_EACH_BYTELANE(lane) {
		printk(BIOS_DEBUG, "\tlane%d: ", lane);
		print_dll_setting(&dqs_setting[lane], 1);
		s->dqs_settings[channel][lane] = dqs_setting[lane];
	}

	mchbar_write8(0x24d + 0x400 * channel, saved_24d);
	return CB_SUCCESS;
}

/*
 * DDR3 uses a fly-by topology where the clock signal takes a different path
 * than the data signals, to allow for better signal integrity.
 * Therefore the delay on the data signals needs to account for this.
 * This is done by sampling the DQS write (tx) signal back over the DQ
 * signal and looking for delay values where the sample transitions
 * from high to low.
 * Here the following is done:
 * - Enable write leveling on the first populated rank.
 * - Disable output on the other populated ranks.
 * - Start from safe DQS (tx) delays. Other transitions can be
 *   found at different starting values but are generally bad.
 * - loop0: decrease DQS (tx) delays until low is sampled,
 *   loop1: increase DQS (tx) delays until high is sampled.
 *   This way, we are sure to have hit a low-to-high transition.
 * - Put all ranks in normal mode of operation again.
 * Note: All ranks need to be leveled together.
 */
void search_write_leveling(struct sysinfo *s)
{
	int i, ch, count;
	u8 config, rank0, rank1, lane;
	struct dll_setting dq_setting;

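	/*
	 * Maps the channel's dimm_config to the rank-population 'Config'
	 * index (0-7, see emrs1_lut above) used by emrs1_lut and odt_force.
	 */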
	const u8 chanconfig_lut[16] = {0, 6, 4, 6, 7, 3, 1, 3, 5, 2, 0, 2, 7, 3, 1, 3};

	const u8 odt_force[8][4] = { /* [Config][leveling rank] */
		{0x5, 0x6, 0x5, 0x9},
		{0x5, 0x6, 0x5, 0x0},
		{0x5, 0x0, 0x5, 0x9},
		{0x5, 0x0, 0x5, 0x0},
		{0x1, 0x2, 0x0, 0x0},
		{0x0, 0x0, 0x4, 0x8},
		{0x1, 0x0, 0x0, 0x0},
		{0x0, 0x0, 0x4, 0x0}
	};

	printk(BIOS_DEBUG, "Starting write levelling.\n");

	FOR_EACH_POPULATED_CHANNEL(s->dimms, ch) {
		printk(BIOS_DEBUG, "\tCH%d\n", ch);
		config = chanconfig_lut[s->dimm_config[ch]];

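		/*
		 * Undocumented MCHBAR registers; presumably they prepare the
		 * channel's DLL/IO logic for write leveling (assumption).
		 */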
		mchbar_clrbits8(0x5d8 + 0x400 * ch, 0x0e);
		mchbar_clrsetbits16(0x5c4 + 0x400 * ch, 0x3fff, 0x3fff);
		mchbar_clrbits8(0x265 + 0x400 * ch, 0x1f);
		/* find the first populated rank */
		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
			break;

		/* Enable WL for the first populated rank and disable output
		   for others */
		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank1)
			set_rank_write_level(s, ch, config, rank1, rank0, 1);

		mchbar_clrsetbits8(0x298 + 2 + 0x400 * ch, 0x0f, odt_force[config][rank0]);
		mchbar_clrsetbits8(0x271 + 0x400 * ch, 0x7e, 0x4e);
		mchbar_setbits8(0x5d9 + 0x400 * ch, 1 << 2);
		mchbar_clrsetbits32(0x1a0, 0x07ffffff, 0x00014000);

		if (increment_to_dqs_edge(s, ch, rank0))
			die("Write Leveling failed!");

		mchbar_clrbits8(0x298 + 2 + 0x400 * ch, 0x0f);
		mchbar_clrsetbits8(0x271 + 0x400 * ch, 0x7e, 0x0e);
		mchbar_clrbits8(0x5d9 + 0x400 * ch, 1 << 2);
		mchbar_clrsetbits32(0x1a0, 0x07ffffff, 0x00555801);

		/* Disable WL on the trained rank */
		set_rank_write_level(s, ch, config, rank0, rank0, 0);
		send_jedec_cmd(s, rank0, ch, NORMALOP_CMD, 1 << 12);

		mchbar_setbits8(0x5d8 + 0x400 * ch, 0x0e);
		mchbar_clrsetbits16(0x5c4 + 0x400 * ch, 0x3fff, 0x1807);
		mchbar_clrbits8(0x265 + 0x400 * ch, 0x1f);

		/* Disable write level mode for all ranks */
		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
			set_rank_write_level(s, ch, config, rank0, rank0, 0);
	}

	mchbar_setbits8(0x5dc, 1 << 7);

	/* Increment the DQ (tx) DLL setting by a standard amount past DQS;
	   this is further trained in write training. */
	switch (s->selected_timings.mem_clk) {
	default:
	case MEM_CLOCK_800MHz:
		count = 39;
		break;
	case MEM_CLOCK_1066MHz:
		count = 32;
		break;
	case MEM_CLOCK_1333MHz:
		count = 42;
		break;
	}

	FOR_EACH_POPULATED_CHANNEL_AND_BYTELANE(s->dimms, ch, lane) {
		dq_setting = s->dqs_settings[ch][lane];
		for (i = 0; i < count; i++)
			if (increment_dq_dqs(s, &dq_setting))
				die("Can't further increase DQ past DQS delay");
		dqset(ch, lane, &dq_setting);
	}

	printk(BIOS_DEBUG, "Done write levelling.\n");
}