xref: /aosp_15_r20/external/coreboot/src/northbridge/intel/x4x/dq_dqs.c (revision b9411a12aaaa7e1e6a6fb7c5e057f44ee179a49c)
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 
3 #include <device/mmio.h>
4 #include <console/console.h>
5 #include <delay.h>
6 #include <string.h>
7 #include <types.h>
8 #include "raminit.h"
9 #include "x4x.h"
10 
/*
 * Print one DLL setting as "coarse.clk_delay.tap.pi:db_en.db_sel".
 * A non-zero default_verbose logs at BIOS_DEBUG, zero logs at RAM_DEBUG.
 */
static void print_dll_setting(const struct dll_setting *dll_setting, u8 default_verbose)
{
	u8 debug_level;

	if (default_verbose)
		debug_level = BIOS_DEBUG;
	else
		debug_level = RAM_DEBUG;

	printk(debug_level, "%d.%d.%d.%d:%d.%d\n",
		dll_setting->coarse, dll_setting->clk_delay,
		dll_setting->tap, dll_setting->pi,
		dll_setting->db_en, dll_setting->db_sel);
}
20 
21 struct db_limit {
22 	u8 tap0;
23 	u8 tap1;
24 	u8 pi0;
25 	u8 pi1;
26 };
27 
set_db(const struct sysinfo * s,struct dll_setting * dq_dqs_setting)28 static void set_db(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
29 {
30 	struct db_limit limit;
31 
32 	switch (s->selected_timings.mem_clk) {
33 	default:
34 	case MEM_CLOCK_800MHz:
35 		limit.tap0 = 3;
36 		limit.tap1 = 10;
37 		limit.pi0 = 2;
38 		limit.pi1 = 3;
39 		break;
40 	case MEM_CLOCK_1066MHz:
41 		limit.tap0 = 2;
42 		limit.tap1 = 8;
43 		limit.pi0 = 6;
44 		limit.pi1 = 7;
45 		break;
46 	case MEM_CLOCK_1333MHz:
47 		limit.tap0 = 3;
48 		limit.tap1 = 11;
49 		/* TO CHECK: Might be reverse since this makes little sense */
50 		limit.pi0 = 6;
51 		limit.pi1 = 4;
52 		break;
53 	}
54 
55 	if (dq_dqs_setting->tap < limit.tap0) {
56 		dq_dqs_setting->db_en = 1;
57 		dq_dqs_setting->db_sel = 1;
58 	} else if ((dq_dqs_setting->tap == limit.tap0)
59 			&& (dq_dqs_setting->pi < limit.pi0)) {
60 		dq_dqs_setting->db_en = 1;
61 		dq_dqs_setting->db_sel = 1;
62 	} else if (dq_dqs_setting->tap < limit.tap1) {
63 		dq_dqs_setting->db_en = 0;
64 		dq_dqs_setting->db_sel = 0;
65 	} else if ((dq_dqs_setting->tap == limit.tap1)
66 			&& (dq_dqs_setting->pi < limit.pi1)) {
67 		dq_dqs_setting->db_en = 0;
68 		dq_dqs_setting->db_sel = 0;
69 	} else {
70 		dq_dqs_setting->db_en = 1;
71 		dq_dqs_setting->db_sel = 0;
72 	}
73 }
74 
75 static const u8 max_tap[3] = {12, 10, 13};
76 
increment_dq_dqs(const struct sysinfo * s,struct dll_setting * dq_dqs_setting)77 static enum cb_err increment_dq_dqs(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
78 {
79 	u8 max_tap_val = max_tap[s->selected_timings.mem_clk - MEM_CLOCK_800MHz];
80 
81 	if (dq_dqs_setting->pi < 6) {
82 		dq_dqs_setting->pi += 1;
83 	} else if (dq_dqs_setting->tap < max_tap_val) {
84 		dq_dqs_setting->pi = 0;
85 		dq_dqs_setting->tap += 1;
86 	} else if (dq_dqs_setting->clk_delay < 2) {
87 		dq_dqs_setting->pi = 0;
88 		dq_dqs_setting->tap = 0;
89 		dq_dqs_setting->clk_delay += 1;
90 	} else if (dq_dqs_setting->coarse < 1) {
91 		dq_dqs_setting->pi = 0;
92 		dq_dqs_setting->tap = 0;
93 		dq_dqs_setting->clk_delay -= 1;
94 		dq_dqs_setting->coarse += 1;
95 	} else {
96 		return CB_ERR;
97 	}
98 	set_db(s, dq_dqs_setting);
99 	return CB_SUCCESS;
100 }
101 
decrement_dq_dqs(const struct sysinfo * s,struct dll_setting * dq_dqs_setting)102 static enum cb_err decrement_dq_dqs(const struct sysinfo *s, struct dll_setting *dq_dqs_setting)
103 {
104 	u8 max_tap_val = max_tap[s->selected_timings.mem_clk - MEM_CLOCK_800MHz];
105 
106 	if (dq_dqs_setting->pi > 0) {
107 		dq_dqs_setting->pi -= 1;
108 	} else if (dq_dqs_setting->tap > 0) {
109 		dq_dqs_setting->pi = 6;
110 		dq_dqs_setting->tap -= 1;
111 	} else if (dq_dqs_setting->clk_delay > 0) {
112 		dq_dqs_setting->pi = 6;
113 		dq_dqs_setting->tap = max_tap_val;
114 		dq_dqs_setting->clk_delay -= 1;
115 	} else if (dq_dqs_setting->coarse > 0) {
116 		dq_dqs_setting->pi = 6;
117 		dq_dqs_setting->tap = max_tap_val;
118 		dq_dqs_setting->clk_delay += 1;
119 		dq_dqs_setting->coarse -= 1;
120 	} else {
121 		return CB_ERR;
122 	}
123 	set_db(s, dq_dqs_setting);
124 	return CB_SUCCESS;
125 }
126 
127 #define WT_PATTERN_SIZE 80
128 
129 static const u32 write_training_schedule[WT_PATTERN_SIZE] = {
130 	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
131 	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
132 	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
133 	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
134 	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
135 	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
136 	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
137 	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
138 	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
139 	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
140 	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
141 	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
142 	0x03030303, 0x04040404, 0x09090909, 0x10101010,
143 	0x21212121, 0x40404040, 0x81818181, 0x00000000,
144 	0x03030303, 0x04040404, 0x09090909, 0x10101010,
145 	0x21212121, 0x40404040, 0x81818181, 0x00000000,
146 	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
147 	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
148 	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
149 	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
150 };
151 
/*
 * Test outcome a limit search is looking for. The numeric values matter:
 * they are compared directly against a lane's error bit (0 = no error).
 */
enum training_modes {
	SUCCEEDING = 0,	/* lane error bit clear */
	FAILING = 1	/* lane error bit set */
};
156 
test_dq_aligned(const struct sysinfo * s,const u8 channel)157 static u8 test_dq_aligned(const struct sysinfo *s, const u8 channel)
158 {
159 	u32 address;
160 	int rank, lane;
161 	u8 count, count1;
162 	u8 data[8];
163 	u8 lane_error = 0;
164 
165 	FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
166 		address = test_address(channel, rank);
167 		for (count = 0; count < WT_PATTERN_SIZE; count++) {
168 			for (count1 = 0; count1 < WT_PATTERN_SIZE; count1++) {
169 				if ((count1 % 16) == 0)
170 					mchbar_write32(0xf90, 1);
171 				const u32 pattern = write_training_schedule[count1];
172 				write32p(address + 8 * count1, pattern);
173 				write32p(address + 8 * count1 + 4, pattern);
174 			}
175 
176 			const u32 good = write_training_schedule[count];
177 			write32(&data[0], read32p(address + 8 * count));
178 			write32(&data[4], read32p(address + 8 * count + 4));
179 			FOR_EACH_BYTELANE(lane) {
180 				u8 expected = (good >> ((lane % 4) * 8)) & 0xff;
181 				if (data[lane] != expected)
182 					lane_error |= 1 << lane;
183 			}
184 		}
185 	}
186 	return lane_error;
187 }
188 
189 #define CONSISTENCY 10
190 
191 /*
192  * This function finds either failing or succeeding writes by increasing DQ.
193  * When it has found a failing or succeeding setting it will increase DQ
194  * another 10 times to make sure the result is consistent.
195  * This is probably done because lanes cannot be trained independent from
196  * each other.
197  */
find_dq_limit(const struct sysinfo * s,const u8 channel,struct dll_setting dq_setting[TOTAL_BYTELANES],u8 dq_lim[TOTAL_BYTELANES],const enum training_modes expected_result)198 static enum cb_err find_dq_limit(const struct sysinfo *s, const u8 channel,
199 			struct dll_setting dq_setting[TOTAL_BYTELANES],
200 			u8 dq_lim[TOTAL_BYTELANES],
201 			const enum training_modes expected_result)
202 {
203 	enum cb_err status = CB_SUCCESS;
204 	int lane;
205 	u8 test_result;
206 	u8 pass_count[TOTAL_BYTELANES];
207 	u8 success_mask = 0xff;
208 
209 	printk(RAM_DEBUG, "Looking for %s writes on channel %d\n",
210 		expected_result == FAILING ? "failing" : "succeeding", channel);
211 	memset(pass_count, 0, sizeof(pass_count));
212 
213 	while (success_mask) {
214 		test_result = test_dq_aligned(s, channel);
215 		FOR_EACH_BYTELANE(lane) {
216 			if (((test_result >> lane) & 1) != expected_result) {
217 				status = increment_dq_dqs(s, &dq_setting[lane]);
218 				dqset(channel, lane, &dq_setting[lane]);
219 				dq_lim[lane]++;
220 			} else if (pass_count[lane] < CONSISTENCY) {
221 				status = increment_dq_dqs(s, &dq_setting[lane]);
222 				dqset(channel, lane, &dq_setting[lane]);
223 				dq_lim[lane]++;
224 				pass_count[lane]++;
225 			} else if (pass_count[lane] == CONSISTENCY) {
226 				success_mask &= ~(1 << lane);
227 			}
228 			if (status == CB_ERR) {
229 				printk(BIOS_CRIT,
230 					"Could not find a case of %s writes on CH%d, lane %d\n",
231 					expected_result == FAILING ? "failing"
232 					: "succeeding", channel, lane);
233 				return CB_ERR;
234 			}
235 		}
236 	}
237 	return CB_SUCCESS;
238 }
239 
240 /*
241  * This attempts to find the ideal delay for DQ to account for the skew between
242  * the DQ and the DQS signal.
243  * The training works this way:
244  * - start from the DQS delay values (DQ is always later than DQS)
245  * - increment the DQ delay until a succeeding write is found on all bytelayes,
246  *   on all ranks on a channel and save these values
247  * - again increment the DQ delay until write start to fail on all bytelanes and
248  *   save that value
249  * - use the mean between the saved succeeding and failing value
250  * - note: bytelanes cannot be trained independently, so the delays need to be
251  *   adjusted and tested for all of them at the same time
252  */
do_write_training(struct sysinfo * s)253 enum cb_err do_write_training(struct sysinfo *s)
254 {
255 	int i;
256 	u8 channel, lane;
257 	u8 dq_lower[TOTAL_BYTELANES];
258 	u8 dq_upper[TOTAL_BYTELANES];
259 	struct dll_setting dq_setting[TOTAL_BYTELANES];
260 
261 	printk(BIOS_DEBUG, "Starting DQ write training\n");
262 
263 	FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
264 		printk(BIOS_DEBUG, "Doing DQ write training on CH%d\n", channel);
265 
266 		/* Start all lanes at DQS values */
267 		FOR_EACH_BYTELANE(lane) {
268 			dqset(channel, lane, &s->dqs_settings[channel][lane]);
269 			s->dq_settings[channel][lane] = s->dqs_settings[channel][lane];
270 		}
271 		memset(dq_lower, 0, sizeof(dq_lower));
272 		/* Start from DQS settings */
273 		memcpy(dq_setting, s->dqs_settings[channel], sizeof(dq_setting));
274 
275 		if (find_dq_limit(s, channel, dq_setting, dq_lower, SUCCEEDING)) {
276 			printk(BIOS_CRIT, "Could not find working lower limit DQ setting\n");
277 			return CB_ERR;
278 		}
279 
280 		memcpy(dq_upper, dq_lower, sizeof(dq_lower));
281 
282 		if (find_dq_limit(s, channel, dq_setting, dq_upper, FAILING)) {
283 			printk(BIOS_WARNING, "Could not find failing upper limit DQ setting\n");
284 			return CB_ERR;
285 		}
286 
287 		FOR_EACH_BYTELANE(lane) {
288 			dq_lower[lane] -= CONSISTENCY - 1;
289 			dq_upper[lane] -= CONSISTENCY - 1;
290 			u8 dq_center = (dq_upper[lane] + dq_lower[lane]) / 2;
291 
292 			printk(RAM_DEBUG,
293 				"Centered value for DQ DLL: ch%d, lane %d, #steps = %d\n",
294 				channel, lane, dq_center);
295 			for (i = 0; i < dq_center; i++) {
296 				/* Should never happen */
297 				if (increment_dq_dqs(s, &s->dq_settings[channel][lane])
298 					== CB_ERR)
299 					printk(BIOS_ERR,
300 						"Huh? write training overflowed!!\n");
301 			}
302 		}
303 
304 		/* Reset DQ DLL settings and increment with centered value*/
305 		printk(BIOS_DEBUG, "Final DQ timings on CH%d\n", channel);
306 	        FOR_EACH_BYTELANE(lane) {
307 			printk(BIOS_DEBUG, "\tlane%d: ", lane);
308 			print_dll_setting(&s->dq_settings[channel][lane], 1);
309 			dqset(channel, lane, &s->dq_settings[channel][lane]);
310 		}
311 	}
312 	printk(BIOS_DEBUG, "Done DQ write training\n");
313 	return CB_SUCCESS;
314 }
315 
316 #define RT_PATTERN_SIZE 40
317 
318 static const u32 read_training_schedule[RT_PATTERN_SIZE] = {
319 	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
320 	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
321 	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
322 	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
323 	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
324 	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
325 	0x03030303, 0x04040404, 0x09090909, 0x10101010,
326 	0x21212121, 0x40404040, 0x81818181, 0x00000000,
327 	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
328 	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe
329 };
330 
rt_increment_dqs(struct rt_dqs_setting * setting)331 static enum cb_err rt_increment_dqs(struct rt_dqs_setting *setting)
332 {
333 	if (setting->pi < 7) {
334 		setting->pi++;
335 	} else if (setting->tap < 14) {
336 		setting->pi = 0;
337 		setting->tap++;
338 	} else {
339 		return CB_ERR;
340 	}
341 	return CB_SUCCESS;
342 }
343 
test_dqs_aligned(const struct sysinfo * s,const u8 channel)344 static u8 test_dqs_aligned(const struct sysinfo *s, const u8 channel)
345 {
346 	int i, rank, lane;
347 	volatile u8 data[8];
348 	u32 address;
349 	u8 bytelane_error = 0;
350 
351 	FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
352 		address = test_address(channel, rank);
353 		for (i = 0; i < RT_PATTERN_SIZE; i++) {
354 			const u32 good = read_training_schedule[i];
355 			write32(&data[0], read32p(address + i * 8));
356 			write32(&data[4], read32p(address + i * 8 + 4));
357 
358 			FOR_EACH_BYTELANE(lane) {
359 				if (data[lane] != (good & 0xff))
360 					bytelane_error |= 1 << lane;
361 			}
362 		}
363 	}
364 	return bytelane_error;
365 }
366 
/*
 * Step the read DQS delay of the bytelanes upwards until all of them show
 * the expected result (succeeding or failing reads) at the same time.
 *
 * dqs_setting[] holds the per-lane settings and is updated in place;
 * dqs_lim[] is incremented once for every step actually applied to a lane.
 * Only lanes that do not yet show the expected result are advanced.
 *
 * The search stops after the pass in which any lane runs out of settings.
 * The error is sticky on purpose: a later lane that can still be advanced
 * must not hide an earlier lane that is already saturated, and a saturated
 * lane must not have its step count increased any further.
 *
 * Returns CB_SUCCESS when the expected result was found on all lanes.
 * When the settings are exhausted first, a search for SUCCEEDING reads
 * returns CB_ERR, while a search for FAILING reads still returns CB_SUCCESS
 * (reads simply work over the whole range).
 */
static enum cb_err rt_find_dqs_limit(struct sysinfo *s, u8 channel,
			struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES],
			u8 dqs_lim[TOTAL_BYTELANES],
			const enum training_modes expected_result)
{
	int lane;
	u8 test_result;
	enum cb_err status = CB_SUCCESS;

	/* During training all ranks use the rank 0 settings */
	FOR_EACH_BYTELANE(lane)
		rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);

	while (status == CB_SUCCESS) {
		test_result = test_dqs_aligned(s, channel);
		/* Done when no lane (SUCCEEDING) or every lane (FAILING) has an error */
		if (test_result == (expected_result == SUCCEEDING ? 0 : 0xff))
			return CB_SUCCESS;
		FOR_EACH_BYTELANE(lane) {
			if (((test_result >> lane) & 1) == expected_result)
				continue;

			if (rt_increment_dqs(&dqs_setting[lane]) == CB_ERR) {
				/*
				 * Lane is saturated: remember that and do not
				 * count a step that was never applied.
				 */
				status = CB_ERR;
				continue;
			}
			dqs_lim[lane]++;
			rt_set_dqs(channel, lane, 0, &dqs_setting[lane]);
		}
	}

	if (expected_result == SUCCEEDING) {
		printk(BIOS_CRIT, "Could not find RT DQS setting\n");
		return CB_ERR;
	} else {
		printk(RAM_DEBUG, "Read succeeded over all DQS settings, continuing\n");
		return CB_SUCCESS;
	}
}
400 
401 #define RT_LOOPS 3
402 
403 /*
404  * This attempts to find the ideal delay for DQS on reads (rx).
405  * The training works this way:
406  * - start from the lowest possible delay (0) on all bytelanes
407  * - increment the DQS rx delays until a succeeding write is found on all
408  *   bytelayes, on all ranks on a channel and save these values
409  * - again increment the DQS rx delay until write start to fail on all bytelanes
410  *   and save that value
411  * - use the mean between the saved succeeding and failing value
412  * - note0: bytelanes cannot be trained independently, so the delays need to be
413  *   adjusted and tested for all of them at the same time
414  * - note1: At this stage all ranks effectively use the rank0's rt_dqs settings,
415  *   but later on their respective settings are used (TODO where is the
416  *   'switch' register??). So programming the results for all ranks at the end
417  *   of the training. Programming on all ranks instead of all populated ranks,
418  *   seems to be required, most likely because the signals can't really be generated
419  *   separately.
420  */
do_read_training(struct sysinfo * s)421 enum cb_err do_read_training(struct sysinfo *s)
422 {
423 	int loop, channel, i, lane, rank;
424 	u32 address, content;
425 	u8 dqs_lower[TOTAL_BYTELANES];
426 	u8 dqs_upper[TOTAL_BYTELANES];
427 	struct rt_dqs_setting dqs_setting[TOTAL_BYTELANES];
428 	u16 saved_dqs_center[TOTAL_CHANNELS][TOTAL_BYTELANES];
429 
430 	memset(saved_dqs_center, 0, sizeof(saved_dqs_center));
431 
432 	printk(BIOS_DEBUG, "Starting DQS read training\n");
433 
434 	for (loop = 0; loop < RT_LOOPS; loop++) {
435 		FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
436 			printk(RAM_DEBUG, "Doing DQS read training on CH%d\n",
437 				channel);
438 
439 			/* Write pattern to strobe address */
440 			FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, channel, rank) {
441 				address = test_address(channel, rank);
442 				for (i = 0; i < RT_PATTERN_SIZE; i++) {
443 					content = read_training_schedule[i];
444 					write32p(address + 8 * i, content);
445 					write32p(address + 8 * i + 4, content);
446 				}
447 			}
448 
449 			memset(dqs_lower, 0, sizeof(dqs_lower));
450 			memset(&dqs_setting, 0, sizeof(dqs_setting));
451 			if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_lower,
452 						SUCCEEDING)) {
453 				printk(BIOS_CRIT,
454 					"Could not find working lower limit DQS setting\n");
455 				return CB_ERR;
456 			}
457 
458 			FOR_EACH_BYTELANE(lane)
459 				dqs_upper[lane] = dqs_lower[lane];
460 
461 			if (rt_find_dqs_limit(s, channel, dqs_setting, dqs_upper,
462 						FAILING)) {
463 				printk(BIOS_CRIT,
464 					"Could not find failing upper limit DQ setting\n");
465 				return CB_ERR;
466 			}
467 
468 			printk(RAM_DEBUG, "Centered values, loop %d:\n", loop);
469 			FOR_EACH_BYTELANE(lane) {
470 				u8 center = (dqs_lower[lane] + dqs_upper[lane]) / 2;
471 				printk(RAM_DEBUG, "\t lane%d: #%d\n", lane, center);
472 				saved_dqs_center[channel][lane] += center;
473 			}
474 		} /* END FOR_EACH_POPULATED_CHANNEL */
475 	} /* end RT_LOOPS */
476 
477 	memset(s->rt_dqs, 0, sizeof(s->rt_dqs));
478 
479 	FOR_EACH_POPULATED_CHANNEL(s->dimms, channel) {
480 		printk(BIOS_DEBUG, "Final timings on CH%d:\n", channel);
481 		FOR_EACH_BYTELANE(lane) {
482 			saved_dqs_center[channel][lane] /= RT_LOOPS;
483 			while (saved_dqs_center[channel][lane]--) {
484 				if (rt_increment_dqs(&s->rt_dqs[channel][lane])
485 							== CB_ERR)
486 					/* Should never happen */
487 					printk(BIOS_ERR,
488 						"Huh? read training overflowed!!\n");
489 			}
490 			/* Later on separate settings for each rank are used so program
491 			   all of them */
492 			FOR_EACH_RANK_IN_CHANNEL(rank)
493 				rt_set_dqs(channel, lane, rank,
494 					&s->rt_dqs[channel][lane]);
495 			printk(BIOS_DEBUG, "\tlane%d: %d.%d\n",
496 				lane, s->rt_dqs[channel][lane].tap,
497 				s->rt_dqs[channel][lane].pi);
498 		}
499 	}
500 	printk(BIOS_DEBUG, "Done DQS read training\n");
501 	return CB_SUCCESS;
502 }
503 
504 /* Enable write leveling on selected rank and disable output on other ranks */
set_rank_write_level(struct sysinfo * s,u8 channel,u8 config,u8 config_rank,u8 target_rank,int wl_enable)505 static void set_rank_write_level(struct sysinfo *s, u8 channel, u8 config,
506 				u8 config_rank, u8 target_rank, int wl_enable)
507 {
508 	u32 emrs1;
509 
510 	/* Is shifted by bits 2 later so u8 can be used to reduce size */
511 	static const u8 emrs1_lut[8][4][4] = { /* [Config][Leveling Rank][Rank] */
512 		{ /* Config 0: 2R2R */
513 			{0x11, 0x00, 0x91, 0x00},
514 			{0x00, 0x11, 0x91, 0x00},
515 			{0x91, 0x00, 0x11, 0x00},
516 			{0x91, 0x00, 0x00, 0x11}
517 		},
518 		{ /* Config 1: 2R1R */
519 			{0x11, 0x00, 0x91, 0x00},
520 			{0x00, 0x11, 0x91, 0x00},
521 			{0x91, 0x00, 0x11, 0x00},
522 			{0x00, 0x00, 0x00, 0x00}
523 		},
524 		{ /* Config 2: 1R2R */
525 			{0x11, 0x00, 0x91, 0x00},
526 			{0x00, 0x00, 0x00, 0x00},
527 			{0x91, 0x00, 0x11, 0x00},
528 			{0x91, 0x00, 0x00, 0x11}
529 		},
530 		{ /* Config 3: 1R1R */
531 			{0x11, 0x00, 0x91, 0x00},
532 			{0x00, 0x00, 0x00, 0x00},
533 			{0x91, 0x00, 0x11, 0x00},
534 			{0x00, 0x00, 0x00, 0x00}
535 		},
536 		{ /* Config 4: 2R0R */
537 			{0x11, 0x00, 0x00, 0x00},
538 			{0x00, 0x11, 0x00, 0x00},
539 			{0x00, 0x00, 0x00, 0x00},
540 			{0x00, 0x00, 0x00, 0x00}
541 		},
542 		{ /* Config 5: 0R2R */
543 			{0x00, 0x00, 0x00, 0x00},
544 			{0x00, 0x00, 0x00, 0x00},
545 			{0x00, 0x00, 0x11, 0x00},
546 			{0x00, 0x00, 0x00, 0x11}
547 		},
548 		{ /* Config 6: 1R0R */
549 			{0x11, 0x00, 0x00, 0x00},
550 			{0x00, 0x00, 0x00, 0x00},
551 			{0x00, 0x00, 0x00, 0x00},
552 			{0x00, 0x00, 0x00, 0x00}
553 		},
554 		{ /* Config 7: 0R1R */
555 			{0x00, 0x00, 0x00, 0x00},
556 			{0x00, 0x00, 0x00, 0x00},
557 			{0x00, 0x00, 0x11, 0x00},
558 			{0x00, 0x00, 0x00, 0x00}
559 		}
560 	};
561 
562 	if (wl_enable) {
563 		printk(RAM_DEBUG, "Entering WL mode\n");
564 		printk(RAM_DEBUG, "Using WL ODT values\n");
565 		emrs1 = emrs1_lut[config][target_rank][config_rank];
566 	} else {
567 		printk(RAM_DEBUG, "Exiting WL mode\n");
568 		emrs1 = ddr3_emrs1_rtt_nom_config[s->dimm_config[channel]][config_rank];
569 	}
570 	printk(RAM_DEBUG, "Setting ODT for rank%d to ", config_rank);
571 	switch (emrs1) {
572 	case 0:
573 		printk(RAM_DEBUG, "High-Z\n");
574 		break;
575 	case 0x11:
576 		printk(RAM_DEBUG, "40 Ohm\n");
577 		break;
578 	case 0x81:
579 		printk(RAM_DEBUG, "30 Ohm\n");
580 		break;
581 	case 0x80:
582 		printk(RAM_DEBUG, "20 Ohm\n");
583 		break;
584 	case 0x10:
585 		printk(RAM_DEBUG, "120 Ohm\n");
586 		break;
587 	case 0x01:
588 		printk(RAM_DEBUG, "60 Ohm\n");
589 		break;
590 	default:
591 		printk(BIOS_WARNING, "ODT value Undefined!\n");
592 		break;
593 	}
594 
595 	emrs1 <<= 2;
596 	/* Set output drive strength to 34 Ohm during write levelling */
597 	emrs1 |= (1 << 1);
598 
599 	if (wl_enable && (target_rank != config_rank)) {
600 		printk(RAM_DEBUG, "Disabling output for rank%d\n", config_rank);
601 		emrs1 |= (1 << 12);
602 	}
603 	if (wl_enable && (target_rank == config_rank)) {
604 		printk(RAM_DEBUG, "Enabling WL for rank%d\n", config_rank);
605 		emrs1 |= (1 << 7);
606 	}
607 	send_jedec_cmd(s, config_rank, channel, EMRS1_CMD, emrs1);
608 }
609 
610 #define N_SAMPLES 5
611 
sample_dq(const struct sysinfo * s,u8 channel,u8 rank,u8 high_found[8])612 static void sample_dq(const struct sysinfo *s, u8 channel, u8 rank,
613 		u8 high_found[8]) {
614 	u32 address = test_address(channel, rank);
615 	int samples, lane;
616 
617 	memset(high_found, 0, TOTAL_BYTELANES * sizeof(high_found[0]));
618 	for (samples = 0; samples < N_SAMPLES; samples++) {
619 		write32p(address, 0x12341234);
620 		write32p(address + 4, 0x12341234);
621 		udelay(5);
622 		FOR_EACH_BYTELANE(lane) {
623 			u8 dq_high = (mchbar_read8(0x561 + 0x400 * channel
624 					+ (lane * 4)) >> 7) & 1;
625 			high_found[lane] += dq_high;
626 		}
627 	}
628 }
629 
/*
 * Move the DQS (tx) delay of every bytelane onto a low-to-high edge of the
 * sampled DQ signal.
 *
 * Starting from s->dqs_settings[channel]:
 * - pass 0 decreases the delay of each lane until all N_SAMPLES samples of
 *   that lane read low
 * - pass 1 increases the delay of each lane until all N_SAMPLES samples of
 *   that lane read high
 * The resulting settings are written back to s->dqs_settings[channel].
 *
 * MCHBAR 0x24d + 0x400 * channel is saved on entry and restored before a
 * successful return; it is not restored on the error paths.
 *
 * Returns CB_ERR when a lane runs out of settings in either direction,
 * CB_SUCCESS otherwise.
 */
static enum cb_err increment_to_dqs_edge(struct sysinfo *s, u8 channel, u8 rank)
{
	int lane;
	u8 lanes_done;
	u8 dq_sample[TOTAL_BYTELANES];
	struct dll_setting dqs_setting[TOTAL_BYTELANES];
	const u8 saved_24d = mchbar_read8(0x24d + 0x400 * channel);

	memcpy(dqs_setting, s->dqs_settings[channel], sizeof(dqs_setting));
	FOR_EACH_BYTELANE(lane) {
		dqsset(channel, lane, &dqs_setting[lane]);
	}

	/* Loop 0: Find DQ sample low, by decreasing */
	lanes_done = 0;
	do {
		sample_dq(s, channel, rank, dq_sample);
		FOR_EACH_BYTELANE(lane) {
			/* Lanes that already sampled low are left alone */
			if (lanes_done & (1 << lane))
				continue;

			printk(RAM_SPEW, "%d, %d, %02d, %d, lane%d sample: %d\n",
				dqs_setting[lane].coarse,
				dqs_setting[lane].clk_delay,
				dqs_setting[lane].tap,
				dqs_setting[lane].pi,
				lane,
				dq_sample[lane]);

			if (dq_sample[lane] == 0) {
				lanes_done |= (1 << lane);
			} else if (decrement_dq_dqs(s, &dqs_setting[lane])) {
				printk(BIOS_EMERG,
					"DQS setting channel%d, lane %d reached a minimum!\n",
					channel, lane);
				return CB_ERR;
			}
			dqsset(channel, lane, &dqs_setting[lane]);
		}
	} while (lanes_done != 0xff);

	printk(RAM_DEBUG, "DQS settings on PASS #0:\n");
	FOR_EACH_BYTELANE(lane) {
		printk(RAM_DEBUG, "lane %d: ", lane);
		print_dll_setting(&dqs_setting[lane], 0);
	}

	/* Loop 1: Find DQ sample high, by increasing */
	lanes_done = 0;
	do {
		sample_dq(s, channel, rank, dq_sample);
		FOR_EACH_BYTELANE(lane) {
			/* Lanes that already sampled high are left alone */
			if (lanes_done & (1 << lane))
				continue;

			printk(RAM_SPEW, "%d, %d, %02d, %d, lane%d sample: %d\n",
				dqs_setting[lane].coarse,
				dqs_setting[lane].clk_delay,
				dqs_setting[lane].tap,
				dqs_setting[lane].pi,
				lane,
				dq_sample[lane]);

			if (dq_sample[lane] == N_SAMPLES) {
				lanes_done |= (1 << lane);
			} else if (increment_dq_dqs(s, &dqs_setting[lane])) {
				printk(BIOS_EMERG,
					"DQS setting channel%d, lane %d reached a maximum!\n",
					channel, lane);
				return CB_ERR;
			}
			dqsset(channel, lane, &dqs_setting[lane]);
		}
	} while (lanes_done != 0xff);

	printk(RAM_DEBUG, "DQS settings on PASS #1:\n");
	FOR_EACH_BYTELANE(lane) {
		printk(RAM_DEBUG, "lane %d: ", lane);
		print_dll_setting(&dqs_setting[lane], 0);
	}

	/* Publish the result for the following training steps */
	printk(BIOS_DEBUG, "final WL DQS settings on CH%d\n", channel);
	FOR_EACH_BYTELANE(lane) {
		printk(BIOS_DEBUG, "\tlane%d: ", lane);
		print_dll_setting(&dqs_setting[lane], 1);
		s->dqs_settings[channel][lane] = dqs_setting[lane];
	}

	mchbar_write8(0x24d + 0x400 * channel, saved_24d);
	return CB_SUCCESS;
}
721 
722 /*
723  * DDR3 uses flyby topology where the clock signal takes a different path
724  * than the data signal, to allow for better signal intergrity.
725  * Therefore the delay on the data signals needs to account for this.
726  * This is done by sampling the DQS write (tx) signal back over the DQ
727  * signal and looking for delay values where the sample transitions
728  * from high to low.
729  * Here the following is done:
730  * - Enable write levelling on the first populated rank.
731  * - Disable output on other populated ranks.
732  * - Start from safe DQS (tx) delays. Other transitions can be
733  *   found at different starting values but are generally bad.
734  * - loop0: decrease DQS (tx) delays until low is sampled,
735  *   loop1: increase DQS (tx) delays until high is sampled,
736  *   This way, we are sure to have hit a low-high transition.
737  * - Put all ranks in normal mode of operation again.
738  * Note: All ranks need to be leveled together.
739  */
search_write_leveling(struct sysinfo * s)740 void search_write_leveling(struct sysinfo *s)
741 {
742 	int i, ch, count;
743 	u8 config, rank0, rank1, lane;
744 	struct dll_setting dq_setting;
745 
746 	const u8 chanconfig_lut[16] = {0, 6, 4, 6, 7, 3, 1, 3, 5, 2, 0, 2, 7, 3, 1, 3};
747 
748 	const u8 odt_force[8][4] = { /* [Config][leveling rank] */
749 		{0x5, 0x6, 0x5, 0x9},
750 		{0x5, 0x6, 0x5, 0x0},
751 		{0x5, 0x0, 0x5, 0x9},
752 		{0x5, 0x0, 0x5, 0x0},
753 		{0x1, 0x2, 0x0, 0x0},
754 		{0x0, 0x0, 0x4, 0x8},
755 		{0x1, 0x0, 0x0, 0x0},
756 		{0x0, 0x0, 0x4, 0x0}
757 	};
758 
759 	printk(BIOS_DEBUG, "Starting write levelling.\n");
760 
761 	FOR_EACH_POPULATED_CHANNEL(s->dimms, ch) {
762 		printk(BIOS_DEBUG, "\tCH%d\n", ch);
763 		config = chanconfig_lut[s->dimm_config[ch]];
764 
765 		mchbar_clrbits8(0x5d8 + 0x400 * ch, 0x0e);
766 		mchbar_clrsetbits16(0x5c4 + 0x400 * ch, 0x3fff, 0x3fff);
767 		mchbar_clrbits8(0x265 + 0x400 * ch, 0x1f);
768 		/* find the first populated rank */
769 		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
770 			break;
771 
772 		/* Enable WL for the first populated rank and disable output
773 		   for others */
774 		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank1)
775 			set_rank_write_level(s, ch, config, rank1, rank0, 1);
776 
777 		mchbar_clrsetbits8(0x298 + 2 + 0x400 * ch, 0x0f, odt_force[config][rank0]);
778 		mchbar_clrsetbits8(0x271 + 0x400 * ch, 0x7e, 0x4e);
779 		mchbar_setbits8(0x5d9 + 0x400 * ch, 1 << 2);
780 		mchbar_clrsetbits32(0x1a0, 0x07ffffff, 0x00014000);
781 
782 		if (increment_to_dqs_edge(s, ch, rank0))
783 			die("Write Leveling failed!");
784 
785 		mchbar_clrbits8(0x298 + 2 + 0x400 * ch, 0x0f);
786 		mchbar_clrsetbits8(0x271 + 0x400 * ch, 0x7e, 0x0e);
787 		mchbar_clrbits8(0x5d9 + 0x400 * ch, 1 << 2);
788 		mchbar_clrsetbits32(0x1a0, 0x07ffffff, 0x00555801);
789 
790 		/* Disable WL on the trained rank */
791 		set_rank_write_level(s, ch, config, rank0, rank0, 0);
792 		send_jedec_cmd(s, rank0, ch, NORMALOP_CMD, 1 << 12);
793 
794 		mchbar_setbits8(0x5d8 + 0x400 * ch, 0x0e);
795 		mchbar_clrsetbits16(0x5c4 + 0x400 * ch, 0x3fff, 0x1807);
796 		mchbar_clrbits8(0x265 + 0x400 * ch, 0x1f);
797 
798 		/* Disable write level mode for all ranks */
799 		FOR_EACH_POPULATED_RANK_IN_CHANNEL(s->dimms, ch, rank0)
800 			set_rank_write_level(s, ch, config, rank0, rank0, 0);
801 	}
802 
803 	mchbar_setbits8(0x5dc, 1 << 7);
804 
805 	/* Increment DQ (rx) dll setting by a standard amount past DQS,
806 	   This is further trained in write training. */
807 	switch (s->selected_timings.mem_clk) {
808 	default:
809 	case MEM_CLOCK_800MHz:
810 		count = 39;
811 		break;
812 	case MEM_CLOCK_1066MHz:
813 		count = 32;
814 		break;
815 	case MEM_CLOCK_1333MHz:
816 		count = 42;
817 		break;
818 	}
819 
820 	FOR_EACH_POPULATED_CHANNEL_AND_BYTELANE(s->dimms, ch, lane) {
821 		dq_setting = s->dqs_settings[ch][lane];
822 		for (i = 0; i < count; i++)
823 			if (increment_dq_dqs(s, &dq_setting))
824 				die("Can't further increase DQ past DQS delay");
825 		dqset(ch, lane, &dq_setting);
826 	}
827 
828 	printk(BIOS_DEBUG, "Done write levelling.\n");
829 }
830