xref: /aosp_15_r20/external/coreboot/src/soc/mediatek/mt8173/dramc_pi_calibration_api.c (revision b9411a12aaaa7e1e6a6fb7c5e057f44ee179a49c)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 
3 #include <device/mmio.h>
4 #include <console/console.h>
5 #include <soc/addressmap.h>
6 #include <soc/dramc_common.h>
7 #include <soc/dramc_register.h>
8 #include <soc/dramc_pi_api.h>
9 #include <soc/dramc_soc.h>
10 #include <soc/emi.h>
11 
/*
 * Calibration results cached between the calibration steps in this file.
 *
 * opt_gw_coarse_value / opt_gw_fine_value: adjusted DQS gating-window
 * coarse and fine values, stored per channel and rank by rx_dqs_gating_cal()
 * and read back by the dual-rank and RANKINCTL routines.
 *
 * wrlevel_dqs_dly: write-leveling DQS delays, copied from the SDRAM
 * parameters by write_leveling() and consumed by tx_delay_for_wrleveling().
 */
12 static u8 opt_gw_coarse_value[CHANNEL_NUM][DUAL_RANKS];
13 static u8 opt_gw_fine_value[CHANNEL_NUM][DUAL_RANKS];
14 static s8 wrlevel_dqs_dly[CHANNEL_NUM][DQS_NUMBER];
15 
/*
 * Apply the driving impedance codes from the SDRAM parameters to one channel.
 *
 * Nothing is measured here: the DRVP/DRVN codes are taken from
 * sdram_params->calib_params and written to bits [31:28]/[27:24] and
 * [15:12]/[11:8] of every driving register, after which the enable and
 * DRVP/DRVN fields of IMPCAL are cleared.
 *
 * @param channel       index into ch[]
 * @param sdram_params  parameters providing impedance_drvp / impedance_drvn
 */
void sw_impedance_cal(u32 channel,
		      const struct mt8173_sdram_params *sdram_params)
{
	const struct mt8173_calib_params *params = &sdram_params->calib_params;

	/*
	 * Build the field values in u32: shifting a (promoted) signed int
	 * into bit 31 is undefined behaviour, and an out-of-range code must
	 * not leak into bits outside the 4-bit fields cleared by the mask.
	 */
	const u32 drvp = (u32)params->impedance_drvp & 0xf;
	const u32 drvn = (u32)params->impedance_drvn & 0xf;
	const u32 mask = 0xfU << 28 | 0xfU << 24 | 0xfU << 12 | 0xfU << 8;
	const u32 value = drvp << 28 | drvn << 24 | drvp << 12 | drvn << 8;

	dramc_dbg("[Imp Calibration] DRVP:%d\n", params->impedance_drvp);
	dramc_dbg("[Imp Calibration] DRVN:%d\n", params->impedance_drvn);

	/* DQS and DQ */
	clrsetbits32(&ch[channel].ao_regs->iodrv6, mask, value);
	/* CLK and CMD */
	clrsetbits32(&ch[channel].ao_regs->drvctl1, mask, value);
	clrsetbits32(&ch[channel].ddrphy_regs->drvctl1, mask, value);
	/* DQ_2 and CMD_2 */
	clrsetbits32(&ch[channel].ao_regs->iodrv4, mask, value);
	/* disable impcal calibration */
	clrbits32(&ch[channel].ao_regs->impcal, 1U << IMP_CALI_ENP_SHIFT |
						   1U << IMP_CALI_ENN_SHIFT |
						   1U << IMP_CALI_EN_SHIFT  |
						   0xfU << IMP_CALI_DRVP_SHIFT |
						   0xfU << IMP_CALI_DRVN_SHIFT);
}
45 
/*
 * Program the command/address (CA) output delays of one channel.
 *
 * The per-pin shifts and the clock centre come from
 * sdram_params->calib_params (ca_train / ca_train_center). Each CA pin gets
 * its own delay field; CKE and CS use the rounded average of all CA shifts,
 * and the centre value is written to the clock output delay.
 *
 * @param channel       index into ch[] and into the per-channel tables
 * @param sdram_params  parameters providing the CA training values
 */
void ca_training(u32 channel, const struct mt8173_sdram_params *sdram_params)
{
	const struct mt8173_calib_params *params = &sdram_params->calib_params;

	u32 i, ca_shift_avg32 = 0;
	/*
	 * Sum in int: CATRAINING_NUM 4-bit shifts can add up to more than
	 * an s8 can hold, which would corrupt the CKE/CS average below.
	 */
	int ca_shift_sum = 0;
	s8 ca_max_center, order, ca_shift[CATRAINING_NUM];

	/* bit position of each CA pin's delay field inside its register */
	static const s8 shift[CATRAINING_NUM] = {
		CMDDLY0_RA2_SHIFT, CMDDLY1_RA7_SHIFT, CMDDLY3_BA0_SHIFT,
		CMDDLY3_BA1_SHIFT, CMDDLY3_BA2_SHIFT, CMDDLY4_RAS_SHIFT,
		CMDDLY4_CAS_SHIFT, CMDDLY5_RA13_SHIFT, CMDDLY5_WE_SHIFT
	};

	/* which ca_shift[] entry feeds each delay field, per channel */
	static const s8 ca_order[CHANNEL_NUM][CATRAINING_NUM] = {
		{ 7, 5, 6, 1, 3, 0, 9, 8, 2, 4},
		{ 2, 0, 3, 7, 5, 9, 4, 1, 6, 8}
	};

	/* cmddly[] register index holding each delay field */
	static const s8 cmd_order[CATRAINING_NUM] = {
		0, 1, 3, 3, 3, 4, 4, 5, 5
	};

	for (i = 0; i < CATRAINING_NUM; i++) {
		ca_shift[i] = params->ca_train[channel][i];
		ca_shift_sum += ca_shift[i];
	}

	/* CA pins align the center */
	ca_max_center = params->ca_train_center[channel];

	/* set CA pins output delay */
	for (i = 0; i < (CATRAINING_NUM - 1); i++) {
		order = ca_order[channel][i];
		clrsetbits32(&ch[channel].ddrphy_regs->cmddly[cmd_order[i]],
			     0xf << shift[i], ca_shift[order] << shift[i]);
	}

	/* the last pin's field (RA14) lives in DQSCAL0, not in cmddly[] */
	order = ca_order[channel][9];
	clrsetbits32(&ch[channel].ddrphy_regs->dqscal0,
		     0xf << DQSCAL0_RA14_SHIFT,
		     ca_shift[order] << DQSCAL0_RA14_SHIFT);

	/* CKE and CS delay: average of the CA shifts, rounded to nearest */
	ca_shift_avg32 = (u32)(ca_shift_sum + (CATRAINING_NUM >> 1));
	ca_shift_avg32 /= (u32)CATRAINING_NUM;

	/* CKEDLY */
	clrsetbits32(&ch[channel].ddrphy_regs->cmddly[4],
		     0x1f << CMDDLY4_CS_SHIFT |
		     0x1f << CMDDLY4_CKE_SHIFT,
		     ca_shift_avg32 << CMDDLY4_CS_SHIFT |
		     ca_shift_avg32 << CMDDLY4_CKE_SHIFT);

	/* CKE1DLY */
	clrsetbits32(&ch[channel].ao_regs->dqscal1,
		     0x1f << DQSCAL1_CKE1_SHIFT,
		     ca_shift_avg32 << DQSCAL1_CKE1_SHIFT);

	/* CS1DLY */
	clrsetbits32(&ch[channel].ddrphy_regs->padctl1,
		     0xf << PADCTL1_CS1_SHIFT,
		     ca_shift_avg32 << PADCTL1_CS1_SHIFT);

	/* set max center into clk output delay */
	clrsetbits32(&ch[channel].ddrphy_regs->padctl1,
		     0xf << PADCTL1_CLK_SHIFT,
		     ca_max_center << PADCTL1_CLK_SHIFT);

	dramc_dbg("=========================================\n");
	dramc_dbg("   [Channel %d] CA training\n", channel);
	dramc_dbg("=========================================\n");

	for (i = 0; i < CATRAINING_NUM; i++)
		dramc_dbg("[CA] CA %d\tShift %d\n", i, ca_shift[i]);

	dramc_dbg("[CA] Reg CMDDLY4 = %xh\n",
			read32(&ch[channel].ddrphy_regs->cmddly[4]));
	dramc_dbg("[CA] Reg DQSCAL1 = %xh\n",
			read32(&ch[channel].ao_regs->dqscal1));
	dramc_dbg("[CA] Reg PADCTL1 = %xh\n",
			read32(&ch[channel].ddrphy_regs->padctl1));
}
128 
/*
 * Program the write-leveling delays of one channel.
 *
 * The per-byte DQS delays are copied from
 * sdram_params->calib_params.wr_level into wrlevel_dqs_dly[] (kept for
 * tx_delay_for_wrleveling()), then written as 4-bit fields to PADCTL3 (DQS),
 * PADCTL2 (DQM) and DQODLY[] (DQ, every bit of a byte gets that byte's
 * DQS delay).
 *
 * @param channel       index into ch[] and wrlevel_dqs_dly[]
 * @param sdram_params  parameters providing the write-leveling values
 */
void write_leveling(u32 channel, const struct mt8173_sdram_params *sdram_params)
{
	u8 i, byte_i;
	u32 value;

	for (i = 0; i < DQS_NUMBER; i++)
		wrlevel_dqs_dly[channel][i] =
			sdram_params->calib_params.wr_level[channel][i];

	/* DQS: one 4-bit field per byte lane */
	value = 0;
	for (i = 0; i < DQS_NUMBER; i++)
		value += ((u32)wrlevel_dqs_dly[channel][i]) << (4 * i);
	write32(&ch[channel].ddrphy_regs->padctl3, value);

	/* DQM: same packed value, moved into the PADCTL2 field */
	clrsetbits32(&ch[channel].ddrphy_regs->padctl2, MASK_PADCTL2_32BIT,
		     (value << PADCTL2_SHIFT) & MASK_PADCTL2_32BIT);

	/* DQ: replicate the byte's DQS delay into each bit's 4-bit field */
	for (byte_i = 0; byte_i < DQS_NUMBER; byte_i++) {
		value = 0;
		for (i = 0; i < DQS_BIT_NUMBER; i++) {
			s8 val = wrlevel_dqs_dly[channel][byte_i];
			value += (((u32)val) << (4 * i));
		}
		write32(&ch[channel].ddrphy_regs->dqodly[byte_i], value);
	}

	dramc_dbg("========================================\n");
	dramc_dbg("[Channel %d] dramc_write_leveling_swcal\n", channel);
	dramc_dbg("========================================\n");

	/* terminate the line so it does not run into the DQM message */
	dramc_dbg("[WL] DQS: %#x\n",
			read32(&ch[channel].ddrphy_regs->padctl3));
	dramc_dbg("[WL] DQM: %#x\n",
			read32(&ch[channel].ddrphy_regs->padctl2));

	for (byte_i = 0; byte_i < DQS_NUMBER; byte_i++)
		dramc_dbg("[WL] DQ byte%d: %#x\n", byte_i,
			     read32(&ch[channel].ddrphy_regs->dqodly[byte_i]));
}
171 
/*
 * Program the rank 0 gating-window coarse tune for phases P0 and P1.
 *
 * curr_val is split into a DQSINCTL part (shared by both phases), a
 * TXDLY_DQSGATE(_P1) part and the two low bits dly_DQSGATE(_P1). P1 uses
 * curr_val + 2, i.e. it trails P0 by 0.5T.
 */
static void set_gw_coarse_factor(u32 channel, u8 curr_val)
{
	const u8 p1_val = curr_val + 2;  /* diff is 0.5T */
	u32 dqsinctl = curr_val >> 2;
	u8 txdly_p0, txdly_p1;

	/* derive DQSINCTL from the upper bits, then cap it to the field */
	if (dqsinctl > 3)
		dqsinctl -= 3;
	else if (dqsinctl)
		dqsinctl = 1;

	if (dqsinctl > 15)
		dqsinctl = 15;

	/* Rank 0 P0/P1 coarse tune settings */
	clrsetbits32(&ch[channel].ao_regs->dqsctl1,
		     0xf << DQSCTL1_DQSINCTL_SHIFT,
		     dqsinctl << DQSCTL1_DQSINCTL_SHIFT &
		     0xf << DQSCTL1_DQSINCTL_SHIFT);

	/*
	 * DQSINCTL has no P1 counterpart, so the per-phase difference in
	 * 1 M_CK coarse steps goes into TXDLY_DQSGATE / TXDLY_DQSGATE_P1.
	 */
	txdly_p0 = (curr_val >> 2) - dqsinctl;
	txdly_p1 = (p1_val >> 2) - dqsinctl;

	clrsetbits32(&ch[channel].ao_regs->selph2,
		     0x7 << SELPH2_TXDLY_DQSGATE_SHIFT |
		     0x7 << SELPH2_TXDLY_DQSGATE_P1_SHIFT,
		     txdly_p0 << SELPH2_TXDLY_DQSGATE_SHIFT |
		     txdly_p1 << SELPH2_TXDLY_DQSGATE_P1_SHIFT);

	/* dly_DQSGATE and dly_DQSGATE_P1 take the two low bits */
	clrsetbits32(&ch[channel].ao_regs->selph5,
		     0x3 << SELPH5_DLY_DQSGATE_SHIFT |
		     0x3 << SELPH5_DLY_DQSGATE_P1_SHIFT,
		     (curr_val & 0x3) << SELPH5_DLY_DQSGATE_SHIFT |
		     (p1_val & 0x3) << SELPH5_DLY_DQSGATE_P1_SHIFT);
}
217 
/*
 * Write the same gating-window fine tune value to all four DQS fields of
 * dqsien[rank].
 */
static void set_gw_fine_factor(u32 channel, u8 curr_val, u8 rank)
{
	/*
	 * NOTE(review): the value is masked with the shifted DQS0 mask and
	 * then shifted again below; this is only a plain 7-bit mask if
	 * DQSIEN_DQS0IEN_SHIFT is 0 - confirm against the register header.
	 */
	const u32 tap = curr_val & (0x7f << DQSIEN_DQS0IEN_SHIFT);

	const u32 all_fields = 0x7f << DQSIEN_DQS0IEN_SHIFT |
			       0x7f << DQSIEN_DQS1IEN_SHIFT |
			       0x7f << DQSIEN_DQS2IEN_SHIFT |
			       0x7f << DQSIEN_DQS3IEN_SHIFT;

	const u32 all_taps = tap << DQSIEN_DQS0IEN_SHIFT |
			     tap << DQSIEN_DQS1IEN_SHIFT |
			     tap << DQSIEN_DQS2IEN_SHIFT |
			     tap << DQSIEN_DQS3IEN_SHIFT;

	clrsetbits32(&ch[channel].ao_regs->dqsien[rank], all_fields, all_taps);
}
232 
/*
 * Program the rank 1 gating-window coarse tune for phases P0 and P1.
 *
 * Unlike the rank 0 variant, DQSINCTL is not derived here: the caller
 * passes the value to use, and it is written to DQSCTL2. P1 uses
 * curr_val + 2 (0.5T later than P0).
 */
static void set_gw_coarse_factor_rank1(u32 channel, u8 curr_val, u8 dqsinctl)
{
	const u8 p1_val = curr_val + 2;  /* diff is 0.5T */
	/* TXDLY_R1DQSGATE and TXDLY_R1DQSGATE_P1 */
	const u8 txdly_p0 = (curr_val >> 2) - dqsinctl;
	const u8 txdly_p1 = (p1_val >> 2) - dqsinctl;

	clrsetbits32(&ch[channel].ao_regs->dqsctl2,
		     0xf << DQSCTL2_DQSINCTL_SHIFT,
		     dqsinctl << DQSCTL2_DQSINCTL_SHIFT);

	clrsetbits32(&ch[channel].ao_regs->selph6_1,
		     0x7 << SELPH6_1_TXDLY_R1DQSGATE_SHIFT |
		     0x7 << SELPH6_1_TXDLY_R1DQSGATE_P1_SHIFT,
		     txdly_p0 << SELPH6_1_TXDLY_R1DQSGATE_SHIFT |
		     txdly_p1 << SELPH6_1_TXDLY_R1DQSGATE_P1_SHIFT);

	/* dly_R1DQSGATE and dly_R1DQSGATE_P1 take the two low bits */
	clrsetbits32(&ch[channel].ao_regs->selph6_1,
		     0x3 << SELPH6_1_DLY_R1DQSGATE_SHIFT |
		     0x3 << SELPH6_1_DLY_R1DQSGATE_P1_SHIFT,
		     (curr_val & 0x3) << SELPH6_1_DLY_R1DQSGATE_SHIFT |
		     (p1_val & 0x3) << SELPH6_1_DLY_R1DQSGATE_P1_SHIFT);
}
260 
/*
 * Pulse the DQS gating counter reset bit in SPCMD (set, then clear) and
 * reset the DRAM PHY afterwards.
 */
static void dqs_gw_counter_reset(u32 channel)
{
	const u32 cnt_rst = 1 << SPCMD_DQSGCNTRST_SHIFT;

	/* reset dqs counter (1 to 0) */
	setbits32(&ch[channel].ao_regs->spcmd, cnt_rst);
	clrbits32(&ch[channel].ao_regs->spcmd, cnt_rst);

	dramc_phy_reset(channel);
}
268 
/*
 * Run one read test with the current gating-window setting.
 *
 * @return 1 when both DQS gating counters (DQS0/1 and DQS2/3) equal
 *         DQS_GW_GOLD_COUNTER_32BIT, 0 otherwise.
 */
static int dqs_gw_test(u32 channel)
{
	u32 cnt_dqs01, cnt_dqs23;

	/* read data counter reset in PHY layer */
	dqs_gw_counter_reset(channel);

	/* use audio pattern to run the test */
	dramc_engine2(channel, TE_OP_READ_CHECK, DQS_GW_PATTERN2,
		      DQS_GW_PATTERN1 | DQS_GW_TE_OFFSET, 1, 0);

	/* get coarse result of DQS0, 1, 2, 3 */
	cnt_dqs01 = read32(&ch[channel].nao_regs->dqsgnwcnt[0]);
	cnt_dqs23 = read32(&ch[channel].nao_regs->dqsgnwcnt[1]);

	if (cnt_dqs01 != DQS_GW_GOLD_COUNTER_32BIT)
		return 0;

	return (cnt_dqs23 == DQS_GW_GOLD_COUNTER_32BIT) ? 1 : 0;
}
290 
/*
 * Search around fine_val for the gating-window fine tune value.
 *
 * Seven candidates (fine_val - 48 ... fine_val + 48 in steps of 16) are
 * tested; the result is fine_val plus the average offset of all passing
 * candidates. Dies if none of them passes.
 *
 * @return the adjusted fine tune value
 */
static u8 dqs_gw_fine_tune_calib(u32 channel, u8 fine_val)
{
	static const s8 offset[7] = {-48, -32, -16, 0, 16, 32, 48};
	/* keep every candidate inside the 0..127 fine tune range */
	const int lowest = 0 - offset[0];
	const int highest = 127 - offset[6];
	int passed = 0, offset_sum = 0;
	u8 n, candidate;

	if (fine_val < lowest)
		fine_val = lowest;
	else if (fine_val > highest)
		fine_val = highest;

	/* test gw fine tune */
	for (n = 0; n < ARRAY_SIZE(offset); n++) {
		candidate = fine_val + offset[n];
		set_gw_fine_factor(channel, candidate, 0);
		if (!dqs_gw_test(channel))
			continue;
		passed++;
		offset_sum += offset[n];
	}

	if (passed == 0)
		die("[GW] ERROR, Fine-Tuning failed.\n");

	candidate = fine_val + (offset_sum / passed);

	return candidate;
}
318 
/*
 * Pick the gating-window coarse tune value.
 *
 * Tries coarse_val, coarse_val + 1 and coarse_val - 1 in that order and
 * returns the first one that passes dqs_gw_test(). If none passes, an error
 * is logged and coarse_val is returned unchanged (the hardware is left
 * programmed with the last candidate tried).
 */
static u8 dqs_gw_coarse_tune_calib(u32 channel, u8 coarse_val)
{
	static const s8 offset[3] = {0, 1, -1};
	u8 n;

	for (n = 0; n < 3; n++) {
		/* adjust gw coarse tune value */
		const u8 candidate = coarse_val + offset[n];

		set_gw_coarse_factor(channel, candidate);

		/* first passing candidate wins */
		if (dqs_gw_test(channel) != 0)
			return candidate;
	}

	/* abnormal test result, set to default coarse tune value */
	printk(BIOS_ERR, "[GW] ERROR, No found coarse tune!!!\n");

	return coarse_val;
}
340 
/*
 * DQS gating-window calibration for one rank of a channel.
 *
 * Starts from the default (coarse, fine) pair in
 * sdram_params->calib_params.gating_win, refines coarse first and fine
 * second, programs the result and stores it in opt_gw_coarse_value[] /
 * opt_gw_fine_value[].
 *
 * All register programming here goes through the rank 0 setters
 * (set_gw_coarse_factor() and dqsien[0]) regardless of `rank`;
 * dual_rank_rx_dqs_gating_cal() swaps the chip selects before calling this
 * for rank 1.
 */
void rx_dqs_gating_cal(u32 channel, u8 rank,
		       const struct mt8173_sdram_params *sdram_params)
{
	u8 coarse_dflt, fine_dflt;

	/* disable HW gating */
	clrbits32(&ch[channel].ao_regs->dqscal0,
		  1 << DQSCAL0_STBCALEN_SHIFT);
	/* enable DQS gating window counter */
	setbits32(&ch[channel].ao_regs->dqsctl1,
		  1 << DQSCTL1_DQSIENMODE_SHIFT);
	setbits32(&ch[channel].ao_regs->spcmd,
		  1 << SPCMD_DQSGCNTEN_SHIFT);
	/* dual-phase DQS clock gating control enabling */
	setbits32(&ch[channel].ddrphy_regs->dqsgctl,
		  1 << DQSGCTL_DQSGDUALP_SHIFT);

	/* gating calibration value: [0] is coarse, [1] is fine */
	coarse_dflt = sdram_params->calib_params.gating_win[channel][rank][0];
	fine_dflt = sdram_params->calib_params.gating_win[channel][rank][1];

	dramc_dbg("****************************************************\n");
	dramc_dbg("Channel %d Rank %d DQS GW Calibration\n", channel, rank);
	dramc_dbg("Default (coarse, fine) tune value %d, %d.\n",
		  coarse_dflt, fine_dflt);
	dramc_dbg("****************************************************\n");

	/* set default coarse and fine value */
	set_gw_coarse_factor(channel, coarse_dflt);
	set_gw_fine_factor(channel, fine_dflt, 0);

	/* adjust gw coarse tune, then apply it */
	opt_gw_coarse_value[channel][rank] =
		dqs_gw_coarse_tune_calib(channel, coarse_dflt);
	set_gw_coarse_factor(channel, opt_gw_coarse_value[channel][rank]);

	/* adjust gw fine tune, then apply it */
	opt_gw_fine_value[channel][rank] =
		dqs_gw_fine_tune_calib(channel, fine_dflt);
	set_gw_fine_factor(channel, opt_gw_fine_value[channel][rank], 0);

	/* read data counter reset in PHY layer */
	dqs_gw_counter_reset(channel);

	/* gating window training result */
	printk(BIOS_INFO, "[GW] [Channel %d] [Rank %d] adjusted (coarse, fine) tune value: %d, %d.\n",
	       channel, rank, opt_gw_coarse_value[channel][rank],
	       opt_gw_fine_value[channel][rank]);
}
394 
/*
 * DQS gating-window calibration for both ranks of a channel.
 *
 * Rank 0 is calibrated first. Rank 1 is then calibrated with the chip
 * selects swapped, its result is written to the rank 1 registers using the
 * DQSINCTL value that rank 0 ended up with, and finally the swap is undone
 * and the rank 0 values are restored.
 */
void dual_rank_rx_dqs_gating_cal(u32 channel,
				 const struct mt8173_sdram_params *sdram_params)
{
	u32 r0_dqsinctl;

	/* rank 0 gw calibration */
	rx_dqs_gating_cal(channel, 0, sdram_params);

	/* get dqsinctl after rank 0 calibration */
	r0_dqsinctl = read32(&ch[channel].ao_regs->dqsctl1);
	r0_dqsinctl >>= DQSCTL1_DQSINCTL_SHIFT;
	r0_dqsinctl &= (0xf << 0);

	/* swap cs0 and cs1 */
	setbits32(&ch[channel].ao_regs->rkcfg, MASK_RKCFG_RKSWAP_EN);

	/* rank 1 gw calibration */
	rx_dqs_gating_cal(channel, 1, sdram_params);

	/* set rank 1 coarse tune and fine tune */
	set_gw_coarse_factor_rank1(channel, opt_gw_coarse_value[channel][1],
				   r0_dqsinctl);
	set_gw_fine_factor(channel, opt_gw_fine_value[channel][1], 1);

	/* swap cs back */
	clrbits32(&ch[channel].ao_regs->rkcfg, MASK_RKCFG_RKSWAP_EN);

	/* set rank 0 coarse tune and fine tune back */
	set_gw_coarse_factor(channel, opt_gw_coarse_value[channel][0]);
	set_gw_fine_factor(channel, opt_gw_fine_value[channel][0], 0);
}
425 
/*
 * Configure RANKINCTL from the calibrated gating-window coarse values.
 *
 * On dual-rank channels the smaller of the two ranks' coarse values,
 * divided by four, is written to both RANKINCTL_ROOT1 and RANKINCTL.
 * The PBREF_DISBYRATE bit in RKCFG is set in every case.
 */
void dramc_rankinctl_config(u32 channel,
			    const struct mt8173_sdram_params *sdram_params)
{
	if (is_dual_rank(channel, sdram_params)) {
		const u8 r0_coarse = opt_gw_coarse_value[channel][0];
		const u8 r1_coarse = opt_gw_coarse_value[channel][1];
		/* RANKINCTL_ROOT1 = DQSINCTL + reg_TX_DLY_DQSGATE */
		const u32 root1 =
			(r0_coarse < r1_coarse ? r0_coarse : r1_coarse) >> 2;

		clrsetbits32(&ch[channel].ao_regs->dummy, 0xf, root1);

		/* RANKINCTL = RANKINCTL_ROOT1 */
		clrsetbits32(&ch[channel].ao_regs->dqscal1,
			     0xf << 16, root1 << 16);
	}

	/* disable per-bank refresh when refresh rate >= 5 */
	setbits32(&ch[channel].ao_regs->rkcfg,
		  1 << RKCFG_PBREF_DISBYRATE_SHIFT);
}
446 
/*
 * Run one write/read/compare pass of the test engine with the calibration
 * patterns and return whatever dramc_engine2() reports.
 */
u32 dram_k_perbit(u32 channel)
{
	/* use XTALK pattern to run the test */
	return dramc_engine2(channel, TE_OP_WRITE_READ_CHECK,
			     DEFAULT_TEST2_1_CAL, DEFAULT_TEST2_2_CAL,
			     2, 0);
}
457 
/*
 * Feed one DQS delay tap result into the pass-window tracker of a bit.
 *
 * first_dqsdly_pass == -1 / last_dqsdly_pass == -2 mean "no window open".
 * A passing tap opens a window if none is open; the window is closed either
 * by the first failing tap after it or by a pass on the last tap. On close,
 * the window replaces the best one recorded so far if it is wider, and the
 * tracker is re-armed for the next window.
 *
 * @param p          per-bit tracking state, updated in place
 * @param dly_step   tap that was just tested
 * @param last_step  final tap of the sweep
 * @param fail_bit   0 if this bit passed at dly_step, non-zero otherwise
 */
void dramk_check_dqs_win(struct dqs_perbit_dly *p, u8 dly_step, u8 last_step,
			 u32 fail_bit)
{
	s8 cur_width, best_width;

	if (fail_bit != 0) {
		/* a failure only matters when it terminates an open window */
		if (p->first_dqsdly_pass == -1 || p->last_dqsdly_pass != -2)
			return;
		p->last_dqsdly_pass = dly_step - 1;
	} else {
		/* first DQS pass delay tap */
		if (p->first_dqsdly_pass == -1)
			p->first_dqsdly_pass = dly_step;

		/* still inside the window unless this is the last tap */
		if (p->last_dqsdly_pass != -2 || dly_step != last_step)
			return;
		p->last_dqsdly_pass = dly_step;
	}

	/* window closed: keep it if it beats the best one so far */
	cur_width = p->last_dqsdly_pass - p->first_dqsdly_pass;
	best_width = p->best_last_dqsdly_pass - p->best_first_dqsdly_pass;
	if (cur_width > best_width) {
		p->best_last_dqsdly_pass = p->last_dqsdly_pass;
		p->best_first_dqsdly_pass = p->first_dqsdly_pass;
	}

	/* clear to find the next pass range if it has */
	p->first_dqsdly_pass = -1;
	p->last_dqsdly_pass = -2;
}
500 
/*
 * Feed one DQ delay tap result into the pass-window tracker of a bit.
 *
 * first_dqdly_pass == -1 / last_dqdly_pass == -2 mean "no window open".
 * A passing tap opens a window if none is open; the window is closed either
 * by the first failing tap after it or by a pass on the last tap. On close,
 * the window replaces the best one recorded so far if it is wider, and the
 * tracker is re-armed for the next window.
 *
 * @param p          per-bit tracking state, updated in place
 * @param dly_step   tap that was just tested
 * @param last_step  final tap of the sweep
 * @param fail_bit   0 if this bit passed at dly_step, non-zero otherwise
 */
void dramk_check_dq_win(struct dqs_perbit_dly *p, u8 dly_step, u8 last_step,
			u32 fail_bit)
{
	s8 cur_width, best_width;

	if (fail_bit != 0) {
		/* a failure only matters when it terminates an open window */
		if (p->first_dqdly_pass == -1 || p->last_dqdly_pass != -2)
			return;
		p->last_dqdly_pass = dly_step - 1;
	} else {
		/* first DQ pass delay tap */
		if (p->first_dqdly_pass == -1)
			p->first_dqdly_pass = dly_step;

		/* still inside the window unless this is the last tap */
		if (p->last_dqdly_pass != -2 || dly_step != last_step)
			return;
		p->last_dqdly_pass = dly_step;
	}

	/* window closed: keep it if it beats the best one so far */
	cur_width = p->last_dqdly_pass - p->first_dqdly_pass;
	best_width = p->best_last_dqdly_pass - p->best_first_dqdly_pass;
	if (cur_width > best_width) {
		p->best_last_dqdly_pass = p->last_dqdly_pass;
		p->best_first_dqdly_pass = p->first_dqdly_pass;
	}

	/* clear to find the next pass range if it has */
	p->first_dqdly_pass = -1;
	p->last_dqdly_pass = -2;
}
544 
/*
 * Derive the best DQ and DQS delay of one bit from its best pass windows.
 *
 * The DQS window width is the hold margin and the DQ window width the setup
 * margin, laid out on one tap axis like this:
 *
 *   setup time(dq delay)  hold time(dqs delay)
 *   xxxxxxxxxxxxxoooooooo|ooooooooooooooooooooxxxxx
 *   15                  0 1                  15 tap
 *
 * The larger margin is reduced by delaying the corresponding signal by half
 * the difference; the other delay is set to 0.
 *
 * @param bit         bit number, used for messages only
 * @param p           per-bit state; best_dqdly / best_dqsdly are written
 * @param p_max_byte  raised to best_dqsdly when that is larger (only in the
 *                    hold > setup case)
 * @return 1 when both windows are empty (the bit never passed), 0 otherwise
 */
u8 dramk_calcu_best_dly(u8 bit, struct dqs_perbit_dly *p, u8 *p_max_byte)
{
	/* hold time = DQS pass taps */
	const u8 hold = p->best_last_dqsdly_pass - p->best_first_dqsdly_pass + 1;
	/* setup time = DQ pass taps */
	const u8 setup = p->best_last_dqdly_pass - p->best_first_dqdly_pass + 1;
	u8 failed = 0;

	if (hold > setup) {
		/*
		 * setup != 0: xxxxxxxxxxxxxoooooooo|ooooooooooooooooooooxxxxx
		 * setup == 0: xxxxxxxxxxxxxxxxxxxxx|xxxooooooooooxxxxxxxxxxxx
		 * With no setup window the DQS window does not start at the
		 * origin, so its first passing tap is added.
		 */
		p->best_dqdly = 0;
		if (setup != 0)
			p->best_dqsdly = (hold - setup) / 2;
		else
			p->best_dqsdly = (hold - setup) / 2 +
					 p->best_first_dqsdly_pass;

		if (p->best_dqsdly > *p_max_byte)
			*p_max_byte = p->best_dqsdly;
	} else if (hold < setup) {
		/*
		 * hold != 0: xxxoooooooooooooooooo|ooooooooxxxxxxxxxxxxxxxxx
		 * hold == 0: xxxoooooooooooooooxxx|xxxxxxxxxxxxxxxxxxxxxxxxx
		 */
		p->best_dqsdly = 0;
		if (hold != 0)
			p->best_dqdly = (setup - hold) / 2;
		else
			p->best_dqdly = (setup - hold) / 2 +
					p->best_first_dqdly_pass;
	} else {
		/* hold time == setup time */
		p->best_dqsdly = 0;
		p->best_dqdly = 0;

		if (hold == 0) {
			/* no passing tap at all: this bit is in error */
			printk(BIOS_ERR, "Error at bit %d, "
					 "setup_time = hold_time = 0!!\n", bit);
			failed = 1;
		}
	}

	dramc_dbg("bit#%d : dq =%d dqs=%d win=%d (%d, %d)\n",
		  bit, setup, hold, setup + hold,
		  p->best_dqdly, p->best_dqsdly);

	return failed;
}
606 
/*
 * Apply a fixed clock duty setting (DUTY_SEL = 1, DUTY = 1) to PHYCLKDUTY
 * and log the resulting register value. No search is performed.
 */
void clk_duty_cal(u32 channel)
{
	const u8 duty_sel = 1;
	const u8 duty = 1;
	u32 reg_after;

	clrsetbits32(&ch[channel].ddrphy_regs->phyclkduty,
		     0x3 << PHYCLKDUTY_CMDCLKP0DUTYN_SHIFT |
		     1 << PHYCLKDUTY_CMDCLKP0DUTYP_SHIFT,
		     1 << PHYCLKDUTY_CMDCLKP0DUTYSEL_SHIFT |
		     duty << PHYCLKDUTY_CMDCLKP0DUTYN_SHIFT);

	/* the value logged as "rx window size" is the register read-back */
	reg_after = read32(&ch[channel].ddrphy_regs->phyclkduty);

	dramc_dbg("[Channel %d CLK DUTY CALIB] ", channel);
	dramc_dbg("Final DUTY_SEL=%d, DUTY=%d, rx window size=%d\n",
		  duty_sel, duty, reg_after);
}
626 
/*
 * Program the data latency (DATLAT) value of one channel.
 *
 * curr_val is spread over three registers (bits [2:0] in DDR2CTL, bit 3 in
 * PADCTL4, bit 4 in PHYCTL1). Two derived values are written to MISC:
 * DATLAT_DSEL = curr_val - 8 and LATNORMP = curr_val - 3.
 *
 * @param channel   index into ch[]
 * @param curr_val  DATLAT value to apply
 */
static void set_dle_factor(u32 channel, u8 curr_val)
{
	/*
	 * For small curr_val the derived values go negative. Do the
	 * subtraction result in u32 and confine it to the 5-bit field so it
	 * can neither left-shift a negative int nor set bits outside the
	 * mask that clrsetbits32() clears.
	 */
	const u32 dsel = (u32)(curr_val - 8) & 0x1f;
	const u32 latnormp = (u32)(curr_val - 3) & 0x1f;

	clrsetbits32(&ch[channel].ao_regs->ddr2ctl,
		     0x7 << DDR2CTL_DATLAT_SHIFT,
		     (curr_val & 0x7) << DDR2CTL_DATLAT_SHIFT);

	clrsetbits32(&ch[channel].ao_regs->padctl4,
		     0x1 << PADCTL4_DATLAT3_SHIFT,
		     ((curr_val >> 3) & 0x1) << PADCTL4_DATLAT3_SHIFT);

	clrsetbits32(&ch[channel].ao_regs->phyctl1,
		     0x1 << PHYCTL1_DATLAT4_SHIFT,
		     ((curr_val >> 4) & 0x1) << PHYCTL1_DATLAT4_SHIFT);

	clrsetbits32(&ch[channel].ao_regs->misc,
		     0x1f << MISC_DATLAT_DSEL_SHIFT,
		     dsel << MISC_DATLAT_DSEL_SHIFT);

	/* optimize bandwidth for HW run time test engine use */
	clrsetbits32(&ch[channel].ao_regs->misc,
		     0x1f << MISC_LATNORMP_SHIFT,
		     latnormp << MISC_LATNORMP_SHIFT);
}
650 
/*
 * DATLAT (DLE) calibration for both ranks of a channel.
 *
 * Rank 0 is calibrated directly. For rank 1 the chip selects are swapped
 * and the rank 1 gating-window values are loaded first; afterwards the
 * rank 0 gating values and the chip-select order are restored. The larger
 * of the two DLE results is then programmed.
 */
void dual_rank_rx_datlat_cal(u32 channel,
			     const struct mt8173_sdram_params *sdram_params)
{
	u8 dle_r0, dle_r1;

	/* rank 0 dle calibration */
	dle_r0 = rx_datlat_cal(channel, 0, sdram_params);

	/* swap cs0 and cs1 */
	setbits32(&ch[channel].ao_regs->rkcfg, MASK_RKCFG_RKSWAP_EN);

	/* load the rank 1 gating window while the ranks are swapped */
	set_gw_coarse_factor(channel, opt_gw_coarse_value[channel][1]);
	set_gw_fine_factor(channel, opt_gw_fine_value[channel][1], 0);

	/* rank 1 dle calibration */
	dle_r1 = rx_datlat_cal(channel, 1, sdram_params);

	/* set rank 0 coarse tune and fine tune back */
	set_gw_coarse_factor(channel, opt_gw_coarse_value[channel][0]);
	set_gw_fine_factor(channel, opt_gw_fine_value[channel][0], 0);

	/* swap cs back */
	clrbits32(&ch[channel].ao_regs->rkcfg, MASK_RKCFG_RKSWAP_EN);

	/* output dle setting of rank 0 and 1 */
	dramc_dbg("[DLE] Rank 0 DLE calibrated setting = %xh.\n"
		  "[DLE] Rank 1 DLE calibrated setting = %xh.\n",
		  dle_r0, dle_r1);

	/* program whichever rank needs the larger latency (rank 1 on a tie) */
	if (dle_r0 > dle_r1) {
		dramc_dbg("[DLE] rank 0 > rank 1. set to rank 0.\n");
		set_dle_factor(channel, dle_r0);
	} else {
		dramc_dbg("[DLE] rank 0 < rank 1. use rank 1.\n");
		set_dle_factor(channel, dle_r1);
	}
}
693 
/*
 * DATLAT (DLE) calibration for one rank of a channel.
 *
 * Starting at calib_params.datlat_ucfirst + 1, DLE_TEST_NUM decreasing
 * values are each programmed and tested. If the starting value itself
 * fails, an error is logged and the starting value is kept. Otherwise the
 * first index i whose test passes while the next one fails (or which is the
 * last test) is located and the start value is reduced by (i - 1).
 *
 * @return the DATLAT value that was finally programmed
 */
u8 rx_datlat_cal(u32 channel, u8 rank,
		 const struct mt8173_sdram_params *sdram_params)
{
	u32 result[DLE_TEST_NUM];
	u8 n, datlat;

	dramc_dbg("=========================================\n");
	dramc_dbg("[Channel %d] [Rank %d] DATLAT calibration\n",
		  channel, rank);
	dramc_dbg("=========================================\n");

	/* NOTE(review): 0x11 is hex, not binary 11 - confirm field width */
	clrbits32(&ch[channel].ao_regs->mckdly,
		  0x11 << MCKDLY_DQIENQKEND_SHIFT |
		  0x1  << MCKDLY_DQIENLAT_SHIFT);

	/* set dle calibration initial value */
	datlat = sdram_params->calib_params.datlat_ucfirst + 1;

	/* do dle calibration test */
	for (n = 0; n < DLE_TEST_NUM; n++) {
		set_dle_factor(channel, datlat - n);
		result[n] = dramc_engine2(channel, TE_OP_WRITE_READ_CHECK,
					  DEFAULT_TEST2_1_CAL,
					  DEFAULT_TEST2_2_CAL, 2, 0);
	}

	if (!result[0]) {
		/* judge dle test result */
		for (n = 0; n < DLE_TEST_NUM; n++) {
			if (result[n])
				continue;
			/* keep going while the next step still passes */
			if (n + 1 != DLE_TEST_NUM && !result[n + 1])
				continue;
			/* dle test ok */
			datlat -= (n - 1);
			break;
		}
	} else {
		/* dle test error */
		printk(BIOS_ERR, "[DLE] CH:%d calibration ERROR CMP_ERR =%xh,\n",
			channel, result[0]);
	}

	/* Default dle value is set when test error (error recovery).
	 * Others, adjusted dle calibration value is set normally.
	 */
	set_dle_factor(channel, datlat);

	dramc_dbg("[DLE] adjusted value = %#x\n", datlat);

	return datlat;
}
744 
/*
 * Reconcile the TX per-bit results with the write-leveling DQS delays.
 *
 * For every byte whose per-bit DQS delay does not exceed the
 * write-leveling delay, the difference is added to each DQ delay of that
 * byte and to the byte's DQM delay (both capped at MAX_DQDLY_TAPS - 1), and
 * the DQS delay is replaced by the write-leveling one. Bytes whose per-bit
 * DQS delay is larger are left untouched and a warning is logged.
 *
 * @param channel           index into wrlevel_dqs_dly[]
 * @param dqdqs_perbit_dly  per-bit results; best_dqdly is adjusted in place
 * @param max_dqsdly_byte   per-byte DQS delays, updated in place
 * @param ave_dqdly_byte    per-byte DQM delays, updated in place
 */
void tx_delay_for_wrleveling(u32 channel,
			     struct dqs_perbit_dly *dqdqs_perbit_dly,
			     u8 *max_dqsdly_byte, u8 *ave_dqdly_byte)
{
	const s8 tap_limit = MAX_DQDLY_TAPS - 1;
	s8 bit, byte, extra;

	for (bit = 0; bit < DATA_WIDTH_32BIT; bit++) {
		byte = bit / DQS_BIT_NUMBER;

		if (bit % DQS_BIT_NUMBER == 0)
			dramc_dbg("DQS%d: %d\n", byte,
				  wrlevel_dqs_dly[channel][byte]);

		if (max_dqsdly_byte[byte] > wrlevel_dqs_dly[channel][byte]) {
			/* Originally, we should move clk and CA delay.
			 * Then, do GW calibration again. However, DQ/DQS
			 * skew should not be large in MT8173, so we sacrifice
			 * the Clk/DQS margin by keeping the clk out delay.
			 */
			if (bit % DQS_BIT_NUMBER == 0)
				printk(BIOS_ERR, "[Warning] DQSO %d in TX "
						 "per-bit = %d > DQSO %d in WL = %d  ",
						 byte, max_dqsdly_byte[byte], byte,
						 wrlevel_dqs_dly[channel][byte]);
			continue;
		}

		/* set diff value (delta) */
		extra = wrlevel_dqs_dly[channel][byte] - max_dqsdly_byte[byte];

		dqdqs_perbit_dly[bit].best_dqdly += extra;

		/* max limit to 15 */
		if (dqdqs_perbit_dly[bit].best_dqdly > tap_limit)
			dqdqs_perbit_dly[bit].best_dqdly = tap_limit;

		/*
		 * The byte-wide values are only touched on the byte's last
		 * bit, so every bit of the byte sees the same difference.
		 */
		if ((bit + 1) % DQS_BIT_NUMBER != 0)
			continue;

		/* DQS */
		max_dqsdly_byte[byte] = wrlevel_dqs_dly[channel][byte];
		/* DQM */
		ave_dqdly_byte[byte] += extra;
		/* max limit to 15 */
		if (ave_dqdly_byte[byte] > tap_limit)
			ave_dqdly_byte[byte] = tap_limit;
	}
}
796 
/*
 * Apply one uniform RX delay to every lane.
 *
 * curr_val is replicated into DQS_NUMBER byte-wide fields. RX_DQS writes
 * the result to R0DELDLY; RX_DQ writes it to every DQIDLY register. Any
 * other type is ignored.
 */
static void set_rx_dly_factor(u32 channel, u32 curr_val, u8 type)
{
	u32 n, packed = 0;

	for (n = 0; n < DQS_NUMBER; n++)
		packed += (curr_val << (8 * n));

	if (type == RX_DQS) {
		write32(&ch[channel].ao_regs->r0deldly, packed);
	} else if (type == RX_DQ) {
		/* each DQIDLY register carries four DQ bits */
		for (n = 0; n < DATA_WIDTH_32BIT; n += 4)
			write32(&ch[channel].ao_regs->dqidly[n / 4], packed);
	}
}
814 
/*
 * Apply one uniform TX delay to every lane.
 *
 * curr_val is replicated into 4-bit fields: DQS_BIT_NUMBER of them for
 * TX_DQ, DQS_NUMBER otherwise. TX_DQS writes PADCTL3, TX_DQM writes PADCTL2
 * and TX_DQ writes every DQODLY register. Any other type is ignored.
 */
static void set_tx_dly_factor(u32 channel, u32 curr_val, u8 type)
{
	u32 n, packed = 0;
	u32 fields = DQS_NUMBER;

	if (type == TX_DQ)
		fields = DQS_BIT_NUMBER;

	for (n = 0; n < fields; n++)
		packed += (curr_val << (4 * n));

	if (type == TX_DQS) {
		write32(&ch[channel].ddrphy_regs->padctl3, packed);
	} else if (type == TX_DQM) {
		write32(&ch[channel].ddrphy_regs->padctl2, packed);
	} else if (type == TX_DQ) {
		for (n = 0; n < DQS_NUMBER; n++)
			write32(&ch[channel].ddrphy_regs->dqodly[n], packed);
	}
}
837 
/*
 * Apply a uniform delay for one step of a per-bit window sweep.
 *
 * The setup/hold stage and the TX/RX window type are combined into one
 * selector (stage | type << 1) that decides which signals are delayed:
 * DQ (+DQM for TX) during the setup stage, DQS during the hold stage.
 */
static void set_dly_factor(u32 channel, u8 stage, u8 type, u8 dly)
{
	const int selector = stage | type << 1;

	switch (selector) {
	case STAGE_HOLD_RX_WIN:
		/* set DQS delay for rx window */
		set_rx_dly_factor(channel, dly, RX_DQS);
		break;
	case STAGE_HOLD_TX_WIN:
		/* set DQS delay for tx window */
		set_tx_dly_factor(channel, dly, TX_DQS);
		break;
	case STAGE_SETUP_RX_WIN:
		/* set DQ delay for rx window */
		set_rx_dly_factor(channel, dly, RX_DQ);
		break;
	case STAGE_SETUP_TX_WIN:
		/* set DQ/DQM delay for tx window */
		set_tx_dly_factor(channel, dly, TX_DQ);
		set_tx_dly_factor(channel, dly, TX_DQM);
		break;
	default:
		/* unknown combination: nothing to program */
		break;
	}
}
861 
/*
 * Program the final RX delays.
 *
 * The per-byte DQS delays are packed into byte-wide fields and written to
 * both R0DELDLY and R1DELDLY; the per-bit DQ delays are packed four to a
 * register into DQIDLY[].
 */
static void set_rx_best_dly_factor(u32 channel,
				   struct dqs_perbit_dly *dqdqs_perbit_dly,
				   u8 *max_dqsdly_byte)
{
	u32 n, reg, packed = 0;

	for (n = 0; n < DQS_NUMBER; n++)
		packed += (((u32)max_dqsdly_byte[n]) << (8 * n));

	write32(&ch[channel].ao_regs->r0deldly, packed);
	write32(&ch[channel].ao_regs->r1deldly, packed);

	dramc_dbg("[RX] DQS Reg R0DELDLY=%xh\n",
			read32(&ch[channel].ao_regs->r0deldly));
	dramc_dbg("[RX] DQS Reg R1DELDLY=%xh\n",
			read32(&ch[channel].ao_regs->r1deldly));

	/* every 4bit dq have the same delay register address */
	for (reg = 0; reg * 4 < DATA_WIDTH_32BIT; reg++) {
		const struct dqs_perbit_dly *group = &dqdqs_perbit_dly[reg * 4];

		packed = ((u32)group[0].best_dqdly) +
			 (((u32)group[1].best_dqdly) << 8) +
			 (((u32)group[2].best_dqdly) << 16) +
			 (((u32)group[3].best_dqdly) << 24);

		write32(&ch[channel].ao_regs->dqidly[reg], packed);
		/* registers are numbered from 1 in the log */
		dramc_dbg("[RX] DQ DQIDLY%d = %xh\n", reg + 1, packed);
	}
}
890 
/*
 * Program the calibrated tx delays of one channel.
 *
 * channel:          DRAM channel index into ch[]
 * dqdqs_perbit_dly: per-DQ results; only best_dqdly is read here
 * max_dqsdly_byte:  DQS delay per byte lane, written to padctl3
 * ave_dqdly_byte:   DQM delay per byte lane, merged into padctl2
 *
 * Every delay is packed into a 4-bit wide field of its register.
 */
static void set_tx_best_dly_factor(u32 channel,
				   struct dqs_perbit_dly *dqdqs_perbit_dly,
				   u8 *max_dqsdly_byte, u8 *ave_dqdly_byte)
{
	u32 byte, lane, dq, reg_val;

	/* DQS delay: one 4-bit field per byte lane */
	reg_val = 0;
	for (byte = 0; byte < DQS_NUMBER; byte++)
		reg_val += (u32)max_dqsdly_byte[byte] << (4 * byte);

	write32(&ch[channel].ddrphy_regs->padctl3, reg_val);
	dramc_dbg("[TX] DQS PADCTL3 Reg = %#x\n", reg_val);

	/* DQ delay: each dqodly register collects DQS_BIT_NUMBER DQ fields */
	for (byte = 0; byte < DATA_WIDTH_32BIT / DQS_BIT_NUMBER; byte++) {
		reg_val = 0;
		for (lane = 0; lane < DQS_BIT_NUMBER; lane++) {
			dq = byte * DQS_BIT_NUMBER + lane;
			reg_val += (u32)(dqdqs_perbit_dly[dq].best_dqdly)
				   << (4 * lane);
		}

		write32(&ch[channel].ddrphy_regs->dqodly[byte], reg_val);
		/* the debug output numbers the registers starting from 1 */
		dramc_dbg("[TX] DQ DQ0DLY%d = %xh\n", byte + 1, reg_val);
	}

	/* DQM delay: keep the padctl2 bits selected by MASK_PADCTL2 */
	reg_val = read32(&ch[channel].ddrphy_regs->padctl2) & MASK_PADCTL2;
	for (byte = 0; byte < DQS_NUMBER; byte++)
		reg_val += (u32)ave_dqdly_byte[byte] << (4 * byte);

	write32(&ch[channel].ddrphy_regs->padctl2, reg_val);
	dramc_dbg("[TX] DQM PADCTL2 Reg = %#x\n", reg_val);
}
933 
/*
 * Per-bit DQ/DQS window calibration of one channel.
 *
 * channel: DRAM channel index
 * type:    TX_WIN calibrates the tx window, any other value the rx window
 *
 * Sequence:
 *   1. with DQS delay at FIRST_DQS_DELAY, sweep the DQ delay and record
 *      the pass window of every bit (left boundary)
 *   2. with DQ delay at FIRST_DQ_DELAY, sweep the DQS delay and record
 *      the pass window of every bit (right boundary)
 *   3. compute the best DQ/DQS delay per bit and the max DQS delay per byte
 *   4. use the max DQS delay for each byte and add DQ delay to the other
 *      bits of that byte to de-skew them
 *
 * Calls die() when the accumulated result of dramk_calcu_best_dly() is 1.
 */
void perbit_window_cal(u32 channel, u8 type)
{
	const int tx_win = (type == TX_WIN);
	/* kept as u32 so the range check below is an unsigned comparison */
	const u32 dq_tap_limit = MAX_DQDLY_TAPS - 1;
	u8 dq, tap, skew, dqs_tap_count, fail = 0;
	u8 max_dqsdly_byte[DQS_NUMBER] = { 0 };
	u8 ave_dqdly_byte[DQS_NUMBER] = { 0 };
	u32 err_bits, bit_err, byte;
	struct dqs_perbit_dly dqdqs_perbit_dly[DQ_DATA_WIDTH];
	struct dqs_perbit_dly *win;

	dramc_dbg("\n[Channel %d] %s DQ/DQS per bit :\n",
		  channel, tx_win ? "TX" : "RX");

	if (tx_win) {
		dramc_phy_reset(channel);
	}

	/* start every bit with an empty window (first = -1, last = -2) */
	for (dq = 0; dq < DATA_WIDTH_32BIT; dq++) {
		win = &dqdqs_perbit_dly[dq];

		win->first_dqdly_pass = -1;
		win->best_first_dqdly_pass = -1;
		win->first_dqsdly_pass = -1;
		win->best_first_dqsdly_pass = -1;

		win->last_dqdly_pass = -2;
		win->best_last_dqdly_pass = -2;
		win->last_dqsdly_pass = -2;
		win->best_last_dqsdly_pass = -2;
	}

	/* step 1: hold the DQS delay at its first tap and sweep DQ */
	set_dly_factor(channel, STAGE_HOLD, type, FIRST_DQS_DELAY);

	dramc_dbg("----------------------------------"
		  "--------------------\n");
	dramc_dbg("Start DQ delay to find pass range,"
		  "DQS delay fixed to %#x...\n", FIRST_DQS_DELAY);
	dramc_dbg("----------------------------------"
		  "-------------------\n");
	dramc_dbg("x-axis is bit #; y-axis is DQ delay (%d~%d)\n",
		  FIRST_DQ_DELAY, MAX_DQDLY_TAPS - 1);

	for (tap = FIRST_DQ_DELAY; tap < MAX_DQDLY_TAPS; tap++) {
		set_dly_factor(channel, STAGE_SETUP, type, tap);
		err_bits = dram_k_perbit(channel);

		/* a cleared bit in err_bits means that DQ passed */
		for (dq = 0; dq < DATA_WIDTH_32BIT; dq++) {
			bit_err = err_bits & ((u32)1 << dq);
			dramk_check_dq_win(&dqdqs_perbit_dly[dq], tap,
					   MAX_DQDLY_TAPS - 1, bit_err);
			if (bit_err != 0) {
				dramc_dbg("x");
			} else {
				dramc_dbg("o");
			}
		}
		dramc_dbg("\n");
	}

	/* step 2: put the DQ delay back to its first tap and sweep DQS */
	set_dly_factor(channel, STAGE_SETUP, type, FIRST_DQ_DELAY);

	/* tx and rx have different numbers of DQS delay taps */
	if (tx_win) {
		dqs_tap_count = MAX_TX_DQSDLY_TAPS;
	} else {
		dqs_tap_count = MAX_RX_DQSDLY_TAPS;
	}

	dramc_dbg("-----------------------------------"
		  "-------------------\n");
	dramc_dbg("Start DQS delay to find pass range,"
		  "DQ delay fixed to %#x...\n", FIRST_DQ_DELAY);
	dramc_dbg("------------------------------------"
		  "------------------\n");
	dramc_dbg("x-axis is bit #; y-axis is DQS delay (%d~%d)\n",
		  FIRST_DQS_DELAY + 1, dqs_tap_count - 1);

	/*
	 * The dq_dly = dqs_dly = 0 point was already covered by the DQ sweep,
	 * so the DQS sweep starts one tap above FIRST_DQS_DELAY.
	 */
	for (tap = FIRST_DQS_DELAY + 1; tap < dqs_tap_count; tap++) {
		set_dly_factor(channel, STAGE_HOLD, type, tap);
		err_bits = dram_k_perbit(channel);

		/* a cleared bit in err_bits means that DQ passed */
		for (dq = 0; dq < DATA_WIDTH_32BIT; dq++) {
			bit_err = err_bits & ((u32)1 << dq);
			dramk_check_dqs_win(&dqdqs_perbit_dly[dq], tap,
					    dqs_tap_count - 1, bit_err);
			if (bit_err != 0) {
				dramc_dbg("x");
			} else {
				dramc_dbg("o");
			}
		}
		dramc_dbg("\n");
	}

	/* step 3: derive the best delays from the recorded windows */
	dramc_dbg("-------------------------------"
		  "-----------------------\n");
	dramc_dbg("Start calculate dq time and dqs "
		  "time:\n");
	dramc_dbg("Find max DQS delay per byte / "
		  "Adjust DQ delay to align DQS...\n");
	dramc_dbg("--------------------------------"
		  "----------------------\n");

	/*
	 * First pass: best delay per bit; the helper is handed the max DQS
	 * delay slot of the byte lane this bit belongs to.
	 */
	for (dq = 0; dq < DATA_WIDTH_32BIT; dq++) {
		byte = dq / DQS_BIT_NUMBER;
		fail |= dramk_calcu_best_dly(dq, &dqdqs_perbit_dly[dq],
					     &max_dqsdly_byte[byte]);

		if ((dq + 1) % DQS_BIT_NUMBER == 0) {
			dramc_dbg("----separate line----\n");
		}
	}

	/*
	 * Second pass (step 4): every byte lane uses its max DQS delay, so a
	 * bit whose own best DQS delay is smaller gets the difference added
	 * to its DQ delay, limited to the last DQ tap.
	 */
	for (dq = 0; dq < DATA_WIDTH_32BIT; dq++) {
		byte = dq / DQS_BIT_NUMBER;
		win = &dqdqs_perbit_dly[dq];

		if (win->best_dqsdly < max_dqsdly_byte[byte]) {
			skew = max_dqsdly_byte[byte] - win->best_dqsdly;
			win->best_dqdly += skew;
			if (win->best_dqdly > dq_tap_limit) {
				win->best_dqdly = dq_tap_limit;
			}
		}

		/* sum the DQ delays of the byte, then average for TX DQM */
		ave_dqdly_byte[byte] += win->best_dqdly;
		if ((dq + 1) % DQS_BIT_NUMBER == 0) {
			ave_dqdly_byte[byte] /= DQS_BIT_NUMBER;
		}
	}

	if (fail == 1) { /* error handling */
		die("fail on %s()\n", __func__);
	}

	dramc_dbg("==================================================\n");
	dramc_dbg("        dramc_perbit_window_swcal:\n");
	dramc_dbg("           channel=%d(0:cha, 1:chb)\n", channel);
	dramc_dbg("           bus width=%d\n", DATA_WIDTH_32BIT);
	dramc_dbg("==================================================\n");
	dramc_dbg("DQS Delay :\n DQS0 = %d DQS1 = %d DQS2 = %d DQS3 = %d\n",
		  max_dqsdly_byte[0], max_dqsdly_byte[1],
		  max_dqsdly_byte[2], max_dqsdly_byte[3]);

	if (tx_win) {
		dramc_dbg("DQM Delay :\n"
			  "DQM0 = %d DQM1 = %d DQM2 = %d DQM3 = %d\n",
			  ave_dqdly_byte[0], ave_dqdly_byte[1],
			  ave_dqdly_byte[2], ave_dqdly_byte[3]);
	}

	dramc_dbg("DQ Delay :\n");
	for (dq = 0; dq < DATA_WIDTH_32BIT; dq++) {
		dramc_dbg("DQ%d = %d ", dq, dqdqs_perbit_dly[dq].best_dqdly);
		/* four values per output line */
		if (((dq + 1) % 4) == 0) {
			dramc_dbg("\n");
		}
	}

	dramc_dbg("____________________________________"
		  "____________________________________\n");

	if (tx_win) {
		/* add the CLK to DQS/DQ skew found by write leveling first */
		dramc_dbg("Add CLK to DQS/DQ skew based on write leveling.\n");
		tx_delay_for_wrleveling(channel, dqdqs_perbit_dly,
					max_dqsdly_byte, ave_dqdly_byte);
		set_tx_best_dly_factor(channel, dqdqs_perbit_dly,
				       max_dqsdly_byte, ave_dqdly_byte);
	} else {
		set_rx_best_dly_factor(channel, dqdqs_perbit_dly,
				       max_dqsdly_byte);
	}

	dramc_phy_reset(channel);
}
1130