1 /***********************license start***********************************
2 * Copyright (c) 2003-2017 Cavium Inc. ([email protected]). All rights
3 * reserved.
4 *
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
8 * met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following
15 * disclaimer in the documentation and/or other materials provided
16 * with the distribution.
17 *
18 * * Neither the name of Cavium Inc. nor the names of
19 * its contributors may be used to endorse or promote products
20 * derived from this software without specific prior written
21 * permission.
22 *
23 * This Software, including technical data, may be subject to U.S. export
24 * control laws, including the U.S. Export Administration Act and its
25 * associated regulations, and may be subject to export or import
26 * regulations in other countries.
27 *
28 * TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
29 * AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
30 * WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
31 * TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
32 * REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
33 * DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
34 * OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
35 * PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
36 * QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
37 * ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
38 ***********************license end**************************************/
39 #include <bdk.h>
40 #include <bdk-coreboot.h>
41 #include "libbdk-arch/bdk-csrs-l2c_tad.h"
42 #include "libbdk-arch/bdk-csrs-mio_fus.h"
43 #include "dram-internal.h"
44
45 #include <stdlib.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <libbdk-hal/bdk-config.h>
49 #include <libbdk-hal/bdk-l2c.h>
50 #include <libbdk-hal/bdk-rng.h>
51 #include <libbdk-trust/bdk-trust.h>
52 #include <lame_string.h>
53
54 #define WODT_MASK_2R_1S 1 // FIXME: did not seem to make much difference with #152 1-slot?
55
56 #define DESKEW_RODT_CTL 1
57
58 // Set to 1 to use the feature whenever possible automatically.
59 // When 0, however, the feature is still available, and it can
60 // be enabled via envvar override "ddr_enable_write_deskew=1".
61 #define ENABLE_WRITE_DESKEW_DEFAULT 0
62
63 #define ENABLE_COMPUTED_VREF_ADJUSTMENT 1
64
65 #define RLEXTRAS_PATCH 1 // write to unused RL rank entries
66 #define WLEXTRAS_PATCH 1 // write to unused WL rank entries
67 #define ADD_48_OHM_SKIP 1
68 #define NOSKIP_40_48_OHM 1
69 #define NOSKIP_48_STACKED 1
70 #define NOSKIP_FOR_MINI 1
71 #define NOSKIP_FOR_2S_1R 1
72 #define MAJORITY_OVER_AVG 1
73 #define RANK_MAJORITY MAJORITY_OVER_AVG && 1
74 #define SW_WL_CHECK_PATCH 1 // check validity after SW adjust
75 #define HW_WL_MAJORITY 1
76 #define SWL_TRY_HWL_ALT HW_WL_MAJORITY && 1 // try HW WL base alternate if available when SW WL fails
77 #define DISABLE_SW_WL_PASS_2 1
78
79 #define HWL_BY_BYTE 0 // FIXME? set to 1 to do HWL a byte at a time (seemed to work better earlier?)
80
81 #define USE_ORIG_TEST_DRAM_BYTE 1
82
83 // collect and print LMC utilization using SWL software algorithm
84 #define ENABLE_SW_WLEVEL_UTILIZATION 0
85
86 #define COUNT_RL_CANDIDATES 1
87
88 #define LOOK_FOR_STUCK_BYTE 0
89 #define ENABLE_STUCK_BYTE_RESET 0
90
91 #define FAILSAFE_CHECK 1
92
93 #define PERFECT_BITMASK_COUNTING 1
94
95 #define DAC_OVERRIDE_EARLY 1
96
97 #define SWL_WITH_HW_ALTS_CHOOSE_SW 0 // FIXME: allow override?
98
99 #define DEBUG_VALIDATE_BITMASK 0
100 #if DEBUG_VALIDATE_BITMASK
101 #define debug_bitmask_print ddr_print
102 #else
103 #define debug_bitmask_print(...)
104 #endif
105
106 #define ENABLE_SLOT_CTL_ACCESS 0
107 #undef ENABLE_CUSTOM_RLEVEL_TABLE
108
109 #define ENABLE_DISPLAY_MPR_PAGE 0
110 #if ENABLE_DISPLAY_MPR_PAGE
111 static void Display_MPR_Page_Location(bdk_node_t node, int rank,
112 int ddr_interface_num, int dimm_count,
113 int page, int location, uint64_t *mpr_data);
114 #endif
115
116 #define USE_L2_WAYS_LIMIT 1
117
118 /* Read out Deskew Settings for DDR */
119
120 typedef struct {
121 uint16_t bits[8];
122 } deskew_bytes_t;
123 typedef struct {
124 deskew_bytes_t bytes[9];
125 } deskew_data_t;
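/*
 * Note on the packed readout format (inferred from how the fields are
 * unpacked in Display_Deskew_Data() and Validate_Read_Deskew_Training()
 * below): each 10-bit dsk_dbg_rd_data value stored in bits[] carries the
 * per-bit deskew setting in its upper 7 bits and status flags in its low
 * 3 bits; bit 0 appears to be the lock flag and bits 1-2 saturation
 * flags, judging by how the validation code counts them. For example:
 *
 *     uint16_t raw    = dskdat.bytes[lane].bits[i];
 *     uint16_t flags  = raw & 7;    // lock/saturation flags
 *     uint16_t deskew = raw >> 3;   // 7-bit deskew setting
 */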
126
127 static void
128 Get_Deskew_Settings(bdk_node_t node, int ddr_interface_num, deskew_data_t *dskdat)
129 {
130 bdk_lmcx_phy_ctl_t phy_ctl;
131 bdk_lmcx_config_t lmc_config;
132 int bit_num, bit_index;
133 int byte_lane, byte_limit;
134 // NOTE: these are for pass 2.x
135 int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx
136 int bit_end = (is_t88p2) ? 9 : 8;
137
138 lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
139 byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
140
141 memset(dskdat, 0, sizeof(*dskdat));
142
143 BDK_CSR_MODIFY(_phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
144 _phy_ctl.s.dsk_dbg_clk_scaler = 3);
145
146 for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
147 bit_index = 0;
148 for (bit_num = 0; bit_num <= bit_end; ++bit_num) { // NOTE: this is for pass 2.x
149
150 if (bit_num == 4) continue;
151 if ((bit_num == 5) && is_t88p2) continue; // NOTE: this is for pass 2.x
152
153 // set byte lane and bit to read
154 BDK_CSR_MODIFY(_phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
155 (_phy_ctl.s.dsk_dbg_bit_sel = bit_num,
156 _phy_ctl.s.dsk_dbg_byte_sel = byte_lane));
157
158 // start read sequence
159 BDK_CSR_MODIFY(_phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
160 _phy_ctl.s.dsk_dbg_rd_start = 1);
161
162 // poll for read sequence to complete
163 do {
164 phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
165 } while (phy_ctl.s.dsk_dbg_rd_complete != 1);
166
167 // record the data
168 dskdat->bytes[byte_lane].bits[bit_index] = phy_ctl.s.dsk_dbg_rd_data & 0x3ff;
169 bit_index++;
170
171 } /* for (bit_num = 0; bit_num <= bit_end; ++bit_num) */
172 } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */
173
174 return;
175 }
176
177 static void
178 Display_Deskew_Data(bdk_node_t node, int ddr_interface_num,
179 deskew_data_t *dskdat, int print_enable)
180 {
181 int byte_lane;
182 int bit_num;
183 uint16_t flags, deskew;
184 bdk_lmcx_config_t lmc_config;
185 int byte_limit;
186 const char *fc = " ?-=+*#&";
187
188 lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
189 byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
190
191 if (print_enable) {
192 VB_PRT(print_enable, "N%d.LMC%d: Deskew Data: Bit => :",
193 node, ddr_interface_num);
194 for (bit_num = 7; bit_num >= 0; --bit_num)
195 VB_PRT(print_enable, " %3d ", bit_num);
196 VB_PRT(print_enable, "\n");
197 }
198
199 for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
200 if (print_enable)
201 VB_PRT(print_enable, "N%d.LMC%d: Bit Deskew Byte %d %s :",
202 node, ddr_interface_num, byte_lane,
203 (print_enable >= VBL_TME) ? "FINAL" : " ");
204
205 for (bit_num = 7; bit_num >= 0; --bit_num) {
206
207 flags = dskdat->bytes[byte_lane].bits[bit_num] & 7;
208 deskew = dskdat->bytes[byte_lane].bits[bit_num] >> 3;
209
210 if (print_enable)
211 VB_PRT(print_enable, " %3d %c", deskew, fc[flags^1]);
212
213 } /* for (bit_num = 7; bit_num >= 0; --bit_num) */
214
215 if (print_enable)
216 VB_PRT(print_enable, "\n");
217
218 } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */
219
220 return;
221 }
222
223 static int
224 change_wr_deskew_ena(bdk_node_t node, int ddr_interface_num, int new_state)
225 {
226 bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
227 int saved_wr_deskew_ena;
228
229     // read the original WR_DESKEW_ENA setting so it can be returned to the caller
230 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
231 saved_wr_deskew_ena = !!GET_DDR_DLL_CTL3(wr_deskew_ena);
232 if (saved_wr_deskew_ena != !!new_state) { // write it only when changing it
233 SET_DDR_DLL_CTL3(wr_deskew_ena, !!new_state);
234 DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
235 }
236 return saved_wr_deskew_ena;
237 }
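/*
 * Typical save/restore usage (a minimal sketch of how this helper is
 * used around the read-deskew code later in this file):
 *
 *     int saved = change_wr_deskew_ena(node, lmc, 0);  // off for read deskew
 *     ... do the read-deskew work ...
 *     change_wr_deskew_ena(node, lmc, saved);          // restore original
 */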
238
239 typedef struct {
240 int saturated; // number saturated
241 int unlocked; // number unlocked
242 int nibrng_errs; // nibble range errors
243 int nibunl_errs; // nibble unlocked errors
244 //int nibsat_errs; // nibble saturation errors
245 int bitval_errs; // bit value errors
246 #if LOOK_FOR_STUCK_BYTE
247 int bytes_stuck; // byte(s) stuck
248 #endif
249 } deskew_counts_t;
250
251 #define MIN_BITVAL 17
252 #define MAX_BITVAL 110
253
254 static deskew_counts_t deskew_training_results;
255 static int deskew_validation_delay = 10000; // FIXME: make this a var for overriding
256
257 static void
258 Validate_Read_Deskew_Training(bdk_node_t node, int rank_mask, int ddr_interface_num,
259 deskew_counts_t *counts, int print_enable)
260 {
261 int byte_lane, bit_num, nib_num;
262 int nibrng_errs, nibunl_errs, bitval_errs;
263 //int nibsat_errs;
264 bdk_lmcx_config_t lmc_config;
265 int16_t nib_min[2], nib_max[2], nib_unl[2]/*, nib_sat[2]*/;
266 // NOTE: these are for pass 2.x
267 int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx
268 int bit_start = (is_t88p2) ? 9 : 8;
269 int byte_limit;
270 #if LOOK_FOR_STUCK_BYTE
271 uint64_t bl_mask[2]; // enough for 128 values
272 int bit_values;
273 #endif
274 deskew_data_t dskdat;
275 int bit_index;
276 int16_t flags, deskew;
277 const char *fc = " ?-=+*#&";
278 int saved_wr_deskew_ena;
279 int bit_last;
280
281 // save original WR_DESKEW_ENA setting, and disable it for read deskew
282 saved_wr_deskew_ena = change_wr_deskew_ena(node, ddr_interface_num, 0);
283
284 lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
285 byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena;
286
287 memset(counts, 0, sizeof(deskew_counts_t));
288
289 Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
290
291 if (print_enable) {
292 VB_PRT(print_enable, "N%d.LMC%d: Deskew Settings: Bit => :",
293 node, ddr_interface_num);
294 for (bit_num = 7; bit_num >= 0; --bit_num)
295 VB_PRT(print_enable, " %3d ", bit_num);
296 VB_PRT(print_enable, "\n");
297 }
298
299 for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
300 if (print_enable)
301 VB_PRT(print_enable, "N%d.LMC%d: Bit Deskew Byte %d %s :",
302 node, ddr_interface_num, byte_lane,
303 (print_enable >= VBL_TME) ? "FINAL" : " ");
304
305 nib_min[0] = 127; nib_min[1] = 127;
306 nib_max[0] = 0; nib_max[1] = 0;
307 nib_unl[0] = 0; nib_unl[1] = 0;
308 //nib_sat[0] = 0; nib_sat[1] = 0;
309
310 #if LOOK_FOR_STUCK_BYTE
311 bl_mask[0] = bl_mask[1] = 0;
312 #endif
313
314 if ((lmc_config.s.mode32b == 1) && (byte_lane == 4)) {
315 bit_index = 3;
316 bit_last = 3;
317 if (print_enable)
318 VB_PRT(print_enable, " ");
319 } else {
320 bit_index = 7;
321 bit_last = bit_start;
322 }
323
324 for (bit_num = bit_last; bit_num >= 0; --bit_num) { // NOTE: this is for pass 2.x
325 if (bit_num == 4) continue;
326 if ((bit_num == 5) && is_t88p2) continue; // NOTE: this is for pass 2.x
327
328 nib_num = (bit_num > 4) ? 1 : 0;
329
330 flags = dskdat.bytes[byte_lane].bits[bit_index] & 7;
331 deskew = dskdat.bytes[byte_lane].bits[bit_index] >> 3;
332 bit_index--;
333
334 counts->saturated += !!(flags & 6);
335 counts->unlocked += !(flags & 1);
336
337 nib_unl[nib_num] += !(flags & 1);
338 //nib_sat[nib_num] += !!(flags & 6);
339
340 if (flags & 1) { // FIXME? only do range when locked
341 nib_min[nib_num] = min(nib_min[nib_num], deskew);
342 nib_max[nib_num] = max(nib_max[nib_num], deskew);
343 }
344
345 #if LOOK_FOR_STUCK_BYTE
346 bl_mask[(deskew >> 6) & 1] |= 1UL << (deskew & 0x3f);
347 #endif
348
349 if (print_enable)
350 VB_PRT(print_enable, " %3d %c", deskew, fc[flags^1]);
351
352 } /* for (bit_num = bit_last; bit_num >= 0; --bit_num) */
353
354 /*
355 Now look for nibble errors:
356
357 For bit 55, it looks like a bit deskew problem. When the upper nibble of byte 6
358 needs to go to saturation, bit 7 of byte 6 locks prematurely at 64.
359 For DIMMs with raw card A and B, can we reset the deskew training when we encounter this case?
360          The reset criterion looks at one nibble at a time for raw card A and B;
361          if the bit-deskew settings within a nibble differ by more than 33, we issue a reset
362          to the bit-deskew training.
363
364 LMC0 Bit Deskew Byte(6): 64 0 - 0 - 0 - 26 61 35 64
365 */
366 // upper nibble range, then lower nibble range
367 nibrng_errs = ((nib_max[1] - nib_min[1]) > 33) ? 1 : 0;
368 nibrng_errs |= ((nib_max[0] - nib_min[0]) > 33) ? 1 : 0;
369
370 // check for nibble all unlocked
371 nibunl_errs = ((nib_unl[0] == 4) || (nib_unl[1] == 4)) ? 1 : 0;
372
373 // check for nibble all saturated
374 //nibsat_errs = ((nib_sat[0] == 4) || (nib_sat[1] == 4)) ? 1 : 0;
375
376 // check for bit value errors, ie < 17 or > 110
377 // FIXME? assume max always > MIN_BITVAL and min < MAX_BITVAL
378 bitval_errs = ((nib_max[1] > MAX_BITVAL) || (nib_max[0] > MAX_BITVAL)) ? 1 : 0;
379 bitval_errs |= ((nib_min[1] < MIN_BITVAL) || (nib_min[0] < MIN_BITVAL)) ? 1 : 0;
380
381 if (((nibrng_errs != 0) || (nibunl_errs != 0) /*|| (nibsat_errs != 0)*/ || (bitval_errs != 0))
382 && print_enable)
383 {
384 VB_PRT(print_enable, " %c%c%c%c",
385 (nibrng_errs)?'R':' ',
386 (nibunl_errs)?'U':' ',
387 (bitval_errs)?'V':' ',
388 /*(nibsat_errs)?'S':*/' ');
389 }
390
391 #if LOOK_FOR_STUCK_BYTE
392 bit_values = __builtin_popcountl(bl_mask[0]) + __builtin_popcountl(bl_mask[1]);
393 if (bit_values < 3) {
394 counts->bytes_stuck |= (1 << byte_lane);
395 if (print_enable)
396 VB_PRT(print_enable, "X");
397 }
398 #endif
399 if (print_enable)
400 VB_PRT(print_enable, "\n");
401
402 counts->nibrng_errs |= (nibrng_errs << byte_lane);
403 counts->nibunl_errs |= (nibunl_errs << byte_lane);
404 //counts->nibsat_errs |= (nibsat_errs << byte_lane);
405 counts->bitval_errs |= (bitval_errs << byte_lane);
406
407 #if LOOK_FOR_STUCK_BYTE
408 // just for completeness, allow print of the stuck values bitmask after the bytelane print
409 if ((bit_values < 3) && print_enable) {
410 VB_PRT(VBL_DEV, "N%d.LMC%d: Deskew byte %d STUCK on value 0x%016lx.%016lx\n",
411 node, ddr_interface_num, byte_lane,
412 bl_mask[1], bl_mask[0]);
413 }
414 #endif
415
416 } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */
417
418 // restore original WR_DESKEW_ENA setting
419 change_wr_deskew_ena(node, ddr_interface_num, saved_wr_deskew_ena);
420
421 return;
422 }
423
424 unsigned short load_dac_override(int node, int ddr_interface_num,
425 int dac_value, int byte)
426 {
427 bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
428 int bytex = (byte == 0x0A) ? byte : byte + 1; // single bytelanes incr by 1; A is for ALL
429
430 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
431
432 SET_DDR_DLL_CTL3(byte_sel, bytex);
433 SET_DDR_DLL_CTL3(offset, dac_value >> 1); // only 7-bit field, use MS bits
434
435 ddr_dll_ctl3.s.bit_select = 0x9; /* No-op */
436 DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
437
438 ddr_dll_ctl3.s.bit_select = 0xC; /* Vref bypass setting load */
439 DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
440
441 ddr_dll_ctl3.s.bit_select = 0xD; /* Vref bypass on. */
442 DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
443
444 ddr_dll_ctl3.s.bit_select = 0x9; /* No-op */
445 DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
446
447 return ((unsigned short) GET_DDR_DLL_CTL3(offset));
448 }
449
450 // arg dac_or_dbi is 1 for DAC, 0 for DBI
451 // returns 9 entries (bytelanes 0 through 8) in settings[]
452 // returns 0 if OK, -1 if a problem
453 int read_DAC_DBI_settings(int node, int ddr_interface_num,
454 int dac_or_dbi, int *settings)
455 {
456 bdk_lmcx_phy_ctl_t phy_ctl;
457 int byte_lane, bit_num;
458 int deskew;
459 int dac_value;
460 int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx
461
462 phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
463 phy_ctl.s.dsk_dbg_clk_scaler = 3;
464 DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), phy_ctl.u);
465
466 bit_num = (dac_or_dbi) ? 4 : 5;
467 if ((bit_num == 5) && !is_t88p2) { // NOTE: this is for pass 1.x
468 return -1;
469 }
470
471 for (byte_lane = 8; byte_lane >= 0 ; --byte_lane) { // FIXME: always assume ECC is available
472
473 //set byte lane and bit to read
474 phy_ctl.s.dsk_dbg_bit_sel = bit_num;
475 phy_ctl.s.dsk_dbg_byte_sel = byte_lane;
476 DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), phy_ctl.u);
477
478 //start read sequence
479 phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
480 phy_ctl.s.dsk_dbg_rd_start = 1;
481 DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), phy_ctl.u);
482
483 //poll for read sequence to complete
484 do {
485 phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
486 } while (phy_ctl.s.dsk_dbg_rd_complete != 1);
487
488 deskew = phy_ctl.s.dsk_dbg_rd_data /*>> 3*/; // leave the flag bits for DBI
489 dac_value = phy_ctl.s.dsk_dbg_rd_data & 0xff;
490
491 settings[byte_lane] = (dac_or_dbi) ? dac_value : deskew;
492
493 } /* for (byte_lane = 8; byte_lane >= 0 ; --byte_lane) { */
494
495 return 0;
496 }
497
498 // print out the DAC or DBI settings array
499 // arg dac_or_dbi is 1 for DAC, 0 for DBI
500 void
501 display_DAC_DBI_settings(int node, int lmc, int dac_or_dbi,
502 int ecc_ena, int *settings, const char *title)
503 {
504 int byte;
505 int flags;
506 int deskew;
507 const char *fc = " ?-=+*#&";
508
509 ddr_print("N%d.LMC%d: %s %s Deskew Settings %d:0 :",
510 node, lmc, title, (dac_or_dbi)?"DAC":"DBI", 7+ecc_ena);
511 for (byte = (7+ecc_ena); byte >= 0; --byte) { // FIXME: what about 32-bit mode?
512 if (dac_or_dbi) { // DAC
513             flags = 1; // say it's locked so the flag prints as blank
514 deskew = settings[byte] & 0xff;
515 } else { // DBI
516 flags = settings[byte] & 7;
517 deskew = (settings[byte] >> 3) & 0x7f;
518 }
519 ddr_print(" %3d %c", deskew, fc[flags^1]);
520 }
521 ddr_print("\n");
522 }
523
524 // Evaluate the DAC settings array
525 static int
526 evaluate_DAC_settings(int ddr_interface_64b, int ecc_ena, int *settings)
527 {
528 int byte, dac;
529 int last = (ddr_interface_64b) ? 7 : 3;
530
531 // this looks only for DAC values that are EVEN
532 for (byte = (last+ecc_ena); byte >= 0; --byte) {
533 dac = settings[byte] & 0xff;
534 if ((dac & 1) == 0)
535 return 1;
536 }
537 return 0;
538 }
539
540 static void
541 Perform_Offset_Training(bdk_node_t node, int rank_mask, int ddr_interface_num)
542 {
543 bdk_lmcx_phy_ctl_t lmc_phy_ctl;
544 uint64_t orig_phy_ctl;
545 const char *s;
546
547 /*
548 * 6.9.8 LMC Offset Training
549 *
550 * LMC requires input-receiver offset training.
551 *
552 * 1. Write LMC(0)_PHY_CTL[DAC_ON] = 1
553 */
554 lmc_phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
555 orig_phy_ctl = lmc_phy_ctl.u;
556 lmc_phy_ctl.s.dac_on = 1;
557
558 // allow full CSR override
559 if ((s = lookup_env_parameter_ull("ddr_phy_ctl")) != NULL) {
560 lmc_phy_ctl.u = strtoull(s, NULL, 0);
561 }
562
563 // do not print or write if CSR does not change...
564 if (lmc_phy_ctl.u != orig_phy_ctl) {
565 ddr_print("PHY_CTL : 0x%016llx\n", lmc_phy_ctl.u);
566 DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), lmc_phy_ctl.u);
567 }
568
569 #if 0
570 // FIXME? do we really need to show RODT here?
571 bdk_lmcx_comp_ctl2_t lmc_comp_ctl2;
572 lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
573 ddr_print("Read ODT_CTL : 0x%x (%d ohms)\n",
574 lmc_comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[lmc_comp_ctl2.s.rodt_ctl]);
575 #endif
576
577 /*
578 * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0B and
579 * LMC(0)_SEQ_CTL[INIT_START] = 1.
580 *
581 * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
582 */
583 perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0B); /* Offset training sequence */
584
585 }
586
587 static void
588 Perform_Internal_VREF_Training(bdk_node_t node, int rank_mask, int ddr_interface_num)
589 {
590 bdk_lmcx_ext_config_t ext_config;
591
592 /*
593 * 6.9.9 LMC Internal Vref Training
594 *
595 * LMC requires input-reference-voltage training.
596 *
597 * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 0.
598 */
599 ext_config.u = BDK_CSR_READ(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num));
600 ext_config.s.vrefint_seq_deskew = 0;
601
602 VB_PRT(VBL_SEQ, "N%d.LMC%d: Performing LMC sequence: vrefint_seq_deskew = %d\n",
603 node, ddr_interface_num, ext_config.s.vrefint_seq_deskew);
604
605 DRAM_CSR_WRITE(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num), ext_config.u);
606
607 /*
608 * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0a and
609 * LMC(0)_SEQ_CTL[INIT_START] = 1.
610 *
611 * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
612 */
613 perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0A); /* LMC Internal Vref Training */
614 }
615
616 #define dbg_avg(format, ...) VB_PRT(VBL_DEV, format, ##__VA_ARGS__)
617 static int
618 process_samples_average(int16_t *bytes, int num_samples, int lmc, int lane_no)
619 {
620 int i, savg, sadj, sum = 0, rng, ret, asum, trunc;
621 int16_t smin = 32767, smax = -32768;
622
623 dbg_avg("DBG_AVG%d.%d: ", lmc, lane_no);
624
625 for (i = 0; i < num_samples; i++) {
626 sum += bytes[i];
627 if (bytes[i] < smin) smin = bytes[i];
628 if (bytes[i] > smax) smax = bytes[i];
629 dbg_avg(" %3d", bytes[i]);
630 }
631 rng = smax - smin + 1;
632
633 dbg_avg(" (%3d, %3d, %2d)", smin, smax, rng);
634
635 asum = sum - smin - smax;
636
637 savg = divide_nint(sum * 10, num_samples);
638
639 sadj = divide_nint(asum * 10, (num_samples - 2));
640
641 trunc = asum / (num_samples - 2);
642
643 dbg_avg(" [%3d.%d, %3d.%d, %3d]", savg/10, savg%10, sadj/10, sadj%10, trunc);
644
645 sadj = divide_nint(sadj, 10);
646 if (trunc & 1)
647 ret = trunc;
648 else if (sadj & 1)
649 ret = sadj;
650 else
651 ret = trunc + 1;
652
653 dbg_avg(" -> %3d\n", ret);
654
655 return ret;
656 }
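/*
 * Worked example with hypothetical samples, assuming divide_nint() rounds
 * to nearest: bytes[] = {5, 6, 6, 7, 9}, num_samples = 5.
 *   sum = 33, smin = 5, smax = 9, asum = 33 - 5 - 9 = 19
 *   savg  = divide_nint(330, 5) = 66          (6.6, printed only)
 *   sadj  = divide_nint(190, 3) = 63  ->  divide_nint(63, 10) = 6
 *   trunc = 19 / 3 = 6
 * trunc and sadj are both even, so the routine returns trunc + 1 = 7,
 * i.e. the trimmed average nudged to an odd value.
 */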
657
658
659 #define DEFAULT_SAT_RETRY_LIMIT 11 // 1 + 10 retries
660 static int default_lock_retry_limit = 20; // 20 retries // FIXME: make a var for overriding
661
662 static int
663 Perform_Read_Deskew_Training(bdk_node_t node, int rank_mask, int ddr_interface_num,
664 int spd_rawcard_AorB, int print_flags, int ddr_interface_64b)
665 {
666 int unsaturated, locked;
667 //int nibble_sat;
668 int sat_retries, lock_retries, lock_retries_total, lock_retries_limit;
669 int print_first;
670 int print_them_all;
671 deskew_counts_t dsk_counts;
672     int saved_wr_deskew_ena;
673 #if DESKEW_RODT_CTL
674 bdk_lmcx_comp_ctl2_t comp_ctl2;
675 int save_deskew_rodt_ctl = -1;
676 #endif
677 int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx
678
679 VB_PRT(VBL_FAE, "N%d.LMC%d: Performing Read Deskew Training.\n", node, ddr_interface_num);
680
681 // save original WR_DESKEW_ENA setting, and disable it for read deskew
682 saved_wr_deskew_ena = change_wr_deskew_ena(node, ddr_interface_num, 0);
683
684 sat_retries = 0;
685 lock_retries_total = 0;
686 unsaturated = 0;
687 print_first = VBL_FAE; // print the first one, FAE and above
688 print_them_all = dram_is_verbose(VBL_DEV4); // set to true for printing all normal deskew attempts
689
690 int loops, normal_loops = 1; // default to 1 NORMAL deskew training op...
691 const char *s;
692 if ((s = getenv("ddr_deskew_normal_loops")) != NULL) {
693 normal_loops = strtoul(s, NULL, 0);
694 }
695
696 #if LOOK_FOR_STUCK_BYTE
697 // provide override for STUCK BYTE RESETS
698 int do_stuck_reset = ENABLE_STUCK_BYTE_RESET;
699 if ((s = getenv("ddr_enable_stuck_byte_reset")) != NULL) {
700 do_stuck_reset = !!strtoul(s, NULL, 0);
701 }
702 #endif
703
704 #if DESKEW_RODT_CTL
705 if ((s = getenv("ddr_deskew_rodt_ctl")) != NULL) {
706 int deskew_rodt_ctl = strtoul(s, NULL, 0);
707 comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
708 save_deskew_rodt_ctl = comp_ctl2.s.rodt_ctl;
709 comp_ctl2.s.rodt_ctl = deskew_rodt_ctl;
710 DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), comp_ctl2.u);
711 }
712 #endif
713
714 lock_retries_limit = default_lock_retry_limit;
715 if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) // added 81xx and 83xx
716 lock_retries_limit *= 2; // give pass 2.0 twice as many
717
718 do { /* while (sat_retries < sat_retry_limit) */
719
720 /*
721 * 6.9.10 LMC Deskew Training
722 *
723 * LMC requires input-read-data deskew training.
724 *
725 * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 1.
726 */
727 VB_PRT(VBL_SEQ, "N%d.LMC%d: Performing LMC sequence: Set vrefint_seq_deskew = 1\n",
728 node, ddr_interface_num);
729 DRAM_CSR_MODIFY(ext_config, node, BDK_LMCX_EXT_CONFIG(ddr_interface_num),
730 ext_config.s.vrefint_seq_deskew = 1); /* Set Deskew sequence */
731
732 /*
733 * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0A and
734 * LMC(0)_SEQ_CTL[INIT_START] = 1.
735 *
736 * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
737 */
738 DRAM_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
739 phy_ctl.s.phy_dsk_reset = 1); /* RESET Deskew sequence */
740 perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0A); /* LMC Deskew Training */
741
742 lock_retries = 0;
743
744 perform_read_deskew_training:
745 // maybe perform the NORMAL deskew training sequence multiple times before looking at lock status
746 for (loops = 0; loops < normal_loops; loops++) {
747 DRAM_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
748 phy_ctl.s.phy_dsk_reset = 0); /* Normal Deskew sequence */
749 perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0A); /* LMC Deskew Training */
750 }
751 // Moved this from Validate_Read_Deskew_Training
752 /* Allow deskew results to stabilize before evaluating them. */
753 bdk_wait_usec(deskew_validation_delay);
754
755 // Now go look at lock and saturation status...
756 Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, print_first);
757 if (print_first && !print_them_all) // after printing the first and not doing them all, no more
758 print_first = 0;
759
760 unsaturated = (dsk_counts.saturated == 0);
761 locked = (dsk_counts.unlocked == 0);
762 //nibble_sat = (dsk_counts.nibsat_errs != 0);
763
764 // only do locking retries if unsaturated or rawcard A or B, otherwise full SAT retry
765 if (unsaturated || (spd_rawcard_AorB && !is_t88p2 /*&& !nibble_sat*/)) {
766 if (!locked) { // and not locked
767 lock_retries++;
768 lock_retries_total++;
769 if (lock_retries <= lock_retries_limit) {
770 goto perform_read_deskew_training;
771 } else {
772 VB_PRT(VBL_TME, "N%d.LMC%d: LOCK RETRIES failed after %d retries\n",
773 node, ddr_interface_num, lock_retries_limit);
774 }
775 } else {
776 if (lock_retries_total > 0) // only print if we did try
777 VB_PRT(VBL_TME, "N%d.LMC%d: LOCK RETRIES successful after %d retries\n",
778 node, ddr_interface_num, lock_retries);
779 }
780 } /* if (unsaturated || spd_rawcard_AorB) */
781
782 ++sat_retries;
783
784 #if LOOK_FOR_STUCK_BYTE
785 // FIXME: this is a bit of a hack at the moment...
786 // We want to force a Deskew RESET hopefully to unstick the bytes values
787 // and then resume normal deskew training as usual.
788 // For now, do only if it is all locked...
789 if (locked && (dsk_counts.bytes_stuck != 0)) {
790 BDK_CSR_INIT(lmc_config, node, BDK_LMCX_CONFIG(ddr_interface_num));
791 if (do_stuck_reset && lmc_config.s.mode_x4dev) { // FIXME: only when x4!!
792 unsaturated = 0; // to always make sure the while continues
793 VB_PRT(VBL_TME, "N%d.LMC%d: STUCK BYTE (0x%x), forcing deskew RESET\n",
794 node, ddr_interface_num, dsk_counts.bytes_stuck);
795 continue; // bypass the rest to get back to the RESET
796 } else {
797 VB_PRT(VBL_TME, "N%d.LMC%d: STUCK BYTE (0x%x), ignoring deskew RESET\n",
798 node, ddr_interface_num, dsk_counts.bytes_stuck);
799 }
800 }
801 #endif
802 /*
803 * At this point, check for a DDR4 RDIMM that will not benefit from SAT retries; if so, no retries
804 */
805 if (spd_rawcard_AorB && !is_t88p2 /*&& !nibble_sat*/) {
806 VB_PRT(VBL_TME, "N%d.LMC%d: Read Deskew Training Loop: Exiting for RAWCARD == A or B.\n",
807 node, ddr_interface_num);
808 break; // no sat or lock retries
809 }
810
811 } while (!unsaturated && (sat_retries < DEFAULT_SAT_RETRY_LIMIT));
812
813 #if DESKEW_RODT_CTL
814 if (save_deskew_rodt_ctl != -1) {
815 comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
816 comp_ctl2.s.rodt_ctl = save_deskew_rodt_ctl;
817 DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), comp_ctl2.u);
818 }
819 #endif
820
821 VB_PRT(VBL_FAE, "N%d.LMC%d: Read Deskew Training %s. %d sat-retries, %d lock-retries\n",
822 node, ddr_interface_num,
823 (sat_retries >= DEFAULT_SAT_RETRY_LIMIT) ? "Timed Out" : "Completed",
824 sat_retries-1, lock_retries_total);
825
826 // restore original WR_DESKEW_ENA setting
827 change_wr_deskew_ena(node, ddr_interface_num, saved_wr_deskew_ena);
828
829 if ((dsk_counts.nibrng_errs != 0) || (dsk_counts.nibunl_errs != 0)) {
830 debug_print("N%d.LMC%d: NIBBLE ERROR(S) found, returning FAULT\n",
831 node, ddr_interface_num);
832 return -1; // we did retry locally, they did not help
833 }
834
835 // NOTE: we (currently) always print one last training validation before starting Read Leveling...
836
837 return 0;
838 }
839
840 static void
841 do_write_deskew_op(bdk_node_t node, int ddr_interface_num,
842 int bit_sel, int byte_sel, int ena)
843 {
844 bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
845
846 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
847 SET_DDR_DLL_CTL3(bit_select, bit_sel);
848 SET_DDR_DLL_CTL3(byte_sel, byte_sel);
849 SET_DDR_DLL_CTL3(wr_deskew_ena, ena);
850 DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
851
852 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
853 }
854
855 static void
856 set_write_deskew_offset(bdk_node_t node, int ddr_interface_num,
857 int bit_sel, int byte_sel, int offset)
858 {
859 bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
860
861 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
862 SET_DDR_DLL_CTL3(bit_select, bit_sel);
863 SET_DDR_DLL_CTL3(byte_sel, byte_sel);
864 SET_DDR_DLL_CTL3(offset, offset);
865 DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
866
867 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
868 SET_DDR_DLL_CTL3(wr_deskew_ld, 1);
869 DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
870
871 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
872 }
873
874 static void
875 Update_Write_Deskew_Settings(bdk_node_t node, int ddr_interface_num, deskew_data_t *dskdat)
876 {
877 bdk_lmcx_config_t lmc_config;
878 int bit_num;
879 int byte_lane, byte_limit;
880
881 lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
882 byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
883
884 for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
885 for (bit_num = 0; bit_num <= 7; ++bit_num) {
886
887 set_write_deskew_offset(node, ddr_interface_num, bit_num, byte_lane + 1,
888 dskdat->bytes[byte_lane].bits[bit_num]);
889
890 } /* for (bit_num = 0; bit_num <= 7; ++bit_num) */
891 } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */
892
893 return;
894 }
895
896 #define ALL_BYTES 0x0A
897 #define BS_NOOP 0x09
898 #define BS_RESET 0x0F
899 #define BS_REUSE 0x0A
900
901 // set all entries to the same value (used during training)
902 static void
903 Set_Write_Deskew_Settings(bdk_node_t node, int ddr_interface_num, int value)
904 {
905 bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
906 int bit_num;
907
908 VB_PRT(VBL_DEV2, "N%d.LMC%d: SetWriteDeskew: WRITE %d\n", node, ddr_interface_num, value);
909
910 for (bit_num = 0; bit_num <= 7; ++bit_num) {
911
912 // write a bit-deskew value to all bit-lanes of all bytes
913 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
914 SET_DDR_DLL_CTL3(bit_select, bit_num);
915 SET_DDR_DLL_CTL3(byte_sel, ALL_BYTES); // FIXME? will this work in 32-bit mode?
916 SET_DDR_DLL_CTL3(offset, value);
917 DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
918
919 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
920 SET_DDR_DLL_CTL3(wr_deskew_ld, 1);
921 DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
922
923 } /* for (bit_num = 0; bit_num <= 7; ++bit_num) */
924
925 #if 0
926 // FIXME: for debug use only
927 Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
928 Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
929 #endif
930
931 return;
932 }
933
934 typedef struct {
935 uint8_t count[8];
936 uint8_t start[8];
937 uint8_t best_count[8];
938 uint8_t best_start[8];
939 } deskew_bytelane_t;
940 typedef struct {
941 deskew_bytelane_t bytes[9];
942 } deskew_rank_t;
943
944 deskew_rank_t deskew_history[4];
945
946 #define DSKVAL_INCR 4
947
948 static void
949 Neutral_Write_Deskew_Setup(bdk_node_t node, int ddr_interface_num)
950 {
951 // first: NO-OP, Select all bytes, Disable write bit-deskew
952 ddr_print("N%d.LMC%d: NEUTRAL Write Deskew Setup: first: NOOP\n", node, ddr_interface_num);
953 do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 0);
954 //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
955 //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
956
957 // enable write bit-deskew and RESET the settings
958 ddr_print("N%d.LMC%d: NEUTRAL Write Deskew Setup: wr_ena: RESET\n", node, ddr_interface_num);
959 do_write_deskew_op(node, ddr_interface_num, BS_RESET, ALL_BYTES, 1);
960 //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
961 //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
962 }
963
964 static void
965 Perform_Write_Deskew_Training(bdk_node_t node, int ddr_interface_num)
966 {
967 deskew_data_t dskdat;
968 int byte, bit_num;
969 int dskval, rankx, rank_mask, active_ranks, errors, bit_errs;
970 uint64_t hw_rank_offset;
971 uint64_t bad_bits[2];
972 uint64_t phys_addr;
973 deskew_rank_t *dhp;
974 int num_lmcs = __bdk_dram_get_num_lmc(node);
975
976 BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(ddr_interface_num));
977 rank_mask = lmcx_config.s.init_status; // FIXME: is this right when we run?
978
979 // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
980 hw_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
981
982 VB_PRT(VBL_FAE, "N%d.LMC%d: Performing Write Deskew Training.\n", node, ddr_interface_num);
983
984 // first: NO-OP, Select all bytes, Disable write bit-deskew
985 ddr_print("N%d.LMC%d: WriteDeskewConfig: first: NOOP\n", node, ddr_interface_num);
986 do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 0);
987 //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
988 //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
989
990 // enable write bit-deskew and RESET the settings
991 ddr_print("N%d.LMC%d: WriteDeskewConfig: wr_ena: RESET\n", node, ddr_interface_num);
992 do_write_deskew_op(node, ddr_interface_num, BS_RESET, ALL_BYTES, 1);
993 //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
994 //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
995
996 #if 0
997 // enable write bit-deskew and REUSE read bit-deskew settings
998 ddr_print("N%d.LMC%d: WriteDeskewConfig: wr_ena: REUSE\n", node, ddr_interface_num);
999 do_write_deskew_op(node, ddr_interface_num, BS_REUSE, ALL_BYTES, 1);
1000 Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
1001 Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
1002 #endif
1003
1004 #if 1
1005 memset(deskew_history, 0, sizeof(deskew_history));
1006
1007 for (dskval = 0; dskval < 128; dskval += DSKVAL_INCR) {
1008
1009 Set_Write_Deskew_Settings(node, ddr_interface_num, dskval);
1010
1011 active_ranks = 0;
1012 for (rankx = 0; rankx < 4; rankx++) {
1013 if (!(rank_mask & (1 << rankx)))
1014 continue;
1015 dhp = &deskew_history[rankx];
1016 phys_addr = hw_rank_offset * active_ranks;
1017 active_ranks++;
1018
1019 errors = test_dram_byte_hw(node, ddr_interface_num, phys_addr, 0, bad_bits);
1020
1021 for (byte = 0; byte <= 8; byte++) { // do bytelane(s)
1022
1023 // check errors
1024 if (errors & (1 << byte)) { // yes, error(s) in the byte lane in this rank
1025 bit_errs = ((byte == 8) ? bad_bits[1] : bad_bits[0] >> (8 * byte)) & 0xFFULL;
1026
1027 VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Value %d: Address 0x%012llx errors 0x%x/0x%x\n",
1028 node, ddr_interface_num, rankx, byte,
1029 dskval, phys_addr, errors, bit_errs);
1030
1031 for (bit_num = 0; bit_num <= 7; bit_num++) {
1032 if (!(bit_errs & (1 << bit_num)))
1033 continue;
1034 if (dhp->bytes[byte].count[bit_num] > 0) { // had started run
1035 VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Bit %d Value %d: stopping a run here\n",
1036 node, ddr_interface_num, rankx, byte, bit_num, dskval);
1037 dhp->bytes[byte].count[bit_num] = 0; // stop now
1038 }
1039 } /* for (bit_num = 0; bit_num <= 7; bit_num++) */
1040
1041 // FIXME: else had not started run - nothing else to do?
1042 } else { // no error in the byte lane
1043 for (bit_num = 0; bit_num <= 7; bit_num++) {
1044 if (dhp->bytes[byte].count[bit_num] == 0) { // first success, set run start
1045 VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Bit %d Value %d: starting a run here\n",
1046 node, ddr_interface_num, rankx, byte, bit_num, dskval);
1047 dhp->bytes[byte].start[bit_num] = dskval;
1048 }
1049 dhp->bytes[byte].count[bit_num] += DSKVAL_INCR; // bump run length
1050
1051 // is this now the biggest window?
1052 if (dhp->bytes[byte].count[bit_num] > dhp->bytes[byte].best_count[bit_num]) {
1053 dhp->bytes[byte].best_count[bit_num] = dhp->bytes[byte].count[bit_num];
1054 dhp->bytes[byte].best_start[bit_num] = dhp->bytes[byte].start[bit_num];
1055 VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Bit %d Value %d: updating best to %d/%d\n",
1056 node, ddr_interface_num, rankx, byte, bit_num, dskval,
1057 dhp->bytes[byte].best_start[bit_num],
1058 dhp->bytes[byte].best_count[bit_num]);
1059 }
1060 } /* for (bit_num = 0; bit_num <= 7; bit_num++) */
1061 } /* error in the byte lane */
1062 } /* for (byte = 0; byte <= 8; byte++) */
1063 } /* for (rankx = 0; rankx < 4; rankx++) */
1064 } /* for (dskval = 0; dskval < 128; dskval++) */
1065
1066
1067 for (byte = 0; byte <= 8; byte++) { // do bytelane(s)
1068
1069 for (bit_num = 0; bit_num <= 7; bit_num++) { // do bits
1070 int bit_beg, bit_end;
1071
1072 bit_beg = 0;
1073 bit_end = 128;
1074
1075 for (rankx = 0; rankx < 4; rankx++) { // merge ranks
1076 int rank_beg, rank_end, rank_count;
1077 if (!(rank_mask & (1 << rankx)))
1078 continue;
1079
1080 dhp = &deskew_history[rankx];
1081 rank_beg = dhp->bytes[byte].best_start[bit_num];
1082 rank_count = dhp->bytes[byte].best_count[bit_num];
1083
1084 if (!rank_count) {
1085 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: Byte %d Bit %d: EMPTY\n",
1086 node, ddr_interface_num, rankx, byte, bit_num);
1087 continue;
1088 }
1089
1090 bit_beg = max(bit_beg, rank_beg);
1091 rank_end = rank_beg + rank_count - DSKVAL_INCR;
1092 bit_end = min(bit_end, rank_end);
1093
1094 } /* for (rankx = 0; rankx < 4; rankx++) */
1095
1096 dskdat.bytes[byte].bits[bit_num] = (bit_end + bit_beg) / 2;
1097
1098 } /* for (bit_num = 0; bit_num <= 7; bit_num++) */
1099 } /* for (byte = 0; byte <= 8; byte++) */
1100
1101 #endif
1102
1103 // update the write bit-deskew settings with final settings
1104 ddr_print("N%d.LMC%d: WriteDeskewConfig: wr_ena: UPDATE\n", node, ddr_interface_num);
1105 Update_Write_Deskew_Settings(node, ddr_interface_num, &dskdat);
1106 Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
1107 Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
1108
1109 // last: NO-OP, Select all bytes, MUST leave write bit-deskew enabled
1110 ddr_print("N%d.LMC%d: WriteDeskewConfig: last: wr_ena: NOOP\n", node, ddr_interface_num);
1111 do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 1);
1112 //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
1113 //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
1114
1115 #if 0
1116 // FIXME: disable/delete this when write bit-deskew works...
1117 // final: NO-OP, Select all bytes, do NOT leave write bit-deskew enabled
1118 ddr_print("N%d.LMC%d: WriteDeskewConfig: final: read: NOOP\n", node, ddr_interface_num);
1119 do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 0);
1120 Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
1121 Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
1122 #endif
1123 }
1124
1125 #define SCALING_FACTOR (1000)
1126 #define Dprintf debug_print // make this "ddr_print" for extra debug output below
1127 static int compute_Vref_1slot_2rank(int rtt_wr, int rtt_park, int dqx_ctl, int rank_count)
1128 {
1129 uint64_t Reff_s;
1130 uint64_t Rser_s = 15;
1131 uint64_t Vdd = 1200;
1132 uint64_t Vref;
1133 //uint64_t Vl;
1134 uint64_t rtt_wr_s = (((rtt_wr == 0) || (rtt_wr == 99)) ? 1*1024*1024 : rtt_wr); // 99 == HiZ
1135 uint64_t rtt_park_s = (((rtt_park == 0) || ((rank_count == 1) && (rtt_wr != 0))) ? 1*1024*1024 : rtt_park);
1136 uint64_t dqx_ctl_s = (dqx_ctl == 0 ? 1*1024*1024 : dqx_ctl);
1137 int Vref_value;
1138 uint64_t Rangepc = 6000; // range1 base is 60%
1139 uint64_t Vrefpc;
1140 int Vref_range = 0;
1141
1142 Dprintf("rtt_wr = %d, rtt_park = %d, dqx_ctl = %d\n", rtt_wr, rtt_park, dqx_ctl);
1143 Dprintf("rtt_wr_s = %d, rtt_park_s = %d, dqx_ctl_s = %d\n", rtt_wr_s, rtt_park_s, dqx_ctl_s);
1144
1145 Reff_s = divide_nint((rtt_wr_s * rtt_park_s) , (rtt_wr_s + rtt_park_s));
1146 Dprintf("Reff_s = %d\n", Reff_s);
1147
1148 //Vl = (((Rser_s + dqx_ctl_s) * SCALING_FACTOR) / (Rser_s + dqx_ctl_s + Reff_s)) * Vdd / SCALING_FACTOR;
1149 //printf("Vl = %d\n", Vl);
1150
1151 Vref = (((Rser_s + dqx_ctl_s) * SCALING_FACTOR) / (Rser_s + dqx_ctl_s + Reff_s)) + SCALING_FACTOR;
1152 Dprintf("Vref = %d\n", Vref);
1153
1154 Vref = (Vref * Vdd) / 2 / SCALING_FACTOR;
1155 Dprintf("Vref = %d\n", Vref);
1156
1157 Vrefpc = (Vref * 100 * 100) / Vdd;
1158 Dprintf("Vrefpc = %d\n", Vrefpc);
1159
1160 if (Vrefpc < Rangepc) { // < range1 base, use range2
1161 Vref_range = 1 << 6; // set bit A6 for range2
1162 Rangepc = 4500; // range2 base is 45%
1163 }
1164
1165 Vref_value = divide_nint(Vrefpc - Rangepc, 65);
1166 if (Vref_value < 0)
1167 Vref_value = Vref_range; // set to base of range as lowest value
1168 else
1169 Vref_value |= Vref_range;
1170 Dprintf("Vref_value = %d (0x%02x)\n", Vref_value, Vref_value);
1171
1172 debug_print("rtt_wr:%d, rtt_park:%d, dqx_ctl:%d, Vref_value:%d (0x%x)\n",
1173 rtt_wr, rtt_park, dqx_ctl, Vref_value, Vref_value);
1174
1175 return Vref_value;
1176 }
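/*
 * Worked example with hypothetical values, assuming divide_nint() rounds
 * to nearest: rtt_wr = 240, rtt_park = 120, dqx_ctl = 34, rank_count = 2.
 *   Reff   = 240*120/360 = 80 ohms
 *   Vref   = (15+34)*1000/129 + 1000 = 1379 (scaled) -> 1379*1200/2000 = 827 mV
 *   Vrefpc = 827*10000/1200 = 6891, i.e. about 68.9% of VDD
 * That is above the 60% range-1 base, so Vref_value is roughly
 * (6891-6000)/65 -> 14 (0x0E), a DDR4 VrefDQ range-1 step.
 */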
1177 static int compute_Vref_2slot_2rank(int rtt_wr, int rtt_park_00, int rtt_park_01, int dqx_ctl, int rtt_nom)
1178 {
1179 //uint64_t Rser = 15;
1180 uint64_t Vdd = 1200;
1181 //uint64_t Vref;
1182 uint64_t Vl, Vlp, Vcm;
1183 uint64_t Rd0, Rd1, Rpullup;
1184 uint64_t rtt_wr_s = (((rtt_wr == 0) || (rtt_wr == 99)) ? 1*1024*1024 : rtt_wr); // 99 == HiZ
1185 uint64_t rtt_park_00_s = (rtt_park_00 == 0 ? 1*1024*1024 : rtt_park_00);
1186 uint64_t rtt_park_01_s = (rtt_park_01 == 0 ? 1*1024*1024 : rtt_park_01);
1187 uint64_t dqx_ctl_s = (dqx_ctl == 0 ? 1*1024*1024 : dqx_ctl);
1188 uint64_t rtt_nom_s = (rtt_nom == 0 ? 1*1024*1024 : rtt_nom);
1189 int Vref_value;
1190 uint64_t Rangepc = 6000; // range1 base is 60%
1191 uint64_t Vrefpc;
1192 int Vref_range = 0;
1193
1194 // Rd0 = (RTT_NOM /*parallel*/ RTT_WR) + 15 = ((RTT_NOM * RTT_WR) / (RTT_NOM + RTT_WR)) + 15
1195 Rd0 = divide_nint((rtt_nom_s * rtt_wr_s), (rtt_nom_s + rtt_wr_s)) + 15;
1196 //printf("Rd0 = %ld\n", Rd0);
1197
1198 // Rd1 = (RTT_PARK_00 /*parallel*/ RTT_PARK_01) + 15 = ((RTT_PARK_00 * RTT_PARK_01) / (RTT_PARK_00 + RTT_PARK_01)) + 15
1199 Rd1 = divide_nint((rtt_park_00_s * rtt_park_01_s), (rtt_park_00_s + rtt_park_01_s)) + 15;
1200 //printf("Rd1 = %ld\n", Rd1);
1201
1202 // Rpullup = Rd0 /*parallel*/ Rd1 = (Rd0 * Rd1) / (Rd0 + Rd1)
1203 Rpullup = divide_nint((Rd0 * Rd1), (Rd0 + Rd1));
1204 //printf("Rpullup = %ld\n", Rpullup);
1205
1206 // Vl = (DQX_CTL / (DQX_CTL + Rpullup)) * 1.2
1207 Vl = divide_nint((dqx_ctl_s * Vdd), (dqx_ctl_s + Rpullup));
1208 //printf("Vl = %ld\n", Vl);
1209
1210 // Vlp = ((15 / Rd0) * (1.2 - Vl)) + Vl
1211 Vlp = divide_nint((15 * (Vdd - Vl)), Rd0) + Vl;
1212 //printf("Vlp = %ld\n", Vlp);
1213
1214 // Vcm = (Vlp + 1.2) / 2
1215 Vcm = divide_nint((Vlp + Vdd), 2);
1216 //printf("Vcm = %ld\n", Vcm);
1217
1218 // Vrefpc = (Vcm / 1.2) * 100
1219 Vrefpc = divide_nint((Vcm * 100 * 100), Vdd);
1220 //printf("Vrefpc = %ld\n", Vrefpc);
1221
1222 if (Vrefpc < Rangepc) { // < range1 base, use range2
1223 Vref_range = 1 << 6; // set bit A6 for range2
1224 Rangepc = 4500; // range2 base is 45%
1225 }
1226
1227 Vref_value = divide_nint(Vrefpc - Rangepc, 65);
1228 if (Vref_value < 0)
1229 Vref_value = Vref_range; // set to base of range as lowest value
1230 else
1231 Vref_value |= Vref_range;
1232 //printf("Vref_value = %d (0x%02x)\n", Vref_value, Vref_value);
1233
1234 debug_print("rtt_wr:%d, rtt_park_00:%d, rtt_park_01:%d, dqx_ctl:%d, rtt_nom:%d, Vref_value:%d (0x%x)\n",
1235 rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom, Vref_value, Vref_value);
1236
1237 return Vref_value;
1238 }
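/*
 * Worked example with hypothetical values: rtt_wr = 240, rtt_park_00 =
 * rtt_park_01 = 120, dqx_ctl = 34, rtt_nom = 120.
 *   Rd0 = (120||240) + 15 = 95, Rd1 = (120||120) + 15 = 75, Rpullup ~= 42
 *   Vl ~= 537 mV, Vlp ~= 642 mV, Vcm ~= 921 mV -> Vrefpc ~= 7675 (76.8%)
 * That is above the 60% range-1 base, so Vref_value is roughly
 * (7675-6000)/65 -> 26 (0x1A), again assuming divide_nint() rounds to
 * nearest.
 */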
1239
1240 // NOTE: only call this for DIMMs with 1 or 2 ranks, not 4.
1241 int
1242 compute_vref_value(bdk_node_t node, int ddr_interface_num,
1243 int rankx, int dimm_count, int rank_count,
1244 impedence_values_t *imp_values, int is_stacked_die)
1245 {
1246 int computed_final_vref_value = 0;
1247
1248     /* Calculate an override of the measured Vref value,
1249        but only for configurations we know how to handle. */
1250 // we have code for 2-rank DIMMs in both 1-slot or 2-slot configs,
1251 // and can use the 2-rank 1-slot code for 1-rank DIMMs in 1-slot configs
1252 // and can use the 2-rank 2-slot code for 1-rank DIMMs in 2-slot configs
1253
1254 int rtt_wr, dqx_ctl, rtt_nom, index;
1255 bdk_lmcx_modereg_params1_t lmc_modereg_params1;
1256 bdk_lmcx_modereg_params2_t lmc_modereg_params2;
1257 bdk_lmcx_comp_ctl2_t comp_ctl2;
1258
1259 lmc_modereg_params1.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num));
1260 lmc_modereg_params2.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num));
1261 comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
1262 dqx_ctl = imp_values->dqx_strength[comp_ctl2.s.dqx_ctl];
1263
1264 // WR always comes from the current rank
1265 index = (lmc_modereg_params1.u >> (rankx * 12 + 5)) & 0x03;
1266 if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
1267 index |= lmc_modereg_params1.u >> (51+rankx-2) & 0x04;
1268 }
1269 rtt_wr = imp_values->rtt_wr_ohms [index];
1270
1271 // separate calculations for 1 vs 2 DIMMs per LMC
1272 if (dimm_count == 1) {
1273 // PARK comes from this rank if 1-rank, otherwise other rank
1274 index = (lmc_modereg_params2.u >> ((rankx ^ (rank_count - 1)) * 10 + 0)) & 0x07;
1275 int rtt_park = imp_values->rtt_nom_ohms[index];
1276 computed_final_vref_value = compute_Vref_1slot_2rank(rtt_wr, rtt_park, dqx_ctl, rank_count);
1277 } else {
1278 // get both PARK values from the other DIMM
1279 index = (lmc_modereg_params2.u >> ((rankx ^ 0x02) * 10 + 0)) & 0x07;
1280 int rtt_park_00 = imp_values->rtt_nom_ohms[index];
1281 index = (lmc_modereg_params2.u >> ((rankx ^ 0x03) * 10 + 0)) & 0x07;
1282 int rtt_park_01 = imp_values->rtt_nom_ohms[index];
1283 // NOM comes from this rank if 1-rank, otherwise other rank
1284 index = (lmc_modereg_params1.u >> ((rankx ^ (rank_count - 1)) * 12 + 9)) & 0x07;
1285 rtt_nom = imp_values->rtt_nom_ohms[index];
1286 computed_final_vref_value = compute_Vref_2slot_2rank(rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom);
1287 }
1288
1289 #if ENABLE_COMPUTED_VREF_ADJUSTMENT
1290 {
1291 int saved_final_vref_value = computed_final_vref_value;
1292 BDK_CSR_INIT(lmc_config, node, BDK_LMCX_CONFIG(ddr_interface_num));
1293 /*
1294       New computed Vref = existing computed Vref - X
1295 
1296       The value of X depends on the configuration. Both #122 and #139 are 2Rx4 RDIMMs,
1297       while #124 is a stacked-die 2Rx4, so the results fall into two cases:
1298 
1299       1. Stacked Die: 2Rx4
1300          1-slot: offset = 7, i.e. New computed Vref = existing computed Vref - 7
1301 2-slot: offset = 6
1302
1303 2. Regular: 2Rx4
1304 1-slot: offset = 3
1305 2-slot: offset = 2
1306 */
1307 // we know we never get called unless DDR4, so test just the other conditions
1308 if((!!__bdk_dram_is_rdimm(node, 0)) &&
1309 (rank_count == 2) &&
1310 (lmc_config.s.mode_x4dev))
1311 { // it must first be RDIMM and 2-rank and x4
1312 if (is_stacked_die) { // now do according to stacked die or not...
1313 computed_final_vref_value -= (dimm_count == 1) ? 7 : 6;
1314 } else {
1315 computed_final_vref_value -= (dimm_count == 1) ? 3 : 2;
1316 }
1317 // we have adjusted it, so print it out if verbosity is right
1318 VB_PRT(VBL_TME, "N%d.LMC%d.R%d: adjusting computed vref from %2d (0x%02x) to %2d (0x%02x)\n",
1319 node, ddr_interface_num, rankx,
1320 saved_final_vref_value, saved_final_vref_value,
1321 computed_final_vref_value, computed_final_vref_value);
1322 }
1323 }
1324 #endif
1325 return computed_final_vref_value;
1326 }
1327
1328 static unsigned int EXTR_WR(uint64_t u, int x)
1329 {
1330 return (unsigned int)(((u >> (x*12+5)) & 0x3UL) | ((u >> (51+x-2)) & 0x4UL));
1331 }
1332 static void INSRT_WR(uint64_t *up, int x, int v)
1333 {
1334 uint64_t u = *up;
1335 u &= ~(((0x3UL) << (x*12+5)) | ((0x1UL) << (51+x)));
1336 *up = (u | ((v & 0x3UL) << (x*12+5)) | ((v & 0x4UL) << (51+x-2)));
1337 return;
1338 }
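/*
 * Field layout implied by the shifts above: for rank x, the low two bits
 * of the RTT_WR code sit at MODEREG_PARAMS1 bits [x*12+6 : x*12+5] and the
 * extension bit sits at bit 51+x. A quick round-trip check with
 * hypothetical values x = 0, v = 5 (0b101):
 *
 *     uint64_t u = 0;
 *     INSRT_WR(&u, 0, 5);          // sets bit 5 (low bits = 01) and bit 51
 *     assert(EXTR_WR(u, 0) == 5);  // ((u>>5)&3) | ((u>>49)&4) = 1 | 4 = 5
 */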
1339
1340 static int encode_row_lsb_ddr3(int row_lsb, int ddr_interface_wide)
1341 {
1342 int encoded_row_lsb;
1343 int row_lsb_start = 14;
1344
1345 /* Decoding for row_lsb */
1346 /* 000: row_lsb = mem_adr[14] */
1347 /* 001: row_lsb = mem_adr[15] */
1348 /* 010: row_lsb = mem_adr[16] */
1349 /* 011: row_lsb = mem_adr[17] */
1350 /* 100: row_lsb = mem_adr[18] */
1351 /* 101: row_lsb = mem_adr[19] */
1352 /* 110: row_lsb = mem_adr[20] */
1353 /* 111: RESERVED */
1354
1355 row_lsb_start = 14;
1356
1357 encoded_row_lsb = row_lsb - row_lsb_start ;
1358
1359 return encoded_row_lsb;
1360 }
1361
1362 static int encode_pbank_lsb_ddr3(int pbank_lsb, int ddr_interface_wide)
1363 {
1364 int encoded_pbank_lsb;
1365
1366 /* Decoding for pbank_lsb */
1367 /* 0000:DIMM = mem_adr[28] / rank = mem_adr[27] (if RANK_ENA) */
1368 /* 0001:DIMM = mem_adr[29] / rank = mem_adr[28] " */
1369 /* 0010:DIMM = mem_adr[30] / rank = mem_adr[29] " */
1370 /* 0011:DIMM = mem_adr[31] / rank = mem_adr[30] " */
1371 /* 0100:DIMM = mem_adr[32] / rank = mem_adr[31] " */
1372 /* 0101:DIMM = mem_adr[33] / rank = mem_adr[32] " */
1373 /* 0110:DIMM = mem_adr[34] / rank = mem_adr[33] " */
1374 /* 0111:DIMM = 0 / rank = mem_adr[34] " */
1375 /* 1000-1111: RESERVED */
1376
1377 int pbank_lsb_start = 0;
1378
1379 pbank_lsb_start = 28;
1380
1381 encoded_pbank_lsb = pbank_lsb - pbank_lsb_start;
1382
1383 return encoded_pbank_lsb;
1384 }
1385
1386 static uint64_t octeon_read_lmcx_ddr3_rlevel_dbg(bdk_node_t node, int ddr_interface_num, int idx)
1387 {
1388 DRAM_CSR_MODIFY(c, node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num),
1389 c.s.byte = idx);
1390 BDK_CSR_READ(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num));
1391 BDK_CSR_INIT(rlevel_dbg, node, BDK_LMCX_RLEVEL_DBG(ddr_interface_num));
1392 return rlevel_dbg.s.bitmask;
1393 }
1394
1395 static uint64_t octeon_read_lmcx_ddr3_wlevel_dbg(bdk_node_t node, int ddr_interface_num, int idx)
1396 {
1397 bdk_lmcx_wlevel_dbg_t wlevel_dbg;
1398
1399 wlevel_dbg.u = 0;
1400 wlevel_dbg.s.byte = idx;
1401
1402 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_DBG(ddr_interface_num), wlevel_dbg.u);
1403 BDK_CSR_READ(node, BDK_LMCX_WLEVEL_DBG(ddr_interface_num));
1404
1405 wlevel_dbg.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_DBG(ddr_interface_num));
1406 return wlevel_dbg.s.bitmask;
1407 }
1408
1409
1410 /*
1411 * Apply a filter to the BITMASK results returned from Octeon
1412 * read-leveling to determine the most likely delay result. This
1413 * computed delay may be used to qualify the delay result returned by
1414 * Octeon. Accumulate an error penalty for invalid characteristics of
1415 * the bitmask so that they can be used to select the most reliable
1416 * results.
1417 *
1418 * The algorithm searches for the largest contiguous MASK within a
1419 * maximum RANGE of bits beginning with the MSB.
1420 *
1421 * 1. a MASK with a WIDTH less than 4 will be penalized
1422 * 2. Bubbles in the bitmask that occur before or after the MASK
1423 * will be penalized
1424 * 3. If there are no trailing bubbles then extra bits that occur
1425 * beyond the maximum RANGE will be penalized.
1426 *
1427 * +++++++++++++++++++++++++++++++++++++++++++++++++++
1428 * + +
1429 * + e.g. bitmask = 27B00 +
1430 * + +
1431 * + 63 +--- mstart 0 +
1432 * + | | | +
1433 * + | +---------+ +--- fb | +
1434 * + | | range | | | +
1435 * + V V V V V +
1436 * + +
1437 * + 0 0 ... 1 0 0 1 1 1 1 0 1 1 0 0 0 0 0 0 0 0 +
1438 * + +
1439 * + ^ ^ ^ +
1440 * + | | mask| +
1441 * + lb ---+ +-----+ +
1442 * + width +
1443 * + +
1444 * +++++++++++++++++++++++++++++++++++++++++++++++++++
1445 */
1446 #define RLEVEL_BITMASK_TRAILING_BITS_ERROR 5
1447 #define RLEVEL_BITMASK_BUBBLE_BITS_ERROR 11 // FIXME? now less than TOOLONG
1448 #define RLEVEL_BITMASK_NARROW_ERROR 6
1449 #define RLEVEL_BITMASK_BLANK_ERROR 100
1450 #define RLEVEL_BITMASK_TOOLONG_ERROR 12
1451
1452 #define MASKRANGE_BITS 6
1453 #define MASKRANGE ((1 << MASKRANGE_BITS) - 1)
1454
1455 static int
1456 validate_ddr3_rlevel_bitmask(rlevel_bitmask_t *rlevel_bitmask_p, int ddr_type)
1457 {
1458 int i;
1459 int errors = 0;
1460 uint64_t mask = 0; /* Used in 64-bit comparisons */
1461 int8_t mstart = 0;
1462 uint8_t width = 0;
1463 uint8_t firstbit = 0;
1464 uint8_t lastbit = 0;
1465 uint8_t bubble = 0;
1466 uint8_t tbubble = 0;
1467 uint8_t blank = 0;
1468 uint8_t narrow = 0;
1469 uint8_t trailing = 0;
1470 uint64_t bitmask = rlevel_bitmask_p->bm;
1471 uint8_t extras = 0;
1472 uint8_t toolong = 0;
1473 uint64_t temp;
1474
1475 if (bitmask == 0) {
1476 blank += RLEVEL_BITMASK_BLANK_ERROR;
1477 } else {
1478
1479 /* Look for fb, the first bit */
1480 temp = bitmask;
1481 while (!(temp & 1)) {
1482 firstbit++;
1483 temp >>= 1;
1484 }
1485
1486 /* Look for lb, the last bit */
1487 lastbit = firstbit;
1488 while ((temp >>= 1))
1489 lastbit++;
1490
1491 /* Start with the max range to try to find the largest mask within the bitmask data */
1492 width = MASKRANGE_BITS;
1493 for (mask = MASKRANGE; mask > 0; mask >>= 1, --width) {
1494 for (mstart = lastbit - width + 1; mstart >= firstbit; --mstart) {
1495 temp = mask << mstart;
1496 if ((bitmask & temp) == temp)
1497 goto done_now;
1498 }
1499 }
1500 done_now:
1501 /* look for any more contiguous 1's to the right of mstart */
1502 if (width == MASKRANGE_BITS) { // only when maximum mask
1503 while ((bitmask >> (mstart - 1)) & 1) { // slide right over more 1's
1504 --mstart;
1505 if (ddr_type == DDR4_DRAM) // only for DDR4
1506 extras++; // count the number of extra bits
1507 }
1508 }
1509
1510 /* Penalize any extra 1's beyond the maximum desired mask */
1511 if (extras > 0)
1512 toolong = RLEVEL_BITMASK_TOOLONG_ERROR * ((1 << extras) - 1);
1513
1514 /* Detect if bitmask is too narrow. */
1515 if (width < 4)
1516 narrow = (4 - width) * RLEVEL_BITMASK_NARROW_ERROR;
1517
1518 /* detect leading bubble bits, that is, any 0's between first and mstart */
1519 temp = bitmask >> (firstbit + 1);
1520 i = mstart - firstbit - 1;
1521 while (--i >= 0) {
1522 if ((temp & 1) == 0)
1523 bubble += RLEVEL_BITMASK_BUBBLE_BITS_ERROR;
1524 temp >>= 1;
1525 }
1526
1527 temp = bitmask >> (mstart + width + extras);
1528 i = lastbit - (mstart + width + extras - 1);
1529 while (--i >= 0) {
1530 if (temp & 1) { /* Detect 1 bits after the trailing end of the mask, including last. */
1531 trailing += RLEVEL_BITMASK_TRAILING_BITS_ERROR;
1532 } else { /* Detect trailing bubble bits, that is, any 0's between end-of-mask and last */
1533 tbubble += RLEVEL_BITMASK_BUBBLE_BITS_ERROR;
1534 }
1535 temp >>= 1;
1536 }
1537 }
1538
1539 errors = bubble + tbubble + blank + narrow + trailing + toolong;
1540
1541 /* Pass out useful statistics */
1542 rlevel_bitmask_p->mstart = mstart;
1543 rlevel_bitmask_p->width = width;
1544
1545 VB_PRT(VBL_DEV2, "bm:%08lx mask:%02llx, width:%2u, mstart:%2d, fb:%2u, lb:%2u"
1546 " (bu:%2d, tb:%2d, bl:%2d, n:%2d, t:%2d, x:%2d) errors:%3d %s\n",
1547 (unsigned long) bitmask, mask, width, mstart,
1548 firstbit, lastbit, bubble, tbubble, blank, narrow,
1549 trailing, toolong, errors, (errors) ? "=> invalid" : "");
1550
1551 return errors;
1552 }
1553
1554 static int compute_ddr3_rlevel_delay(uint8_t mstart, uint8_t width, bdk_lmcx_rlevel_ctl_t rlevel_ctl)
1555 {
1556 int delay;
1557
1558 debug_bitmask_print(" offset_en:%d", rlevel_ctl.cn8.offset_en);
1559
1560 if (rlevel_ctl.s.offset_en) {
1561 delay = max(mstart, mstart + width - 1 - rlevel_ctl.s.offset);
1562 } else {
1563 /* if (rlevel_ctl.s.offset) { */ /* Experimental */
1564 if (0) {
1565 delay = max(mstart + rlevel_ctl.s.offset, mstart + 1);
1566             /* Ensure that the offset delay falls within the bitmask */
1567 delay = min(delay, mstart + width-1);
1568 } else {
1569 delay = (width - 1) / 2 + mstart; /* Round down */
1570 /* delay = (width/2) + mstart; */ /* Round up */
1571 }
1572 }
1573
1574 return delay;
1575 }
1576
1577 #define WLEVEL_BYTE_BITS 5
1578 #define WLEVEL_BYTE_MSK ((1UL << 5) - 1)
1579
1580 static void update_wlevel_rank_struct(bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank,
1581 int byte, int delay)
1582 {
1583 bdk_lmcx_wlevel_rankx_t temp_wlevel_rank;
1584 if (byte >= 0 && byte <= 8) {
1585 temp_wlevel_rank.u = lmc_wlevel_rank->u;
1586 temp_wlevel_rank.u &= ~(WLEVEL_BYTE_MSK << (WLEVEL_BYTE_BITS * byte));
1587 temp_wlevel_rank.u |= ((delay & WLEVEL_BYTE_MSK) << (WLEVEL_BYTE_BITS * byte));
1588 lmc_wlevel_rank->u = temp_wlevel_rank.u;
1589 }
1590 }
1591
1592 static int get_wlevel_rank_struct(bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank,
1593 int byte)
1594 {
1595 int delay = 0;
1596 if (byte >= 0 && byte <= 8) {
1597 delay = ((lmc_wlevel_rank->u) >> (WLEVEL_BYTE_BITS * byte)) & WLEVEL_BYTE_MSK;
1598 }
1599 return delay;
1600 }
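/* Usage note (illustrative): each byte lane occupies a 5-bit field, so e.g.
 * update_wlevel_rank_struct(&wl, 2, 6) clears bits [14:10] of the register image and
 * ORs in 6 << 10; get_wlevel_rank_struct(&wl, 2) then returns 6.
 */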
1601
1602 #if 0
1603 // entry = 1 is valid, entry = 0 is invalid
1604 static int
1605 validity_matrix[4][4] = {[0] {1,1,1,0}, // valid pairs when cv == 0: 0,0 + 0,1 + 0,2 == "7"
1606 [1] {0,1,1,1}, // valid pairs when cv == 1: 1,1 + 1,2 + 1,3 == "E"
1607 [2] {1,0,1,1}, // valid pairs when cv == 2: 2,2 + 2,3 + 2,0 == "D"
1608 [3] {1,1,0,1}}; // valid pairs when cv == 3: 3,3 + 3,0 + 3,1 == "B"
1609 #endif
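// Note: the 0xBDE7 constant used in validate_seq() below is just the matrix above packed
// one row per nibble (cv = 3..0 -> 0xB, 0xD, 0xE, 0x7), with bit ((cv << 2) | next) set
// for each valid (current, next) pair.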
1610 static int
1611 validate_seq(int *wl, int *seq)
1612 {
1613 int seqx; // sequence index, step through the sequence array
1614 int bitnum;
1615 seqx = 0;
1616 while (seq[seqx+1] >= 0) { // stop on next seq entry == -1
1617 // but now, check current versus next
1618 #if 0
1619 if ( !validity_matrix [wl[seq[seqx]]] [wl[seq[seqx+1]]] )
1620 return 1;
1621 #else
1622 bitnum = (wl[seq[seqx]] << 2) | wl[seq[seqx+1]];
1623 if (!((1 << bitnum) & 0xBDE7)) // magic validity number (see matrix above)
1624 return 1;
1625 #endif
1626 seqx++;
1627 }
1628 return 0;
1629 }
1630
1631 static int
1632 Validate_HW_WL_Settings(bdk_node_t node, int ddr_interface_num,
1633 bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank,
1634 int ecc_ena)
1635 {
1636 int wl[9], byte, errors;
1637
1638     // arrange the byte-lane check sequences; the ECC byte position depends on UDIMM vs RDIMM routing
1639 int useq[] = { 0,1,2,3,8,4,5,6,7,-1 }; // index 0 has byte 0, etc, ECC in middle
1640 int rseq1[] = { 8,3,2,1,0,-1 }; // index 0 is ECC, then go down
1641 int rseq2[] = { 4,5,6,7,-1 }; // index 0 has byte 4, then go up
1642 int useqno[] = { 0,1,2,3,4,5,6,7,-1 }; // index 0 has byte 0, etc, no ECC
1643 int rseq1no[] = { 3,2,1,0,-1 }; // index 0 is byte 3, then go down, no ECC
1644
1645 // in the CSR, bytes 0-7 are always data, byte 8 is ECC
1646 for (byte = 0; byte < 8+ecc_ena; byte++) {
1647 wl[byte] = (get_wlevel_rank_struct(lmc_wlevel_rank, byte) >> 1) & 3; // preprocess :-)
1648 }
1649
1650 errors = 0;
1651 if (__bdk_dram_is_rdimm(node, 0) != 0) { // RDIMM order
1652 errors = validate_seq(wl, (ecc_ena) ? rseq1 : rseq1no);
1653 errors += validate_seq(wl, rseq2);
1654 } else { // UDIMM order
1655 errors = validate_seq(wl, (ecc_ena) ? useq : useqno);
1656 }
1657
1658 return errors;
1659 }
1660
1661 #define RLEVEL_BYTE_BITS 6
1662 #define RLEVEL_BYTE_MSK ((1UL << 6) - 1)
1663
1664 static void update_rlevel_rank_struct(bdk_lmcx_rlevel_rankx_t *lmc_rlevel_rank,
1665 int byte, int delay)
1666 {
1667 bdk_lmcx_rlevel_rankx_t temp_rlevel_rank;
1668 if (byte >= 0 && byte <= 8) {
1669 temp_rlevel_rank.u = lmc_rlevel_rank->u & ~(RLEVEL_BYTE_MSK << (RLEVEL_BYTE_BITS * byte));
1670 temp_rlevel_rank.u |= ((delay & RLEVEL_BYTE_MSK) << (RLEVEL_BYTE_BITS * byte));
1671 lmc_rlevel_rank->u = temp_rlevel_rank.u;
1672 }
1673 }
1674
1675 #if RLEXTRAS_PATCH || !DISABLE_SW_WL_PASS_2
1676 static int get_rlevel_rank_struct(bdk_lmcx_rlevel_rankx_t *lmc_rlevel_rank,
1677 int byte)
1678 {
1679 int delay = 0;
1680 if (byte >= 0 && byte <= 8) {
1681 delay = ((lmc_rlevel_rank->u) >> (RLEVEL_BYTE_BITS * byte)) & RLEVEL_BYTE_MSK;
1682 }
1683 return delay;
1684 }
1685 #endif
1686
1687 static void unpack_rlevel_settings(int ddr_interface_bytemask, int ecc_ena,
1688 rlevel_byte_data_t *rlevel_byte,
1689 bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank)
1690 {
1691 if ((ddr_interface_bytemask & 0xff) == 0xff) {
1692 if (ecc_ena) {
1693 rlevel_byte[8].delay = lmc_rlevel_rank.cn83xx.byte7;
1694 rlevel_byte[7].delay = lmc_rlevel_rank.cn83xx.byte6;
1695 rlevel_byte[6].delay = lmc_rlevel_rank.cn83xx.byte5;
1696 rlevel_byte[5].delay = lmc_rlevel_rank.cn83xx.byte4;
1697 rlevel_byte[4].delay = lmc_rlevel_rank.cn83xx.byte8; /* ECC */
1698 } else {
1699 rlevel_byte[7].delay = lmc_rlevel_rank.cn83xx.byte7;
1700 rlevel_byte[6].delay = lmc_rlevel_rank.cn83xx.byte6;
1701 rlevel_byte[5].delay = lmc_rlevel_rank.cn83xx.byte5;
1702 rlevel_byte[4].delay = lmc_rlevel_rank.cn83xx.byte4;
1703 }
1704 } else {
1705 rlevel_byte[8].delay = lmc_rlevel_rank.cn83xx.byte8; /* unused */
1706 rlevel_byte[7].delay = lmc_rlevel_rank.cn83xx.byte7; /* unused */
1707 rlevel_byte[6].delay = lmc_rlevel_rank.cn83xx.byte6; /* unused */
1708 rlevel_byte[5].delay = lmc_rlevel_rank.cn83xx.byte5; /* unused */
1709 rlevel_byte[4].delay = lmc_rlevel_rank.cn83xx.byte4; /* ECC */
1710 }
1711 rlevel_byte[3].delay = lmc_rlevel_rank.cn83xx.byte3;
1712 rlevel_byte[2].delay = lmc_rlevel_rank.cn83xx.byte2;
1713 rlevel_byte[1].delay = lmc_rlevel_rank.cn83xx.byte1;
1714 rlevel_byte[0].delay = lmc_rlevel_rank.cn83xx.byte0;
1715 }
1716
1717 static void pack_rlevel_settings(int ddr_interface_bytemask, int ecc_ena,
1718 rlevel_byte_data_t *rlevel_byte,
1719 bdk_lmcx_rlevel_rankx_t *final_rlevel_rank)
1720 {
1721 bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank = *final_rlevel_rank;
1722
1723 if ((ddr_interface_bytemask & 0xff) == 0xff) {
1724 if (ecc_ena) {
1725 lmc_rlevel_rank.cn83xx.byte7 = rlevel_byte[8].delay;
1726 lmc_rlevel_rank.cn83xx.byte6 = rlevel_byte[7].delay;
1727 lmc_rlevel_rank.cn83xx.byte5 = rlevel_byte[6].delay;
1728 lmc_rlevel_rank.cn83xx.byte4 = rlevel_byte[5].delay;
1729 lmc_rlevel_rank.cn83xx.byte8 = rlevel_byte[4].delay; /* ECC */
1730 } else {
1731 lmc_rlevel_rank.cn83xx.byte7 = rlevel_byte[7].delay;
1732 lmc_rlevel_rank.cn83xx.byte6 = rlevel_byte[6].delay;
1733 lmc_rlevel_rank.cn83xx.byte5 = rlevel_byte[5].delay;
1734 lmc_rlevel_rank.cn83xx.byte4 = rlevel_byte[4].delay;
1735 }
1736 } else {
1737 lmc_rlevel_rank.cn83xx.byte8 = rlevel_byte[8].delay;
1738 lmc_rlevel_rank.cn83xx.byte7 = rlevel_byte[7].delay;
1739 lmc_rlevel_rank.cn83xx.byte6 = rlevel_byte[6].delay;
1740 lmc_rlevel_rank.cn83xx.byte5 = rlevel_byte[5].delay;
1741 lmc_rlevel_rank.cn83xx.byte4 = rlevel_byte[4].delay;
1742 }
1743 lmc_rlevel_rank.cn83xx.byte3 = rlevel_byte[3].delay;
1744 lmc_rlevel_rank.cn83xx.byte2 = rlevel_byte[2].delay;
1745 lmc_rlevel_rank.cn83xx.byte1 = rlevel_byte[1].delay;
1746 lmc_rlevel_rank.cn83xx.byte0 = rlevel_byte[0].delay;
1747
1748 *final_rlevel_rank = lmc_rlevel_rank;
1749 }
1750
1751 #if !DISABLE_SW_WL_PASS_2
1752 static void rlevel_to_wlevel(bdk_lmcx_rlevel_rankx_t *lmc_rlevel_rank,
1753 bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank, int byte)
1754 {
1755 int byte_delay = get_rlevel_rank_struct(lmc_rlevel_rank, byte);
1756
1757 debug_print("Estimating Wlevel delay byte %d: ", byte);
1758 debug_print("Rlevel=%d => ", byte_delay);
1759 byte_delay = divide_roundup(byte_delay,2) & 0x1e;
1760 debug_print("Wlevel=%d\n", byte_delay);
1761 update_wlevel_rank_struct(lmc_wlevel_rank, byte, byte_delay);
1762 }
1763 #endif /* !DISABLE_SW_WL_PASS_2 */
1764
1765 /* Delay trend: constant=0, decreasing=-1, increasing=1 */
1766 static int calc_delay_trend(int v)
1767 {
1768 if (v == 0)
1769 return (0);
1770 if (v < 0)
1771 return (-1);
1772 return 1;
1773 }
1774
1775 /* Evaluate delay sequence across the whole range of byte delays while
1776 ** keeping track of the overall delay trend, increasing or decreasing.
1777 ** If the trend changes charge an error amount to the score.
1778 */
1779
1780 // NOTE: "max_adj_delay_inc" argument is, by default, 1 for DDR3 and 2 for DDR4
1781
1782 static int nonsequential_delays(rlevel_byte_data_t *rlevel_byte,
1783 int start, int end, int max_adj_delay_inc)
1784 {
1785 int error = 0;
1786 int delay_trend, prev_trend = 0;
1787 int byte_idx;
1788 int delay_inc;
1789 int delay_diff;
1790 int byte_err;
1791
1792 for (byte_idx = start; byte_idx < end; ++byte_idx) {
1793 byte_err = 0;
1794
1795 delay_diff = rlevel_byte[byte_idx+1].delay - rlevel_byte[byte_idx].delay;
1796 delay_trend = calc_delay_trend(delay_diff);
1797
1798 debug_bitmask_print("Byte %d: %2d, Byte %d: %2d, delay_trend: %2d, prev_trend: %2d",
1799 byte_idx+0, rlevel_byte[byte_idx+0].delay,
1800 byte_idx+1, rlevel_byte[byte_idx+1].delay,
1801 delay_trend, prev_trend);
1802
1803 /* Increment error each time the trend changes to the opposite direction.
1804 */
1805 if ((prev_trend != 0) && (delay_trend != 0) && (prev_trend != delay_trend)) {
1806 byte_err += RLEVEL_NONSEQUENTIAL_DELAY_ERROR;
1807 prev_trend = delay_trend;
1808 debug_bitmask_print(" => Nonsequential byte delay");
1809 }
1810
1811 delay_inc = _abs(delay_diff); // how big was the delay change, if any
1812
1813 /* Even if the trend did not change to the opposite direction, check for
1814 the magnitude of the change, and scale the penalty by the amount that
1815 the size is larger than the provided limit.
1816 */
1817 if ((max_adj_delay_inc != 0) && (delay_inc > max_adj_delay_inc)) {
1818 byte_err += (delay_inc - max_adj_delay_inc) * RLEVEL_ADJACENT_DELAY_ERROR;
1819 debug_bitmask_print(" => Adjacent delay error");
1820 }
1821
1822 debug_bitmask_print("\n");
1823 if (delay_trend != 0)
1824 prev_trend = delay_trend;
1825
1826 rlevel_byte[byte_idx+1].sqerrs = byte_err;
1827 error += byte_err;
1828 }
1829 return error;
1830 }
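/* Worked example (illustrative): byte delays {10, 12, 11, 13} with max_adj_delay_inc = 2:
 * 10->12 establishes an increasing trend; 11 and then 13 each reverse the previous trend,
 * so two RLEVEL_NONSEQUENTIAL_DELAY_ERROR penalties are charged; no step exceeds 2, so no
 * RLEVEL_ADJACENT_DELAY_ERROR is added.
 */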
1831
1832 static int roundup_ddr3_wlevel_bitmask(int bitmask)
1833 {
1834 int shifted_bitmask;
1835 int leader;
1836 int delay;
1837
1838 for (leader=0; leader<8; ++leader) {
1839 shifted_bitmask = (bitmask>>leader);
1840 if ((shifted_bitmask&1) == 0)
1841 break;
1842 }
1843
1844 for (/*leader=leader*/; leader<16; ++leader) {
1845 shifted_bitmask = (bitmask>>(leader%8));
1846 if (shifted_bitmask&1)
1847 break;
1848 }
1849
1850 delay = (leader & 1) ? leader + 1 : leader;
1851 delay = delay % 8;
1852
1853 return delay;
1854 }
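/* Worked example (illustrative): a write-leveling bitmask of 0x78 (first rising response
 * at bit 3) yields leader = 3 and rounds up to delay 4; 0x0E yields leader = 1 and delay 2.
 */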
1855
1856 /* Check to see if any custom offset values are provided */
1857 static int is_dll_offset_provided(const int8_t *dll_offset_table)
1858 {
1859 int i;
1860 if (dll_offset_table != NULL) {
1861 for (i=0; i<9; ++i) {
1862 if (dll_offset_table[i] != 0)
1863 return (1);
1864 }
1865 }
1866 return (0);
1867 }
1868
1869 /////////////////// These are the RLEVEL settings display routines
1870
1871 // flags
1872 #define WITH_NOTHING 0
1873 #define WITH_SCORE 1
1874 #define WITH_AVERAGE 2
1875 #define WITH_FINAL 4
1876 #define WITH_COMPUTE 8
1877 static void do_display_RL(bdk_node_t node, int ddr_interface_num,
1878 bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank,
1879 int rank, int flags, int score)
1880 {
1881 char score_buf[16];
1882 if (flags & WITH_SCORE)
1883 snprintf(score_buf, sizeof(score_buf), "(%d)", score);
1884 else {
1885 score_buf[0] = ' '; score_buf[1] = 0;
1886 }
1887
1888 const char *msg_buf;
1889 char hex_buf[20];
1890 if (flags & WITH_AVERAGE) {
1891 msg_buf = " DELAY AVERAGES ";
1892 } else if (flags & WITH_FINAL) {
1893 msg_buf = " FINAL SETTINGS ";
1894 } else if (flags & WITH_COMPUTE) {
1895 msg_buf = " COMPUTED DELAYS ";
1896 } else {
1897 snprintf(hex_buf, sizeof(hex_buf), "0x%016lX", lmc_rlevel_rank.u);
1898 msg_buf = hex_buf;
1899 }
1900
1901 ddr_print("N%d.LMC%d.R%d: Rlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d %s\n",
1902 node, ddr_interface_num, rank,
1903 lmc_rlevel_rank.s.status,
1904 msg_buf,
1905 lmc_rlevel_rank.cn83xx.byte8,
1906 lmc_rlevel_rank.cn83xx.byte7,
1907 lmc_rlevel_rank.cn83xx.byte6,
1908 lmc_rlevel_rank.cn83xx.byte5,
1909 lmc_rlevel_rank.cn83xx.byte4,
1910 lmc_rlevel_rank.cn83xx.byte3,
1911 lmc_rlevel_rank.cn83xx.byte2,
1912 lmc_rlevel_rank.cn83xx.byte1,
1913 lmc_rlevel_rank.cn83xx.byte0,
1914 score_buf
1915 );
1916 }
1917
1918 static inline void
1919 display_RL(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank)
1920 {
1921 do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 0, 0);
1922 }
1923
1924 static inline void
1925 display_RL_with_score(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score)
1926 {
1927 do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 1, score);
1928 }
1929
1930 #if !PICK_BEST_RANK_SCORE_NOT_AVG
1931 static inline void
1932 display_RL_with_average(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score)
1933 {
1934 do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 3, score);
1935 }
1936 #endif
1937
1938 static inline void
1939 display_RL_with_final(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank)
1940 {
1941 do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 4, 0);
1942 }
1943
1944 static inline void
1945 display_RL_with_computed(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score)
1946 {
1947 do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 9, score);
1948 }
1949
1950 // flag values
1951 #define WITH_RODT_BLANK 0
1952 #define WITH_RODT_SKIPPING 1
1953 #define WITH_RODT_BESTROW 2
1954 #define WITH_RODT_BESTSCORE 3
1955 // control
1956 #define SKIP_SKIPPING 1
1957
1958 static const char *with_rodt_canned_msgs[4] = { " ", "SKIPPING ", "BEST ROW ", "BEST SCORE" };
1959
1960 static void display_RL_with_RODT(bdk_node_t node, int ddr_interface_num,
1961 bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score,
1962 int nom_ohms, int rodt_ohms, int flag)
1963 {
1964 const char *msg_buf;
1965 char set_buf[20];
1966 #if SKIP_SKIPPING
1967 if (flag == WITH_RODT_SKIPPING) return;
1968 #endif
1969 msg_buf = with_rodt_canned_msgs[flag];
1970 if (nom_ohms < 0) {
1971 snprintf(set_buf, sizeof(set_buf), " RODT %3d ", rodt_ohms);
1972 } else {
1973 snprintf(set_buf, sizeof(set_buf), "NOM %3d RODT %3d", nom_ohms, rodt_ohms);
1974 }
1975
1976 VB_PRT(VBL_TME, "N%d.LMC%d.R%d: Rlevel %s %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d (%d)\n",
1977 node, ddr_interface_num, rank,
1978 set_buf, msg_buf,
1979 lmc_rlevel_rank.cn83xx.byte8,
1980 lmc_rlevel_rank.cn83xx.byte7,
1981 lmc_rlevel_rank.cn83xx.byte6,
1982 lmc_rlevel_rank.cn83xx.byte5,
1983 lmc_rlevel_rank.cn83xx.byte4,
1984 lmc_rlevel_rank.cn83xx.byte3,
1985 lmc_rlevel_rank.cn83xx.byte2,
1986 lmc_rlevel_rank.cn83xx.byte1,
1987 lmc_rlevel_rank.cn83xx.byte0,
1988 score
1989 );
1990
1991 // FIXME: does this help make the output a little easier to focus?
1992 if (flag == WITH_RODT_BESTSCORE) {
1993 VB_PRT(VBL_DEV, "-----------\n");
1994 }
1995 }
1996
1997 static void
1998 do_display_WL(bdk_node_t node, int ddr_interface_num, bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank, int rank, int flags)
1999 {
2000 const char *msg_buf;
2001 char hex_buf[20];
2002 int vbl;
2003 if (flags & WITH_FINAL) {
2004 msg_buf = " FINAL SETTINGS ";
2005 vbl = VBL_NORM;
2006 } else {
2007 snprintf(hex_buf, sizeof(hex_buf), "0x%016lX", lmc_wlevel_rank.u);
2008 msg_buf = hex_buf;
2009 vbl = VBL_FAE;
2010 }
2011
2012 VB_PRT(vbl, "N%d.LMC%d.R%d: Wlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2013 node, ddr_interface_num, rank,
2014 lmc_wlevel_rank.s.status,
2015 msg_buf,
2016 lmc_wlevel_rank.s.byte8,
2017 lmc_wlevel_rank.s.byte7,
2018 lmc_wlevel_rank.s.byte6,
2019 lmc_wlevel_rank.s.byte5,
2020 lmc_wlevel_rank.s.byte4,
2021 lmc_wlevel_rank.s.byte3,
2022 lmc_wlevel_rank.s.byte2,
2023 lmc_wlevel_rank.s.byte1,
2024 lmc_wlevel_rank.s.byte0
2025 );
2026 }
2027
2028 static inline void
2029 display_WL(bdk_node_t node, int ddr_interface_num, bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank, int rank)
2030 {
2031 do_display_WL(node, ddr_interface_num, lmc_wlevel_rank, rank, WITH_NOTHING);
2032 }
2033
2034 static inline void
2035 display_WL_with_final(bdk_node_t node, int ddr_interface_num, bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank, int rank)
2036 {
2037 do_display_WL(node, ddr_interface_num, lmc_wlevel_rank, rank, WITH_FINAL);
2038 }
2039
2040 // pretty-print bitmask adjuster
2041 static uint64_t
2042 PPBM(uint64_t bm)
2043 {
2044 if (bm != 0ul) {
2045 while ((bm & 0x0fful) == 0ul)
2046 bm >>= 4;
2047 }
2048 return bm;
2049 }
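/* e.g. (illustrative) PPBM(0x3F00000) shifts off the trailing zero nibbles and returns
 * 0x3F0, so the Rlevel debug bitmasks below fit their %05llx columns.
 */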
2050
2051 // xlate PACKED index to UNPACKED index to use with rlevel_byte
2052 #define XPU(i,e) (((i) < 4)?(i):((i)<8)?(i)+(e):4)
2053 // xlate UNPACKED index to PACKED index to use with rlevel_bitmask
2054 #define XUP(i,e) (((i) < 4)?(i):((i)>4)?(i)-(e):8)
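// Mapping sketch (illustrative, for ecc_ena = 1): PACKED order keeps ECC last
// (data 0..7, ECC 8), while the UNPACKED rlevel_byte order carries ECC in slot 4:
//   XPU(i,1): 0 1 2 3 4 5 6 7 8  ->  0 1 2 3 5 6 7 8 4
//   XUP(i,1): 0 1 2 3 4 5 6 7 8  ->  0 1 2 3 8 4 5 6 7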
2055
2056 // flag values
2057 #define WITH_WL_BITMASKS 0
2058 #define WITH_RL_BITMASKS 1
2059 #define WITH_RL_MASK_SCORES 2
2060 #define WITH_RL_SEQ_SCORES 3
2061 static void
2062 do_display_BM(bdk_node_t node, int ddr_interface_num, int rank, void *bm, int flags, int ecc_ena)
2063 {
2064 int ecc = !!ecc_ena;
2065 if (flags == WITH_WL_BITMASKS) { // wlevel_bitmask array in PACKED index order, so just print them
2066 int *bitmasks = (int *)bm;
2067
2068 ddr_print("N%d.LMC%d.R%d: Wlevel Debug Results : %05x %05x %05x %05x %05x %05x %05x %05x %05x\n",
2069 node, ddr_interface_num, rank,
2070 bitmasks[8],
2071 bitmasks[7],
2072 bitmasks[6],
2073 bitmasks[5],
2074 bitmasks[4],
2075 bitmasks[3],
2076 bitmasks[2],
2077 bitmasks[1],
2078 bitmasks[0]
2079 );
2080 } else
2081 if (flags == WITH_RL_BITMASKS) { // rlevel_bitmask array in PACKED index order, so just print them
2082 rlevel_bitmask_t *rlevel_bitmask = (rlevel_bitmask_t *)bm;
2083 ddr_print("N%d.LMC%d.R%d: Rlevel Debug Bitmasks 8:0 : %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx\n",
2084 node, ddr_interface_num, rank,
2085 PPBM(rlevel_bitmask[8].bm),
2086 PPBM(rlevel_bitmask[7].bm),
2087 PPBM(rlevel_bitmask[6].bm),
2088 PPBM(rlevel_bitmask[5].bm),
2089 PPBM(rlevel_bitmask[4].bm),
2090 PPBM(rlevel_bitmask[3].bm),
2091 PPBM(rlevel_bitmask[2].bm),
2092 PPBM(rlevel_bitmask[1].bm),
2093 PPBM(rlevel_bitmask[0].bm)
2094 );
2095 } else
2096 if (flags == WITH_RL_MASK_SCORES) { // rlevel_bitmask array in PACKED index order, so just print them
2097 rlevel_bitmask_t *rlevel_bitmask = (rlevel_bitmask_t *)bm;
2098 ddr_print("N%d.LMC%d.R%d: Rlevel Debug Bitmask Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2099 node, ddr_interface_num, rank,
2100 rlevel_bitmask[8].errs,
2101 rlevel_bitmask[7].errs,
2102 rlevel_bitmask[6].errs,
2103 rlevel_bitmask[5].errs,
2104 rlevel_bitmask[4].errs,
2105 rlevel_bitmask[3].errs,
2106 rlevel_bitmask[2].errs,
2107 rlevel_bitmask[1].errs,
2108 rlevel_bitmask[0].errs
2109 );
2110 } else
2111 if (flags == WITH_RL_SEQ_SCORES) { // rlevel_byte array in UNPACKED index order, so xlate and print them
2112 rlevel_byte_data_t *rlevel_byte = (rlevel_byte_data_t *)bm;
2113 ddr_print("N%d.LMC%d.R%d: Rlevel Debug Non-seq Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2114 node, ddr_interface_num, rank,
2115 rlevel_byte[XPU(8,ecc)].sqerrs,
2116 rlevel_byte[XPU(7,ecc)].sqerrs,
2117 rlevel_byte[XPU(6,ecc)].sqerrs,
2118 rlevel_byte[XPU(5,ecc)].sqerrs,
2119 rlevel_byte[XPU(4,ecc)].sqerrs,
2120 rlevel_byte[XPU(3,ecc)].sqerrs,
2121 rlevel_byte[XPU(2,ecc)].sqerrs,
2122 rlevel_byte[XPU(1,ecc)].sqerrs,
2123 rlevel_byte[XPU(0,ecc)].sqerrs
2124 );
2125 }
2126 }
2127
2128 static inline void
2129 display_WL_BM(bdk_node_t node, int ddr_interface_num, int rank, int *bitmasks)
2130 {
2131 do_display_BM(node, ddr_interface_num, rank, (void *)bitmasks, WITH_WL_BITMASKS, 0);
2132 }
2133
2134 static inline void
2135 display_RL_BM(bdk_node_t node, int ddr_interface_num, int rank, rlevel_bitmask_t *bitmasks, int ecc_ena)
2136 {
2137 do_display_BM(node, ddr_interface_num, rank, (void *)bitmasks, WITH_RL_BITMASKS, ecc_ena);
2138 }
2139
2140 static inline void
2141 display_RL_BM_scores(bdk_node_t node, int ddr_interface_num, int rank, rlevel_bitmask_t *bitmasks, int ecc_ena)
2142 {
2143 do_display_BM(node, ddr_interface_num, rank, (void *)bitmasks, WITH_RL_MASK_SCORES, ecc_ena);
2144 }
2145
2146 static inline void
2147 display_RL_SEQ_scores(bdk_node_t node, int ddr_interface_num, int rank, rlevel_byte_data_t *bytes, int ecc_ena)
2148 {
2149 do_display_BM(node, ddr_interface_num, rank, (void *)bytes, WITH_RL_SEQ_SCORES, ecc_ena);
2150 }
2151
2152 unsigned short load_dll_offset(bdk_node_t node, int ddr_interface_num,
2153 int dll_offset_mode, int byte_offset, int byte)
2154 {
2155 bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
2156 /* byte_sel:
2157 0x1 = byte 0, ..., 0x9 = byte 8
2158 0xA = all bytes */
2159 int byte_sel = (byte == 10) ? byte : byte + 1;
2160
2161 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
2162 SET_DDR_DLL_CTL3(load_offset, 0);
2163 DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
2164 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
2165
2166 SET_DDR_DLL_CTL3(mode_sel, dll_offset_mode);
2167 SET_DDR_DLL_CTL3(offset, (_abs(byte_offset)&0x3f) | (_sign(byte_offset) << 6)); /* Always 6-bit field? */
2168 SET_DDR_DLL_CTL3(byte_sel, byte_sel);
2169 DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
2170 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
2171
2172 SET_DDR_DLL_CTL3(load_offset, 1);
2173 DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
2174 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
2175
2176 return ((unsigned short) GET_DDR_DLL_CTL3(offset));
2177 }
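/* Encoding note (an assumption: _sign() yields 1 for negative values): the offset field is
 * sign-magnitude, so e.g. byte_offset = -5 would be written as (5 & 0x3f) | (1 << 6) = 0x45,
 * while +5 is simply 0x05.
 */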
2178
2179 void change_dll_offset_enable(bdk_node_t node, int ddr_interface_num, int change)
2180 {
2181 bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
2182
2183 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
2184 SET_DDR_DLL_CTL3(offset_ena, !!change);
2185 DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
2186 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
2187 }
2188
2189 static void process_custom_dll_offsets(bdk_node_t node, int ddr_interface_num, const char *enable_str,
2190 const int8_t *offsets, const char *byte_str, int mode)
2191 {
2192 const char *s;
2193 int enabled;
2194 int provided;
2195
2196 if ((s = lookup_env_parameter("%s", enable_str)) != NULL) {
2197 enabled = !!strtol(s, NULL, 0);
2198 } else
2199 enabled = -1;
2200
2201 // enabled == -1: no override, do only configured offsets if provided
2202 // enabled == 0: override OFF, do NOT do it even if configured offsets provided
2203 // enabled == 1: override ON, do it for overrides plus configured offsets
2204
2205 if (enabled == 0)
2206 return;
2207
2208 provided = is_dll_offset_provided(offsets);
2209
2210 if (enabled < 0 && !provided)
2211 return;
2212
2213 int byte_offset;
2214 unsigned short offset[9] = {0};
2215 int byte;
2216
2217 // offsets need to be disabled while loading
2218 change_dll_offset_enable(node, ddr_interface_num, 0);
2219
2220 for (byte = 0; byte < 9; ++byte) {
2221
2222 // always take the provided, if available
2223 byte_offset = (provided) ? offsets[byte] : 0;
2224
2225 // then, if enabled, use any overrides present
2226 if (enabled > 0) {
2227 if ((s = lookup_env_parameter(byte_str, ddr_interface_num, byte)) != NULL) {
2228 byte_offset = strtol(s, NULL, 0);
2229 }
2230 }
2231
2232 offset[byte] = load_dll_offset(node, ddr_interface_num, mode, byte_offset, byte);
2233 }
2234
2235 // re-enable offsets after loading
2236 change_dll_offset_enable(node, ddr_interface_num, 1);
2237
2238 ddr_print("N%d.LMC%d: DLL %s Offset 8:0 :"
2239 " 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x\n",
2240 node, ddr_interface_num, (mode == 2) ? "Read " : "Write",
2241 offset[8], offset[7], offset[6], offset[5], offset[4],
2242 offset[3], offset[2], offset[1], offset[0]);
2243 }
2244
2245 void perform_octeon3_ddr3_sequence(bdk_node_t node, int rank_mask, int ddr_interface_num, int sequence)
2246 {
2247 /*
2248 * 3. Without changing any other fields in LMC(0)_CONFIG, write
2249 * LMC(0)_CONFIG[RANKMASK] then write both
2250 * LMC(0)_SEQ_CTL[SEQ_SEL,INIT_START] = 1 with a single CSR write
2251 * operation. LMC(0)_CONFIG[RANKMASK] bits should be set to indicate
2252 * the ranks that will participate in the sequence.
2253 *
2254 * The LMC(0)_SEQ_CTL[SEQ_SEL] value should select power-up/init or
2255 * selfrefresh exit, depending on whether the DRAM parts are in
2256 * self-refresh and whether their contents should be preserved. While
2257 * LMC performs these sequences, it will not perform any other DDR3
2258 * transactions. When the sequence is complete, hardware sets the
2259 * LMC(0)_CONFIG[INIT_STATUS] bits for the ranks that have been
2260 * initialized.
2261 *
2262 * If power-up/init is selected immediately following a DRESET
2263 * assertion, LMC executes the sequence described in the "Reset and
2264 * Initialization Procedure" section of the JEDEC DDR3
2265 * specification. This includes activating CKE, writing all four DDR3
2266 * mode registers on all selected ranks, and issuing the required ZQCL
2267 * command. The LMC(0)_CONFIG[RANKMASK] value should select all ranks
2268 * with attached DRAM in this case. If LMC(0)_CONTROL[RDIMM_ENA] = 1,
2269 * LMC writes the JEDEC standard SSTE32882 control words selected by
2270 * LMC(0)_DIMM_CTL[DIMM*_WMASK] between DDR_CKE* signal assertion and
2271 * the first DDR3 mode register write operation.
2272 * LMC(0)_DIMM_CTL[DIMM*_WMASK] should be cleared to 0 if the
2273 * corresponding DIMM is not present.
2274 *
2275 * If self-refresh exit is selected, LMC executes the required SRX
2276 * command followed by a refresh and ZQ calibration. Section 4.5
2277 * describes behavior of a REF + ZQCS. LMC does not write the DDR3
2278 * mode registers as part of this sequence, and the mode register
2279 * parameters must match at self-refresh entry and exit times.
2280 *
2281 * 4. Read LMC(0)_SEQ_CTL and wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be
2282 * set.
2283 *
2284 * 5. Read LMC(0)_CONFIG[INIT_STATUS] and confirm that all ranks have
2285 * been initialized.
2286 */
2287
2288 const char *s;
2289 static const char *sequence_str[] = {
2290 "Power-up/init",
2291 "Read-leveling",
2292 "Self-refresh entry",
2293 "Self-refresh exit",
2294 "Illegal",
2295 "Illegal",
2296 "Write-leveling",
2297 "Init Register Control Words",
2298 "Mode Register Write",
2299 "MPR Register Access",
2300 "LMC Deskew/Internal Vref training",
2301 "Offset Training"
2302 };
2303
2304 bdk_lmcx_seq_ctl_t seq_ctl;
2305 bdk_lmcx_config_t lmc_config;
2306
2307 lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
2308 lmc_config.s.rankmask = rank_mask;
2309 DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
2310
2311 seq_ctl.u = 0;
2312
2313 seq_ctl.s.init_start = 1;
2314 seq_ctl.s.seq_sel = sequence;
2315
2316 VB_PRT(VBL_SEQ, "N%d.LMC%d: Performing LMC sequence=%x: rank_mask=0x%02x, %s\n",
2317 node, ddr_interface_num, sequence, rank_mask, sequence < 12 ? sequence_str[sequence] : "");
2318
2319 if ((s = lookup_env_parameter("ddr_trigger_sequence%d", sequence)) != NULL) {
2320 /* FIXME(dhendrix): this appears to be meant for the eval board */
2321 #if 0
2322 int trigger = strtoul(s, NULL, 0);
2323 if (trigger)
2324 pulse_gpio_pin(node, 1, 2);
2325 #endif
2326         error_print("env parameter ddr_trigger_sequence%d is set, but GPIO triggering is not supported in this port; ignoring\n", sequence);
2327 }
2328
2329 DRAM_CSR_WRITE(node, BDK_LMCX_SEQ_CTL(ddr_interface_num), seq_ctl.u);
2330 BDK_CSR_READ(node, BDK_LMCX_SEQ_CTL(ddr_interface_num));
2331
2332 /* Wait 100us minimum before checking for sequence complete */
2333 bdk_wait_usec(100);
2334 if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_SEQ_CTL(ddr_interface_num), seq_complete, ==, 1, 1000000))
2335 {
2336 error_print("N%d.LMC%d: Timeout waiting for LMC sequence=%x, rank_mask=0x%02x, ignoring...\n",
2337 node, ddr_interface_num, sequence, rank_mask);
2338 }
2339 else {
2340 VB_PRT(VBL_SEQ, "N%d.LMC%d: LMC sequence=%x: Completed.\n", node, ddr_interface_num, sequence);
2341 }
2342 }
2343
2344 void ddr4_mrw(bdk_node_t node, int ddr_interface_num, int rank,
2345 int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1)
2346 {
2347 bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl;
2348
2349 lmc_mr_mpr_ctl.u = 0;
2350 lmc_mr_mpr_ctl.s.mr_wr_addr = (mr_wr_addr == -1) ? 0 : mr_wr_addr;
2351 lmc_mr_mpr_ctl.s.mr_wr_sel = mr_wr_sel;
2352 lmc_mr_mpr_ctl.s.mr_wr_rank = rank;
2353 //lmc_mr_mpr_ctl.s.mr_wr_pda_mask =
2354 //lmc_mr_mpr_ctl.s.mr_wr_pda_enable =
2355 //lmc_mr_mpr_ctl.s.mpr_loc =
2356 //lmc_mr_mpr_ctl.s.mpr_wr =
2357 //lmc_mr_mpr_ctl.s.mpr_bit_select =
2358 //lmc_mr_mpr_ctl.s.mpr_byte_select =
2359 //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable =
2360 lmc_mr_mpr_ctl.s.mr_wr_use_default_value = (mr_wr_addr == -1) ? 1 : 0;
2361 lmc_mr_mpr_ctl.s.mr_wr_bg1 = mr_wr_bg1;
2362 DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
2363
2364 /* Mode Register Write */
2365 perform_octeon3_ddr3_sequence(node, 1 << rank, ddr_interface_num, 0x8);
2366 }
2367
2368 #define InvA0_17(x) (x ^ 0x22bf8)
2369 static void set_mpr_mode (bdk_node_t node, int rank_mask,
2370 int ddr_interface_num, int dimm_count, int mpr, int bg1)
2371 {
2372 int rankx;
2373
2374 ddr_print("All Ranks: Set mpr mode = %x %c-side\n",
2375 mpr, (bg1==0) ? 'A' : 'B');
2376
2377 for (rankx = 0; rankx < dimm_count*4; rankx++) {
2378 if (!(rank_mask & (1 << rankx)))
2379 continue;
2380 if (bg1 == 0)
2381 ddr4_mrw(node, ddr_interface_num, rankx, mpr<<2, 3, bg1); /* MR3 A-side */
2382 else
2383 ddr4_mrw(node, ddr_interface_num, rankx, InvA0_17(mpr<<2), ~3, bg1); /* MR3 B-side */
2384 }
2385 }
2386
2387 #if ENABLE_DISPLAY_MPR_PAGE
2388 static void do_ddr4_mpr_read(bdk_node_t node, int ddr_interface_num, int rank,
2389 int page, int location)
2390 {
2391 bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl;
2392
2393 lmc_mr_mpr_ctl.u = BDK_CSR_READ(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num));
2394
2395 lmc_mr_mpr_ctl.s.mr_wr_addr = 0;
2396 lmc_mr_mpr_ctl.s.mr_wr_sel = page; /* Page */
2397 lmc_mr_mpr_ctl.s.mr_wr_rank = rank;
2398 //lmc_mr_mpr_ctl.s.mr_wr_pda_mask =
2399 //lmc_mr_mpr_ctl.s.mr_wr_pda_enable =
2400 lmc_mr_mpr_ctl.s.mpr_loc = location;
2401 lmc_mr_mpr_ctl.s.mpr_wr = 0; /* Read=0, Write=1 */
2402 //lmc_mr_mpr_ctl.s.mpr_bit_select =
2403 //lmc_mr_mpr_ctl.s.mpr_byte_select =
2404 //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable =
2405 //lmc_mr_mpr_ctl.s.mr_wr_use_default_value =
2406 //lmc_mr_mpr_ctl.s.mr_wr_bg1 =
2407
2408 DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
2409
2410 /* MPR register access sequence */
2411 perform_octeon3_ddr3_sequence(node, 1 << rank, ddr_interface_num, 0x9);
2412
2413 debug_print("LMC_MR_MPR_CTL : 0x%016lx\n", lmc_mr_mpr_ctl.u);
2414 debug_print("lmc_mr_mpr_ctl.s.mr_wr_addr: 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_addr);
2415 debug_print("lmc_mr_mpr_ctl.s.mr_wr_sel : 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_sel);
2416 debug_print("lmc_mr_mpr_ctl.s.mpr_loc : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_loc);
2417 debug_print("lmc_mr_mpr_ctl.s.mpr_wr : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_wr);
2418
2419 }
2420 #endif
2421
2422 static int set_rdimm_mode(bdk_node_t node, int ddr_interface_num, int enable)
2423 {
2424 bdk_lmcx_control_t lmc_control;
2425 int save_rdimm_mode;
2426
2427 lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
2428 save_rdimm_mode = lmc_control.s.rdimm_ena;
2429 lmc_control.s.rdimm_ena = enable;
2430 VB_PRT(VBL_FAE, "Setting RDIMM_ENA = %x\n", enable);
2431 DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
2432
2433 return (save_rdimm_mode);
2434 }
2435
2436 #if ENABLE_DISPLAY_MPR_PAGE
2437 static void ddr4_mpr_read(bdk_node_t node, int ddr_interface_num, int rank,
2438 int page, int location, uint64_t *mpr_data)
2439 {
2440 do_ddr4_mpr_read(node, ddr_interface_num, rank, page, location);
2441
2442 mpr_data[0] = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA0(ddr_interface_num));
2443 mpr_data[1] = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA1(ddr_interface_num));
2444 mpr_data[2] = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA2(ddr_interface_num));
2445
2446 debug_print("MPR Read %016lx.%016lx.%016lx\n", mpr_data[2], mpr_data[1], mpr_data[0]);
2447 }
2448
2449 /* Display MPR values for Page Location */
2450 static void Display_MPR_Page_Location(bdk_node_t node, int rank,
2451 int ddr_interface_num, int dimm_count,
2452 int page, int location, uint64_t *mpr_data)
2453 {
2454 ddr4_mpr_read(node, ddr_interface_num, rank, page, location, mpr_data);
2455 ddr_print("MPR Page %d, Loc %d %016lx.%016lx.%016lx\n",
2456 page, location, mpr_data[2], mpr_data[1], mpr_data[0]);
2457 }
2458
2459 /* Display MPR values for Page */
2460 static void Display_MPR_Page(bdk_node_t node, int rank_mask,
2461 int ddr_interface_num, int dimm_count, int page)
2462 {
2463 int rankx;
2464 uint64_t mpr_data[3];
2465
2466 for (rankx = 0; rankx < dimm_count * 4;rankx++) {
2467 if (!(rank_mask & (1 << rankx)))
2468 continue;
2469
2470 ddr_print("Rank %d: MPR values for Page %d\n", rankx, page);
2471 for (int location = 0; location < 4; location++) {
2472 Display_MPR_Page_Location(node, rankx, ddr_interface_num, dimm_count,
2473 page, location, &mpr_data[0]);
2474 }
2475
2476 } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
2477 }
2478 #endif
2479
2480 static void ddr4_mpr_write(bdk_node_t node, int ddr_interface_num, int rank,
2481 int page, int location, uint8_t mpr_data)
2482 {
2483 bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl;
2484
2485 lmc_mr_mpr_ctl.u = 0;
2486 lmc_mr_mpr_ctl.s.mr_wr_addr = mpr_data;
2487 lmc_mr_mpr_ctl.s.mr_wr_sel = page; /* Page */
2488 lmc_mr_mpr_ctl.s.mr_wr_rank = rank;
2489 //lmc_mr_mpr_ctl.s.mr_wr_pda_mask =
2490 //lmc_mr_mpr_ctl.s.mr_wr_pda_enable =
2491 lmc_mr_mpr_ctl.s.mpr_loc = location;
2492 lmc_mr_mpr_ctl.s.mpr_wr = 1; /* Read=0, Write=1 */
2493 //lmc_mr_mpr_ctl.s.mpr_bit_select =
2494 //lmc_mr_mpr_ctl.s.mpr_byte_select =
2495 //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable =
2496 //lmc_mr_mpr_ctl.s.mr_wr_use_default_value =
2497 //lmc_mr_mpr_ctl.s.mr_wr_bg1 =
2498 DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
2499
2500 /* MPR register access sequence */
2501 perform_octeon3_ddr3_sequence(node, (1 << rank), ddr_interface_num, 0x9);
2502
2503 debug_print("LMC_MR_MPR_CTL : 0x%016lx\n", lmc_mr_mpr_ctl.u);
2504 debug_print("lmc_mr_mpr_ctl.s.mr_wr_addr: 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_addr);
2505 debug_print("lmc_mr_mpr_ctl.s.mr_wr_sel : 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_sel);
2506 debug_print("lmc_mr_mpr_ctl.s.mpr_loc : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_loc);
2507 debug_print("lmc_mr_mpr_ctl.s.mpr_wr : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_wr);
2508 }
2509
2510 void set_vref(bdk_node_t node, int ddr_interface_num, int rank,
2511 int range, int value)
2512 {
2513 bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl;
2514 bdk_lmcx_modereg_params3_t lmc_modereg_params3;
2515 int mr_wr_addr = 0;
2516
2517 lmc_mr_mpr_ctl.u = 0;
2518 lmc_modereg_params3.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS3(ddr_interface_num));
2519
2520 mr_wr_addr |= lmc_modereg_params3.s.tccd_l<<10; /* A12:A10 tCCD_L */
2521 mr_wr_addr |= 1<<7; /* A7 1 = Enable(Training Mode) */
2522 mr_wr_addr |= range<<6; /* A6 VrefDQ Training Range */
2523 mr_wr_addr |= value<<0; /* A5:A0 VrefDQ Training Value */
2524
2525 lmc_mr_mpr_ctl.s.mr_wr_addr = mr_wr_addr;
2526 lmc_mr_mpr_ctl.s.mr_wr_sel = 6; /* Write MR6 */
2527 lmc_mr_mpr_ctl.s.mr_wr_rank = rank;
2528 //lmc_mr_mpr_ctl.s.mr_wr_pda_mask =
2529 //lmc_mr_mpr_ctl.s.mr_wr_pda_enable =
2530 //lmc_mr_mpr_ctl.s.mpr_loc = location;
2531 //lmc_mr_mpr_ctl.s.mpr_wr = 0; /* Read=0, Write=1 */
2532 //lmc_mr_mpr_ctl.s.mpr_bit_select =
2533 //lmc_mr_mpr_ctl.s.mpr_byte_select =
2534 //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable =
2535 //lmc_mr_mpr_ctl.s.mr_wr_use_default_value =
2536 //lmc_mr_mpr_ctl.s.mr_wr_bg1 =
2537 DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
2538
2539 /* 0x8 = Mode Register Write */
2540 perform_octeon3_ddr3_sequence(node, 1<<rank, ddr_interface_num, 0x8);
2541
2542 /* It is vendor specific whether Vref_value is captured with A7=1.
2543 A subsequent MRS might be necessary. */
2544 perform_octeon3_ddr3_sequence(node, 1<<rank, ddr_interface_num, 0x8);
2545
2546 mr_wr_addr &= ~(1<<7); /* A7 0 = Disable(Training Mode) */
2547 lmc_mr_mpr_ctl.s.mr_wr_addr = mr_wr_addr;
2548 DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
2549 }
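/* Worked example (illustrative): with tccd_l = 0, range = 1 and value = 0x18, the two MRW
 * sequences above issue MR6 with A[7:0] = 0x80 | 0x40 | 0x18 = 0xd8 (VrefDQ training
 * enabled); the CSR is then restaged with A7 cleared (0x58) so the next MR6 write latches
 * the value and exits training mode.
 */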
2550
2551 static void set_DRAM_output_inversion (bdk_node_t node,
2552 int ddr_interface_num,
2553 int dimm_count,
2554 int rank_mask,
2555 int inversion)
2556 {
2557 bdk_lmcx_ddr4_dimm_ctl_t lmc_ddr4_dimm_ctl;
2558 bdk_lmcx_dimmx_params_t lmc_dimmx_params;
2559 bdk_lmcx_dimm_ctl_t lmc_dimm_ctl;
2560 int dimm_no;
2561
2562 lmc_ddr4_dimm_ctl.u = 0; /* Don't touch extended register control words */
2563 DRAM_CSR_WRITE(node, BDK_LMCX_DDR4_DIMM_CTL(ddr_interface_num), lmc_ddr4_dimm_ctl.u);
2564
2565 ddr_print("All DIMMs: Register Control Word RC0 : %x\n", (inversion & 1));
2566
2567 for (dimm_no = 0; dimm_no < dimm_count; ++dimm_no) {
2568 lmc_dimmx_params.u = BDK_CSR_READ(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm_no));
2569 lmc_dimmx_params.s.rc0 = (lmc_dimmx_params.s.rc0 & ~1) | (inversion & 1);
2570 DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm_no), lmc_dimmx_params.u);
2571 }
2572
2573 /* LMC0_DIMM_CTL */
2574 lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num));
2575 lmc_dimm_ctl.s.dimm0_wmask = 0x1;
2576 lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0x0001 : 0x0000;
2577
2578 ddr_print("LMC DIMM_CTL : 0x%016llx\n",
2579 lmc_dimm_ctl.u);
2580 DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
2581
2582 perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x7 ); /* Init RCW */
2583 }
2584
2585 static void write_mpr_page0_pattern (bdk_node_t node, int rank_mask,
2586 int ddr_interface_num, int dimm_count, int pattern, int location_mask)
2587 {
2588 int rankx;
2589 int location;
2590
2591 for (rankx = 0; rankx < dimm_count*4; rankx++) {
2592 if (!(rank_mask & (1 << rankx)))
2593 continue;
2594 for (location = 0; location < 4; ++location) {
2595 if (!(location_mask & (1 << location)))
2596 continue;
2597
2598 ddr4_mpr_write(node, ddr_interface_num, rankx,
2599 /* page */ 0, /* location */ location, pattern);
2600 }
2601 }
2602 }
2603
2604 static void change_rdimm_mpr_pattern (bdk_node_t node, int rank_mask,
2605 int ddr_interface_num, int dimm_count)
2606 {
2607 int save_ref_zqcs_int;
2608 bdk_lmcx_config_t lmc_config;
2609
2610 /*
2611 Okay, here is the latest sequence. This should work for all
2612 chips and passes (78,88,73,etc). This sequence should be run
2613 immediately after DRAM INIT. The basic idea is to write the
2614 same pattern into each of the 4 MPR locations in the DRAM, so
2615 that the same value is returned when doing MPR reads regardless
2616 of the inversion state. My advice is to put this into a
2617 function, change_rdimm_mpr_pattern or something like that, so
2618 that it can be called multiple times, as I think David wants a
2619 clock-like pattern for OFFSET training, but does not want a
2620 clock pattern for Bit-Deskew. You should then be able to call
2621 this at any point in the init sequence (after DRAM init) to
2622 change the pattern to a new value.
2623 Mike
2624
2625 A correction: PHY doesn't need any pattern during offset
2626 training, but needs clock like pattern for internal vref and
2627 bit-dskew training. So for that reason, these steps below have
2628 to be conducted before those trainings to pre-condition
2629 the pattern. David
2630
2631 Note: Step 3, 4, 8 and 9 have to be done through RDIMM
2632 sequence. If you issue MRW sequence to do RCW write (in o78 pass
2633 1 at least), LMC will still do two commands because
2634 CONTROL[RDIMM_ENA] is still set high. We don't want it to have
2635 any unintentional mode register write so it's best to do what
2636 Mike is doing here.
2637 Andrew
2638 */
2639
2640
2641 /* 1) Disable refresh (REF_ZQCS_INT = 0) */
2642
2643 debug_print("1) Disable refresh (REF_ZQCS_INT = 0)\n");
2644
2645 lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
2646 save_ref_zqcs_int = lmc_config.s.ref_zqcs_int;
2647 lmc_config.s.ref_zqcs_int = 0;
2648 DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
2649
2650
2651 /* 2) Put all devices in MPR mode (Run MRW sequence (sequence=8)
2652 with MODEREG_PARAMS0[MPRLOC]=0,
2653 MODEREG_PARAMS0[MPR]=1, MR_MPR_CTL[MR_WR_SEL]=3, and
2654 MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1) */
2655
2656 debug_print("2) Put all devices in MPR mode (Run MRW sequence (sequence=8)\n");
2657
2658 set_mpr_mode(node, rank_mask, ddr_interface_num, dimm_count, /* mpr */ 1, /* bg1 */ 0); /* A-side */
2659 set_mpr_mode(node, rank_mask, ddr_interface_num, dimm_count, /* mpr */ 1, /* bg1 */ 1); /* B-side */
2660
2661 /* a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and set
2662 the value you would like directly into
2663 MR_MPR_CTL[MR_WR_ADDR] */
2664
2665 /* 3) Disable RCD Parity (if previously enabled) - parity does not
2666 work if inversion disabled */
2667
2668 debug_print("3) Disable RCD Parity\n");
2669
2670 /* 4) Disable Inversion in the RCD. */
2671 /* a. I did (3&4) via the RDIMM sequence (seq_sel=7), but it
2672 may be easier to use the MRW sequence (seq_sel=8). Just set
2673 MR_MPR_CTL[MR_WR_SEL]=7, MR_MPR_CTL[MR_WR_ADDR][3:0]=data,
2674 MR_MPR_CTL[MR_WR_ADDR][7:4]=RCD reg */
2675
2676 debug_print("4) Disable Inversion in the RCD.\n");
2677
2678 set_DRAM_output_inversion(node, ddr_interface_num, dimm_count, rank_mask,
2679 1 /* 1=disable output inversion*/);
2680
2681 /* 5) Disable CONTROL[RDIMM_ENA] so that MR sequence goes out
2682 non-inverted. */
2683
2684 debug_print("5) Disable CONTROL[RDIMM_ENA]\n");
2685
2686 set_rdimm_mode(node, ddr_interface_num, 0);
2687
2688 /* 6) Write all 4 MPR registers with the desired pattern (have to
2689 do this for all enabled ranks) */
2690 /* a. MR_MPR_CTL.MPR_WR=1, MR_MPR_CTL.MPR_LOC=0..3,
2691 MR_MPR_CTL.MR_WR_SEL=0, MR_MPR_CTL.MR_WR_ADDR[7:0]=pattern */
2692
2693 debug_print("6) Write all 4 MPR page 0 Training Patterns\n");
2694
2695 write_mpr_page0_pattern(node, rank_mask,
2696 ddr_interface_num, dimm_count, 0x55, 0x8);
2697
2698 /* 7) Re-enable RDIMM_ENA */
2699
2700 debug_print("7) Re-enable RDIMM_ENA\n");
2701
2702 set_rdimm_mode(node, ddr_interface_num, 1);
2703
2704 /* 8) Re-enable RDIMM inversion */
2705
2706 debug_print("8) Re-enable RDIMM inversion\n");
2707
2708 set_DRAM_output_inversion(node, ddr_interface_num, dimm_count, rank_mask,
2709 0 /* 0=re-enable output inversion*/);
2710
2711 /* 9) Re-enable RDIMM parity (if desired) */
2712
2713 debug_print("9) Re-enable RDIMM parity (if desired)\n");
2714
2715 /* 10)Take B-side devices out of MPR mode (Run MRW sequence
2716 (sequence=8) with MODEREG_PARAMS0[MPRLOC]=0,
2717 MODEREG_PARAMS0[MPR]=0, MR_MPR_CTL[MR_WR_SEL]=3, and
2718 MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1) */
2719
2720 debug_print("10)Take B-side devices out of MPR mode\n");
2721
2722 set_mpr_mode(node, rank_mask, ddr_interface_num, dimm_count, /* mpr */ 0, /* bg1 */ 1);
2723
2724 /* a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and
2725 set the value you would like directly into
2726 MR_MPR_CTL[MR_WR_ADDR] */
2727
2728 /* 11)Re-enable refresh (REF_ZQCS_INT=previous value) */
2729
2730 debug_print("11)Re-enable refresh (REF_ZQCS_INT=previous value)\n");
2731
2732 lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
2733 lmc_config.s.ref_zqcs_int = save_ref_zqcs_int;
2734 DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
2735
2736 }
2737
2738 static unsigned char ddr4_rodt_ohms [RODT_OHMS_COUNT ] = { 0, 40, 60, 80, 120, 240, 34, 48 };
2739 static unsigned char ddr4_rtt_nom_ohms [RTT_NOM_OHMS_COUNT ] = { 0, 60, 120, 40, 240, 48, 80, 34 };
2740 static unsigned char ddr4_rtt_nom_table [RTT_NOM_TABLE_COUNT ] = { 0, 4, 2, 6, 1, 5, 3, 7 };
2741 static unsigned char ddr4_rtt_wr_ohms [RTT_WR_OHMS_COUNT ] = { 0, 120, 240, 99, 80 }; // setting HiZ ohms to 99 for computed vref
2742 static unsigned char ddr4_dic_ohms [DIC_OHMS_COUNT ] = { 34, 48 };
2743 static short ddr4_drive_strength[DRIVE_STRENGTH_COUNT] = { 0, 0, 26, 30, 34, 40, 48, 68, 0,0,0,0,0,0,0 };
2744 static short ddr4_dqx_strength [DRIVE_STRENGTH_COUNT] = { 0, 24, 27, 30, 34, 40, 48, 60, 0,0,0,0,0,0,0 };
2745
2746 impedence_values_t ddr4_impedence_values = {
2747 .rodt_ohms = ddr4_rodt_ohms ,
2748 .rtt_nom_ohms = ddr4_rtt_nom_ohms ,
2749 .rtt_nom_table = ddr4_rtt_nom_table ,
2750 .rtt_wr_ohms = ddr4_rtt_wr_ohms ,
2751 .dic_ohms = ddr4_dic_ohms ,
2752 .drive_strength = ddr4_drive_strength,
2753 .dqx_strength = ddr4_dqx_strength ,
2754 };
2755
2756 static unsigned char ddr3_rodt_ohms [RODT_OHMS_COUNT ] = { 0, 20, 30, 40, 60, 120, 0, 0 };
2757 static unsigned char ddr3_rtt_nom_ohms [RTT_NOM_OHMS_COUNT ] = { 0, 60, 120, 40, 20, 30, 0, 0 };
2758 static unsigned char ddr3_rtt_nom_table [RTT_NOM_TABLE_COUNT ] = { 0, 2, 1, 3, 5, 4, 0, 0 };
2759 static unsigned char ddr3_rtt_wr_ohms [RTT_WR_OHMS_COUNT ] = { 0, 60, 120 };
2760 static unsigned char ddr3_dic_ohms [DIC_OHMS_COUNT ] = { 40, 34 };
2761 static short ddr3_drive_strength[DRIVE_STRENGTH_COUNT] = { 0, 24, 27, 30, 34, 40, 48, 60, 0,0,0,0,0,0,0 };
2762 static impedence_values_t ddr3_impedence_values = {
2763 .rodt_ohms = ddr3_rodt_ohms ,
2764 .rtt_nom_ohms = ddr3_rtt_nom_ohms ,
2765 .rtt_nom_table = ddr3_rtt_nom_table ,
2766 .rtt_wr_ohms = ddr3_rtt_wr_ohms ,
2767 .dic_ohms = ddr3_dic_ohms ,
2768 .drive_strength = ddr3_drive_strength,
2769 .dqx_strength = ddr3_drive_strength,
2770 };
2771
2772
2773 uint64_t
2774 hertz_to_psecs(uint64_t hertz)
2775 {
2776 return divide_nint((uint64_t) 1000*1000*1000*1000, hertz); /* Clock in psecs */
2777 }
2778
2779 #define DIVIDEND_SCALE 1000 /* Scale to avoid rounding error. */
2780 uint64_t
2781 psecs_to_mts(uint64_t psecs)
2782 {
2783 //ddr_print("psecs %ld, divisor %ld\n", psecs, divide_nint((uint64_t)(2 * 1000000 * DIVIDEND_SCALE), psecs));
2784 return divide_nint(divide_nint((uint64_t)(2 * 1000000 * DIVIDEND_SCALE), psecs), DIVIDEND_SCALE);
2785 }
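/* Worked example (illustrative): a 1200 MHz DDR clock gives hertz_to_psecs(1200000000) = 833,
 * and psecs_to_mts(833) = 2401 because of integer rounding -- which is why compute_rc3x()
 * below special-cases 833 psecs back to 2400 MT/s.
 */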
2786
2787 #define WITHIN(v,b,m) (((v)>=((b)-(m)))&&((v)<=((b)+(m))))
2788
2789 // pretty-print version, only works with what comes from the SPD: tCKmin or tCKAVGmin
2790 unsigned long
2791 pretty_psecs_to_mts(uint64_t psecs)
2792 {
2793 uint64_t ret = 0; // default to error
2794 if (WITHIN(psecs, 1250, 1))
2795 ret = 1600;
2796 else if (WITHIN(psecs, 1071, 1))
2797 ret = 1866;
2798 else if (WITHIN(psecs, 937, 1))
2799 ret = 2133;
2800 else if (WITHIN(psecs, 833, 1))
2801 ret = 2400;
2802 else if (WITHIN(psecs, 750, 1))
2803 ret = 2666;
2804 return ret;
2805 }
2806
2807 uint64_t
2808 mts_to_hertz(uint64_t mts)
2809 {
2810 return ((mts * 1000 * 1000) / 2);
2811 }
2812
2813 #define DEBUG_RC3X_COMPUTE 0
2814 #define rc3x_print(...) \
2815 do { if (DEBUG_RC3X_COMPUTE) printf(__VA_ARGS__); } while (0)
2816
2817 static int compute_rc3x (int64_t tclk_psecs)
2818 {
2819 long speed;
2820 long tclk_psecs_min, tclk_psecs_max;
2821 long data_rate_mhz, data_rate_mhz_min, data_rate_mhz_max;
2822 int rc3x;
2823
2824 #define ENCODING_BASE 1240
2825
2826 data_rate_mhz = psecs_to_mts(tclk_psecs);
2827
2828 /* 2400 MT/s is a special case. Using integer arithmetic it rounds
2829 from 833 psecs to 2401 MT/s. Force it to 2400 to pick the
2830 proper setting from the table. */
2831 if (tclk_psecs == 833)
2832 data_rate_mhz = 2400;
2833
2834 for (speed = ENCODING_BASE; speed < 3200; speed += 20) {
2835 int error = 0;
2836
2837 tclk_psecs_min = hertz_to_psecs(mts_to_hertz(speed + 00)); /* Clock in psecs */
2838 tclk_psecs_max = hertz_to_psecs(mts_to_hertz(speed + 18)); /* Clock in psecs */
2839
2840 data_rate_mhz_min = psecs_to_mts(tclk_psecs_min);
2841 data_rate_mhz_max = psecs_to_mts(tclk_psecs_max);
2842
2843         /* Force alignment to a multiple of 20 to avoid rounding errors. */
2844 data_rate_mhz_min = ((data_rate_mhz_min + 18) / 20) * 20;
2845 data_rate_mhz_max = ((data_rate_mhz_max + 18) / 20) * 20;
2846
2847 error += (speed + 00 != data_rate_mhz_min);
2848 error += (speed + 20 != data_rate_mhz_max);
2849
2850 rc3x = (speed - ENCODING_BASE) / 20;
2851
2852 rc3x_print("rc3x: %02x speed: %4ld MT/s < f <= %4ld MT/s, psec: %3ld:%3ld %4ld:%4ld %s\n",
2853 rc3x,
2854 speed, speed + 20,
2855 tclk_psecs_min, tclk_psecs_max,
2856 data_rate_mhz_min, data_rate_mhz_max,
2857 error ? "****" : "");
2858
2859 if (data_rate_mhz <= (speed + 20)) {
2860 rc3x_print("rc3x: %4ld MT/s <= %4ld MT/s\n", data_rate_mhz, speed + 20);
2861 break;
2862 }
2863 }
2864 return rc3x;
2865 }
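/* Worked example (illustrative): for tclk_psecs = 833 the data rate is forced to 2400 MT/s,
 * the loop stops at the 2380 < f <= 2400 bin, and rc3x = (2380 - 1240) / 20 = 57 (0x39).
 */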
2866
2867 static const int rlevel_separate_ab = 1;
2868
2869 int init_octeon3_ddr3_interface(bdk_node_t node,
2870 const ddr_configuration_t *ddr_configuration,
2871 uint32_t ddr_hertz,
2872 uint32_t cpu_hertz,
2873 uint32_t ddr_ref_hertz,
2874 int board_type,
2875 int board_rev_maj,
2876 int board_rev_min,
2877 int ddr_interface_num,
2878 uint32_t ddr_interface_mask
2879 )
2880 {
2881 const char *s;
2882
2883 const dimm_odt_config_t *odt_1rank_config = ddr_configuration->odt_1rank_config;
2884 const dimm_odt_config_t *odt_2rank_config = ddr_configuration->odt_2rank_config;
2885 const dimm_odt_config_t *odt_4rank_config = ddr_configuration->odt_4rank_config;
2886 const dimm_config_t *dimm_config_table = ddr_configuration->dimm_config_table;
2887 const dimm_odt_config_t *odt_config;
2888 const ddr3_custom_config_t *custom_lmc_config = &ddr_configuration->custom_lmc_config;
2889 int odt_idx;
2890
2891 /*
2892 ** Compute clock rates to the nearest picosecond.
2893 */
2894 uint64_t tclk_psecs = hertz_to_psecs(ddr_hertz); /* Clock in psecs */
2895 uint64_t eclk_psecs = hertz_to_psecs(cpu_hertz); /* Clock in psecs */
2896
2897 int row_bits, col_bits, num_banks, num_ranks, dram_width;
2898 int dimm_count = 0;
2899 int fatal_error = 0; /* Accumulate and report all the errors before giving up */
2900
2901 int safe_ddr_flag = 0; /* Flag that indicates safe DDR settings should be used */
2902 int ddr_interface_64b = 1; /* THUNDER Default: 64bit interface width */
2903 int ddr_interface_bytemask;
2904 uint32_t mem_size_mbytes = 0;
2905 unsigned int didx;
2906 int bank_bits = 0;
2907 int bunk_enable;
2908 int rank_mask;
2909 int column_bits_start = 1;
2910 int row_lsb;
2911 int pbank_lsb;
2912 int use_ecc = 1;
2913 int mtb_psec = 0; /* quiet */
2914 short ftb_Dividend;
2915 short ftb_Divisor;
2916 int tAAmin;
2917 int tCKmin;
2918 int CL, min_cas_latency = 0, max_cas_latency = 0, override_cas_latency = 0;
2919 int ddr_rtt_nom_auto, ddr_rodt_ctl_auto;
2920 int i;
2921
2922 int spd_addr;
2923 int spd_org;
2924 int spd_banks;
2925 int spd_rdimm;
2926 int spd_dimm_type;
2927 int spd_ecc;
2928 uint32_t spd_cas_latency;
2929 int spd_mtb_dividend;
2930 int spd_mtb_divisor;
2931 int spd_tck_min;
2932 int spd_taa_min;
2933 int spd_twr;
2934 int spd_trcd;
2935 int spd_trrd;
2936 int spd_trp;
2937 int spd_tras;
2938 int spd_trc;
2939 int spd_trfc;
2940 int spd_twtr;
2941 int spd_trtp;
2942 int spd_tfaw;
2943 int spd_addr_mirror;
2944 int spd_package = 0;
2945 int spd_rawcard = 0;
2946 int spd_rawcard_AorB = 0;
2947 int is_stacked_die = 0;
2948 int disable_stacked_die = 0;
2949 int is_3ds_dimm = 0; // 3DS
2950 int lranks_per_prank = 1; // 3DS: logical ranks per package rank
2951 int lranks_bits = 0; // 3DS: logical ranks bits
2952 int die_capacity = 0; // in Mbits; only used for 3DS
2953
2954 /* FTB values are two's complement ranging from +127 to -128. */
2955 typedef signed char SC_t;
2956
2957 int twr;
2958 int trcd;
2959 int trrd;
2960 int trp;
2961 int tras;
2962 int trc;
2963 int trfc;
2964 int twtr;
2965 int trtp = 0; /* quiet */
2966 int tfaw;
2967
2968 int wlevel_bitmask_errors = 0;
2969 int wlevel_loops;
2970 int default_rtt_nom[4];
2971 int dyn_rtt_nom_mask = 0;
2972
2973 ddr_type_t ddr_type;
2974 int ddr4_tCKAVGmin = 0; /* quiet */
2975 int ddr4_tCKAVGmax = 0; /* quiet */
2976 int ddr4_tRCDmin = 0; /* quiet */
2977 int ddr4_tRPmin = 0; /* quiet */
2978 int ddr4_tRASmin = 0; /* quiet */
2979 int ddr4_tRCmin = 0; /* quiet */
2980 int ddr4_tRFC1min = 0; /* quiet */
2981 int ddr4_tRFC2min = 0; /* quiet */
2982 int ddr4_tRFC4min = 0; /* quiet */
2983 int ddr4_tFAWmin = 0; /* quiet */
2984 int ddr4_tRRD_Smin = 0; /* quiet */
2985 int ddr4_tRRD_Lmin;
2986 int ddr4_tCCD_Lmin;
2987 impedence_values_t *imp_values;
2988 int default_rodt_ctl;
2989 // default to disabled (ie, LMC restart, not chip reset)
2990 int ddr_disable_chip_reset = 1;
2991 int disable_deskew_training = 0;
2992 const char *dimm_type_name;
2993
2994 /* Allow the Write bit-deskew feature to be enabled when desired. */
2995 // NOTE: THUNDER pass 2.x only, 81xx, 83xx
2996 int enable_write_deskew = ENABLE_WRITE_DESKEW_DEFAULT;
2997
2998 #if SWL_TRY_HWL_ALT
2999 typedef struct {
3000 uint16_t hwl_alt_mask; // mask of bytelanes with alternate
3001 uint16_t hwl_alt_delay[9]; // bytelane alternate avail if mask=1
3002 } hwl_alt_by_rank_t;
3003 hwl_alt_by_rank_t hwl_alts[4];
3004 memset(hwl_alts, 0, sizeof(hwl_alts));
3005 #endif /* SWL_TRY_HWL_ALT */
3006
3007 bdk_lmcx_config_t lmc_config;
3008
3009 /* Initialize these to shut up the compiler. They are configured
3010 and used only for DDR4 */
3011 ddr4_tRRD_Lmin = 6000;
3012 ddr4_tCCD_Lmin = 6000;
3013
3014 ddr_print("\nInitializing node %d DDR interface %d, DDR Clock %d, DDR Reference Clock %d\n",
3015 node, ddr_interface_num, ddr_hertz, ddr_ref_hertz);
3016
3017 if (dimm_config_table[0].spd_addr == 0 && !dimm_config_table[0].spd_ptr) {
3018 error_print("ERROR: No dimms specified in the dimm_config_table.\n");
3019 return (-1);
3020 }
3021
3022 // allow some overrides to be done
3023
3024 // this one controls whether chip RESET is done, or LMC init restarted from step 6.9.6
3025 if ((s = lookup_env_parameter("ddr_disable_chip_reset")) != NULL) {
3026 ddr_disable_chip_reset = !!strtoul(s, NULL, 0);
3027 }
3028 // this one controls whether Deskew Training is performed
3029 if ((s = lookup_env_parameter("ddr_disable_deskew_training")) != NULL) {
3030 disable_deskew_training = !!strtoul(s, NULL, 0);
3031 }
3032 // this one is in Validate_Read_Deskew_Training and controls a preliminary delay
3033 if ((s = lookup_env_parameter("ddr_deskew_validation_delay")) != NULL) {
3034 deskew_validation_delay = strtoul(s, NULL, 0);
3035 }
3036 // this one is in Perform_Read_Deskew_Training and controls lock retries
3037 if ((s = lookup_env_parameter("ddr_lock_retries")) != NULL) {
3038 default_lock_retry_limit = strtoul(s, NULL, 0);
3039 }
3040 // this one controls whether stacked die status can affect processing
3041 // disabling it will affect computed vref adjustment, and rodt_row_skip_mask
3042 if ((s = lookup_env_parameter("ddr_disable_stacked_die")) != NULL) {
3043 disable_stacked_die = !!strtoul(s, NULL, 0);
3044 }
3045
3046 // setup/override for write bit-deskew feature
3047 if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
3048 // FIXME: allow override
3049 if ((s = lookup_env_parameter("ddr_enable_write_deskew")) != NULL) {
3050 enable_write_deskew = !!strtoul(s, NULL, 0);
3051 } // else take default setting
3052 } else { // not pass 2.x
3053 enable_write_deskew = 0; // force disabled
3054 }
3055
3056 #if 0 // FIXME: do we really need this anymore?
3057 if (dram_is_verbose(VBL_NORM)) {
3058 printf("DDR SPD Table:");
3059 for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) {
3060 if (dimm_config_table[didx].spd_addr == 0) break;
3061 printf(" --ddr%dspd=0x%02x", ddr_interface_num, dimm_config_table[didx].spd_addr);
3062 }
3063 printf("\n");
3064 }
3065 #endif
3066
3067 /*
3068 ** Walk the DRAM Socket Configuration Table to see what is installed.
3069 */
3070 for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx)
3071 {
3072 /* Check for lower DIMM socket populated */
3073 if (validate_dimm(node, &dimm_config_table[didx]) == 1) {
3074 // NOTE: DIMM info printing is now done later when more details are available
3075 ++dimm_count;
3076 } else { break; } /* Finished when there is no lower DIMM */
3077 }
3078
3079
3080 initialize_ddr_clock(node,
3081 ddr_configuration,
3082 cpu_hertz,
3083 ddr_hertz,
3084 ddr_ref_hertz,
3085 ddr_interface_num,
3086 ddr_interface_mask);
3087
3088 if (!odt_1rank_config)
3089 odt_1rank_config = disable_odt_config;
3090 if (!odt_2rank_config)
3091 odt_2rank_config = disable_odt_config;
3092 if (!odt_4rank_config)
3093 odt_4rank_config = disable_odt_config;
3094
3095 if ((s = lookup_env_parameter("ddr_safe")) != NULL) {
3096 safe_ddr_flag = !!strtoul(s, NULL, 0);
3097 }
3098
3099
3100 if (dimm_count == 0) {
3101 error_print("ERROR: DIMM 0 not detected.\n");
3102 return(-1);
3103 }
3104
3105 // look for 32-bit mode specified in the config
3106 if (custom_lmc_config->mode32b) {
3107 ddr_interface_64b = 0;
3108 }
3109
3110 if (ddr_interface_64b == 0) { // check if 32-bit mode is bad
3111 if (!CAVIUM_IS_MODEL(CAVIUM_CN81XX)) {
3112 error_print("32-bit interface width is NOT supported for this Thunder model\n");
3113 ddr_interface_64b = 1; // force to 64-bit
3114 }
3115 } else { // check if 64-bit mode is bad
3116 if (CAVIUM_IS_MODEL(CAVIUM_CN81XX)) { // check the fuses on 81XX for forced 32-bit mode
3117 BDK_CSR_INIT(mio_fus_dat2, node, BDK_MIO_FUS_DAT2);
3118 if (mio_fus_dat2.s.lmc_mode32) {
3119 error_print("32-bit interface width is ONLY supported for this Thunder model\n");
3120 ddr_interface_64b = 0; // force to 32-bit
3121 }
3122 }
3123 }
3124
3125 // finally, say we are in 32-bit mode when it has been validated
3126 if (ddr_interface_64b == 0) {
3127 ddr_print("N%d.LMC%d: Setting 32-bit data width\n",
3128 node, ddr_interface_num);
3129 }
3130
3131 /* ddr_type only indicates DDR4 or DDR3 */
3132 ddr_type = get_ddr_type(node, &dimm_config_table[0]);
3133 debug_print("DRAM Device Type: DDR%d\n", ddr_type);
3134
3135 spd_dimm_type = get_dimm_module_type(node, &dimm_config_table[0], ddr_type);
3136
3137 if (ddr_type == DDR4_DRAM) {
3138 int spd_module_type;
3139 int asymmetric;
3140 const char *signal_load[4] = {"", "MLS", "3DS", "RSV"};
3141
3142 imp_values = &ddr4_impedence_values;
3143 dimm_type_name = ddr4_dimm_types[spd_dimm_type];
3144
3145 spd_addr = read_spd(node, &dimm_config_table[0], DDR4_SPD_ADDRESSING_ROW_COL_BITS);
3146 spd_org = read_spd(node, &dimm_config_table[0], DDR4_SPD_MODULE_ORGANIZATION);
3147 spd_banks = 0xFF & read_spd(node, &dimm_config_table[0], DDR4_SPD_DENSITY_BANKS);
3148
3149 bank_bits = (2 + ((spd_banks >> 4) & 0x3)) + ((spd_banks >> 6) & 0x3);
3150 bank_bits = min((int)bank_bits, 4); /* Controller can only address 4 bits. */
3151
3152 spd_package = 0xFF & read_spd(node, &dimm_config_table[0], DDR4_SPD_PACKAGE_TYPE);
3153 if (spd_package & 0x80) { // non-monolithic device
3154 is_stacked_die = (!disable_stacked_die) ? ((spd_package & 0x73) == 0x11) : 0;
3155 ddr_print("DDR4: Package Type 0x%x (%s), %d die\n", spd_package,
3156 signal_load[(spd_package & 3)], ((spd_package >> 4) & 7) + 1);
3157 is_3ds_dimm = ((spd_package & 3) == 2); // is it 3DS?
3158 if (is_3ds_dimm) { // is it 3DS?
3159 lranks_per_prank = ((spd_package >> 4) & 7) + 1;
3160 // FIXME: should make sure it is only 2H or 4H or 8H?
3161 lranks_bits = lranks_per_prank >> 1;
3162 if (lranks_bits == 4) lranks_bits = 3;
3163 }
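            /* Note (added for clarity, derived from the shift above): a 2H
               stack yields lranks_bits = 1, 4H yields 2, and 8H yields 3;
               other die counts are not expected here (see the FIXME above). */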
3164 } else if (spd_package != 0) {
3165 // FIXME: print non-zero monolithic device definition
3166 ddr_print("DDR4: Package Type MONOLITHIC: %d die, signal load %d\n",
3167 ((spd_package >> 4) & 7) + 1, (spd_package & 3));
3168 }
3169
3170 asymmetric = (spd_org >> 6) & 1;
3171 if (asymmetric) {
3172 int spd_secondary_pkg = read_spd(node, &dimm_config_table[0],
3173 DDR4_SPD_SECONDARY_PACKAGE_TYPE);
3174 ddr_print("DDR4: Module Organization: ASYMMETRICAL: Secondary Package Type 0x%x\n",
3175 spd_secondary_pkg);
3176 } else {
3177 uint64_t bus_width = 8 << (0x07 & read_spd(node, &dimm_config_table[0],
3178 DDR4_SPD_MODULE_MEMORY_BUS_WIDTH));
3179 uint64_t ddr_width = 4 << ((spd_org >> 0) & 0x7);
3180 uint64_t module_cap;
3181 int shift = (spd_banks & 0x0F);
3182 die_capacity = (shift < 8) ? (256UL << shift) : ((12UL << (shift & 1)) << 10);
3183 ddr_print("DDR4: Module Organization: SYMMETRICAL: capacity per die %d %cbit\n",
3184 (die_capacity > 512) ? (die_capacity >> 10) : die_capacity,
3185 (die_capacity > 512) ? 'G' : 'M');
3186 module_cap = ((uint64_t)die_capacity << 20) / 8UL * bus_width / ddr_width *
3187 /* no. pkg ranks*/(1UL + ((spd_org >> 3) & 0x7));
3188 if (is_3ds_dimm) // is it 3DS?
3189 module_cap *= /* die_count */(uint64_t)(((spd_package >> 4) & 7) + 1);
3190 ddr_print("DDR4: Module Organization: SYMMETRICAL: capacity per module %lld GB\n",
3191 module_cap >> 30);
3192 }
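        /* Worked example (illustrative only, not from the original source): an
           8 Gbit die, 64-bit primary bus, x8 devices and 2 package ranks gives
           (8 Gbit / 8) * (64 / 8) * 2 = 16 GB per module; a 4H 3DS stack would
           multiply that by a further 4. */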
3193
3194 spd_rawcard = 0xFF & read_spd(node, &dimm_config_table[0], DDR4_SPD_REFERENCE_RAW_CARD);
3195 ddr_print("DDR4: Reference Raw Card 0x%x \n", spd_rawcard);
3196
3197 spd_module_type = read_spd(node, &dimm_config_table[0], DDR4_SPD_KEY_BYTE_MODULE_TYPE);
3198 if (spd_module_type & 0x80) { // HYBRID module
3199 ddr_print("DDR4: HYBRID module, type %s\n",
3200 ((spd_module_type & 0x70) == 0x10) ? "NVDIMM" : "UNKNOWN");
3201 }
3202
3203 spd_dimm_type = spd_module_type & 0x0F;
3204 spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) || (spd_dimm_type == 8);
3205 if (spd_rdimm) {
3206 int spd_mfgr_id = read_spd(node, &dimm_config_table[0], DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB) |
3207 (read_spd(node, &dimm_config_table[0], DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB) << 8);
3208 int spd_register_rev = read_spd(node, &dimm_config_table[0], DDR4_SPD_REGISTER_REVISION_NUMBER);
3209 ddr_print("DDR4: RDIMM Register Manufacturer ID 0x%x Revision 0x%x\n",
3210 spd_mfgr_id, spd_register_rev);
3211
3212 // RAWCARD A or B must be bit 7=0 and bits 4-0 either 00000(A) or 00001(B)
3213 spd_rawcard_AorB = ((spd_rawcard & 0x9fUL) <= 1);
3214 }
3215 } else {
3216 imp_values = &ddr3_impedence_values;
3217 dimm_type_name = ddr3_dimm_types[spd_dimm_type];
3218
3219 spd_addr = read_spd(node, &dimm_config_table[0], DDR3_SPD_ADDRESSING_ROW_COL_BITS);
3220 spd_org = read_spd(node, &dimm_config_table[0], DDR3_SPD_MODULE_ORGANIZATION);
3221 spd_banks = read_spd(node, &dimm_config_table[0], DDR3_SPD_DENSITY_BANKS) & 0xff;
3222
3223 bank_bits = 3 + ((spd_banks >> 4) & 0x7);
3224 bank_bits = min((int)bank_bits, 3); /* Controller can only address 3 bits. */
3225
3226 spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) || (spd_dimm_type == 9);
3227 }
3228
3229 #if 0 // FIXME: why should this be possible OR needed?
3230 if ((s = lookup_env_parameter("ddr_rdimm_ena")) != NULL) {
3231 spd_rdimm = !!strtoul(s, NULL, 0);
3232 }
3233 #endif
3234
3235 debug_print("spd_addr : %#06x\n", spd_addr );
3236 debug_print("spd_org : %#06x\n", spd_org );
3237 debug_print("spd_banks : %#06x\n", spd_banks );
3238
3239 row_bits = 12 + ((spd_addr >> 3) & 0x7);
3240 col_bits = 9 + ((spd_addr >> 0) & 0x7);
3241
3242 num_ranks = 1 + ((spd_org >> 3) & 0x7);
3243 dram_width = 4 << ((spd_org >> 0) & 0x7);
3244 num_banks = 1 << bank_bits;
3245
3246 if ((s = lookup_env_parameter("ddr_num_ranks")) != NULL) {
3247 num_ranks = strtoul(s, NULL, 0);
3248 }
3249
3250 /* FIX
3251 ** Check that values are within some theoretical limits.
3252 ** col_bits(min) = row_lsb(min) - bank_bits(max) - bus_bits(max) = 14 - 3 - 4 = 7
3253 ** col_bits(max) = row_lsb(max) - bank_bits(min) - bus_bits(min) = 18 - 2 - 3 = 13
3254 */
3255 if ((col_bits > 13) || (col_bits < 7)) {
3256 error_print("Unsupported number of Col Bits: %d\n", col_bits);
3257 ++fatal_error;
3258 }
3259
3260 /* FIX
3261 ** Check that values are within some theoretical limits.
3262 ** row_bits(min) = pbank_lsb(min) - row_lsb(max) - rank_bits = 26 - 18 - 1 = 7
3263 ** row_bits(max) = pbank_lsb(max) - row_lsb(min) - rank_bits = 33 - 14 - 1 = 18
3264 */
3265 if ((row_bits > 18) || (row_bits < 7)) {
3266 error_print("Unsupported number of Row Bits: %d\n", row_bits);
3267 ++fatal_error;
3268 }
3269
3270 wlevel_loops = WLEVEL_LOOPS_DEFAULT;
3271 // accept generic or interface-specific override but not for ASIM...
3272 if ((s = lookup_env_parameter("ddr_wlevel_loops")) == NULL)
3273 s = lookup_env_parameter("ddr%d_wlevel_loops", ddr_interface_num);
3274 if (s != NULL) {
3275 wlevel_loops = strtoul(s, NULL, 0);
3276 }
3277
3278 bunk_enable = (num_ranks > 1);
3279
3280 column_bits_start = 3;
3281
3282 row_lsb = column_bits_start + col_bits + bank_bits - (! ddr_interface_64b);
3283 debug_print("row_lsb = column_bits_start + col_bits + bank_bits = %d\n", row_lsb);
3284
3285 pbank_lsb = row_lsb + row_bits + bunk_enable;
3286 debug_print("pbank_lsb = row_lsb + row_bits + bunk_enable = %d\n", pbank_lsb);
3287
3288 if (lranks_per_prank > 1) {
3289 pbank_lsb = row_lsb + row_bits + lranks_bits + bunk_enable;
3290 ddr_print("DDR4: 3DS: pbank_lsb = (%d row_lsb) + (%d row_bits) + (%d lranks_bits) + (%d bunk_enable) = %d\n",
3291 row_lsb, row_bits, lranks_bits, bunk_enable, pbank_lsb);
3292 }
3293
3294 mem_size_mbytes = dimm_count * ((1ull << pbank_lsb) >> 20);
3295 if (num_ranks == 4) {
3296 /* Quad rank dimm capacity is equivalent to two dual-rank dimms. */
3297 mem_size_mbytes *= 2;
3298 }
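    /* Worked example (illustrative only, not from the original source): one
       dual-rank DIMM with 16 row bits, 10 column bits and 4 bank bits on a
       64-bit bus gives row_lsb = 3 + 10 + 4 = 17, pbank_lsb = 17 + 16 + 1 = 34,
       so mem_size_mbytes = 1 * ((1ull << 34) >> 20) = 16384 MB. */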
3299
3300 /* Mask with 1 bits set for each active rank, allowing 2 bits per dimm.
3301 ** This makes later calculations simpler, as a variety of CSRs use this layout.
3302 ** This init needs to be updated for dual configs (ie non-identical DIMMs).
3303 ** Bit 0 = dimm0, rank 0
3304 ** Bit 1 = dimm0, rank 1
3305 ** Bit 2 = dimm1, rank 0
3306 ** Bit 3 = dimm1, rank 1
3307 ** ...
3308 */
3309 rank_mask = 0x1;
3310 if (num_ranks > 1)
3311 rank_mask = 0x3;
3312 if (num_ranks > 2)
3313 rank_mask = 0xf;
3314
3315 for (i = 1; i < dimm_count; i++)
3316 rank_mask |= ((rank_mask & 0x3) << (2*i));
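    /* Example (derived from the code above): two dual-rank DIMMs start with
       rank_mask = 0x3 for DIMM0, and the loop ORs in (0x3 << 2) for DIMM1,
       giving 0xF -- one bit per active rank, two bit positions per DIMM. */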
3317
3318
3319 #ifdef CAVIUM_ONLY
3320 /* Special request: mismatched DIMM support. Slot 0: 2-Rank, Slot 1: 1-Rank */
3321 if (0)
3322 {
3323 /*
3324 ** Calculate the total memory size in terms of the total
3325 ** number of ranks instead of the number of dimms. The usual
3326 ** requirement is for both dimms to be identical. This check
3327 ** works around that requirement to allow one exception. The
3328 ** dimm in the second slot may now have fewer ranks than the
3329 ** first slot.
3330 */
3331 int spd_org_dimm1;
3332 int num_ranks_dimm1;
3333 int rank_count;
3334 int rank_mask_dimm1;
3335
3336 if (dimm_count > 1) {
3337 spd_org_dimm1 = read_spd(node, &dimm_config_table[1] /* dimm 1*/,
3338 DDR3_SPD_MODULE_ORGANIZATION);
3339 num_ranks_dimm1 = 1 + ((spd_org_dimm1 >> 3) & 0x7);
3340 rank_count = num_ranks/* dimm 0 */ + num_ranks_dimm1 /* dimm 1 */;
3341
3342 if (num_ranks != num_ranks_dimm1) {
3343 mem_size_mbytes = rank_count * ((1ull << (pbank_lsb-bunk_enable)) >> 20);
3344 rank_mask = 1 | ((num_ranks > 1) << 1);
3345 rank_mask_dimm1 = 1 | ((num_ranks_dimm1 > 1) << 1);
3346 rank_mask |= ((rank_mask_dimm1 & 0x3) << 2);
3347 ddr_print("DIMM 1 - ranks: %d, size: %d MB\n",
3348 num_ranks_dimm1, num_ranks_dimm1 * ((1ull << (pbank_lsb-bunk_enable)) >> 20));
3349 }
3350 }
3351 }
3352 #endif /* CAVIUM_ONLY */
3353
3354 spd_ecc = get_dimm_ecc(node, &dimm_config_table[0], ddr_type);
3355
3356 VB_PRT(VBL_DEV, "Summary: - %d %s%s %dRx%d %s, row bits=%d, col bits=%d, bank bits=%d\n",
3357 dimm_count, dimm_type_name, (dimm_count > 1) ? "s" : "",
3358 num_ranks, dram_width, (spd_ecc) ? "ECC" : "non-ECC",
3359 row_bits, col_bits, bank_bits);
3360
3361 // always print out the useful DIMM information...
3362 for (i = 0; i < DDR_CFG_T_MAX_DIMMS; i++) {
3363 if (i < dimm_count)
3364 report_dimm(node, &dimm_config_table[i], i, ddr_interface_num,
3365 num_ranks, dram_width, mem_size_mbytes / dimm_count);
3366 else
3367 if (validate_dimm(node, &dimm_config_table[i]) == 0) // only if there is a slot
3368 printf("N%d.LMC%d.DIMM%d: Not Present\n", node, ddr_interface_num, i);
3369 }
3370
3371 if (ddr_type == DDR4_DRAM) {
3372 spd_cas_latency = ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE0)) << 0);
3373 spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE1)) << 8);
3374 spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE2)) << 16);
3375 spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE3)) << 24);
3376 } else {
3377 spd_cas_latency = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_CAS_LATENCIES_LSB);
3378 spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_CAS_LATENCIES_MSB)) << 8);
3379 }
3380 debug_print("spd_cas_latency : %#06x\n", spd_cas_latency );
3381
3382 if (ddr_type == DDR4_DRAM) {
3383
3384 /* No other values for DDR4 MTB and FTB are specified at the
3385 * current time so don't bother reading them. Can't speculate how
3386 * new values will be represented.
3387 */
3388 int spdMTB = 125;
3389 int spdFTB = 1;
3390
3391 tAAmin
3392 = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_LATENCY_TAAMIN)
3393 + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN);
3394
3395 ddr4_tCKAVGmin
3396 = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN)
3397 + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN);
3398
3399 ddr4_tCKAVGmax
3400 = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX)
3401 + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX);
3402
3403 ddr4_tRCDmin
3404 = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN)
3405 + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN);
3406
3407 ddr4_tRPmin
3408 = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN)
3409 + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN);
3410
3411 ddr4_tRASmin
3412 = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8) +
3413 ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN) & 0xff));
3414
3415 ddr4_tRCmin
3416 = spdMTB * ((((read_spd(node, &dimm_config_table[0], DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) >> 4) & 0xf) << 8) +
3417 ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN) & 0xff))
3418 + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN);
3419
3420 ddr4_tRFC1min
3421 = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN) & 0xff) << 8) +
3422 ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN) & 0xff));
3423
3424 ddr4_tRFC2min
3425 = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN) & 0xff) << 8) +
3426 ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN) & 0xff));
3427
3428 ddr4_tRFC4min
3429 = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN) & 0xff) << 8) +
3430 ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN) & 0xff));
3431
3432 ddr4_tFAWmin
3433 = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN) & 0xf) << 8) +
3434 ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN) & 0xff));
3435
3436 ddr4_tRRD_Smin
3437 = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN)
3438 + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN);
3439
3440 ddr4_tRRD_Lmin
3441 = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN)
3442 + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN);
3443
3444 ddr4_tCCD_Lmin
3445 = spdMTB * read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN)
3446 + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN);
3447
3448 ddr_print("%-45s : %6d ps\n", "Medium Timebase (MTB)", spdMTB);
3449 ddr_print("%-45s : %6d ps\n", "Fine Timebase (FTB)", spdFTB);
3450
3451 #define DDR4_TWR 15000
3452 #define DDR4_TWTR_S 2500
3453
3454
3455 tCKmin = ddr4_tCKAVGmin;
3456 twr = DDR4_TWR;
3457 trcd = ddr4_tRCDmin;
3458 trrd = ddr4_tRRD_Smin;
3459 trp = ddr4_tRPmin;
3460 tras = ddr4_tRASmin;
3461 trc = ddr4_tRCmin;
3462 trfc = ddr4_tRFC1min;
3463 twtr = DDR4_TWTR_S;
3464 tfaw = ddr4_tFAWmin;
3465
3466 if (spd_rdimm) {
3467 spd_addr_mirror = read_spd(node, &dimm_config_table[0], DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM) & 0x1;
3468 } else {
3469 spd_addr_mirror = read_spd(node, &dimm_config_table[0], DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE) & 0x1;
3470 }
3471 debug_print("spd_addr_mirror : %#06x\n", spd_addr_mirror );
3472
3473 } else { /* if (ddr_type == DDR4_DRAM) */
3474 spd_mtb_dividend = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND);
3475 spd_mtb_divisor = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR);
3476 spd_tck_min = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN);
3477 spd_taa_min = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_CAS_LATENCY_TAAMIN);
3478
3479 spd_twr = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN);
3480 spd_trcd = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN);
3481 spd_trrd = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN);
3482 spd_trp = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN);
3483 spd_tras = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN);
3484 spd_tras |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_UPPER_NIBBLES_TRAS_TRC)&0xf) << 8);
3485 spd_trc = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN);
3486 spd_trc |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_UPPER_NIBBLES_TRAS_TRC)&0xf0) << 4);
3487 spd_trfc = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN);
3488 spd_trfc |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN)) << 8);
3489 spd_twtr = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN);
3490 spd_trtp = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN);
3491 spd_tfaw = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN);
3492 spd_tfaw |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_UPPER_NIBBLE_TFAW)&0xf) << 8);
3493 spd_addr_mirror = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_ADDRESS_MAPPING) & 0x1;
3494 spd_addr_mirror = spd_addr_mirror && !spd_rdimm; /* Only address mirror unbuffered dimms. */
3495 ftb_Dividend = read_spd(node, &dimm_config_table[0], DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4;
3496 ftb_Divisor = read_spd(node, &dimm_config_table[0], DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf;
3497 ftb_Divisor = (ftb_Divisor == 0) ? 1 : ftb_Divisor; /* Make sure that it is not 0 */
3498
3499 debug_print("spd_twr : %#06x\n", spd_twr );
3500 debug_print("spd_trcd : %#06x\n", spd_trcd);
3501 debug_print("spd_trrd : %#06x\n", spd_trrd);
3502 debug_print("spd_trp : %#06x\n", spd_trp );
3503 debug_print("spd_tras : %#06x\n", spd_tras);
3504 debug_print("spd_trc : %#06x\n", spd_trc );
3505 debug_print("spd_trfc : %#06x\n", spd_trfc);
3506 debug_print("spd_twtr : %#06x\n", spd_twtr);
3507 debug_print("spd_trtp : %#06x\n", spd_trtp);
3508 debug_print("spd_tfaw : %#06x\n", spd_tfaw);
3509 debug_print("spd_addr_mirror : %#06x\n", spd_addr_mirror);
3510
3511 mtb_psec = spd_mtb_dividend * 1000 / spd_mtb_divisor;
3512 tAAmin = mtb_psec * spd_taa_min;
3513 tAAmin += ftb_Dividend * (SC_t) read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN) / ftb_Divisor;
3514 tCKmin = mtb_psec * spd_tck_min;
3515 tCKmin += ftb_Dividend * (SC_t) read_spd(node, &dimm_config_table[0], DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN) / ftb_Divisor;
3516
3517 twr = spd_twr * mtb_psec;
3518 trcd = spd_trcd * mtb_psec;
3519 trrd = spd_trrd * mtb_psec;
3520 trp = spd_trp * mtb_psec;
3521 tras = spd_tras * mtb_psec;
3522 trc = spd_trc * mtb_psec;
3523 trfc = spd_trfc * mtb_psec;
3524 twtr = spd_twtr * mtb_psec;
3525 trtp = spd_trtp * mtb_psec;
3526 tfaw = spd_tfaw * mtb_psec;
3527
3528 } /* if (ddr_type == DDR4_DRAM) */
3529
3530 if (ddr_type == DDR4_DRAM) {
3531 ddr_print("%-45s : %6d ps (%ld MT/s)\n", "SDRAM Minimum Cycle Time (tCKAVGmin)",ddr4_tCKAVGmin,
3532 pretty_psecs_to_mts(ddr4_tCKAVGmin));
3533 ddr_print("%-45s : %6d ps\n", "SDRAM Maximum Cycle Time (tCKAVGmax)", ddr4_tCKAVGmax);
3534 ddr_print("%-45s : %6d ps\n", "Minimum CAS Latency Time (tAAmin)", tAAmin);
3535 ddr_print("%-45s : %6d ps\n", "Minimum RAS to CAS Delay Time (tRCDmin)", ddr4_tRCDmin);
3536 ddr_print("%-45s : %6d ps\n", "Minimum Row Precharge Delay Time (tRPmin)", ddr4_tRPmin);
3537 ddr_print("%-45s : %6d ps\n", "Minimum Active to Precharge Delay (tRASmin)", ddr4_tRASmin);
3538 ddr_print("%-45s : %6d ps\n", "Minimum Active to Active/Refr. Delay (tRCmin)", ddr4_tRCmin);
3539 ddr_print("%-45s : %6d ps\n", "Minimum Refresh Recovery Delay (tRFC1min)", ddr4_tRFC1min);
3540 ddr_print("%-45s : %6d ps\n", "Minimum Refresh Recovery Delay (tRFC2min)", ddr4_tRFC2min);
3541 ddr_print("%-45s : %6d ps\n", "Minimum Refresh Recovery Delay (tRFC4min)", ddr4_tRFC4min);
3542 ddr_print("%-45s : %6d ps\n", "Minimum Four Activate Window Time (tFAWmin)", ddr4_tFAWmin);
3543 ddr_print("%-45s : %6d ps\n", "Minimum Act. to Act. Delay (tRRD_Smin)", ddr4_tRRD_Smin);
3544 ddr_print("%-45s : %6d ps\n", "Minimum Act. to Act. Delay (tRRD_Lmin)", ddr4_tRRD_Lmin);
3545 ddr_print("%-45s : %6d ps\n", "Minimum CAS to CAS Delay Time (tCCD_Lmin)", ddr4_tCCD_Lmin);
3546 } else {
3547 ddr_print("Medium Timebase (MTB) : %6d ps\n", mtb_psec);
3548 ddr_print("Minimum Cycle Time (tCKmin) : %6d ps (%ld MT/s)\n", tCKmin,
3549 pretty_psecs_to_mts(tCKmin));
3550 ddr_print("Minimum CAS Latency Time (tAAmin) : %6d ps\n", tAAmin);
3551 ddr_print("Write Recovery Time (tWR) : %6d ps\n", twr);
3552 ddr_print("Minimum RAS to CAS delay (tRCD) : %6d ps\n", trcd);
3553 ddr_print("Minimum Row Active to Row Active delay (tRRD) : %6d ps\n", trrd);
3554 ddr_print("Minimum Row Precharge Delay (tRP) : %6d ps\n", trp);
3555 ddr_print("Minimum Active to Precharge (tRAS) : %6d ps\n", tras);
3556 ddr_print("Minimum Active to Active/Refresh Delay (tRC) : %6d ps\n", trc);
3557 ddr_print("Minimum Refresh Recovery Delay (tRFC) : %6d ps\n", trfc);
3558 ddr_print("Internal write to read command delay (tWTR) : %6d ps\n", twtr);
3559 ddr_print("Min Internal Rd to Precharge Cmd Delay (tRTP) : %6d ps\n", trtp);
3560 ddr_print("Minimum Four Activate Window Delay (tFAW) : %6d ps\n", tfaw);
3561 }
3562
3563
3564 /* When the cycle time is within 1 psec of the minimum accept it
3565 as a slight rounding error and adjust it to exactly the minimum
3566 cycle time. This avoids an unnecessary warning. */
3567 if (_abs(tclk_psecs - tCKmin) < 2)
3568 tclk_psecs = tCKmin;
3569
3570 if (tclk_psecs < (uint64_t)tCKmin) {
3571 ddr_print("WARNING!!!!: DDR Clock Rate (tCLK: %lld) exceeds DIMM specifications (tCKmin: %lld)!!!!\n",
3572 tclk_psecs, (uint64_t)tCKmin);
3573 }
3574
3575
3576 ddr_print("DDR Clock Rate (tCLK) : %6llu ps\n", tclk_psecs);
3577 ddr_print("Core Clock Rate (eCLK) : %6llu ps\n", eclk_psecs);
3578
3579 if ((s = lookup_env_parameter("ddr_use_ecc")) != NULL) {
3580 use_ecc = !!strtoul(s, NULL, 0);
3581 }
3582 use_ecc = use_ecc && spd_ecc;
3583
3584 ddr_interface_bytemask = ddr_interface_64b
3585 ? (use_ecc ? 0x1ff : 0xff)
3586 : (use_ecc ? 0x01f : 0x0f); // FIXME? 81xx does diff from 70xx
3587
3588 ddr_print("DRAM Interface width: %d bits %s bytemask 0x%x\n",
3589 ddr_interface_64b ? 64 : 32, use_ecc ? "+ECC" : "",
3590 ddr_interface_bytemask);
3591
3592 ddr_print("\n------ Board Custom Configuration Settings ------\n");
3593 ddr_print("%-45s : %d\n", "MIN_RTT_NOM_IDX ", custom_lmc_config->min_rtt_nom_idx);
3594 ddr_print("%-45s : %d\n", "MAX_RTT_NOM_IDX ", custom_lmc_config->max_rtt_nom_idx);
3595 ddr_print("%-45s : %d\n", "MIN_RODT_CTL ", custom_lmc_config->min_rodt_ctl);
3596 ddr_print("%-45s : %d\n", "MAX_RODT_CTL ", custom_lmc_config->max_rodt_ctl);
3597 ddr_print("%-45s : %d\n", "MIN_CAS_LATENCY ", custom_lmc_config->min_cas_latency);
3598 ddr_print("%-45s : %d\n", "OFFSET_EN ", custom_lmc_config->offset_en);
3599 ddr_print("%-45s : %d\n", "OFFSET_UDIMM ", custom_lmc_config->offset_udimm);
3600 ddr_print("%-45s : %d\n", "OFFSET_RDIMM ", custom_lmc_config->offset_rdimm);
3601 ddr_print("%-45s : %d\n", "DDR_RTT_NOM_AUTO ", custom_lmc_config->ddr_rtt_nom_auto);
3602 ddr_print("%-45s : %d\n", "DDR_RODT_CTL_AUTO ", custom_lmc_config->ddr_rodt_ctl_auto);
3603 if (spd_rdimm)
3604 ddr_print("%-45s : %d\n", "RLEVEL_COMP_OFFSET", custom_lmc_config->rlevel_comp_offset_rdimm);
3605 else
3606 ddr_print("%-45s : %d\n", "RLEVEL_COMP_OFFSET", custom_lmc_config->rlevel_comp_offset_udimm);
3607 ddr_print("%-45s : %d\n", "RLEVEL_COMPUTE ", custom_lmc_config->rlevel_compute);
3608 ddr_print("%-45s : %d\n", "DDR2T_UDIMM ", custom_lmc_config->ddr2t_udimm);
3609 ddr_print("%-45s : %d\n", "DDR2T_RDIMM ", custom_lmc_config->ddr2t_rdimm);
3610 ddr_print("%-45s : %d\n", "FPRCH2 ", custom_lmc_config->fprch2);
3611 ddr_print("-------------------------------------------------\n");
3612
3613
3614 CL = divide_roundup(tAAmin, tclk_psecs);
3615
3616 ddr_print("Desired CAS Latency : %6d\n", CL);
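    /* Illustrative arithmetic (not from the original source): tAAmin = 13750 ps
       at tCLK = 833 ps gives CL = divide_roundup(13750, 833) = 17; the code
       below then snaps this to a CAS latency actually advertised in the SPD
       bitmask. */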
3617
3618 min_cas_latency = custom_lmc_config->min_cas_latency;
3619
3620
3621 if ((s = lookup_env_parameter("ddr_min_cas_latency")) != NULL) {
3622 min_cas_latency = strtoul(s, NULL, 0);
3623 }
3624
3625 {
3626 int base_CL;
3627 ddr_print("CAS Latencies supported in DIMM :");
3628 base_CL = (ddr_type == DDR4_DRAM) ? 7 : 4;
3629 for (i=0; i<32; ++i) {
3630 if ((spd_cas_latency >> i) & 1) {
3631 ddr_print(" %d", i+base_CL);
3632 max_cas_latency = i+base_CL;
3633 if (min_cas_latency == 0)
3634 min_cas_latency = i+base_CL;
3635 }
3636 }
3637 ddr_print("\n");
3638
3639 /* Use relaxed timing when running slower than the minimum
3640 supported speed. Adjust timing to match the smallest supported
3641 CAS Latency. */
3642 if (CL < min_cas_latency) {
3643 uint64_t adjusted_tclk = tAAmin / min_cas_latency;
3644 CL = min_cas_latency;
3645 ddr_print("Slow clock speed. Adjusting timing: tClk = %llu, Adjusted tClk = %lld\n",
3646 tclk_psecs, adjusted_tclk);
3647 tclk_psecs = adjusted_tclk;
3648 }
3649
3650 if ((s = lookup_env_parameter("ddr_cas_latency")) != NULL) {
3651 override_cas_latency = strtoul(s, NULL, 0);
3652 }
3653
3654 /* Make sure that the selected cas latency is legal */
3655 for (i=(CL-base_CL); i<32; ++i) {
3656 if ((spd_cas_latency >> i) & 1) {
3657 CL = i+base_CL;
3658 break;
3659 }
3660 }
3661 }
3662
3663 if (CL > max_cas_latency)
3664 CL = max_cas_latency;
3665
3666 if (override_cas_latency != 0) {
3667 CL = override_cas_latency;
3668 }
3669
3670 ddr_print("CAS Latency : %6d\n", CL);
3671
3672 if ((CL * tCKmin) > 20000)
3673 {
3674 ddr_print("(CLactual * tCKmin) = %d exceeds 20 ns\n", (CL * tCKmin));
3675 }
3676
3677 if ((num_banks != 4) && (num_banks != 8) && (num_banks != 16))
3678 {
3679 error_print("Unsupported number of banks %d. Must be 4 or 8 or 16.\n", num_banks);
3680 ++fatal_error;
3681 }
3682
3683 if ((num_ranks != 1) && (num_ranks != 2) && (num_ranks != 4))
3684 {
3685 error_print("Unsupported number of ranks: %d\n", num_ranks);
3686 ++fatal_error;
3687 }
3688
3689 if (! CAVIUM_IS_MODEL(CAVIUM_CN81XX)) { // 88XX or 83XX, but not 81XX
3690 if ((dram_width != 8) && (dram_width != 16) && (dram_width != 4)) {
3691 error_print("Unsupported SDRAM Width, x%d. Must be x4, x8 or x16.\n", dram_width);
3692 ++fatal_error;
3693 }
3694 } else if ((dram_width != 8) && (dram_width != 16)) { // 81XX can only do x8 or x16
3695 error_print("Unsupported SDRAM Width, x%d. Must be x8 or x16.\n", dram_width);
3696 ++fatal_error;
3697 }
3698
3699
3700 /*
3701 ** Bail out here if things are not copasetic.
3702 */
3703 if (fatal_error)
3704 return(-1);
3705
3706 /*
3707 * 6.9.6 LMC RESET Initialization
3708 *
3709 * The purpose of this step is to assert/deassert the RESET# pin at the
3710 * DDR3/DDR4 parts.
3711 *
3712 * This LMC RESET step is done for all enabled LMCs.
3713 */
3714 perform_lmc_reset(node, ddr_interface_num);
3715
3716 // Make sure scrambling is disabled during init...
3717 {
3718 bdk_lmcx_control_t lmc_control;
3719
3720 lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
3721 lmc_control.s.scramble_ena = 0;
3722 DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
3723
3724 DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG0(ddr_interface_num), 0);
3725 DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG1(ddr_interface_num), 0);
3726 DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG2(ddr_interface_num), 0);
3727 }
3728
3729
3730 odt_idx = dimm_count - 1;
3731
3732 switch (num_ranks) {
3733 case 1:
3734 odt_config = odt_1rank_config;
3735 break;
3736 case 2:
3737 odt_config = odt_2rank_config;
3738 break;
3739 case 4:
3740 odt_config = odt_4rank_config;
3741 break;
3742 default:
3743 odt_config = disable_odt_config;
3744 error_print("Unsupported number of ranks: %d\n", num_ranks);
3745 ++fatal_error;
3746 }
3747
3748
3749 /* Parameters from DDR3 Specifications */
3750 #define DDR3_tREFI 7800000 /* 7.8 us */
3751 #define DDR3_ZQCS 80000ull /* 80 ns */
3752 #define DDR3_ZQCS_Interval 1280000000 /* 128ms/100 */
3753 #define DDR3_tCKE 5000 /* 5 ns */
3754 #define DDR3_tMRD 4 /* 4 nCK */
3755 #define DDR3_tDLLK 512 /* 512 nCK */
3756 #define DDR3_tMPRR 1 /* 1 nCK */
3757 #define DDR3_tWLMRD 40 /* 40 nCK */
3758 #define DDR3_tWLDQSEN 25 /* 25 nCK */
3759
3760 /* Parameters from DDR4 Specifications */
3761 #define DDR4_tMRD 8 /* 8 nCK */
3762 #define DDR4_tDLLK 768 /* 768 nCK */
3763
3764 /*
3765 * 6.9.7 Early LMC Initialization
3766 *
3767 * All of DDR PLL, LMC CK, and LMC DRESET initializations must be
3768 * completed prior to starting this LMC initialization sequence.
3769 *
3770 * Perform the following five substeps for early LMC initialization:
3771 *
3772 * 1. Software must ensure there are no pending DRAM transactions.
3773 *
3774 * 2. Write LMC(0)_CONFIG, LMC(0)_CONTROL, LMC(0)_TIMING_PARAMS0,
3775 * LMC(0)_TIMING_PARAMS1, LMC(0)_MODEREG_PARAMS0,
3776 * LMC(0)_MODEREG_PARAMS1, LMC(0)_DUAL_MEMCFG, LMC(0)_NXM,
3777 * LMC(0)_WODT_MASK, LMC(0)_RODT_MASK, LMC(0)_COMP_CTL2,
3778 * LMC(0)_PHY_CTL, LMC(0)_DIMM0/1_PARAMS, and LMC(0)_DIMM_CTL with
3779 * appropriate values. All sections in this chapter can be used to
3780 * derive proper register settings.
3781 */
3782
3783 /* LMC(0)_CONFIG */
3784 {
3785 lmc_config.u = 0;
3786
3787 lmc_config.s.ecc_ena = use_ecc;
3788 lmc_config.s.row_lsb = encode_row_lsb_ddr3(row_lsb, ddr_interface_64b);
3789 lmc_config.s.pbank_lsb = encode_pbank_lsb_ddr3(pbank_lsb, ddr_interface_64b);
3790
3791 lmc_config.s.idlepower = 0; /* Disabled */
3792
3793 if ((s = lookup_env_parameter("ddr_idlepower")) != NULL) {
3794 lmc_config.s.idlepower = strtoul(s, NULL, 0);
3795 }
3796
3797 lmc_config.s.forcewrite = 0; /* Disabled */
3798 lmc_config.s.ecc_adr = 1; /* Include memory reference address in the ECC */
3799
3800 if ((s = lookup_env_parameter("ddr_ecc_adr")) != NULL) {
3801 lmc_config.s.ecc_adr = strtoul(s, NULL, 0);
3802 }
3803
3804 lmc_config.s.reset = 0;
3805
3806 /*
3807 * Program LMC0_CONFIG[24:18], ref_zqcs_int(6:0) to
3808 * RND-DN(tREFI/clkPeriod/512) Program LMC0_CONFIG[36:25],
3809 * ref_zqcs_int(18:7) to
3810 * RND-DN(ZQCS_Interval/clkPeriod/(512*128)). Note that this
3811 * value should always be greater than 32, to account for
3812 * resistor calibration delays.
3813 */
3814
3815 lmc_config.s.ref_zqcs_int = ((DDR3_tREFI/tclk_psecs/512) & 0x7f);
3816 lmc_config.s.ref_zqcs_int |= ((max(33ull, (DDR3_ZQCS_Interval/(tclk_psecs/100)/(512*128))) & 0xfff) << 7);
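        /* Worked example (illustrative only): at tCLK = 833 ps the low field is
           RND-DN(7800000/833/512) = 18 and the high field is
           max(33, 1280000000/(833/100)/(512*128)) = 2441, satisfying the
           "greater than 32" requirement noted above. */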
3817
3818
3819 lmc_config.s.early_dqx = 1; /* Default to enabled */
3820
3821 if ((s = lookup_env_parameter("ddr_early_dqx")) == NULL)
3822 s = lookup_env_parameter("ddr%d_early_dqx", ddr_interface_num);
3823 if (s != NULL) {
3824 lmc_config.s.early_dqx = strtoul(s, NULL, 0);
3825 }
3826
3827 lmc_config.s.sref_with_dll = 0;
3828
3829 lmc_config.s.rank_ena = bunk_enable;
3830 lmc_config.s.rankmask = rank_mask; /* Set later */
3831 lmc_config.s.mirrmask = (spd_addr_mirror << 1 | spd_addr_mirror << 3) & rank_mask;
3832 lmc_config.s.init_status = rank_mask; /* Set once and don't change it. */
3833 lmc_config.s.early_unload_d0_r0 = 0;
3834 lmc_config.s.early_unload_d0_r1 = 0;
3835 lmc_config.s.early_unload_d1_r0 = 0;
3836 lmc_config.s.early_unload_d1_r1 = 0;
3837 lmc_config.s.scrz = 0;
3838 // set 32-bit mode for real only when selected AND 81xx...
3839 if (!ddr_interface_64b && CAVIUM_IS_MODEL(CAVIUM_CN81XX)) {
3840 lmc_config.s.mode32b = 1;
3841 }
3842 VB_PRT(VBL_DEV, "%-45s : %d\n", "MODE32B (init)", lmc_config.s.mode32b);
3843 lmc_config.s.mode_x4dev = (dram_width == 4) ? 1 : 0;
3844 lmc_config.s.bg2_enable = ((ddr_type == DDR4_DRAM) && (dram_width == 16)) ? 0 : 1;
3845
3846 if ((s = lookup_env_parameter_ull("ddr_config")) != NULL) {
3847 lmc_config.u = strtoull(s, NULL, 0);
3848 }
3849 ddr_print("LMC_CONFIG : 0x%016llx\n", lmc_config.u);
3850 DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
3851 }
3852
3853 /* LMC(0)_CONTROL */
3854 {
3855 bdk_lmcx_control_t lmc_control;
3856 lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
3857 lmc_control.s.rdimm_ena = spd_rdimm;
3858 lmc_control.s.bwcnt = 0; /* Clear counter later */
3859 if (spd_rdimm)
3860 lmc_control.s.ddr2t = (safe_ddr_flag ? 1 : custom_lmc_config->ddr2t_rdimm );
3861 else
3862 lmc_control.s.ddr2t = (safe_ddr_flag ? 1 : custom_lmc_config->ddr2t_udimm );
3863 lmc_control.s.pocas = 0;
3864 lmc_control.s.fprch2 = (safe_ddr_flag ? 2 : custom_lmc_config->fprch2 );
3865 lmc_control.s.throttle_rd = safe_ddr_flag ? 1 : 0;
3866 lmc_control.s.throttle_wr = safe_ddr_flag ? 1 : 0;
3867 lmc_control.s.inorder_rd = safe_ddr_flag ? 1 : 0;
3868 lmc_control.s.inorder_wr = safe_ddr_flag ? 1 : 0;
3869 lmc_control.cn81xx.elev_prio_dis = safe_ddr_flag ? 1 : 0;
3870 lmc_control.s.nxm_write_en = 0; /* discards writes to
3871 addresses that don't exist
3872 in the DRAM */
3873 lmc_control.s.max_write_batch = 8;
3874 lmc_control.s.xor_bank = 1;
3875 lmc_control.s.auto_dclkdis = 1;
3876 lmc_control.s.int_zqcs_dis = 0;
3877 lmc_control.s.ext_zqcs_dis = 0;
3878 lmc_control.s.bprch = 1;
3879 lmc_control.s.wodt_bprch = 1;
3880 lmc_control.s.rodt_bprch = 1;
3881
3882 if ((s = lookup_env_parameter("ddr_xor_bank")) != NULL) {
3883 lmc_control.s.xor_bank = strtoul(s, NULL, 0);
3884 }
3885
3886 if ((s = lookup_env_parameter("ddr_2t")) != NULL) {
3887 lmc_control.s.ddr2t = strtoul(s, NULL, 0);
3888 }
3889
3890 if ((s = lookup_env_parameter("ddr_fprch2")) != NULL) {
3891 lmc_control.s.fprch2 = strtoul(s, NULL, 0);
3892 }
3893
3894 if ((s = lookup_env_parameter("ddr_bprch")) != NULL) {
3895 lmc_control.s.bprch = strtoul(s, NULL, 0);
3896 }
3897
3898 if ((s = lookup_env_parameter("ddr_wodt_bprch")) != NULL) {
3899 lmc_control.s.wodt_bprch = strtoul(s, NULL, 0);
3900 }
3901
3902 if ((s = lookup_env_parameter("ddr_rodt_bprch")) != NULL) {
3903 lmc_control.s.rodt_bprch = strtoul(s, NULL, 0);
3904 }
3905
3906 if ((s = lookup_env_parameter("ddr_int_zqcs_dis")) != NULL) {
3907 lmc_control.s.int_zqcs_dis = strtoul(s, NULL, 0);
3908 }
3909
3910 if ((s = lookup_env_parameter("ddr_ext_zqcs_dis")) != NULL) {
3911 lmc_control.s.ext_zqcs_dis = strtoul(s, NULL, 0);
3912 }
3913
3914 if ((s = lookup_env_parameter_ull("ddr_control")) != NULL) {
3915 lmc_control.u = strtoull(s, NULL, 0);
3916 }
3917 ddr_print("LMC_CONTROL : 0x%016llx\n", lmc_control.u);
3918 DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
3919 }
3920
3921 /* LMC(0)_TIMING_PARAMS0 */
3922 {
3923 unsigned trp_value;
3924 bdk_lmcx_timing_params0_t lmc_timing_params0;
3925 lmc_timing_params0.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS0(ddr_interface_num));
3926
3927 trp_value = divide_roundup(trp, tclk_psecs) - 1;
3928 ddr_print("TIMING_PARAMS0[TRP]: NEW 0x%x, OLD 0x%x\n", trp_value,
3929 trp_value + (unsigned)(divide_roundup(max(4*tclk_psecs, 7500ull), tclk_psecs)) - 4);
3930 #if 1
3931 if ((s = lookup_env_parameter_ull("ddr_use_old_trp")) != NULL) {
3932 if (!!strtoull(s, NULL, 0)) {
3933 trp_value += divide_roundup(max(4*tclk_psecs, 7500ull), tclk_psecs) - 4;
3934 ddr_print("TIMING_PARAMS0[trp]: USING OLD 0x%x\n", trp_value);
3935 }
3936 }
3937 #endif
3938
3939 lmc_timing_params0.s.txpr = divide_roundup(max(5*tclk_psecs, trfc+10000ull), 16*tclk_psecs);
3940 lmc_timing_params0.s.tzqinit = divide_roundup(max(512*tclk_psecs, 640000ull), (256*tclk_psecs));
3941 lmc_timing_params0.s.trp = trp_value & 0x1f;
3942 lmc_timing_params0.s.tcksre = divide_roundup(max(5*tclk_psecs, 10000ull), tclk_psecs) - 1;
3943
3944 if (ddr_type == DDR4_DRAM) {
3945 lmc_timing_params0.s.tzqcs = divide_roundup(128*tclk_psecs, (16*tclk_psecs)); /* Always 8. */
3946 lmc_timing_params0.s.tcke = divide_roundup(max(3*tclk_psecs, (uint64_t) DDR3_tCKE), tclk_psecs) - 1;
3947 lmc_timing_params0.s.tmrd = divide_roundup((DDR4_tMRD*tclk_psecs), tclk_psecs) - 1;
3948 //lmc_timing_params0.s.tmod = divide_roundup(max(24*tclk_psecs, 15000ull), tclk_psecs) - 1;
3949 lmc_timing_params0.s.tmod = 25; /* 25 is the max allowed */
3950 lmc_timing_params0.s.tdllk = divide_roundup(DDR4_tDLLK, 256);
3951 } else {
3952 lmc_timing_params0.s.tzqcs = divide_roundup(max(64*tclk_psecs, DDR3_ZQCS), (16*tclk_psecs));
3953 lmc_timing_params0.s.tcke = divide_roundup(DDR3_tCKE, tclk_psecs) - 1;
3954 lmc_timing_params0.s.tmrd = divide_roundup((DDR3_tMRD*tclk_psecs), tclk_psecs) - 1;
3955 lmc_timing_params0.s.tmod = divide_roundup(max(12*tclk_psecs, 15000ull), tclk_psecs) - 1;
3956 lmc_timing_params0.s.tdllk = divide_roundup(DDR3_tDLLK, 256);
3957 }
3958
3959 if ((s = lookup_env_parameter_ull("ddr_timing_params0")) != NULL) {
3960 lmc_timing_params0.u = strtoull(s, NULL, 0);
3961 }
3962 ddr_print("TIMING_PARAMS0 : 0x%016llx\n", lmc_timing_params0.u);
3963 DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS0(ddr_interface_num), lmc_timing_params0.u);
3964 }
3965
3966 /* LMC(0)_TIMING_PARAMS1 */
3967 {
3968 int txp, temp_trcd, trfc_dlr;
3969 bdk_lmcx_timing_params1_t lmc_timing_params1;
3970 lmc_timing_params1.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num));
3971
3972 lmc_timing_params1.s.tmprr = divide_roundup(DDR3_tMPRR*tclk_psecs, tclk_psecs) - 1;
3973
3974 lmc_timing_params1.s.tras = divide_roundup(tras, tclk_psecs) - 1;
3975
3976 // NOTE: this is reworked for pass 2.x
3977 temp_trcd = divide_roundup(trcd, tclk_psecs);
3978 #if 1
3979 if (temp_trcd > 15)
3980 ddr_print("TIMING_PARAMS1[trcd]: need extension bit for 0x%x\n", temp_trcd);
3981 #endif
3982 if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (temp_trcd > 15)) {
3983 /* Let .trcd=0 serve as a flag that the field has
3984 overflowed. Must use Additive Latency mode as a
3985 workaround. */
3986 temp_trcd = 0;
3987 }
3988 lmc_timing_params1.s.trcd = temp_trcd & 0x0f;
3989 lmc_timing_params1.s.trcd_ext = (temp_trcd >> 4) & 1;
3990
3991 lmc_timing_params1.s.twtr = divide_roundup(twtr, tclk_psecs) - 1;
3992 lmc_timing_params1.s.trfc = divide_roundup(trfc, 8*tclk_psecs);
3993
3994 // workaround needed for all THUNDER chips thru T88 Pass 2.0,
3995 // but not 81xx and 83xx...
3996 if ((ddr_type == DDR4_DRAM) && CAVIUM_IS_MODEL(CAVIUM_CN88XX)) {
3997 /* Workaround bug 24006. Use Trrd_l. */
3998 lmc_timing_params1.s.trrd = divide_roundup(ddr4_tRRD_Lmin, tclk_psecs) - 2;
3999 } else
4000 lmc_timing_params1.s.trrd = divide_roundup(trrd, tclk_psecs) - 2;
4001
4002 /*
4003 ** tXP = max( 3nCK, 7.5 ns) DDR3-800 tCLK = 2500 psec
4004 ** tXP = max( 3nCK, 7.5 ns) DDR3-1066 tCLK = 1875 psec
4005 ** tXP = max( 3nCK, 6.0 ns) DDR3-1333 tCLK = 1500 psec
4006 ** tXP = max( 3nCK, 6.0 ns) DDR3-1600 tCLK = 1250 psec
4007 ** tXP = max( 3nCK, 6.0 ns) DDR3-1866 tCLK = 1071 psec
4008 ** tXP = max( 3nCK, 6.0 ns) DDR3-2133 tCLK = 937 psec
4009 */
4010 txp = (tclk_psecs < 1875) ? 6000 : 7500;
4011 // NOTE: this is reworked for pass 2.x
4012 int temp_txp = divide_roundup(max(3*tclk_psecs, (unsigned)txp), tclk_psecs) - 1;
4013 #if 1
4014 if (temp_txp > 7)
4015 ddr_print("TIMING_PARAMS1[txp]: need extension bit for 0x%x\n", temp_txp);
4016 #endif
4017 if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (temp_txp > 7)) {
4018 temp_txp = 7; // max it out
4019 }
4020 lmc_timing_params1.s.txp = temp_txp & 7;
4021 lmc_timing_params1.s.txp_ext = (temp_txp >> 3) & 1;
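        /* Example (derived from the code above): at tCLK = 833 ps, txp = 6000 ps,
           so temp_txp = divide_roundup(max(2499, 6000), 833) - 1 = 7, which still
           fits the 3-bit field without needing the extension bit. */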
4022
4023 lmc_timing_params1.s.twlmrd = divide_roundup(DDR3_tWLMRD*tclk_psecs, 4*tclk_psecs);
4024 lmc_timing_params1.s.twldqsen = divide_roundup(DDR3_tWLDQSEN*tclk_psecs, 4*tclk_psecs);
4025 lmc_timing_params1.s.tfaw = divide_roundup(tfaw, 4*tclk_psecs);
4026 lmc_timing_params1.s.txpdll = divide_roundup(max(10*tclk_psecs, 24000ull), tclk_psecs) - 1;
4027
4028 if ((ddr_type == DDR4_DRAM) && is_3ds_dimm) {
4029 /*
4030 4 Gb: tRFC_DLR = 90 ns
4031 8 Gb: tRFC_DLR = 120 ns
4032 16 Gb: tRFC_DLR = 190 ns FIXME?
4033 */
4034 // RNDUP[tRFC_DLR(ns) / (8 * TCYC(ns))]
4035 if (die_capacity == 0x1000) // 4 Gbit
4036 trfc_dlr = 90;
4037 else if (die_capacity == 0x2000) // 8 Gbit
4038 trfc_dlr = 120;
4039 else if (die_capacity == 0x4000) // 16 Gbit
4040 trfc_dlr = 190;
4041 else
4042 trfc_dlr = 0;
4043
4044 if (trfc_dlr == 0) {
4045 ddr_print("N%d.LMC%d: ERROR: tRFC_DLR: die_capacity %u Mbit is illegal\n",
4046 node, ddr_interface_num, die_capacity);
4047 } else {
4048 lmc_timing_params1.s.trfc_dlr = divide_roundup(trfc_dlr * 1000UL, 8*tclk_psecs);
4049 ddr_print("N%d.LMC%d: TIMING_PARAMS1[trfc_dlr] set to %u\n",
4050 node, ddr_interface_num, lmc_timing_params1.s.trfc_dlr);
4051 }
4052 }
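        /* Worked example (illustrative only): an 8 Gbit 3DS die at tCLK = 833 ps
           gives trfc_dlr = divide_roundup(120 * 1000, 8 * 833) = 19 in the CSR. */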
4053
4054 if ((s = lookup_env_parameter_ull("ddr_timing_params1")) != NULL) {
4055 lmc_timing_params1.u = strtoull(s, NULL, 0);
4056 }
4057 ddr_print("TIMING_PARAMS1 : 0x%016llx\n", lmc_timing_params1.u);
4058 DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num), lmc_timing_params1.u);
4059 }
4060
4061 /* LMC(0)_TIMING_PARAMS2 */
4062 if (ddr_type == DDR4_DRAM) {
4063 bdk_lmcx_timing_params1_t lmc_timing_params1;
4064 bdk_lmcx_timing_params2_t lmc_timing_params2;
4065 lmc_timing_params1.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num));
4066 lmc_timing_params2.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS2(ddr_interface_num));
4067 ddr_print("TIMING_PARAMS2 : 0x%016llx\n", lmc_timing_params2.u);
4068
4069 //lmc_timing_params2.s.trrd_l = divide_roundup(ddr4_tRRD_Lmin, tclk_psecs) - 1;
4070 // NOTE: this is reworked for pass 2.x
4071 int temp_trrd_l = divide_roundup(ddr4_tRRD_Lmin, tclk_psecs) - 2;
4072 #if 1
4073 if (temp_trrd_l > 7)
4074 ddr_print("TIMING_PARAMS2[trrd_l]: need extension bit for 0x%x\n", temp_trrd_l);
4075 #endif
4076 if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (temp_trrd_l > 7)) {
4077 temp_trrd_l = 7; // max it out
4078 }
4079 lmc_timing_params2.s.trrd_l = temp_trrd_l & 7;
4080 lmc_timing_params2.s.trrd_l_ext = (temp_trrd_l >> 3) & 1;
4081
4082 lmc_timing_params2.s.twtr_l = divide_nint(max(4*tclk_psecs, 7500ull), tclk_psecs) - 1; // correct for 1600-2400
4083 lmc_timing_params2.s.t_rw_op_max = 7;
4084 lmc_timing_params2.s.trtp = divide_roundup(max(4*tclk_psecs, 7500ull), tclk_psecs) - 1;
4085
4086 ddr_print("TIMING_PARAMS2 : 0x%016llx\n", lmc_timing_params2.u);
4087 DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS2(ddr_interface_num), lmc_timing_params2.u);
4088
4089 /* Workaround Errata 25823 - LMC: Possible DDR4 tWTR_L not met
4090 for Write-to-Read operations to the same Bank Group */
4091 if (lmc_timing_params1.s.twtr < (lmc_timing_params2.s.twtr_l - 4)) {
4092 lmc_timing_params1.s.twtr = lmc_timing_params2.s.twtr_l - 4;
4093 ddr_print("ERRATA 25823: NEW: TWTR: %d, TWTR_L: %d\n", lmc_timing_params1.s.twtr, lmc_timing_params2.s.twtr_l);
4094 ddr_print("TIMING_PARAMS1 : 0x%016llx\n", lmc_timing_params1.u);
4095 DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num), lmc_timing_params1.u);
4096 }
4097 }
4098
4099 /* LMC(0)_MODEREG_PARAMS0 */
4100 {
4101 bdk_lmcx_modereg_params0_t lmc_modereg_params0;
4102 int param;
4103
4104 lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
4105
4106 if (ddr_type == DDR4_DRAM) {
4107 lmc_modereg_params0.s.cwl = 0; /* 1600 (1250ps) */
4108 if (tclk_psecs < 1250)
4109 lmc_modereg_params0.s.cwl = 1; /* 1866 (1072ps) */
4110 if (tclk_psecs < 1072)
4111 lmc_modereg_params0.s.cwl = 2; /* 2133 (938ps) */
4112 if (tclk_psecs < 938)
4113 lmc_modereg_params0.s.cwl = 3; /* 2400 (833ps) */
4114 if (tclk_psecs < 833)
4115 lmc_modereg_params0.s.cwl = 4; /* 2666 (750ps) */
4116 if (tclk_psecs < 750)
4117 lmc_modereg_params0.s.cwl = 5; /* 3200 (625ps) */
4118 } else {
4119 /*
4120 ** CSR CWL CAS write Latency
4121 ** === === =================================
4122 ** 0 5 ( tCK(avg) >= 2.5 ns)
4123 ** 1 6 (2.5 ns > tCK(avg) >= 1.875 ns)
4124 ** 2 7 (1.875 ns > tCK(avg) >= 1.5 ns)
4125 ** 3 8 (1.5 ns > tCK(avg) >= 1.25 ns)
4126 ** 4 9 (1.25 ns > tCK(avg) >= 1.07 ns)
4127 ** 5 10 (1.07 ns > tCK(avg) >= 0.935 ns)
4128 ** 6 11 (0.935 ns > tCK(avg) >= 0.833 ns)
4129 ** 7 12 (0.833 ns > tCK(avg) >= 0.75 ns)
4130 */
4131
4132 lmc_modereg_params0.s.cwl = 0;
4133 if (tclk_psecs < 2500)
4134 lmc_modereg_params0.s.cwl = 1;
4135 if (tclk_psecs < 1875)
4136 lmc_modereg_params0.s.cwl = 2;
4137 if (tclk_psecs < 1500)
4138 lmc_modereg_params0.s.cwl = 3;
4139 if (tclk_psecs < 1250)
4140 lmc_modereg_params0.s.cwl = 4;
4141 if (tclk_psecs < 1070)
4142 lmc_modereg_params0.s.cwl = 5;
4143 if (tclk_psecs < 935)
4144 lmc_modereg_params0.s.cwl = 6;
4145 if (tclk_psecs < 833)
4146 lmc_modereg_params0.s.cwl = 7;
4147 }
4148
4149 if ((s = lookup_env_parameter("ddr_cwl")) != NULL) {
4150 lmc_modereg_params0.s.cwl = strtoul(s, NULL, 0) - 5;
4151 }
4152
4153 if (ddr_type == DDR4_DRAM) {
4154 ddr_print("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
4155 lmc_modereg_params0.s.cwl + 9
4156 + ((lmc_modereg_params0.s.cwl>2) ? (lmc_modereg_params0.s.cwl-3) * 2 : 0),
4157 lmc_modereg_params0.s.cwl);
4158 } else {
4159 ddr_print("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
4160 lmc_modereg_params0.s.cwl + 5,
4161 lmc_modereg_params0.s.cwl);
4162 }
4163
4164 lmc_modereg_params0.s.mprloc = 0;
4165 lmc_modereg_params0.s.mpr = 0;
4166 lmc_modereg_params0.s.dll = (ddr_type == DDR4_DRAM)?1:0; /* disable(0) for DDR3 and enable(1) for DDR4 */
4167 lmc_modereg_params0.s.al = 0;
4168 lmc_modereg_params0.s.wlev = 0; /* Read Only */
4169 lmc_modereg_params0.s.tdqs = ((ddr_type == DDR4_DRAM) || (dram_width != 8))?0:1; /* disable(0) for DDR4 and x4/x16 DDR3 */
4170 lmc_modereg_params0.s.qoff = 0;
4171 //lmc_modereg_params0.s.bl = 0; /* Don't touch block dirty logic */
4172
4173 if ((s = lookup_env_parameter("ddr_cl")) != NULL) {
4174 CL = strtoul(s, NULL, 0);
4175 ddr_print("CAS Latency : %6d\n", CL);
4176 }
4177
4178 if (ddr_type == DDR4_DRAM) {
4179 lmc_modereg_params0.s.cl = 0x0;
4180 if (CL > 9)
4181 lmc_modereg_params0.s.cl = 0x1;
4182 if (CL > 10)
4183 lmc_modereg_params0.s.cl = 0x2;
4184 if (CL > 11)
4185 lmc_modereg_params0.s.cl = 0x3;
4186 if (CL > 12)
4187 lmc_modereg_params0.s.cl = 0x4;
4188 if (CL > 13)
4189 lmc_modereg_params0.s.cl = 0x5;
4190 if (CL > 14)
4191 lmc_modereg_params0.s.cl = 0x6;
4192 if (CL > 15)
4193 lmc_modereg_params0.s.cl = 0x7;
4194 if (CL > 16)
4195 lmc_modereg_params0.s.cl = 0x8;
4196 if (CL > 18)
4197 lmc_modereg_params0.s.cl = 0x9;
4198 if (CL > 20)
4199 lmc_modereg_params0.s.cl = 0xA;
4200 if (CL > 24)
4201 lmc_modereg_params0.s.cl = 0xB;
4202 } else {
4203 lmc_modereg_params0.s.cl = 0x2;
4204 if (CL > 5)
4205 lmc_modereg_params0.s.cl = 0x4;
4206 if (CL > 6)
4207 lmc_modereg_params0.s.cl = 0x6;
4208 if (CL > 7)
4209 lmc_modereg_params0.s.cl = 0x8;
4210 if (CL > 8)
4211 lmc_modereg_params0.s.cl = 0xA;
4212 if (CL > 9)
4213 lmc_modereg_params0.s.cl = 0xC;
4214 if (CL > 10)
4215 lmc_modereg_params0.s.cl = 0xE;
4216 if (CL > 11)
4217 lmc_modereg_params0.s.cl = 0x1;
4218 if (CL > 12)
4219 lmc_modereg_params0.s.cl = 0x3;
4220 if (CL > 13)
4221 lmc_modereg_params0.s.cl = 0x5;
4222 if (CL > 14)
4223 lmc_modereg_params0.s.cl = 0x7;
4224 if (CL > 15)
4225 lmc_modereg_params0.s.cl = 0x9;
4226 }
4227
4228 lmc_modereg_params0.s.rbt = 0; /* Read Only. */
4229 lmc_modereg_params0.s.tm = 0;
4230 lmc_modereg_params0.s.dllr = 0;
4231
4232 param = divide_roundup(twr, tclk_psecs);
4233
4234 if (ddr_type == DDR4_DRAM) { /* DDR4 */
4235 lmc_modereg_params0.s.wrp = 1;
4236 if (param > 12)
4237 lmc_modereg_params0.s.wrp = 2;
4238 if (param > 14)
4239 lmc_modereg_params0.s.wrp = 3;
4240 if (param > 16)
4241 lmc_modereg_params0.s.wrp = 4;
4242 if (param > 18)
4243 lmc_modereg_params0.s.wrp = 5;
4244 if (param > 20)
4245 lmc_modereg_params0.s.wrp = 6;
4246 if (param > 24) /* RESERVED in DDR4 spec */
4247 lmc_modereg_params0.s.wrp = 7;
4248 } else { /* DDR3 */
4249 lmc_modereg_params0.s.wrp = 1;
4250 if (param > 5)
4251 lmc_modereg_params0.s.wrp = 2;
4252 if (param > 6)
4253 lmc_modereg_params0.s.wrp = 3;
4254 if (param > 7)
4255 lmc_modereg_params0.s.wrp = 4;
4256 if (param > 8)
4257 lmc_modereg_params0.s.wrp = 5;
4258 if (param > 10)
4259 lmc_modereg_params0.s.wrp = 6;
4260 if (param > 12)
4261 lmc_modereg_params0.s.wrp = 7;
4262 }
4263
4264 lmc_modereg_params0.s.ppd = 0;
4265
4266 if ((s = lookup_env_parameter("ddr_wrp")) != NULL) {
4267 lmc_modereg_params0.s.wrp = strtoul(s, NULL, 0);
4268 }
4269
4270 ddr_print("%-45s : %d, [0x%x]\n", "Write recovery for auto precharge WRP, [CSR]",
4271 param, lmc_modereg_params0.s.wrp);
4272
4273 if ((s = lookup_env_parameter_ull("ddr_modereg_params0")) != NULL) {
4274 lmc_modereg_params0.u = strtoull(s, NULL, 0);
4275 }
4276 ddr_print("MODEREG_PARAMS0 : 0x%016llx\n", lmc_modereg_params0.u);
4277 DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
4278 }
4279
4280 /* LMC(0)_MODEREG_PARAMS1 */
4281 {
4282 bdk_lmcx_modereg_params1_t lmc_modereg_params1;
4283
4284 lmc_modereg_params1.u = odt_config[odt_idx].odt_mask1.u;
4285
4286 #ifdef CAVIUM_ONLY
4287 /* Special request: mismatched DIMM support. Slot 0: 2-Rank, Slot 1: 1-Rank */
4288 if (rank_mask == 0x7) { /* 2-Rank, 1-Rank */
4289 lmc_modereg_params1.s.rtt_nom_00 = 0;
4290 lmc_modereg_params1.s.rtt_nom_01 = 3; /* rttnom_40ohm */
4291 lmc_modereg_params1.s.rtt_nom_10 = 3; /* rttnom_40ohm */
4292 lmc_modereg_params1.s.rtt_nom_11 = 0;
4293 dyn_rtt_nom_mask = 0x6;
4294 }
4295 #endif /* CAVIUM_ONLY */
4296
4297 if ((s = lookup_env_parameter("ddr_rtt_nom_mask")) != NULL) {
4298 dyn_rtt_nom_mask = strtoul(s, NULL, 0);
4299 }
4300
4301
4302 /* Save the original rtt_nom settings before sweeping through settings. */
4303 default_rtt_nom[0] = lmc_modereg_params1.s.rtt_nom_00;
4304 default_rtt_nom[1] = lmc_modereg_params1.s.rtt_nom_01;
4305 default_rtt_nom[2] = lmc_modereg_params1.s.rtt_nom_10;
4306 default_rtt_nom[3] = lmc_modereg_params1.s.rtt_nom_11;
4307
4308 ddr_rtt_nom_auto = custom_lmc_config->ddr_rtt_nom_auto;
4309
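        /* Per-rank RTT_NOM overrides: each RTT_NOM_xx field is 3 bits wide at bit (i*12 + 9) of MODEREG_PARAMS1, as the masking below shows */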
4310 for (i=0; i<4; ++i) {
4311 uint64_t value;
4312 if ((s = lookup_env_parameter("ddr_rtt_nom_%1d%1d", !!(i&2), !!(i&1))) == NULL)
4313 s = lookup_env_parameter("ddr%d_rtt_nom_%1d%1d", ddr_interface_num, !!(i&2), !!(i&1));
4314 if (s != NULL) {
4315 value = strtoul(s, NULL, 0);
4316 lmc_modereg_params1.u &= ~((uint64_t)0x7 << (i*12+9));
4317 lmc_modereg_params1.u |= ( (value & 0x7) << (i*12+9));
4318 default_rtt_nom[i] = value;
4319 ddr_rtt_nom_auto = 0;
4320 }
4321 }
4322
4323 if ((s = lookup_env_parameter("ddr_rtt_nom")) == NULL)
4324 s = lookup_env_parameter("ddr%d_rtt_nom", ddr_interface_num);
4325 if (s != NULL) {
4326 uint64_t value;
4327 value = strtoul(s, NULL, 0);
4328
4329 if (dyn_rtt_nom_mask & 1)
4330 default_rtt_nom[0] = lmc_modereg_params1.s.rtt_nom_00 = value;
4331 if (dyn_rtt_nom_mask & 2)
4332 default_rtt_nom[1] = lmc_modereg_params1.s.rtt_nom_01 = value;
4333 if (dyn_rtt_nom_mask & 4)
4334 default_rtt_nom[2] = lmc_modereg_params1.s.rtt_nom_10 = value;
4335 if (dyn_rtt_nom_mask & 8)
4336 default_rtt_nom[3] = lmc_modereg_params1.s.rtt_nom_11 = value;
4337
4338 ddr_rtt_nom_auto = 0;
4339 }
4340
4341 if ((s = lookup_env_parameter("ddr_rtt_wr")) != NULL) {
4342 uint64_t value = strtoul(s, NULL, 0);
4343 for (i=0; i<4; ++i) {
4344 INSRT_WR(&lmc_modereg_params1.u, i, value);
4345 }
4346 }
4347
4348 for (i = 0; i < 4; ++i) {
4349 uint64_t value;
4350 if ((s = lookup_env_parameter("ddr_rtt_wr_%1d%1d", !!(i&2), !!(i&1))) == NULL)
4351 s = lookup_env_parameter("ddr%d_rtt_wr_%1d%1d", ddr_interface_num, !!(i&2), !!(i&1));
4352 if (s != NULL) {
4353 value = strtoul(s, NULL, 0);
4354 INSRT_WR(&lmc_modereg_params1.u, i, value);
4355 }
4356 }
4357
4358 // Make sure pass 1 has valid RTT_WR settings, because
4359        // configuration files may be set up for pass 2, and
4360 // pass 1 supports no RTT_WR extension bits
4361 if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
4362 for (i = 0; i < 4; ++i) {
4363 if (EXTR_WR(lmc_modereg_params1.u, i) > 3) { // if 80 or undefined
4364 INSRT_WR(&lmc_modereg_params1.u, i, 1); // FIXME? always insert 120
4365 ddr_print("RTT_WR_%d%d set to 120 for CN88XX pass 1\n", !!(i&2), i&1);
4366 }
4367 }
4368 }
4369 if ((s = lookup_env_parameter("ddr_dic")) != NULL) {
4370 uint64_t value = strtoul(s, NULL, 0);
4371 for (i=0; i<4; ++i) {
4372 lmc_modereg_params1.u &= ~((uint64_t)0x3 << (i*12+7));
4373 lmc_modereg_params1.u |= ( (value & 0x3) << (i*12+7));
4374 }
4375 }
4376
4377 for (i=0; i<4; ++i) {
4378 uint64_t value;
4379 if ((s = lookup_env_parameter("ddr_dic_%1d%1d", !!(i&2), !!(i&1))) != NULL) {
4380 value = strtoul(s, NULL, 0);
4381 lmc_modereg_params1.u &= ~((uint64_t)0x3 << (i*12+7));
4382 lmc_modereg_params1.u |= ( (value & 0x3) << (i*12+7));
4383 }
4384 }
4385
4386 if ((s = lookup_env_parameter_ull("ddr_modereg_params1")) != NULL) {
4387 lmc_modereg_params1.u = strtoull(s, NULL, 0);
4388 }
4389
4390 ddr_print("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
4391 imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_11],
4392 imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_10],
4393 imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_01],
4394 imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00],
4395 lmc_modereg_params1.s.rtt_nom_11,
4396 lmc_modereg_params1.s.rtt_nom_10,
4397 lmc_modereg_params1.s.rtt_nom_01,
4398 lmc_modereg_params1.s.rtt_nom_00);
4399
4400 ddr_print("RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
4401 imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 3)],
4402 imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 2)],
4403 imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 1)],
4404 imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 0)],
4405 EXTR_WR(lmc_modereg_params1.u, 3),
4406 EXTR_WR(lmc_modereg_params1.u, 2),
4407 EXTR_WR(lmc_modereg_params1.u, 1),
4408 EXTR_WR(lmc_modereg_params1.u, 0));
4409
4410 ddr_print("DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
4411 imp_values->dic_ohms[lmc_modereg_params1.s.dic_11],
4412 imp_values->dic_ohms[lmc_modereg_params1.s.dic_10],
4413 imp_values->dic_ohms[lmc_modereg_params1.s.dic_01],
4414 imp_values->dic_ohms[lmc_modereg_params1.s.dic_00],
4415 lmc_modereg_params1.s.dic_11,
4416 lmc_modereg_params1.s.dic_10,
4417 lmc_modereg_params1.s.dic_01,
4418 lmc_modereg_params1.s.dic_00);
4419
4420 ddr_print("MODEREG_PARAMS1 : 0x%016llx\n", lmc_modereg_params1.u);
4421 DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num), lmc_modereg_params1.u);
4422
4423 } /* LMC(0)_MODEREG_PARAMS1 */
4424
4425 /* LMC(0)_MODEREG_PARAMS2 */
4426 if (ddr_type == DDR4_DRAM) {
4427 bdk_lmcx_modereg_params2_t lmc_modereg_params2;
4428 lmc_modereg_params2.u = odt_config[odt_idx].odt_mask2.u;
4429
4430 for (i=0; i<4; ++i) {
4431 uint64_t value;
4432 if ((s = lookup_env_parameter("ddr_rtt_park_%1d%1d", !!(i&2), !!(i&1))) != NULL) {
4433 value = strtoul(s, NULL, 0);
4434 lmc_modereg_params2.u &= ~((uint64_t)0x7 << (i*10+0));
4435 lmc_modereg_params2.u |= ( (value & 0x7) << (i*10+0));
4436 }
4437 }
4438
4439 if ((s = lookup_env_parameter("ddr_rtt_park")) != NULL) {
4440 uint64_t value = strtoul(s, NULL, 0);
4441 for (i=0; i<4; ++i) {
4442 lmc_modereg_params2.u &= ~((uint64_t)0x7 << (i*10+0));
4443 lmc_modereg_params2.u |= ( (value & 0x7) << (i*10+0));
4444 }
4445 }
4446
4447 if ((s = lookup_env_parameter_ull("ddr_modereg_params2")) != NULL) {
4448 lmc_modereg_params2.u = strtoull(s, NULL, 0);
4449 }
4450
4451 ddr_print("RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
4452 imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_11],
4453 imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_10],
4454 imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_01],
4455 imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_00],
4456 lmc_modereg_params2.s.rtt_park_11,
4457 lmc_modereg_params2.s.rtt_park_10,
4458 lmc_modereg_params2.s.rtt_park_01,
4459 lmc_modereg_params2.s.rtt_park_00);
4460
4461 ddr_print("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE",
4462 lmc_modereg_params2.s.vref_range_11,
4463 lmc_modereg_params2.s.vref_range_10,
4464 lmc_modereg_params2.s.vref_range_01,
4465 lmc_modereg_params2.s.vref_range_00);
4466
4467 ddr_print("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE",
4468 lmc_modereg_params2.s.vref_value_11,
4469 lmc_modereg_params2.s.vref_value_10,
4470 lmc_modereg_params2.s.vref_value_01,
4471 lmc_modereg_params2.s.vref_value_00);
4472
4473 ddr_print("MODEREG_PARAMS2 : 0x%016llx\n", lmc_modereg_params2.u);
4474 DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num), lmc_modereg_params2.u);
4475
4476 } /* LMC(0)_MODEREG_PARAMS2 */
4477
4478 /* LMC(0)_MODEREG_PARAMS3 */
4479 if (ddr_type == DDR4_DRAM) {
4480 bdk_lmcx_modereg_params3_t lmc_modereg_params3;
4481
4482 lmc_modereg_params3.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS3(ddr_interface_num));
4483
4484 //lmc_modereg_params3.s.max_pd =
4485 //lmc_modereg_params3.s.tc_ref =
4486 //lmc_modereg_params3.s.vref_mon =
4487 //lmc_modereg_params3.s.cal =
4488 //lmc_modereg_params3.s.sre_abort =
4489 //lmc_modereg_params3.s.rd_preamble =
4490 //lmc_modereg_params3.s.wr_preamble =
4491 //lmc_modereg_params3.s.par_lat_mode =
4492 //lmc_modereg_params3.s.odt_pd =
4493 //lmc_modereg_params3.s.ca_par_pers =
4494 //lmc_modereg_params3.s.dm =
4495 //lmc_modereg_params3.s.wr_dbi =
4496 //lmc_modereg_params3.s.rd_dbi =
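        /* TCCD_L appears to be encoded as (nCK - 4): round the part's tCCD_Lmin up to whole clocks and clamp to at least 5 nCK */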
4497 lmc_modereg_params3.s.tccd_l = max(divide_roundup(ddr4_tCCD_Lmin, tclk_psecs), 5ull) - 4;
4498 //lmc_modereg_params3.s.lpasr =
4499 //lmc_modereg_params3.s.crc =
4500 //lmc_modereg_params3.s.gd =
4501 //lmc_modereg_params3.s.pda =
4502 //lmc_modereg_params3.s.temp_sense =
4503 //lmc_modereg_params3.s.fgrm =
4504 //lmc_modereg_params3.s.wr_cmd_lat =
4505 //lmc_modereg_params3.s.mpr_fmt =
4506
4507 if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
4508 int delay = 0;
4509 if ((lranks_per_prank == 4) && (ddr_hertz >= 1000000000))
4510 delay = 1;
4511 lmc_modereg_params3.s.xrank_add_tccd_l = delay;
4512 lmc_modereg_params3.s.xrank_add_tccd_s = delay;
4513 }
4514
4515 ddr_print("MODEREG_PARAMS3 : 0x%016llx\n", lmc_modereg_params3.u);
4516 DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS3(ddr_interface_num), lmc_modereg_params3.u);
4517 } /* LMC(0)_MODEREG_PARAMS3 */
4518
4519 /* LMC(0)_NXM */
4520 {
4521 bdk_lmcx_nxm_t lmc_nxm;
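        /* num_bits: the rank's most-significant address bit (row_lsb + row_bits + lranks_bits), rebased by 26 to the encoding the NXM MEM_MSB fields expect (assumed from the field usage below) */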
4522 int num_bits = row_lsb + row_bits + lranks_bits - 26;
4523 lmc_nxm.u = BDK_CSR_READ(node, BDK_LMCX_NXM(ddr_interface_num));
4524
4525 if (rank_mask & 0x1)
4526 lmc_nxm.s.mem_msb_d0_r0 = num_bits;
4527 if (rank_mask & 0x2)
4528 lmc_nxm.s.mem_msb_d0_r1 = num_bits;
4529 if (rank_mask & 0x4)
4530 lmc_nxm.s.mem_msb_d1_r0 = num_bits;
4531 if (rank_mask & 0x8)
4532 lmc_nxm.s.mem_msb_d1_r1 = num_bits;
4533
4534        lmc_nxm.s.cs_mask = ~rank_mask & 0xff; /* Set the mask for non-existent ranks. */
4535
4536 if ((s = lookup_env_parameter_ull("ddr_nxm")) != NULL) {
4537 lmc_nxm.u = strtoull(s, NULL, 0);
4538 }
4539 ddr_print("LMC_NXM : 0x%016llx\n", lmc_nxm.u);
4540 DRAM_CSR_WRITE(node, BDK_LMCX_NXM(ddr_interface_num), lmc_nxm.u);
4541 }
4542
4543 /* LMC(0)_WODT_MASK */
4544 {
4545 bdk_lmcx_wodt_mask_t lmc_wodt_mask;
4546 lmc_wodt_mask.u = odt_config[odt_idx].odt_mask;
4547
4548 if ((s = lookup_env_parameter_ull("ddr_wodt_mask")) != NULL) {
4549 lmc_wodt_mask.u = strtoull(s, NULL, 0);
4550 }
4551
4552 ddr_print("WODT_MASK : 0x%016llx\n", lmc_wodt_mask.u);
4553 DRAM_CSR_WRITE(node, BDK_LMCX_WODT_MASK(ddr_interface_num), lmc_wodt_mask.u);
4554 }
4555
4556 /* LMC(0)_RODT_MASK */
4557 {
4558 int rankx;
4559 bdk_lmcx_rodt_mask_t lmc_rodt_mask;
4560 lmc_rodt_mask.u = odt_config[odt_idx].rodt_ctl;
4561
4562 if ((s = lookup_env_parameter_ull("ddr_rodt_mask")) != NULL) {
4563 lmc_rodt_mask.u = strtoull(s, NULL, 0);
4564 }
4565
4566 ddr_print("%-45s : 0x%016llx\n", "RODT_MASK", lmc_rodt_mask.u);
4567 DRAM_CSR_WRITE(node, BDK_LMCX_RODT_MASK(ddr_interface_num), lmc_rodt_mask.u);
4568
4569 dyn_rtt_nom_mask = 0;
4570 for (rankx = 0; rankx < dimm_count * 4;rankx++) {
4571 if (!(rank_mask & (1 << rankx)))
4572 continue;
4573 dyn_rtt_nom_mask |= ((lmc_rodt_mask.u >> (8*rankx)) & 0xff);
4574 }
4575 if (num_ranks == 4) {
4576 /* Normally ODT1 is wired to rank 1. For quad-ranked DIMMs
4577 ODT1 is wired to the third rank (rank 2). The mask,
4578 dyn_rtt_nom_mask, is used to indicate for which ranks
4579 to sweep RTT_NOM during read-leveling. Shift the bit
4580 from the ODT1 position over to the "ODT2" position so
4581 that the read-leveling analysis comes out right. */
4582 int odt1_bit = dyn_rtt_nom_mask & 2;
4583 dyn_rtt_nom_mask &= ~2;
4584 dyn_rtt_nom_mask |= odt1_bit<<1;
4585 }
4586 ddr_print("%-45s : 0x%02x\n", "DYN_RTT_NOM_MASK", dyn_rtt_nom_mask);
4587 }
4588
4589 /* LMC(0)_COMP_CTL2 */
4590 {
4591 bdk_lmcx_comp_ctl2_t comp_ctl2;
4592
4593 comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
4594
4595 comp_ctl2.s.dqx_ctl = odt_config[odt_idx].odt_ena;
4596 comp_ctl2.s.ck_ctl = (custom_lmc_config->ck_ctl == 0) ? 4 : custom_lmc_config->ck_ctl; /* Default 4=34.3 ohm */
4597 comp_ctl2.s.cmd_ctl = (custom_lmc_config->cmd_ctl == 0) ? 4 : custom_lmc_config->cmd_ctl; /* Default 4=34.3 ohm */
4598 comp_ctl2.s.control_ctl = (custom_lmc_config->ctl_ctl == 0) ? 4 : custom_lmc_config->ctl_ctl; /* Default 4=34.3 ohm */
4599
4600 // NOTE: these are now done earlier, in Step 6.9.3
4601 // comp_ctl2.s.ntune_offset = 0;
4602 // comp_ctl2.s.ptune_offset = 0;
4603
4604 ddr_rodt_ctl_auto = custom_lmc_config->ddr_rodt_ctl_auto;
4605 if ((s = lookup_env_parameter("ddr_rodt_ctl_auto")) != NULL) {
4606 ddr_rodt_ctl_auto = !!strtoul(s, NULL, 0);
4607 }
4608
4609 default_rodt_ctl = odt_config[odt_idx].qs_dic;
4610 if ((s = lookup_env_parameter("ddr_rodt_ctl")) == NULL)
4611 s = lookup_env_parameter("ddr%d_rodt_ctl", ddr_interface_num);
4612 if (s != NULL) {
4613 default_rodt_ctl = strtoul(s, NULL, 0);
4614 ddr_rodt_ctl_auto = 0;
4615 }
4616
4617 comp_ctl2.s.rodt_ctl = default_rodt_ctl;
4618
4619 // if DDR4, force CK_CTL to 26 ohms if it is currently 34 ohms, and DCLK speed is 1 GHz or more...
4620 if ((ddr_type == DDR4_DRAM) && (comp_ctl2.s.ck_ctl == ddr4_driver_34_ohm) && (ddr_hertz >= 1000000000)) {
4621 comp_ctl2.s.ck_ctl = ddr4_driver_26_ohm; // lowest for DDR4 is 26 ohms
4622 ddr_print("Forcing DDR4 COMP_CTL2[CK_CTL] to %d, %d ohms\n", comp_ctl2.s.ck_ctl,
4623 imp_values->drive_strength[comp_ctl2.s.ck_ctl]);
4624 }
4625
4626 if ((s = lookup_env_parameter("ddr_ck_ctl")) != NULL) {
4627 comp_ctl2.s.ck_ctl = strtoul(s, NULL, 0);
4628 }
4629
4630 if ((s = lookup_env_parameter("ddr_cmd_ctl")) != NULL) {
4631 comp_ctl2.s.cmd_ctl = strtoul(s, NULL, 0);
4632 }
4633
4634 if ((s = lookup_env_parameter("ddr_control_ctl")) != NULL) {
4635 comp_ctl2.s.control_ctl = strtoul(s, NULL, 0);
4636 }
4637
4638 if ((s = lookup_env_parameter("ddr_dqx_ctl")) != NULL) {
4639 comp_ctl2.s.dqx_ctl = strtoul(s, NULL, 0);
4640 }
4641
4642 ddr_print("%-45s : %d, %d ohms\n", "DQX_CTL ", comp_ctl2.s.dqx_ctl,
4643 imp_values->dqx_strength [comp_ctl2.s.dqx_ctl ]);
4644 ddr_print("%-45s : %d, %d ohms\n", "CK_CTL ", comp_ctl2.s.ck_ctl,
4645 imp_values->drive_strength[comp_ctl2.s.ck_ctl ]);
4646 ddr_print("%-45s : %d, %d ohms\n", "CMD_CTL ", comp_ctl2.s.cmd_ctl,
4647 imp_values->drive_strength[comp_ctl2.s.cmd_ctl ]);
4648 ddr_print("%-45s : %d, %d ohms\n", "CONTROL_CTL ", comp_ctl2.s.control_ctl,
4649 imp_values->drive_strength[comp_ctl2.s.control_ctl]);
4650 ddr_print("Read ODT_CTL : 0x%x (%d ohms)\n",
4651 comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[comp_ctl2.s.rodt_ctl]);
4652
4653 DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), comp_ctl2.u);
4654 }
4655
4656 /* LMC(0)_PHY_CTL */
4657 {
4658 bdk_lmcx_phy_ctl_t lmc_phy_ctl;
4659 lmc_phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
4660 lmc_phy_ctl.s.ts_stagger = 0;
4661
4662 if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (lranks_per_prank > 1)) {
4663 lmc_phy_ctl.cn81xx.c0_sel = lmc_phy_ctl.cn81xx.c1_sel = 2; // C0 is TEN, C1 is A17
4664 ddr_print("N%d.LMC%d: 3DS: setting PHY_CTL[cx_csel] = %d\n",
4665 node, ddr_interface_num, lmc_phy_ctl.cn81xx.c1_sel);
4666 }
4667
4668 ddr_print("PHY_CTL : 0x%016llx\n", lmc_phy_ctl.u);
4669 DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), lmc_phy_ctl.u);
4670 }
4671
4672 /* LMC(0)_DIMM0/1_PARAMS */
4673 if (spd_rdimm) {
4674 bdk_lmcx_dimm_ctl_t lmc_dimm_ctl;
4675
4676 for (didx = 0; didx < (unsigned)dimm_count; ++didx) {
4677 bdk_lmcx_dimmx_params_t lmc_dimmx_params;
4678 int dimm = didx;
4679 int rc;
4680
4681 lmc_dimmx_params.u = BDK_CSR_READ(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm));
4682
4683
4684 if (ddr_type == DDR4_DRAM) {
4685
4686 bdk_lmcx_dimmx_ddr4_params0_t lmc_dimmx_ddr4_params0;
4687 bdk_lmcx_dimmx_ddr4_params1_t lmc_dimmx_ddr4_params1;
4688 bdk_lmcx_ddr4_dimm_ctl_t lmc_ddr4_dimm_ctl;
4689
4690 lmc_dimmx_params.s.rc0 = 0;
4691 lmc_dimmx_params.s.rc1 = 0;
4692 lmc_dimmx_params.s.rc2 = 0;
4693
4694 rc = read_spd(node, &dimm_config_table[didx], DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL);
4695 lmc_dimmx_params.s.rc3 = (rc >> 4) & 0xf;
4696 lmc_dimmx_params.s.rc4 = ((rc >> 0) & 0x3) << 2;
4697 lmc_dimmx_params.s.rc4 |= ((rc >> 2) & 0x3) << 0;
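                /* The two low 2-bit fields of the SPD drive-strength byte are swapped into RC04's bit order here
                   (and the clock drive-strength byte gets the same treatment for RC05 below); the exact CKE/ODT
                   field assignment is assumed from the SPD byte layout */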
4698
4699 rc = read_spd(node, &dimm_config_table[didx], DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK);
4700 lmc_dimmx_params.s.rc5 = ((rc >> 0) & 0x3) << 2;
4701 lmc_dimmx_params.s.rc5 |= ((rc >> 2) & 0x3) << 0;
4702
4703 lmc_dimmx_params.s.rc6 = 0;
4704 lmc_dimmx_params.s.rc7 = 0;
4705 lmc_dimmx_params.s.rc8 = 0;
4706 lmc_dimmx_params.s.rc9 = 0;
4707
4708 /*
4709 ** rc10 DDR4 RDIMM Operating Speed
4710 ** ==== =========================================================
4711 ** 0 tclk_psecs >= 1250 psec DDR4-1600 (1250 ps)
4712 ** 1 1250 psec > tclk_psecs >= 1071 psec DDR4-1866 (1071 ps)
4713 ** 2 1071 psec > tclk_psecs >= 938 psec DDR4-2133 ( 938 ps)
4714 ** 3 938 psec > tclk_psecs >= 833 psec DDR4-2400 ( 833 ps)
4715 ** 4 833 psec > tclk_psecs >= 750 psec DDR4-2666 ( 750 ps)
4716 ** 5 750 psec > tclk_psecs >= 625 psec DDR4-3200 ( 625 ps)
4717 */
4718 lmc_dimmx_params.s.rc10 = 0;
4719 if (1250 > tclk_psecs)
4720 lmc_dimmx_params.s.rc10 = 1;
4721 if (1071 > tclk_psecs)
4722 lmc_dimmx_params.s.rc10 = 2;
4723 if (938 > tclk_psecs)
4724 lmc_dimmx_params.s.rc10 = 3;
4725 if (833 > tclk_psecs)
4726 lmc_dimmx_params.s.rc10 = 4;
4727 if (750 > tclk_psecs)
4728 lmc_dimmx_params.s.rc10 = 5;
4729
4730 lmc_dimmx_params.s.rc11 = 0;
4731 lmc_dimmx_params.s.rc12 = 0;
4732                lmc_dimmx_params.s.rc13 = (spd_dimm_type == 4) ? 0 : 4; /* RC13 bit 2: 0 = LRDIMM, 1 = RDIMM */
4733 lmc_dimmx_params.s.rc13 |= (ddr_type == DDR4_DRAM) ? (spd_addr_mirror << 3) : 0;
4734 lmc_dimmx_params.s.rc14 = 0;
4735 //lmc_dimmx_params.s.rc15 = 4; /* 0 nCK latency adder */
4736 lmc_dimmx_params.s.rc15 = 0; /* 1 nCK latency adder */
4737
4738 lmc_dimmx_ddr4_params0.u = 0;
4739
4740 lmc_dimmx_ddr4_params0.s.rc8x = 0;
4741 lmc_dimmx_ddr4_params0.s.rc7x = 0;
4742 lmc_dimmx_ddr4_params0.s.rc6x = 0;
4743 lmc_dimmx_ddr4_params0.s.rc5x = 0;
4744 lmc_dimmx_ddr4_params0.s.rc4x = 0;
4745
4746 lmc_dimmx_ddr4_params0.s.rc3x = compute_rc3x(tclk_psecs);
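                /* RC3x (fine-granularity operating speed) is the only extended control word derived here, computed from the clock period */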
4747
4748 lmc_dimmx_ddr4_params0.s.rc2x = 0;
4749 lmc_dimmx_ddr4_params0.s.rc1x = 0;
4750
4751 lmc_dimmx_ddr4_params1.u = 0;
4752
4753 lmc_dimmx_ddr4_params1.s.rcbx = 0;
4754 lmc_dimmx_ddr4_params1.s.rcax = 0;
4755 lmc_dimmx_ddr4_params1.s.rc9x = 0;
4756
4757 lmc_ddr4_dimm_ctl.u = 0;
4758 lmc_ddr4_dimm_ctl.s.ddr4_dimm0_wmask = 0x004;
4759 lmc_ddr4_dimm_ctl.s.ddr4_dimm1_wmask = (dimm_count > 1) ? 0x004 : 0x0000;
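                /* ddr4_dimm*_wmask 0x004: presumably bit n selects extended control word RC(n+1)x, so only RC3x,
                   the one word given a non-default value above, is written */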
4760
4761 /*
4762 * Handle any overrides from envvars here...
4763 */
4764 if ((s = lookup_env_parameter("ddr_ddr4_params0")) != NULL) {
4765 lmc_dimmx_ddr4_params0.u = strtoul(s, NULL, 0);
4766 }
4767
4768 if ((s = lookup_env_parameter("ddr_ddr4_params1")) != NULL) {
4769 lmc_dimmx_ddr4_params1.u = strtoul(s, NULL, 0);
4770 }
4771
4772 if ((s = lookup_env_parameter("ddr_ddr4_dimm_ctl")) != NULL) {
4773 lmc_ddr4_dimm_ctl.u = strtoul(s, NULL, 0);
4774 }
4775
4776 for (i=0; i<11; ++i) {
4777 uint64_t value;
4778 if ((s = lookup_env_parameter("ddr_ddr4_rc%1xx", i+1)) != NULL) {
4779 value = strtoul(s, NULL, 0);
4780 if (i < 8) {
4781 lmc_dimmx_ddr4_params0.u &= ~((uint64_t)0xff << (i*8));
4782 lmc_dimmx_ddr4_params0.u |= (value << (i*8));
4783 } else {
4784 lmc_dimmx_ddr4_params1.u &= ~((uint64_t)0xff << ((i-8)*8));
4785 lmc_dimmx_ddr4_params1.u |= (value << ((i-8)*8));
4786 }
4787 }
4788 }
4789
4790 /*
4791 * write the final CSR values
4792 */
4793 DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_DDR4_PARAMS0(ddr_interface_num, dimm), lmc_dimmx_ddr4_params0.u);
4794
4795 DRAM_CSR_WRITE(node, BDK_LMCX_DDR4_DIMM_CTL(ddr_interface_num), lmc_ddr4_dimm_ctl.u);
4796
4797 DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_DDR4_PARAMS1(ddr_interface_num, dimm), lmc_dimmx_ddr4_params1.u);
4798
4799 ddr_print("DIMM%d Register Control Words RCBx:RC1x : %x %x %x %x %x %x %x %x %x %x %x\n",
4800 dimm,
4801 lmc_dimmx_ddr4_params1.s.rcbx,
4802 lmc_dimmx_ddr4_params1.s.rcax,
4803 lmc_dimmx_ddr4_params1.s.rc9x,
4804 lmc_dimmx_ddr4_params0.s.rc8x,
4805 lmc_dimmx_ddr4_params0.s.rc7x,
4806 lmc_dimmx_ddr4_params0.s.rc6x,
4807 lmc_dimmx_ddr4_params0.s.rc5x,
4808 lmc_dimmx_ddr4_params0.s.rc4x,
4809 lmc_dimmx_ddr4_params0.s.rc3x,
4810 lmc_dimmx_ddr4_params0.s.rc2x,
4811 lmc_dimmx_ddr4_params0.s.rc1x );
4812
4813 } else { /* if (ddr_type == DDR4_DRAM) */
4814 rc = read_spd(node, &dimm_config_table[didx], 69);
4815 lmc_dimmx_params.s.rc0 = (rc >> 0) & 0xf;
4816 lmc_dimmx_params.s.rc1 = (rc >> 4) & 0xf;
4817
4818 rc = read_spd(node, &dimm_config_table[didx], 70);
4819 lmc_dimmx_params.s.rc2 = (rc >> 0) & 0xf;
4820 lmc_dimmx_params.s.rc3 = (rc >> 4) & 0xf;
4821
4822 rc = read_spd(node, &dimm_config_table[didx], 71);
4823 lmc_dimmx_params.s.rc4 = (rc >> 0) & 0xf;
4824 lmc_dimmx_params.s.rc5 = (rc >> 4) & 0xf;
4825
4826 rc = read_spd(node, &dimm_config_table[didx], 72);
4827 lmc_dimmx_params.s.rc6 = (rc >> 0) & 0xf;
4828 lmc_dimmx_params.s.rc7 = (rc >> 4) & 0xf;
4829
4830 rc = read_spd(node, &dimm_config_table[didx], 73);
4831 lmc_dimmx_params.s.rc8 = (rc >> 0) & 0xf;
4832 lmc_dimmx_params.s.rc9 = (rc >> 4) & 0xf;
4833
4834 rc = read_spd(node, &dimm_config_table[didx], 74);
4835 lmc_dimmx_params.s.rc10 = (rc >> 0) & 0xf;
4836 lmc_dimmx_params.s.rc11 = (rc >> 4) & 0xf;
4837
4838 rc = read_spd(node, &dimm_config_table[didx], 75);
4839 lmc_dimmx_params.s.rc12 = (rc >> 0) & 0xf;
4840 lmc_dimmx_params.s.rc13 = (rc >> 4) & 0xf;
4841
4842 rc = read_spd(node, &dimm_config_table[didx], 76);
4843 lmc_dimmx_params.s.rc14 = (rc >> 0) & 0xf;
4844 lmc_dimmx_params.s.rc15 = (rc >> 4) & 0xf;
4845
4846
4847 if ((s = lookup_env_parameter("ddr_clk_drive")) != NULL) {
4848 if (strcmp(s,"light") == 0) {
4849 lmc_dimmx_params.s.rc5 = 0x0; /* Light Drive */
4850 }
4851 if (strcmp(s,"moderate") == 0) {
4852 lmc_dimmx_params.s.rc5 = 0x5; /* Moderate Drive */
4853 }
4854 if (strcmp(s,"strong") == 0) {
4855 lmc_dimmx_params.s.rc5 = 0xA; /* Strong Drive */
4856 }
4857 }
4858
4859 if ((s = lookup_env_parameter("ddr_cmd_drive")) != NULL) {
4860 if (strcmp(s,"light") == 0) {
4861 lmc_dimmx_params.s.rc3 = 0x0; /* Light Drive */
4862 }
4863 if (strcmp(s,"moderate") == 0) {
4864 lmc_dimmx_params.s.rc3 = 0x5; /* Moderate Drive */
4865 }
4866 if (strcmp(s,"strong") == 0) {
4867 lmc_dimmx_params.s.rc3 = 0xA; /* Strong Drive */
4868 }
4869 }
4870
4871 if ((s = lookup_env_parameter("ddr_ctl_drive")) != NULL) {
4872 if (strcmp(s,"light") == 0) {
4873 lmc_dimmx_params.s.rc4 = 0x0; /* Light Drive */
4874 }
4875 if (strcmp(s,"moderate") == 0) {
4876 lmc_dimmx_params.s.rc4 = 0x5; /* Moderate Drive */
4877 }
4878 }
4879
4880
4881 /*
4882 ** rc10 DDR3 RDIMM Operating Speed
4883 ** ==== =========================================================
4884 ** 0 tclk_psecs >= 2500 psec DDR3/DDR3L-800 (default)
4885 ** 1 2500 psec > tclk_psecs >= 1875 psec DDR3/DDR3L-1066
4886 ** 2 1875 psec > tclk_psecs >= 1500 psec DDR3/DDR3L-1333
4887 ** 3 1500 psec > tclk_psecs >= 1250 psec DDR3/DDR3L-1600
4888 ** 4 1250 psec > tclk_psecs >= 1071 psec DDR3-1866
4889 */
4890 lmc_dimmx_params.s.rc10 = 0;
4891 if (2500 > tclk_psecs)
4892 lmc_dimmx_params.s.rc10 = 1;
4893 if (1875 > tclk_psecs)
4894 lmc_dimmx_params.s.rc10 = 2;
4895 if (1500 > tclk_psecs)
4896 lmc_dimmx_params.s.rc10 = 3;
4897 if (1250 > tclk_psecs)
4898 lmc_dimmx_params.s.rc10 = 4;
4899
4900 } /* if (ddr_type == DDR4_DRAM) */
4901
4902 if ((s = lookup_env_parameter("ddr_dimmx_params")) != NULL) {
4903 lmc_dimmx_params.u = strtoul(s, NULL, 0);
4904 }
4905
4906 for (i=0; i<16; ++i) {
4907 uint64_t value;
4908 if ((s = lookup_env_parameter("ddr_rc%d", i)) != NULL) {
4909 value = strtoul(s, NULL, 0);
4910 lmc_dimmx_params.u &= ~((uint64_t)0xf << (i*4));
4911 lmc_dimmx_params.u |= ( value << (i*4));
4912 }
4913 }
4914
4915 DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm), lmc_dimmx_params.u);
4916
4917 ddr_print("DIMM%d Register Control Words RC15:RC0 : %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",
4918 dimm,
4919 lmc_dimmx_params.s.rc15,
4920 lmc_dimmx_params.s.rc14,
4921 lmc_dimmx_params.s.rc13,
4922 lmc_dimmx_params.s.rc12,
4923 lmc_dimmx_params.s.rc11,
4924 lmc_dimmx_params.s.rc10,
4925 lmc_dimmx_params.s.rc9 ,
4926 lmc_dimmx_params.s.rc8 ,
4927 lmc_dimmx_params.s.rc7 ,
4928 lmc_dimmx_params.s.rc6 ,
4929 lmc_dimmx_params.s.rc5 ,
4930 lmc_dimmx_params.s.rc4 ,
4931 lmc_dimmx_params.s.rc3 ,
4932 lmc_dimmx_params.s.rc2 ,
4933 lmc_dimmx_params.s.rc1 ,
4934 lmc_dimmx_params.s.rc0 );
4935 } /* for didx */
4936
4937 if (ddr_type == DDR4_DRAM) {
4938
4939 /* LMC0_DIMM_CTL */
4940 lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num));
4941 lmc_dimm_ctl.s.dimm0_wmask = 0xdf3f;
4942 lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0xdf3f : 0x0000;
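            /* 0xdf3f leaves bits 6, 7 and 13 clear, i.e. RC6, RC7 and RC13 (RC0D) are not written in this first pass;
               RC0D is written separately below */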
4943 lmc_dimm_ctl.s.tcws = 0x4e0;
4944 lmc_dimm_ctl.cn88xx.parity = custom_lmc_config->parity;
4945
4946 if ((s = lookup_env_parameter("ddr_dimm0_wmask")) != NULL) {
4947 lmc_dimm_ctl.s.dimm0_wmask = strtoul(s, NULL, 0);
4948 }
4949
4950 if ((s = lookup_env_parameter("ddr_dimm1_wmask")) != NULL) {
4951 lmc_dimm_ctl.s.dimm1_wmask = strtoul(s, NULL, 0);
4952 }
4953
4954 if ((s = lookup_env_parameter("ddr_dimm_ctl_parity")) != NULL) {
4955 lmc_dimm_ctl.cn88xx.parity = strtoul(s, NULL, 0);
4956 }
4957
4958 if ((s = lookup_env_parameter("ddr_dimm_ctl_tcws")) != NULL) {
4959 lmc_dimm_ctl.s.tcws = strtoul(s, NULL, 0);
4960 }
4961
4962 ddr_print("LMC DIMM_CTL : 0x%016llx\n", lmc_dimm_ctl.u);
4963 DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
4964
4965 perform_octeon3_ddr3_sequence(node, rank_mask,
4966 ddr_interface_num, 0x7 ); /* Init RCW */
4967
4968 /* Write RC0D last */
4969 lmc_dimm_ctl.s.dimm0_wmask = 0x2000;
4970 lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0x2000 : 0x0000;
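            /* 0x2000 = bit 13: only RC13 (RC0D) stays write-enabled for this second pass, matching the "Write RC0D last" note above */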
4971 ddr_print("LMC DIMM_CTL : 0x%016llx\n", lmc_dimm_ctl.u);
4972 DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
4973
4974 /* Don't write any extended registers the second time */
4975 DRAM_CSR_WRITE(node, BDK_LMCX_DDR4_DIMM_CTL(ddr_interface_num), 0);
4976
4977 perform_octeon3_ddr3_sequence(node, rank_mask,
4978 ddr_interface_num, 0x7 ); /* Init RCW */
4979 } else {
4980
4981 /* LMC0_DIMM_CTL */
4982 lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num));
4983 lmc_dimm_ctl.s.dimm0_wmask = 0xffff;
4984 lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0xffff : 0x0000;
4985 lmc_dimm_ctl.s.tcws = 0x4e0;
4986 lmc_dimm_ctl.cn88xx.parity = custom_lmc_config->parity;
4987
4988 if ((s = lookup_env_parameter("ddr_dimm0_wmask")) != NULL) {
4989 lmc_dimm_ctl.s.dimm0_wmask = strtoul(s, NULL, 0);
4990 }
4991
4992 if ((s = lookup_env_parameter("ddr_dimm1_wmask")) != NULL) {
4993 lmc_dimm_ctl.s.dimm1_wmask = strtoul(s, NULL, 0);
4994 }
4995
4996 if ((s = lookup_env_parameter("ddr_dimm_ctl_parity")) != NULL) {
4997 lmc_dimm_ctl.cn88xx.parity = strtoul(s, NULL, 0);
4998 }
4999
5000 if ((s = lookup_env_parameter("ddr_dimm_ctl_tcws")) != NULL) {
5001 lmc_dimm_ctl.s.tcws = strtoul(s, NULL, 0);
5002 }
5003
5004 ddr_print("LMC DIMM_CTL : 0x%016llx\n", lmc_dimm_ctl.u);
5005 DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
5006
5007 perform_octeon3_ddr3_sequence(node, rank_mask,
5008 ddr_interface_num, 0x7 ); /* Init RCW */
5009 }
5010 } else { /* if (spd_rdimm) */
5011 /* Disable register control writes for unbuffered */
5012 bdk_lmcx_dimm_ctl_t lmc_dimm_ctl;
5013 lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num));
5014 lmc_dimm_ctl.s.dimm0_wmask = 0;
5015 lmc_dimm_ctl.s.dimm1_wmask = 0;
5016 DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
5017 } /* if (spd_rdimm) */
5018
5019 /*
5020 * Comments (steps 3 through 5) continue in perform_octeon3_ddr3_sequence()
5021 */
5022 {
5023 bdk_lmcx_modereg_params0_t lmc_modereg_params0;
5024
5025 if (ddr_memory_preserved(node)) {
5026 /* Contents are being preserved. Take DRAM out of
5027           self-refresh first. Then init steps can proceed
5028 normally */
5029 perform_octeon3_ddr3_sequence(node, rank_mask,
5030 ddr_interface_num, 3); /* self-refresh exit */
5031 }
5032
5033 lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
5034
5035 lmc_modereg_params0.s.dllr = 1; /* Set during first init sequence */
5036 DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
5037
5038 perform_ddr_init_sequence(node, rank_mask, ddr_interface_num);
5039
5040 lmc_modereg_params0.s.dllr = 0; /* Clear for normal operation */
5041 DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
5042 }
5043
5044 // NOTE: this must be done for pass 2.x and pass 1.x
5045 if ((spd_rdimm) && (ddr_type == DDR4_DRAM)) {
5046 VB_PRT(VBL_FAE, "Running init sequence 1\n");
5047 change_rdimm_mpr_pattern(node, rank_mask, ddr_interface_num, dimm_count);
5048 }
5049
5050 #define DEFAULT_INTERNAL_VREF_TRAINING_LIMIT 5
5051 int internal_retries = 0;
5052 int deskew_training_errors;
5053 int dac_eval_retries;
5054 int dac_settings[9];
5055 int num_samples;
5056 int sample, lane;
5057 int last_lane = ((ddr_interface_64b) ? 8 : 4) + use_ecc;
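    /* byte lanes to sample: 8 for a 64-bit interface, 4 for 32-bit, plus one more when ECC is in use */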
5058
5059 #define DEFAULT_DAC_SAMPLES 7 // originally was 5
5060 #define DAC_RETRIES_LIMIT 2
5061
5062 typedef struct {
5063 int16_t bytes[DEFAULT_DAC_SAMPLES];
5064 } bytelane_sample_t;
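    /* per byte-lane history of internal VREF DAC readings, one entry per sample; combined by process_samples_average() below */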
5065 bytelane_sample_t lanes[9];
5066
5067 memset(lanes, 0, sizeof(lanes));
5068
5069 if ((ddr_type == DDR4_DRAM) && !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
5070 num_samples = DEFAULT_DAC_SAMPLES;
5071 } else {
5072 num_samples = 1; // if DDR3 or no ability to write DAC values
5073 }
5074
5075 perform_internal_vref_training:
5076
5077 for (sample = 0; sample < num_samples; sample++) {
5078
5079 dac_eval_retries = 0;
5080
5081 do { // make offset and internal vref training repeatable
5082
5083 /* 6.9.8 LMC Offset Training
5084 LMC requires input-receiver offset training. */
5085 Perform_Offset_Training(node, rank_mask, ddr_interface_num);
5086
5087 /* 6.9.9 LMC Internal Vref Training
5088 LMC requires input-reference-voltage training. */
5089 Perform_Internal_VREF_Training(node, rank_mask, ddr_interface_num);
5090
5091 // read and maybe display the DAC values for a sample
5092 read_DAC_DBI_settings(node, ddr_interface_num, /*DAC*/1, dac_settings);
5093 if ((num_samples == 1) || dram_is_verbose(VBL_DEV)) {
5094 display_DAC_DBI_settings(node, ddr_interface_num, /*DAC*/1, use_ecc,
5095 dac_settings, (char *)"Internal VREF");
5096 }
5097
5098 // for DDR4, evaluate the DAC settings and retry if any issues
5099 if (ddr_type == DDR4_DRAM) {
5100 if (evaluate_DAC_settings(ddr_interface_64b, use_ecc, dac_settings)) {
5101 if (++dac_eval_retries > DAC_RETRIES_LIMIT) {
5102 ddr_print("N%d.LMC%d: DDR4 internal VREF DAC settings: retries exhausted; continuing...\n",
5103 node, ddr_interface_num);
5104 } else {
5105 ddr_print("N%d.LMC%d: DDR4 internal VREF DAC settings inconsistent; retrying....\n",
5106 node, ddr_interface_num); // FIXME? verbosity!!!
5107 continue;
5108 }
5109 }
5110 if (num_samples > 1) { // taking multiple samples, otherwise do nothing
5111 // good sample or exhausted retries, record it
5112 for (lane = 0; lane < last_lane; lane++) {
5113 lanes[lane].bytes[sample] = dac_settings[lane];
5114 }
5115 }
5116 }
5117 break; // done if DDR3, or good sample, or exhausted retries
5118
5119 } while (1);
5120
5121 } /* for (sample = 0; sample < num_samples; sample++) */
5122
5123 if (num_samples > 1) {
5124 debug_print("N%d.LMC%d: DDR4 internal VREF DAC settings: processing multiple samples...\n",
5125 node, ddr_interface_num);
5126
5127 for (lane = 0; lane < last_lane; lane++) {
5128 dac_settings[lane] = process_samples_average(&lanes[lane].bytes[0], num_samples,
5129 ddr_interface_num, lane);
5130 }
5131 display_DAC_DBI_settings(node, ddr_interface_num, /*DAC*/1, use_ecc, dac_settings, (char *)"Averaged VREF");
5132
5133 // finally, write the final DAC values
5134 for (lane = 0; lane < last_lane; lane++) {
5135 load_dac_override(node, ddr_interface_num, dac_settings[lane], lane);
5136 }
5137 }
5138
5139 #if DAC_OVERRIDE_EARLY
5140 // as a second step, after internal VREF training, before starting deskew training:
5141 // for DDR3 and THUNDER pass 2.x, override the DAC setting to 127
5142 if ((ddr_type == DDR3_DRAM) && !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
5143 load_dac_override(node, ddr_interface_num, 127, /* all */0x0A);
5144 ddr_print("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127 (early).\n",
5145 node, ddr_interface_num);
5146 }
5147 #endif
5148
5149 /*
5150 * 6.9.10 LMC Read Deskew Training
5151 * LMC requires input-read-data deskew training.
5152 */
5153 if (! disable_deskew_training) {
5154
5155 deskew_training_errors = Perform_Read_Deskew_Training(node, rank_mask, ddr_interface_num,
5156 spd_rawcard_AorB, 0, ddr_interface_64b);
5157
5158        // All the deskew lock and saturation retries (if any) have been done,
5159        // but we still ended up with nibble errors; so, as a last-ditch effort,
5160        // enable retries of the Internal Vref Training...
5161 if (deskew_training_errors) {
5162 if (internal_retries < DEFAULT_INTERNAL_VREF_TRAINING_LIMIT) {
5163 internal_retries++;
5164 VB_PRT(VBL_FAE, "N%d.LMC%d: Deskew training results still unsettled - retrying internal Vref training (%d)\n",
5165 node, ddr_interface_num, internal_retries);
5166 goto perform_internal_vref_training;
5167 } else {
5168 VB_PRT(VBL_FAE, "N%d.LMC%d: Deskew training incomplete - %d retries exhausted, but continuing...\n",
5169 node, ddr_interface_num, internal_retries);
5170 }
5171 }
5172
5173 // FIXME: treat this as the final DSK print from now on, and print if VBL_NORM or above
5174 // also, save the results of the original training
5175 Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &deskew_training_results, VBL_NORM);
5176
5177 // setup write bit-deskew if enabled...
5178 if (enable_write_deskew) {
5179            ddr_print("N%d.LMC%d: WRITE BIT-DESKEW feature enabled - going NEUTRAL.\n",
5180 node, ddr_interface_num);
5181 Neutral_Write_Deskew_Setup(node, ddr_interface_num);
5182 } /* if (enable_write_deskew) */
5183
5184 } /* if (! disable_deskew_training) */
5185
5186 #if !DAC_OVERRIDE_EARLY
5187 // as a final step in internal VREF training, after deskew training but before HW WL:
5188 // for DDR3 and THUNDER pass 2.x, override the DAC setting to 127
5189 if ((ddr_type == DDR3_DRAM) && !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
5190 load_dac_override(node, ddr_interface_num, 127, /* all */0x0A);
5191        ddr_print("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127 (late).\n",
5192 node, ddr_interface_num);
5193 }
5194 #endif
5195
5196
5197 /* LMC(0)_EXT_CONFIG */
5198 {
5199 bdk_lmcx_ext_config_t ext_config;
5200 ext_config.u = BDK_CSR_READ(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num));
5201 ext_config.s.vrefint_seq_deskew = 0;
5202 ext_config.s.read_ena_bprch = 1;
5203 ext_config.s.read_ena_fprch = 1;
5204 ext_config.s.drive_ena_fprch = 1;
5205 ext_config.s.drive_ena_bprch = 1;
5206 ext_config.s.invert_data = 0; // make sure this is OFF for all current chips
5207
5208 if ((s = lookup_env_parameter("ddr_read_fprch")) != NULL) {
5209 ext_config.s.read_ena_fprch = strtoul(s, NULL, 0);
5210 }
5211 if ((s = lookup_env_parameter("ddr_read_bprch")) != NULL) {
5212 ext_config.s.read_ena_bprch = strtoul(s, NULL, 0);
5213 }
5214 if ((s = lookup_env_parameter("ddr_drive_fprch")) != NULL) {
5215 ext_config.s.drive_ena_fprch = strtoul(s, NULL, 0);
5216 }
5217 if ((s = lookup_env_parameter("ddr_drive_bprch")) != NULL) {
5218 ext_config.s.drive_ena_bprch = strtoul(s, NULL, 0);
5219 }
5220
5221 if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (lranks_per_prank > 1)) {
5222 ext_config.s.dimm0_cid = ext_config.s.dimm1_cid = lranks_bits;
5223 ddr_print("N%d.LMC%d: 3DS: setting EXT_CONFIG[dimmx_cid] = %d\n",
5224 node, ddr_interface_num, ext_config.s.dimm0_cid);
5225 }
5226
5227 DRAM_CSR_WRITE(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num), ext_config.u);
5228 ddr_print("%-45s : 0x%016llx\n", "EXT_CONFIG", ext_config.u);
5229 }
5230
5231
5232 {
5233 int save_ref_zqcs_int;
5234 uint64_t temp_delay_usecs;
5235
5236 lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
5237
5238 /* Temporarily select the minimum ZQCS interval and wait
5239 long enough for a few ZQCS calibrations to occur. This
5240 should ensure that the calibration circuitry is
5241 stabilized before read/write leveling occurs. */
5242 save_ref_zqcs_int = lmc_config.s.ref_zqcs_int;
5243 lmc_config.s.ref_zqcs_int = 1 | (32<<7); /* set smallest interval */
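        /* The >>7 in the delay computation below suggests bits [6:0] hold the refresh portion (kept at 1 here)
           while the bits above hold the ZQCS interval, set to its minimum of 32 */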
5244
5245 DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
5246 BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
5247
5248 /* Compute an appropriate delay based on the current ZQCS
5249 interval. The delay should be long enough for the
5250 current ZQCS delay counter to expire plus ten of the
5251           minimum intervals to ensure that some calibrations
5252 occur. */
5253 temp_delay_usecs = (((uint64_t)save_ref_zqcs_int >> 7)
5254 * tclk_psecs * 100 * 512 * 128) / (10000*10000)
5255 + 10 * ((uint64_t)32 * tclk_psecs * 100 * 512 * 128) / (10000*10000);
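        /* For example, with tclk_psecs around 833 (DDR4-2400), one minimum ZQCS interval of
           32*512*128 clocks is roughly 1.75 ms, so the second term alone waits about 17.5 ms
           for the ten calibrations (illustrative arithmetic only) */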
5256
5257 VB_PRT(VBL_FAE, "N%d.LMC%d: Waiting %lld usecs for ZQCS calibrations to start\n",
5258 node, ddr_interface_num, temp_delay_usecs);
5259 bdk_wait_usec(temp_delay_usecs);
5260
5261 lmc_config.s.ref_zqcs_int = save_ref_zqcs_int; /* Restore computed interval */
5262
5263 DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
5264 BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
5265 }
5266
5267 /*
5268 * 6.9.11 LMC Write Leveling
5269 *
5270 * LMC supports an automatic write leveling like that described in the
5271 * JEDEC DDR3 specifications separately per byte-lane.
5272 *
5273 * All of DDR PLL, LMC CK, LMC DRESET, and early LMC initializations must
5274 * be completed prior to starting this LMC write-leveling sequence.
5275 *
5276 * There are many possible procedures that will write-level all the
5277 * attached DDR3 DRAM parts. One possibility is for software to simply
5278 * write the desired values into LMC(0)_WLEVEL_RANK(0..3). This section
5279     * describes one possible sequence that uses LMC's auto write-leveling
5280 * capabilities.
5281 *
5282 * 1. If the DQS/DQ delays on the board may be more than the ADD/CMD
5283 * delays, then ensure that LMC(0)_CONFIG[EARLY_DQX] is set at this
5284 * point.
5285 *
5286 * Do the remaining steps 2-7 separately for each rank i with attached
5287 * DRAM.
5288 *
5289 * 2. Write LMC(0)_WLEVEL_RANKi = 0.
5290 *
5291 * 3. For ×8 parts:
5292 *
5293 * Without changing any other fields in LMC(0)_WLEVEL_CTL, write
5294 * LMC(0)_WLEVEL_CTL[LANEMASK] to select all byte lanes with attached
5295 * DRAM.
5296 *
5297 * For ×16 parts:
5298 *
5299 * Without changing any other fields in LMC(0)_WLEVEL_CTL, write
5300 * LMC(0)_WLEVEL_CTL[LANEMASK] to select all even byte lanes with
5301 * attached DRAM.
5302 *
5303 * 4. Without changing any other fields in LMC(0)_CONFIG,
5304 *
5305 * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select write-leveling
5306 *
5307 * o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
5308 *
5309 * o write LMC(0)_SEQ_CTL[INIT_START] = 1
5310 *
5311 * LMC will initiate write-leveling at this point. Assuming
5312 * LMC(0)_WLEVEL_CTL [SSET] = 0, LMC first enables write-leveling on
5313 * the selected DRAM rank via a DDR3 MR1 write, then sequences through
5314 * and accumulates write-leveling results for eight different delay
5315 * settings twice, starting at a delay of zero in this case since
5316 * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] = 0, increasing by 1/8 CK each
5317 * setting, covering a total distance of one CK, then disables the
5318 * write-leveling via another DDR3 MR1 write.
5319 *
5320 * After the sequence through 16 delay settings is complete:
5321 *
5322 * o LMC sets LMC(0)_WLEVEL_RANKi[STATUS] = 3
5323 *
5324 * o LMC sets LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] (for all ranks selected
5325 * by LMC(0)_WLEVEL_CTL[LANEMASK]) to indicate the first write
5326 * leveling result of 1 that followed result of 0 during the
5327 * sequence, except that the LMC always writes
5328 * LMC(0)_WLEVEL_RANKi[BYTE*<0>]=0.
5329 *
5330 * o Software can read the eight write-leveling results from the first
5331 * pass through the delay settings by reading
5332 * LMC(0)_WLEVEL_DBG[BITMASK] (after writing
5333     *    LMC(0)_WLEVEL_DBG[BYTE]). (LMC does not retain the write-leveling
5334 * results from the second pass through the eight delay
5335 * settings. They should often be identical to the
5336 * LMC(0)_WLEVEL_DBG[BITMASK] results, though.)
5337 *
5338 * 5. Wait until LMC(0)_WLEVEL_RANKi[STATUS] != 2.
5339 *
5340 * LMC will have updated LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] for all byte
5341 * lanes selected by LMC(0)_WLEVEL_CTL[LANEMASK] at this point.
5342 * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] will still be the value that
5343 * software wrote in substep 2 above, which is 0.
5344 *
5345 * 6. For ×16 parts:
5346 *
5347 * Without changing any other fields in LMC(0)_WLEVEL_CTL, write
5348 * LMC(0)_WLEVEL_CTL[LANEMASK] to select all odd byte lanes with
5349 * attached DRAM.
5350 *
5351 * Repeat substeps 4 and 5 with this new LMC(0)_WLEVEL_CTL[LANEMASK]
5352 * setting. Skip to substep 7 if this has already been done.
5353 *
5354 * For ×8 parts:
5355 *
5356 * Skip this substep. Go to substep 7.
5357 *
5358 * 7. Calculate LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings for all byte
5359 * lanes on all ranks with attached DRAM.
5360 *
5361 * At this point, all byte lanes on rank i with attached DRAM should
5362 * have been write-leveled, and LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] has
5363 * the result for each byte lane.
5364 *
5365 * But note that the DDR3 write-leveling sequence will only determine
5366 * the delay modulo the CK cycle time, and cannot determine how many
5367 * additional CK cycles of delay are present. Software must calculate
5368 * the number of CK cycles, or equivalently, the
5369 * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings.
5370 *
5371 * This BYTE*<4:3> calculation is system/board specific.
5372 *
5373 * Many techniques can be used to calculate write-leveling BYTE*<4:3> values,
5374 * including:
5375 *
5376 * o Known values for some byte lanes.
5377 *
5378 * o Relative values for some byte lanes relative to others.
5379 *
5380 * For example, suppose lane X is likely to require a larger
5381 * write-leveling delay than lane Y. A BYTEX<2:0> value that is much
5382 * smaller than the BYTEY<2:0> value may then indicate that the
5383 * required lane X delay wrapped into the next CK, so BYTEX<4:3>
5384 * should be set to BYTEY<4:3>+1.
5385 *
5386 * When ECC DRAM is not present (i.e. when DRAM is not attached to the
5387 * DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and
5388 * DDR_DQ<35:32> chip signals), write LMC(0)_WLEVEL_RANK*[BYTE8] =
5389 * LMC(0)_WLEVEL_RANK*[BYTE0], using the final calculated BYTE0 value.
5390 * Write LMC(0)_WLEVEL_RANK*[BYTE4] = LMC(0)_WLEVEL_RANK*[BYTE0],
5391 * using the final calculated BYTE0 value.
5392 *
5393 * 8. Initialize LMC(0)_WLEVEL_RANK* values for all unused ranks.
5394 *
5395 * Let rank i be a rank with attached DRAM.
5396 *
5397 * For all ranks j that do not have attached DRAM, set
5398 * LMC(0)_WLEVEL_RANKj = LMC(0)_WLEVEL_RANKi.
5399 */
5400 { // Start HW write-leveling block
5401 #pragma pack(push,1)
5402 bdk_lmcx_wlevel_ctl_t wlevel_ctl;
5403 bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank;
5404 int rankx = 0;
5405 int wlevel_bitmask[9];
5406 int byte_idx;
5407 int ecc_ena;
5408 int ddr_wlevel_roundup = 0;
5409 int ddr_wlevel_printall = (dram_is_verbose(VBL_FAE)); // or default to 1 to print all HW WL samples
5410 int disable_hwl_validity = 0;
5411 int default_wlevel_rtt_nom;
5412 #if WODT_MASK_2R_1S
5413 uint64_t saved_wodt_mask = 0;
5414 #endif
5415 #pragma pack(pop)
5416
5417 if (wlevel_loops)
5418 ddr_print("N%d.LMC%d: Performing Hardware Write-Leveling\n", node, ddr_interface_num);
5419 else {
5420 wlevel_bitmask_errors = 1; /* Force software write-leveling to run */
5421 ddr_print("N%d.LMC%d: Forcing software Write-Leveling\n", node, ddr_interface_num);
5422 }
5423
5424 default_wlevel_rtt_nom = (ddr_type == DDR3_DRAM) ? rttnom_20ohm : ddr4_rttnom_40ohm ; /* FIXME? */
5425
5426 #if WODT_MASK_2R_1S
5427 if ((ddr_type == DDR4_DRAM) && (num_ranks == 2) && (dimm_count == 1)) {
5428 /* LMC(0)_WODT_MASK */
5429 bdk_lmcx_wodt_mask_t lmc_wodt_mask;
5430 // always save original so we can always restore later
5431 saved_wodt_mask = BDK_CSR_READ(node, BDK_LMCX_WODT_MASK(ddr_interface_num));
5432 if ((s = lookup_env_parameter_ull("ddr_hwl_wodt_mask")) != NULL) {
5433 lmc_wodt_mask.u = strtoull(s, NULL, 0);
5434 if (lmc_wodt_mask.u != saved_wodt_mask) { // print/store only when diff
5435 ddr_print("WODT_MASK : 0x%016llx\n", lmc_wodt_mask.u);
5436 DRAM_CSR_WRITE(node, BDK_LMCX_WODT_MASK(ddr_interface_num), lmc_wodt_mask.u);
5437 }
5438 }
5439 }
5440 #endif /* WODT_MASK_2R_1S */
5441
5442 lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
5443 ecc_ena = lmc_config.s.ecc_ena;
5444
5445 if ((s = lookup_env_parameter("ddr_wlevel_roundup")) != NULL) {
5446 ddr_wlevel_roundup = strtoul(s, NULL, 0);
5447 }
5448 if ((s = lookup_env_parameter("ddr_wlevel_printall")) != NULL) {
5449 ddr_wlevel_printall = strtoul(s, NULL, 0);
5450 }
5451
5452 if ((s = lookup_env_parameter("ddr_disable_hwl_validity")) != NULL) {
5453 disable_hwl_validity = !!strtoul(s, NULL, 0);
5454 }
5455
5456 if ((s = lookup_env_parameter("ddr_wlevel_rtt_nom")) != NULL) {
5457 default_wlevel_rtt_nom = strtoul(s, NULL, 0);
5458 }
5459
5460 // For DDR3, we leave the WLEVEL_CTL fields at default settings
5461 // For DDR4, we touch WLEVEL_CTL fields OR_DIS or BITMASK here
5462 if (ddr_type == DDR4_DRAM) {
5463 int default_or_dis = 1;
5464 int default_bitmask = 0xFF;
5465
5466 // when x4, use only the lower nibble bits
5467 if (dram_width == 4) {
5468 default_bitmask = 0x0F;
5469 VB_PRT(VBL_DEV, "N%d.LMC%d: WLEVEL_CTL: default bitmask is 0x%2x for DDR4 x4\n",
5470 node, ddr_interface_num, default_bitmask);
5471 }
5472
5473 wlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num));
5474 wlevel_ctl.s.or_dis = default_or_dis;
5475 wlevel_ctl.s.bitmask = default_bitmask;
5476
5477 // allow overrides
5478 if ((s = lookup_env_parameter("ddr_wlevel_ctl_or_dis")) != NULL) {
5479 wlevel_ctl.s.or_dis = !!strtoul(s, NULL, 0);
5480 }
5481 if ((s = lookup_env_parameter("ddr_wlevel_ctl_bitmask")) != NULL) {
5482 wlevel_ctl.s.bitmask = strtoul(s, NULL, 0);
5483 }
5484
5485 // print only if not defaults
5486 if ((wlevel_ctl.s.or_dis != default_or_dis) || (wlevel_ctl.s.bitmask != default_bitmask)) {
5487 ddr_print("N%d.LMC%d: WLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n",
5488 node, ddr_interface_num, wlevel_ctl.s.or_dis, wlevel_ctl.s.bitmask);
5489 }
5490 // always write
5491 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
5492 }
5493
5494 // Start the hardware write-leveling loop per rank
5495 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
5496
5497 if (!(rank_mask & (1 << rankx)))
5498 continue;
5499
5500 #if HW_WL_MAJORITY
5501 // array to collect counts of byte-lane values
5502 // assume low-order 3 bits and even, so really only 2 bit values
5503 int wlevel_bytes[9][4];
5504 memset(wlevel_bytes, 0, sizeof(wlevel_bytes));
5505 #endif
5506
5507 // restructure the looping so we can keep trying until we get the samples we want
5508 //for (int wloop = 0; wloop < wlevel_loops; wloop++) {
5509 int wloop = 0;
5510 int wloop_retries = 0; // retries per sample for HW-related issues with bitmasks or values
5511 int wloop_retries_total = 0;
5512 int wloop_retries_exhausted = 0;
5513 #define WLOOP_RETRIES_DEFAULT 5
5514 int wlevel_validity_errors;
5515 int wlevel_bitmask_errors_rank = 0;
5516 int wlevel_validity_errors_rank = 0;
5517
5518 while (wloop < wlevel_loops) {
5519
5520 wlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num));
5521
5522 wlevel_ctl.s.rtt_nom = (default_wlevel_rtt_nom > 0) ? (default_wlevel_rtt_nom - 1) : 7;
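                /* WLEVEL_CTL[RTT_NOM] appears to encode (RTT_NOM code - 1), hence the -1; fall back to 7 when the default code is 0 */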
5523
5524
5525 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), 0); /* Clear write-level delays */
5526
5527 wlevel_bitmask_errors = 0; /* Reset error counters */
5528 wlevel_validity_errors = 0;
5529
5530 for (byte_idx=0; byte_idx<9; ++byte_idx) {
5531 wlevel_bitmask[byte_idx] = 0; /* Reset bitmasks */
5532 }
5533
5534 #if HWL_BY_BYTE // FIXME???
5535 /* Make a separate pass for each byte to reduce power. */
5536 for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
5537
5538 if (!(ddr_interface_bytemask&(1<<byte_idx)))
5539 continue;
5540
5541 wlevel_ctl.s.lanemask = (1<<byte_idx);
5542
5543 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
5544
5545 /* Read and write values back in order to update the
5546                   status field. This ensures that we read the updated
5547 values after write-leveling has completed. */
5548 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
5549 BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx)));
5550
5551 perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 6); /* write-leveling */
5552
5553 if (!bdk_is_platform(BDK_PLATFORM_ASIM) &&
5554 BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
5555 status, ==, 3, 1000000))
5556 {
5557 error_print("ERROR: Timeout waiting for WLEVEL\n");
5558 }
5559 lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
5560
5561 wlevel_bitmask[byte_idx] = octeon_read_lmcx_ddr3_wlevel_dbg(node, ddr_interface_num, byte_idx);
5562 if (wlevel_bitmask[byte_idx] == 0)
5563 ++wlevel_bitmask_errors;
5564 } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
5565
5566 wlevel_ctl.s.lanemask = /*0x1ff*/ddr_interface_bytemask; // restore for RL
5567 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
5568 #else
5569 // do all the byte-lanes at the same time
5570 wlevel_ctl.s.lanemask = /*0x1ff*/ddr_interface_bytemask; // FIXME?
5571
5572 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
5573
5574 /* Read and write values back in order to update the
5575               status field. This ensures that we read the updated
5576 values after write-leveling has completed. */
5577 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
5578 BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx)));
5579
5580 perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 6); /* write-leveling */
5581
5582 if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
5583 status, ==, 3, 1000000))
5584 {
5585 error_print("ERROR: Timeout waiting for WLEVEL\n");
5586 }
5587
5588 lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
5589
5590 for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
5591 if (!(ddr_interface_bytemask&(1<<byte_idx)))
5592 continue;
5593 wlevel_bitmask[byte_idx] = octeon_read_lmcx_ddr3_wlevel_dbg(node, ddr_interface_num, byte_idx);
5594 if (wlevel_bitmask[byte_idx] == 0)
5595 ++wlevel_bitmask_errors;
5596 } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
5597 #endif
5598
5599 // check validity only if no bitmask errors
5600 if (wlevel_bitmask_errors == 0) {
5601 if ((spd_dimm_type != 5) &&
5602 (spd_dimm_type != 6) &&
5603 (spd_dimm_type != 8) &&
5604 (spd_dimm_type != 9) &&
5605 (dram_width != 16) &&
5606 (ddr_interface_64b) &&
5607 !(disable_hwl_validity))
5608 { // bypass if mini-[RU]DIMM or x16 or 32-bit or SO-[RU]DIMM
5609 wlevel_validity_errors =
5610 Validate_HW_WL_Settings(node, ddr_interface_num,
5611 &lmc_wlevel_rank, ecc_ena);
5612 wlevel_validity_errors_rank += (wlevel_validity_errors != 0);
5613 }
5614 } else
5615 wlevel_bitmask_errors_rank++;
5616
5617 // before we print, if we had bitmask or validity errors, do a retry...
5618 if ((wlevel_bitmask_errors != 0) || (wlevel_validity_errors != 0)) {
5619 // VBL must be high to show the bad bitmaps or delays here also
5620 if (dram_is_verbose(VBL_DEV2)) {
5621 display_WL_BM(node, ddr_interface_num, rankx, wlevel_bitmask);
5622 display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx);
5623 }
5624 if (wloop_retries < WLOOP_RETRIES_DEFAULT) {
5625 wloop_retries++;
5626 wloop_retries_total++;
5627 // this printout is per-retry: only when VBL is high enough (DEV2?)
5628 VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: H/W Write-Leveling had %s errors - retrying...\n",
5629 node, ddr_interface_num, rankx,
5630 (wlevel_bitmask_errors) ? "Bitmask" : "Validity");
5631 continue; // this takes us back to the top without counting a sample
5632 } else { // ran out of retries for this sample
5633 // retries exhausted, do not print at normal VBL
5634 VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: H/W Write-Leveling issues: %s errors\n",
5635 node, ddr_interface_num, rankx,
5636 (wlevel_bitmask_errors) ? "Bitmask" : "Validity");
5637 wloop_retries_exhausted++;
5638 }
5639 }
5640 // no errors or exhausted retries, use this sample
5641 wloop_retries = 0; //reset for next sample
5642
5643 // when only 1 sample or forced, print the bitmasks first and current HW WL
5644 if ((wlevel_loops == 1) || ddr_wlevel_printall) {
5645 display_WL_BM(node, ddr_interface_num, rankx, wlevel_bitmask);
5646 display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx);
5647 }
5648
5649 if (ddr_wlevel_roundup) { /* Round up odd bitmask delays */
5650 for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
5651 if (!(ddr_interface_bytemask&(1<<byte_idx)))
5652 continue;
5653 update_wlevel_rank_struct(&lmc_wlevel_rank,
5654 byte_idx,
5655 roundup_ddr3_wlevel_bitmask(wlevel_bitmask[byte_idx]));
5656 } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
5657 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
5658 display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx);
5659 }
5660
5661 #if HW_WL_MAJORITY
5662 // OK, we have a decent sample, no bitmask or validity errors
5663 for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
5664 if (!(ddr_interface_bytemask&(1<<byte_idx)))
5665 continue;
5666 // increment count of byte-lane value
5667 int ix = (get_wlevel_rank_struct(&lmc_wlevel_rank, byte_idx) >> 1) & 3; // only 4 values
5668 wlevel_bytes[byte_idx][ix]++;
5669 } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
5670 #endif
5671
5672 wloop++; // if we get here, we have taken a decent sample
5673
5674 } /* while (wloop < wlevel_loops) */
5675
5676 #if HW_WL_MAJORITY
5677 // if we did sample more than once, try to pick a majority vote
5678 if (wlevel_loops > 1) {
5679 // look for the majority in each byte-lane
5680 for (byte_idx = 0; byte_idx < (8+ecc_ena); ++byte_idx) {
5681 int mx = -1, mc = 0, xc = 0, cc = 0;
5682 int ix, ic;
5683 if (!(ddr_interface_bytemask&(1<<byte_idx)))
5684 continue;
5685 for (ix = 0; ix < 4; ix++) {
5686 ic = wlevel_bytes[byte_idx][ix];
5687 // make a bitmask of the ones with a count
5688 if (ic > 0) {
5689 mc |= (1 << ix);
5690 cc++; // count how many had non-zero counts
5691 }
5692 // find the majority
5693 if (ic > xc) { // new max?
5694 xc = ic; // yes
5695 mx = ix; // set its index
5696 }
5697 }
5698 #if SWL_TRY_HWL_ALT
5699 // see if there was an alternate
5700 int alts = (mc & ~(1 << mx)); // take out the majority choice
5701 if (alts != 0) {
5702 for (ix = 0; ix < 4; ix++) {
5703 if (alts & (1 << ix)) { // FIXME: could be done multiple times? bad if so
5704 hwl_alts[rankx].hwl_alt_mask |= (1 << byte_idx); // set the mask
5705 hwl_alts[rankx].hwl_alt_delay[byte_idx] = ix << 1; // record the value
5706 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d (%d) alt %d (%d).\n",
5707 node, ddr_interface_num, rankx, byte_idx, mx << 1, xc,
5708 ix << 1, wlevel_bytes[byte_idx][ix]);
5709 }
5710 }
5711 } else {
5712 debug_print("N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d alt NONE.\n",
5713 node, ddr_interface_num, rankx, byte_idx, mx << 1);
5714 }
5715 #endif /* SWL_TRY_HWL_ALT */
5716 if (cc > 2) { // unlikely, but...
5717 // assume: counts for 3 indices are all 1
5718                             // possibilities are: 0/2/4, 2/4/6, 0/4/6, 0/2/6
5719 // and the desired?: 2 , 4 , 6, 0
5720 // we choose the middle, assuming one of the outliers is bad
5721 // NOTE: this is an ugly hack at the moment; there must be a better way
5722 switch (mc) {
5723 case 0x7: mx = 1; break; // was 0/2/4, choose 2
5724 case 0xb: mx = 0; break; // was 0/2/6, choose 0
5725 case 0xd: mx = 3; break; // was 0/4/6, choose 6
5726 case 0xe: mx = 2; break; // was 2/4/6, choose 4
5727 default:
5728 case 0xf: mx = 1; break; // was 0/2/4/6, choose 2?
5729 }
5730 error_print("N%d.LMC%d.R%d: HW WL MAJORITY: bad byte-lane %d (0x%x), using %d.\n",
5731 node, ddr_interface_num, rankx, byte_idx, mc, mx << 1);
5732 }
5733 update_wlevel_rank_struct(&lmc_wlevel_rank, byte_idx, mx << 1);
5734 } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
5735
5736 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
5737 display_WL_with_final(node, ddr_interface_num, lmc_wlevel_rank, rankx);
5738 } /* if (wlevel_loops > 1) */
5739 #endif /* HW_WL_MAJORITY */
5740 // maybe print an error summary for the rank
5741 if ((wlevel_bitmask_errors_rank != 0) || (wlevel_validity_errors_rank != 0)) {
5742 VB_PRT(VBL_FAE, "N%d.LMC%d.R%d: H/W Write-Leveling errors - %d bitmask, %d validity, %d retries, %d exhausted\n",
5743 node, ddr_interface_num, rankx,
5744 wlevel_bitmask_errors_rank, wlevel_validity_errors_rank,
5745 wloop_retries_total, wloop_retries_exhausted);
5746 }
5747
5748 } /* for (rankx = 0; rankx < dimm_count * 4;rankx++) */
5749
5750 #if WODT_MASK_2R_1S
5751 if ((ddr_type == DDR4_DRAM) && (num_ranks == 2) && (dimm_count == 1)) {
5752 /* LMC(0)_WODT_MASK */
5753 bdk_lmcx_wodt_mask_t lmc_wodt_mask;
5754         // always read current so we can see if it's different from saved
5755 lmc_wodt_mask.u = BDK_CSR_READ(node, BDK_LMCX_WODT_MASK(ddr_interface_num));
5756 if (lmc_wodt_mask.u != saved_wodt_mask) { // always restore what was saved if diff
5757 lmc_wodt_mask.u = saved_wodt_mask;
5758 ddr_print("WODT_MASK : 0x%016llx\n", lmc_wodt_mask.u);
5759 DRAM_CSR_WRITE(node, BDK_LMCX_WODT_MASK(ddr_interface_num), lmc_wodt_mask.u);
5760 }
5761 }
5762 #endif /* WODT_MASK_2R_1S */
5763
5764 } // End HW write-leveling block
5765
5766 // At the end of HW Write Leveling, check on some things...
5767 if (! disable_deskew_training) {
5768
5769 deskew_counts_t dsk_counts;
5770 int retry_count = 0;
5771
5772 VB_PRT(VBL_FAE, "N%d.LMC%d: Check Deskew Settings before Read-Leveling.\n", node, ddr_interface_num);
5773
5774 do {
5775 Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, VBL_FAE);
5776
5777             // Retrain for nibble errors on any raw card, or for saturation when the
5778             // raw card is not A or B (raw card A/B does not benefit from retraining on saturation alone)
5779 if ((!spd_rawcard_AorB && dsk_counts.saturated > 0) ||
5780 ((dsk_counts.nibrng_errs != 0) || (dsk_counts.nibunl_errs != 0)))
5781 {
5782 retry_count++;
5783 VB_PRT(VBL_FAE, "N%d.LMC%d: Deskew Status indicates saturation or nibble errors - retry %d Training.\n",
5784 node, ddr_interface_num, retry_count);
5785 Perform_Read_Deskew_Training(node, rank_mask, ddr_interface_num,
5786 spd_rawcard_AorB, 0, ddr_interface_64b);
5787 } else
5788 break;
5789 } while (retry_count < 5);
5790
5791 // print the last setting only if we had to do retries here
5792 if (retry_count > 0)
5793 Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, VBL_NORM);
5794 }
5795
5796 /*
5797 * 6.9.12 LMC Read Leveling
5798 *
5799 * LMC supports an automatic read-leveling separately per byte-lane using
5800 * the DDR3 multipurpose register predefined pattern for system
5801 * calibration defined in the JEDEC DDR3 specifications.
5802 *
5803      * All of the DDR PLL, LMC CK, LMC DRESET, and early LMC initializations
5804 * must be completed prior to starting this LMC read-leveling sequence.
5805 *
5806 * Software could simply write the desired read-leveling values into
5807 * LMC(0)_RLEVEL_RANK(0..3). This section describes a sequence that uses
5808      * LMC's auto read-leveling capabilities.
5809 *
5810 * When LMC does the read-leveling sequence for a rank, it first enables
5811 * the DDR3 multipurpose register predefined pattern for system
5812 * calibration on the selected DRAM rank via a DDR3 MR3 write, then
5813 * executes 64 RD operations at different internal delay settings, then
5814 * disables the predefined pattern via another DDR3 MR3 write
5815 * operation. LMC determines the pass or fail of each of the 64 settings
5816 * independently for each byte lane, then writes appropriate
5817 * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] values for the rank.
5818 *
5819 * After read-leveling for a rank, software can read the 64 pass/fail
5820 * indications for one byte lane via LMC(0)_RLEVEL_DBG[BITMASK]. Software
5821 * can observe all pass/fail results for all byte lanes in a rank via
5822 * separate read-leveling sequences on the rank with different
5823 * LMC(0)_RLEVEL_CTL[BYTE] values.
5824 *
5825 * The 64 pass/fail results will typically have failures for the low
5826 * delays, followed by a run of some passing settings, followed by more
5827 * failures in the remaining high delays. LMC sets
5828 * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] to one of the passing settings.
5829 * First, LMC selects the longest run of successes in the 64 results. (In
5830 * the unlikely event that there is more than one longest run, LMC
5831 * selects the first one.) Then if LMC(0)_RLEVEL_CTL[OFFSET_EN] = 1 and
5832 * the selected run has more than LMC(0)_RLEVEL_CTL[OFFSET] successes,
5833 * LMC selects the last passing setting in the run minus
5834 * LMC(0)_RLEVEL_CTL[OFFSET]. Otherwise LMC selects the middle setting in
5835 * the run (rounding earlier when necessary). We expect the read-leveling
5836 * sequence to produce good results with the reset values
5837 * LMC(0)_RLEVEL_CTL [OFFSET_EN]=1, LMC(0)_RLEVEL_CTL[OFFSET] = 2.
5838 *
5839 * The read-leveling sequence has the following steps:
5840 *
5841 * 1. Select desired LMC(0)_RLEVEL_CTL[OFFSET_EN,OFFSET,BYTE] settings.
5842 * Do the remaining substeps 2-4 separately for each rank i with
5843 * attached DRAM.
5844 *
5845 * 2. Without changing any other fields in LMC(0)_CONFIG,
5846 *
5847 * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select read-leveling
5848 *
5849 * o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
5850 *
5851 * o write LMC(0)_SEQ_CTL[INIT_START] = 1
5852 *
5853 * This initiates the previously-described read-leveling.
5854 *
5855 * 3. Wait until LMC(0)_RLEVEL_RANKi[STATUS] != 2
5856 *
5857 * LMC will have updated LMC(0)_RLEVEL_RANKi[BYTE*] for all byte lanes
5858 * at this point.
5859 *
5860 * If ECC DRAM is not present (i.e. when DRAM is not attached to the
5861 * DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and
5862 * DDR_DQ<35:32> chip signals), write LMC(0)_RLEVEL_RANK*[BYTE8] =
5863 * LMC(0)_RLEVEL_RANK*[BYTE0]. Write LMC(0)_RLEVEL_RANK*[BYTE4] =
5864 * LMC(0)_RLEVEL_RANK*[BYTE0].
5865 *
5866 * 4. If desired, consult LMC(0)_RLEVEL_DBG[BITMASK] and compare to
5867 * LMC(0)_RLEVEL_RANKi[BYTE*] for the lane selected by
5868 * LMC(0)_RLEVEL_CTL[BYTE]. If desired, modify LMC(0)_RLEVEL_CTL[BYTE]
5869 * to a new value and repeat so that all BITMASKs can be observed.
5870 *
5871 * 5. Initialize LMC(0)_RLEVEL_RANK* values for all unused ranks.
5872 *
5873 * Let rank i be a rank with attached DRAM.
5874 *
5875 * For all ranks j that do not have attached DRAM, set
5876 * LMC(0)_RLEVEL_RANKj = LMC(0)_RLEVEL_RANKi.
5877 *
5878 * This read-leveling sequence can help select the proper CN70XX ODT
5879 * resistance value (LMC(0)_COMP_CTL2[RODT_CTL]). A hardware-generated
5880 * LMC(0)_RLEVEL_RANKi[BYTEj] value (for a used byte lane j) that is
5881 * drastically different from a neighboring LMC(0)_RLEVEL_RANKi[BYTEk]
5882 * (for a used byte lane k) can indicate that the CN70XX ODT value is
5883 * bad. It is possible to simultaneously optimize both
5884 * LMC(0)_COMP_CTL2[RODT_CTL] and LMC(0)_RLEVEL_RANKn[BYTE*] values by
5885 * performing this read-leveling sequence for several
5886 * LMC(0)_COMP_CTL2[RODT_CTL] values and selecting the one with the best
5887 * LMC(0)_RLEVEL_RANKn[BYTE*] profile for the ranks.
5888 */
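    /*
     * Illustration only: a minimal software sketch of the selection rule
     * described above (first longest passing run, then either the last
     * pass minus LMC(0)_RLEVEL_CTL[OFFSET] or the middle of the run).
     * The helper name and its exact shape are assumptions made for
     * clarity; the real selection is performed internally by LMC, so this
     * block is intentionally not compiled (it would belong at file scope).
     */
#if 0
    static int rlevel_select_setting_sketch(uint64_t bitmask, int offset_en, int offset)
    {
        int best_start = 0, best_len = 0;   /* first longest run of passing settings */
        int run_start = -1;

        for (int i = 0; i <= 64; i++) {
            int pass = (i < 64) && ((bitmask >> i) & 1);
            if (pass && (run_start < 0))
                run_start = i;              /* a run of passes begins */
            if (!pass && (run_start >= 0)) {
                int len = i - run_start;    /* a run ends; keep the first longest one */
                if (len > best_len) {
                    best_len = len;
                    best_start = run_start;
                }
                run_start = -1;
            }
        }

        if (best_len == 0)
            return -1;                      /* no passing settings at all */

        if (offset_en && (best_len > offset))
            return best_start + best_len - 1 - offset;  /* last pass minus OFFSET */

        return best_start + (best_len - 1) / 2;         /* middle, rounding earlier */
    }
#endif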
5889
5890 {
5891 #pragma pack(push,4)
5892 bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank;
5893 bdk_lmcx_comp_ctl2_t lmc_comp_ctl2;
5894 bdk_lmcx_rlevel_ctl_t rlevel_ctl;
5895 bdk_lmcx_control_t lmc_control;
5896 bdk_lmcx_modereg_params1_t lmc_modereg_params1;
5897 unsigned char rodt_ctl;
5898 unsigned char rankx = 0;
5899 unsigned char ecc_ena;
5900 unsigned char rtt_nom;
5901 unsigned char rtt_idx;
5902 int min_rtt_nom_idx;
5903 int max_rtt_nom_idx;
5904 int min_rodt_ctl;
5905 int max_rodt_ctl;
5906 int rlevel_debug_loops = 1;
5907 unsigned char save_ddr2t;
5908 int rlevel_avg_loops;
5909 int ddr_rlevel_compute;
5910 int saved_ddr__ptune, saved_ddr__ntune, rlevel_comp_offset;
5911 int saved_int_zqcs_dis = 0;
5912 int disable_sequential_delay_check = 0;
5913 int maximum_adjacent_rlevel_delay_increment = 0;
5914 struct {
5915 uint64_t setting;
5916 int score;
5917 } rlevel_scoreboard[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4];
5918 int print_nom_ohms;
5919 #if PERFECT_BITMASK_COUNTING
5920 typedef struct {
5921 uint8_t count[9][32]; // 8+ECC by 32 values
5922 uint8_t total[9]; // 8+ECC
5923 } rank_perfect_t;
5924 rank_perfect_t rank_perfect_counts[4];
5925 #endif
5926
5927 #pragma pack(pop)
5928
5929 #if PERFECT_BITMASK_COUNTING
5930 memset(rank_perfect_counts, 0, sizeof(rank_perfect_counts));
5931 #endif /* PERFECT_BITMASK_COUNTING */
5932
5933 lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
5934 save_ddr2t = lmc_control.s.ddr2t;
5935
5936 lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
5937 ecc_ena = lmc_config.s.ecc_ena;
5938
5939 #if 0
5940 {
5941 int save_ref_zqcs_int;
5942 uint64_t temp_delay_usecs;
5943
5944 /* Temporarily select the minimum ZQCS interval and wait
5945 long enough for a few ZQCS calibrations to occur. This
5946 should ensure that the calibration circuitry is
5947 stabilized before read-leveling occurs. */
5948 save_ref_zqcs_int = lmc_config.s.ref_zqcs_int;
5949 lmc_config.s.ref_zqcs_int = 1 | (32<<7); /* set smallest interval */
5950 DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
5951 BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
5952
5953 /* Compute an appropriate delay based on the current ZQCS
5954 interval. The delay should be long enough for the
5955 current ZQCS delay counter to expire plus ten of the
5956                        minimum intervals to ensure that some calibrations
5957 occur. */
5958 temp_delay_usecs = (((uint64_t)save_ref_zqcs_int >> 7)
5959 * tclk_psecs * 100 * 512 * 128) / (10000*10000)
5960 + 10 * ((uint64_t)32 * tclk_psecs * 100 * 512 * 128) / (10000*10000);
5961
5962             ddr_print ("Waiting %llu usecs for ZQCS calibrations to start\n",
5963 temp_delay_usecs);
5964 bdk_wait_usec(temp_delay_usecs);
5965
5966 lmc_config.s.ref_zqcs_int = save_ref_zqcs_int; /* Restore computed interval */
5967 DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
5968 BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
5969 }
5970 #endif
5971
5972 if ((s = lookup_env_parameter("ddr_rlevel_2t")) != NULL) {
5973 lmc_control.s.ddr2t = strtoul(s, NULL, 0);
5974 }
5975
5976 DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
5977
5978 ddr_print("N%d.LMC%d: Performing Read-Leveling\n", node, ddr_interface_num);
5979
5980 rlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num));
5981
5982 rlevel_avg_loops = custom_lmc_config->rlevel_average_loops;
5983 if (rlevel_avg_loops == 0) {
5984 rlevel_avg_loops = RLEVEL_AVG_LOOPS_DEFAULT;
5985 if ((dimm_count == 1) || (num_ranks == 1)) // up the samples for these cases
5986 rlevel_avg_loops = rlevel_avg_loops * 2 + 1;
5987 }
5988
5989 ddr_rlevel_compute = custom_lmc_config->rlevel_compute;
5990 rlevel_ctl.s.offset_en = custom_lmc_config->offset_en;
5991 rlevel_ctl.s.offset = spd_rdimm
5992 ? custom_lmc_config->offset_rdimm
5993 : custom_lmc_config->offset_udimm;
5994
5995 rlevel_ctl.s.delay_unload_0 = 1; /* should normally be set */
5996 rlevel_ctl.s.delay_unload_1 = 1; /* should normally be set */
5997 rlevel_ctl.s.delay_unload_2 = 1; /* should normally be set */
5998 rlevel_ctl.s.delay_unload_3 = 1; /* should normally be set */
5999
6000 rlevel_ctl.s.or_dis = 1; // default to get best bitmasks
6001 if ((s = lookup_env_parameter("ddr_rlevel_or_dis")) != NULL) {
6002 rlevel_ctl.s.or_dis = !!strtoul(s, NULL, 0);
6003 }
6004 rlevel_ctl.s.bitmask = 0xff; // should work in 32b mode also
6005 if ((s = lookup_env_parameter("ddr_rlevel_ctl_bitmask")) != NULL) {
6006 rlevel_ctl.s.bitmask = strtoul(s, NULL, 0);
6007 }
6008 debug_print("N%d.LMC%d: RLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n",
6009 node, ddr_interface_num,
6010 rlevel_ctl.s.or_dis, rlevel_ctl.s.bitmask);
6011
6012 rlevel_comp_offset = spd_rdimm
6013 ? custom_lmc_config->rlevel_comp_offset_rdimm
6014 : custom_lmc_config->rlevel_comp_offset_udimm;
6015
6016 if ((s = lookup_env_parameter("ddr_rlevel_offset")) != NULL) {
6017 rlevel_ctl.s.offset = strtoul(s, NULL, 0);
6018 }
6019
6020 if ((s = lookup_env_parameter("ddr_rlevel_offset_en")) != NULL) {
6021 rlevel_ctl.s.offset_en = strtoul(s, NULL, 0);
6022 }
6023 if ((s = lookup_env_parameter("ddr_rlevel_ctl")) != NULL) {
6024 rlevel_ctl.u = strtoul(s, NULL, 0);
6025 }
6026
6027 DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num), rlevel_ctl.u);
6028
6029 if ((s = lookup_env_parameter("ddr%d_rlevel_debug_loops", ddr_interface_num)) != NULL) {
6030 rlevel_debug_loops = strtoul(s, NULL, 0);
6031 }
6032
6033 if ((s = lookup_env_parameter("ddr_rtt_nom_auto")) != NULL) {
6034 ddr_rtt_nom_auto = !!strtoul(s, NULL, 0);
6035 }
6036
6037 if ((s = lookup_env_parameter("ddr_rlevel_average")) != NULL) {
6038 rlevel_avg_loops = strtoul(s, NULL, 0);
6039 }
6040
6041 if ((s = lookup_env_parameter("ddr_rlevel_compute")) != NULL) {
6042 ddr_rlevel_compute = strtoul(s, NULL, 0);
6043 }
6044
6045 ddr_print("RLEVEL_CTL : 0x%016llx\n", rlevel_ctl.u);
6046 ddr_print("RLEVEL_OFFSET : %6d\n", rlevel_ctl.s.offset);
6047 ddr_print("RLEVEL_OFFSET_EN : %6d\n", rlevel_ctl.s.offset_en);
6048
6049         /* The purpose of the indexed table is to sort the settings
6050 ** by the ohm value to simplify the testing when incrementing
6051 ** through the settings. (index => ohms) 1=120, 2=60, 3=40,
6052 ** 4=30, 5=20 */
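        /* Illustration only: the index => ohms ordering that the index
        ** bounds below assume.  The real values come from
        ** imp_values->rtt_nom_table[] / rtt_nom_ohms[]; this array name is
        ** hypothetical and is kept un-compiled, purely to restate the
        ** convention noted above. */
#if 0
        static const int rtt_nom_idx_to_ohms_sketch[6] = {
            0,   /* index 0: not swept (min index defaults to 1) */
            120, /* index 1 */
            60,  /* index 2 */
            40,  /* index 3 */
            30,  /* index 4 */
            20,  /* index 5 */
        };
#endif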
6053 min_rtt_nom_idx = (custom_lmc_config->min_rtt_nom_idx == 0) ? 1 : custom_lmc_config->min_rtt_nom_idx;
6054 max_rtt_nom_idx = (custom_lmc_config->max_rtt_nom_idx == 0) ? 5 : custom_lmc_config->max_rtt_nom_idx;
6055
6056 min_rodt_ctl = (custom_lmc_config->min_rodt_ctl == 0) ? 1 : custom_lmc_config->min_rodt_ctl;
6057 max_rodt_ctl = (custom_lmc_config->max_rodt_ctl == 0) ? 5 : custom_lmc_config->max_rodt_ctl;
6058
6059 if ((s = lookup_env_parameter("ddr_min_rodt_ctl")) != NULL) {
6060 min_rodt_ctl = strtoul(s, NULL, 0);
6061 }
6062 if ((s = lookup_env_parameter("ddr_max_rodt_ctl")) != NULL) {
6063 max_rodt_ctl = strtoul(s, NULL, 0);
6064 }
6065 if ((s = lookup_env_parameter("ddr_min_rtt_nom_idx")) != NULL) {
6066 min_rtt_nom_idx = strtoul(s, NULL, 0);
6067 }
6068 if ((s = lookup_env_parameter("ddr_max_rtt_nom_idx")) != NULL) {
6069 max_rtt_nom_idx = strtoul(s, NULL, 0);
6070 }
6071
6072 #ifdef ENABLE_CUSTOM_RLEVEL_TABLE
6073 if (custom_lmc_config->rlevel_table != NULL) {
6074 char part_number[21];
6075 /* Check for hard-coded read-leveling settings */
6076 get_dimm_part_number(part_number, node, &dimm_config_table[0], 0, ddr_type);
6077 for (rankx = 0; rankx < dimm_count * 4;rankx++) {
6078 if (!(rank_mask & (1 << rankx)))
6079 continue;
6080
6081 lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
6082
6083 i = 0;
6084 while (custom_lmc_config->rlevel_table[i].part != NULL) {
6085 debug_print("DIMM part number:\"%s\", SPD: \"%s\"\n", custom_lmc_config->rlevel_table[i].part, part_number);
6086 if ((strcmp(part_number, custom_lmc_config->rlevel_table[i].part) == 0)
6087 && (_abs(custom_lmc_config->rlevel_table[i].speed - 2*ddr_hertz/(1000*1000)) < 10 ))
6088 {
6089 ddr_print("Using hard-coded read leveling for DIMM part number: \"%s\"\n", part_number);
6090 lmc_rlevel_rank.u = custom_lmc_config->rlevel_table[i].rlevel_rank[ddr_interface_num][rankx];
6091 DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u);
6092 lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
6093 display_RL(node, ddr_interface_num, lmc_rlevel_rank, rankx);
6094 rlevel_debug_loops = 0; /* Disable h/w read-leveling */
6095 break;
6096 }
6097 ++i;
6098 }
6099 }
6100 }
6101 #endif /* ENABLE_CUSTOM_RLEVEL_TABLE */
6102
6103 while(rlevel_debug_loops--) {
6104 /* Initialize the error scoreboard */
6105 memset(rlevel_scoreboard, 0, sizeof(rlevel_scoreboard));
6106
6107 if ((s = lookup_env_parameter("ddr_rlevel_comp_offset")) != NULL) {
6108 rlevel_comp_offset = strtoul(s, NULL, 0);
6109 }
6110
6111 disable_sequential_delay_check = custom_lmc_config->disable_sequential_delay_check;
6112
6113 if ((s = lookup_env_parameter("ddr_disable_sequential_delay_check")) != NULL) {
6114 disable_sequential_delay_check = strtoul(s, NULL, 0);
6115 }
6116
6117 maximum_adjacent_rlevel_delay_increment = custom_lmc_config->maximum_adjacent_rlevel_delay_increment;
6118
6119 if ((s = lookup_env_parameter("ddr_maximum_adjacent_rlevel_delay_increment")) != NULL) {
6120 maximum_adjacent_rlevel_delay_increment = strtoul(s, NULL, 0);
6121 }
6122
6123 lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
6124 saved_ddr__ptune = lmc_comp_ctl2.s.ddr__ptune;
6125 saved_ddr__ntune = lmc_comp_ctl2.s.ddr__ntune;
6126
6127 /* Disable dynamic compensation settings */
6128 if (rlevel_comp_offset != 0) {
6129 lmc_comp_ctl2.s.ptune = saved_ddr__ptune;
6130 lmc_comp_ctl2.s.ntune = saved_ddr__ntune;
6131
6132 /* Round up the ptune calculation to bias the odd cases toward ptune */
6133 lmc_comp_ctl2.s.ptune += divide_roundup(rlevel_comp_offset, 2);
6134 lmc_comp_ctl2.s.ntune -= rlevel_comp_offset/2;
6135
6136 lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
6137 saved_int_zqcs_dis = lmc_control.s.int_zqcs_dis;
6138 lmc_control.s.int_zqcs_dis = 1; /* Disable ZQCS while in bypass. */
6139 DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
6140
6141 lmc_comp_ctl2.s.byp = 1; /* Enable bypass mode */
6142 DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
6143 BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
6144 lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read again */
6145 ddr_print("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
6146 lmc_comp_ctl2.s.ddr__ptune, lmc_comp_ctl2.s.ddr__ntune);
6147 }
6148
6149 lmc_modereg_params1.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num));
6150
6151 for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
6152 rtt_nom = imp_values->rtt_nom_table[rtt_idx];
6153
6154             /* When the read ODT mask is zero, the dyn_rtt_nom_mask is
6155                zero and RTT_NOM will not change during
6156 read-leveling. Since the value is fixed we only need
6157 to test it once. */
6158 if (dyn_rtt_nom_mask == 0) {
6159 print_nom_ohms = -1; // flag not to print NOM ohms
6160 if (rtt_idx != min_rtt_nom_idx)
6161 continue;
6162 } else {
6163 if (dyn_rtt_nom_mask & 1) lmc_modereg_params1.s.rtt_nom_00 = rtt_nom;
6164 if (dyn_rtt_nom_mask & 2) lmc_modereg_params1.s.rtt_nom_01 = rtt_nom;
6165 if (dyn_rtt_nom_mask & 4) lmc_modereg_params1.s.rtt_nom_10 = rtt_nom;
6166 if (dyn_rtt_nom_mask & 8) lmc_modereg_params1.s.rtt_nom_11 = rtt_nom;
6167 // FIXME? rank 0 ohms always for the printout?
6168 print_nom_ohms = imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00];
6169 }
6170
6171 DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num), lmc_modereg_params1.u);
6172 VB_PRT(VBL_TME, "\n");
6173 VB_PRT(VBL_TME, "RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
6174 imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_11],
6175 imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_10],
6176 imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_01],
6177 imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00],
6178 lmc_modereg_params1.s.rtt_nom_11,
6179 lmc_modereg_params1.s.rtt_nom_10,
6180 lmc_modereg_params1.s.rtt_nom_01,
6181 lmc_modereg_params1.s.rtt_nom_00);
6182
6183 perform_ddr_init_sequence(node, rank_mask, ddr_interface_num);
6184
6185 // Try RANK outside RODT to rearrange the output...
6186 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
6187 int byte_idx;
6188 rlevel_byte_data_t rlevel_byte[9];
6189 int average_loops;
6190 int rlevel_rank_errors, rlevel_bitmask_errors, rlevel_nonseq_errors;
6191 rlevel_bitmask_t rlevel_bitmask[9];
6192 #if PICK_BEST_RANK_SCORE_NOT_AVG
6193 int rlevel_best_rank_score;
6194 #endif
6195
6196 if (!(rank_mask & (1 << rankx)))
6197 continue;
6198
6199 for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
6200 #if PICK_BEST_RANK_SCORE_NOT_AVG
6201 rlevel_best_rank_score = DEFAULT_BEST_RANK_SCORE;
6202 #endif
6203 lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
6204 lmc_comp_ctl2.s.rodt_ctl = rodt_ctl;
6205 DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
6206 lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
6207                     bdk_wait_usec(1); /* Give it a little time to take effect */
6208 VB_PRT(VBL_DEV, "Read ODT_CTL : 0x%x (%d ohms)\n",
6209 lmc_comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[lmc_comp_ctl2.s.rodt_ctl]);
6210
6211 memset(rlevel_byte, 0, sizeof(rlevel_byte));
6212
6213 for (average_loops = 0; average_loops < rlevel_avg_loops; average_loops++) {
6214 rlevel_bitmask_errors = 0;
6215
6216 if (! (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM))) {
6217 /* Clear read-level delays */
6218 DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), 0);
6219
6220 /* read-leveling */
6221 perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 1);
6222
6223 if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx),
6224 status, ==, 3, 1000000))
6225 {
6226 error_print("ERROR: Timeout waiting for RLEVEL\n");
6227 }
6228 }
6229
6230 lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
6231
6232 { // start bitmask interpretation block
6233 int redoing_nonseq_errs = 0;
6234
6235 memset(rlevel_bitmask, 0, sizeof(rlevel_bitmask));
6236
6237 if (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM)) {
6238 bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank_aside;
6239 bdk_lmcx_modereg_params0_t lmc_modereg_params0;
6240
6241 /* A-side */
6242 lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
6243 lmc_modereg_params0.s.mprloc = 0; /* MPR Page 0 Location 0 */
6244 DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
6245
6246 /* Clear read-level delays */
6247 DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), 0);
6248
6249 perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 1); /* read-leveling */
6250
6251 if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx),
6252 status, ==, 3, 1000000))
6253 {
6254 error_print("ERROR: Timeout waiting for RLEVEL\n");
6255
6256 }
6257 lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
6258
6259 lmc_rlevel_rank_aside.u = lmc_rlevel_rank.u;
6260
6261 rlevel_bitmask[0].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 0);
6262 rlevel_bitmask[1].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 1);
6263 rlevel_bitmask[2].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 2);
6264 rlevel_bitmask[3].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 3);
6265 rlevel_bitmask[8].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 8);
6266 /* A-side complete */
6267
6268
6269 /* B-side */
6270 lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
6271 lmc_modereg_params0.s.mprloc = 3; /* MPR Page 0 Location 3 */
6272 DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
6273
6274 /* Clear read-level delays */
6275 DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), 0);
6276
6277 perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 1); /* read-leveling */
6278
6279 if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx),
6280 status, ==, 3, 1000000))
6281 {
6282 error_print("ERROR: Timeout waiting for RLEVEL\n");
6283 }
6284 lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
6285
6286 rlevel_bitmask[4].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 4);
6287 rlevel_bitmask[5].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 5);
6288 rlevel_bitmask[6].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 6);
6289 rlevel_bitmask[7].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 7);
6290 /* B-side complete */
6291
6292
6293 update_rlevel_rank_struct(&lmc_rlevel_rank, 0, lmc_rlevel_rank_aside.cn83xx.byte0);
6294 update_rlevel_rank_struct(&lmc_rlevel_rank, 1, lmc_rlevel_rank_aside.cn83xx.byte1);
6295 update_rlevel_rank_struct(&lmc_rlevel_rank, 2, lmc_rlevel_rank_aside.cn83xx.byte2);
6296 update_rlevel_rank_struct(&lmc_rlevel_rank, 3, lmc_rlevel_rank_aside.cn83xx.byte3);
6297 update_rlevel_rank_struct(&lmc_rlevel_rank, 8, lmc_rlevel_rank_aside.cn83xx.byte8); /* ECC A-side */
6298
6299 lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
6300 lmc_modereg_params0.s.mprloc = 0; /* MPR Page 0 Location 0 */
6301 DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
6302
6303 } /* if (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM)) */
6304
6305 /*
6306 * Evaluate the quality of the read-leveling delays from the bitmasks.
6307 * Also save off a software computed read-leveling mask that may be
6308 * used later to qualify the delay results from Octeon.
6309 */
6310 for (byte_idx = 0; byte_idx < (8+ecc_ena); ++byte_idx) {
6311 int bmerr;
6312 if (!(ddr_interface_bytemask&(1<<byte_idx)))
6313 continue;
6314 if (! (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM))) {
6315 rlevel_bitmask[byte_idx].bm =
6316 octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, byte_idx);
6317 }
6318 bmerr = validate_ddr3_rlevel_bitmask(&rlevel_bitmask[byte_idx], ddr_type);
6319 rlevel_bitmask[byte_idx].errs = bmerr;
6320 rlevel_bitmask_errors += bmerr;
6321 #if PERFECT_BITMASK_COUNTING
6322 if ((ddr_type == DDR4_DRAM) && !bmerr) { // count only the "perfect" bitmasks
6323 // FIXME: could optimize this a bit?
6324 int delay = get_rlevel_rank_struct(&lmc_rlevel_rank, byte_idx);
6325 rank_perfect_counts[rankx].count[byte_idx][delay] += 1;
6326 rank_perfect_counts[rankx].total[byte_idx] += 1;
6327 }
6328 #endif /* PERFECT_BITMASK_COUNTING */
6329 }
6330
6331 /* Set delays for unused bytes to match byte 0. */
6332 for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
6333 if (ddr_interface_bytemask & (1 << byte_idx))
6334 continue;
6335 update_rlevel_rank_struct(&lmc_rlevel_rank, byte_idx, lmc_rlevel_rank.cn83xx.byte0);
6336 }
6337
6338 /* Save a copy of the byte delays in physical
6339 order for sequential evaluation. */
6340 unpack_rlevel_settings(ddr_interface_bytemask, ecc_ena, rlevel_byte, lmc_rlevel_rank);
6341 redo_nonseq_errs:
6342
6343 rlevel_nonseq_errors = 0;
6344
6345 if (! disable_sequential_delay_check) {
6346 if ((ddr_interface_bytemask & 0xff) == 0xff) {
6347 /* Evaluate delay sequence across the whole range of bytes for standard dimms. */
6348 if ((spd_dimm_type == 1) || (spd_dimm_type == 5)) { /* 1=RDIMM, 5=Mini-RDIMM */
6349 int register_adjacent_delay = _abs(rlevel_byte[4].delay - rlevel_byte[5].delay);
6350 /* Registered dimm topology routes from the center. */
6351 rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 0, 3+ecc_ena,
6352 maximum_adjacent_rlevel_delay_increment);
6353 rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 5, 7+ecc_ena,
6354 maximum_adjacent_rlevel_delay_increment);
6355 // byte 5 sqerrs never gets cleared for RDIMMs
6356 rlevel_byte[5].sqerrs = 0;
6357 if (register_adjacent_delay > 1) {
6358 /* Assess proximity of bytes on opposite sides of register */
6359 rlevel_nonseq_errors += (register_adjacent_delay-1) * RLEVEL_ADJACENT_DELAY_ERROR;
6360 // update byte 5 error
6361 rlevel_byte[5].sqerrs += (register_adjacent_delay-1) * RLEVEL_ADJACENT_DELAY_ERROR;
6362 }
6363 }
6364 if ((spd_dimm_type == 2) || (spd_dimm_type == 6)) { /* 2=UDIMM, 6=Mini-UDIMM */
6365 /* Unbuffered dimm topology routes from end to end. */
6366 rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 0, 7+ecc_ena,
6367 maximum_adjacent_rlevel_delay_increment);
6368 }
6369 } else {
6370 rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 0, 3+ecc_ena,
6371 maximum_adjacent_rlevel_delay_increment);
6372 }
6373 } /* if (! disable_sequential_delay_check) */
6374
6375 #if 0
6376 // FIXME FIXME: disabled for now, it was too much...
6377
6378 // Calculate total errors for the rank:
6379 // we do NOT add nonsequential errors if mini-[RU]DIMM or x16;
6380 // mini-DIMMs and x16 devices have unusual sequence geometries.
6381 // Make the final scores for them depend only on the bitmasks...
6382 rlevel_rank_errors = rlevel_bitmask_errors;
6383 if ((spd_dimm_type != 5) &&
6384 (spd_dimm_type != 6) &&
6385 (dram_width != 16))
6386 {
6387 rlevel_rank_errors += rlevel_nonseq_errors;
6388 }
6389 #else
6390 rlevel_rank_errors = rlevel_bitmask_errors + rlevel_nonseq_errors;
6391 #endif
6392
6393 // print original sample here only if we are not really averaging or picking best
6394 // also do not print if we were redoing the NONSEQ score for using COMPUTED
6395 if (!redoing_nonseq_errs && ((rlevel_avg_loops < 2) || dram_is_verbose(VBL_DEV2))) {
6396 display_RL_BM(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena);
6397 display_RL_BM_scores(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena);
6398 display_RL_SEQ_scores(node, ddr_interface_num, rankx, rlevel_byte, ecc_ena);
6399 display_RL_with_score(node, ddr_interface_num, lmc_rlevel_rank, rankx, rlevel_rank_errors);
6400 }
6401
6402 if (ddr_rlevel_compute) {
6403 if (!redoing_nonseq_errs) {
6404 /* Recompute the delays based on the bitmask */
6405 for (byte_idx = 0; byte_idx < (8+ecc_ena); ++byte_idx) {
6406 if (!(ddr_interface_bytemask & (1 << byte_idx)))
6407 continue;
6408 update_rlevel_rank_struct(&lmc_rlevel_rank, byte_idx,
6409 compute_ddr3_rlevel_delay(rlevel_bitmask[byte_idx].mstart,
6410 rlevel_bitmask[byte_idx].width,
6411 rlevel_ctl));
6412 }
6413
6414 /* Override the copy of byte delays with the computed results. */
6415 unpack_rlevel_settings(ddr_interface_bytemask, ecc_ena, rlevel_byte, lmc_rlevel_rank);
6416
6417 redoing_nonseq_errs = 1;
6418 goto redo_nonseq_errs;
6419
6420 } else {
6421 /* now print this if already printed the original sample */
6422 if ((rlevel_avg_loops < 2) || dram_is_verbose(VBL_DEV2)) {
6423 display_RL_with_computed(node, ddr_interface_num,
6424 lmc_rlevel_rank, rankx,
6425 rlevel_rank_errors);
6426 }
6427 }
6428 } /* if (ddr_rlevel_compute) */
6429
6430 } // end bitmask interpretation block
6431
6432 #if PICK_BEST_RANK_SCORE_NOT_AVG
6433
6434 // if it is a better (lower) score, then keep it
6435 if (rlevel_rank_errors < rlevel_best_rank_score) {
6436 rlevel_best_rank_score = rlevel_rank_errors;
6437
6438 // save the new best delays and best errors
6439 for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
6440 rlevel_byte[byte_idx].best = rlevel_byte[byte_idx].delay;
6441 rlevel_byte[byte_idx].bestsq = rlevel_byte[byte_idx].sqerrs;
6442 // save bitmasks and their scores as well
6443 // xlate UNPACKED index to PACKED index to get from rlevel_bitmask
6444 rlevel_byte[byte_idx].bm = rlevel_bitmask[XUP(byte_idx, !!ecc_ena)].bm;
6445 rlevel_byte[byte_idx].bmerrs = rlevel_bitmask[XUP(byte_idx, !!ecc_ena)].errs;
6446 }
6447 }
6448 #else /* PICK_BEST_RANK_SCORE_NOT_AVG */
6449
6450 /* Accumulate the total score across averaging loops for this setting */
6451 debug_print("rlevel_scoreboard[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score: %d [%d]\n",
6452 rtt_nom, rodt_ctl, rankx, rlevel_rank_errors, average_loops);
6453 rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score += rlevel_rank_errors;
6454
6455 /* Accumulate the delay totals and loop counts
6456 necessary to compute average delay results */
6457 for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
6458 if (rlevel_byte[byte_idx].delay != 0) { /* Don't include delay=0 in the average */
6459 ++rlevel_byte[byte_idx].loop_count;
6460 rlevel_byte[byte_idx].loop_total += rlevel_byte[byte_idx].delay;
6461 }
6462 } /* for (byte_idx = 0; byte_idx < 9; ++byte_idx) */
6463 #endif /* PICK_BEST_RANK_SCORE_NOT_AVG */
6464
6465 } /* for (average_loops = 0; average_loops < rlevel_avg_loops; average_loops++) */
6466
6467 #if PICK_BEST_RANK_SCORE_NOT_AVG
6468
6469 /* We recorded the best score across the averaging loops */
6470 rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score = rlevel_best_rank_score;
6471
6472 /* Restore the delays from the best fields that go with the best score */
6473 for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
6474 rlevel_byte[byte_idx].delay = rlevel_byte[byte_idx].best;
6475 rlevel_byte[byte_idx].sqerrs = rlevel_byte[byte_idx].bestsq;
6476 }
6477 #else /* PICK_BEST_RANK_SCORE_NOT_AVG */
6478
6479 /* Compute the average score across averaging loops */
6480 rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score =
6481 divide_nint(rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score, rlevel_avg_loops);
6482
6483 /* Compute the average delay results */
6484 for (byte_idx=0; byte_idx < 9; ++byte_idx) {
6485 if (rlevel_byte[byte_idx].loop_count == 0)
6486 rlevel_byte[byte_idx].loop_count = 1;
6487 rlevel_byte[byte_idx].delay = divide_nint(rlevel_byte[byte_idx].loop_total,
6488 rlevel_byte[byte_idx].loop_count);
6489 }
6490 #endif /* PICK_BEST_RANK_SCORE_NOT_AVG */
6491
6492 lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
6493
6494 pack_rlevel_settings(ddr_interface_bytemask, ecc_ena, rlevel_byte, &lmc_rlevel_rank);
6495
6496 if (rlevel_avg_loops > 1) {
6497 #if PICK_BEST_RANK_SCORE_NOT_AVG
6498 // restore the "best" bitmasks and their scores for printing
6499 for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
6500 if ((ddr_interface_bytemask & (1 << byte_idx)) == 0)
6501 continue;
6502 // xlate PACKED index to UNPACKED index to get from rlevel_byte
6503 rlevel_bitmask[byte_idx].bm = rlevel_byte[XPU(byte_idx, !!ecc_ena)].bm;
6504 rlevel_bitmask[byte_idx].errs = rlevel_byte[XPU(byte_idx, !!ecc_ena)].bmerrs;
6505 }
6506 // print bitmasks/scores here only for DEV // FIXME? lower VBL?
6507 if (dram_is_verbose(VBL_DEV)) {
6508 display_RL_BM(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena);
6509 display_RL_BM_scores(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena);
6510 display_RL_SEQ_scores(node, ddr_interface_num, rankx, rlevel_byte, ecc_ena);
6511 }
6512
6513 display_RL_with_RODT(node, ddr_interface_num, lmc_rlevel_rank, rankx,
6514 rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score,
6515 print_nom_ohms, imp_values->rodt_ohms[rodt_ctl],
6516 WITH_RODT_BESTSCORE);
6517
6518 #else /* PICK_BEST_RANK_SCORE_NOT_AVG */
6519 display_RL_with_average(node, ddr_interface_num, lmc_rlevel_rank, rankx,
6520 rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score);
6521 #endif /* PICK_BEST_RANK_SCORE_NOT_AVG */
6522
6523 } /* if (rlevel_avg_loops > 1) */
6524
6525 rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].setting = lmc_rlevel_rank.u;
6526
6527 } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
6528 } /* for (rankx = 0; rankx < dimm_count*4; rankx++) */
6529         } /* for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) */
6530
6531
6532 /* Re-enable dynamic compensation settings. */
6533 if (rlevel_comp_offset != 0) {
6534 lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
6535
6536 lmc_comp_ctl2.s.ptune = 0;
6537 lmc_comp_ctl2.s.ntune = 0;
6538 lmc_comp_ctl2.s.byp = 0; /* Disable bypass mode */
6539 DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
6540 BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read once */
6541
6542 lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read again */
6543 ddr_print("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
6544 lmc_comp_ctl2.s.ddr__ptune, lmc_comp_ctl2.s.ddr__ntune);
6545
6546 lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
6547 lmc_control.s.int_zqcs_dis = saved_int_zqcs_dis; /* Restore original setting */
6548 DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
6549
6550 }
6551
6552
6553 {
6554 int override_compensation = 0;
6555 if ((s = lookup_env_parameter("ddr__ptune")) != NULL) {
6556 saved_ddr__ptune = strtoul(s, NULL, 0);
6557 override_compensation = 1;
6558 }
6559 if ((s = lookup_env_parameter("ddr__ntune")) != NULL) {
6560 saved_ddr__ntune = strtoul(s, NULL, 0);
6561 override_compensation = 1;
6562 }
6563 if (override_compensation) {
6564 lmc_comp_ctl2.s.ptune = saved_ddr__ptune;
6565 lmc_comp_ctl2.s.ntune = saved_ddr__ntune;
6566
6567 lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
6568 saved_int_zqcs_dis = lmc_control.s.int_zqcs_dis;
6569 lmc_control.s.int_zqcs_dis = 1; /* Disable ZQCS while in bypass. */
6570 DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
6571
6572 lmc_comp_ctl2.s.byp = 1; /* Enable bypass mode */
6573 DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
6574 lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read again */
6575
6576 ddr_print("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
6577 lmc_comp_ctl2.s.ptune, lmc_comp_ctl2.s.ntune);
6578 }
6579 }
6580 { /* Evaluation block */
6581 int best_rodt_score = DEFAULT_BEST_RANK_SCORE; /* Start with an arbitrarily high score */
6582 int auto_rodt_ctl = 0;
6583 int auto_rtt_nom = 0;
6584 int rodt_score;
6585 int rodt_row_skip_mask = 0;
6586
6587 // just add specific RODT rows to the skip mask for DDR4 at this time...
6588 if (ddr_type == DDR4_DRAM) {
6589 rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_34_ohm); // skip RODT row 34 ohms for all DDR4 types
6590 rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_40_ohm); // skip RODT row 40 ohms for all DDR4 types
6591 #if ADD_48_OHM_SKIP
6592 rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_48_ohm); // skip RODT row 48 ohms for all DDR4 types
6593 #endif /* ADD_48_OHM_SKIP */
6594 #if NOSKIP_40_48_OHM
6595 // For now, do not skip RODT row 40 or 48 ohm when ddr_hertz is above 1075 MHz
6596 if (ddr_hertz > 1075000000) {
6597 rodt_row_skip_mask &= ~(1 << ddr4_rodt_ctl_40_ohm); // noskip RODT row 40 ohms
6598 rodt_row_skip_mask &= ~(1 << ddr4_rodt_ctl_48_ohm); // noskip RODT row 48 ohms
6599 }
6600 #endif /* NOSKIP_40_48_OHM */
6601 #if NOSKIP_48_STACKED
6602 // For now, do not skip RODT row 48 ohm for 2Rx4 stacked die DIMMs
6603 if ((is_stacked_die) && (num_ranks == 2) && (dram_width == 4)) {
6604 rodt_row_skip_mask &= ~(1 << ddr4_rodt_ctl_48_ohm); // noskip RODT row 48 ohms
6605 }
6606 #endif /* NOSKIP_48_STACKED */
6607 #if NOSKIP_FOR_MINI
6608 // for now, leave all rows eligible when we have mini-DIMMs...
6609 if ((spd_dimm_type == 5) || (spd_dimm_type == 6)) {
6610 rodt_row_skip_mask = 0;
6611 }
6612 #endif /* NOSKIP_FOR_MINI */
6613 #if NOSKIP_FOR_2S_1R
6614 // for now, leave all rows eligible when we have a 2-slot 1-rank config
6615 if ((dimm_count == 2) && (num_ranks == 1)) {
6616 rodt_row_skip_mask = 0;
6617 }
6618 #endif /* NOSKIP_FOR_2S_1R */
6619 }
6620
6621 VB_PRT(VBL_DEV, "Evaluating Read-Leveling Scoreboard for AUTO settings.\n");
6622 for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) {
6623 rtt_nom = imp_values->rtt_nom_table[rtt_idx];
6624
6625                 /* When the read ODT mask is zero, the dyn_rtt_nom_mask is
6626                    zero and RTT_NOM will not change during
6627 read-leveling. Since the value is fixed we only need
6628 to test it once. */
6629 if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
6630 continue;
6631
6632 for (rodt_ctl=max_rodt_ctl; rodt_ctl>=min_rodt_ctl; --rodt_ctl) {
6633 rodt_score = 0;
6634 for (rankx = 0; rankx < dimm_count * 4;rankx++) {
6635 if (!(rank_mask & (1 << rankx)))
6636 continue;
6637 debug_print("rlevel_scoreboard[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score:%d\n",
6638 rtt_nom, rodt_ctl, rankx, rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score);
6639 rodt_score += rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score;
6640 }
6641 // FIXME: do we need to skip RODT rows here, like we do below in the by-RANK settings?
6642
6643 /* When using automatic ODT settings use the ODT
6644 settings associated with the best score for
6645 all of the tested ODT combinations. */
6646
6647 if ((rodt_score < best_rodt_score) || // always take lower score, OR
6648 ((rodt_score == best_rodt_score) && // take same score if RODT ohms are higher
6649 (imp_values->rodt_ohms[rodt_ctl] > imp_values->rodt_ohms[auto_rodt_ctl])))
6650 {
6651 debug_print("AUTO: new best score for rodt:%d (%3d), new score:%d, previous score:%d\n",
6652 rodt_ctl, imp_values->rodt_ohms[rodt_ctl], rodt_score, best_rodt_score);
6653 best_rodt_score = rodt_score;
6654 auto_rodt_ctl = rodt_ctl;
6655 auto_rtt_nom = rtt_nom;
6656 }
6657 } /* for (rodt_ctl=max_rodt_ctl; rodt_ctl>=min_rodt_ctl; --rodt_ctl) */
6658 } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
6659
6660 lmc_modereg_params1.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num));
6661
6662 if (ddr_rtt_nom_auto) {
6663 /* Store the automatically set RTT_NOM value */
6664 if (dyn_rtt_nom_mask & 1) lmc_modereg_params1.s.rtt_nom_00 = auto_rtt_nom;
6665 if (dyn_rtt_nom_mask & 2) lmc_modereg_params1.s.rtt_nom_01 = auto_rtt_nom;
6666 if (dyn_rtt_nom_mask & 4) lmc_modereg_params1.s.rtt_nom_10 = auto_rtt_nom;
6667 if (dyn_rtt_nom_mask & 8) lmc_modereg_params1.s.rtt_nom_11 = auto_rtt_nom;
6668 } else {
6669 /* restore the manual settings to the register */
6670 lmc_modereg_params1.s.rtt_nom_00 = default_rtt_nom[0];
6671 lmc_modereg_params1.s.rtt_nom_01 = default_rtt_nom[1];
6672 lmc_modereg_params1.s.rtt_nom_10 = default_rtt_nom[2];
6673 lmc_modereg_params1.s.rtt_nom_11 = default_rtt_nom[3];
6674 }
6675
6676 DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num), lmc_modereg_params1.u);
6677 VB_PRT(VBL_DEV, "RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
6678 imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_11],
6679 imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_10],
6680 imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_01],
6681 imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00],
6682 lmc_modereg_params1.s.rtt_nom_11,
6683 lmc_modereg_params1.s.rtt_nom_10,
6684 lmc_modereg_params1.s.rtt_nom_01,
6685 lmc_modereg_params1.s.rtt_nom_00);
6686
6687 VB_PRT(VBL_DEV, "RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
6688 imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 3)],
6689 imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 2)],
6690 imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 1)],
6691 imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 0)],
6692 EXTR_WR(lmc_modereg_params1.u, 3),
6693 EXTR_WR(lmc_modereg_params1.u, 2),
6694 EXTR_WR(lmc_modereg_params1.u, 1),
6695 EXTR_WR(lmc_modereg_params1.u, 0));
6696
6697 VB_PRT(VBL_DEV, "DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
6698 imp_values->dic_ohms[lmc_modereg_params1.s.dic_11],
6699 imp_values->dic_ohms[lmc_modereg_params1.s.dic_10],
6700 imp_values->dic_ohms[lmc_modereg_params1.s.dic_01],
6701 imp_values->dic_ohms[lmc_modereg_params1.s.dic_00],
6702 lmc_modereg_params1.s.dic_11,
6703 lmc_modereg_params1.s.dic_10,
6704 lmc_modereg_params1.s.dic_01,
6705 lmc_modereg_params1.s.dic_00);
6706
6707 if (ddr_type == DDR4_DRAM) {
6708 bdk_lmcx_modereg_params2_t lmc_modereg_params2;
6709 /*
6710 * We must read the CSR, and not depend on odt_config[odt_idx].odt_mask2,
6711 * since we could have overridden values with envvars.
6712 * NOTE: this corrects the printout, since the CSR is not written with the old values...
6713 */
6714 lmc_modereg_params2.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num));
6715
6716 VB_PRT(VBL_DEV, "RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
6717 imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_11],
6718 imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_10],
6719 imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_01],
6720 imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_00],
6721 lmc_modereg_params2.s.rtt_park_11,
6722 lmc_modereg_params2.s.rtt_park_10,
6723 lmc_modereg_params2.s.rtt_park_01,
6724 lmc_modereg_params2.s.rtt_park_00);
6725
6726 VB_PRT(VBL_DEV, "%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE",
6727 lmc_modereg_params2.s.vref_range_11,
6728 lmc_modereg_params2.s.vref_range_10,
6729 lmc_modereg_params2.s.vref_range_01,
6730 lmc_modereg_params2.s.vref_range_00);
6731
6732 VB_PRT(VBL_DEV, "%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE",
6733 lmc_modereg_params2.s.vref_value_11,
6734 lmc_modereg_params2.s.vref_value_10,
6735 lmc_modereg_params2.s.vref_value_01,
6736 lmc_modereg_params2.s.vref_value_00);
6737 }
6738
6739 lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
6740 if (ddr_rodt_ctl_auto)
6741 lmc_comp_ctl2.s.rodt_ctl = auto_rodt_ctl;
6742 else
6743 lmc_comp_ctl2.s.rodt_ctl = default_rodt_ctl; // back to the original setting
6744 DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
6745 lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
6746 VB_PRT(VBL_DEV, "Read ODT_CTL : 0x%x (%d ohms)\n",
6747 lmc_comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[lmc_comp_ctl2.s.rodt_ctl]);
6748
6749 ////////////////// this is the start of the RANK MAJOR LOOP
6750
6751 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
6752 int best_rank_score = DEFAULT_BEST_RANK_SCORE; /* Start with an arbitrarily high score */
6753 int best_rank_rtt_nom = 0;
6754 //int best_rank_nom_ohms = 0;
6755 int best_rank_ctl = 0;
6756 int best_rank_ohms = 0;
6757 int best_rankx = 0;
6758
6759 if (!(rank_mask & (1 << rankx)))
6760 continue;
6761
6762 /* Use the delays associated with the best score for each individual rank */
6763 VB_PRT(VBL_TME, "Evaluating Read-Leveling Scoreboard for Rank %d settings.\n", rankx);
6764
6765 // some of the rank-related loops below need to operate only on the ranks of a single DIMM,
6766 // so create a mask for their use here
6767 int dimm_rank_mask;
6768 if (num_ranks == 4)
6769 dimm_rank_mask = rank_mask; // should be 1111
6770 else {
6771 dimm_rank_mask = rank_mask & 3; // should be 01 or 11
6772 if (rankx >= 2)
6773 dimm_rank_mask <<= 2; // doing a rank on the second DIMM, should be 0100 or 1100
6774 }
6775 debug_print("DIMM rank mask: 0x%x, rank mask: 0x%x, rankx: %d\n", dimm_rank_mask, rank_mask, rankx);
6776
6777 ////////////////// this is the start of the BEST ROW SCORE LOOP
6778
6779 for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
6780 //int rtt_nom_ohms;
6781 rtt_nom = imp_values->rtt_nom_table[rtt_idx];
6782 //rtt_nom_ohms = imp_values->rtt_nom_ohms[rtt_nom];
6783
6784                     /* When the read ODT mask is zero, the dyn_rtt_nom_mask is
6785                        zero and RTT_NOM will not change during
6786 read-leveling. Since the value is fixed we only need
6787 to test it once. */
6788 if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
6789 continue;
6790
6791 debug_print("N%d.LMC%d.R%d: starting RTT_NOM %d (%d)\n",
6792                             node, ddr_interface_num, rankx, rtt_nom, imp_values->rtt_nom_ohms[rtt_nom]);
6793
6794 for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
6795 int next_ohms = imp_values->rodt_ohms[rodt_ctl];
6796
6797 // skip RODT rows in mask, but *NOT* rows with too high a score;
6798 // we will not use the skipped ones for printing or evaluating, but
6799 // we need to allow all the non-skipped ones to be candidates for "best"
6800 if (((1 << rodt_ctl) & rodt_row_skip_mask) != 0) {
6801                         debug_print("N%d.LMC%d.R%d: SKIPPING rodt:%d (%d)\n",
6802                                     node, ddr_interface_num, rankx, rodt_ctl, next_ohms);
6803 continue;
6804 }
6805 for (int orankx = 0; orankx < dimm_count * 4; orankx++) { // this is ROFFIX-0528
6806 if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
6807 continue;
6808
6809 int next_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
6810
6811 if (next_score > best_rank_score) // always skip a higher score
6812 continue;
6813 if (next_score == best_rank_score) { // if scores are equal
6814 if (next_ohms < best_rank_ohms) // always skip lower ohms
6815 continue;
6816 if (next_ohms == best_rank_ohms) { // if same ohms
6817 if (orankx != rankx) // always skip the other rank(s)
6818 continue;
6819 }
6820                             // else next_ohms is greater, always choose it
6821 }
6822 // else next_score is less than current best, so always choose it
6823 VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: new best score: rank %d, rodt %d(%3d), new best %d, previous best %d(%d)\n",
6824 node, ddr_interface_num, rankx, orankx, rodt_ctl, next_ohms, next_score,
6825 best_rank_score, best_rank_ohms);
6826 best_rank_score = next_score;
6827 best_rank_rtt_nom = rtt_nom;
6828 //best_rank_nom_ohms = rtt_nom_ohms;
6829 best_rank_ctl = rodt_ctl;
6830 best_rank_ohms = next_ohms;
6831 best_rankx = orankx;
6832 lmc_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
6833
6834 } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) */
6835 } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
6836 } /* for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) */
6837
6838 ////////////////// this is the end of the BEST ROW SCORE LOOP
6839
6840 // DANGER, Will Robinson!! Abort now if we did not find a best score at all...
6841 if (best_rank_score == DEFAULT_BEST_RANK_SCORE) {
6842 error_print("WARNING: no best rank score found for N%d.LMC%d.R%d - resetting node...\n",
6843 node, ddr_interface_num, rankx);
6844 bdk_wait_usec(500000);
6845 bdk_reset_chip(node);
6846 }
6847
6848 // FIXME: relative now, but still arbitrary...
6849 // halve the range if 2 DIMMs unless they are single rank...
6850 int MAX_RANK_SCORE = best_rank_score;
6851 MAX_RANK_SCORE += (MAX_RANK_SCORE_LIMIT / ((num_ranks > 1) ? dimm_count : 1));
6852
6853 if (!ecc_ena){
6854 lmc_rlevel_rank.cn83xx.byte8 = lmc_rlevel_rank.cn83xx.byte0; /* ECC is not used */
6855 }
6856
6857 // at the end, write the best row settings to the current rank
6858 DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u);
6859 lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
6860
6861 bdk_lmcx_rlevel_rankx_t saved_rlevel_rank;
6862 saved_rlevel_rank.u = lmc_rlevel_rank.u;
6863
6864 ////////////////// this is the start of the PRINT LOOP
6865
6866             // for pass==0, print the current rank; for pass==1, print the other rank(s)
6867             // this is done because we want to show each rank's RODT values together, not interleaved
6868 #if COUNT_RL_CANDIDATES
6869             // keep separate counts per rank - pass=0 target rank, pass=1 other rank on DIMM
6870 int mask_skipped[2] = {0,0};
6871 int score_skipped[2] = {0,0};
6872 int selected_rows[2] = {0,0};
6873 int zero_scores[2] = {0,0};
6874 #endif /* COUNT_RL_CANDIDATES */
6875 for (int pass = 0; pass < 2; pass++ ) {
6876 for (int orankx = 0; orankx < dimm_count * 4; orankx++) {
6877 if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
6878 continue;
6879
6880 if (((pass == 0) && (orankx != rankx)) || ((pass != 0) && (orankx == rankx)))
6881 continue;
6882
6883 for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
6884 rtt_nom = imp_values->rtt_nom_table[rtt_idx];
6885 if (dyn_rtt_nom_mask == 0) {
6886 print_nom_ohms = -1;
6887 if (rtt_idx != min_rtt_nom_idx)
6888 continue;
6889 } else {
6890 print_nom_ohms = imp_values->rtt_nom_ohms[rtt_nom];
6891 }
6892
6893 // cycle through all the RODT values...
6894 for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
6895 bdk_lmcx_rlevel_rankx_t temp_rlevel_rank;
6896 int temp_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
6897 temp_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
6898
6899 // skip RODT rows in mask, or rows with too high a score;
6900 // we will not use them for printing or evaluating...
6901 #if COUNT_RL_CANDIDATES
6902 int skip_row;
6903 if ((1 << rodt_ctl) & rodt_row_skip_mask) {
6904 skip_row = WITH_RODT_SKIPPING;
6905 ++mask_skipped[pass];
6906 } else if (temp_score > MAX_RANK_SCORE) {
6907 skip_row = WITH_RODT_SKIPPING;
6908 ++score_skipped[pass];
6909 } else {
6910 skip_row = WITH_RODT_BLANK;
6911 ++selected_rows[pass];
6912 if (temp_score == 0)
6913 ++zero_scores[pass];
6914 }
6915
6916 #else /* COUNT_RL_CANDIDATES */
6917 int skip_row = (((1 << rodt_ctl) & rodt_row_skip_mask) || (temp_score > MAX_RANK_SCORE))
6918 ? WITH_RODT_SKIPPING: WITH_RODT_BLANK;
6919 #endif /* COUNT_RL_CANDIDATES */
6920
6921 // identify and print the BEST ROW when it comes up
6922 if ((skip_row == WITH_RODT_BLANK) &&
6923 (best_rankx == orankx) &&
6924 (best_rank_rtt_nom == rtt_nom) &&
6925 (best_rank_ctl == rodt_ctl))
6926 {
6927 skip_row = WITH_RODT_BESTROW;
6928 }
6929
6930 display_RL_with_RODT(node, ddr_interface_num,
6931 temp_rlevel_rank, orankx, temp_score,
6932 print_nom_ohms,
6933 imp_values->rodt_ohms[rodt_ctl],
6934 skip_row);
6935
6936 } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
6937 } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
6938 } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) { */
6939 } /* for (int pass = 0; pass < 2; pass++ ) */
6940 #if COUNT_RL_CANDIDATES
6941 VB_PRT(VBL_TME, "N%d.LMC%d.R%d: RLROWS: selected %d+%d, zero_scores %d+%d, mask_skipped %d+%d, score_skipped %d+%d\n",
6942 node, ddr_interface_num, rankx,
6943 selected_rows[0], selected_rows[1],
6944 zero_scores[0], zero_scores[1],
6945 mask_skipped[0], mask_skipped[1],
6946 score_skipped[0], score_skipped[1]);
6947 #endif /* COUNT_RL_CANDIDATES */
6948
6949 ////////////////// this is the end of the PRINT LOOP
6950
6951 // now evaluate which bytes need adjusting
6952 uint64_t byte_msk = 0x3f; // 6-bit fields
6953 uint64_t best_byte, new_byte, temp_byte, orig_best_byte;
6954
6955 uint64_t rank_best_bytes[9]; // collect the new byte values; first init with current best for neighbor use
6956 for (int byte_idx = 0, byte_sh = 0; byte_idx < 8+ecc_ena; byte_idx++, byte_sh += 6) {
6957 rank_best_bytes[byte_idx] = (lmc_rlevel_rank.u >> byte_sh) & byte_msk;
6958 }
6959
6960 ////////////////// this is the start of the BEST BYTE LOOP
6961
6962 for (int byte_idx = 0, byte_sh = 0; byte_idx < 8+ecc_ena; byte_idx++, byte_sh += 6) {
6963 best_byte = orig_best_byte = rank_best_bytes[byte_idx];
6964
6965 ////////////////// this is the start of the BEST BYTE AVERAGING LOOP
6966
6967 // validate the initial "best" byte by looking at the average of the unskipped byte-column entries
6968 // we want to do this before we go further, so we can try to start with a better initial value
6969 // this is the so-called "BESTBUY" patch set
6970 int sum = 0, count = 0;
6971
6972 for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
6973 rtt_nom = imp_values->rtt_nom_table[rtt_idx];
6974 if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
6975 continue;
6976
6977 for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
6978 bdk_lmcx_rlevel_rankx_t temp_rlevel_rank;
6979 int temp_score;
6980 for (int orankx = 0; orankx < dimm_count * 4; orankx++) { // average over all the ranks
6981 if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
6982 continue;
6983 temp_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
6984 // skip RODT rows in mask, or rows with too high a score;
6985 // we will not use them for printing or evaluating...
6986
6987 if (!((1 << rodt_ctl) & rodt_row_skip_mask) &&
6988 (temp_score <= MAX_RANK_SCORE))
6989 {
6990 temp_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
6991 temp_byte = (temp_rlevel_rank.u >> byte_sh) & byte_msk;
6992 sum += temp_byte;
6993 count++;
6994 }
6995 } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) */
6996 } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
6997 } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
6998
6999 ////////////////// this is the end of the BEST BYTE AVERAGING LOOP
7000
7001
7002 uint64_t avg_byte = divide_nint(sum, count); // FIXME: validate count and sum??
7003 int avg_diff = (int)best_byte - (int)avg_byte;
7004 new_byte = best_byte;
7005 if (avg_diff != 0) {
7006 // bump best up/dn by 1, not necessarily all the way to avg
7007 new_byte = best_byte + ((avg_diff > 0) ? -1: 1);
7008 }
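// Worked example of the bump above (illustrative only): if best_byte is 10
// and the average over the unskipped rows is 7, avg_diff is +3 and new_byte
// becomes 9; that is one step toward the average, never the whole distance.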
7009
7010 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: START: Byte %d: best %d is different by %d from average %d, using %d.\n",
7011 node, ddr_interface_num, rankx,
7012 byte_idx, (int)best_byte, avg_diff, (int)avg_byte, (int)new_byte);
7013 best_byte = new_byte;
7014
7015 // At this point best_byte is either:
7016 // 1. the original byte-column value from the best scoring RODT row, OR
7017 // 2. that value bumped toward the average of all the byte-column values
7018 //
7019 // best_byte will not change from here on...
7020
7021 ////////////////// this is the start of the BEST BYTE COUNTING LOOP
7022
7023 // NOTE: we do this next loop separately from above, because we count relative to "best_byte"
7024 // which may have been modified by the above averaging operation...
7025 //
7026 // Also, the above only moves toward the average by +- 1, so that we will always have a count
7027 // of at least 1 for the original best byte, even if all the others are further away and not counted;
7028 // this ensures we will go back to the original if no others are counted...
7029 // FIXME: this could cause an issue if the range of values for a byte-lane is too disparate...
7030 int count_less = 0, count_same = 0, count_more = 0;
7031 #if FAILSAFE_CHECK
7032 uint64_t count_byte = new_byte; // save the value we will count around
7033 #endif /* FAILSAFE_CHECK */
7034 #if RANK_MAJORITY
7035 int rank_less = 0, rank_same = 0, rank_more = 0;
7036 #endif /* RANK_MAJORITY */
7037
7038 for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
7039 rtt_nom = imp_values->rtt_nom_table[rtt_idx];
7040 if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
7041 continue;
7042
7043 for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
7044 bdk_lmcx_rlevel_rankx_t temp_rlevel_rank;
7045 int temp_score;
7046 for (int orankx = 0; orankx < dimm_count * 4; orankx++) { // count over all the ranks
7047 if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
7048 continue;
7049 temp_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
7050 // skip RODT rows in mask, or rows with too high a score;
7051 // we will not use them for printing or evaluating...
7052 if (((1 << rodt_ctl) & rodt_row_skip_mask) ||
7053 (temp_score > MAX_RANK_SCORE))
7054 {
7055 continue;
7056 }
7057 temp_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
7058 temp_byte = (temp_rlevel_rank.u >> byte_sh) & byte_msk;
7059
7060 if (temp_byte == 0) // do not count it if illegal
7061 continue;
7062 else if (temp_byte == best_byte)
7063 count_same++;
7064 else if (temp_byte == best_byte - 1)
7065 count_less++;
7066 else if (temp_byte == best_byte + 1)
7067 count_more++;
7068 // else do not count anything more than 1 away from the best
7069 #if RANK_MAJORITY
7070 // FIXME? count is relative to best_byte; should it be rank-based?
7071 if (orankx != rankx) // rank counts only on main rank
7072 continue;
7073 else if (temp_byte == best_byte)
7074 rank_same++;
7075 else if (temp_byte == best_byte - 1)
7076 rank_less++;
7077 else if (temp_byte == best_byte + 1)
7078 rank_more++;
7079 #endif /* RANK_MAJORITY */
7080 } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) */
7081 } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
7082 } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
7083
7084 #if RANK_MAJORITY
7085 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: COUNT: Byte %d: orig %d now %d, more %d same %d less %d (%d/%d/%d)\n",
7086 node, ddr_interface_num, rankx,
7087 byte_idx, (int)orig_best_byte, (int)best_byte,
7088 count_more, count_same, count_less,
7089 rank_more, rank_same, rank_less);
7090 #else /* RANK_MAJORITY */
7091 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: COUNT: Byte %d: orig %d now %d, more %d same %d less %d\n",
7092 node, ddr_interface_num, rankx,
7093 byte_idx, (int)orig_best_byte, (int)best_byte,
7094 count_more, count_same, count_less);
7095 #endif /* RANK_MAJORITY */
7096 ////////////////// this is the end of the BEST BYTE COUNTING LOOP
7097
7098 // choose the new byte value
7099 // we need to check that there is no gap greater than 2 between adjacent bytes
7100 // (adjacency depends on DIMM type)
7101 // use the neighbor value to help decide
7102 // initially, the rank_best_bytes[] will contain values from the chosen lowest score rank
7103 new_byte = 0;
7104
7105 // neighbor is index-1 unless we are index 0 or index 8 (ECC)
7106 int neighbor = (byte_idx == 8) ? 3 : ((byte_idx == 0) ? 1 : byte_idx - 1);
7107 uint64_t neigh_byte = rank_best_bytes[neighbor];
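// Examples of the neighbor mapping above: byte 1 uses byte 0, byte 7 uses
// byte 6, byte 0 (no lower index) uses byte 1, and the ECC byte (index 8)
// uses byte 3; the ECC choice is hard-coded, presumably because that lane
// routes near the middle of the data bytes.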
7108
7109
7110 // can go up or down or stay the same, so look at a numeric average to help
7111 new_byte = divide_nint(((count_more * (best_byte + 1)) +
7112 (count_same * (best_byte + 0)) +
7113 (count_less * (best_byte - 1))),
7114 max(1, (count_more + count_same + count_less)));
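// Worked example of the weighted average above (illustrative only): with
// best_byte = 10, count_more = 3, count_same = 5, count_less = 1,
// new_byte = nint((3*11 + 5*10 + 1*9) / 9) = nint(92/9) = 10.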
7115
7116 // use neighbor to help choose with average
7117 if ((byte_idx > 0) && (_abs(neigh_byte - new_byte) > 2)) // but not for byte 0
7118 {
7119 uint64_t avg_pick = new_byte;
7120 if ((new_byte - best_byte) != 0)
7121 new_byte = best_byte; // back to best, average did not get better
7122 else // avg was the same, still too far, now move it towards the neighbor
7123 new_byte += (neigh_byte > new_byte) ? 1 : -1;
7124
7125 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: AVERAGE: Byte %d: neighbor %d too different %d from average %d, picking %d.\n",
7126 node, ddr_interface_num, rankx,
7127 byte_idx, neighbor, (int)neigh_byte, (int)avg_pick, (int)new_byte);
7128 }
7129 #if MAJORITY_OVER_AVG
7130 // NOTE:
7131 // For now, we let the neighbor processing above trump the new simple majority processing here.
7132 // This is mostly because we have not (yet?) seen a smoking gun for a bad neighbor choice.
7133 // Also note that we will ALWAYS be using byte 0 majority, because of the if clause above.
7134 else {
7135 // majority is dependent on the counts, which are relative to best_byte, so start there
7136 uint64_t maj_byte = best_byte;
7137 if ((count_more > count_same) && (count_more > count_less)) {
7138 maj_byte++;
7139 } else if ((count_less > count_same) && (count_less > count_more)) {
7140 maj_byte--;
7141 }
7142 if (maj_byte != new_byte) {
7143 // print only when majority choice is different from average
7144 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: MAJORTY: Byte %d: picking majority of %d over average %d.\n",
7145 node, ddr_interface_num, rankx,
7146 byte_idx, (int)maj_byte, (int)new_byte);
7147 new_byte = maj_byte;
7148 } else {
7149 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n",
7150 node, ddr_interface_num, rankx,
7151 byte_idx, (int)new_byte);
7152 }
7153 #if RANK_MAJORITY
7154 // rank majority is dependent on the rank counts, which are relative to best_byte,
7155 // so start there, and adjust according to the rank counts majority
7156 uint64_t rank_maj = best_byte;
7157 if ((rank_more > rank_same) && (rank_more > rank_less)) {
7158 rank_maj++;
7159 } else if ((rank_less > rank_same) && (rank_less > rank_more)) {
7160 rank_maj--;
7161 }
7162 int rank_sum = rank_more + rank_same + rank_less;
7163
7164 // now, let rank majority possibly rule over the current new_byte however we got it
7165 if (rank_maj != new_byte) { // only if different
7166 // Here is where we decide whether to completely apply RANK_MAJORITY or not
7167 // FIXME: For the moment, we do it ONLY when running 2-slot configs
7168 // FIXME? or when rank_sum is big enough?
7169 if ((dimm_count > 1) || (rank_sum > 2)) {
7170 // print only when rank majority choice is selected
7171 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: RANKMAJ: Byte %d: picking %d over %d.\n",
7172 node, ddr_interface_num, rankx,
7173 byte_idx, (int)rank_maj, (int)new_byte);
7174 new_byte = rank_maj;
7175 } else { // FIXME: print some info when we could have chosen RANKMAJ but did not
7176 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: RANKMAJ: Byte %d: NOT using %d over %d (best=%d,sum=%d).\n",
7177 node, ddr_interface_num, rankx,
7178 byte_idx, (int)rank_maj, (int)new_byte,
7179 (int)best_byte, rank_sum);
7180 }
7181 }
7182 #endif /* RANK_MAJORITY */
7183 }
7184 #else
7185 else {
7186 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n",
7187 node, ddr_interface_num, rankx,
7188 byte_idx, (int)new_byte);
7189 }
7190 #endif
7191 #if FAILSAFE_CHECK
7192 // one last check:
7193 // if new_byte is still count_byte, BUT there was no count for that value, DO SOMETHING!!!
7194 // FIXME: go back to original best byte from the best row
7195 if ((new_byte == count_byte) && (count_same == 0)) {
7196 new_byte = orig_best_byte;
7197 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: FAILSAF: Byte %d: going back to original %d.\n",
7198 node, ddr_interface_num, rankx,
7199 byte_idx, (int)new_byte);
7200 }
7201 #endif /* FAILSAFE_CHECK */
7202 #if PERFECT_BITMASK_COUNTING
7203 // Look at counts for "perfect" bitmasks if we had any for this byte-lane.
7204 // Remember, we only counted for DDR4, so zero means none or DDR3, and we bypass this...
7205 if (rank_perfect_counts[rankx].total[byte_idx] > 0) {
7206 // FIXME: should be more error checking, look for ties, etc...
7207 /* FIXME(dhendrix): i shadows another local variable, changed to _i in this block */
7208 // int i, delay_count, delay_value, delay_max;
7209 int _i, delay_count, delay_value, delay_max;
7210 uint32_t ties;
7211 delay_value = -1;
7212 delay_max = 0;
7213 ties = 0;
7214
7215 for (_i = 0; _i < 32; _i++) {
7216 delay_count = rank_perfect_counts[rankx].count[byte_idx][_i];
7217 if (delay_count > 0) { // only look closer if there are any...
7218 if (delay_count > delay_max) {
7219 delay_max = delay_count;
7220 delay_value = _i;
7221 ties = 0; // reset ties to none
7222 } else if (delay_count == delay_max) {
7223 if (ties == 0)
7224 ties = 1UL << delay_value; // put in original value
7225 ties |= 1UL << _i; // add new value
7226 }
7227 }
7228 } /* for (_i = 0; _i < 32; _i++) */
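// Illustrative example of the tie tracking above: if delays 12 and 14 both
// end up with the max count, ties becomes (1UL<<12)|(1UL<<14) = 0x5000 and
// delay_value keeps the first tied delay seen (12).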
7229
7230 if (delay_value >= 0) {
7231 if (ties != 0) {
7232 if (ties & (1UL << (int)new_byte)) {
7233 // leave choice as new_byte if any tied one is the same...
7236 delay_value = (int)new_byte;
7237 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: TIES (0x%x) INCLUDED %d (%d)\n",
7238 node, ddr_interface_num, rankx, byte_idx, ties, (int)new_byte, delay_max);
7239 } else {
7240 // FIXME: should choose a perfect one!!!
7241 // FIXME: for now, leave the choice as new_byte
7242 delay_value = (int)new_byte;
7243 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: TIES (0x%x) OMITTED %d (%d)\n",
7244 node, ddr_interface_num, rankx, byte_idx, ties, (int)new_byte, delay_max);
7245 }
7246 } /* if (ties != 0) */
7247
7248 if (delay_value != (int)new_byte) {
7249 delay_count = rank_perfect_counts[rankx].count[byte_idx][(int)new_byte];
7250 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: DIFF from %d (%d), USING %d (%d)\n",
7251 node, ddr_interface_num, rankx, byte_idx, (int)new_byte,
7252 delay_count, delay_value, delay_max);
7253 new_byte = (uint64_t)delay_value; // FIXME: make this optional via envvar?
7254 } else {
7255 debug_print("N%d.LMC%d.R%d: PERFECT: Byte %d: SAME as %d (%d)\n",
7256 node, ddr_interface_num, rankx, byte_idx, new_byte, delay_max);
7257 }
7258 }
7259 } /* if (rank_perfect_counts[rankx].total[byte_idx] > 0) */
7260 else {
7261 if (ddr_type == DDR4_DRAM) { // only report when DDR4
7262 // FIXME: remove or increase VBL for this output...
7263 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: ZERO perfect bitmasks\n",
7264 node, ddr_interface_num, rankx, byte_idx);
7265 }
7266 } /* if (rank_perfect_counts[rankx].total[byte_idx] > 0) */
7267 #endif /* PERFECT_BITMASK_COUNTING */
7268
7269 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: SUMMARY: Byte %d: %s: orig %d now %d, more %d same %d less %d, using %d\n",
7270 node, ddr_interface_num, rankx,
7271 byte_idx, "AVG", (int)orig_best_byte,
7272 (int)best_byte, count_more, count_same, count_less, (int)new_byte);
7273
7274 // update the byte with the new value (NOTE: orig value in the CSR may not be current "best")
7275 lmc_rlevel_rank.u &= ~(byte_msk << byte_sh);
7276 lmc_rlevel_rank.u |= (new_byte << byte_sh);
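// Field-update example (assuming the 6-bit-per-byte RLEVEL_RANK layout used
// throughout this section): for byte_idx 3, byte_sh is 18, so the two lines
// above clear bits [23:18] and then OR in new_byte at that position.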
7277
7278 rank_best_bytes[byte_idx] = new_byte; // save new best for neighbor use
7279
7280 } /* for (byte_idx = 0; byte_idx < 8+ecc_ena; byte_idx++) */
7281
7282 ////////////////// this is the end of the BEST BYTE LOOP
7283
7284 if (saved_rlevel_rank.u != lmc_rlevel_rank.u) {
7285 DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u);
7286 lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
7287 debug_print("Adjusting Read-Leveling per-RANK settings.\n");
7288 } else {
7289 debug_print("Not Adjusting Read-Leveling per-RANK settings.\n");
7290 }
7291 display_RL_with_final(node, ddr_interface_num, lmc_rlevel_rank, rankx);
7292
7293 #if RLEXTRAS_PATCH
7294 #define RLEVEL_RANKX_EXTRAS_INCR 4
7295 if ((rank_mask & 0x0F) != 0x0F) { // if there are unused entries to be filled
7296 bdk_lmcx_rlevel_rankx_t temp_rlevel_rank = lmc_rlevel_rank; // copy the current rank
7297 int byte, delay;
7298 if (rankx < 3) {
7299 debug_print("N%d.LMC%d.R%d: checking for RLEVEL_RANK unused entries.\n",
7300 node, ddr_interface_num, rankx);
7301 for (byte = 0; byte < 9; byte++) { // modify the copy in prep for writing to empty slot(s)
7302 delay = get_rlevel_rank_struct(&temp_rlevel_rank, byte) + RLEVEL_RANKX_EXTRAS_INCR;
7303 if (delay > (int)RLEVEL_BYTE_MSK) delay = RLEVEL_BYTE_MSK;
7304 update_rlevel_rank_struct(&temp_rlevel_rank, byte, delay);
7305 }
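// Illustrative numbers for the padding above: a byte delay of 40 becomes 44,
// while a delay of 60 or more is clamped to RLEVEL_BYTE_MSK (0x3F for the
// 6-bit byte fields used here), so the copy written to unused rank CSRs is
// always a legal setting.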
7306 if (rankx == 0) { // if rank 0, write rank 1 and rank 2 here if empty
7307 if (!(rank_mask & (1<<1))) { // check that rank 1 is empty
7308 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7309 node, ddr_interface_num, rankx, 1);
7310 DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, 1), temp_rlevel_rank.u);
7311 }
7312 if (!(rank_mask & (1<<2))) { // check that rank 2 is empty
7313 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7314 node, ddr_interface_num, rankx, 2);
7315 DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, 2), temp_rlevel_rank.u);
7316 }
7317 }
7318 // if ranks 0, 1 or 2, write rank 3 here if empty
7319 if (!(rank_mask & (1<<3))) { // check that rank 3 is empty
7320 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7321 node, ddr_interface_num, rankx, 3);
7322 DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, 3), temp_rlevel_rank.u);
7323 }
7324 }
7325 }
7326 #endif /* RLEXTRAS_PATCH */
7327 } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
7328
7329 ////////////////// this is the end of the RANK MAJOR LOOP
7330
7331 } /* Evaluation block */
7332 } /* while(rlevel_debug_loops--) */
7333
7334 lmc_control.s.ddr2t = save_ddr2t;
7335 DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
7336 lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
7337 ddr_print("%-45s : %6d\n", "DDR2T", lmc_control.s.ddr2t); /* Display final 2T value */
7338
7339
7340 perform_ddr_init_sequence(node, rank_mask, ddr_interface_num);
7341
7342 for (rankx = 0; rankx < dimm_count * 4;rankx++) {
7343 uint64_t value;
7344 int parameter_set = 0;
7345 if (!(rank_mask & (1 << rankx)))
7346 continue;
7347
7348 lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
7349
7350 for (i=0; i<9; ++i) {
7351 if ((s = lookup_env_parameter("ddr%d_rlevel_rank%d_byte%d", ddr_interface_num, rankx, i)) != NULL) {
7352 parameter_set |= 1;
7353 value = strtoul(s, NULL, 0);
7354
7355 update_rlevel_rank_struct(&lmc_rlevel_rank, i, value);
7356 }
7357 }
7358
7359 if ((s = lookup_env_parameter_ull("ddr%d_rlevel_rank%d", ddr_interface_num, rankx)) != NULL) {
7360 parameter_set |= 1;
7361 value = strtoull(s, NULL, 0);
7362 lmc_rlevel_rank.u = value;
7363 }
7364
7365 if (parameter_set) {
7366 DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u);
7367 display_RL(node, ddr_interface_num, lmc_rlevel_rank, rankx);
7368 }
7369 }
7370 }
7371
7372 /* Workaround Trcd overflow by using Additive latency. */
7373 if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X))
7374 {
7375 bdk_lmcx_modereg_params0_t lmc_modereg_params0;
7376 bdk_lmcx_timing_params1_t lmc_timing_params1;
7377 bdk_lmcx_control_t lmc_control;
7378 int rankx;
7379
7380 lmc_timing_params1.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num));
7381 lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
7382 lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
7383
7384 if (lmc_timing_params1.s.trcd == 0) {
7385 ddr_print("Workaround Trcd overflow by using Additive latency.\n");
7386 lmc_timing_params1.s.trcd = 12; /* Hard code this to 12 and enable additive latency */
7387 lmc_modereg_params0.s.al = 2; /* CL-2 */
7388 lmc_control.s.pocas = 1;
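/* Background for the workaround above (rough summary, not additional steps):
 * with additive/posted-CAS latency the DRAM holds a READ or WRITE received
 * right after ACTIVATE for AL clocks before executing it internally, so the
 * controller may issue the column command before the full tRCD has elapsed
 * externally; AL = CL-2 plus POCAS is what compensates for clamping the
 * TRCD field to 12 here. */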
7389
7390 ddr_print("MODEREG_PARAMS0 : 0x%016llx\n", lmc_modereg_params0.u);
7391 DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
7392 ddr_print("TIMING_PARAMS1 : 0x%016llx\n", lmc_timing_params1.u);
7393 DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num), lmc_timing_params1.u);
7394
7395 ddr_print("LMC_CONTROL : 0x%016llx\n", lmc_control.u);
7396 DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
7397
7398 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
7399 if (!(rank_mask & (1 << rankx)))
7400 continue;
7401
7402 ddr4_mrw(node, ddr_interface_num, rankx, -1, 1, 0); /* MR1 */
7403 }
7404 }
7405 }
7406
7407 // this is here just for output, to allow check of the Deskew settings one last time...
7408 if (! disable_deskew_training) {
7409 deskew_counts_t dsk_counts;
7410 VB_PRT(VBL_TME, "N%d.LMC%d: Check Deskew Settings before software Write-Leveling.\n",
7411 node, ddr_interface_num);
7412 Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, VBL_TME); // TME for FINAL
7413 }
7414
7415
7416 /* Workaround Errata 26304 ([email protected])
7417
7418 When the CSRs LMCX_DLL_CTL3[WR_DESKEW_ENA] = 1 AND
7419 LMCX_PHY_CTL2[DQS[0..8]_DSK_ADJ] > 4, set
7420 LMCX_EXT_CONFIG[DRIVE_ENA_BPRCH] = 1.
7421 */
7422 if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS2_X)) { // only for 88XX pass 2, not 81xx or 83xx
7423 bdk_lmcx_dll_ctl3_t dll_ctl3;
7424 bdk_lmcx_phy_ctl2_t phy_ctl2;
7425 bdk_lmcx_ext_config_t ext_config;
7426 int increased_dsk_adj = 0;
7427 int byte;
7428
7429 phy_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL2(ddr_interface_num));
7430 ext_config.u = BDK_CSR_READ(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num));
7431 dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
7432
7433 for (byte = 0; byte < 8; ++byte) {
7434 if (!(ddr_interface_bytemask&(1<<byte)))
7435 continue;
7436 increased_dsk_adj |= (((phy_ctl2.u >> (byte*3)) & 0x7) > 4);
7437 }
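// The loop above assumes PHY_CTL2 packs one 3-bit DQSn_DSK_ADJ field per byte
// lane, so ((phy_ctl2.u >> (byte*3)) & 0x7) isolates lane 'byte'; any enabled
// lane with an adjustment greater than 4 arms the DRIVE_ENA_BPRCH workaround.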
7438
7439 if ((dll_ctl3.s.wr_deskew_ena == 1) && increased_dsk_adj) {
7440 ext_config.s.drive_ena_bprch = 1;
7441 DRAM_CSR_WRITE(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num),
7442 ext_config.u);
7443 }
7444 }
7445
7446 /*
7447 * 6.9.13 DRAM Vref Training for DDR4
7448 *
7449 * This includes software write-leveling
7450 */
7451
7452 { // Software Write-Leveling block
7453
7454 /* Try to determine/optimize write-level delays experimentally. */
7455 #pragma pack(push,1)
7456 bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank;
7457 bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank_hw_results;
7458 int byte;
7459 int delay;
7460 int rankx = 0;
7461 int active_rank;
7462 #if !DISABLE_SW_WL_PASS_2
7463 bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank;
7464 int sw_wlevel_offset = 1;
7465 #endif
7466 int sw_wlevel_enable = 1; /* FIX... Should be customizable. */
7467 int interfaces;
7468 int measured_vref_flag;
7469 typedef enum {
7470 WL_ESTIMATED = 0, /* HW/SW wleveling failed. Results
7471 estimated. */
7472 WL_HARDWARE = 1, /* H/W wleveling succeeded */
7473 WL_SOFTWARE = 2, /* S/W wleveling passed 2 contiguous
7474 settings. */
7475 WL_SOFTWARE1 = 3, /* S/W wleveling passed 1 marginal
7476 setting. */
7477 } sw_wl_status_t;
7478
7479 static const char *wl_status_strings[] = {
7480 "(e)",
7481 " ",
7482 " ",
7483 "(1)"
7484 };
7485 int sw_wlevel_hw_default = 1; // FIXME: make H/W assist the default now
7486 #pragma pack(pop)
7487
7488 if ((s = lookup_env_parameter("ddr_sw_wlevel_hw")) != NULL) {
7489 sw_wlevel_hw_default = !!strtoul(s, NULL, 0);
7490 }
7491
7492 // cannot use hw-assist when doing 32-bit
7493 if (! ddr_interface_64b) {
7494 sw_wlevel_hw_default = 0;
7495 }
7496
7497 if ((s = lookup_env_parameter("ddr_software_wlevel")) != NULL) {
7498 sw_wlevel_enable = strtoul(s, NULL, 0);
7499 }
7500
7501 #if SWL_WITH_HW_ALTS_CHOOSE_SW
7502 // Choose the SW algo for SWL if any HWL alternates were found
7503 // NOTE: we have to decide this here, for all ranks, since HW-assist (including the ECC byte) requires ECC to be enabled
7504 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
7505 if (!sw_wlevel_enable)
7506 break;
7507 if (!(rank_mask & (1 << rankx)))
7508 continue;
7509
7510 // if we are doing HW-assist, and there are alternates, switch to SW-algorithm for all
7511 if (sw_wlevel_hw && hwl_alts[rankx].hwl_alt_mask) {
7512 ddr_print("N%d.LMC%d.R%d: Using SW algorithm for write-leveling this rank\n",
7513 node, ddr_interface_num, rankx);
7514 sw_wlevel_hw_default = 0;
7515 break;
7516 }
7517 } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
7518 #endif
7519
7520 /* Get the measured_vref setting from the config, check for an override... */
7521 /* NOTE: measured_vref=1 (ON) means force use of MEASURED Vref... */
7522 // NOTE: measured VREF can only be done for DDR4
7523 if (ddr_type == DDR4_DRAM) {
7524 measured_vref_flag = custom_lmc_config->measured_vref;
7525 if ((s = lookup_env_parameter("ddr_measured_vref")) != NULL) {
7526 measured_vref_flag = !!strtoul(s, NULL, 0);
7527 }
7528 } else {
7529 measured_vref_flag = 0; // OFF for DDR3
7530 }
7531
7532 /* Ensure ECC is disabled for DRAM tests when using the SW algorithm; otherwise leave it untouched */
7533 if (!sw_wlevel_hw_default) {
7534 lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
7535 lmc_config.s.ecc_ena = 0;
7536 DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
7537 }
7538
7539 #if USE_L2_WAYS_LIMIT
7540 limit_l2_ways(node, 0, 0); /* Disable l2 sets for DRAM testing */
7541 #endif
7542
7543 /* We need to track absolute rank number, as well as how many
7544 ** active ranks we have. Two single rank DIMMs show up as
7545 ** ranks 0 and 2, but only 2 ranks are active. */
7546 active_rank = 0;
7547
7548 interfaces = bdk_pop(ddr_interface_mask);
7549
7550 #define VREF_RANGE1_LIMIT 0x33 // range1 is valid for 0x00 - 0x32
7551 #define VREF_RANGE2_LIMIT 0x18 // range2 is valid for 0x00 - 0x17
7552 // full window is valid for 0x00 to 0x4A
7553 // let 0x00 - 0x17 be range2, 0x18 - 0x4a be range 1
7554 #define VREF_LIMIT (VREF_RANGE1_LIMIT + VREF_RANGE2_LIMIT)
7555 #define VREF_FINAL (VREF_LIMIT - 1)
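// Illustrative mapping of the combined Vref index swept below (assuming the
// range encoding implied by the summary prints, where a range argument of 1
// corresponds to DDR4 VrefDQ Range 2): indices 0x00-0x17 select Range 2
// values 0x00-0x17, indices 0x18 and up select Range 1 value (index - 0x18),
// and the last index, VREF_FINAL (0x4A), is an extra pass that programs the
// final chosen setting instead of sweeping.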
7556
7557 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
7558 uint64_t rank_addr;
7559 int vref_value, final_vref_value, final_vref_range = 0;
7560 int start_vref_value = 0, computed_final_vref_value = -1;
7561 char best_vref_values_count, vref_values_count;
7562 char best_vref_values_start, vref_values_start;
7563
7564 int bytes_failed;
7565 sw_wl_status_t byte_test_status[9];
7566 sw_wl_status_t sw_wl_rank_status = WL_HARDWARE;
7567 int sw_wl_failed = 0;
7568 int sw_wlevel_hw = sw_wlevel_hw_default;
7569
7570 if (!sw_wlevel_enable)
7571 break;
7572
7573 if (!(rank_mask & (1 << rankx)))
7574 continue;
7575
7576 ddr_print("N%d.LMC%d.R%d: Performing Software Write-Leveling %s\n",
7577 node, ddr_interface_num, rankx,
7578 (sw_wlevel_hw) ? "with H/W assist" : "with S/W algorithm");
7579
7580 if ((ddr_type == DDR4_DRAM) && (num_ranks != 4)) {
7581 // always compute when we can...
7582 computed_final_vref_value = compute_vref_value(node, ddr_interface_num, rankx,
7583 dimm_count, num_ranks, imp_values,
7584 is_stacked_die);
7585 if (!measured_vref_flag) // but only use it if allowed
7586 start_vref_value = VREF_FINAL; // skip all the measured Vref processing; just do the final setting
7587 }
7588
7589 /* Save off the h/w wl results */
7590 lmc_wlevel_rank_hw_results.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
7591
7592 vref_values_count = 0;
7593 vref_values_start = 0;
7594 best_vref_values_count = 0;
7595 best_vref_values_start = 0;
7596
7597 /* Loop one extra time using the Final Vref value. */
7598 for (vref_value = start_vref_value; vref_value < VREF_LIMIT; ++vref_value) {
7599 if (ddr_type == DDR4_DRAM) {
7600 if (vref_value < VREF_FINAL) {
7601 int vrange, vvalue;
7602 if (vref_value < VREF_RANGE2_LIMIT) {
7603 vrange = 1; vvalue = vref_value;
7604 } else {
7605 vrange = 0; vvalue = vref_value - VREF_RANGE2_LIMIT;
7606 }
7607 set_vref(node, ddr_interface_num, rankx,
7608 vrange, vvalue);
7609 } else { /* if (vref_value < VREF_FINAL) */
7610 /* Print the final Vref value first. */
7611
7612 /* Always print the computed value first if it's valid */
7613 if (computed_final_vref_value >= 0) {
7614 ddr_print("N%d.LMC%d.R%d: Vref Computed Summary :"
7615 " %2d (0x%02x)\n",
7616 node, ddr_interface_num,
7617 rankx, computed_final_vref_value,
7618 computed_final_vref_value);
7619 }
7620 if (!measured_vref_flag) { // setup to use the computed
7621 best_vref_values_count = 1;
7622 final_vref_value = computed_final_vref_value;
7623 } else { // setup to use the measured
7624 if (best_vref_values_count > 0) {
7625 best_vref_values_count = max(best_vref_values_count, 2);
7626 #if 0
7627 // NOTE: this already adjusts VREF via calculating 40% rather than 50%
7628 final_vref_value = best_vref_values_start + divide_roundup((best_vref_values_count-1)*4,10);
7629 ddr_print("N%d.LMC%d.R%d: Vref Training Summary :"
7630 " %2d <----- %2d (0x%02x) -----> %2d range: %2d\n",
7631 node, ddr_interface_num, rankx, best_vref_values_start,
7632 final_vref_value, final_vref_value,
7633 best_vref_values_start+best_vref_values_count-1,
7634 best_vref_values_count-1);
7635 #else
7636 final_vref_value = best_vref_values_start + divide_nint(best_vref_values_count - 1, 2);
7637 if (final_vref_value < VREF_RANGE2_LIMIT) {
7638 final_vref_range = 1;
7639 } else {
7640 final_vref_range = 0; final_vref_value -= VREF_RANGE2_LIMIT;
7641 }
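// Worked example (illustrative): if the best passing window starts at
// combined index 0x20 with best_vref_values_count = 9, the midpoint is
// 0x20 + nint(8/2) = 0x24; that is >= VREF_RANGE2_LIMIT, so it is programmed
// as Range 1 (final_vref_range = 0) with value 0x24 - 0x18 = 0x0C.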
7642 {
7643 int vvlo = best_vref_values_start;
7644 int vrlo;
7645 if (vvlo < VREF_RANGE2_LIMIT) {
7646 vrlo = 2;
7647 } else {
7648 vrlo = 1; vvlo -= VREF_RANGE2_LIMIT;
7649 }
7650
7651 int vvhi = best_vref_values_start + best_vref_values_count - 1;
7652 int vrhi;
7653 if (vvhi < VREF_RANGE2_LIMIT) {
7654 vrhi = 2;
7655 } else {
7656 vrhi = 1; vvhi -= VREF_RANGE2_LIMIT;
7657 }
7658 ddr_print("N%d.LMC%d.R%d: Vref Training Summary :"
7659 " 0x%02x/%1d <----- 0x%02x/%1d -----> 0x%02x/%1d, range: %2d\n",
7660 node, ddr_interface_num, rankx,
7661 vvlo, vrlo,
7662 final_vref_value, final_vref_range + 1,
7663 vvhi, vrhi,
7664 best_vref_values_count-1);
7665 }
7666 #endif
7667
7668 } else {
7669 /* If nothing passed use the default Vref value for this rank */
7670 bdk_lmcx_modereg_params2_t lmc_modereg_params2;
7671 lmc_modereg_params2.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num));
7672 final_vref_value = (lmc_modereg_params2.u >> (rankx * 10 + 3)) & 0x3f;
7673 final_vref_range = (lmc_modereg_params2.u >> (rankx * 10 + 9)) & 0x01;
7674
7675 ddr_print("N%d.LMC%d.R%d: Vref Using Default :"
7676 " %2d <----- %2d (0x%02x) -----> %2d, range%1d\n",
7677 node, ddr_interface_num, rankx,
7678 final_vref_value, final_vref_value,
7679 final_vref_value, final_vref_value, final_vref_range+1);
7680 }
7681 }
7682
7683 // allow override
7684 if ((s = lookup_env_parameter("ddr%d_vref_value_%1d%1d",
7685 ddr_interface_num, !!(rankx&2), !!(rankx&1))) != NULL) {
7686 final_vref_value = strtoul(s, NULL, 0);
7687 }
7688
7689 set_vref(node, ddr_interface_num, rankx, final_vref_range, final_vref_value);
7690
7691 } /* if (vref_value < VREF_FINAL) */
7692 } /* if (ddr_type == DDR4_DRAM) */
7693
7694 lmc_wlevel_rank.u = lmc_wlevel_rank_hw_results.u; /* Restore the saved value */
7695
7696 for (byte = 0; byte < 9; ++byte)
7697 byte_test_status[byte] = WL_ESTIMATED;
7698
7699 if (wlevel_bitmask_errors == 0) {
7700
7701 /* Determine address of DRAM to test for pass 1 of software write leveling. */
7702 rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable + (interfaces/2)));
7703 // FIXME: these now put in by test_dram_byte()
7704 //rank_addr |= (ddr_interface_num<<7); /* Map address into proper interface */
7705 //rank_addr = bdk_numa_get_address(node, rank_addr);
7706 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: Active Rank %d Address: 0x%llx\n",
7707 node, ddr_interface_num, rankx, active_rank, rank_addr);
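// The computation above assumes each active rank occupies a contiguous
// 2^(pbank_lsb - bunk_enable + interfaces/2) byte window; e.g. pbank_lsb = 33,
// bunk_enable = 1 and interfaces = 2 (illustrative numbers) give a shift of 33,
// so the second active rank is tested at 1ULL << 33 = 8 GB into the rank space.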
7708
7709 { // start parallel write-leveling block for delay high-order bits
7710 int errors = 0;
7711 int byte_delay[9];
7712 uint64_t bytemask;
7713 int bytes_todo;
7714
7715 if (ddr_interface_64b) {
7716 bytes_todo = (sw_wlevel_hw) ? ddr_interface_bytemask : 0xFF;
7717 bytemask = ~0ULL;
7718 } else { // 32-bit, must be using SW algo, only data bytes
7719 bytes_todo = 0x0f;
7720 bytemask = 0x00000000ffffffffULL;
7721 }
7722
7723 for (byte = 0; byte < 9; ++byte) {
7724 if (!(bytes_todo & (1 << byte))) {
7725 byte_delay[byte] = 0;
7726 } else {
7727 byte_delay[byte] = get_wlevel_rank_struct(&lmc_wlevel_rank, byte);
7728 }
7729 } /* for (byte = 0; byte < 9; ++byte) */
7730
7731 #define WL_MIN_NO_ERRORS_COUNT 3 // FIXME? three passes without errors
7732 int no_errors_count = 0;
7733
7734 // Change verbosity if using measured vs computed VREF or DDR3
7735 // measured goes many times through SWL, computed and DDR3 only once
7736 // so we want the EXHAUSTED messages at NORM for computed and DDR3,
7737 // and at DEV2 for measured, just for completeness
7738 int vbl_local = (measured_vref_flag) ? VBL_DEV2 : VBL_NORM;
7739 uint64_t bad_bits[2];
7740 #if ENABLE_SW_WLEVEL_UTILIZATION
7741 uint64_t sum_dram_dclk = 0, sum_dram_ops = 0;
7742 uint64_t start_dram_dclk, stop_dram_dclk;
7743 uint64_t start_dram_ops, stop_dram_ops;
7744 #endif
7745 do {
7746 // write the current set of WL delays
7747 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
7748 lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
7749
7750 bdk_watchdog_poke();
7751
7752 // do the test
7753 if (sw_wlevel_hw) {
7754 errors = run_best_hw_patterns(node, ddr_interface_num, rank_addr,
7755 DBTRAIN_TEST, bad_bits);
7756 errors &= bytes_todo; // keep only the ones we are still doing
7757 } else {
7758 #if ENABLE_SW_WLEVEL_UTILIZATION
7759 start_dram_dclk = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num));
7760 start_dram_ops = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(ddr_interface_num));
7761 #endif
7762 #if USE_ORIG_TEST_DRAM_BYTE
7763 errors = test_dram_byte(node, ddr_interface_num, rank_addr, bytemask, bad_bits);
7764 #else
7765 errors = dram_tuning_mem_xor(node, ddr_interface_num, rank_addr, bytemask, bad_bits);
7766 #endif
7767 #if ENABLE_SW_WLEVEL_UTILIZATION
7768 stop_dram_dclk = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num));
7769 stop_dram_ops = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(ddr_interface_num));
7770 sum_dram_dclk += stop_dram_dclk - start_dram_dclk;
7771 sum_dram_ops += stop_dram_ops - start_dram_ops;
7772 #endif
7773 }
7774
7775 VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: S/W write-leveling TEST: returned 0x%x\n",
7776 node, ddr_interface_num, rankx, errors);
7777
7778 // remember, errors will not be returned for byte-lanes that have maxed out...
7779 if (errors == 0) {
7780 no_errors_count++; // bump
7781 if (no_errors_count > 1) // bypass check/update completely
7782 continue; // to end of do-while
7783 } else
7784 no_errors_count = 0; // reset
7785
7786 // check errors by byte
7787 for (byte = 0; byte < 9; ++byte) {
7788 if (!(bytes_todo & (1 << byte)))
7789 continue;
7790
7791 delay = byte_delay[byte];
7792 if (errors & (1 << byte)) { // yes, an error in this byte lane
7793 debug_print(" byte %d delay %2d Errors\n", byte, delay);
7794 // since this byte had an error, we move to the next delay value, unless maxed out
7795 delay += 8; // incr by 8 to do only delay high-order bits
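// (Assumption: the WLEVEL byte fields count in 1/8-CK steps, so the low 3
// bits trained by hardware are the intra-clock phase and adding 8 moves the
// launch point by one whole CK without disturbing that phase.)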
7796 if (delay < 32) {
7797 update_wlevel_rank_struct(&lmc_wlevel_rank, byte, delay);
7798 debug_print(" byte %d delay %2d New\n", byte, delay);
7799 byte_delay[byte] = delay;
7800 } else { // reached max delay, maybe really done with this byte
7801 #if SWL_TRY_HWL_ALT
7802 if (!measured_vref_flag && // consider an alt only for computed VREF and
7803 (hwl_alts[rankx].hwl_alt_mask & (1 << byte))) // if an alt exists...
7804 {
7805 int bad_delay = delay & 0x6; // just orig low-3 bits
7806 delay = hwl_alts[rankx].hwl_alt_delay[byte]; // yes, use it
7807 hwl_alts[rankx].hwl_alt_mask &= ~(1 << byte); // clear that flag
7808 update_wlevel_rank_struct(&lmc_wlevel_rank, byte, delay);
7809 byte_delay[byte] = delay;
7810 debug_print(" byte %d delay %2d ALTERNATE\n", byte, delay);
7811 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: SWL: Byte %d: %d FAIL, trying ALTERNATE %d\n",
7812 node, ddr_interface_num, rankx, byte, bad_delay, delay);
7813
7814 } else
7815 #endif /* SWL_TRY_HWL_ALT */
7816 {
7817 unsigned bits_bad;
7818 if (byte < 8) {
7819 bytemask &= ~(0xffULL << (8*byte)); // test no longer, remove from byte mask
7820 bits_bad = (unsigned)((bad_bits[0] >> (8 * byte)) & 0xffUL);
7821 } else {
7822 bits_bad = (unsigned)(bad_bits[1] & 0xffUL);
7823 }
7824 bytes_todo &= ~(1 << byte); // remove from bytes to do
7825 byte_test_status[byte] = WL_ESTIMATED; // make sure this is set for this case
7826 debug_print(" byte %d delay %2d Exhausted\n", byte, delay);
7827 VB_PRT(vbl_local, "N%d.LMC%d.R%d: SWL: Byte %d (0x%02x): delay %d EXHAUSTED \n",
7828 node, ddr_interface_num, rankx, byte, bits_bad, delay);
7829 }
7830 }
7831 } else { // no error, stay with current delay, but keep testing it...
7832 debug_print(" byte %d delay %2d Passed\n", byte, delay);
7833 byte_test_status[byte] = WL_HARDWARE; // change status
7834 }
7835
7836 } /* for (byte = 0; byte < 9; ++byte) */
7837
7838 } while (no_errors_count < WL_MIN_NO_ERRORS_COUNT);
7839
7840 #if ENABLE_SW_WLEVEL_UTILIZATION
7841 if (! sw_wlevel_hw) {
7842 uint64_t percent_x10;
7843 if (sum_dram_dclk == 0)
7844 sum_dram_dclk = 1;
7845 percent_x10 = sum_dram_ops * 1000 / sum_dram_dclk;
7846 ddr_print("N%d.LMC%d.R%d: ops %lu, cycles %lu, used %lu.%lu%%\n",
7847 node, ddr_interface_num, rankx, sum_dram_ops, sum_dram_dclk,
7848 percent_x10 / 10, percent_x10 % 10);
7849 }
7850 #endif
7851 if (errors) {
7852 debug_print("End WLEV_64 while loop: vref_value %d(0x%x), errors 0x%02x\n",
7853 vref_value, vref_value, errors);
7854 }
7855 } // end parallel write-leveling block for delay high-order bits
7856
7857 if (sw_wlevel_hw) { // if we used HW-assist, the ECC byte was already handled when appropriate
7858 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: HW-assist SWL - no ECC estimate!!!\n",
7859 node, ddr_interface_num, rankx);
7860 goto no_ecc_estimate;
7861 }
7862
7863 if ((ddr_interface_bytemask & 0xff) == 0xff) {
7864 if (use_ecc) {
7865 int save_byte8 = lmc_wlevel_rank.s.byte8; // save original HW delay
7866 byte_test_status[8] = WL_HARDWARE; /* say it is H/W delay value */
7867
7868 if ((save_byte8 != lmc_wlevel_rank.s.byte3) &&
7869 (save_byte8 != lmc_wlevel_rank.s.byte4))
7870 {
7871 // try to calculate a workable delay using SW bytes 3 and 4 and HW byte 8
7872 int test_byte8 = save_byte8;
7873 int test_byte8_error;
7874 int byte8_error = 0x1f;
7875 int adder;
7876 int avg_bytes = divide_nint(lmc_wlevel_rank.s.byte3+lmc_wlevel_rank.s.byte4, 2);
7877 for (adder = 0; adder<= 32; adder+=8) {
7878 test_byte8_error = _abs((adder+save_byte8) - avg_bytes);
7879 if (test_byte8_error < byte8_error) {
7880 byte8_error = test_byte8_error;
7881 test_byte8 = save_byte8 + adder;
7882 }
7883 }
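// Illustrative run of the search above: with byte3 = 20, byte4 = 18
// (avg_bytes = 19) and a hardware byte8 of 4, the candidates are
// 4/12/20/28/36; 20 gives the smallest distance from 19, so test_byte8
// becomes 20, i.e. the original value plus two increments of 8.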
7884
7885 #if SW_WL_CHECK_PATCH
7886 // only do the check if we are not using measured VREF
7887 if (!measured_vref_flag) {
7888 test_byte8 &= ~1; /* Use only even settings, rounding down... */
7889
7890 // do validity check on the calculated ECC delay value
7891 // this depends on the DIMM type
7892 if (spd_rdimm) { // RDIMM
7893 if (spd_dimm_type != 5) { // but not mini-RDIMM
7894 // it can be > byte4, but should never be > byte3
7895 if (test_byte8 > lmc_wlevel_rank.s.byte3) {
7896 byte_test_status[8] = WL_ESTIMATED; /* say it is still estimated */
7897 }
7898 }
7899 } else { // UDIMM
7900 if ((test_byte8 < lmc_wlevel_rank.s.byte3) ||
7901 (test_byte8 > lmc_wlevel_rank.s.byte4))
7902 { // should never be outside the byte 3-4 range
7903 byte_test_status[8] = WL_ESTIMATED; /* say it is still estimated */
7904 }
7905 }
7906 /*
7907 * Report whenever the calculation appears bad.
7908 * This happens if some of the original values were off, or unexpected geometry
7909 * from DIMM type, or custom circuitry (NIC225E, I am looking at you!).
7910 * We will trust the calculated value, and depend on later testing to catch
7911 * any instances when that value is truly bad.
7912 */
7913 if (byte_test_status[8] == WL_ESTIMATED) { // ESTIMATED means there may be an issue
7914 ddr_print("N%d.LMC%d.R%d: SWL: (%cDIMM): calculated ECC delay unexpected (%d/%d/%d)\n",
7915 node, ddr_interface_num, rankx, (spd_rdimm?'R':'U'),
7916 lmc_wlevel_rank.s.byte4, test_byte8, lmc_wlevel_rank.s.byte3);
7917 byte_test_status[8] = WL_HARDWARE;
7918 }
7919 }
7920 #endif /* SW_WL_CHECK_PATCH */
7921 lmc_wlevel_rank.s.byte8 = test_byte8 & ~1; /* Use only even settings */
7922 }
7923
7924 if (lmc_wlevel_rank.s.byte8 != save_byte8) {
7925 /* Change the status if s/w adjusted the delay */
7926 byte_test_status[8] = WL_SOFTWARE; /* Estimated delay */
7927 }
7928 } else {
7929 byte_test_status[8] = WL_HARDWARE; /* H/W delay value */
7930 lmc_wlevel_rank.s.byte8 = lmc_wlevel_rank.s.byte0; /* ECC is not used */
7931 }
7932 } else { /* if ((ddr_interface_bytemask & 0xff) == 0xff) */
7933 if (use_ecc) {
7934 /* Estimate the ECC byte delay */
7935 lmc_wlevel_rank.s.byte4 |= (lmc_wlevel_rank.s.byte3 & 0x38); // add hi-order to b4
7936 if ((lmc_wlevel_rank.s.byte4 & 0x06) < (lmc_wlevel_rank.s.byte3 & 0x06)) // orig b4 < orig b3
7937 lmc_wlevel_rank.s.byte4 += 8; // must be next clock
7938 } else {
7939 lmc_wlevel_rank.s.byte4 = lmc_wlevel_rank.s.byte0; /* ECC is not used */
7940 }
7941 /* Change the status if s/w adjusted the delay */
7942 byte_test_status[4] = WL_SOFTWARE; /* Estimated delay */
7943 } /* if ((ddr_interface_bytemask & 0xff) == 0xff) */
7944 } /* if (wlevel_bitmask_errors == 0) */
7945
7946 no_ecc_estimate:
7947
7948 bytes_failed = 0;
7949 for (byte = 0; byte < 9; ++byte) {
7950 /* Don't accumulate errors for untested bytes. */
7951 if (!(ddr_interface_bytemask & (1 << byte)))
7952 continue;
7953 bytes_failed += (byte_test_status[byte] == WL_ESTIMATED);
7954 }
7955
7956 /* Vref training loop is only used for DDR4 */
7957 if (ddr_type != DDR4_DRAM)
7958 break;
7959
7960 if (bytes_failed == 0) {
7961 if (vref_values_count == 0) {
7962 vref_values_start = vref_value;
7963 }
7964 ++vref_values_count;
7965 if (vref_values_count > best_vref_values_count) {
7966 best_vref_values_count = vref_values_count;
7967 best_vref_values_start = vref_values_start;
7968 debug_print("N%d.LMC%d.R%d: Vref Training (%2d) : 0x%02x <----- ???? -----> 0x%02x\n",
7969 node, ddr_interface_num,
7970 rankx, vref_value, best_vref_values_start,
7971 best_vref_values_start+best_vref_values_count-1);
7972 }
7973 } else {
7974 vref_values_count = 0;
7975 debug_print("N%d.LMC%d.R%d: Vref Training (%2d) : failed\n",
7976 node, ddr_interface_num,
7977 rankx, vref_value);
7978 }
7979 } /* for (vref_value=0; vref_value<VREF_LIMIT; ++vref_value) */
7980
7981 /* Determine address of DRAM to test for pass 2 and final test of software write leveling. */
7982 rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable + (interfaces/2)));
7983 rank_addr |= (ddr_interface_num<<7); /* Map address into proper interface */
7984 rank_addr = bdk_numa_get_address(node, rank_addr);
7985 debug_print("N%d.LMC%d.R%d: Active Rank %d Address: 0x%lx\n",
7986 node, ddr_interface_num, rankx, active_rank, rank_addr);
7987
7988 int errors;
7989
7990 if (bytes_failed) {
7991
7992 #if !DISABLE_SW_WL_PASS_2
7993
7994 ddr_print("N%d.LMC%d.R%d: Starting SW Write-leveling pass 2\n",
7995 node, ddr_interface_num, rankx);
7996 sw_wl_rank_status = WL_SOFTWARE;
7997
7998 /* If previous s/w fixups failed then retry using s/w write-leveling. */
7999 if (wlevel_bitmask_errors == 0) {
8000 /* h/w succeeded but previous s/w fixups failed. So retry s/w. */
8001 debug_print("N%d.LMC%d.R%d: Retrying software Write-Leveling.\n",
8002 node, ddr_interface_num, rankx);
8003 }
8004
8005 { // start parallel write-leveling block for delay low-order bits
8006 int byte_delay[8];
8007 int byte_passed[8];
8008 uint64_t bytemask;
8009 uint64_t bitmask;
8010 int wl_offset;
8011 int bytes_todo;
8012
8013 for (byte = 0; byte < 8; ++byte) {
8014 byte_passed[byte] = 0;
8015 }
8016
8017 bytes_todo = ddr_interface_bytemask;
8018
8019 for (wl_offset = sw_wlevel_offset; wl_offset >= 0; --wl_offset) {
8020 debug_print("Starting wl_offset for-loop: %d\n", wl_offset);
8021
8022 bytemask = 0;
8023
8024 for (byte = 0; byte < 8; ++byte) {
8025 byte_delay[byte] = 0;
8026 if (!(bytes_todo & (1 << byte))) // this does not contain fully passed bytes
8027 continue;
8028
8029 byte_passed[byte] = 0; // reset across passes if not fully passed
8030 update_wlevel_rank_struct(&lmc_wlevel_rank, byte, 0); // all delays start at 0
8031 bitmask = ((!ddr_interface_64b) && (byte == 4)) ? 0x0f: 0xff;
8032 bytemask |= bitmask << (8*byte); // set the bytes bits in the bytemask
8033 } /* for (byte = 0; byte < 8; ++byte) */
8034
8035 while (bytemask != 0) { // start a pass if there is any byte lane to test
8036
8037 debug_print("Starting bytemask while-loop: 0x%lx\n", bytemask);
8038
8039 // write this set of WL delays
8040 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
8041 lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
8042
8043 bdk_watchdog_poke();
8044
8045 // do the test
8046 if (sw_wlevel_hw)
8047 errors = run_best_hw_patterns(node, ddr_interface_num, rank_addr,
8048 DBTRAIN_TEST, NULL);
8049 else
8050 errors = test_dram_byte(node, ddr_interface_num, rank_addr, bytemask, NULL);
8051
8052 debug_print("SWL pass 2: test_dram_byte returned 0x%x\n", errors);
8053
8054 // check errors by byte
8055 for (byte = 0; byte < 8; ++byte) {
8056 if (!(bytes_todo & (1 << byte)))
8057 continue;
8058
8059 delay = byte_delay[byte];
8060 if (errors & (1 << byte)) { // yes, an error
8061 debug_print(" byte %d delay %2d Errors\n", byte, delay);
8062 byte_passed[byte] = 0;
8063 } else { // no error
8064 byte_passed[byte] += 1;
8065 if (byte_passed[byte] == (1 + wl_offset)) { /* Look for consecutive working settings */
8066 debug_print(" byte %d delay %2d FULLY Passed\n", byte, delay);
8067 if (wl_offset == 1) {
8068 byte_test_status[byte] = WL_SOFTWARE;
8069 } else if (wl_offset == 0) {
8070 byte_test_status[byte] = WL_SOFTWARE1;
8071 }
8072 bytemask &= ~(0xffULL << (8*byte)); // test no longer, remove from byte mask this pass
8073 bytes_todo &= ~(1 << byte); // remove completely from concern
8074 continue; // on to the next byte, bypass delay updating!!
8075 } else {
8076 debug_print(" byte %d delay %2d Passed\n", byte, delay);
8077 }
8078 }
8079 // error or not, we move to the next delay value for this byte unless all delays are done;
8080 // only a byte that has "fully passed" bypasses this (via the continue above)
8081 delay += 2;
8082 if (delay < 32) {
8083 update_wlevel_rank_struct(&lmc_wlevel_rank, byte, delay);
8084 debug_print(" byte %d delay %2d New\n", byte, delay);
8085 byte_delay[byte] = delay;
8086 } else {
8087 // reached max delay, done with this byte
8088 debug_print(" byte %d delay %2d Exhausted\n", byte, delay);
8089 bytemask &= ~(0xffULL << (8*byte)); // test no longer, remove from byte mask this pass
8090 }
8091 } /* for (byte = 0; byte < 8; ++byte) */
8092 debug_print("End of for-loop: bytemask 0x%lx\n", bytemask);
8093
8094 } /* while (bytemask != 0) */
8095 } /* for (wl_offset = sw_wlevel_offset; wl_offset >= 0; --wl_offset) */
8096
8097 for (byte = 0; byte < 8; ++byte) {
8098 // any bytes left in bytes_todo did not pass
8099 if (bytes_todo & (1 << byte)) {
8100 /* Last resort. Use Rlevel settings to estimate
8101 Wlevel if software write-leveling fails */
8102 debug_print("Using RLEVEL as WLEVEL estimate for byte %d\n", byte);
8103 lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
8104 rlevel_to_wlevel(&lmc_rlevel_rank, &lmc_wlevel_rank, byte);
8105 }
8106 } /* for (byte = 0; byte < 8; ++byte) */
8107
8108 sw_wl_failed = (bytes_todo != 0);
8109
8110 } // end parallel write-leveling block for delay low-order bits
8111
8112 if (use_ecc) {
8113 /* ECC byte has to be estimated. Take the average of the two surrounding bytes. */
8114 int test_byte8 = divide_nint(lmc_wlevel_rank.s.byte3
8115 + lmc_wlevel_rank.s.byte4
8116 + 2 /* round-up*/ , 2);
8117 lmc_wlevel_rank.s.byte8 = test_byte8 & ~1; /* Use only even settings */
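// Worked example (illustrative): byte3 = 18 and byte4 = 22 give
// test_byte8 = nint((18 + 22 + 2) / 2) = 21, and the & ~1 above rounds that
// down to the even setting 20.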
8118 byte_test_status[8] = WL_ESTIMATED; /* Estimated delay */
8119 } else {
8120 byte_test_status[8] = WL_HARDWARE; /* H/W delay value */
8121 lmc_wlevel_rank.s.byte8 = lmc_wlevel_rank.s.byte0; /* ECC is not used */
8122 }
8123
8124 /* Set delays for unused bytes to match byte 0. */
8125 for (byte=0; byte<8; ++byte) {
8126 if ((ddr_interface_bytemask & (1 << byte)))
8127 continue;
8128 update_wlevel_rank_struct(&lmc_wlevel_rank, byte,
8129 lmc_wlevel_rank.s.byte0);
8130 byte_test_status[byte] = WL_SOFTWARE;
8131 }
8132 #else /* !DISABLE_SW_WL_PASS_2 */
8133 // FIXME? the big hammer, did not even try SW WL pass2, assume only chip reset will help
8134 ddr_print("N%d.LMC%d.R%d: S/W write-leveling pass 1 failed\n",
8135 node, ddr_interface_num, rankx);
8136 sw_wl_failed = 1;
8137 #endif /* !DISABLE_SW_WL_PASS_2 */
8138
8139 } else { /* if (bytes_failed) */
8140
8141 // SW WL pass 1 was OK, write the settings
8142 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
8143 lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
8144
8145 #if SW_WL_CHECK_PATCH
8146 // do validity check on the delay values by running the test 1 more time...
8147 // FIXME: we really need to check the ECC byte setting here as well,
8148 // so we need to enable ECC for this test!!!
8149 // if there are any errors, claim SW WL failure
8150 {
8151 uint64_t datamask = (ddr_interface_64b) ? 0xffffffffffffffffULL : 0x00000000ffffffffULL;
8152
8153 // do the test
8154 if (sw_wlevel_hw) {
8155 errors = run_best_hw_patterns(node, ddr_interface_num, rank_addr,
8156 DBTRAIN_TEST, NULL) & 0x0ff;
8157 } else {
8158 #if USE_ORIG_TEST_DRAM_BYTE
8159 errors = test_dram_byte(node, ddr_interface_num, rank_addr, datamask, NULL);
8160 #else
8161 errors = dram_tuning_mem_xor(node, ddr_interface_num, rank_addr, datamask, NULL);
8162 #endif
8163 }
8164
8165 if (errors) {
8166 ddr_print("N%d.LMC%d.R%d: Wlevel Rank Final Test errors 0x%x\n",
8167 node, ddr_interface_num, rankx, errors);
8168 sw_wl_failed = 1;
8169 }
8170 }
8171 #endif /* SW_WL_CHECK_PATCH */
8172
8173 } /* if (bytes_failed) */
8174
8175 // FIXME? dump the WL settings, so we get more of a clue as to what happened where
8176 ddr_print("N%d.LMC%d.R%d: Wlevel Rank %#4x, 0x%016llX : %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %s\n",
8177 node, ddr_interface_num, rankx,
8178 lmc_wlevel_rank.s.status,
8179 lmc_wlevel_rank.u,
8180 lmc_wlevel_rank.s.byte8, wl_status_strings[byte_test_status[8]],
8181 lmc_wlevel_rank.s.byte7, wl_status_strings[byte_test_status[7]],
8182 lmc_wlevel_rank.s.byte6, wl_status_strings[byte_test_status[6]],
8183 lmc_wlevel_rank.s.byte5, wl_status_strings[byte_test_status[5]],
8184 lmc_wlevel_rank.s.byte4, wl_status_strings[byte_test_status[4]],
8185 lmc_wlevel_rank.s.byte3, wl_status_strings[byte_test_status[3]],
8186 lmc_wlevel_rank.s.byte2, wl_status_strings[byte_test_status[2]],
8187 lmc_wlevel_rank.s.byte1, wl_status_strings[byte_test_status[1]],
8188 lmc_wlevel_rank.s.byte0, wl_status_strings[byte_test_status[0]],
8189 (sw_wl_rank_status == WL_HARDWARE) ? "" : "(s)"
8190 );
8191
8192 // finally, check for fatal conditions: either chip reset right here, or return error flag
8193 if (((ddr_type == DDR4_DRAM) && (best_vref_values_count == 0)) || sw_wl_failed) {
8194 if (!ddr_disable_chip_reset) { // do chip RESET
8195 error_print("INFO: Short memory test indicates a retry is needed on N%d.LMC%d.R%d. Resetting node...\n",
8196 node, ddr_interface_num, rankx);
8197 bdk_wait_usec(500000);
8198 bdk_reset_chip(node);
8199 } else { // return error flag so LMC init can be retried...
8200 ddr_print("INFO: Short memory test indicates a retry is needed on N%d.LMC%d.R%d. Restarting LMC init...\n",
8201 node, ddr_interface_num, rankx);
8202 return 0; // 0 indicates restart possible...
8203 }
8204 }
8205
8206 active_rank++;
8207 } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
8208
8209 // Finalize the write-leveling settings
8210 for (rankx = 0; rankx < dimm_count * 4;rankx++) {
8211 uint64_t value;
8212 int parameter_set = 0;
8213 if (!(rank_mask & (1 << rankx)))
8214 continue;
8215
8216 lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
8217
8218 for (i=0; i<9; ++i) {
8219 if ((s = lookup_env_parameter("ddr%d_wlevel_rank%d_byte%d", ddr_interface_num, rankx, i)) != NULL) {
8220 parameter_set |= 1;
8221 value = strtoul(s, NULL, 0);
8222
8223 update_wlevel_rank_struct(&lmc_wlevel_rank, i, value);
8224 }
8225 }
8226
8227 if ((s = lookup_env_parameter_ull("ddr%d_wlevel_rank%d", ddr_interface_num, rankx)) != NULL) {
8228 parameter_set |= 1;
8229 value = strtoull(s, NULL, 0);
8230 lmc_wlevel_rank.u = value;
8231 }
8232
8233 if (parameter_set) {
8234 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
8235 lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
8236 display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx);
8237 }
8238 #if WLEXTRAS_PATCH
8239 if ((rank_mask & 0x0F) != 0x0F) { // if there are unused entries to be filled
8240 if (rankx < 3) {
8241 debug_print("N%d.LMC%d.R%d: checking for WLEVEL_RANK unused entries.\n",
8242 node, ddr_interface_num, rankx);
8243 if (rankx == 0) { // if rank 0, write ranks 1 and 2 here if empty
8244 if (!(rank_mask & (1<<1))) { // check that rank 1 is empty
8245 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, 1), lmc_wlevel_rank.u);
8246 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
8247 node, ddr_interface_num, rankx, 1);
8248 }
8249 if (!(rank_mask & (1<<2))) { // check that rank 2 is empty
8250 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
8251 node, ddr_interface_num, rankx, 2);
8252 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, 2), lmc_wlevel_rank.u);
8253 }
8254 }
8255 // if rank 0, 1 or 2, write rank 3 here if empty
8256 if (!(rank_mask & (1<<3))) { // check that rank 3 is empty
8257 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
8258 node, ddr_interface_num, rankx, 3);
8259 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, 3), lmc_wlevel_rank.u);
8260 }
8261 }
8262 }
8263 #endif /* WLEXTRAS_PATCH */
8264
8265 } /* for (rankx = 0; rankx < dimm_count * 4;rankx++) */
8266
8267 /* Restore the ECC configuration */
8268 if (!sw_wlevel_hw_default) {
8269 lmc_config.s.ecc_ena = use_ecc;
8270 DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
8271 }
8272
8273 #if USE_L2_WAYS_LIMIT
8274 /* Restore the l2 set configuration */
8275 if ((s = lookup_env_parameter("limit_l2_ways")) != NULL) {
8276 int ways = strtoul(s, NULL, 10);
8277 limit_l2_ways(node, ways, 1);
8278 } else {
8279 limit_l2_ways(node, bdk_l2c_get_num_assoc(node), 0);
8280 }
8281 #endif
8282
8283 } // End Software Write-Leveling block
8284
8285 #if ENABLE_DISPLAY_MPR_PAGE
8286 if (ddr_type == DDR4_DRAM) {
8287 Display_MPR_Page(node, rank_mask, ddr_interface_num, dimm_count, 2);
8288 Display_MPR_Page(node, rank_mask, ddr_interface_num, dimm_count, 0);
8289 }
8290 #endif
8291
8292 #ifdef CAVIUM_ONLY
8293 {
8294 int _i;
8295 int setting[9];
8296 bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
8297 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
8298
8299 for (_i=0; _i<9; ++_i) {
8300 SET_DDR_DLL_CTL3(dll90_byte_sel, ENCODE_DLL90_BYTE_SEL(_i));
8301 DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
8302 BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
8303 ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
8304 setting[_i] = GET_DDR_DLL_CTL3(dll90_setting);
8305 debug_print("%d. LMC%d_DLL_CTL3[%d] = %016lx %d\n", _i, ddr_interface_num,
8306 GET_DDR_DLL_CTL3(dll90_byte_sel), ddr_dll_ctl3.u, setting[_i]);
8307 }
8308
8309 VB_PRT(VBL_DEV, "N%d.LMC%d: %-36s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
8310 node, ddr_interface_num, "DLL90 Setting 8:0",
8311 setting[8], setting[7], setting[6], setting[5], setting[4],
8312 setting[3], setting[2], setting[1], setting[0]);
8313
8314 //BDK_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), save_ddr_dll_ctl3.u);
8315 }
8316 #endif /* CAVIUM_ONLY */
8317
8318 // any custom DLL read or write offsets, install them
8319 // FIXME: no need to do these if we are going to auto-tune... ???
8320
8321 process_custom_dll_offsets(node, ddr_interface_num, "ddr_dll_write_offset",
8322 custom_lmc_config->dll_write_offset, "ddr%d_dll_write_offset_byte%d", 1);
8323 process_custom_dll_offsets(node, ddr_interface_num, "ddr_dll_read_offset",
8324 custom_lmc_config->dll_read_offset, "ddr%d_dll_read_offset_byte%d", 2);
8325
8326 // we want to train write bit-deskew here...
8327 if (! disable_deskew_training) {
8328 if (enable_write_deskew) {
8329 ddr_print("N%d.LMC%d: WRITE BIT-DESKEW feature training begins.\n",
8330 node, ddr_interface_num);
8331 Perform_Write_Deskew_Training(node, ddr_interface_num);
8332 } /* if (enable_write_deskew) */
8333 } /* if (! disable_deskew_training) */
8334
8335 /*
8336 * 6.9.14 Final LMC Initialization
8337 *
8338 * Early LMC initialization, LMC write-leveling, and LMC read-leveling
8339 * must be completed prior to starting this final LMC initialization.
8340 *
8341 * LMC hardware updates the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1,
8342 * LMC(0)_SLOT_CTL2 CSRs with minimum values based on the selected
8343 * readleveling and write-leveling settings. Software should not write
8344 * the final LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and LMC(0)_SLOT_CTL2
8345 * values until after the final read-leveling and write-leveling settings
8346 * are written.
8347 *
8348 * Software must ensure the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and
8349 * LMC(0)_SLOT_CTL2 CSR values are appropriate for this step. These CSRs
8350 * select the minimum gaps between read operations and write operations
8351 * of various types.
8352 *
8353 * Software must not reduce the values in these CSR fields below the
8354 * values previously selected by the LMC hardware (during write-leveling
8355 * and read-leveling steps above).
8356 *
8357 * All sections in this chapter may be used to derive proper settings for
8358 * these registers.
8359 *
8360 * For minimal read latency, L2C_CTL[EF_ENA,EF_CNT] should be programmed
8361 * properly. This should be done prior to the first read.
8362 */
8363
8364 #if ENABLE_SLOT_CTL_ACCESS
8365 {
8366 bdk_lmcx_slot_ctl0_t lmc_slot_ctl0;
8367 bdk_lmcx_slot_ctl1_t lmc_slot_ctl1;
8368 bdk_lmcx_slot_ctl2_t lmc_slot_ctl2;
8369 bdk_lmcx_slot_ctl3_t lmc_slot_ctl3;
8370
8371 lmc_slot_ctl0.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL0(ddr_interface_num));
8372 lmc_slot_ctl1.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL1(ddr_interface_num));
8373 lmc_slot_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL2(ddr_interface_num));
8374 lmc_slot_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL3(ddr_interface_num));
8375
8376 ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL0", lmc_slot_ctl0.u);
8377 ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL1", lmc_slot_ctl1.u);
8378 ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL2", lmc_slot_ctl2.u);
8379 ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL3", lmc_slot_ctl3.u);
8380
        // For now, look only for a SLOT_CTL1 envvar to override its contents.
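        // Typical use (an assumption about intent): setting "ddr0_slot_ctl1=2" in the
        // environment adds 2 to each of the four cross-rank init gaps on LMC0, while
        // "ddr0_slot_ctl1=0" just reports the current SLOT_CTL1 value unchanged.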
8382 if ((s = lookup_env_parameter("ddr%d_slot_ctl1", ddr_interface_num)) != NULL) {
8383 int slot_ctl1_incr = strtoul(s, NULL, 0);
            // validate the value (0 is allowed; it just prints the current value)
            if ((slot_ctl1_incr < 0) || (slot_ctl1_incr > 3)) {
8386 error_print("ddr%d_slot_ctl1 illegal value (%d); must be 0-3\n",
8387 ddr_interface_num, slot_ctl1_incr);
8388 } else {
8389
8390 #define INCR(csr, chip, field, incr) \
8391 csr.chip.field = (csr.chip.field < (64 - incr)) ? (csr.chip.field + incr) : 63
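// Example of the saturation: with incr = 3, a field value of 58 becomes 61 and 60
// becomes 63, while 61, 62, and 63 all stay clamped at the maximum of 63.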
8392
8393 // only print original when we are changing it!
8394 if (slot_ctl1_incr)
8395 ddr_print("%-45s : 0x%016lx\n", "LMC_SLOT_CTL1", lmc_slot_ctl1.u);
8396
            // For now, bump all four cross-rank init fields of SLOT_CTL1 by the
            // increment, saturating at 63 so the fields cannot overflow.
8399 INCR(lmc_slot_ctl1, s, r2r_xrank_init, slot_ctl1_incr);
8400 INCR(lmc_slot_ctl1, s, r2w_xrank_init, slot_ctl1_incr);
8401 INCR(lmc_slot_ctl1, s, w2r_xrank_init, slot_ctl1_incr);
8402 INCR(lmc_slot_ctl1, s, w2w_xrank_init, slot_ctl1_incr);
8403 DRAM_CSR_WRITE(node, BDK_LMCX_SLOT_CTL1(ddr_interface_num), lmc_slot_ctl1.u);
8404 lmc_slot_ctl1.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL1(ddr_interface_num));
8405
            // Always print the resulting value, even when the increment is 0.
8407 printf("%-45s : 0x%016lx\n", "LMC_SLOT_CTL1", lmc_slot_ctl1.u);
8408 }
8409 }
8410 }
8411 #endif /* ENABLE_SLOT_CTL_ACCESS */
8412 {
8413 /* Clear any residual ECC errors */
8414 int num_tads = 1;
8415 int tad;
8416
8417 DRAM_CSR_WRITE(node, BDK_LMCX_INT(ddr_interface_num), -1ULL);
8418 BDK_CSR_READ(node, BDK_LMCX_INT(ddr_interface_num));
8419
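        // L2C_TAD(x)_INT_W1C is write-one-to-clear, so writing back the value just
        // read clears exactly the interrupt bits that are currently set.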
8420 for (tad=0; tad<num_tads; tad++)
8421 DRAM_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(tad), BDK_CSR_READ(node, BDK_L2C_TADX_INT_W1C(tad)));
8422
8423 ddr_print("%-45s : 0x%08llx\n", "LMC_INT",
8424 BDK_CSR_READ(node, BDK_LMCX_INT(ddr_interface_num)));
8425
8426 }
8427
8428 // Now we can enable scrambling if desired...
8429 {
8430 bdk_lmcx_control_t lmc_control;
8431 bdk_lmcx_scramble_cfg0_t lmc_scramble_cfg0;
8432 bdk_lmcx_scramble_cfg1_t lmc_scramble_cfg1;
8433 bdk_lmcx_scramble_cfg2_t lmc_scramble_cfg2;
8434 bdk_lmcx_ns_ctl_t lmc_ns_ctl;
8435
8436 lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
8437 lmc_scramble_cfg0.u = BDK_CSR_READ(node, BDK_LMCX_SCRAMBLE_CFG0(ddr_interface_num));
8438 lmc_scramble_cfg1.u = BDK_CSR_READ(node, BDK_LMCX_SCRAMBLE_CFG1(ddr_interface_num));
8439 lmc_scramble_cfg2.u = BDK_CSR_READ(node, BDK_LMCX_SCRAMBLE_CFG2(ddr_interface_num));
8440 lmc_ns_ctl.u = BDK_CSR_READ(node, BDK_LMCX_NS_CTL(ddr_interface_num));
8441
        /* Read the scramble setting from the config: 0 disables scrambling,
           1 enables it, and 2 means decide automatically from the trust level */
8444 int use_scramble = bdk_config_get_int(BDK_CONFIG_DRAM_SCRAMBLE);
8445 if (use_scramble == 2)
8446 {
8447 if (bdk_trust_get_level() >= BDK_TRUST_LEVEL_SIGNED)
8448 use_scramble = 1;
8449 else
8450 use_scramble = 0;
8451 }
8452
8453 /* Generate random values if scrambling is needed */
8454 if (use_scramble)
8455 {
8456 lmc_scramble_cfg0.u = bdk_rng_get_random64();
8457 lmc_scramble_cfg1.u = bdk_rng_get_random64();
8458 lmc_scramble_cfg2.u = bdk_rng_get_random64();
8459 lmc_ns_ctl.s.ns_scramble_dis = 0;
8460 lmc_ns_ctl.s.adr_offset = 0;
8461 lmc_control.s.scramble_ena = 1;
8462 }
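        // Note: once scrambling is enabled, the keys above must remain fixed for the
        // life of the memory contents; data written under one key cannot be read back
        // correctly under a different key.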
8463
8464 if ((s = lookup_env_parameter_ull("ddr_scramble_cfg0")) != NULL) {
8465 lmc_scramble_cfg0.u = strtoull(s, NULL, 0);
8466 lmc_control.s.scramble_ena = 1;
8467 }
8468 ddr_print("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG0", lmc_scramble_cfg0.u);
8469
8470 DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG0(ddr_interface_num), lmc_scramble_cfg0.u);
8471
8472 if ((s = lookup_env_parameter_ull("ddr_scramble_cfg1")) != NULL) {
8473 lmc_scramble_cfg1.u = strtoull(s, NULL, 0);
8474 lmc_control.s.scramble_ena = 1;
8475 }
8476 ddr_print("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG1", lmc_scramble_cfg1.u);
8477 DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG1(ddr_interface_num), lmc_scramble_cfg1.u);
8478
8479 if ((s = lookup_env_parameter_ull("ddr_scramble_cfg2")) != NULL) {
8480 lmc_scramble_cfg2.u = strtoull(s, NULL, 0);
8481 lmc_control.s.scramble_ena = 1;
8482 }
8483 ddr_print("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG2", lmc_scramble_cfg2.u);
8484 DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG2(ddr_interface_num), lmc_scramble_cfg2.u);
8485
8486 if ((s = lookup_env_parameter_ull("ddr_ns_ctl")) != NULL) {
8487 lmc_ns_ctl.u = strtoull(s, NULL, 0);
8488 }
8489 ddr_print("%-45s : 0x%016llx\n", "LMC_NS_CTL", lmc_ns_ctl.u);
8490 DRAM_CSR_WRITE(node, BDK_LMCX_NS_CTL(ddr_interface_num), lmc_ns_ctl.u);
8491
8492 DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
8493
8494 }
8495
8496 return(mem_size_mbytes);
8497 }
8498