/***********************license start***********************************
* Copyright (c) 2003-2017  Cavium Inc. ([email protected]). All rights
* reserved.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
*   * Redistributions of source code must retain the above copyright
*     notice, this list of conditions and the following disclaimer.
*
*   * Redistributions in binary form must reproduce the above
*     copyright notice, this list of conditions and the following
*     disclaimer in the documentation and/or other materials provided
*     with the distribution.
*
*   * Neither the name of Cavium Inc. nor the names of
*     its contributors may be used to endorse or promote products
*     derived from this software without specific prior written
*     permission.
*
* This Software, including technical data, may be subject to U.S. export
* control laws, including the U.S. Export Administration Act and its
* associated regulations, and may be subject to export or import
* regulations in other countries.
*
* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE  RISK
* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
***********************license end**************************************/
#include <bdk.h>
#include <bdk-coreboot.h>
#include "libbdk-arch/bdk-csrs-l2c_tad.h"
#include "libbdk-arch/bdk-csrs-mio_fus.h"
#include "dram-internal.h"

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <libbdk-hal/bdk-config.h>
#include <libbdk-hal/bdk-l2c.h>
#include <libbdk-hal/bdk-rng.h>
#include <libbdk-trust/bdk-trust.h>
#include <lame_string.h>

#define WODT_MASK_2R_1S 1 // FIXME: did not seem to make much difference with #152 1-slot?

#define DESKEW_RODT_CTL 1

// Set to 1 to use the feature whenever possible automatically.
// When 0, however, the feature is still available, and it can
// be enabled via envvar override "ddr_enable_write_deskew=1".
#define ENABLE_WRITE_DESKEW_DEFAULT 0

#define ENABLE_COMPUTED_VREF_ADJUSTMENT 1

#define RLEXTRAS_PATCH     1 // write to unused RL rank entries
#define WLEXTRAS_PATCH     1 // write to unused WL rank entries
#define ADD_48_OHM_SKIP    1
#define NOSKIP_40_48_OHM   1
#define NOSKIP_48_STACKED  1
#define NOSKIP_FOR_MINI    1
#define NOSKIP_FOR_2S_1R   1
#define MAJORITY_OVER_AVG  1
#define RANK_MAJORITY      MAJORITY_OVER_AVG && 1
#define SW_WL_CHECK_PATCH  1 // check validity after SW adjust
#define HW_WL_MAJORITY     1
#define SWL_TRY_HWL_ALT    HW_WL_MAJORITY && 1 // try HW WL base alternate if available when SW WL fails
#define DISABLE_SW_WL_PASS_2 1

#define HWL_BY_BYTE 0 // FIXME? set to 1 to do HWL a byte at a time (seemed to work better earlier?)

#define USE_ORIG_TEST_DRAM_BYTE 1

// collect and print LMC utilization using SWL software algorithm
#define ENABLE_SW_WLEVEL_UTILIZATION 0

#define COUNT_RL_CANDIDATES 1

#define LOOK_FOR_STUCK_BYTE      0
#define ENABLE_STUCK_BYTE_RESET  0

#define FAILSAFE_CHECK      1

#define PERFECT_BITMASK_COUNTING 1

#define DAC_OVERRIDE_EARLY  1

#define SWL_WITH_HW_ALTS_CHOOSE_SW 0 // FIXME: allow override?

#define DEBUG_VALIDATE_BITMASK 0
#if DEBUG_VALIDATE_BITMASK
#define debug_bitmask_print ddr_print
#else
#define debug_bitmask_print(...)
#endif

#define ENABLE_SLOT_CTL_ACCESS 0
#undef ENABLE_CUSTOM_RLEVEL_TABLE

#define ENABLE_DISPLAY_MPR_PAGE 0
#if ENABLE_DISPLAY_MPR_PAGE
static void Display_MPR_Page_Location(bdk_node_t node, int rank,
                                      int ddr_interface_num, int dimm_count,
                                      int page, int location, uint64_t *mpr_data);
#endif

#define USE_L2_WAYS_LIMIT 1

/* Read out Deskew Settings for DDR */

typedef struct {
    uint16_t bits[8];
} deskew_bytes_t;
typedef struct {
    deskew_bytes_t bytes[9];
} deskew_data_t;
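
// Each bits[] entry holds the raw 10-bit DSK_DBG_RD_DATA value for one
// bit lane; a sketch of the decode, matching Display_Deskew_Data() below:
//
//     uint16_t raw    = dskdat.bytes[lane].bits[i];
//     uint16_t flags  = raw & 7;   // lock/saturation status flags
//     uint16_t deskew = raw >> 3;  // the 7-bit deskew setting itself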

static void
Get_Deskew_Settings(bdk_node_t node, int ddr_interface_num, deskew_data_t *dskdat)
{
    bdk_lmcx_phy_ctl_t phy_ctl;
    bdk_lmcx_config_t  lmc_config;
    int bit_num, bit_index;
    int byte_lane, byte_limit;
    // NOTE: these are for pass 2.x
    int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx
    int bit_end = (is_t88p2) ? 9 : 8;

    lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
    byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;

    memset(dskdat, 0, sizeof(*dskdat));

    BDK_CSR_MODIFY(_phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
                   _phy_ctl.s.dsk_dbg_clk_scaler = 3);

    for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
        bit_index = 0;
        for (bit_num = 0; bit_num <= bit_end; ++bit_num) {        // NOTE: this is for pass 2.x

            if (bit_num == 4) continue;
            if ((bit_num == 5) && is_t88p2) continue;        // NOTE: this is for pass 2.x

            // set byte lane and bit to read
            BDK_CSR_MODIFY(_phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
                           (_phy_ctl.s.dsk_dbg_bit_sel = bit_num,
                            _phy_ctl.s.dsk_dbg_byte_sel = byte_lane));

            // start read sequence
            BDK_CSR_MODIFY(_phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
                           _phy_ctl.s.dsk_dbg_rd_start = 1);

            // poll for read sequence to complete
            do {
                phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
            } while (phy_ctl.s.dsk_dbg_rd_complete != 1);

            // record the data
            dskdat->bytes[byte_lane].bits[bit_index] = phy_ctl.s.dsk_dbg_rd_data & 0x3ff;
            bit_index++;

        } /* for (bit_num = 0; bit_num <= bit_end; ++bit_num) */
    } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */

    return;
}

static void
Display_Deskew_Data(bdk_node_t node, int ddr_interface_num,
                    deskew_data_t *dskdat, int print_enable)
{
    int byte_lane;
    int bit_num;
    uint16_t flags, deskew;
    bdk_lmcx_config_t lmc_config;
    int byte_limit;
    const char *fc = " ?-=+*#&";

    lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
    byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;

    if (print_enable) {
        VB_PRT(print_enable, "N%d.LMC%d: Deskew Data:              Bit =>      :",
                node, ddr_interface_num);
        for (bit_num = 7; bit_num >= 0; --bit_num)
            VB_PRT(print_enable, " %3d  ", bit_num);
        VB_PRT(print_enable, "\n");
    }

    for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
        if (print_enable)
            VB_PRT(print_enable, "N%d.LMC%d: Bit Deskew Byte %d %s               :",
                   node, ddr_interface_num, byte_lane,
                   (print_enable >= VBL_TME) ? "FINAL" : "     ");

        for (bit_num = 7; bit_num >= 0; --bit_num) {

            flags = dskdat->bytes[byte_lane].bits[bit_num] & 7;
            deskew = dskdat->bytes[byte_lane].bits[bit_num] >> 3;

            if (print_enable)
                VB_PRT(print_enable, " %3d %c", deskew, fc[flags^1]);

        } /* for (bit_num = 7; bit_num >= 0; --bit_num) */

        if (print_enable)
            VB_PRT(print_enable, "\n");

    } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */

    return;
}

static int
change_wr_deskew_ena(bdk_node_t node, int ddr_interface_num, int new_state)
{
    bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
    int saved_wr_deskew_ena;

    // return original WR_DESKEW_ENA setting
    ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
    saved_wr_deskew_ena = !!GET_DDR_DLL_CTL3(wr_deskew_ena);
    if (saved_wr_deskew_ena != !!new_state) { // write it only when changing it
        SET_DDR_DLL_CTL3(wr_deskew_ena, !!new_state);
        DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);
    }
    return saved_wr_deskew_ena;
}
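
// Typical save/restore usage (a sketch, as in Validate_Read_Deskew_Training
// below): disable write deskew around read-deskew work, then restore:
//
//     int saved = change_wr_deskew_ena(node, lmc, 0);
//     /* ... read deskew training/validation ... */
//     change_wr_deskew_ena(node, lmc, saved);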

typedef struct {
    int saturated;   // number saturated
    int unlocked;    // number unlocked
    int nibrng_errs; // nibble range errors
    int nibunl_errs; // nibble unlocked errors
    //int nibsat_errs; // nibble saturation errors
    int bitval_errs; // bit value errors
#if LOOK_FOR_STUCK_BYTE
    int bytes_stuck; // byte(s) stuck
#endif
} deskew_counts_t;

#define MIN_BITVAL  17
#define MAX_BITVAL 110

static deskew_counts_t deskew_training_results;
static int deskew_validation_delay = 10000; // FIXME: make this a var for overriding

static void
Validate_Read_Deskew_Training(bdk_node_t node, int rank_mask, int ddr_interface_num,
                              deskew_counts_t *counts, int print_enable)
{
    int byte_lane, bit_num, nib_num;
    int nibrng_errs, nibunl_errs, bitval_errs;
    //int nibsat_errs;
    bdk_lmcx_config_t  lmc_config;
    int16_t nib_min[2], nib_max[2], nib_unl[2]/*, nib_sat[2]*/;
    // NOTE: these are for pass 2.x
    int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx
    int bit_start = (is_t88p2) ? 9 : 8;
    int byte_limit;
#if LOOK_FOR_STUCK_BYTE
    uint64_t bl_mask[2]; // enough for 128 values
    int bit_values;
#endif
    deskew_data_t dskdat;
    int bit_index;
    int16_t flags, deskew;
    const char *fc = " ?-=+*#&";
    int saved_wr_deskew_ena;
    int bit_last;

    // save original WR_DESKEW_ENA setting, and disable it for read deskew
    saved_wr_deskew_ena = change_wr_deskew_ena(node, ddr_interface_num, 0);

    lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
    byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena;

    memset(counts, 0, sizeof(deskew_counts_t));

    Get_Deskew_Settings(node, ddr_interface_num, &dskdat);

    if (print_enable) {
        VB_PRT(print_enable, "N%d.LMC%d: Deskew Settings:          Bit =>      :",
                node, ddr_interface_num);
        for (bit_num = 7; bit_num >= 0; --bit_num)
            VB_PRT(print_enable, " %3d  ", bit_num);
        VB_PRT(print_enable, "\n");
    }

    for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
        if (print_enable)
            VB_PRT(print_enable, "N%d.LMC%d: Bit Deskew Byte %d %s               :",
                   node, ddr_interface_num, byte_lane,
                   (print_enable >= VBL_TME) ? "FINAL" : "     ");

        nib_min[0] = 127; nib_min[1] = 127;
        nib_max[0] = 0;   nib_max[1] = 0;
        nib_unl[0] = 0;   nib_unl[1] = 0;
        //nib_sat[0] = 0;   nib_sat[1] = 0;

#if LOOK_FOR_STUCK_BYTE
        bl_mask[0] = bl_mask[1] = 0;
#endif

        if ((lmc_config.s.mode32b == 1) && (byte_lane == 4)) {
            bit_index = 3;
            bit_last  = 3;
            if (print_enable)
                VB_PRT(print_enable, "                        ");
        } else {
            bit_index = 7;
            bit_last  = bit_start;
        }

        for (bit_num = bit_last; bit_num >= 0; --bit_num) {        // NOTE: this is for pass 2.x
            if (bit_num == 4) continue;
            if ((bit_num == 5) && is_t88p2) continue;        // NOTE: this is for pass 2.x

            nib_num = (bit_num > 4) ? 1 : 0;

            flags = dskdat.bytes[byte_lane].bits[bit_index] & 7;
            deskew = dskdat.bytes[byte_lane].bits[bit_index] >> 3;
            bit_index--;

            counts->saturated += !!(flags & 6);
            counts->unlocked  +=  !(flags & 1);

            nib_unl[nib_num]  +=  !(flags & 1);
            //nib_sat[nib_num]  += !!(flags & 6);

            if (flags & 1) { // FIXME? only do range when locked
                nib_min[nib_num] = min(nib_min[nib_num], deskew);
                nib_max[nib_num] = max(nib_max[nib_num], deskew);
            }

#if LOOK_FOR_STUCK_BYTE
            bl_mask[(deskew >> 6) & 1] |= 1UL << (deskew & 0x3f);
#endif

            if (print_enable)
                VB_PRT(print_enable, " %3d %c", deskew, fc[flags^1]);

        } /* for (bit_num = bit_last; bit_num >= 0; --bit_num) */

        /*
          Now look for nibble errors:

          For bit 55, it looks like a bit deskew problem. When the upper nibble of byte 6
           needs to go to saturation, bit 7 of byte 6 locks prematurely at 64.
          For DIMMs with raw card A and B, can we reset the deskew training when we encounter this case?
          The reset criteria should be looking at one nibble at a time for raw card A and B;
          if the bit-deskew setting within a nibble is different by > 33, we'll issue a reset
          to the bit deskew training.

          LMC0 Bit Deskew Byte(6): 64 0 - 0 - 0 - 26 61 35 64
        */
        // upper nibble range, then lower nibble range
        nibrng_errs  = ((nib_max[1] - nib_min[1]) > 33) ? 1 : 0;
        nibrng_errs |= ((nib_max[0] - nib_min[0]) > 33) ? 1 : 0;

        // check for nibble all unlocked
        nibunl_errs  = ((nib_unl[0] == 4) || (nib_unl[1] == 4)) ? 1 : 0;

        // check for nibble all saturated
        //nibsat_errs  = ((nib_sat[0] == 4) || (nib_sat[1] == 4)) ? 1 : 0;
        // check for bit value errors, i.e., < 17 or > 110
        // FIXME? assume max always > MIN_BITVAL and min < MAX_BITVAL
        bitval_errs  = ((nib_max[1] > MAX_BITVAL) || (nib_max[0] > MAX_BITVAL)) ? 1 : 0;
        bitval_errs |= ((nib_min[1] < MIN_BITVAL) || (nib_min[0] < MIN_BITVAL)) ? 1 : 0;

        if (((nibrng_errs != 0) || (nibunl_errs != 0) /*|| (nibsat_errs != 0)*/ || (bitval_errs != 0))
            && print_enable)
        {
            VB_PRT(print_enable, " %c%c%c%c",
                   (nibrng_errs)?'R':' ',
                   (nibunl_errs)?'U':' ',
                   (bitval_errs)?'V':' ',
                   /*(nibsat_errs)?'S':*/' ');
        }

#if LOOK_FOR_STUCK_BYTE
        bit_values = __builtin_popcountl(bl_mask[0]) + __builtin_popcountl(bl_mask[1]);
        if (bit_values < 3) {
            counts->bytes_stuck |= (1 << byte_lane);
            if (print_enable)
                VB_PRT(print_enable, "X");
        }
#endif
        if (print_enable)
            VB_PRT(print_enable, "\n");

        counts->nibrng_errs |= (nibrng_errs << byte_lane);
        counts->nibunl_errs |= (nibunl_errs << byte_lane);
        //counts->nibsat_errs |= (nibsat_errs << byte_lane);
        counts->bitval_errs |= (bitval_errs << byte_lane);

#if LOOK_FOR_STUCK_BYTE
        // just for completeness, allow print of the stuck values bitmask after the bytelane print
        if ((bit_values < 3) && print_enable) {
            VB_PRT(VBL_DEV, "N%d.LMC%d: Deskew byte %d STUCK on value 0x%016lx.%016lx\n",
                   node, ddr_interface_num, byte_lane,
                   bl_mask[1], bl_mask[0]);
        }
#endif

    } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */

    // restore original WR_DESKEW_ENA setting
    change_wr_deskew_ena(node, ddr_interface_num, saved_wr_deskew_ena);

    return;
}

unsigned short load_dac_override(int node, int ddr_interface_num,
                                        int dac_value, int byte)
{
    bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
    int bytex = (byte == 0x0A) ? byte : byte + 1; // single bytelanes incr by 1; A is for ALL

    ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));

    SET_DDR_DLL_CTL3(byte_sel, bytex);
    SET_DDR_DLL_CTL3(offset, dac_value >> 1); // only 7-bit field, use MS bits

    ddr_dll_ctl3.s.bit_select    = 0x9; /* No-op */
    DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);

    ddr_dll_ctl3.s.bit_select    = 0xC; /* Vref bypass setting load */
    DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);

    ddr_dll_ctl3.s.bit_select    = 0xD; /* Vref bypass on. */
    DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);

    ddr_dll_ctl3.s.bit_select    = 0x9; /* No-op */
    DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);

    return ((unsigned short) GET_DDR_DLL_CTL3(offset));
}
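
// Usage sketch: pass 0x0A to override ALL bytelanes at once; otherwise pass
// the bytelane number (0-8) and the byte+1 CSR encoding is applied above:
//
//     load_dac_override(node, lmc, dac_value, 0x0A); // every bytelane
//     load_dac_override(node, lmc, dac_value, 3);    // bytelane 3 only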

// arg dac_or_dbi is 1 for DAC, 0 for DBI
// returns 9 entries (bytelanes 0 through 8) in settings[]
// returns 0 if OK, -1 if a problem
int read_DAC_DBI_settings(int node, int ddr_interface_num,
                          int dac_or_dbi, int *settings)
{
    bdk_lmcx_phy_ctl_t phy_ctl;
    int byte_lane, bit_num;
    int deskew;
    int dac_value;
    int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx

    phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
    phy_ctl.s.dsk_dbg_clk_scaler = 3;
    DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), phy_ctl.u);

    bit_num = (dac_or_dbi) ? 4 : 5;
    if ((bit_num == 5) && !is_t88p2) { // NOTE: this is for pass 1.x
        return -1;
    }

    for (byte_lane = 8; byte_lane >= 0 ; --byte_lane) { // FIXME: always assume ECC is available

        //set byte lane and bit to read
        phy_ctl.s.dsk_dbg_bit_sel = bit_num;
        phy_ctl.s.dsk_dbg_byte_sel = byte_lane;
        DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), phy_ctl.u);

        //start read sequence
        phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
        phy_ctl.s.dsk_dbg_rd_start = 1;
        DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), phy_ctl.u);

        //poll for read sequence to complete
        do {
            phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
        } while (phy_ctl.s.dsk_dbg_rd_complete != 1);

        deskew = phy_ctl.s.dsk_dbg_rd_data /*>> 3*/; // leave the flag bits for DBI
        dac_value = phy_ctl.s.dsk_dbg_rd_data & 0xff;

        settings[byte_lane] = (dac_or_dbi) ? dac_value : deskew;

    } /* for (byte_lane = 8; byte_lane >= 0 ; --byte_lane) { */

    return 0;
}
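
// A sketch of reading the per-bytelane DAC settings and showing them with
// the display helper that follows:
//
//     int dac[9];
//     if (read_DAC_DBI_settings(node, lmc, 1, dac) == 0) // 1 == DAC
//         display_DAC_DBI_settings(node, lmc, 1, ecc_ena, dac, "Current");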

// print out the DAC or DBI settings array
// arg dac_or_dbi is 1 for DAC, 0 for DBI
void
display_DAC_DBI_settings(int node, int lmc, int dac_or_dbi,
                         int ecc_ena, int *settings, const char *title)
{
    int byte;
    int flags;
    int deskew;
    const char *fc = " ?-=+*#&";

    ddr_print("N%d.LMC%d: %s %s Deskew Settings %d:0 :",
              node, lmc, title, (dac_or_dbi)?"DAC":"DBI", 7+ecc_ena);
    for (byte = (7+ecc_ena); byte >= 0; --byte) { // FIXME: what about 32-bit mode?
        if (dac_or_dbi) { // DAC
            flags  = 1; // say it's locked, to get a blank flag char
            deskew = settings[byte] & 0xff;
        } else { // DBI
            flags  = settings[byte] & 7;
            deskew = (settings[byte] >> 3) & 0x7f;
        }
        ddr_print(" %3d %c", deskew, fc[flags^1]);
    }
    ddr_print("\n");
}

// Evaluate the DAC settings array
static int
evaluate_DAC_settings(int ddr_interface_64b, int ecc_ena, int *settings)
{
    int byte, dac;
    int last = (ddr_interface_64b) ? 7 : 3;

    // this looks only for DAC values that are EVEN
    for (byte = (last+ecc_ena); byte >= 0; --byte) {
        dac  = settings[byte] & 0xff;
        if ((dac & 1) == 0)
            return 1;
    }
    return 0;
}
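
// For example: settings[] = { 65, 67, 64, ... } returns 1 as soon as the
// even value 64 is seen; an all-odd settings array returns 0.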

static void
Perform_Offset_Training(bdk_node_t node, int rank_mask, int ddr_interface_num)
{
    bdk_lmcx_phy_ctl_t lmc_phy_ctl;
    uint64_t orig_phy_ctl;
    const char *s;

    /*
     * 6.9.8 LMC Offset Training
     *
     * LMC requires input-receiver offset training.
     *
     * 1. Write LMC(0)_PHY_CTL[DAC_ON] = 1
     */
    lmc_phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
    orig_phy_ctl = lmc_phy_ctl.u;
    lmc_phy_ctl.s.dac_on = 1;

    // allow full CSR override
    if ((s = lookup_env_parameter_ull("ddr_phy_ctl")) != NULL) {
        lmc_phy_ctl.u    = strtoull(s, NULL, 0);
    }

    // do not print or write if CSR does not change...
    if (lmc_phy_ctl.u != orig_phy_ctl) {
        ddr_print("PHY_CTL                                       : 0x%016llx\n", lmc_phy_ctl.u);
        DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), lmc_phy_ctl.u);
    }

#if 0
    // FIXME? do we really need to show RODT here?
    bdk_lmcx_comp_ctl2_t lmc_comp_ctl2;
    lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
    ddr_print("Read ODT_CTL                                  : 0x%x (%d ohms)\n",
              lmc_comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[lmc_comp_ctl2.s.rodt_ctl]);
#endif

    /*
     * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0B and
     *    LMC(0)_SEQ_CTL[INIT_START] = 1.
     *
     * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
     */
    perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0B); /* Offset training sequence */

}

static void
Perform_Internal_VREF_Training(bdk_node_t node, int rank_mask, int ddr_interface_num)
{
    bdk_lmcx_ext_config_t ext_config;

    /*
     * 6.9.9 LMC Internal Vref Training
     *
     * LMC requires input-reference-voltage training.
     *
     * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 0.
     */
    ext_config.u = BDK_CSR_READ(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num));
    ext_config.s.vrefint_seq_deskew = 0;

    VB_PRT(VBL_SEQ, "N%d.LMC%d: Performing LMC sequence: vrefint_seq_deskew = %d\n",
           node, ddr_interface_num, ext_config.s.vrefint_seq_deskew);

    DRAM_CSR_WRITE(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num), ext_config.u);

    /*
     * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0a and
     *    LMC(0)_SEQ_CTL[INIT_START] = 1.
     *
     * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
     */
    perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0A); /* LMC Internal Vref Training */
}

#define dbg_avg(format, ...) VB_PRT(VBL_DEV, format, ##__VA_ARGS__)
static int
process_samples_average(int16_t *bytes, int num_samples, int lmc, int lane_no)
{
    int i, savg, sadj, sum = 0, rng, ret, asum, trunc;
    int16_t smin = 32767, smax = -32768;

    dbg_avg("DBG_AVG%d.%d: ", lmc, lane_no);

    for (i = 0; i < num_samples; i++) {
        sum += bytes[i];
        if (bytes[i] < smin) smin = bytes[i];
        if (bytes[i] > smax) smax = bytes[i];
        dbg_avg(" %3d", bytes[i]);
    }
    rng = smax - smin + 1;

    dbg_avg(" (%3d, %3d, %2d)", smin, smax, rng);

    asum = sum - smin - smax;

    savg = divide_nint(sum * 10, num_samples);

    sadj = divide_nint(asum * 10, (num_samples - 2));

    trunc = asum / (num_samples - 2);

    dbg_avg(" [%3d.%d, %3d.%d, %3d]", savg/10, savg%10, sadj/10, sadj%10, trunc);

    sadj = divide_nint(sadj, 10);
    if (trunc & 1)
        ret = trunc;
    else if (sadj & 1)
        ret = sadj;
    else
        ret = trunc + 1;

    dbg_avg(" -> %3d\n", ret);

    return ret;
}
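
// Worked example (a sketch): samples { 5, 7, 7, 8, 9 } give sum = 36,
// smin = 5, smax = 9, so asum = 22 and trunc = 22/3 = 7; trunc is odd,
// so 7 is returned. The min and max samples are dropped from the adjusted
// average, and an odd result is preferred.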


#define DEFAULT_SAT_RETRY_LIMIT    11    // 1 + 10 retries
static int default_lock_retry_limit = 20;    // 20 retries // FIXME: make a var for overriding

static int
Perform_Read_Deskew_Training(bdk_node_t node, int rank_mask, int ddr_interface_num,
                             int spd_rawcard_AorB, int print_flags, int ddr_interface_64b)
{
    int unsaturated, locked;
    //int nibble_sat;
    int sat_retries, lock_retries, lock_retries_total, lock_retries_limit;
    int print_first;
    int print_them_all;
    deskew_counts_t dsk_counts;
    uint64_t saved_wr_deskew_ena;
#if DESKEW_RODT_CTL
    bdk_lmcx_comp_ctl2_t comp_ctl2;
    int save_deskew_rodt_ctl = -1;
#endif
    int is_t88p2 = !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X); // added 81xx and 83xx

    VB_PRT(VBL_FAE, "N%d.LMC%d: Performing Read Deskew Training.\n", node, ddr_interface_num);

    // save original WR_DESKEW_ENA setting, and disable it for read deskew
    saved_wr_deskew_ena = change_wr_deskew_ena(node, ddr_interface_num, 0);

    sat_retries = 0;
    lock_retries_total = 0;
    unsaturated = 0;
    print_first = VBL_FAE; // print the first one, FAE and above
    print_them_all = dram_is_verbose(VBL_DEV4); // set to true for printing all normal deskew attempts

    int loops, normal_loops = 1; // default to 1 NORMAL deskew training op...
    const char *s;
    if ((s = getenv("ddr_deskew_normal_loops")) != NULL) {
        normal_loops = strtoul(s, NULL, 0);
    }

#if LOOK_FOR_STUCK_BYTE
    // provide override for STUCK BYTE RESETS
    int do_stuck_reset = ENABLE_STUCK_BYTE_RESET;
    if ((s = getenv("ddr_enable_stuck_byte_reset")) != NULL) {
        do_stuck_reset = !!strtoul(s, NULL, 0);
    }
#endif

#if DESKEW_RODT_CTL
    if ((s = getenv("ddr_deskew_rodt_ctl")) != NULL) {
        int deskew_rodt_ctl = strtoul(s, NULL, 0);
        comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
        save_deskew_rodt_ctl = comp_ctl2.s.rodt_ctl;
        comp_ctl2.s.rodt_ctl = deskew_rodt_ctl;
        DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), comp_ctl2.u);
    }
#endif

    lock_retries_limit = default_lock_retry_limit;
    if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) // added 81xx and 83xx
        lock_retries_limit *= 2; // give pass 2.0 twice as many

    do { /* while (sat_retries < sat_retry_limit) */

        /*
         * 6.9.10 LMC Deskew Training
         *
         * LMC requires input-read-data deskew training.
         *
         * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 1.
         */
        VB_PRT(VBL_SEQ, "N%d.LMC%d: Performing LMC sequence: Set vrefint_seq_deskew = 1\n",
                node, ddr_interface_num);
        DRAM_CSR_MODIFY(ext_config, node, BDK_LMCX_EXT_CONFIG(ddr_interface_num),
                        ext_config.s.vrefint_seq_deskew = 1); /* Set Deskew sequence */

        /*
         * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0A and
         *    LMC(0)_SEQ_CTL[INIT_START] = 1.
         *
         * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
         */
        DRAM_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
                        phy_ctl.s.phy_dsk_reset = 1); /* RESET Deskew sequence */
        perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0A); /* LMC Deskew Training */

        lock_retries = 0;

    perform_read_deskew_training:
        // maybe perform the NORMAL deskew training sequence multiple times before looking at lock status
        for (loops = 0; loops < normal_loops; loops++) {
            DRAM_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
                            phy_ctl.s.phy_dsk_reset = 0); /* Normal Deskew sequence */
            perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x0A); /* LMC Deskew Training */
        }
        // Moved this from Validate_Read_Deskew_Training
        /* Allow deskew results to stabilize before evaluating them. */
        bdk_wait_usec(deskew_validation_delay);

        // Now go look at lock and saturation status...
        Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, print_first);
        if (print_first && !print_them_all) // after printing the first and not doing them all, no more
            print_first = 0;

        unsaturated = (dsk_counts.saturated == 0);
        locked = (dsk_counts.unlocked == 0);
        //nibble_sat = (dsk_counts.nibsat_errs != 0);

        // only do locking retries if unsaturated or rawcard A or B, otherwise full SAT retry
        if (unsaturated || (spd_rawcard_AorB && !is_t88p2 /*&& !nibble_sat*/)) {
            if (!locked) { // and not locked
                lock_retries++;
                lock_retries_total++;
                if (lock_retries <= lock_retries_limit) {
                    goto perform_read_deskew_training;
                } else {
                    VB_PRT(VBL_TME, "N%d.LMC%d: LOCK RETRIES failed after %d retries\n",
                            node, ddr_interface_num, lock_retries_limit);
                }
            } else {
                if (lock_retries_total > 0) // only print if we did try
                    VB_PRT(VBL_TME, "N%d.LMC%d: LOCK RETRIES successful after %d retries\n",
                            node, ddr_interface_num, lock_retries);
            }
        } /* if (unsaturated || spd_rawcard_AorB) */

        ++sat_retries;

#if LOOK_FOR_STUCK_BYTE
        // FIXME: this is a bit of a hack at the moment...
        // We want to force a Deskew RESET, hopefully to unstick the byte values,
        // and then resume normal deskew training as usual.
        // For now, do only if it is all locked...
        if (locked && (dsk_counts.bytes_stuck != 0)) {
            BDK_CSR_INIT(lmc_config, node, BDK_LMCX_CONFIG(ddr_interface_num));
            if (do_stuck_reset && lmc_config.s.mode_x4dev) { // FIXME: only when x4!!
                unsaturated = 0; // to always make sure the while continues
                VB_PRT(VBL_TME, "N%d.LMC%d: STUCK BYTE (0x%x), forcing deskew RESET\n",
                          node, ddr_interface_num, dsk_counts.bytes_stuck);
                continue; // bypass the rest to get back to the RESET
            } else {
                VB_PRT(VBL_TME, "N%d.LMC%d: STUCK BYTE (0x%x), ignoring deskew RESET\n",
                          node, ddr_interface_num, dsk_counts.bytes_stuck);
            }
        }
#endif
        /*
         * At this point, check for a DDR4 RDIMM that will not benefit from SAT retries; if so, no retries
         */
        if (spd_rawcard_AorB && !is_t88p2 /*&& !nibble_sat*/) {
            VB_PRT(VBL_TME, "N%d.LMC%d: Read Deskew Training Loop: Exiting for RAWCARD == A or B.\n",
                    node, ddr_interface_num);
            break; // no sat or lock retries
        }

    } while (!unsaturated && (sat_retries < DEFAULT_SAT_RETRY_LIMIT));

#if DESKEW_RODT_CTL
    if (save_deskew_rodt_ctl != -1) {
        comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
        comp_ctl2.s.rodt_ctl = save_deskew_rodt_ctl;
        DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), comp_ctl2.u);
    }
#endif

    VB_PRT(VBL_FAE, "N%d.LMC%d: Read Deskew Training %s. %d sat-retries, %d lock-retries\n",
           node, ddr_interface_num,
           (sat_retries >= DEFAULT_SAT_RETRY_LIMIT) ? "Timed Out" : "Completed",
           sat_retries-1, lock_retries_total);

    // restore original WR_DESKEW_ENA setting
    change_wr_deskew_ena(node, ddr_interface_num, saved_wr_deskew_ena);

    if ((dsk_counts.nibrng_errs != 0) || (dsk_counts.nibunl_errs != 0)) {
        debug_print("N%d.LMC%d: NIBBLE ERROR(S) found, returning FAULT\n",
                  node, ddr_interface_num);
        return -1; // we did retry locally, they did not help
    }

    // NOTE: we (currently) always print one last training validation before starting Read Leveling...

    return 0;
}

static void
do_write_deskew_op(bdk_node_t node, int ddr_interface_num,
                   int bit_sel, int byte_sel, int ena)
{
    bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;

    ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
    SET_DDR_DLL_CTL3(bit_select,    bit_sel);
    SET_DDR_DLL_CTL3(byte_sel,      byte_sel);
    SET_DDR_DLL_CTL3(wr_deskew_ena, ena);
    DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num),        ddr_dll_ctl3.u);

    ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
}

static void
set_write_deskew_offset(bdk_node_t node, int ddr_interface_num,
                        int bit_sel, int byte_sel, int offset)
{
    bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;

    ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
    SET_DDR_DLL_CTL3(bit_select, bit_sel);
    SET_DDR_DLL_CTL3(byte_sel,   byte_sel);
    SET_DDR_DLL_CTL3(offset,     offset);
    DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num),        ddr_dll_ctl3.u);

    ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
    SET_DDR_DLL_CTL3(wr_deskew_ld, 1);
    DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num),        ddr_dll_ctl3.u);

    ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
}

static void
Update_Write_Deskew_Settings(bdk_node_t node, int ddr_interface_num, deskew_data_t *dskdat)
{
    bdk_lmcx_config_t lmc_config;
    int bit_num;
    int byte_lane, byte_limit;

    lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
    byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;

    for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
        for (bit_num = 0; bit_num <= 7; ++bit_num) {

            set_write_deskew_offset(node, ddr_interface_num, bit_num, byte_lane + 1,
                                    dskdat->bytes[byte_lane].bits[bit_num]);

        } /* for (bit_num = 0; bit_num <= 7; ++bit_num) */
    } /* for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) */

    return;
}

#define ALL_BYTES 0x0A
#define BS_NOOP  0x09
#define BS_RESET 0x0F
#define BS_REUSE 0x0A

// set all entries to the same value (used during training)
static void
Set_Write_Deskew_Settings(bdk_node_t node, int ddr_interface_num, int value)
{
    bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
    int bit_num;

    VB_PRT(VBL_DEV2, "N%d.LMC%d: SetWriteDeskew: WRITE %d\n", node, ddr_interface_num, value);

    for (bit_num = 0; bit_num <= 7; ++bit_num) {

        // write a bit-deskew value to all bit-lanes of all bytes
        ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
        SET_DDR_DLL_CTL3(bit_select, bit_num);
        SET_DDR_DLL_CTL3(byte_sel,   ALL_BYTES); // FIXME? will this work in 32-bit mode?
        SET_DDR_DLL_CTL3(offset,     value);
        DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);

        ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
        SET_DDR_DLL_CTL3(wr_deskew_ld, 1);
        DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num), ddr_dll_ctl3.u);

    } /* for (bit_num = 0; bit_num <= 7; ++bit_num) */

#if 0
    // FIXME: for debug use only
    Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
    Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
#endif

    return;
}

typedef struct {
    uint8_t count[8];
    uint8_t start[8];
    uint8_t best_count[8];
    uint8_t best_start[8];
} deskew_bytelane_t;
typedef struct {
    deskew_bytelane_t bytes[9];
} deskew_rank_t;

deskew_rank_t deskew_history[4];

#define DSKVAL_INCR 4

static void
Neutral_Write_Deskew_Setup(bdk_node_t node, int ddr_interface_num)
{
    // first: NO-OP, Select all bytes, Disable write bit-deskew
    ddr_print("N%d.LMC%d: NEUTRAL Write Deskew Setup: first: NOOP\n", node, ddr_interface_num);
    do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 0);
    //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
    //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);

    // enable write bit-deskew and RESET the settings
    ddr_print("N%d.LMC%d: NEUTRAL Write Deskew Setup: wr_ena: RESET\n", node, ddr_interface_num);
    do_write_deskew_op(node, ddr_interface_num, BS_RESET, ALL_BYTES, 1);
    //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
    //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
}

static void
Perform_Write_Deskew_Training(bdk_node_t node, int ddr_interface_num)
{
    deskew_data_t dskdat;
    int byte, bit_num;
    int dskval, rankx, rank_mask, active_ranks, errors, bit_errs;
    uint64_t hw_rank_offset;
    uint64_t bad_bits[2];
    uint64_t phys_addr;
    deskew_rank_t *dhp;
    int num_lmcs = __bdk_dram_get_num_lmc(node);

    BDK_CSR_INIT(lmcx_config, node, BDK_LMCX_CONFIG(ddr_interface_num));
    rank_mask = lmcx_config.s.init_status; // FIXME: is this right when we run?

    // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
    hw_rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
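    // For example (a sketch): with pbank_lsb = 2, rank_ena = 1 and num_lmcs = 2,
    // hw_rank_offset = 1ull << (28 + 2 - 1 + 1) = 1ull << 30, i.e., ranks are
    // spaced 1 GB apart in the physical address map.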

    VB_PRT(VBL_FAE, "N%d.LMC%d: Performing Write Deskew Training.\n", node, ddr_interface_num);

    // first: NO-OP, Select all bytes, Disable write bit-deskew
    ddr_print("N%d.LMC%d: WriteDeskewConfig: first: NOOP\n", node, ddr_interface_num);
    do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 0);
    //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
    //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);

    // enable write bit-deskew and RESET the settings
    ddr_print("N%d.LMC%d: WriteDeskewConfig: wr_ena: RESET\n", node, ddr_interface_num);
    do_write_deskew_op(node, ddr_interface_num, BS_RESET, ALL_BYTES, 1);
    //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
    //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);

#if 0
    // enable write bit-deskew and REUSE read bit-deskew settings
    ddr_print("N%d.LMC%d: WriteDeskewConfig: wr_ena: REUSE\n", node, ddr_interface_num);
    do_write_deskew_op(node, ddr_interface_num, BS_REUSE, ALL_BYTES, 1);
    Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
    Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
#endif

#if 1
    memset(deskew_history, 0, sizeof(deskew_history));

    for (dskval = 0; dskval < 128; dskval += DSKVAL_INCR) {

        Set_Write_Deskew_Settings(node, ddr_interface_num, dskval);

        active_ranks = 0;
        for (rankx = 0; rankx < 4; rankx++) {
            if (!(rank_mask & (1 << rankx)))
                continue;
            dhp = &deskew_history[rankx];
            phys_addr = hw_rank_offset * active_ranks;
            active_ranks++;

            errors = test_dram_byte_hw(node, ddr_interface_num, phys_addr, 0, bad_bits);

            for (byte = 0; byte <= 8; byte++) { // do bytelane(s)

                // check errors
                if (errors & (1 << byte)) { // yes, error(s) in the byte lane in this rank
                    bit_errs = ((byte == 8) ? bad_bits[1] : bad_bits[0] >> (8 * byte)) & 0xFFULL;

                    VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Value %d: Address 0x%012llx errors 0x%x/0x%x\n",
                           node, ddr_interface_num, rankx, byte,
                           dskval, phys_addr, errors, bit_errs);

                    for (bit_num = 0; bit_num <= 7; bit_num++) {
                        if (!(bit_errs & (1 << bit_num)))
                            continue;
                        if (dhp->bytes[byte].count[bit_num] > 0) { // had started run
                            VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Bit %d Value %d: stopping a run here\n",
                                   node, ddr_interface_num, rankx, byte, bit_num, dskval);
                            dhp->bytes[byte].count[bit_num] = 0;   // stop now
                        }
                    } /* for (bit_num = 0; bit_num <= 7; bit_num++) */

                    // FIXME: else had not started run - nothing else to do?
                } else { // no error in the byte lane
                    for (bit_num = 0; bit_num <= 7; bit_num++) {
                        if (dhp->bytes[byte].count[bit_num] == 0) { // first success, set run start
                            VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Bit %d Value %d: starting a run here\n",
                                   node, ddr_interface_num, rankx, byte, bit_num, dskval);
                            dhp->bytes[byte].start[bit_num] = dskval;
                        }
                        dhp->bytes[byte].count[bit_num] += DSKVAL_INCR; // bump run length

                        // is this now the biggest window?
                        if (dhp->bytes[byte].count[bit_num] > dhp->bytes[byte].best_count[bit_num]) {
                            dhp->bytes[byte].best_count[bit_num] = dhp->bytes[byte].count[bit_num];
                            dhp->bytes[byte].best_start[bit_num] = dhp->bytes[byte].start[bit_num];
                            VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: Byte %d Bit %d Value %d: updating best to %d/%d\n",
                                   node, ddr_interface_num, rankx, byte, bit_num, dskval,
                                   dhp->bytes[byte].best_start[bit_num],
                                   dhp->bytes[byte].best_count[bit_num]);
                        }
                    } /* for (bit_num = 0; bit_num <= 7; bit_num++) */
                } /* error in the byte lane */
            } /* for (byte = 0; byte <= 8; byte++) */
        } /* for (rankx = 0; rankx < 4; rankx++) */
    } /* for (dskval = 0; dskval < 128; dskval++) */


    for (byte = 0; byte <= 8; byte++) { // do bytelane(s)

        for (bit_num = 0; bit_num <= 7; bit_num++) { // do bits
            int bit_beg, bit_end;

            bit_beg = 0;
            bit_end = 128;

            for (rankx = 0; rankx < 4; rankx++) { // merge ranks
                int rank_beg, rank_end, rank_count;
                if (!(rank_mask & (1 << rankx)))
                    continue;

                dhp = &deskew_history[rankx];
                rank_beg = dhp->bytes[byte].best_start[bit_num];
                rank_count = dhp->bytes[byte].best_count[bit_num];

                if (!rank_count) {
                    VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: Byte %d Bit %d: EMPTY\n",
                           node, ddr_interface_num, rankx, byte, bit_num);
                    continue;
                }

                bit_beg = max(bit_beg, rank_beg);
                rank_end = rank_beg + rank_count - DSKVAL_INCR;
                bit_end = min(bit_end, rank_end);

            } /* for (rankx = 0; rankx < 4; rankx++) */

            dskdat.bytes[byte].bits[bit_num] = (bit_end + bit_beg) / 2;

        } /* for (bit_num = 0; bit_num <= 7; bit_num++) */
    } /* for (byte = 0; byte <= 8; byte++) */
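
    // Merge example (a sketch): if rank 0's best passing window for a bit
    // is [40..80] and rank 1's is [52..96], the intersection is [52..80]
    // and the final setting is its midpoint, (52 + 80) / 2 = 66.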

#endif

    // update the write bit-deskew settings with final settings
    ddr_print("N%d.LMC%d: WriteDeskewConfig: wr_ena: UPDATE\n", node, ddr_interface_num);
    Update_Write_Deskew_Settings(node, ddr_interface_num, &dskdat);
    Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
    Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);

    // last: NO-OP, Select all bytes, MUST leave write bit-deskew enabled
    ddr_print("N%d.LMC%d: WriteDeskewConfig: last: wr_ena: NOOP\n", node, ddr_interface_num);
    do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 1);
    //Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
    //Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);

#if 0
    // FIXME: disable/delete this when write bit-deskew works...
    // final: NO-OP, Select all bytes, do NOT leave write bit-deskew enabled
    ddr_print("N%d.LMC%d: WriteDeskewConfig: final: read: NOOP\n", node, ddr_interface_num);
    do_write_deskew_op(node, ddr_interface_num, BS_NOOP, ALL_BYTES, 0);
    Get_Deskew_Settings(node, ddr_interface_num, &dskdat);
    Display_Deskew_Data(node, ddr_interface_num, &dskdat, VBL_NORM);
#endif
}

#define SCALING_FACTOR (1000)
#define Dprintf debug_print // make this "ddr_print" for extra debug output below
static int compute_Vref_1slot_2rank(int rtt_wr, int rtt_park, int dqx_ctl, int rank_count)
{
    uint64_t Reff_s;
    uint64_t Rser_s = 15;
    uint64_t Vdd = 1200;
    uint64_t Vref;
    //uint64_t Vl;
    uint64_t rtt_wr_s = (((rtt_wr == 0) || (rtt_wr == 99)) ? 1*1024*1024 : rtt_wr); // 99 == HiZ
    uint64_t rtt_park_s = (((rtt_park == 0) || ((rank_count == 1) && (rtt_wr != 0))) ? 1*1024*1024 : rtt_park);
    uint64_t dqx_ctl_s = (dqx_ctl == 0 ? 1*1024*1024 : dqx_ctl);
    int Vref_value;
    uint64_t Rangepc = 6000; // range1 base is 60%
    uint64_t Vrefpc;
    int Vref_range = 0;

    Dprintf("rtt_wr = %d, rtt_park = %d, dqx_ctl = %d\n", rtt_wr, rtt_park, dqx_ctl);
    Dprintf("rtt_wr_s = %d, rtt_park_s = %d, dqx_ctl_s = %d\n", rtt_wr_s, rtt_park_s, dqx_ctl_s);

    Reff_s = divide_nint((rtt_wr_s * rtt_park_s) , (rtt_wr_s + rtt_park_s));
    Dprintf("Reff_s = %d\n", Reff_s);

    //Vl = (((Rser_s + dqx_ctl_s) * SCALING_FACTOR) / (Rser_s + dqx_ctl_s + Reff_s)) * Vdd / SCALING_FACTOR;
    //printf("Vl = %d\n", Vl);

    Vref = (((Rser_s + dqx_ctl_s) * SCALING_FACTOR) / (Rser_s + dqx_ctl_s + Reff_s)) + SCALING_FACTOR;
    Dprintf("Vref = %d\n", Vref);

    Vref = (Vref * Vdd) / 2 / SCALING_FACTOR;
    Dprintf("Vref = %d\n", Vref);

    Vrefpc = (Vref * 100 * 100) / Vdd;
    Dprintf("Vrefpc = %d\n", Vrefpc);

    if (Vrefpc < Rangepc) { // < range1 base, use range2
        Vref_range = 1 << 6; // set bit A6 for range2
        Rangepc = 4500; // range2 base is 45%
    }

    Vref_value = divide_nint(Vrefpc - Rangepc, 65);
    if (Vref_value < 0)
        Vref_value = Vref_range; // set to base of range as lowest value
    else
        Vref_value |= Vref_range;
    Dprintf("Vref_value = %d (0x%02x)\n", Vref_value, Vref_value);

    debug_print("rtt_wr:%d, rtt_park:%d, dqx_ctl:%d, Vref_value:%d (0x%x)\n",
           rtt_wr, rtt_park, dqx_ctl, Vref_value, Vref_value);

    return Vref_value;
}
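
// Worked example (a sketch): rtt_wr = 240, rtt_park = 120, dqx_ctl = 40,
// rank_count = 2: Reff_s = 240*120/360 = 80 ohms; Vref becomes
// (55*1000/135) + 1000 = 1407, then 1407*1200/2/1000 = 844 mV; so
// Vrefpc = 844*10000/1200 = 7033 (70.33%), above the 60% range1 base,
// and Vref_value = divide_nint(7033 - 6000, 65) = 16 (0x10, A6 clear).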
static int compute_Vref_2slot_2rank(int rtt_wr, int rtt_park_00, int rtt_park_01, int dqx_ctl, int rtt_nom)
{
    //uint64_t Rser = 15;
    uint64_t Vdd = 1200;
    //uint64_t Vref;
    uint64_t Vl, Vlp, Vcm;
    uint64_t Rd0, Rd1, Rpullup;
    uint64_t rtt_wr_s = (((rtt_wr == 0) || (rtt_wr == 99)) ? 1*1024*1024 : rtt_wr); // 99 == HiZ
    uint64_t rtt_park_00_s = (rtt_park_00 == 0 ? 1*1024*1024 : rtt_park_00);
    uint64_t rtt_park_01_s = (rtt_park_01 == 0 ? 1*1024*1024 : rtt_park_01);
    uint64_t dqx_ctl_s = (dqx_ctl == 0 ? 1*1024*1024 : dqx_ctl);
    uint64_t rtt_nom_s = (rtt_nom == 0 ? 1*1024*1024 : rtt_nom);
    int Vref_value;
    uint64_t Rangepc = 6000; // range1 base is 60%
    uint64_t Vrefpc;
    int Vref_range = 0;

    // Rd0 = (RTT_NOM /*parallel*/ RTT_WR) + 15 = ((RTT_NOM * RTT_WR) / (RTT_NOM + RTT_WR)) + 15
    Rd0 = divide_nint((rtt_nom_s * rtt_wr_s), (rtt_nom_s + rtt_wr_s)) + 15;
    //printf("Rd0 = %ld\n", Rd0);

    // Rd1 = (RTT_PARK_00 /*parallel*/ RTT_PARK_01) + 15 = ((RTT_PARK_00 * RTT_PARK_01) / (RTT_PARK_00 + RTT_PARK_01)) + 15
    Rd1 = divide_nint((rtt_park_00_s * rtt_park_01_s), (rtt_park_00_s + rtt_park_01_s)) + 15;
    //printf("Rd1 = %ld\n", Rd1);

    // Rpullup = Rd0 /*parallel*/ Rd1 = (Rd0 * Rd1) / (Rd0 + Rd1)
    Rpullup = divide_nint((Rd0 * Rd1), (Rd0 + Rd1));
    //printf("Rpullup = %ld\n", Rpullup);

    // Vl = (DQX_CTL / (DQX_CTL + Rpullup)) * 1.2
    Vl = divide_nint((dqx_ctl_s * Vdd), (dqx_ctl_s + Rpullup));
    //printf("Vl = %ld\n", Vl);

    // Vlp = ((15 / Rd0) * (1.2 - Vl)) + Vl
    Vlp = divide_nint((15 * (Vdd - Vl)), Rd0) + Vl;
    //printf("Vlp = %ld\n", Vlp);

    // Vcm = (Vlp + 1.2) / 2
    Vcm = divide_nint((Vlp + Vdd), 2);
    //printf("Vcm = %ld\n", Vcm);

    // Vrefpc = (Vcm / 1.2) * 100
    Vrefpc = divide_nint((Vcm * 100 * 100), Vdd);
    //printf("Vrefpc = %ld\n", Vrefpc);

    if (Vrefpc < Rangepc) { // < range1 base, use range2
        Vref_range = 1 << 6; // set bit A6 for range2
        Rangepc = 4500; // range2 base is 45%
    }

    Vref_value = divide_nint(Vrefpc - Rangepc, 65);
    if (Vref_value < 0)
        Vref_value = Vref_range; // set to base of range as lowest value
    else
        Vref_value |= Vref_range;
    //printf("Vref_value = %d (0x%02x)\n", Vref_value, Vref_value);

    debug_print("rtt_wr:%d, rtt_park_00:%d, rtt_park_01:%d, dqx_ctl:%d, rtt_nom:%d, Vref_value:%d (0x%x)\n",
           rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom, Vref_value, Vref_value);

    return Vref_value;
}

// NOTE: only call this for DIMMs with 1 or 2 ranks, not 4.
int
compute_vref_value(bdk_node_t node, int ddr_interface_num,
                   int rankx, int dimm_count, int rank_count,
                   impedence_values_t *imp_values, int is_stacked_die)
{
    int computed_final_vref_value = 0;

    /* Calculate an override of the measured Vref value
       but only for configurations we know how to...*/
    // we have code for 2-rank DIMMs in both 1-slot or 2-slot configs,
    // and can use the 2-rank 1-slot code for 1-rank DIMMs in 1-slot configs
    // and can use the 2-rank 2-slot code for 1-rank DIMMs in 2-slot configs

    int rtt_wr, dqx_ctl, rtt_nom, index;
    bdk_lmcx_modereg_params1_t lmc_modereg_params1;
    bdk_lmcx_modereg_params2_t lmc_modereg_params2;
    bdk_lmcx_comp_ctl2_t comp_ctl2;

    lmc_modereg_params1.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num));
    lmc_modereg_params2.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num));
    comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
    dqx_ctl = imp_values->dqx_strength[comp_ctl2.s.dqx_ctl];

    // WR always comes from the current rank
    index   = (lmc_modereg_params1.u >> (rankx * 12 + 5)) & 0x03;
    if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
        index |= lmc_modereg_params1.u >> (51+rankx-2) & 0x04;
    }
    rtt_wr  = imp_values->rtt_wr_ohms [index];

    // separate calculations for 1 vs 2 DIMMs per LMC
    if (dimm_count == 1) {
        // PARK comes from this rank if 1-rank, otherwise other rank
        index = (lmc_modereg_params2.u >> ((rankx ^ (rank_count - 1)) * 10 + 0)) & 0x07;
        int rtt_park   = imp_values->rtt_nom_ohms[index];
        computed_final_vref_value = compute_Vref_1slot_2rank(rtt_wr, rtt_park, dqx_ctl, rank_count);
    } else {
        // get both PARK values from the other DIMM
        index = (lmc_modereg_params2.u >> ((rankx ^ 0x02) * 10 + 0)) & 0x07;
        int rtt_park_00 = imp_values->rtt_nom_ohms[index];
        index = (lmc_modereg_params2.u >> ((rankx ^ 0x03) * 10 + 0)) & 0x07;
        int rtt_park_01 = imp_values->rtt_nom_ohms[index];
        // NOM comes from this rank if 1-rank, otherwise other rank
        index   = (lmc_modereg_params1.u >> ((rankx ^ (rank_count - 1)) * 12 + 9)) & 0x07;
        rtt_nom = imp_values->rtt_nom_ohms[index];
        computed_final_vref_value = compute_Vref_2slot_2rank(rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom);
    }

#if ENABLE_COMPUTED_VREF_ADJUSTMENT
    {
        int saved_final_vref_value = computed_final_vref_value;
        BDK_CSR_INIT(lmc_config, node, BDK_LMCX_CONFIG(ddr_interface_num));
1293         /*
1294           New computed Vref = existing computed Vref - X
1295 
1296           The value of X depends on the configuration. Both #122 and #139 are 2Rx4 RDIMMs,
1297           while #124 is a stacked-die 2Rx4, so the results fall into two cases:
1298 
1299           1. Stacked Die: 2Rx4
1300              1-slot: offset = 7, i.e. new computed Vref = existing computed Vref - 7
1301              2-slot: offset = 6
1302 
1303           2. Regular: 2Rx4
1304              1-slot: offset = 3
1305              2-slot: offset = 2
1306         */
1307         // we know we never get called unless DDR4, so test just the other conditions
1308     if ((!!__bdk_dram_is_rdimm(node, 0)) &&
1309            (rank_count == 2) &&
1310            (lmc_config.s.mode_x4dev))
1311         { // it must first be RDIMM and 2-rank and x4
1312             if (is_stacked_die) { // now do according to stacked die or not...
1313                 computed_final_vref_value -= (dimm_count == 1) ? 7 : 6;
1314             } else {
1315                 computed_final_vref_value -= (dimm_count == 1) ? 3 : 2;
1316             }
1317             // we have adjusted it, so print it out if verbosity is right
1318             VB_PRT(VBL_TME, "N%d.LMC%d.R%d: adjusting computed vref from %2d (0x%02x) to %2d (0x%02x)\n",
1319                    node, ddr_interface_num, rankx,
1320                    saved_final_vref_value, saved_final_vref_value,
1321                    computed_final_vref_value, computed_final_vref_value);
1322         }
1323     }
1324 #endif
1325     return computed_final_vref_value;
1326 }
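// The returned value packs the MR6 VrefDQ encoding directly: bit 6 is the
// range select (set = range 2, per Vref_range above) and bits 5:0 are the
// step within that range.  Illustrative (not compiled) sketch of handing
// the result to set_vref() further below:
#if 0
    int v = compute_vref_value(node, ddr_interface_num, rankx, dimm_count,
                               rank_count, imp_values, is_stacked_die);
    set_vref(node, ddr_interface_num, rankx, (v >> 6) & 1, v & 0x3f);
#endif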
1327 
1328 static unsigned int EXTR_WR(uint64_t u, int x)
1329 {
1330     return (unsigned int)(((u >> (x*12+5)) & 0x3UL) | ((u >> (51+x-2)) & 0x4UL));
1331 }
1332 static void INSRT_WR(uint64_t *up, int x, int v)
1333 {
1334     uint64_t u = *up;
1335     u &= ~(((0x3UL) << (x*12+5)) | ((0x1UL) << (51+x)));
1336     *up = (u | ((v & 0x3UL) << (x*12+5)) | ((v & 0x4UL) << (51+x-2)));
1337     return;
1338 }
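/* RTT_WR for rank x lives in a split field of MODEREG_PARAMS1: the low two
 * bits at [x*12+6 : x*12+5] plus (on chips after CN88XX pass 1.x) a third
 * bit at [51+x].  EXTR_WR/INSRT_WR hide that split: for x=0, v=5 (binary
 * 101), INSRT_WR sets bit 51 and bits 6:5 to 01, and EXTR_WR reassembles
 * the 5 from those same positions.
 */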
1339 
1340 static int encode_row_lsb_ddr3(int row_lsb, int ddr_interface_wide)
1341 {
1342     int encoded_row_lsb;
1343     int row_lsb_start = 14;
1344 
1345     /*  Decoding for row_lsb             */
1346     /*       000: row_lsb = mem_adr[14]  */
1347     /*       001: row_lsb = mem_adr[15]  */
1348     /*       010: row_lsb = mem_adr[16]  */
1349     /*       011: row_lsb = mem_adr[17]  */
1350     /*       100: row_lsb = mem_adr[18]  */
1351     /*       101: row_lsb = mem_adr[19]  */
1352     /*       110: row_lsb = mem_adr[20]  */
1353     /*       111: RESERVED               */
1354 
1357     encoded_row_lsb = row_lsb - row_lsb_start;
1358 
1359     return encoded_row_lsb;
1360 }
1361 
1362 static int encode_pbank_lsb_ddr3(int pbank_lsb, int ddr_interface_wide)
1363 {
1364     int encoded_pbank_lsb;
1365 
1366     /*  Decoding for pbank_lsb                                             */
1367     /*       0000:DIMM = mem_adr[28]    / rank = mem_adr[27] (if RANK_ENA) */
1368     /*       0001:DIMM = mem_adr[29]    / rank = mem_adr[28]      "        */
1369     /*       0010:DIMM = mem_adr[30]    / rank = mem_adr[29]      "        */
1370     /*       0011:DIMM = mem_adr[31]    / rank = mem_adr[30]      "        */
1371     /*       0100:DIMM = mem_adr[32]    / rank = mem_adr[31]      "        */
1372     /*       0101:DIMM = mem_adr[33]    / rank = mem_adr[32]      "        */
1373     /*       0110:DIMM = mem_adr[34]    / rank = mem_adr[33]      "        */
1374     /*       0111:DIMM = 0              / rank = mem_adr[34]      "        */
1375     /*       1000-1111: RESERVED                                           */
1376 
1377     int pbank_lsb_start = 28;
1380 
1381     encoded_pbank_lsb      = pbank_lsb - pbank_lsb_start;
1382 
1383     return encoded_pbank_lsb;
1384 }
1385 
1386 static uint64_t octeon_read_lmcx_ddr3_rlevel_dbg(bdk_node_t node, int ddr_interface_num, int idx)
1387 {
1388     DRAM_CSR_MODIFY(c, node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num),
1389                     c.s.byte = idx);
1390     BDK_CSR_READ(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num));
1391     BDK_CSR_INIT(rlevel_dbg, node, BDK_LMCX_RLEVEL_DBG(ddr_interface_num));
1392     return rlevel_dbg.s.bitmask;
1393 }
1394 
1395 static uint64_t octeon_read_lmcx_ddr3_wlevel_dbg(bdk_node_t node, int ddr_interface_num, int idx)
1396 {
1397     bdk_lmcx_wlevel_dbg_t wlevel_dbg;
1398 
1399     wlevel_dbg.u = 0;
1400     wlevel_dbg.s.byte = idx;
1401 
1402     DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_DBG(ddr_interface_num), wlevel_dbg.u);
1403     BDK_CSR_READ(node, BDK_LMCX_WLEVEL_DBG(ddr_interface_num));
1404 
1405     wlevel_dbg.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_DBG(ddr_interface_num));
1406     return wlevel_dbg.s.bitmask;
1407 }
1408 
1409 
1410 /*
1411  * Apply a filter to the BITMASK results returned from Octeon
1412  * read-leveling to determine the most likely delay result.  This
1413  * computed delay may be used to qualify the delay result returned by
1414  * Octeon. Accumulate an error penalty for invalid characteristics of
1415  * the bitmask so that they can be used to select the most reliable
1416  * results.
1417  *
1418  * The algorithm searches for the largest contiguous MASK within a
1419  * maximum RANGE of bits beginning with the MSB.
1420  *
1421  * 1. A MASK with a WIDTH less than 4 will be penalized
1422  * 2. Bubbles in the bitmask that occur before or after the MASK
1423  *    will be penalized
1424  * 3. If there are no trailing bubbles then extra bits that occur
1425  *    beyond the maximum RANGE will be penalized.
1426  *
1427  *   +++++++++++++++++++++++++++++++++++++++++++++++++++
1428  *   +                                                 +
1429  *   +   e.g. bitmask = 27B00                          +
1430  *   +                                                 +
1431  *   +   63                  +--- mstart           0   +
1432  *   +   |                   |                     |   +
1433  *   +   |         +---------+     +--- fb         |   +
1434  *   +   |         |  range  |     |               |   +
1435  *   +   V         V         V     V               V   +
1436  *   +                                                 +
1437  *   +   0 0 ... 1 0 0 1 1 1 1 0 1 1 0 0 0 0 0 0 0 0   +
1438  *   +                                                 +
1439  *   +           ^     ^     ^                         +
1440  *   +           |     | mask|                         +
1441  *   +     lb ---+     +-----+                         +
1442  *   +                  width                          +
1443  *   +                                                 +
1444  *   +++++++++++++++++++++++++++++++++++++++++++++++++++
1445  */
1446 #define RLEVEL_BITMASK_TRAILING_BITS_ERROR      5
1447 #define RLEVEL_BITMASK_BUBBLE_BITS_ERROR        11 // FIXME? now less than TOOLONG
1448 #define RLEVEL_BITMASK_NARROW_ERROR             6
1449 #define RLEVEL_BITMASK_BLANK_ERROR              100
1450 #define RLEVEL_BITMASK_TOOLONG_ERROR            12
1451 
1452 #define MASKRANGE_BITS  6
1453 #define MASKRANGE       ((1 << MASKRANGE_BITS) - 1)
1454 
1455 static int
1456 validate_ddr3_rlevel_bitmask(rlevel_bitmask_t *rlevel_bitmask_p, int ddr_type)
1457 {
1458     int i;
1459     int errors  = 0;
1460     uint64_t mask = 0;      /* Used in 64-bit comparisons */
1461     int8_t  mstart = 0;
1462     uint8_t width = 0;
1463     uint8_t firstbit = 0;
1464     uint8_t lastbit = 0;
1465     uint8_t bubble = 0;
1466     uint8_t tbubble = 0;
1467     uint8_t blank = 0;
1468     uint8_t narrow = 0;
1469     uint8_t trailing = 0;
1470     uint64_t bitmask = rlevel_bitmask_p->bm;
1471     uint8_t extras = 0;
1472     uint8_t toolong = 0;
1473     uint64_t temp;
1474 
1475     if (bitmask == 0) {
1476         blank += RLEVEL_BITMASK_BLANK_ERROR;
1477     } else {
1478 
1479         /* Look for fb, the first bit */
1480         temp = bitmask;
1481         while (!(temp & 1)) {
1482             firstbit++;
1483             temp >>= 1;
1484         }
1485 
1486         /* Look for lb, the last bit */
1487         lastbit = firstbit;
1488         while ((temp >>= 1))
1489             lastbit++;
1490 
1491         /* Start with the max range to try to find the largest mask within the bitmask data */
1492         width = MASKRANGE_BITS;
1493         for (mask = MASKRANGE; mask > 0; mask >>= 1, --width) {
1494             for (mstart = lastbit - width + 1; mstart >= firstbit; --mstart) {
1495                 temp = mask << mstart;
1496                 if ((bitmask & temp) == temp)
1497                     goto done_now;
1498             }
1499         }
1500     done_now:
1501         /* look for any more contiguous 1's to the right of mstart */
1502         if (width == MASKRANGE_BITS) { // only when maximum mask
1503             while ((bitmask >> (mstart - 1)) & 1) { // slide right over more 1's
1504                 --mstart;
1505                 if (ddr_type == DDR4_DRAM) // only for DDR4
1506                     extras++; // count the number of extra bits
1507             }
1508         }
1509 
1510         /* Penalize any extra 1's beyond the maximum desired mask */
1511         if (extras > 0)
1512             toolong = RLEVEL_BITMASK_TOOLONG_ERROR * ((1 << extras) - 1);
1513 
1514         /* Detect if bitmask is too narrow. */
1515         if (width < 4)
1516             narrow = (4 - width) * RLEVEL_BITMASK_NARROW_ERROR;
1517 
1518         /* detect leading bubble bits, that is, any 0's between first and mstart */
1519         temp = bitmask >> (firstbit + 1);
1520         i = mstart - firstbit - 1;
1521         while (--i >= 0) {
1522             if ((temp & 1) == 0)
1523                 bubble += RLEVEL_BITMASK_BUBBLE_BITS_ERROR;
1524             temp >>= 1;
1525         }
1526 
1527         temp = bitmask >> (mstart + width + extras);
1528         i = lastbit - (mstart + width + extras - 1);
1529         while (--i >= 0) {
1530             if (temp & 1) { /* Detect 1 bits after the trailing end of the mask, including last. */
1531                 trailing += RLEVEL_BITMASK_TRAILING_BITS_ERROR;
1532             } else { /* Detect trailing bubble bits, that is, any 0's between end-of-mask and last */
1533                 tbubble  += RLEVEL_BITMASK_BUBBLE_BITS_ERROR;
1534             }
1535             temp >>= 1;
1536         }
1537     }
1538 
1539     errors = bubble + tbubble + blank + narrow + trailing + toolong;
1540 
1541     /* Pass out useful statistics */
1542     rlevel_bitmask_p->mstart = mstart;
1543     rlevel_bitmask_p->width  = width;
1544 
1545     VB_PRT(VBL_DEV2, "bm:%08lx mask:%02llx, width:%2u, mstart:%2d, fb:%2u, lb:%2u"
1546            " (bu:%2d, tb:%2d, bl:%2d, n:%2d, t:%2d, x:%2d) errors:%3d %s\n",
1547            (unsigned long) bitmask, mask, width, mstart,
1548            firstbit, lastbit, bubble, tbubble, blank, narrow,
1549            trailing, toolong, errors, (errors) ? "=> invalid" : "");
1550 
1551     return errors;
1552 }
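/* Worked example, using the bitmask 0x27B00 from the diagram above (set
 * bits {8,9,11,12,13,14,17}): fb=8, lb=17, and the widest mask that fits
 * is width 4 at mstart=11 (bits 11-14).  Scoring: one leading bubble
 * (bit 10) costs 11, two trailing bubbles (bits 15,16) cost 2*11, and the
 * stray 1 at bit 17 costs 5, so errors = 38 while the delay window passed
 * back is mstart=11, width=4.
 */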
1553 
1554 static int compute_ddr3_rlevel_delay(uint8_t mstart, uint8_t width, bdk_lmcx_rlevel_ctl_t rlevel_ctl)
1555 {
1556     int delay;
1557 
1558     debug_bitmask_print("  offset_en:%d", rlevel_ctl.cn8.offset_en);
1559 
1560     if (rlevel_ctl.s.offset_en) {
1561         delay = max(mstart, mstart + width - 1 - rlevel_ctl.s.offset);
1562     } else {
1563         /* if (rlevel_ctl.s.offset) { */ /* Experimental */
1564         if (0) {
1565             delay = max(mstart + rlevel_ctl.s.offset, mstart + 1);
1566             /* Insure that the offset delay falls within the bitmask */
1567             delay = min(delay, mstart + width-1);
1568         } else {
1569             delay = (width - 1) / 2 + mstart; /* Round down */
1570             /* delay = (width/2) + mstart; */  /* Round up */
1571         }
1572     }
1573 
1574     return delay;
1575 }
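/* Example: with offset_en clear, mstart=11/width=4 yields
 * delay = (4-1)/2 + 11 = 12, the window midpoint rounded down; with
 * offset_en set and offset=1 it yields max(11, 11+4-1-1) = 13, one step
 * in from the trailing edge of the window.
 */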
1576 
1577 #define WLEVEL_BYTE_BITS 5
1578 #define WLEVEL_BYTE_MSK  ((1UL << 5) - 1)
1579 
1580 static void update_wlevel_rank_struct(bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank,
1581                                       int byte, int delay)
1582 {
1583     bdk_lmcx_wlevel_rankx_t temp_wlevel_rank;
1584     if (byte >= 0 && byte <= 8) {
1585         temp_wlevel_rank.u = lmc_wlevel_rank->u;
1586         temp_wlevel_rank.u &= ~(WLEVEL_BYTE_MSK << (WLEVEL_BYTE_BITS * byte));
1587         temp_wlevel_rank.u |= ((delay & WLEVEL_BYTE_MSK) << (WLEVEL_BYTE_BITS * byte));
1588         lmc_wlevel_rank->u = temp_wlevel_rank.u;
1589     }
1590 }
1591 
1592 static int  get_wlevel_rank_struct(bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank,
1593                                    int byte)
1594 {
1595     int delay = 0;
1596     if (byte >= 0 && byte <= 8) {
1597         delay = ((lmc_wlevel_rank->u) >> (WLEVEL_BYTE_BITS * byte)) & WLEVEL_BYTE_MSK;
1598     }
1599     return delay;
1600 }
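/* Each byte lane's write-leveling delay is a 5-bit field at bit offset
 * 5*byte in the CSR: update_wlevel_rank_struct(&r, 2, 9) clears bits
 * 14:10 and writes 9 there, and get_wlevel_rank_struct(&r, 2) reads the
 * same field back.
 */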
1601 
1602 #if 0
1603 // entry = 1 is valid, entry = 0 is invalid
1604 static int
1605 validity_matrix[4][4] = {[0] = {1,1,1,0},  // valid pairs when cv == 0: 0,0 + 0,1 + 0,2 == "7"
1606                          [1] = {0,1,1,1},  // valid pairs when cv == 1: 1,1 + 1,2 + 1,3 == "E"
1607                          [2] = {1,0,1,1},  // valid pairs when cv == 2: 2,2 + 2,3 + 2,0 == "D"
1608                          [3] = {1,1,0,1}}; // valid pairs when cv == 3: 3,3 + 3,0 + 3,1 == "B"
1609 #endif
1610 static int
1611 validate_seq(int *wl, int *seq)
1612 {
1613     int seqx; // sequence index, step through the sequence array
1614     int bitnum;
1615     seqx = 0;
1616     while (seq[seqx+1] >= 0) { // stop on next seq entry == -1
1617         // but now, check current versus next
1618 #if 0
1619         if ( !validity_matrix [wl[seq[seqx]]] [wl[seq[seqx+1]]] )
1620             return 1;
1621 #else
1622         bitnum = (wl[seq[seqx]] << 2) | wl[seq[seqx+1]];
1623         if (!((1 << bitnum) & 0xBDE7)) // magic validity number (see matrix above)
1624             return 1;
1625 #endif
1626         seqx++;
1627     }
1628     return 0;
1629 }
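/* Derivation of the 0xBDE7 constant: bitnum = (cur << 2) | next indexes a
 * 16-bit truth table with one nibble per "cur" value.  The valid "next"
 * sets from the matrix above pack as nibble 0 = 0x7 (cur==0),
 * nibble 1 = 0xE, nibble 2 = 0xD, nibble 3 = 0xB -- i.e. 0xBDE7 read from
 * the high nibble down.
 */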
1630 
1631 static int
1632 Validate_HW_WL_Settings(bdk_node_t node, int ddr_interface_num,
1633                         bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank,
1634                         int ecc_ena)
1635 {
1636     int wl[9], byte, errors;
1637 
1638     // arrange the sequences so
1639     int useq[] = { 0,1,2,3,8,4,5,6,7,-1 }; // index 0 has byte 0, etc, ECC in middle
1640     int rseq1[] = { 8,3,2,1,0,-1 }; // index 0 is ECC, then go down
1641     int rseq2[] = { 4,5,6,7,-1 }; // index 0 has byte 4, then go up
1642     int useqno[] = { 0,1,2,3,4,5,6,7,-1 }; // index 0 has byte 0, etc, no ECC
1643     int rseq1no[] = { 3,2,1,0,-1 }; // index 0 is byte 3, then go down, no ECC
1644 
1645     // in the CSR, bytes 0-7 are always data, byte 8 is ECC
1646     for (byte = 0; byte < 8+ecc_ena; byte++) {
1647         wl[byte] = (get_wlevel_rank_struct(lmc_wlevel_rank, byte) >> 1) & 3; // preprocess :-)
1648     }
1649 
1650     errors = 0;
1651     if (__bdk_dram_is_rdimm(node, 0) != 0) { // RDIMM order
1652         errors  = validate_seq(wl, (ecc_ena) ? rseq1 : rseq1no);
1653         errors += validate_seq(wl, rseq2);
1654     } else { // UDIMM order
1655         errors  = validate_seq(wl, (ecc_ena) ? useq : useqno);
1656     }
1657 
1658     return errors;
1659 }
1660 
1661 #define RLEVEL_BYTE_BITS 6
1662 #define RLEVEL_BYTE_MSK  ((1UL << 6) - 1)
1663 
1664 static void update_rlevel_rank_struct(bdk_lmcx_rlevel_rankx_t *lmc_rlevel_rank,
1665                                       int byte, int delay)
1666 {
1667     bdk_lmcx_rlevel_rankx_t temp_rlevel_rank;
1668     if (byte >= 0 && byte <= 8) {
1669         temp_rlevel_rank.u = lmc_rlevel_rank->u & ~(RLEVEL_BYTE_MSK << (RLEVEL_BYTE_BITS * byte));
1670         temp_rlevel_rank.u |= ((delay & RLEVEL_BYTE_MSK) << (RLEVEL_BYTE_BITS * byte));
1671         lmc_rlevel_rank->u = temp_rlevel_rank.u;
1672     }
1673 }
1674 
1675 #if RLEXTRAS_PATCH || !DISABLE_SW_WL_PASS_2
1676 static int  get_rlevel_rank_struct(bdk_lmcx_rlevel_rankx_t *lmc_rlevel_rank,
1677                                    int byte)
1678 {
1679     int delay = 0;
1680     if (byte >= 0 && byte <= 8) {
1681         delay = ((lmc_rlevel_rank->u) >> (RLEVEL_BYTE_BITS * byte)) & RLEVEL_BYTE_MSK;
1682     }
1683     return delay;
1684 }
1685 #endif
1686 
1687 static void unpack_rlevel_settings(int ddr_interface_bytemask, int ecc_ena,
1688                                    rlevel_byte_data_t *rlevel_byte,
1689                                    bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank)
1690 {
1691     if ((ddr_interface_bytemask & 0xff) == 0xff) {
1692         if (ecc_ena) {
1693             rlevel_byte[8].delay = lmc_rlevel_rank.cn83xx.byte7;
1694             rlevel_byte[7].delay = lmc_rlevel_rank.cn83xx.byte6;
1695             rlevel_byte[6].delay = lmc_rlevel_rank.cn83xx.byte5;
1696             rlevel_byte[5].delay = lmc_rlevel_rank.cn83xx.byte4;
1697             rlevel_byte[4].delay = lmc_rlevel_rank.cn83xx.byte8; /* ECC */
1698         } else {
1699             rlevel_byte[7].delay = lmc_rlevel_rank.cn83xx.byte7;
1700             rlevel_byte[6].delay = lmc_rlevel_rank.cn83xx.byte6;
1701             rlevel_byte[5].delay = lmc_rlevel_rank.cn83xx.byte5;
1702             rlevel_byte[4].delay = lmc_rlevel_rank.cn83xx.byte4;
1703         }
1704     } else {
1705         rlevel_byte[8].delay = lmc_rlevel_rank.cn83xx.byte8; /* unused */
1706         rlevel_byte[7].delay = lmc_rlevel_rank.cn83xx.byte7; /* unused */
1707         rlevel_byte[6].delay = lmc_rlevel_rank.cn83xx.byte6; /* unused */
1708         rlevel_byte[5].delay = lmc_rlevel_rank.cn83xx.byte5; /* unused */
1709         rlevel_byte[4].delay = lmc_rlevel_rank.cn83xx.byte4; /* ECC */
1710     }
1711     rlevel_byte[3].delay = lmc_rlevel_rank.cn83xx.byte3;
1712     rlevel_byte[2].delay = lmc_rlevel_rank.cn83xx.byte2;
1713     rlevel_byte[1].delay = lmc_rlevel_rank.cn83xx.byte1;
1714     rlevel_byte[0].delay = lmc_rlevel_rank.cn83xx.byte0;
1715 }
1716 
1717 static void pack_rlevel_settings(int ddr_interface_bytemask, int ecc_ena,
1718                                  rlevel_byte_data_t *rlevel_byte,
1719                                  bdk_lmcx_rlevel_rankx_t *final_rlevel_rank)
1720 {
1721     bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank = *final_rlevel_rank;
1722 
1723     if ((ddr_interface_bytemask & 0xff) == 0xff) {
1724         if (ecc_ena) {
1725             lmc_rlevel_rank.cn83xx.byte7 = rlevel_byte[8].delay;
1726             lmc_rlevel_rank.cn83xx.byte6 = rlevel_byte[7].delay;
1727             lmc_rlevel_rank.cn83xx.byte5 = rlevel_byte[6].delay;
1728             lmc_rlevel_rank.cn83xx.byte4 = rlevel_byte[5].delay;
1729             lmc_rlevel_rank.cn83xx.byte8 = rlevel_byte[4].delay; /* ECC */
1730         } else {
1731             lmc_rlevel_rank.cn83xx.byte7 = rlevel_byte[7].delay;
1732             lmc_rlevel_rank.cn83xx.byte6 = rlevel_byte[6].delay;
1733             lmc_rlevel_rank.cn83xx.byte5 = rlevel_byte[5].delay;
1734             lmc_rlevel_rank.cn83xx.byte4 = rlevel_byte[4].delay;
1735         }
1736     } else {
1737         lmc_rlevel_rank.cn83xx.byte8 = rlevel_byte[8].delay;
1738         lmc_rlevel_rank.cn83xx.byte7 = rlevel_byte[7].delay;
1739         lmc_rlevel_rank.cn83xx.byte6 = rlevel_byte[6].delay;
1740         lmc_rlevel_rank.cn83xx.byte5 = rlevel_byte[5].delay;
1741         lmc_rlevel_rank.cn83xx.byte4 = rlevel_byte[4].delay;
1742     }
1743     lmc_rlevel_rank.cn83xx.byte3 = rlevel_byte[3].delay;
1744     lmc_rlevel_rank.cn83xx.byte2 = rlevel_byte[2].delay;
1745     lmc_rlevel_rank.cn83xx.byte1 = rlevel_byte[1].delay;
1746     lmc_rlevel_rank.cn83xx.byte0 = rlevel_byte[0].delay;
1747 
1748     *final_rlevel_rank = lmc_rlevel_rank;
1749 }
1750 
1751 #if !DISABLE_SW_WL_PASS_2
1752 static void rlevel_to_wlevel(bdk_lmcx_rlevel_rankx_t *lmc_rlevel_rank,
1753                              bdk_lmcx_wlevel_rankx_t *lmc_wlevel_rank, int byte)
1754 {
1755     int byte_delay = get_rlevel_rank_struct(lmc_rlevel_rank, byte);
1756 
1757     debug_print("Estimating Wlevel delay byte %d: ", byte);
1758     debug_print("Rlevel=%d => ", byte_delay);
1759     byte_delay = divide_roundup(byte_delay,2) & 0x1e;
1760     debug_print("Wlevel=%d\n", byte_delay);
1761     update_wlevel_rank_struct(lmc_wlevel_rank, byte, byte_delay);
1762 }
1763 #endif /* !DISABLE_SW_WL_PASS_2 */
1764 
1765 /* Delay trend: constant=0, decreasing=-1, increasing=1 */
1766 static int calc_delay_trend(int v)
1767 {
1768     if (v == 0)
1769         return (0);
1770     if (v < 0)
1771         return (-1);
1772     return 1;
1773 }
1774 
1775 /* Evaluate delay sequence across the whole range of byte delays while
1776 ** keeping track of the overall delay trend, increasing or decreasing.
1777 ** If the trend changes, charge an error amount to the score.
1778 */
1779 
1780 // NOTE: "max_adj_delay_inc" argument is, by default, 1 for DDR3 and 2 for DDR4
1781 
1782 static int nonsequential_delays(rlevel_byte_data_t *rlevel_byte,
1783                                 int start, int end, int max_adj_delay_inc)
1784 {
1785     int error = 0;
1786     int delay_trend, prev_trend = 0;
1787     int byte_idx;
1788     int delay_inc;
1789     int delay_diff;
1790     int byte_err;
1791 
1792     for (byte_idx = start; byte_idx < end; ++byte_idx) {
1793         byte_err = 0;
1794 
1795         delay_diff = rlevel_byte[byte_idx+1].delay - rlevel_byte[byte_idx].delay;
1796         delay_trend = calc_delay_trend(delay_diff);
1797 
1798         debug_bitmask_print("Byte %d: %2d, Byte %d: %2d, delay_trend: %2d, prev_trend: %2d",
1799                             byte_idx+0, rlevel_byte[byte_idx+0].delay,
1800                             byte_idx+1, rlevel_byte[byte_idx+1].delay,
1801                             delay_trend, prev_trend);
1802 
1803         /* Increment error each time the trend changes to the opposite direction.
1804          */
1805         if ((prev_trend != 0) && (delay_trend != 0) && (prev_trend != delay_trend)) {
1806             byte_err += RLEVEL_NONSEQUENTIAL_DELAY_ERROR;
1807             prev_trend = delay_trend;
1808             debug_bitmask_print(" => Nonsequential byte delay");
1809         }
1810 
1811         delay_inc = _abs(delay_diff); // how big was the delay change, if any
1812 
1813         /* Even if the trend did not change to the opposite direction, check for
1814            the magnitude of the change, and scale the penalty by the amount that
1815            the size is larger than the provided limit.
1816          */
1817         if ((max_adj_delay_inc != 0) && (delay_inc > max_adj_delay_inc)) {
1818             byte_err += (delay_inc - max_adj_delay_inc) * RLEVEL_ADJACENT_DELAY_ERROR;
1819             debug_bitmask_print(" => Adjacent delay error");
1820         }
1821 
1822         debug_bitmask_print("\n");
1823         if (delay_trend != 0)
1824             prev_trend = delay_trend;
1825 
1826         rlevel_byte[byte_idx+1].sqerrs = byte_err;
1827         error += byte_err;
1828     }
1829     return error;
1830 }
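/* Example with byte delays {10, 12, 11, 14} and max_adj_delay_inc == 2:
 * 10->12 increases by exactly 2 (no penalty); 12->11 reverses the trend
 * (one RLEVEL_NONSEQUENTIAL_DELAY_ERROR); 11->14 reverses it again and
 * jumps by 3, adding another nonsequential penalty plus
 * (3-2)*RLEVEL_ADJACENT_DELAY_ERROR.
 */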
1831 
1832 static int roundup_ddr3_wlevel_bitmask(int bitmask)
1833 {
1834     int shifted_bitmask;
1835     int leader;
1836     int delay;
1837 
1838     for (leader=0; leader<8; ++leader) {
1839         shifted_bitmask = (bitmask>>leader);
1840         if ((shifted_bitmask&1) == 0)
1841             break;
1842     }
1843 
1844     for (/*leader=leader*/; leader<16; ++leader) {
1845         shifted_bitmask = (bitmask>>(leader%8));
1846         if (shifted_bitmask&1)
1847             break;
1848     }
1849 
1850     delay = (leader & 1) ? leader + 1 : leader;
1851     delay = delay % 8;
1852 
1853     return delay;
1854 }
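/* Example: bitmask 0xE1 (bits 7,6,5 set plus a wrapped 1 at bit 0): the
 * first loop skips past the wrapped 1 at bit 0, the second finds the run
 * start at bit 5, and the odd position is rounded up to an even delay
 * of 6.
 */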
1855 
1856 /* Check to see if any custom offset values are provided */
1857 static int is_dll_offset_provided(const int8_t *dll_offset_table)
1858 {
1859     int i;
1860     if (dll_offset_table != NULL) {
1861         for (i=0; i<9; ++i) {
1862             if (dll_offset_table[i] != 0)
1863                 return (1);
1864         }
1865     }
1866     return (0);
1867 }
1868 
1869 /////////////////// These are the RLEVEL settings display routines
1870 
1871 // flags
1872 #define WITH_NOTHING 0
1873 #define WITH_SCORE   1
1874 #define WITH_AVERAGE 2
1875 #define WITH_FINAL   4
1876 #define WITH_COMPUTE 8
1877 static void do_display_RL(bdk_node_t node, int ddr_interface_num,
1878                           bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank,
1879                           int rank, int flags, int score)
1880 {
1881     char score_buf[16];
1882     if (flags & WITH_SCORE)
1883         snprintf(score_buf, sizeof(score_buf), "(%d)", score);
1884     else {
1885         score_buf[0] = ' '; score_buf[1] = 0;
1886     }
1887 
1888     const char *msg_buf;
1889     char hex_buf[20];
1890     if (flags & WITH_AVERAGE) {
1891         msg_buf = "  DELAY AVERAGES  ";
1892     } else if (flags & WITH_FINAL) {
1893         msg_buf = "  FINAL SETTINGS  ";
1894     } else if (flags & WITH_COMPUTE) {
1895         msg_buf = "  COMPUTED DELAYS ";
1896     } else {
1897         snprintf(hex_buf, sizeof(hex_buf), "0x%016lX", lmc_rlevel_rank.u);
1898         msg_buf = hex_buf;
1899     }
1900 
1901     ddr_print("N%d.LMC%d.R%d: Rlevel Rank %#4x, %s  : %5d %5d %5d %5d %5d %5d %5d %5d %5d %s\n",
1902               node, ddr_interface_num, rank,
1903               lmc_rlevel_rank.s.status,
1904               msg_buf,
1905               lmc_rlevel_rank.cn83xx.byte8,
1906               lmc_rlevel_rank.cn83xx.byte7,
1907               lmc_rlevel_rank.cn83xx.byte6,
1908               lmc_rlevel_rank.cn83xx.byte5,
1909               lmc_rlevel_rank.cn83xx.byte4,
1910               lmc_rlevel_rank.cn83xx.byte3,
1911               lmc_rlevel_rank.cn83xx.byte2,
1912               lmc_rlevel_rank.cn83xx.byte1,
1913               lmc_rlevel_rank.cn83xx.byte0,
1914               score_buf
1915               );
1916 }
1917 
1918 static inline void
1919 display_RL(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank)
1920 {
1921     do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 0, 0);
1922 }
1923 
1924 static inline void
1925 display_RL_with_score(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score)
1926 {
1927     do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 1, score);
1928 }
1929 
1930 #if !PICK_BEST_RANK_SCORE_NOT_AVG
1931 static inline void
1932 display_RL_with_average(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score)
1933 {
1934     do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 3, score);
1935 }
1936 #endif
1937 
1938 static inline void
1939 display_RL_with_final(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank)
1940 {
1941     do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 4, 0);
1942 }
1943 
1944 static inline void
1945 display_RL_with_computed(bdk_node_t node, int ddr_interface_num, bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score)
1946 {
1947     do_display_RL(node, ddr_interface_num, lmc_rlevel_rank, rank, 9, score);
1948 }
1949 
1950 // flag values
1951 #define WITH_RODT_BLANK      0
1952 #define WITH_RODT_SKIPPING   1
1953 #define WITH_RODT_BESTROW    2
1954 #define WITH_RODT_BESTSCORE  3
1955 // control
1956 #define SKIP_SKIPPING 1
1957 
1958 static const char *with_rodt_canned_msgs[4] = { "          ", "SKIPPING  ", "BEST ROW  ", "BEST SCORE" };
1959 
1960 static void display_RL_with_RODT(bdk_node_t node, int ddr_interface_num,
1961                                  bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank, int rank, int score,
1962                                  int nom_ohms, int rodt_ohms, int flag)
1963 {
1964     const char *msg_buf;
1965     char set_buf[20];
1966 #if SKIP_SKIPPING
1967     if (flag == WITH_RODT_SKIPPING) return;
1968 #endif
1969     msg_buf = with_rodt_canned_msgs[flag];
1970     if (nom_ohms < 0) {
1971         snprintf(set_buf, sizeof(set_buf), "    RODT %3d    ", rodt_ohms);
1972     } else {
1973         snprintf(set_buf, sizeof(set_buf), "NOM %3d RODT %3d", nom_ohms, rodt_ohms);
1974     }
1975 
1976     VB_PRT(VBL_TME, "N%d.LMC%d.R%d: Rlevel %s   %s  : %5d %5d %5d %5d %5d %5d %5d %5d %5d (%d)\n",
1977            node, ddr_interface_num, rank,
1978            set_buf, msg_buf,
1979            lmc_rlevel_rank.cn83xx.byte8,
1980            lmc_rlevel_rank.cn83xx.byte7,
1981            lmc_rlevel_rank.cn83xx.byte6,
1982            lmc_rlevel_rank.cn83xx.byte5,
1983            lmc_rlevel_rank.cn83xx.byte4,
1984            lmc_rlevel_rank.cn83xx.byte3,
1985            lmc_rlevel_rank.cn83xx.byte2,
1986            lmc_rlevel_rank.cn83xx.byte1,
1987            lmc_rlevel_rank.cn83xx.byte0,
1988            score
1989            );
1990 
1991     // FIXME: does this help make the output a little easier to focus?
1992     if (flag == WITH_RODT_BESTSCORE) {
1993         VB_PRT(VBL_DEV, "-----------\n");
1994     }
1995 }
1996 
1997 static void
1998 do_display_WL(bdk_node_t node, int ddr_interface_num, bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank, int rank, int flags)
1999 {
2000     const char *msg_buf;
2001     char hex_buf[20];
2002     int vbl;
2003     if (flags & WITH_FINAL) {
2004         msg_buf = "  FINAL SETTINGS  ";
2005         vbl = VBL_NORM;
2006     } else {
2007         snprintf(hex_buf, sizeof(hex_buf), "0x%016lX", lmc_wlevel_rank.u);
2008         msg_buf = hex_buf;
2009         vbl = VBL_FAE;
2010     }
2011 
2012     VB_PRT(vbl, "N%d.LMC%d.R%d: Wlevel Rank %#4x, %s  : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2013             node, ddr_interface_num, rank,
2014             lmc_wlevel_rank.s.status,
2015             msg_buf,
2016             lmc_wlevel_rank.s.byte8,
2017             lmc_wlevel_rank.s.byte7,
2018             lmc_wlevel_rank.s.byte6,
2019             lmc_wlevel_rank.s.byte5,
2020             lmc_wlevel_rank.s.byte4,
2021             lmc_wlevel_rank.s.byte3,
2022             lmc_wlevel_rank.s.byte2,
2023             lmc_wlevel_rank.s.byte1,
2024             lmc_wlevel_rank.s.byte0
2025             );
2026 }
2027 
2028 static inline void
2029 display_WL(bdk_node_t node, int ddr_interface_num, bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank, int rank)
2030 {
2031     do_display_WL(node, ddr_interface_num, lmc_wlevel_rank, rank, WITH_NOTHING);
2032 }
2033 
2034 static inline void
2035 display_WL_with_final(bdk_node_t node, int ddr_interface_num, bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank, int rank)
2036 {
2037     do_display_WL(node, ddr_interface_num, lmc_wlevel_rank, rank, WITH_FINAL);
2038 }
2039 
2040 // pretty-print bitmask adjuster
2041 static uint64_t
2042 PPBM(uint64_t bm)
2043 {
2044     if (bm != 0ul) {
2045         while ((bm & 0x0fful) == 0ul)
2046             bm >>= 4;
2047     }
2048     return bm;
2049 }
2050 
2051 // xlate PACKED index to UNPACKED index to use with rlevel_byte
2052 #define XPU(i,e) (((i) < 4)?(i):((i)<8)?(i)+(e):4)
2053 // xlate UNPACKED index to PACKED index to use with rlevel_bitmask
2054 #define XUP(i,e) (((i) < 4)?(i):((i)>4)?(i)-(e):8)
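// e.g. with ecc_ena == 1: XPU maps PACKED 8 (ECC) to UNPACKED 4 and PACKED
// 4..7 up to 5..8; XUP inverts that, so XUP(XPU(i,1),1) == i for 0 <= i <= 8.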
2055 
2056 // flag values
2057 #define WITH_WL_BITMASKS      0
2058 #define WITH_RL_BITMASKS      1
2059 #define WITH_RL_MASK_SCORES   2
2060 #define WITH_RL_SEQ_SCORES    3
2061 static void
2062 do_display_BM(bdk_node_t node, int ddr_interface_num, int rank, void *bm, int flags, int ecc_ena)
2063 {
2064     int ecc = !!ecc_ena;
2065     if (flags == WITH_WL_BITMASKS) { // wlevel_bitmask array in PACKED index order, so just print them
2066         int *bitmasks = (int *)bm;
2067 
2068         ddr_print("N%d.LMC%d.R%d: Wlevel Debug Results                  : %05x %05x %05x %05x %05x %05x %05x %05x %05x\n",
2069               node, ddr_interface_num, rank,
2070               bitmasks[8],
2071               bitmasks[7],
2072               bitmasks[6],
2073               bitmasks[5],
2074               bitmasks[4],
2075               bitmasks[3],
2076               bitmasks[2],
2077               bitmasks[1],
2078               bitmasks[0]
2079               );
2080     } else
2081     if (flags == WITH_RL_BITMASKS) { // rlevel_bitmask array in PACKED index order, so just print them
2082         rlevel_bitmask_t *rlevel_bitmask = (rlevel_bitmask_t *)bm;
2083         ddr_print("N%d.LMC%d.R%d: Rlevel Debug Bitmasks        8:0      : %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx\n",
2084                   node, ddr_interface_num, rank,
2085                   PPBM(rlevel_bitmask[8].bm),
2086                   PPBM(rlevel_bitmask[7].bm),
2087                   PPBM(rlevel_bitmask[6].bm),
2088                   PPBM(rlevel_bitmask[5].bm),
2089                   PPBM(rlevel_bitmask[4].bm),
2090                   PPBM(rlevel_bitmask[3].bm),
2091                   PPBM(rlevel_bitmask[2].bm),
2092                   PPBM(rlevel_bitmask[1].bm),
2093                   PPBM(rlevel_bitmask[0].bm)
2094                   );
2095     } else
2096     if (flags == WITH_RL_MASK_SCORES) { // rlevel_bitmask array in PACKED index order, so just print them
2097         rlevel_bitmask_t *rlevel_bitmask = (rlevel_bitmask_t *)bm;
2098         ddr_print("N%d.LMC%d.R%d: Rlevel Debug Bitmask Scores  8:0      : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2099                   node, ddr_interface_num, rank,
2100                   rlevel_bitmask[8].errs,
2101                   rlevel_bitmask[7].errs,
2102                   rlevel_bitmask[6].errs,
2103                   rlevel_bitmask[5].errs,
2104                   rlevel_bitmask[4].errs,
2105                   rlevel_bitmask[3].errs,
2106                   rlevel_bitmask[2].errs,
2107                   rlevel_bitmask[1].errs,
2108                   rlevel_bitmask[0].errs
2109                   );
2110     } else
2111     if (flags == WITH_RL_SEQ_SCORES) { // rlevel_byte array in UNPACKED index order, so xlate and print them
2112         rlevel_byte_data_t *rlevel_byte = (rlevel_byte_data_t *)bm;
2113         ddr_print("N%d.LMC%d.R%d: Rlevel Debug Non-seq Scores  8:0      : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2114                   node, ddr_interface_num, rank,
2115                   rlevel_byte[XPU(8,ecc)].sqerrs,
2116                   rlevel_byte[XPU(7,ecc)].sqerrs,
2117                   rlevel_byte[XPU(6,ecc)].sqerrs,
2118                   rlevel_byte[XPU(5,ecc)].sqerrs,
2119                   rlevel_byte[XPU(4,ecc)].sqerrs,
2120                   rlevel_byte[XPU(3,ecc)].sqerrs,
2121                   rlevel_byte[XPU(2,ecc)].sqerrs,
2122                   rlevel_byte[XPU(1,ecc)].sqerrs,
2123                   rlevel_byte[XPU(0,ecc)].sqerrs
2124                   );
2125     }
2126 }
2127 
2128 static inline void
2129 display_WL_BM(bdk_node_t node, int ddr_interface_num, int rank, int *bitmasks)
2130 {
2131     do_display_BM(node, ddr_interface_num, rank, (void *)bitmasks, WITH_WL_BITMASKS, 0);
2132 }
2133 
2134 static inline void
2135 display_RL_BM(bdk_node_t node, int ddr_interface_num, int rank, rlevel_bitmask_t *bitmasks, int ecc_ena)
2136 {
2137     do_display_BM(node, ddr_interface_num, rank, (void *)bitmasks, WITH_RL_BITMASKS, ecc_ena);
2138 }
2139 
2140 static inline void
2141 display_RL_BM_scores(bdk_node_t node, int ddr_interface_num, int rank, rlevel_bitmask_t *bitmasks, int ecc_ena)
2142 {
2143     do_display_BM(node, ddr_interface_num, rank, (void *)bitmasks, WITH_RL_MASK_SCORES, ecc_ena);
2144 }
2145 
2146 static inline void
2147 display_RL_SEQ_scores(bdk_node_t node, int ddr_interface_num, int rank, rlevel_byte_data_t *bytes, int ecc_ena)
2148 {
2149     do_display_BM(node, ddr_interface_num, rank, (void *)bytes, WITH_RL_SEQ_SCORES, ecc_ena);
2150 }
2151 
2152 unsigned short load_dll_offset(bdk_node_t node, int ddr_interface_num,
2153                                int dll_offset_mode, int byte_offset, int byte)
2154 {
2155     bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
2156     /* byte_sel:
2157        0x1 = byte 0, ..., 0x9 = byte 8
2158        0xA = all bytes */
2159     int byte_sel = (byte == 10) ? byte : byte + 1;
2160 
2161     ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
2162     SET_DDR_DLL_CTL3(load_offset, 0);
2163     DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num),        ddr_dll_ctl3.u);
2164     ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
2165 
2166     SET_DDR_DLL_CTL3(mode_sel, dll_offset_mode);
2167     SET_DDR_DLL_CTL3(offset, (_abs(byte_offset)&0x3f) | (_sign(byte_offset) << 6)); /* Always 6-bit field? */
2168     SET_DDR_DLL_CTL3(byte_sel, byte_sel);
2169     DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num),        ddr_dll_ctl3.u);
2170     ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
2171 
2172     SET_DDR_DLL_CTL3(load_offset, 1);
2173     DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num),        ddr_dll_ctl3.u);
2174     ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
2175 
2176     return ((unsigned short) GET_DDR_DLL_CTL3(offset));
2177 }
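/* The offset field is sign-magnitude: bit 6 carries the sign (assuming
 * _sign() returns 1 for negative values) and bits 5:0 the magnitude, so a
 * byte_offset of -5 is encoded as 0x45 and +5 as 0x05.
 */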
2178 
2179 void change_dll_offset_enable(bdk_node_t node, int ddr_interface_num, int change)
2180 {
2181     bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
2182 
2183     ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
2184     SET_DDR_DLL_CTL3(offset_ena, !!change);
2185     DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num),        ddr_dll_ctl3.u);
2186     ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
2187 }
2188 
2189 static void process_custom_dll_offsets(bdk_node_t node, int ddr_interface_num, const char *enable_str,
2190                                        const int8_t *offsets, const char *byte_str, int mode)
2191 {
2192     const char *s;
2193     int enabled;
2194     int provided;
2195 
2196     if ((s = lookup_env_parameter("%s", enable_str)) != NULL) {
2197         enabled = !!strtol(s, NULL, 0);
2198     } else
2199         enabled = -1;
2200 
2201     // enabled == -1: no override, do only configured offsets if provided
2202     // enabled ==  0: override OFF, do NOT do it even if configured offsets provided
2203     // enabled ==  1: override ON, do it for overrides plus configured offsets
2204 
2205     if (enabled == 0)
2206         return;
2207 
2208     provided = is_dll_offset_provided(offsets);
2209 
2210     if (enabled < 0 && !provided)
2211         return;
2212 
2213     int byte_offset;
2214     unsigned short offset[9] = {0};
2215     int byte;
2216 
2217     // offsets need to be disabled while loading
2218     change_dll_offset_enable(node, ddr_interface_num, 0);
2219 
2220     for (byte = 0; byte < 9; ++byte) {
2221 
2222         // always take the provided, if available
2223         byte_offset = (provided) ? offsets[byte] : 0;
2224 
2225         // then, if enabled, use any overrides present
2226         if (enabled > 0) {
2227             if ((s = lookup_env_parameter(byte_str, ddr_interface_num, byte)) != NULL) {
2228                 byte_offset = strtol(s, NULL, 0);
2229             }
2230         }
2231 
2232         offset[byte] = load_dll_offset(node, ddr_interface_num, mode, byte_offset, byte);
2233     }
2234 
2235     // re-enable offsets after loading
2236     change_dll_offset_enable(node, ddr_interface_num, 1);
2237 
2238     ddr_print("N%d.LMC%d: DLL %s Offset 8:0       :"
2239               "  0x%02x  0x%02x  0x%02x  0x%02x  0x%02x  0x%02x  0x%02x  0x%02x  0x%02x\n",
2240               node, ddr_interface_num, (mode == 2) ? "Read " : "Write",
2241               offset[8], offset[7], offset[6], offset[5], offset[4],
2242               offset[3], offset[2], offset[1], offset[0]);
2243 }
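/* Usage sketch: enable_str names the single on/off override envvar and
 * byte_str is a printf-style format taking (ddr_interface_num, byte) that
 * names the per-byte override envvars; mode is the DLL_CTL3 MODE_SEL
 * value, printed above as 2 == read, else write.
 */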
2244 
2245 void perform_octeon3_ddr3_sequence(bdk_node_t node, int rank_mask, int ddr_interface_num, int sequence)
2246 {
2247     /*
2248      * 3. Without changing any other fields in LMC(0)_CONFIG, write
2249      *    LMC(0)_CONFIG[RANKMASK] then write both
2250      *    LMC(0)_SEQ_CTL[SEQ_SEL,INIT_START] = 1 with a single CSR write
2251      *    operation. LMC(0)_CONFIG[RANKMASK] bits should be set to indicate
2252      *    the ranks that will participate in the sequence.
2253      *
2254      *    The LMC(0)_SEQ_CTL[SEQ_SEL] value should select power-up/init or
2255      *    selfrefresh exit, depending on whether the DRAM parts are in
2256      *    self-refresh and whether their contents should be preserved. While
2257      *    LMC performs these sequences, it will not perform any other DDR3
2258      *    transactions. When the sequence is complete, hardware sets the
2259      *    LMC(0)_CONFIG[INIT_STATUS] bits for the ranks that have been
2260      *    initialized.
2261      *
2262      *    If power-up/init is selected immediately following a DRESET
2263      *    assertion, LMC executes the sequence described in the "Reset and
2264      *    Initialization Procedure" section of the JEDEC DDR3
2265      *    specification. This includes activating CKE, writing all four DDR3
2266      *    mode registers on all selected ranks, and issuing the required ZQCL
2267      *    command. The LMC(0)_CONFIG[RANKMASK] value should select all ranks
2268      *    with attached DRAM in this case. If LMC(0)_CONTROL[RDIMM_ENA] = 1,
2269      *    LMC writes the JEDEC standard SSTE32882 control words selected by
2270      *    LMC(0)_DIMM_CTL[DIMM*_WMASK] between DDR_CKE* signal assertion and
2271      *    the first DDR3 mode register write operation.
2272      *    LMC(0)_DIMM_CTL[DIMM*_WMASK] should be cleared to 0 if the
2273      *    corresponding DIMM is not present.
2274      *
2275      *    If self-refresh exit is selected, LMC executes the required SRX
2276      *    command followed by a refresh and ZQ calibration. Section 4.5
2277      *    describes behavior of a REF + ZQCS.  LMC does not write the DDR3
2278      *    mode registers as part of this sequence, and the mode register
2279      *    parameters must match at self-refresh entry and exit times.
2280      *
2281      * 4. Read LMC(0)_SEQ_CTL and wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be
2282      *    set.
2283      *
2284      * 5. Read LMC(0)_CONFIG[INIT_STATUS] and confirm that all ranks have
2285      *    been initialized.
2286      */
2287 
2288     const char *s;
2289     static const char *sequence_str[] = {
2290         "Power-up/init",
2291         "Read-leveling",
2292         "Self-refresh entry",
2293         "Self-refresh exit",
2294         "Illegal",
2295         "Illegal",
2296         "Write-leveling",
2297         "Init Register Control Words",
2298         "Mode Register Write",
2299         "MPR Register Access",
2300         "LMC Deskew/Internal Vref training",
2301         "Offset Training"
2302     };
2303 
2304     bdk_lmcx_seq_ctl_t seq_ctl;
2305     bdk_lmcx_config_t  lmc_config;
2306 
2307     lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
2308     lmc_config.s.rankmask     = rank_mask;
2309     DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
2310 
2311     seq_ctl.u    = 0;
2312 
2313     seq_ctl.s.init_start  = 1;
2314     seq_ctl.s.seq_sel    = sequence;
2315 
2316     VB_PRT(VBL_SEQ, "N%d.LMC%d: Performing LMC sequence=%x: rank_mask=0x%02x, %s\n",
2317             node, ddr_interface_num, sequence, rank_mask, sequence < 12 ? sequence_str[sequence] : "");
2318 
2319     if ((s = lookup_env_parameter("ddr_trigger_sequence%d", sequence)) != NULL) {
2320             /* FIXME(dhendrix): this appears to be meant for the eval board */
2321 #if 0
2322         int trigger = strtoul(s, NULL, 0);
2323         if (trigger)
2324             pulse_gpio_pin(node, 1, 2);
2325 #endif
2326         error_print("ignoring env parameter ddr_trigger_sequence%d: GPIO triggering is not supported here\n", sequence);
2327     }
2328 
2329     DRAM_CSR_WRITE(node, BDK_LMCX_SEQ_CTL(ddr_interface_num), seq_ctl.u);
2330     BDK_CSR_READ(node, BDK_LMCX_SEQ_CTL(ddr_interface_num));
2331 
2332     /* Wait 100us minimum before checking for sequence complete */
2333     bdk_wait_usec(100);
2334     if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_SEQ_CTL(ddr_interface_num), seq_complete, ==, 1, 1000000))
2335     {
2336         error_print("N%d.LMC%d: Timeout waiting for LMC sequence=%x, rank_mask=0x%02x, ignoring...\n",
2337                     node, ddr_interface_num, sequence, rank_mask);
2338     }
2339     else {
2340         VB_PRT(VBL_SEQ, "N%d.LMC%d: LMC sequence=%x: Completed.\n", node, ddr_interface_num, sequence);
2341     }
2342 }
2343 
2344 void ddr4_mrw(bdk_node_t node, int ddr_interface_num, int rank,
2345               int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1)
2346 {
2347     bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl;
2348 
2349     lmc_mr_mpr_ctl.u = 0;
2350     lmc_mr_mpr_ctl.s.mr_wr_addr                 = (mr_wr_addr == -1) ? 0 : mr_wr_addr;
2351     lmc_mr_mpr_ctl.s.mr_wr_sel                  = mr_wr_sel;
2352     lmc_mr_mpr_ctl.s.mr_wr_rank                 = rank;
2353     //lmc_mr_mpr_ctl.s.mr_wr_pda_mask           =
2354     //lmc_mr_mpr_ctl.s.mr_wr_pda_enable         =
2355     //lmc_mr_mpr_ctl.s.mpr_loc                  =
2356     //lmc_mr_mpr_ctl.s.mpr_wr                   =
2357     //lmc_mr_mpr_ctl.s.mpr_bit_select           =
2358     //lmc_mr_mpr_ctl.s.mpr_byte_select          =
2359     //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable    =
2360     lmc_mr_mpr_ctl.s.mr_wr_use_default_value    = (mr_wr_addr == -1) ? 1 : 0;
2361     lmc_mr_mpr_ctl.s.mr_wr_bg1                  = mr_wr_bg1;
2362     DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
2363 
2364     /* Mode Register Write */
2365     perform_octeon3_ddr3_sequence(node, 1 << rank, ddr_interface_num, 0x8);
2366 }
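/* Passing mr_wr_addr == -1 selects the rank's default mode register
 * contents instead of an explicit value: MR_WR_USE_DEFAULT_VALUE is set
 * and the address field is written as zero.  e.g. ddr4_mrw(node, lmc,
 * rank, -1, 3, 0) would rewrite MR3 on the A-side from the defaults.
 */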
2367 
2368 #define InvA0_17(x) (x ^ 0x22bf8)
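// InvA0_17 flips the address bits that a DDR4 register inverts on its
// B-side outputs: 0x22bf8 covers A3-A9, A11, A13 and A17 (the bank bits
// are handled separately via the ~3 mr_wr_sel and mr_wr_bg1 below).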
2369 static void set_mpr_mode (bdk_node_t node, int rank_mask,
2370                           int ddr_interface_num, int dimm_count, int mpr, int bg1)
2371 {
2372     int rankx;
2373 
2374     ddr_print("All Ranks: Set mpr mode = %x %c-side\n",
2375               mpr, (bg1==0) ? 'A' : 'B');
2376 
2377     for (rankx = 0; rankx < dimm_count*4; rankx++) {
2378         if (!(rank_mask & (1 << rankx)))
2379             continue;
2380         if (bg1 == 0)
2381             ddr4_mrw(node, ddr_interface_num, rankx,          mpr<<2,   3, bg1); /* MR3 A-side */
2382         else
2383             ddr4_mrw(node, ddr_interface_num, rankx, InvA0_17(mpr<<2), ~3, bg1); /* MR3 B-side */
2384     }
2385 }
2386 
2387 #if ENABLE_DISPLAY_MPR_PAGE
2388 static void do_ddr4_mpr_read(bdk_node_t node, int ddr_interface_num, int rank,
2389                       int page, int location)
2390 {
2391     bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl;
2392 
2393     lmc_mr_mpr_ctl.u = BDK_CSR_READ(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num));
2394 
2395     lmc_mr_mpr_ctl.s.mr_wr_addr                 = 0;
2396     lmc_mr_mpr_ctl.s.mr_wr_sel                  = page; /* Page */
2397     lmc_mr_mpr_ctl.s.mr_wr_rank                 = rank;
2398     //lmc_mr_mpr_ctl.s.mr_wr_pda_mask           =
2399     //lmc_mr_mpr_ctl.s.mr_wr_pda_enable         =
2400     lmc_mr_mpr_ctl.s.mpr_loc                    = location;
2401     lmc_mr_mpr_ctl.s.mpr_wr                     = 0; /* Read=0, Write=1 */
2402     //lmc_mr_mpr_ctl.s.mpr_bit_select           =
2403     //lmc_mr_mpr_ctl.s.mpr_byte_select          =
2404     //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable    =
2405     //lmc_mr_mpr_ctl.s.mr_wr_use_default_value  =
2406     //lmc_mr_mpr_ctl.s.mr_wr_bg1                =
2407 
2408     DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
2409 
2410     /* MPR register access sequence */
2411     perform_octeon3_ddr3_sequence(node, 1 << rank, ddr_interface_num, 0x9);
2412 
2413     debug_print("LMC_MR_MPR_CTL             : 0x%016lx\n", lmc_mr_mpr_ctl.u);
2414     debug_print("lmc_mr_mpr_ctl.s.mr_wr_addr: 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_addr);
2415     debug_print("lmc_mr_mpr_ctl.s.mr_wr_sel : 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_sel);
2416     debug_print("lmc_mr_mpr_ctl.s.mpr_loc   : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_loc);
2417     debug_print("lmc_mr_mpr_ctl.s.mpr_wr    : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_wr);
2418 
2419 }
2420 #endif
2421 
2422 static int set_rdimm_mode(bdk_node_t node, int ddr_interface_num, int enable)
2423 {
2424     bdk_lmcx_control_t lmc_control;
2425     int save_rdimm_mode;
2426 
2427     lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
2428     save_rdimm_mode = lmc_control.s.rdimm_ena;
2429     lmc_control.s.rdimm_ena       = enable;
2430     VB_PRT(VBL_FAE, "Setting RDIMM_ENA = %x\n", enable);
2431     DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
2432 
2433     return (save_rdimm_mode);
2434 }
2435 
2436 #if ENABLE_DISPLAY_MPR_PAGE
2437 static void ddr4_mpr_read(bdk_node_t node, int ddr_interface_num, int rank,
2438                           int page, int location, uint64_t *mpr_data)
2439 {
2440     do_ddr4_mpr_read(node, ddr_interface_num, rank, page, location);
2441 
2442     mpr_data[0] = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA0(ddr_interface_num));
2443     mpr_data[1] = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA1(ddr_interface_num));
2444     mpr_data[2] = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA2(ddr_interface_num));
2445 
2446     debug_print("MPR Read %016lx.%016lx.%016lx\n", mpr_data[2], mpr_data[1], mpr_data[0]);
2447 }
2448 
2449 /* Display MPR values for Page Location */
2450 static void Display_MPR_Page_Location(bdk_node_t node, int rank,
2451                                       int ddr_interface_num, int dimm_count,
2452                                       int page, int location, uint64_t *mpr_data)
2453 {
2454     ddr4_mpr_read(node, ddr_interface_num, rank, page, location, mpr_data);
2455     ddr_print("MPR Page %d, Loc %d %016lx.%016lx.%016lx\n",
2456               page, location, mpr_data[2], mpr_data[1], mpr_data[0]);
2457 }
2458 
2459 /* Display MPR values for Page */
2460 static void Display_MPR_Page(bdk_node_t node, int rank_mask,
2461                       int ddr_interface_num, int dimm_count, int page)
2462 {
2463     int rankx;
2464     uint64_t mpr_data[3];
2465 
2466     for (rankx = 0; rankx < dimm_count * 4;rankx++) {
2467         if (!(rank_mask & (1 << rankx)))
2468             continue;
2469 
2470         ddr_print("Rank %d: MPR values for Page %d\n", rankx, page);
2471         for (int location = 0; location < 4; location++) {
2472             Display_MPR_Page_Location(node, rankx, ddr_interface_num, dimm_count,
2473                                       page, location, &mpr_data[0]);
2474         }
2475 
2476     } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
2477 }
2478 #endif
2479 
2480 static void ddr4_mpr_write(bdk_node_t node, int ddr_interface_num, int rank,
2481                     int page, int location, uint8_t mpr_data)
2482 {
2483     bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl;
2484 
2485     lmc_mr_mpr_ctl.u = 0;
2486     lmc_mr_mpr_ctl.s.mr_wr_addr                 = mpr_data;
2487     lmc_mr_mpr_ctl.s.mr_wr_sel                  = page; /* Page */
2488     lmc_mr_mpr_ctl.s.mr_wr_rank                 = rank;
2489     //lmc_mr_mpr_ctl.s.mr_wr_pda_mask           =
2490     //lmc_mr_mpr_ctl.s.mr_wr_pda_enable         =
2491     lmc_mr_mpr_ctl.s.mpr_loc                    = location;
2492     lmc_mr_mpr_ctl.s.mpr_wr                     = 1; /* Read=0, Write=1 */
2493     //lmc_mr_mpr_ctl.s.mpr_bit_select           =
2494     //lmc_mr_mpr_ctl.s.mpr_byte_select          =
2495     //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable    =
2496     //lmc_mr_mpr_ctl.s.mr_wr_use_default_value  =
2497     //lmc_mr_mpr_ctl.s.mr_wr_bg1                =
2498     DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
2499 
2500     /* MPR register access sequence */
2501     perform_octeon3_ddr3_sequence(node, (1 << rank), ddr_interface_num, 0x9);
2502 
2503     debug_print("LMC_MR_MPR_CTL             : 0x%016lx\n", lmc_mr_mpr_ctl.u);
2504     debug_print("lmc_mr_mpr_ctl.s.mr_wr_addr: 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_addr);
2505     debug_print("lmc_mr_mpr_ctl.s.mr_wr_sel : 0x%02x\n", lmc_mr_mpr_ctl.s.mr_wr_sel);
2506     debug_print("lmc_mr_mpr_ctl.s.mpr_loc   : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_loc);
2507     debug_print("lmc_mr_mpr_ctl.s.mpr_wr    : 0x%02x\n", lmc_mr_mpr_ctl.s.mpr_wr);
2508 }
2509 
set_vref(bdk_node_t node,int ddr_interface_num,int rank,int range,int value)2510 void set_vref(bdk_node_t node, int ddr_interface_num, int rank,
2511               int range, int value)
2512 {
2513     bdk_lmcx_mr_mpr_ctl_t lmc_mr_mpr_ctl;
2514     bdk_lmcx_modereg_params3_t lmc_modereg_params3;
2515     int mr_wr_addr = 0;
2516 
2517     lmc_mr_mpr_ctl.u = 0;
2518     lmc_modereg_params3.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS3(ddr_interface_num));
2519 
2520     mr_wr_addr |= lmc_modereg_params3.s.tccd_l<<10; /* A12:A10 tCCD_L */
2521     mr_wr_addr |= 1<<7;         /* A7 1 = Enable(Training Mode) */
2522     mr_wr_addr |= range<<6;     /* A6 VrefDQ Training Range */
2523     mr_wr_addr |= value<<0;     /* A5:A0 VrefDQ Training Value */
2524 
2525     lmc_mr_mpr_ctl.s.mr_wr_addr               = mr_wr_addr;
2526     lmc_mr_mpr_ctl.s.mr_wr_sel                = 6; /* Write MR6 */
2527     lmc_mr_mpr_ctl.s.mr_wr_rank               = rank;
2528     //lmc_mr_mpr_ctl.s.mr_wr_pda_mask           =
2529     //lmc_mr_mpr_ctl.s.mr_wr_pda_enable         =
2530     //lmc_mr_mpr_ctl.s.mpr_loc                  = location;
2531     //lmc_mr_mpr_ctl.s.mpr_wr                   = 0; /* Read=0, Write=1 */
2532     //lmc_mr_mpr_ctl.s.mpr_bit_select           =
2533     //lmc_mr_mpr_ctl.s.mpr_byte_select          =
2534     //lmc_mr_mpr_ctl.s.mpr_whole_byte_enable    =
2535     //lmc_mr_mpr_ctl.s.mr_wr_use_default_value  =
2536     //lmc_mr_mpr_ctl.s.mr_wr_bg1                =
2537     DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
2538 
2539     /* 0x8 = Mode Register Write */
2540     perform_octeon3_ddr3_sequence(node, 1<<rank, ddr_interface_num, 0x8);
2541 
2542     /* It is vendor specific whether Vref_value is captured with A7=1.
2543        A subsequent MRS might be necessary. */
2544     perform_octeon3_ddr3_sequence(node, 1<<rank, ddr_interface_num, 0x8);
2545 
2546     mr_wr_addr &= ~(1<<7);         /* A7 0 = Disable(Training Mode) */
2547     lmc_mr_mpr_ctl.s.mr_wr_addr               = mr_wr_addr;
2548     DRAM_CSR_WRITE(node, BDK_LMCX_MR_MPR_CTL(ddr_interface_num), lmc_mr_mpr_ctl.u);
2549 }
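
/* Worked example (illustrative values only): with tccd_l=0, range=1 and
 * value=0x12, the MR6 payload built above is (1<<7)|(1<<6)|0x12 = 0xd2,
 * i.e. training mode enabled via A7, the range select on A6, and the
 * training step in A5:A0. The MRW sequence is issued twice because, as
 * noted above, some vendors only latch the value on a subsequent MRS
 * while already in training mode; the final write with A7 cleared then
 * exits training mode, leaving the new VrefDQ in effect.
 */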
2550 
set_DRAM_output_inversion(bdk_node_t node,int ddr_interface_num,int dimm_count,int rank_mask,int inversion)2551 static void set_DRAM_output_inversion (bdk_node_t node,
2552                                        int ddr_interface_num,
2553                                        int dimm_count,
2554                                        int rank_mask,
2555                                        int inversion)
2556 {
2557     bdk_lmcx_ddr4_dimm_ctl_t lmc_ddr4_dimm_ctl;
2558     bdk_lmcx_dimmx_params_t lmc_dimmx_params;
2559     bdk_lmcx_dimm_ctl_t lmc_dimm_ctl;
2560     int dimm_no;
2561 
2562     lmc_ddr4_dimm_ctl.u = 0;  /* Don't touch extended register control words */
2563     DRAM_CSR_WRITE(node, BDK_LMCX_DDR4_DIMM_CTL(ddr_interface_num), lmc_ddr4_dimm_ctl.u);
2564 
2565     ddr_print("All DIMMs: Register Control Word          RC0 : %x\n", (inversion & 1));
2566 
2567     for (dimm_no = 0; dimm_no < dimm_count; ++dimm_no) {
2568         lmc_dimmx_params.u = BDK_CSR_READ(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm_no));
2569         lmc_dimmx_params.s.rc0  = (lmc_dimmx_params.s.rc0 & ~1) | (inversion & 1);
2570         DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm_no), lmc_dimmx_params.u);
2571     }
2572 
2573     /* LMC0_DIMM_CTL */
2574     lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num));
2575     lmc_dimm_ctl.s.dimm0_wmask         = 0x1;
2576     lmc_dimm_ctl.s.dimm1_wmask         = (dimm_count > 1) ? 0x0001 : 0x0000;
2577 
2578     ddr_print("LMC DIMM_CTL                                  : 0x%016llx\n",
2579               lmc_dimm_ctl.u);
2580     DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
2581 
2582     perform_octeon3_ddr3_sequence(node, rank_mask, ddr_interface_num, 0x7 ); /* Init RCW */
2583 }
2584 
write_mpr_page0_pattern(bdk_node_t node,int rank_mask,int ddr_interface_num,int dimm_count,int pattern,int location_mask)2585 static void write_mpr_page0_pattern (bdk_node_t node, int rank_mask,
2586                                      int ddr_interface_num, int dimm_count, int pattern, int location_mask)
2587 {
2588     int rankx;
2589     int location;
2590 
2591     for (rankx = 0; rankx < dimm_count*4; rankx++) {
2592         if (!(rank_mask & (1 << rankx)))
2593             continue;
2594         for (location = 0; location < 4; ++location) {
2595             if (!(location_mask & (1 << location)))
2596                 continue;
2597 
2598             ddr4_mpr_write(node, ddr_interface_num, rankx,
2599                            /* page */ 0, /* location */ location, pattern);
2600         }
2601     }
2602 }
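
/* Note: location_mask selects which of the four MPR locations receive
 * the pattern, one bit per location; e.g. the caller below passes
 * pattern=0x55 with location_mask=0x8 to write only MPR location 3 of
 * page 0.
 */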
2603 
change_rdimm_mpr_pattern(bdk_node_t node,int rank_mask,int ddr_interface_num,int dimm_count)2604 static void change_rdimm_mpr_pattern (bdk_node_t node, int rank_mask,
2605                                       int ddr_interface_num, int dimm_count)
2606 {
2607     int save_ref_zqcs_int;
2608     bdk_lmcx_config_t  lmc_config;
2609 
2610     /*
2611       Okay, here is the latest sequence.  This should work for all
2612       chips and passes (78,88,73,etc).  This sequence should be run
2613       immediately after DRAM INIT.  The basic idea is to write the
2614       same pattern into each of the 4 MPR locations in the DRAM, so
2615       that the same value is returned when doing MPR reads regardless
2616       of the inversion state.  My advice is to put this into a
2617       function, change_rdimm_mpr_pattern or something like that, so
2618       that it can be called multiple times, as I think David wants a
2619       clock-like pattern for OFFSET training, but does not want a
2620       clock pattern for Bit-Deskew.  You should then be able to call
2621       this at any point in the init sequence (after DRAM init) to
2622       change the pattern to a new value.
2623       Mike
2624 
2625       A correction: PHY doesn't need any pattern during offset
2626       training, but needs clock like pattern for internal vref and
2627       bit-dskew training.  So for that reason, these steps below have
2628       to be conducted before those trainings to pre-condition
2629       the pattern.  David
2630 
2631       Note: Step 3, 4, 8 and 9 have to be done through RDIMM
2632       sequence. If you issue MRW sequence to do RCW write (in o78 pass
2633       1 at least), LMC will still do two commands because
2634       CONTROL[RDIMM_ENA] is still set high. We don't want it to have
2635       any unintentional mode register write so it's best to do what
2636       Mike is doing here.
2637       Andrew
2638     */
2639 
2640 
2641     /* 1) Disable refresh (REF_ZQCS_INT = 0) */
2642 
2643     debug_print("1) Disable refresh (REF_ZQCS_INT = 0)\n");
2644 
2645     lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
2646     save_ref_zqcs_int         = lmc_config.s.ref_zqcs_int;
2647     lmc_config.s.ref_zqcs_int = 0;
2648     DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
2649 
2650 
2651     /* 2) Put all devices in MPR mode (Run MRW sequence (sequence=8)
2652        with MODEREG_PARAMS0[MPRLOC]=0,
2653        MODEREG_PARAMS0[MPR]=1, MR_MPR_CTL[MR_WR_SEL]=3, and
2654        MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1) */
2655 
2656     debug_print("2) Put all devices in MPR mode (Run MRW sequence (sequence=8)\n");
2657 
2658     set_mpr_mode(node, rank_mask, ddr_interface_num, dimm_count, /* mpr */ 1, /* bg1 */ 0); /* A-side */
2659     set_mpr_mode(node, rank_mask, ddr_interface_num, dimm_count, /* mpr */ 1, /* bg1 */ 1); /* B-side */
2660 
2661     /*    a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and set
2662           the value you would like directly into
2663           MR_MPR_CTL[MR_WR_ADDR] */
2664 
2665     /* 3) Disable RCD Parity (if previously enabled) - parity does not
2666        work if inversion disabled */
2667 
2668     debug_print("3) Disable RCD Parity\n");
2669 
2670     /* 4) Disable Inversion in the RCD. */
2671     /*    a. I did (3&4) via the RDIMM sequence (seq_sel=7), but it
2672           may be easier to use the MRW sequence (seq_sel=8).  Just set
2673           MR_MPR_CTL[MR_WR_SEL]=7, MR_MPR_CTL[MR_WR_ADDR][3:0]=data,
2674           MR_MPR_CTL[MR_WR_ADDR][7:4]=RCD reg */
2675 
2676     debug_print("4) Disable Inversion in the RCD.\n");
2677 
2678     set_DRAM_output_inversion(node, ddr_interface_num, dimm_count, rank_mask,
2679                                1 /* 1=disable output inversion*/);
2680 
2681     /* 5) Disable CONTROL[RDIMM_ENA] so that MR sequence goes out
2682        non-inverted.  */
2683 
2684     debug_print("5) Disable CONTROL[RDIMM_ENA]\n");
2685 
2686     set_rdimm_mode(node, ddr_interface_num, 0);
2687 
2688     /* 6) Write all 4 MPR registers with the desired pattern (have to
2689        do this for all enabled ranks) */
2690     /*    a. MR_MPR_CTL.MPR_WR=1, MR_MPR_CTL.MPR_LOC=0..3,
2691           MR_MPR_CTL.MR_WR_SEL=0, MR_MPR_CTL.MR_WR_ADDR[7:0]=pattern */
2692 
2693     debug_print("6) Write all 4 MPR page 0 Training Patterns\n");
2694 
2695     write_mpr_page0_pattern(node, rank_mask,
2696                              ddr_interface_num, dimm_count, 0x55, 0x8);
2697 
2698     /* 7) Re-enable RDIMM_ENA */
2699 
2700     debug_print("7) Re-enable RDIMM_ENA\n");
2701 
2702     set_rdimm_mode(node, ddr_interface_num, 1);
2703 
2704     /* 8) Re-enable RDIMM inversion */
2705 
2706     debug_print("8) Re-enable RDIMM inversion\n");
2707 
2708     set_DRAM_output_inversion(node, ddr_interface_num, dimm_count, rank_mask,
2709                                0 /* 0=re-enable output inversion*/);
2710 
2711     /* 9) Re-enable RDIMM parity (if desired) */
2712 
2713     debug_print("9) Re-enable RDIMM parity (if desired)\n");
2714 
2715     /* 10)Take B-side devices out of MPR mode (Run MRW sequence
2716        (sequence=8) with MODEREG_PARAMS0[MPRLOC]=0,
2717        MODEREG_PARAMS0[MPR]=0, MR_MPR_CTL[MR_WR_SEL]=3, and
2718        MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1) */
2719 
2720     debug_print("10)Take B-side devices out of MPR mode\n");
2721 
2722     set_mpr_mode(node, rank_mask, ddr_interface_num, dimm_count, /* mpr */ 0, /* bg1 */ 1);
2723 
2724     /*    a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and
2725           set the value you would like directly into
2726           MR_MPR_CTL[MR_WR_ADDR] */
2727 
2728     /* 11)Re-enable refresh (REF_ZQCS_INT=previous value) */
2729 
2730     debug_print("11)Re-enable refresh (REF_ZQCS_INT=previous value)\n");
2731 
2732     lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
2733     lmc_config.s.ref_zqcs_int = save_ref_zqcs_int;
2734     DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
2735 
2736 }
2737 
2738 static unsigned char ddr4_rodt_ohms     [RODT_OHMS_COUNT     ] = {  0,  40,  60, 80, 120, 240, 34, 48 };
2739 static unsigned char ddr4_rtt_nom_ohms  [RTT_NOM_OHMS_COUNT  ] = {  0,  60, 120, 40, 240,  48, 80, 34 };
2740 static unsigned char ddr4_rtt_nom_table [RTT_NOM_TABLE_COUNT ] = {  0,   4,   2,  6,   1,   5,  3,  7 };
2741 static unsigned char ddr4_rtt_wr_ohms   [RTT_WR_OHMS_COUNT   ] = {  0, 120, 240, 99,  80 }; // setting HiZ ohms to 99 for computed vref
2742 static unsigned char ddr4_dic_ohms      [DIC_OHMS_COUNT      ] = { 34,  48 };
2743 static short         ddr4_drive_strength[DRIVE_STRENGTH_COUNT] = {  0,   0, 26, 30, 34, 40, 48, 68, 0,0,0,0,0,0,0 };
2744 static short         ddr4_dqx_strength  [DRIVE_STRENGTH_COUNT] = {  0,  24, 27, 30, 34, 40, 48, 60, 0,0,0,0,0,0,0 };
2745 
2746 impedence_values_t ddr4_impedence_values = {
2747     .rodt_ohms             =  ddr4_rodt_ohms     ,
2748     .rtt_nom_ohms          =  ddr4_rtt_nom_ohms  ,
2749     .rtt_nom_table         =  ddr4_rtt_nom_table ,
2750     .rtt_wr_ohms           =  ddr4_rtt_wr_ohms   ,
2751     .dic_ohms              =  ddr4_dic_ohms      ,
2752     .drive_strength        =  ddr4_drive_strength,
2753     .dqx_strength          =  ddr4_dqx_strength  ,
2754 };
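
/* The tables above are indexed by the raw CSR/mode-register field
 * encoding and return the resulting termination or drive strength in
 * ohms; e.g. ddr4_rtt_nom_ohms[3] == 40 means RTT_NOM encoding 3
 * selects 40 ohms. A 0 entry means disabled, and the 99 in
 * ddr4_rtt_wr_ohms is a local sentinel for HiZ (not a real JEDEC
 * value), consumed by the computed-vref code.
 */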
2755 
2756 static unsigned char ddr3_rodt_ohms     [RODT_OHMS_COUNT     ] = { 0, 20, 30, 40, 60, 120, 0, 0 };
2757 static unsigned char ddr3_rtt_nom_ohms  [RTT_NOM_OHMS_COUNT  ] = { 0, 60, 120, 40, 20, 30, 0, 0 };
2758 static unsigned char ddr3_rtt_nom_table [RTT_NOM_TABLE_COUNT ] = { 0, 2, 1, 3, 5, 4, 0, 0 };
2759 static unsigned char ddr3_rtt_wr_ohms   [RTT_WR_OHMS_COUNT   ] = { 0, 60, 120 };
2760 static unsigned char ddr3_dic_ohms      [DIC_OHMS_COUNT      ] = { 40, 34 };
2761 static short         ddr3_drive_strength[DRIVE_STRENGTH_COUNT] = { 0, 24, 27, 30, 34, 40, 48, 60, 0,0,0,0,0,0,0 };
2762 static impedence_values_t ddr3_impedence_values = {
2763     .rodt_ohms             =  ddr3_rodt_ohms     ,
2764     .rtt_nom_ohms          =  ddr3_rtt_nom_ohms  ,
2765     .rtt_nom_table         =  ddr3_rtt_nom_table ,
2766     .rtt_wr_ohms           =  ddr3_rtt_wr_ohms   ,
2767     .dic_ohms              =  ddr3_dic_ohms      ,
2768     .drive_strength        =  ddr3_drive_strength,
2769     .dqx_strength          =  ddr3_drive_strength,
2770 };
2771 
2772 
2773 uint64_t
hertz_to_psecs(uint64_t hertz)2774 hertz_to_psecs(uint64_t hertz)
2775 {
2776     return divide_nint((uint64_t) 1000*1000*1000*1000, hertz); /* Clock in psecs */
2777 }
2778 
2779 #define DIVIDEND_SCALE 1000      /* Scale to avoid rounding error. */
2780 uint64_t
psecs_to_mts(uint64_t psecs)2781 psecs_to_mts(uint64_t psecs)
2782 {
2783     //ddr_print("psecs %ld, divisor %ld\n", psecs, divide_nint((uint64_t)(2 * 1000000 * DIVIDEND_SCALE), psecs));
2784     return divide_nint(divide_nint((uint64_t)(2 * 1000000 * DIVIDEND_SCALE), psecs), DIVIDEND_SCALE);
2785 }
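
/* Example of the round trip (illustration): a 1200 MHz DDR clock gives
 * hertz_to_psecs(1200000000) = 833 ps, and psecs_to_mts(833) comes back
 * as 2401 MT/s because of integer rounding; compute_rc3x() below
 * special-cases 833 ps back to 2400 MT/s for exactly this reason.
 */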

#define WITHIN(v,b,m) (((v)>=((b)-(m)))&&((v)<=((b)+(m))))

// pretty-print version, only works with what comes from the SPD: tCKmin or tCKAVGmin
unsigned long
pretty_psecs_to_mts(uint64_t psecs)
{
    uint64_t ret = 0; // default to error
    if (WITHIN(psecs, 1250, 1))
        ret = 1600;
    else if (WITHIN(psecs, 1071, 1))
        ret = 1866;
    else if (WITHIN(psecs, 937, 1))
        ret = 2133;
    else if (WITHIN(psecs, 833, 1))
        ret = 2400;
    else if (WITHIN(psecs, 750, 1))
        ret = 2666;
    return ret;
}

uint64_t
mts_to_hertz(uint64_t mts)
{
    return ((mts * 1000 * 1000) / 2);
}

#define DEBUG_RC3X_COMPUTE 0
#define rc3x_print(...) \
    do { if (DEBUG_RC3X_COMPUTE) printf(__VA_ARGS__); } while (0)

static int compute_rc3x (int64_t tclk_psecs)
{
    long speed;
    long tclk_psecs_min, tclk_psecs_max;
    long data_rate_mhz, data_rate_mhz_min, data_rate_mhz_max;
    int rc3x;

#define ENCODING_BASE 1240

    data_rate_mhz = psecs_to_mts(tclk_psecs);

    /* 2400 MT/s is a special case. Using integer arithmetic it rounds
       from 833 psecs to 2401 MT/s. Force it to 2400 to pick the
       proper setting from the table. */
    if (tclk_psecs == 833)
        data_rate_mhz = 2400;

    for (speed = ENCODING_BASE; speed < 3200; speed += 20) {
        int error = 0;

        tclk_psecs_min = hertz_to_psecs(mts_to_hertz(speed + 00)); /* Clock in psecs */
        tclk_psecs_max = hertz_to_psecs(mts_to_hertz(speed + 18)); /* Clock in psecs */

        data_rate_mhz_min = psecs_to_mts(tclk_psecs_min);
        data_rate_mhz_max = psecs_to_mts(tclk_psecs_max);

        /* Force alignment to a multiple of 20 to avoid rounding errors. */
        data_rate_mhz_min = ((data_rate_mhz_min + 18) / 20) * 20;
        data_rate_mhz_max = ((data_rate_mhz_max + 18) / 20) * 20;

        error += (speed + 00 != data_rate_mhz_min);
        error += (speed + 20 != data_rate_mhz_max);

        rc3x = (speed - ENCODING_BASE) / 20;

        rc3x_print("rc3x: %02x speed: %4ld MT/s < f <= %4ld MT/s, psec: %3ld:%3ld %4ld:%4ld %s\n",
                   rc3x,
                   speed, speed + 20,
                   tclk_psecs_min, tclk_psecs_max,
                   data_rate_mhz_min, data_rate_mhz_max,
                   error ? "****" : "");

        if (data_rate_mhz <= (speed + 20)) {
            rc3x_print("rc3x: %4ld MT/s <= %4ld MT/s\n", data_rate_mhz, speed + 20);
            break;
        }
    }
    return rc3x;
}
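
/* Worked example (illustration): tclk_psecs = 833 is forced to
 * data_rate_mhz = 2400; the loop stops at the first bucket with
 * 2400 <= speed + 20, i.e. speed = 2380, so the function returns
 * rc3x = (2380 - 1240) / 20 = 57 (0x39).
 */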

static const int   rlevel_separate_ab  = 1;

int init_octeon3_ddr3_interface(bdk_node_t node,
                                const ddr_configuration_t *ddr_configuration,
                                uint32_t ddr_hertz,
                                uint32_t cpu_hertz,
                                uint32_t ddr_ref_hertz,
                                int board_type,
                                int board_rev_maj,
                                int board_rev_min,
                                int ddr_interface_num,
                                uint32_t ddr_interface_mask
                                )
{
    const char *s;

    const dimm_odt_config_t *odt_1rank_config = ddr_configuration->odt_1rank_config;
    const dimm_odt_config_t *odt_2rank_config = ddr_configuration->odt_2rank_config;
    const dimm_odt_config_t *odt_4rank_config = ddr_configuration->odt_4rank_config;
    const dimm_config_t *dimm_config_table = ddr_configuration->dimm_config_table;
    const dimm_odt_config_t *odt_config;
    const ddr3_custom_config_t *custom_lmc_config = &ddr_configuration->custom_lmc_config;
    int odt_idx;

    /*
    ** Compute clock rates to the nearest picosecond.
    */
    uint64_t tclk_psecs = hertz_to_psecs(ddr_hertz); /* Clock in psecs */
    uint64_t eclk_psecs = hertz_to_psecs(cpu_hertz); /* Clock in psecs */

    int row_bits, col_bits, num_banks, num_ranks, dram_width;
    int dimm_count = 0;
    int fatal_error = 0;        /* Accumulate and report all the errors before giving up */

    int safe_ddr_flag = 0; /* Flag that indicates safe DDR settings should be used */
    int ddr_interface_64b = 1;  /* THUNDER Default: 64bit interface width */
    int ddr_interface_bytemask;
    uint32_t mem_size_mbytes = 0;
    unsigned int didx;
    int bank_bits = 0;
    int bunk_enable;
    int rank_mask;
    int column_bits_start = 1;
    int row_lsb;
    int pbank_lsb;
    int use_ecc = 1;
    int mtb_psec = 0; /* quiet */
    short ftb_Dividend;
    short ftb_Divisor;
    int tAAmin;
    int tCKmin;
    int CL, min_cas_latency = 0, max_cas_latency = 0, override_cas_latency = 0;
    int ddr_rtt_nom_auto, ddr_rodt_ctl_auto;
    int i;

    int spd_addr;
    int spd_org;
    int spd_banks;
    int spd_rdimm;
    int spd_dimm_type;
    int spd_ecc;
    uint32_t spd_cas_latency;
    int spd_mtb_dividend;
    int spd_mtb_divisor;
    int spd_tck_min;
    int spd_taa_min;
    int spd_twr;
    int spd_trcd;
    int spd_trrd;
    int spd_trp;
    int spd_tras;
    int spd_trc;
    int spd_trfc;
    int spd_twtr;
    int spd_trtp;
    int spd_tfaw;
    int spd_addr_mirror;
    int spd_package = 0;
    int spd_rawcard = 0;
    int spd_rawcard_AorB = 0;
    int is_stacked_die = 0;
    int disable_stacked_die = 0;
    int is_3ds_dimm = 0; // 3DS
    int lranks_per_prank = 1; // 3DS: logical ranks per package rank
    int lranks_bits = 0; // 3DS: logical rank bits
    int die_capacity = 0; // in Mbits; only used for 3DS

    /* FTB values are two's complement ranging from +127 to -128. */
    typedef signed char SC_t;

    int twr;
    int trcd;
    int trrd;
    int trp;
    int tras;
    int trc;
    int trfc;
    int twtr;
    int trtp = 0;  /* quiet */
    int tfaw;

    int wlevel_bitmask_errors = 0;
    int wlevel_loops;
    int default_rtt_nom[4];
    int dyn_rtt_nom_mask = 0;

    ddr_type_t ddr_type;
    int ddr4_tCKAVGmin = 0; /* quiet */
    int ddr4_tCKAVGmax = 0; /* quiet */
    int ddr4_tRCDmin = 0; /* quiet */
    int ddr4_tRPmin = 0; /* quiet */
    int ddr4_tRASmin = 0; /* quiet */
    int ddr4_tRCmin = 0; /* quiet */
    int ddr4_tRFC1min = 0; /* quiet */
    int ddr4_tRFC2min = 0; /* quiet */
    int ddr4_tRFC4min = 0; /* quiet */
    int ddr4_tFAWmin = 0; /* quiet */
    int ddr4_tRRD_Smin = 0; /* quiet */
    int ddr4_tRRD_Lmin;
    int ddr4_tCCD_Lmin;
    impedence_values_t *imp_values;
    int default_rodt_ctl;
    // default to disabled (ie, LMC restart, not chip reset)
    int ddr_disable_chip_reset = 1;
    int disable_deskew_training = 0;
    const char *dimm_type_name;

    /* Allow the Write bit-deskew feature to be enabled when desired. */
    // NOTE: THUNDER pass 2.x only, 81xx, 83xx
    int enable_write_deskew = ENABLE_WRITE_DESKEW_DEFAULT;

#if SWL_TRY_HWL_ALT
    typedef struct {
        uint16_t hwl_alt_mask; // mask of bytelanes with alternate
        uint16_t hwl_alt_delay[9]; // bytelane alternate avail if mask=1
    } hwl_alt_by_rank_t;
    hwl_alt_by_rank_t hwl_alts[4];
    memset(hwl_alts, 0, sizeof(hwl_alts));
#endif /* SWL_TRY_HWL_ALT */

    bdk_lmcx_config_t  lmc_config;

    /* Initialize these to shut up the compiler. They are configured
       and used only for DDR4.  */
    ddr4_tRRD_Lmin = 6000;
    ddr4_tCCD_Lmin = 6000;

    ddr_print("\nInitializing node %d DDR interface %d, DDR Clock %d, DDR Reference Clock %d\n",
              node, ddr_interface_num, ddr_hertz, ddr_ref_hertz);

    if (dimm_config_table[0].spd_addr == 0 && !dimm_config_table[0].spd_ptr) {
        error_print("ERROR: No DIMMs specified in the dimm_config_table.\n");
        return (-1);
    }

    // allow some overrides to be done

    // this one controls whether chip RESET is done, or LMC init restarted from step 6.9.6
    if ((s = lookup_env_parameter("ddr_disable_chip_reset")) != NULL) {
        ddr_disable_chip_reset = !!strtoul(s, NULL, 0);
    }
    // this one controls whether Deskew Training is performed
    if ((s = lookup_env_parameter("ddr_disable_deskew_training")) != NULL) {
        disable_deskew_training = !!strtoul(s, NULL, 0);
    }
    // this one is in Validate_Read_Deskew_Training and controls a preliminary delay
    if ((s = lookup_env_parameter("ddr_deskew_validation_delay")) != NULL) {
        deskew_validation_delay = strtoul(s, NULL, 0);
    }
    // this one is in Perform_Read_Deskew_Training and controls lock retries
    if ((s = lookup_env_parameter("ddr_lock_retries")) != NULL) {
        default_lock_retry_limit = strtoul(s, NULL, 0);
    }
    // this one controls whether stacked die status can affect processing;
    // disabling it will affect the computed vref adjustment and rodt_row_skip_mask
    if ((s = lookup_env_parameter("ddr_disable_stacked_die")) != NULL) {
        disable_stacked_die = !!strtoul(s, NULL, 0);
    }

    // setup/override for write bit-deskew feature
    if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
        // FIXME: allow override
        if ((s = lookup_env_parameter("ddr_enable_write_deskew")) != NULL) {
            enable_write_deskew = !!strtoul(s, NULL, 0);
        } // else take default setting
    } else { // not pass 2.x
        enable_write_deskew = 0; // force disabled
    }

#if 0 // FIXME: do we really need this anymore?
    if (dram_is_verbose(VBL_NORM)) {
        printf("DDR SPD Table:");
        for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) {
            if (dimm_config_table[didx].spd_addr == 0) break;
            printf(" --ddr%dspd=0x%02x", ddr_interface_num, dimm_config_table[didx].spd_addr);
        }
        printf("\n");
    }
#endif

    /*
    ** Walk the DRAM Socket Configuration Table to see what is installed.
    */
    for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx)
    {
        /* Check for lower DIMM socket populated */
        if (validate_dimm(node, &dimm_config_table[didx]) == 1) {
            // NOTE: DIMM info printing is now done later when more details are available
            ++dimm_count;
        } else { break; }       /* Finished when there is no lower DIMM */
    }


    initialize_ddr_clock(node,
                         ddr_configuration,
                         cpu_hertz,
                         ddr_hertz,
                         ddr_ref_hertz,
                         ddr_interface_num,
                         ddr_interface_mask);

    if (!odt_1rank_config)
        odt_1rank_config = disable_odt_config;
    if (!odt_2rank_config)
        odt_2rank_config = disable_odt_config;
    if (!odt_4rank_config)
        odt_4rank_config = disable_odt_config;

    if ((s = lookup_env_parameter("ddr_safe")) != NULL) {
        safe_ddr_flag = !!strtoul(s, NULL, 0);
    }


    if (dimm_count == 0) {
        error_print("ERROR: DIMM 0 not detected.\n");
        return(-1);
    }

    // look for 32-bit mode specified in the config
    if (custom_lmc_config->mode32b) {
        ddr_interface_64b = 0;
    }

    if (ddr_interface_64b == 0) { // check if 32-bit mode is bad
        if (!CAVIUM_IS_MODEL(CAVIUM_CN81XX)) {
            error_print("32-bit interface width is NOT supported for this Thunder model\n");
            ddr_interface_64b = 1; // force to 64-bit
        }
    } else { // check if 64-bit mode is bad
        if (CAVIUM_IS_MODEL(CAVIUM_CN81XX)) { // check the fuses on 81XX for forced 32-bit mode
            BDK_CSR_INIT(mio_fus_dat2, node, BDK_MIO_FUS_DAT2);
            if (mio_fus_dat2.s.lmc_mode32) {
                error_print("32-bit interface width is the ONLY width supported for this Thunder model\n");
                ddr_interface_64b = 0; // force to 32-bit
            }
        }
    }

    // finally, say we are in 32-bit mode when it has been validated
    if (ddr_interface_64b == 0) {
        ddr_print("N%d.LMC%d: Setting 32-bit data width\n",
                  node, ddr_interface_num);
    }

    /* ddr_type only indicates DDR4 or DDR3 */
    ddr_type = get_ddr_type(node, &dimm_config_table[0]);
    debug_print("DRAM Device Type: DDR%d\n", ddr_type);

    spd_dimm_type = get_dimm_module_type(node, &dimm_config_table[0], ddr_type);

    if (ddr_type == DDR4_DRAM) {
        int spd_module_type;
        int asymmetric;
        const char *signal_load[4] = {"", "MLS", "3DS", "RSV"};

        imp_values = &ddr4_impedence_values;
        dimm_type_name = ddr4_dimm_types[spd_dimm_type];

        spd_addr =  read_spd(node, &dimm_config_table[0], DDR4_SPD_ADDRESSING_ROW_COL_BITS);
        spd_org  =  read_spd(node, &dimm_config_table[0], DDR4_SPD_MODULE_ORGANIZATION);
        spd_banks = 0xFF & read_spd(node, &dimm_config_table[0], DDR4_SPD_DENSITY_BANKS);

        bank_bits = (2 + ((spd_banks >> 4) & 0x3)) + ((spd_banks >> 6) & 0x3);
        bank_bits = min((int)bank_bits, 4); /* Controller can only address 4 bits. */
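        /* Example (illustration): an SPD density/banks byte of 0x84
         * encodes 2 bank-group bits (bits 7:6 = 2) and 4 banks per
         * group (bits 5:4 = 0), so bank_bits = (2 + 0) + 2 = 4, the
         * maximum the controller can address.
         */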

        spd_package = 0xFF & read_spd(node, &dimm_config_table[0], DDR4_SPD_PACKAGE_TYPE);
        if (spd_package & 0x80) { // non-monolithic device
            is_stacked_die = (!disable_stacked_die) ? ((spd_package & 0x73) == 0x11) : 0;
            ddr_print("DDR4: Package Type 0x%x (%s), %d die\n", spd_package,
                      signal_load[(spd_package & 3)], ((spd_package >> 4) & 7) + 1);
            is_3ds_dimm = ((spd_package & 3) == 2); // is it 3DS?
            if (is_3ds_dimm) {
                lranks_per_prank = ((spd_package >> 4) & 7) + 1;
                // FIXME: should make sure it is only 2H or 4H or 8H?
                lranks_bits = lranks_per_prank >> 1;
                if (lranks_bits == 4) lranks_bits = 3;
            }
        } else if (spd_package != 0) {
            // FIXME: print non-zero monolithic device definition
            ddr_print("DDR4: Package Type MONOLITHIC: %d die, signal load %d\n",
                      ((spd_package >> 4) & 7) + 1, (spd_package & 3));
        }

        asymmetric = (spd_org >> 6) & 1;
        if (asymmetric) {
            int spd_secondary_pkg = read_spd(node, &dimm_config_table[0],
                                             DDR4_SPD_SECONDARY_PACKAGE_TYPE);
            ddr_print("DDR4: Module Organization: ASYMMETRICAL: Secondary Package Type 0x%x\n",
                      spd_secondary_pkg);
        } else {
            uint64_t bus_width = 8 << (0x07 & read_spd(node, &dimm_config_table[0],
                                                  DDR4_SPD_MODULE_MEMORY_BUS_WIDTH));
            uint64_t ddr_width = 4 << ((spd_org >> 0) & 0x7);
            uint64_t module_cap;
            int shift = (spd_banks & 0x0F);
            die_capacity = (shift < 8) ? (256UL << shift) : ((12UL << (shift & 1)) << 10);
            ddr_print("DDR4: Module Organization: SYMMETRICAL: capacity per die %d %cbit\n",
                      (die_capacity > 512) ? (die_capacity >> 10) : die_capacity,
                      (die_capacity > 512) ? 'G' : 'M');
            module_cap = ((uint64_t)die_capacity << 20) / 8UL * bus_width / ddr_width *
                /* no. pkg ranks*/(1UL + ((spd_org >> 3) & 0x7));
            if (is_3ds_dimm) // is it 3DS?
                module_cap *= /* die_count */(uint64_t)(((spd_package >> 4) & 7) + 1);
            ddr_print("DDR4: Module Organization: SYMMETRICAL: capacity per module %lld GB\n",
                      module_cap >> 30);
        }
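        /* Worked example (illustration): an 8 Gbit die (shift = 5,
         * 256 << 5 = 8192 Mbit) on a 64-bit bus with x8 devices and
         * 2 package ranks gives module_cap = 1 GiB * (64/8) * 2,
         * i.e. 16 GB reported per module.
         */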

        spd_rawcard = 0xFF & read_spd(node, &dimm_config_table[0], DDR4_SPD_REFERENCE_RAW_CARD);
        ddr_print("DDR4: Reference Raw Card 0x%x\n", spd_rawcard);

        spd_module_type = read_spd(node, &dimm_config_table[0], DDR4_SPD_KEY_BYTE_MODULE_TYPE);
        if (spd_module_type & 0x80) { // HYBRID module
            ddr_print("DDR4: HYBRID module, type %s\n",
                      ((spd_module_type & 0x70) == 0x10) ? "NVDIMM" : "UNKNOWN");
        }

        spd_dimm_type   = spd_module_type & 0x0F;
        spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) || (spd_dimm_type == 8);
        if (spd_rdimm) {
            int spd_mfgr_id = read_spd(node, &dimm_config_table[0], DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB) |
                (read_spd(node, &dimm_config_table[0], DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB) << 8);
            int spd_register_rev = read_spd(node, &dimm_config_table[0], DDR4_SPD_REGISTER_REVISION_NUMBER);
            ddr_print("DDR4: RDIMM Register Manufacturer ID 0x%x Revision 0x%x\n",
                      spd_mfgr_id, spd_register_rev);

            // RAWCARD A or B must have bit 7=0 and bits 4-0 either 00000(A) or 00001(B)
            spd_rawcard_AorB = ((spd_rawcard & 0x9fUL) <= 1);
        }
    } else {
        imp_values = &ddr3_impedence_values;
        dimm_type_name = ddr3_dimm_types[spd_dimm_type];

        spd_addr = read_spd(node, &dimm_config_table[0], DDR3_SPD_ADDRESSING_ROW_COL_BITS);
        spd_org = read_spd(node, &dimm_config_table[0], DDR3_SPD_MODULE_ORGANIZATION);
        spd_banks = read_spd(node, &dimm_config_table[0], DDR3_SPD_DENSITY_BANKS) & 0xff;

        bank_bits = 3 + ((spd_banks >> 4) & 0x7);
        bank_bits = min((int)bank_bits, 3); /* Controller can only address 3 bits. */

        spd_rdimm       = (spd_dimm_type == 1) || (spd_dimm_type == 5) || (spd_dimm_type == 9);
    }

#if 0 // FIXME: why should this be possible OR needed?
    if ((s = lookup_env_parameter("ddr_rdimm_ena")) != NULL) {
        spd_rdimm = !!strtoul(s, NULL, 0);
    }
#endif

    debug_print("spd_addr        : %#06x\n", spd_addr );
    debug_print("spd_org         : %#06x\n", spd_org );
    debug_print("spd_banks       : %#06x\n", spd_banks );

    row_bits = 12 + ((spd_addr >> 3) & 0x7);
    col_bits =  9 + ((spd_addr >> 0) & 0x7);

    num_ranks =  1 + ((spd_org >> 3) & 0x7);
    dram_width = 4 << ((spd_org >> 0) & 0x7);
    num_banks = 1 << bank_bits;

    if ((s = lookup_env_parameter("ddr_num_ranks")) != NULL) {
        num_ranks = strtoul(s, NULL, 0);
    }

    /* FIX
    ** Check that values are within some theoretical limits.
    ** col_bits(min) = row_lsb(min) - bank_bits(max) - bus_bits(max) = 14 - 3 - 4 = 7
    ** col_bits(max) = row_lsb(max) - bank_bits(min) - bus_bits(min) = 18 - 2 - 3 = 13
    */
    if ((col_bits > 13) || (col_bits < 7)) {
        error_print("Unsupported number of Col Bits: %d\n", col_bits);
        ++fatal_error;
    }

    /* FIX
    ** Check that values are within some theoretical limits.
    ** row_bits(min) = pbank_lsb(min) - row_lsb(max) - rank_bits = 26 - 18 - 1 = 7
    ** row_bits(max) = pbank_lsb(max) - row_lsb(min) - rank_bits = 33 - 14 - 1 = 18
    */
    if ((row_bits > 18) || (row_bits < 7)) {
        error_print("Unsupported number of Row Bits: %d\n", row_bits);
        ++fatal_error;
    }

    wlevel_loops = WLEVEL_LOOPS_DEFAULT;
    // accept generic or interface-specific override but not for ASIM...
    if ((s = lookup_env_parameter("ddr_wlevel_loops")) == NULL)
        s = lookup_env_parameter("ddr%d_wlevel_loops", ddr_interface_num);
    if (s != NULL) {
        wlevel_loops = strtoul(s, NULL, 0);
    }

    bunk_enable = (num_ranks > 1);

    column_bits_start = 3;

    row_lsb = column_bits_start + col_bits + bank_bits - (! ddr_interface_64b);
    debug_print("row_lsb = column_bits_start + col_bits + bank_bits = %d\n", row_lsb);

    pbank_lsb = row_lsb + row_bits + bunk_enable;
    debug_print("pbank_lsb = row_lsb + row_bits + bunk_enable = %d\n", pbank_lsb);

    if (lranks_per_prank > 1) {
        pbank_lsb = row_lsb + row_bits + lranks_bits + bunk_enable;
        ddr_print("DDR4: 3DS: pbank_lsb = (%d row_lsb) + (%d row_bits) + (%d lranks_bits) + (%d bunk_enable) = %d\n",
                  row_lsb, row_bits, lranks_bits, bunk_enable, pbank_lsb);
    }

    mem_size_mbytes =  dimm_count * ((1ull << pbank_lsb) >> 20);
    if (num_ranks == 4) {
        /* Quad-rank DIMM capacity is equivalent to two dual-rank DIMMs. */
        mem_size_mbytes *= 2;
    }
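    /* Worked example (illustration): a single dual-rank 16 GB DIMM with
     * col_bits = 10, bank_bits = 4 and row_bits = 16 on a 64-bit bus
     * gives row_lsb = 3 + 10 + 4 = 17 and pbank_lsb = 17 + 16 + 1 = 34,
     * so mem_size_mbytes = 1 * ((1ull << 34) >> 20) = 16384 MB.
     */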

    /* Mask with 1 bits set for each active rank, allowing 2 bits per dimm.
    ** This makes later calculations simpler, as a variety of CSRs use this layout.
    ** This init needs to be updated for dual configs (ie non-identical DIMMs).
    ** Bit 0 = dimm0, rank 0
    ** Bit 1 = dimm0, rank 1
    ** Bit 2 = dimm1, rank 0
    ** Bit 3 = dimm1, rank 1
    ** ...
    */
    rank_mask = 0x1;
    if (num_ranks > 1)
        rank_mask = 0x3;
    if (num_ranks > 2)
        rank_mask = 0xf;

    for (i = 1; i < dimm_count; i++)
        rank_mask |= ((rank_mask & 0x3) << (2*i));

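    /* Example (illustration): two dual-rank DIMMs start with
     * rank_mask = 0x3 for DIMM 0; the loop then ORs in (0x3 << 2),
     * yielding rank_mask = 0xf (both ranks of both DIMMs active).
     */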

#ifdef CAVIUM_ONLY
    /* Special request: mismatched DIMM support. Slot 0: 2-Rank, Slot 1: 1-Rank */
    if (0)
    {
        /*
        ** Calculate the total memory size in terms of the total
        ** number of ranks instead of the number of dimms.  The usual
        ** requirement is for both dimms to be identical.  This check
        ** works around that requirement to allow one exception.  The
        ** dimm in the second slot may now have fewer ranks than the
        ** first slot.
        */
        int spd_org_dimm1;
        int num_ranks_dimm1;
        int rank_count;
        int rank_mask_dimm1;

        if (dimm_count > 1) {
            spd_org_dimm1 = read_spd(node, &dimm_config_table[1] /* dimm 1*/,
                                     DDR3_SPD_MODULE_ORGANIZATION);
            num_ranks_dimm1 = 1 + ((spd_org_dimm1 >> 3) & 0x7);
            rank_count =  num_ranks/* dimm 0 */ + num_ranks_dimm1 /* dimm 1 */;

            if (num_ranks != num_ranks_dimm1) {
                mem_size_mbytes =  rank_count * ((1ull << (pbank_lsb-bunk_enable)) >> 20);
                rank_mask = 1 | ((num_ranks > 1) << 1);
                rank_mask_dimm1 = 1 | ((num_ranks_dimm1 > 1) << 1);
                rank_mask |= ((rank_mask_dimm1 & 0x3) << 2);
                ddr_print("DIMM 1 - ranks: %d, size: %d MB\n",
                          num_ranks_dimm1, num_ranks_dimm1 * ((1ull << (pbank_lsb-bunk_enable)) >> 20));
            }
        }
    }
#endif  /* CAVIUM_ONLY */

    spd_ecc = get_dimm_ecc(node, &dimm_config_table[0], ddr_type);

    VB_PRT(VBL_DEV, "Summary: - %d %s%s %dRx%d %s, row bits=%d, col bits=%d, bank bits=%d\n",
              dimm_count, dimm_type_name, (dimm_count > 1) ? "s" : "",
              num_ranks, dram_width, (spd_ecc) ? "ECC" : "non-ECC",
              row_bits, col_bits, bank_bits);

    // always print out the useful DIMM information...
    for (i = 0; i < DDR_CFG_T_MAX_DIMMS; i++) {
        if (i < dimm_count)
            report_dimm(node, &dimm_config_table[i], i, ddr_interface_num,
                        num_ranks, dram_width, mem_size_mbytes / dimm_count);
        else
            if (validate_dimm(node, &dimm_config_table[i]) == 0) // only if there is a slot
                printf("N%d.LMC%d.DIMM%d: Not Present\n", node, ddr_interface_num, i);
    }

    if (ddr_type == DDR4_DRAM) {
        spd_cas_latency  = ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE0)) <<  0);
        spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE1)) <<  8);
        spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE2)) << 16);
        spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR4_SPD_CAS_LATENCIES_BYTE3)) << 24);
    } else {
        spd_cas_latency  = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_CAS_LATENCIES_LSB);
        spd_cas_latency |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_CAS_LATENCIES_MSB)) << 8);
    }
    debug_print("spd_cas_latency : %#06x\n", spd_cas_latency );

    if (ddr_type == DDR4_DRAM) {

        /* No other values for DDR4 MTB and FTB are specified at the
         * current time so don't bother reading them. Can't speculate how
         * new values will be represented.
         */
        int spdMTB = 125;
        int spdFTB = 1;

        tAAmin
          = spdMTB *        read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_LATENCY_TAAMIN)
          + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN);

        ddr4_tCKAVGmin
          = spdMTB *        read_spd(node, &dimm_config_table[0], DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN)
          + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN);

        ddr4_tCKAVGmax
          = spdMTB *        read_spd(node, &dimm_config_table[0], DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX)
          + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX);

        ddr4_tRCDmin
          = spdMTB *        read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN)
          + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN);

        ddr4_tRPmin
          = spdMTB *        read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN)
          + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN);

        ddr4_tRASmin
          = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8) +
                      ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN) & 0xff));

        ddr4_tRCmin
          = spdMTB * ((((read_spd(node, &dimm_config_table[0], DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) >> 4) & 0xf) << 8) +
                      (  read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN) & 0xff))
          + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN);

        ddr4_tRFC1min
          = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN) & 0xff) << 8) +
                      ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN) & 0xff));

        ddr4_tRFC2min
            = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN) & 0xff) << 8) +
                        ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN) & 0xff));

        ddr4_tRFC4min
            = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN) & 0xff) << 8) +
                        ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN) & 0xff));

        ddr4_tFAWmin
            = spdMTB * (((read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN) & 0xf) << 8) +
                        ( read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN) & 0xff));

        ddr4_tRRD_Smin
            = spdMTB *        read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN)
            + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN);

        ddr4_tRRD_Lmin
            = spdMTB *        read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN)
            + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN);

        ddr4_tCCD_Lmin
            = spdMTB *        read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN)
            + spdFTB * (SC_t) read_spd(node, &dimm_config_table[0], DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN);

        ddr_print("%-45s : %6d ps\n", "Medium Timebase (MTB)",                             spdMTB);
        ddr_print("%-45s : %6d ps\n", "Fine Timebase   (FTB)",                             spdFTB);

    #define DDR4_TWR 15000
    #define DDR4_TWTR_S 2500


        tCKmin          = ddr4_tCKAVGmin;
        twr             = DDR4_TWR;
        trcd            = ddr4_tRCDmin;
        trrd            = ddr4_tRRD_Smin;
        trp             = ddr4_tRPmin;
        tras            = ddr4_tRASmin;
        trc             = ddr4_tRCmin;
        trfc            = ddr4_tRFC1min;
        twtr            = DDR4_TWTR_S;
        tfaw            = ddr4_tFAWmin;

        if (spd_rdimm) {
            spd_addr_mirror = read_spd(node, &dimm_config_table[0], DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM) & 0x1;
        } else {
            spd_addr_mirror = read_spd(node, &dimm_config_table[0], DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE) & 0x1;
        }
        debug_print("spd_addr_mirror : %#06x\n", spd_addr_mirror );

    } else { /* if (ddr_type == DDR4_DRAM) */
        spd_mtb_dividend = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND);
        spd_mtb_divisor  = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR);
        spd_tck_min      = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN);
        spd_taa_min      = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_CAS_LATENCY_TAAMIN);

        spd_twr          = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN);
        spd_trcd         = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN);
        spd_trrd         = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN);
        spd_trp          = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN);
        spd_tras         = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN);
        spd_tras        |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_UPPER_NIBBLES_TRAS_TRC)&0xf) << 8);
        spd_trc          = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN);
        spd_trc         |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_UPPER_NIBBLES_TRAS_TRC)&0xf0) << 4);
        spd_trfc         = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN);
        spd_trfc        |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN)) << 8);
        spd_twtr         = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN);
        spd_trtp         = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN);
        spd_tfaw         = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN);
        spd_tfaw        |= ((0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_UPPER_NIBBLE_TFAW)&0xf) << 8);
        spd_addr_mirror  = 0xff & read_spd(node, &dimm_config_table[0], DDR3_SPD_ADDRESS_MAPPING) & 0x1;
        spd_addr_mirror  = spd_addr_mirror && !spd_rdimm; /* Only address mirror unbuffered dimms.  */
        ftb_Dividend     = read_spd(node, &dimm_config_table[0], DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4;
        ftb_Divisor      = read_spd(node, &dimm_config_table[0], DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf;
        ftb_Divisor      = (ftb_Divisor == 0) ? 1 : ftb_Divisor; /* Make sure that it is not 0 */

        debug_print("spd_twr         : %#06x\n", spd_twr );
        debug_print("spd_trcd        : %#06x\n", spd_trcd);
        debug_print("spd_trrd        : %#06x\n", spd_trrd);
        debug_print("spd_trp         : %#06x\n", spd_trp );
        debug_print("spd_tras        : %#06x\n", spd_tras);
        debug_print("spd_trc         : %#06x\n", spd_trc );
        debug_print("spd_trfc        : %#06x\n", spd_trfc);
        debug_print("spd_twtr        : %#06x\n", spd_twtr);
        debug_print("spd_trtp        : %#06x\n", spd_trtp);
        debug_print("spd_tfaw        : %#06x\n", spd_tfaw);
        debug_print("spd_addr_mirror : %#06x\n", spd_addr_mirror);

        mtb_psec        = spd_mtb_dividend * 1000 / spd_mtb_divisor;
        tAAmin          = mtb_psec * spd_taa_min;
        tAAmin         += ftb_Dividend * (SC_t) read_spd(node, &dimm_config_table[0], DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN) / ftb_Divisor;
        tCKmin          = mtb_psec * spd_tck_min;
        tCKmin         += ftb_Dividend * (SC_t) read_spd(node, &dimm_config_table[0], DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN) / ftb_Divisor;

        twr             = spd_twr  * mtb_psec;
        trcd            = spd_trcd * mtb_psec;
        trrd            = spd_trrd * mtb_psec;
        trp             = spd_trp  * mtb_psec;
        tras            = spd_tras * mtb_psec;
        trc             = spd_trc  * mtb_psec;
        trfc            = spd_trfc * mtb_psec;
        twtr            = spd_twtr * mtb_psec;
        trtp            = spd_trtp * mtb_psec;
        tfaw            = spd_tfaw * mtb_psec;

    } /* if (ddr_type == DDR4_DRAM) */

    if (ddr_type == DDR4_DRAM) {
        ddr_print("%-45s : %6d ps (%ld MT/s)\n", "SDRAM Minimum Cycle Time (tCKAVGmin)", ddr4_tCKAVGmin,
                  pretty_psecs_to_mts(ddr4_tCKAVGmin));
        ddr_print("%-45s : %6d ps\n", "SDRAM Maximum Cycle Time (tCKAVGmax)",          ddr4_tCKAVGmax);
        ddr_print("%-45s : %6d ps\n", "Minimum CAS Latency Time (tAAmin)",             tAAmin);
        ddr_print("%-45s : %6d ps\n", "Minimum RAS to CAS Delay Time (tRCDmin)",       ddr4_tRCDmin);
        ddr_print("%-45s : %6d ps\n", "Minimum Row Precharge Delay Time (tRPmin)",     ddr4_tRPmin);
        ddr_print("%-45s : %6d ps\n", "Minimum Active to Precharge Delay (tRASmin)",   ddr4_tRASmin);
        ddr_print("%-45s : %6d ps\n", "Minimum Active to Active/Refr. Delay (tRCmin)", ddr4_tRCmin);
        ddr_print("%-45s : %6d ps\n", "Minimum Refresh Recovery Delay (tRFC1min)",     ddr4_tRFC1min);
        ddr_print("%-45s : %6d ps\n", "Minimum Refresh Recovery Delay (tRFC2min)",     ddr4_tRFC2min);
        ddr_print("%-45s : %6d ps\n", "Minimum Refresh Recovery Delay (tRFC4min)",     ddr4_tRFC4min);
        ddr_print("%-45s : %6d ps\n", "Minimum Four Activate Window Time (tFAWmin)",   ddr4_tFAWmin);
        ddr_print("%-45s : %6d ps\n", "Minimum Act. to Act. Delay (tRRD_Smin)",        ddr4_tRRD_Smin);
        ddr_print("%-45s : %6d ps\n", "Minimum Act. to Act. Delay (tRRD_Lmin)",        ddr4_tRRD_Lmin);
        ddr_print("%-45s : %6d ps\n", "Minimum CAS to CAS Delay Time (tCCD_Lmin)",     ddr4_tCCD_Lmin);
    } else {
        ddr_print("Medium Timebase (MTB)                         : %6d ps\n", mtb_psec);
        ddr_print("Minimum Cycle Time (tCKmin)                   : %6d ps (%ld MT/s)\n", tCKmin,
                  pretty_psecs_to_mts(tCKmin));
        ddr_print("Minimum CAS Latency Time (tAAmin)             : %6d ps\n", tAAmin);
        ddr_print("Write Recovery Time (tWR)                     : %6d ps\n", twr);
        ddr_print("Minimum RAS to CAS delay (tRCD)               : %6d ps\n", trcd);
        ddr_print("Minimum Row Active to Row Active delay (tRRD) : %6d ps\n", trrd);
        ddr_print("Minimum Row Precharge Delay (tRP)             : %6d ps\n", trp);
        ddr_print("Minimum Active to Precharge (tRAS)            : %6d ps\n", tras);
        ddr_print("Minimum Active to Active/Refresh Delay (tRC)  : %6d ps\n", trc);
        ddr_print("Minimum Refresh Recovery Delay (tRFC)         : %6d ps\n", trfc);
        ddr_print("Internal write to read command delay (tWTR)   : %6d ps\n", twtr);
        ddr_print("Min Internal Rd to Precharge Cmd Delay (tRTP) : %6d ps\n", trtp);
        ddr_print("Minimum Four Activate Window Delay (tFAW)     : %6d ps\n", tfaw);
    }


    /* When the cycle time is within 1 psec of the minimum, accept it
       as a slight rounding error and adjust it to exactly the minimum
       cycle time. This avoids an unnecessary warning. */
    if (_abs(tclk_psecs - tCKmin) < 2)
        tclk_psecs = tCKmin;

    if (tclk_psecs < (uint64_t)tCKmin) {
        ddr_print("WARNING!!!!: DDR Clock Rate (tCLK: %lld) exceeds DIMM specifications (tCKmin: %lld)!!!!\n",
                    tclk_psecs, (uint64_t)tCKmin);
    }


    ddr_print("DDR Clock Rate (tCLK)                         : %6llu ps\n", tclk_psecs);
    ddr_print("Core Clock Rate (eCLK)                        : %6llu ps\n", eclk_psecs);

    if ((s = lookup_env_parameter("ddr_use_ecc")) != NULL) {
        use_ecc = !!strtoul(s, NULL, 0);
    }
    use_ecc = use_ecc && spd_ecc;

    ddr_interface_bytemask = ddr_interface_64b
        ? (use_ecc ? 0x1ff : 0xff)
        : (use_ecc ? 0x01f : 0x0f); // FIXME? 81xx does this differently from 70xx

    ddr_print("DRAM Interface width: %d bits %s bytemask 0x%x\n",
              ddr_interface_64b ? 64 : 32, use_ecc ? "+ECC" : "",
              ddr_interface_bytemask);

    ddr_print("\n------ Board Custom Configuration Settings ------\n");
    ddr_print("%-45s : %d\n", "MIN_RTT_NOM_IDX   ", custom_lmc_config->min_rtt_nom_idx);
    ddr_print("%-45s : %d\n", "MAX_RTT_NOM_IDX   ", custom_lmc_config->max_rtt_nom_idx);
    ddr_print("%-45s : %d\n", "MIN_RODT_CTL      ", custom_lmc_config->min_rodt_ctl);
    ddr_print("%-45s : %d\n", "MAX_RODT_CTL      ", custom_lmc_config->max_rodt_ctl);
    ddr_print("%-45s : %d\n", "MIN_CAS_LATENCY   ", custom_lmc_config->min_cas_latency);
    ddr_print("%-45s : %d\n", "OFFSET_EN         ", custom_lmc_config->offset_en);
    ddr_print("%-45s : %d\n", "OFFSET_UDIMM      ", custom_lmc_config->offset_udimm);
    ddr_print("%-45s : %d\n", "OFFSET_RDIMM      ", custom_lmc_config->offset_rdimm);
    ddr_print("%-45s : %d\n", "DDR_RTT_NOM_AUTO  ", custom_lmc_config->ddr_rtt_nom_auto);
    ddr_print("%-45s : %d\n", "DDR_RODT_CTL_AUTO ", custom_lmc_config->ddr_rodt_ctl_auto);
    if (spd_rdimm)
        ddr_print("%-45s : %d\n", "RLEVEL_COMP_OFFSET", custom_lmc_config->rlevel_comp_offset_rdimm);
    else
        ddr_print("%-45s : %d\n", "RLEVEL_COMP_OFFSET", custom_lmc_config->rlevel_comp_offset_udimm);
    ddr_print("%-45s : %d\n", "RLEVEL_COMPUTE    ", custom_lmc_config->rlevel_compute);
    ddr_print("%-45s : %d\n", "DDR2T_UDIMM       ", custom_lmc_config->ddr2t_udimm);
    ddr_print("%-45s : %d\n", "DDR2T_RDIMM       ", custom_lmc_config->ddr2t_rdimm);
    ddr_print("%-45s : %d\n", "FPRCH2            ", custom_lmc_config->fprch2);
    ddr_print("-------------------------------------------------\n");


    CL              = divide_roundup(tAAmin, tclk_psecs);
3615 
3616     ddr_print("Desired CAS Latency                           : %6d\n", CL);
3617 
3618     min_cas_latency = custom_lmc_config->min_cas_latency;
3619 
3620 
3621     if ((s = lookup_env_parameter("ddr_min_cas_latency")) != NULL) {
3622         min_cas_latency = strtoul(s, NULL, 0);
3623     }
3624 
3625     {
3626         int base_CL;
3627         ddr_print("CAS Latencies supported in DIMM               :");
3628         base_CL = (ddr_type == DDR4_DRAM) ? 7 : 4;
3629         for (i=0; i<32; ++i) {
3630             if ((spd_cas_latency >> i) & 1) {
3631                 ddr_print(" %d", i+base_CL);
3632                 max_cas_latency = i+base_CL;
3633                 if (min_cas_latency == 0)
3634                     min_cas_latency = i+base_CL;
3635             }
3636         }
3637         ddr_print("\n");
3638 
3639         /* Use relaxed timing when running slower than the minimum
3640            supported speed.  Adjust timing to match the smallest supported
3641            CAS Latency. */
3642         if (CL < min_cas_latency) {
3643             uint64_t adjusted_tclk = tAAmin / min_cas_latency;
3644             CL = min_cas_latency;
3645             ddr_print("Slow clock speed. Adjusting timing: tClk = %llu, Adjusted tClk = %llu\n",
3646                       tclk_psecs, adjusted_tclk);
3647             tclk_psecs = adjusted_tclk;
3648         }
3649 
3650         if ((s = lookup_env_parameter("ddr_cas_latency")) != NULL) {
3651             override_cas_latency = strtoul(s, NULL, 0);
3652         }
3653 
3654         /* Make sure that the selected cas latency is legal */
3655         for (i=(CL-base_CL); i<32; ++i) {
3656             if ((spd_cas_latency >> i) & 1) {
3657                 CL = i+base_CL;
3658                 break;
3659             }
3660         }
3661     }
3662 
3663     if (CL > max_cas_latency)
3664         CL = max_cas_latency;
3665 
3666     if (override_cas_latency != 0) {
3667         CL = override_cas_latency;
3668     }
3669 
3670     ddr_print("CAS Latency                                   : %6d\n", CL);
3671 
3672     if ((CL * tCKmin) > 20000)
3673     {
3674         ddr_print("(CLactual * tCKmin) = %d exceeds 20 ns\n", (CL * tCKmin));
3675     }
3676 
3677     if ((num_banks != 4) && (num_banks != 8) && (num_banks != 16))
3678     {
3679         error_print("Unsupported number of banks %d. Must be 4, 8, or 16.\n", num_banks);
3680         ++fatal_error;
3681     }
3682 
3683     if ((num_ranks != 1) && (num_ranks != 2) && (num_ranks != 4))
3684     {
3685         error_print("Unsupported number of ranks: %d\n", num_ranks);
3686         ++fatal_error;
3687     }
3688 
3689     if (! CAVIUM_IS_MODEL(CAVIUM_CN81XX)) { // 88XX or 83XX, but not 81XX
3690         if ((dram_width != 8) && (dram_width != 16) && (dram_width != 4)) {
3691             error_print("Unsupported SDRAM Width, x%d.  Must be x4, x8 or x16.\n", dram_width);
3692             ++fatal_error;
3693         }
3694     } else if ((dram_width != 8) && (dram_width != 16)) { // 81XX can only do x8 or x16
3695         error_print("Unsupported SDRAM Width, x%d.  Must be x8 or x16.\n", dram_width);
3696         ++fatal_error;
3697     }
3698 
3699 
3700     /*
3701     ** Bail out here if things are not copasetic.
3702     */
3703     if (fatal_error)
3704         return(-1);
3705 
3706     /*
3707      * 6.9.6 LMC RESET Initialization
3708      *
3709      * The purpose of this step is to assert/deassert the RESET# pin at the
3710      * DDR3/DDR4 parts.
3711      *
3712      * This LMC RESET step is done for all enabled LMCs.
3713      */
3714     perform_lmc_reset(node, ddr_interface_num);
3715 
3716     // Make sure scrambling is disabled during init...
3717     {
3718         bdk_lmcx_control_t lmc_control;
3719 
3720         lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
3721         lmc_control.s.scramble_ena = 0;
3722         DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
3723 
3724         DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG0(ddr_interface_num), 0);
3725         DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG1(ddr_interface_num), 0);
3726         DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG2(ddr_interface_num), 0);
3727     }
3728 
3729 
3730     odt_idx = dimm_count - 1;
3731 
3732     switch (num_ranks) {
3733     case 1:
3734         odt_config = odt_1rank_config;
3735         break;
3736     case 2:
3737         odt_config = odt_2rank_config;
3738         break;
3739     case 4:
3740         odt_config = odt_4rank_config;
3741         break;
3742     default:
3743         odt_config = disable_odt_config;
3744         error_print("Unsupported number of ranks: %d\n", num_ranks);
3745         ++fatal_error;
3746     }
3747 
3748 
3749     /* Parameters from DDR3 Specifications */
3750 #define DDR3_tREFI         7800000    /* 7.8 us */
3751 #define DDR3_ZQCS          80000ull   /* 80 ns */
3752 #define DDR3_ZQCS_Interval 1280000000 /* 128ms/100 */
3753 #define DDR3_tCKE          5000       /* 5 ns */
3754 #define DDR3_tMRD          4          /* 4 nCK */
3755 #define DDR3_tDLLK         512        /* 512 nCK */
3756 #define DDR3_tMPRR         1          /* 1 nCK */
3757 #define DDR3_tWLMRD        40         /* 40 nCK */
3758 #define DDR3_tWLDQSEN      25         /* 25 nCK */
3759 
3760     /* Parameters from DDR4 Specifications */
3761 #define DDR4_tMRD          8          /* 8 nCK */
3762 #define DDR4_tDLLK         768        /* 768 nCK */
3763 
3764     /*
3765      * 6.9.7 Early LMC Initialization
3766      *
3767      * All of DDR PLL, LMC CK, and LMC DRESET initializations must be
3768      * completed prior to starting this LMC initialization sequence.
3769      *
3770      * Perform the following five substeps for early LMC initialization:
3771      *
3772      * 1. Software must ensure there are no pending DRAM transactions.
3773      *
3774      * 2. Write LMC(0)_CONFIG, LMC(0)_CONTROL, LMC(0)_TIMING_PARAMS0,
3775      *    LMC(0)_TIMING_PARAMS1, LMC(0)_MODEREG_PARAMS0,
3776      *    LMC(0)_MODEREG_PARAMS1, LMC(0)_DUAL_MEMCFG, LMC(0)_NXM,
3777      *    LMC(0)_WODT_MASK, LMC(0)_RODT_MASK, LMC(0)_COMP_CTL2,
3778      *    LMC(0)_PHY_CTL, LMC(0)_DIMM0/1_PARAMS, and LMC(0)_DIMM_CTL with
3779      *    appropriate values. All sections in this chapter can be used to
3780      *    derive proper register settings.
3781      */
3782 
3783     /* LMC(0)_CONFIG */
3784     {
3785         lmc_config.u = 0;
3786 
3787         lmc_config.s.ecc_ena         = use_ecc;
3788         lmc_config.s.row_lsb         = encode_row_lsb_ddr3(row_lsb, ddr_interface_64b);
3789         lmc_config.s.pbank_lsb       = encode_pbank_lsb_ddr3(pbank_lsb, ddr_interface_64b);
3790 
3791         lmc_config.s.idlepower       = 0; /* Disabled */
3792 
3793         if ((s = lookup_env_parameter("ddr_idlepower")) != NULL) {
3794             lmc_config.s.idlepower = strtoul(s, NULL, 0);
3795         }
3796 
3797         lmc_config.s.forcewrite      = 0; /* Disabled */
3798         lmc_config.s.ecc_adr         = 1; /* Include memory reference address in the ECC */
3799 
3800         if ((s = lookup_env_parameter("ddr_ecc_adr")) != NULL) {
3801             lmc_config.s.ecc_adr = strtoul(s, NULL, 0);
3802         }
3803 
3804         lmc_config.s.reset           = 0;
3805 
3806         /*
3807          *  Program LMC0_CONFIG[24:18], ref_zqcs_int(6:0) to
3808          *  RND-DN(tREFI/clkPeriod/512) Program LMC0_CONFIG[36:25],
3809          *  ref_zqcs_int(18:7) to
3810          *  RND-DN(ZQCS_Interval/clkPeriod/(512*128)). Note that this
3811          *  value should always be greater than 32, to account for
3812          *  resistor calibration delays.
3813          */
3814 
3815         lmc_config.s.ref_zqcs_int     = ((DDR3_tREFI/tclk_psecs/512) & 0x7f);
3816         lmc_config.s.ref_zqcs_int    |= ((max(33ull, (DDR3_ZQCS_Interval/(tclk_psecs/100)/(512*128))) & 0xfff) << 7);
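        /* Worked example, assuming tclk_psecs = 1000:
           tREFI term: 7800000/1000/512 = 15 (bits 6:0).
           ZQCS term: the interval constant is 128 ms pre-divided by 100,
           so 1280000000/(1000/100)/65536 = 1953 (bits 18:7), well above
           the 33 clamp. Result: ref_zqcs_int = 15 | (1953 << 7). */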
3817 
3818 
3819         lmc_config.s.early_dqx       = 1; /* Default to enabled */
3820 
3821         if ((s = lookup_env_parameter("ddr_early_dqx")) == NULL)
3822             s = lookup_env_parameter("ddr%d_early_dqx", ddr_interface_num);
3823         if (s != NULL) {
3824             lmc_config.s.early_dqx = strtoul(s, NULL, 0);
3825         }
3826 
3827         lmc_config.s.sref_with_dll        = 0;
3828 
3829         lmc_config.s.rank_ena        = bunk_enable;
3830         lmc_config.s.rankmask        = rank_mask; /* Set later */
3831         lmc_config.s.mirrmask        = (spd_addr_mirror << 1 | spd_addr_mirror << 3) & rank_mask;
3832         lmc_config.s.init_status     = rank_mask; /* Set once and don't change it. */
3833         lmc_config.s.early_unload_d0_r0   = 0;
3834         lmc_config.s.early_unload_d0_r1   = 0;
3835         lmc_config.s.early_unload_d1_r0   = 0;
3836         lmc_config.s.early_unload_d1_r1   = 0;
3837         lmc_config.s.scrz                 = 0;
3838         // enable 32-bit mode only when it was selected AND we are on 81xx...
3839         if (!ddr_interface_64b && CAVIUM_IS_MODEL(CAVIUM_CN81XX)) {
3840             lmc_config.s.mode32b          = 1;
3841         }
3842         VB_PRT(VBL_DEV, "%-45s : %d\n", "MODE32B (init)", lmc_config.s.mode32b);
3843         lmc_config.s.mode_x4dev           = (dram_width == 4) ? 1 : 0;
3844         lmc_config.s.bg2_enable           = ((ddr_type == DDR4_DRAM) && (dram_width == 16)) ? 0 : 1;
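        /* DDR4 x16 devices implement only two bank groups (a single BG
           address bit), so BG2 addressing must be off for that case. */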
3845 
3846         if ((s = lookup_env_parameter_ull("ddr_config")) != NULL) {
3847             lmc_config.u    = strtoull(s, NULL, 0);
3848         }
3849         ddr_print("LMC_CONFIG                                    : 0x%016llx\n", lmc_config.u);
3850         DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
3851     }
3852 
3853     /* LMC(0)_CONTROL */
3854     {
3855         bdk_lmcx_control_t lmc_control;
3856         lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
3857         lmc_control.s.rdimm_ena       = spd_rdimm;
3858         lmc_control.s.bwcnt           = 0; /* Clear counter later */
3859         if (spd_rdimm)
3860             lmc_control.s.ddr2t       = (safe_ddr_flag ? 1 : custom_lmc_config->ddr2t_rdimm );
3861         else
3862             lmc_control.s.ddr2t       = (safe_ddr_flag ? 1 : custom_lmc_config->ddr2t_udimm );
3863         lmc_control.s.pocas           = 0;
3864         lmc_control.s.fprch2          = (safe_ddr_flag ? 2 : custom_lmc_config->fprch2 );
3865         lmc_control.s.throttle_rd     = safe_ddr_flag ? 1 : 0;
3866         lmc_control.s.throttle_wr     = safe_ddr_flag ? 1 : 0;
3867         lmc_control.s.inorder_rd      = safe_ddr_flag ? 1 : 0;
3868         lmc_control.s.inorder_wr      = safe_ddr_flag ? 1 : 0;
3869         lmc_control.cn81xx.elev_prio_dis   = safe_ddr_flag ? 1 : 0;
3870         lmc_control.s.nxm_write_en    = 0; /* discards writes to
3871                                             addresses that don't exist
3872                                             in the DRAM */
3873         lmc_control.s.max_write_batch = 8;
3874         lmc_control.s.xor_bank        = 1;
3875         lmc_control.s.auto_dclkdis    = 1;
3876         lmc_control.s.int_zqcs_dis    = 0;
3877         lmc_control.s.ext_zqcs_dis    = 0;
3878         lmc_control.s.bprch           = 1;
3879         lmc_control.s.wodt_bprch      = 1;
3880         lmc_control.s.rodt_bprch      = 1;
3881 
3882         if ((s = lookup_env_parameter("ddr_xor_bank")) != NULL) {
3883             lmc_control.s.xor_bank = strtoul(s, NULL, 0);
3884         }
3885 
3886         if ((s = lookup_env_parameter("ddr_2t")) != NULL) {
3887             lmc_control.s.ddr2t = strtoul(s, NULL, 0);
3888         }
3889 
3890         if ((s = lookup_env_parameter("ddr_fprch2")) != NULL) {
3891             lmc_control.s.fprch2 = strtoul(s, NULL, 0);
3892         }
3893 
3894         if ((s = lookup_env_parameter("ddr_bprch")) != NULL) {
3895             lmc_control.s.bprch = strtoul(s, NULL, 0);
3896         }
3897 
3898         if ((s = lookup_env_parameter("ddr_wodt_bprch")) != NULL) {
3899             lmc_control.s.wodt_bprch = strtoul(s, NULL, 0);
3900         }
3901 
3902         if ((s = lookup_env_parameter("ddr_rodt_bprch")) != NULL) {
3903             lmc_control.s.rodt_bprch = strtoul(s, NULL, 0);
3904         }
3905 
3906         if ((s = lookup_env_parameter("ddr_int_zqcs_dis")) != NULL) {
3907             lmc_control.s.int_zqcs_dis = strtoul(s, NULL, 0);
3908         }
3909 
3910         if ((s = lookup_env_parameter("ddr_ext_zqcs_dis")) != NULL) {
3911             lmc_control.s.ext_zqcs_dis = strtoul(s, NULL, 0);
3912         }
3913 
3914         if ((s = lookup_env_parameter_ull("ddr_control")) != NULL) {
3915             lmc_control.u    = strtoull(s, NULL, 0);
3916         }
3917         ddr_print("LMC_CONTROL                                   : 0x%016llx\n", lmc_control.u);
3918         DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
3919     }
3920 
3921     /* LMC(0)_TIMING_PARAMS0 */
3922     {
3923         unsigned trp_value;
3924         bdk_lmcx_timing_params0_t lmc_timing_params0;
3925         lmc_timing_params0.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS0(ddr_interface_num));
3926 
3927         trp_value = divide_roundup(trp, tclk_psecs) - 1;
3928         ddr_print("TIMING_PARAMS0[TRP]: NEW 0x%x, OLD 0x%x\n", trp_value,
3929                   trp_value + (unsigned)(divide_roundup(max(4*tclk_psecs, 7500ull), tclk_psecs)) - 4);
3930 #if 1
3931         if ((s = lookup_env_parameter_ull("ddr_use_old_trp")) != NULL) {
3932             if (!!strtoull(s, NULL, 0)) {
3933                 trp_value += divide_roundup(max(4*tclk_psecs, 7500ull), tclk_psecs) - 4;
3934                 ddr_print("TIMING_PARAMS0[trp]: USING OLD 0x%x\n", trp_value);
3935             }
3936         }
3937 #endif
3938 
3939         lmc_timing_params0.s.txpr     = divide_roundup(max(5*tclk_psecs, trfc+10000ull), 16*tclk_psecs);
3940         lmc_timing_params0.s.tzqinit  = divide_roundup(max(512*tclk_psecs, 640000ull), (256*tclk_psecs));
3941         lmc_timing_params0.s.trp      = trp_value & 0x1f;
3942         lmc_timing_params0.s.tcksre   = divide_roundup(max(5*tclk_psecs, 10000ull), tclk_psecs) - 1;
3943 
3944         if (ddr_type == DDR4_DRAM) {
3945             lmc_timing_params0.s.tzqcs    = divide_roundup(128*tclk_psecs, (16*tclk_psecs)); /* Always 8. */
3946             lmc_timing_params0.s.tcke     = divide_roundup(max(3*tclk_psecs, (uint64_t) DDR3_tCKE), tclk_psecs) - 1;
3947             lmc_timing_params0.s.tmrd     = divide_roundup((DDR4_tMRD*tclk_psecs), tclk_psecs) - 1;
3948             //lmc_timing_params0.s.tmod     = divide_roundup(max(24*tclk_psecs, 15000ull), tclk_psecs) - 1;
3949             lmc_timing_params0.s.tmod     = 25; /* 25 is the max allowed */
3950             lmc_timing_params0.s.tdllk    = divide_roundup(DDR4_tDLLK, 256);
3951         } else {
3952             lmc_timing_params0.s.tzqcs    = divide_roundup(max(64*tclk_psecs, DDR3_ZQCS), (16*tclk_psecs));
3953             lmc_timing_params0.s.tcke     = divide_roundup(DDR3_tCKE, tclk_psecs) - 1;
3954             lmc_timing_params0.s.tmrd     = divide_roundup((DDR3_tMRD*tclk_psecs), tclk_psecs) - 1;
3955             lmc_timing_params0.s.tmod     = divide_roundup(max(12*tclk_psecs, 15000ull), tclk_psecs) - 1;
3956             lmc_timing_params0.s.tdllk    = divide_roundup(DDR3_tDLLK, 256);
3957         }
3958 
3959         if ((s = lookup_env_parameter_ull("ddr_timing_params0")) != NULL) {
3960             lmc_timing_params0.u    = strtoull(s, NULL, 0);
3961         }
3962         ddr_print("TIMING_PARAMS0                                : 0x%016llx\n", lmc_timing_params0.u);
3963         DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS0(ddr_interface_num), lmc_timing_params0.u);
3964     }
3965 
3966     /* LMC(0)_TIMING_PARAMS1 */
3967     {
3968         int txp, temp_trcd, trfc_dlr;
3969         bdk_lmcx_timing_params1_t lmc_timing_params1;
3970         lmc_timing_params1.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num));
3971 
3972         lmc_timing_params1.s.tmprr    = divide_roundup(DDR3_tMPRR*tclk_psecs, tclk_psecs) - 1;
3973 
3974         lmc_timing_params1.s.tras     = divide_roundup(tras, tclk_psecs) - 1;
3975 
3976         // NOTE: this is reworked for pass 2.x
3977         temp_trcd = divide_roundup(trcd, tclk_psecs);
3978 #if 1
3979         if (temp_trcd > 15)
3980             ddr_print("TIMING_PARAMS1[trcd]: need extension bit for 0x%x\n", temp_trcd);
3981 #endif
3982         if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (temp_trcd > 15)) {
3983             /* Let .trcd=0 serve as a flag that the field has
3984                overflowed. Must use Additive Latency mode as a
3985                workaround. */
3986             temp_trcd = 0;
3987         }
3988         lmc_timing_params1.s.trcd     = temp_trcd & 0x0f;
3989         lmc_timing_params1.s.trcd_ext = (temp_trcd >> 4) & 1;
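        /* e.g., trcd = 13750 ps at tclk_psecs = 833 (DDR4-2400):
           roundup(13750/833) = 17 overflows the 4-bit field, giving
           trcd = 1 with trcd_ext = 1; on pass 1.x, which lacks the
           extension bit, the value was zeroed above as the AL flag. */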
3990 
3991         lmc_timing_params1.s.twtr     = divide_roundup(twtr, tclk_psecs) - 1;
3992         lmc_timing_params1.s.trfc     = divide_roundup(trfc, 8*tclk_psecs);
3993 
3994         // workaround needed for all THUNDER chips thru T88 Pass 2.0,
3995         // but not 81xx and 83xx...
3996         if ((ddr_type == DDR4_DRAM) && CAVIUM_IS_MODEL(CAVIUM_CN88XX)) {
3997             /* Workaround bug 24006. Use Trrd_l. */
3998             lmc_timing_params1.s.trrd     = divide_roundup(ddr4_tRRD_Lmin, tclk_psecs) - 2;
3999         } else
4000             lmc_timing_params1.s.trrd     = divide_roundup(trrd, tclk_psecs) - 2;
4001 
4002         /*
4003         ** tXP = max( 3nCK, 7.5 ns)     DDR3-800   tCLK = 2500 psec
4004         ** tXP = max( 3nCK, 7.5 ns)     DDR3-1066  tCLK = 1875 psec
4005         ** tXP = max( 3nCK, 6.0 ns)     DDR3-1333  tCLK = 1500 psec
4006         ** tXP = max( 3nCK, 6.0 ns)     DDR3-1600  tCLK = 1250 psec
4007         ** tXP = max( 3nCK, 6.0 ns)     DDR3-1866  tCLK = 1071 psec
4008         ** tXP = max( 3nCK, 6.0 ns)     DDR3-2133  tCLK =  937 psec
4009         */
4010         txp = (tclk_psecs < 1875) ? 6000 : 7500;
4011         // NOTE: this is reworked for pass 2.x
4012         int temp_txp = divide_roundup(max(3*tclk_psecs, (unsigned)txp), tclk_psecs) - 1;
4013 #if 1
4014         if (temp_txp > 7)
4015             ddr_print("TIMING_PARAMS1[txp]: need extension bit for 0x%x\n", temp_txp);
4016 #endif
4017         if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (temp_txp > 7)) {
4018             temp_txp = 7; // max it out
4019         }
4020         lmc_timing_params1.s.txp      = temp_txp & 7;
4021         lmc_timing_params1.s.txp_ext  = (temp_txp >> 3) & 1;
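        /* e.g., at tclk_psecs = 625 (DDR4-3200): txp = 6000, so
           temp_txp = roundup(6000/625) - 1 = 9, which needs the
           extension bit: txp = 1, txp_ext = 1 (pass 1.x saturates at 7). */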
4022 
4023         lmc_timing_params1.s.twlmrd   = divide_roundup(DDR3_tWLMRD*tclk_psecs, 4*tclk_psecs);
4024         lmc_timing_params1.s.twldqsen = divide_roundup(DDR3_tWLDQSEN*tclk_psecs, 4*tclk_psecs);
4025         lmc_timing_params1.s.tfaw     = divide_roundup(tfaw, 4*tclk_psecs);
4026         lmc_timing_params1.s.txpdll   = divide_roundup(max(10*tclk_psecs, 24000ull), tclk_psecs) - 1;
4027 
4028         if ((ddr_type == DDR4_DRAM) && is_3ds_dimm) {
4029             /*
4030               4 Gb: tRFC_DLR = 90 ns
4031               8 Gb: tRFC_DLR = 120 ns
4032               16 Gb: tRFC_DLR = 190 ns FIXME?
4033              */
4034             // RNDUP[tRFC_DLR(ns) / (8 * TCYC(ns))]
4035             if (die_capacity == 0x1000) // 4 Gbit
4036                 trfc_dlr = 90;
4037             else if (die_capacity == 0x2000) // 8 Gbit
4038                 trfc_dlr = 120;
4039             else if (die_capacity == 0x4000) // 16 Gbit
4040                 trfc_dlr = 190;
4041             else
4042                 trfc_dlr = 0;
4043 
4044             if (trfc_dlr == 0) {
4045                 ddr_print("N%d.LMC%d: ERROR: tRFC_DLR: die_capacity %u Mbit is illegal\n",
4046                           node, ddr_interface_num, die_capacity);
4047             } else {
4048                 lmc_timing_params1.s.trfc_dlr = divide_roundup(trfc_dlr * 1000UL, 8*tclk_psecs);
4049                 ddr_print("N%d.LMC%d: TIMING_PARAMS1[trfc_dlr] set to %u\n",
4050                           node, ddr_interface_num, lmc_timing_params1.s.trfc_dlr);
4051             }
4052         }
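        /* e.g., an 8 Gbit 3DS die at tclk_psecs = 833:
           trfc_dlr = roundup(120000/(8*833)) = roundup(120000/6664) = 19. */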
4053 
4054         if ((s = lookup_env_parameter_ull("ddr_timing_params1")) != NULL) {
4055             lmc_timing_params1.u    = strtoull(s, NULL, 0);
4056         }
4057         ddr_print("TIMING_PARAMS1                                : 0x%016llx\n", lmc_timing_params1.u);
4058         DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num), lmc_timing_params1.u);
4059     }
4060 
4061     /* LMC(0)_TIMING_PARAMS2 */
4062     if (ddr_type == DDR4_DRAM) {
4063         bdk_lmcx_timing_params1_t lmc_timing_params1;
4064         bdk_lmcx_timing_params2_t lmc_timing_params2;
4065         lmc_timing_params1.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num));
4066         lmc_timing_params2.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS2(ddr_interface_num));
4067         ddr_print("TIMING_PARAMS2                                : 0x%016llx\n", lmc_timing_params2.u);
4068 
4069         //lmc_timing_params2.s.trrd_l = divide_roundup(ddr4_tRRD_Lmin, tclk_psecs) - 1;
4070         // NOTE: this is reworked for pass 2.x
4071         int temp_trrd_l = divide_roundup(ddr4_tRRD_Lmin, tclk_psecs) - 2;
4072 #if 1
4073         if (temp_trrd_l > 7)
4074             ddr_print("TIMING_PARAMS2[trrd_l]: need extension bit for 0x%x\n", temp_trrd_l);
4075 #endif
4076         if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (temp_trrd_l > 7)) {
4077             temp_trrd_l = 7; // max it out
4078         }
4079         lmc_timing_params2.s.trrd_l      = temp_trrd_l & 7;
4080         lmc_timing_params2.s.trrd_l_ext  = (temp_trrd_l >> 3) & 1;
4081 
4082         lmc_timing_params2.s.twtr_l = divide_nint(max(4*tclk_psecs, 7500ull), tclk_psecs) - 1; // correct for 1600-2400
4083         lmc_timing_params2.s.t_rw_op_max = 7;
4084         lmc_timing_params2.s.trtp = divide_roundup(max(4*tclk_psecs, 7500ull), tclk_psecs) - 1;
4085 
4086         ddr_print("TIMING_PARAMS2                                : 0x%016llx\n", lmc_timing_params2.u);
4087         DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS2(ddr_interface_num), lmc_timing_params2.u);
4088 
4089         /* Workaround Errata 25823 - LMC: Possible DDR4 tWTR_L not met
4090            for Write-to-Read operations to the same Bank Group */
4091         if (lmc_timing_params1.s.twtr < (lmc_timing_params2.s.twtr_l - 4)) {
4092             lmc_timing_params1.s.twtr = lmc_timing_params2.s.twtr_l - 4;
4093             ddr_print("ERRATA 25823: NEW: TWTR: %d, TWTR_L: %d\n", lmc_timing_params1.s.twtr, lmc_timing_params2.s.twtr_l);
4094             ddr_print("TIMING_PARAMS1                                : 0x%016llx\n", lmc_timing_params1.u);
4095             DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num), lmc_timing_params1.u);
4096         }
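        /* e.g., at tclk_psecs = 833, twtr_l computes to 8 above, so any
           twtr below 4 is raised to 4 to keep tWTR_L covered. */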
4097     }
4098 
4099     /* LMC(0)_MODEREG_PARAMS0 */
4100     {
4101         bdk_lmcx_modereg_params0_t lmc_modereg_params0;
4102         int param;
4103 
4104         lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
4105 
4106         if (ddr_type == DDR4_DRAM) {
4107             lmc_modereg_params0.s.cwl     = 0; /* 1600 (1250ps) */
4108             if (tclk_psecs < 1250)
4109                 lmc_modereg_params0.s.cwl = 1; /* 1866 (1072ps) */
4110             if (tclk_psecs < 1072)
4111                 lmc_modereg_params0.s.cwl = 2; /* 2133 (938ps) */
4112             if (tclk_psecs < 938)
4113                 lmc_modereg_params0.s.cwl = 3; /* 2400 (833ps) */
4114             if (tclk_psecs < 833)
4115                 lmc_modereg_params0.s.cwl = 4; /* 2666 (750ps) */
4116             if (tclk_psecs < 750)
4117                 lmc_modereg_params0.s.cwl = 5; /* 3200 (625ps) */
4118         } else {
4119         /*
4120         ** CSR   CWL         CAS write Latency
4121         ** ===   ===   =================================
4122         **  0      5   (           tCK(avg) >=   2.5 ns)
4123         **  1      6   (2.5 ns   > tCK(avg) >= 1.875 ns)
4124         **  2      7   (1.875 ns > tCK(avg) >= 1.5   ns)
4125         **  3      8   (1.5 ns   > tCK(avg) >= 1.25  ns)
4126         **  4      9   (1.25 ns  > tCK(avg) >= 1.07  ns)
4127         **  5     10   (1.07 ns  > tCK(avg) >= 0.935 ns)
4128         **  6     11   (0.935 ns > tCK(avg) >= 0.833 ns)
4129         **  7     12   (0.833 ns > tCK(avg) >= 0.75  ns)
4130         */
4131 
4132         lmc_modereg_params0.s.cwl     = 0;
4133         if (tclk_psecs < 2500)
4134             lmc_modereg_params0.s.cwl = 1;
4135         if (tclk_psecs < 1875)
4136             lmc_modereg_params0.s.cwl = 2;
4137         if (tclk_psecs < 1500)
4138             lmc_modereg_params0.s.cwl = 3;
4139         if (tclk_psecs < 1250)
4140             lmc_modereg_params0.s.cwl = 4;
4141         if (tclk_psecs < 1070)
4142             lmc_modereg_params0.s.cwl = 5;
4143         if (tclk_psecs <  935)
4144             lmc_modereg_params0.s.cwl = 6;
4145         if (tclk_psecs <  833)
4146             lmc_modereg_params0.s.cwl = 7;
4147         }
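        /* e.g., DDR3-1600 (tclk_psecs = 1250) passes the first three
           tests (1250 < 2500, 1875, 1500) but not the fourth, so the
           CSR holds 3, i.e. CWL = 3 + 5 = 8, matching the table above. */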
4148 
4149         if ((s = lookup_env_parameter("ddr_cwl")) != NULL) {
4150             lmc_modereg_params0.s.cwl = strtoul(s, NULL, 0) - 5;
4151         }
4152 
4153         if (ddr_type == DDR4_DRAM) {
4154             ddr_print("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
4155                       lmc_modereg_params0.s.cwl + 9
4156                       + ((lmc_modereg_params0.s.cwl>2) ? (lmc_modereg_params0.s.cwl-3) * 2 : 0),
4157                       lmc_modereg_params0.s.cwl);
4158         } else {
4159             ddr_print("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
4160                       lmc_modereg_params0.s.cwl + 5,
4161                       lmc_modereg_params0.s.cwl);
4162         }
4163 
4164         lmc_modereg_params0.s.mprloc  = 0;
4165         lmc_modereg_params0.s.mpr     = 0;
4166         lmc_modereg_params0.s.dll     = (ddr_type == DDR4_DRAM)?1:0; /* disable(0) for DDR3 and enable(1) for DDR4 */
4167         lmc_modereg_params0.s.al      = 0;
4168         lmc_modereg_params0.s.wlev    = 0; /* Read Only */
4169         lmc_modereg_params0.s.tdqs    = ((ddr_type == DDR4_DRAM) || (dram_width != 8))?0:1; /* disable(0) for DDR4 and x4/x16 DDR3 */
4170         lmc_modereg_params0.s.qoff    = 0;
4171         //lmc_modereg_params0.s.bl      = 0; /* Don't touch block dirty logic */
4172 
4173         if ((s = lookup_env_parameter("ddr_cl")) != NULL) {
4174             CL = strtoul(s, NULL, 0);
4175             ddr_print("CAS Latency                                   : %6d\n", CL);
4176         }
4177 
4178         if (ddr_type == DDR4_DRAM) {
4179             lmc_modereg_params0.s.cl      = 0x0;
4180             if (CL > 9)
4181                 lmc_modereg_params0.s.cl  = 0x1;
4182             if (CL > 10)
4183                 lmc_modereg_params0.s.cl  = 0x2;
4184             if (CL > 11)
4185                 lmc_modereg_params0.s.cl  = 0x3;
4186             if (CL > 12)
4187                 lmc_modereg_params0.s.cl  = 0x4;
4188             if (CL > 13)
4189                 lmc_modereg_params0.s.cl  = 0x5;
4190             if (CL > 14)
4191                 lmc_modereg_params0.s.cl  = 0x6;
4192             if (CL > 15)
4193                 lmc_modereg_params0.s.cl  = 0x7;
4194             if (CL > 16)
4195                 lmc_modereg_params0.s.cl  = 0x8;
4196             if (CL > 18)
4197                 lmc_modereg_params0.s.cl  = 0x9;
4198             if (CL > 20)
4199                 lmc_modereg_params0.s.cl  = 0xA;
4200             if (CL > 24)
4201                 lmc_modereg_params0.s.cl  = 0xB;
4202         } else {
4203             lmc_modereg_params0.s.cl      = 0x2;
4204             if (CL > 5)
4205                 lmc_modereg_params0.s.cl  = 0x4;
4206             if (CL > 6)
4207                 lmc_modereg_params0.s.cl  = 0x6;
4208             if (CL > 7)
4209                 lmc_modereg_params0.s.cl  = 0x8;
4210             if (CL > 8)
4211                 lmc_modereg_params0.s.cl  = 0xA;
4212             if (CL > 9)
4213                 lmc_modereg_params0.s.cl  = 0xC;
4214             if (CL > 10)
4215                 lmc_modereg_params0.s.cl  = 0xE;
4216             if (CL > 11)
4217                 lmc_modereg_params0.s.cl  = 0x1;
4218             if (CL > 12)
4219                 lmc_modereg_params0.s.cl  = 0x3;
4220             if (CL > 13)
4221                 lmc_modereg_params0.s.cl  = 0x5;
4222             if (CL > 14)
4223                 lmc_modereg_params0.s.cl  = 0x7;
4224             if (CL > 15)
4225                 lmc_modereg_params0.s.cl  = 0x9;
4226         }
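        /* e.g., DDR3 CL = 11 walks the ladder down to 0xE, the MR0
           CAS-latency encoding for CL 11; the DDR4 ladder similarly
           maps CL onto its MR0 encoding (e.g. CL = 16 gives 0x7). */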
4227 
4228         lmc_modereg_params0.s.rbt     = 0; /* Read Only. */
4229         lmc_modereg_params0.s.tm      = 0;
4230         lmc_modereg_params0.s.dllr    = 0;
4231 
4232         param = divide_roundup(twr, tclk_psecs);
4233 
4234         if (ddr_type == DDR4_DRAM) {    /* DDR4 */
4235             lmc_modereg_params0.s.wrp     = 1;
4236             if (param > 12)
4237                 lmc_modereg_params0.s.wrp = 2;
4238             if (param > 14)
4239                 lmc_modereg_params0.s.wrp = 3;
4240             if (param > 16)
4241                 lmc_modereg_params0.s.wrp = 4;
4242             if (param > 18)
4243                 lmc_modereg_params0.s.wrp = 5;
4244             if (param > 20)
4245                 lmc_modereg_params0.s.wrp = 6;
4246             if (param > 24)         /* RESERVED in DDR4 spec */
4247                 lmc_modereg_params0.s.wrp = 7;
4248         } else {                /* DDR3 */
4249             lmc_modereg_params0.s.wrp     = 1;
4250             if (param > 5)
4251                 lmc_modereg_params0.s.wrp = 2;
4252             if (param > 6)
4253                 lmc_modereg_params0.s.wrp = 3;
4254             if (param > 7)
4255                 lmc_modereg_params0.s.wrp = 4;
4256             if (param > 8)
4257                 lmc_modereg_params0.s.wrp = 5;
4258             if (param > 10)
4259                 lmc_modereg_params0.s.wrp = 6;
4260             if (param > 12)
4261                 lmc_modereg_params0.s.wrp = 7;
4262         }
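        /* e.g., DDR4 with twr = 15000 ps at tclk_psecs = 833:
           param = roundup(15000/833) = 19, so wrp = 5. */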
4263 
4264         lmc_modereg_params0.s.ppd     = 0;
4265 
4266         if ((s = lookup_env_parameter("ddr_wrp")) != NULL) {
4267             lmc_modereg_params0.s.wrp = strtoul(s, NULL, 0);
4268         }
4269 
4270         ddr_print("%-45s : %d, [0x%x]\n", "Write recovery for auto precharge WRP, [CSR]",
4271                   param, lmc_modereg_params0.s.wrp);
4272 
4273         if ((s = lookup_env_parameter_ull("ddr_modereg_params0")) != NULL) {
4274             lmc_modereg_params0.u    = strtoull(s, NULL, 0);
4275         }
4276         ddr_print("MODEREG_PARAMS0                               : 0x%016llx\n", lmc_modereg_params0.u);
4277         DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
4278     }
4279 
4280     /* LMC(0)_MODEREG_PARAMS1 */
4281     {
4282         bdk_lmcx_modereg_params1_t lmc_modereg_params1;
4283 
4284         lmc_modereg_params1.u = odt_config[odt_idx].odt_mask1.u;
4285 
4286 #ifdef CAVIUM_ONLY
4287         /* Special request: mismatched DIMM support. Slot 0: 2-Rank, Slot 1: 1-Rank */
4288         if (rank_mask == 0x7) { /* 2-Rank, 1-Rank */
4289             lmc_modereg_params1.s.rtt_nom_00 = 0;
4290             lmc_modereg_params1.s.rtt_nom_01 = 3; /* rttnom_40ohm */
4291             lmc_modereg_params1.s.rtt_nom_10 = 3; /* rttnom_40ohm */
4292             lmc_modereg_params1.s.rtt_nom_11 = 0;
4293             dyn_rtt_nom_mask = 0x6;
4294         }
4295 #endif  /* CAVIUM_ONLY */
4296 
4297         if ((s = lookup_env_parameter("ddr_rtt_nom_mask")) != NULL) {
4298             dyn_rtt_nom_mask    = strtoul(s, NULL, 0);
4299         }
4300 
4301 
4302         /* Save the original rtt_nom settings before sweeping through settings. */
4303         default_rtt_nom[0] = lmc_modereg_params1.s.rtt_nom_00;
4304         default_rtt_nom[1] = lmc_modereg_params1.s.rtt_nom_01;
4305         default_rtt_nom[2] = lmc_modereg_params1.s.rtt_nom_10;
4306         default_rtt_nom[3] = lmc_modereg_params1.s.rtt_nom_11;
4307 
4308         ddr_rtt_nom_auto = custom_lmc_config->ddr_rtt_nom_auto;
4309 
4310         for (i=0; i<4; ++i) {
4311             uint64_t value;
4312             if ((s = lookup_env_parameter("ddr_rtt_nom_%1d%1d", !!(i&2), !!(i&1))) == NULL)
4313                 s = lookup_env_parameter("ddr%d_rtt_nom_%1d%1d", ddr_interface_num, !!(i&2), !!(i&1));
4314             if (s != NULL) {
4315                 value = strtoul(s, NULL, 0);
4316                 lmc_modereg_params1.u &= ~((uint64_t)0x7  << (i*12+9));
4317                 lmc_modereg_params1.u |=  ( (value & 0x7) << (i*12+9));
4318                 default_rtt_nom[i] = value;
4319                 ddr_rtt_nom_auto = 0;
4320             }
4321         }
4322 
4323         if ((s = lookup_env_parameter("ddr_rtt_nom")) == NULL)
4324             s = lookup_env_parameter("ddr%d_rtt_nom", ddr_interface_num);
4325         if (s != NULL) {
4326             uint64_t value;
4327             value = strtoul(s, NULL, 0);
4328 
4329             if (dyn_rtt_nom_mask & 1)
4330                 default_rtt_nom[0] = lmc_modereg_params1.s.rtt_nom_00 = value;
4331             if (dyn_rtt_nom_mask & 2)
4332                 default_rtt_nom[1] = lmc_modereg_params1.s.rtt_nom_01 = value;
4333             if (dyn_rtt_nom_mask & 4)
4334                 default_rtt_nom[2] = lmc_modereg_params1.s.rtt_nom_10 = value;
4335             if (dyn_rtt_nom_mask & 8)
4336                 default_rtt_nom[3] = lmc_modereg_params1.s.rtt_nom_11 = value;
4337 
4338             ddr_rtt_nom_auto = 0;
4339         }
4340 
4341         if ((s = lookup_env_parameter("ddr_rtt_wr")) != NULL) {
4342             uint64_t value = strtoul(s, NULL, 0);
4343             for (i=0; i<4; ++i) {
4344                 INSRT_WR(&lmc_modereg_params1.u, i, value);
4345             }
4346         }
4347 
4348         for (i = 0; i < 4; ++i) {
4349             uint64_t value;
4350             if ((s = lookup_env_parameter("ddr_rtt_wr_%1d%1d", !!(i&2), !!(i&1))) == NULL)
4351                 s = lookup_env_parameter("ddr%d_rtt_wr_%1d%1d", ddr_interface_num, !!(i&2), !!(i&1));
4352             if (s != NULL) {
4353                 value = strtoul(s, NULL, 0);
4354                 INSRT_WR(&lmc_modereg_params1.u, i, value);
4355             }
4356         }
4357 
4358         // Make sure pass 1 has valid RTT_WR settings, because
4359         // configuration files may be set up for pass 2, and
4360         // pass 1 supports no RTT_WR extension bits
4361         if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
4362             for (i = 0; i < 4; ++i) {
4363                 if (EXTR_WR(lmc_modereg_params1.u, i) > 3) { // if 80 or undefined
4364                     INSRT_WR(&lmc_modereg_params1.u, i, 1); // FIXME? always insert 120
4365                     ddr_print("RTT_WR_%d%d set to 120 for CN88XX pass 1\n", !!(i&2), i&1);
4366                 }
4367             }
4368         }
4369         if ((s = lookup_env_parameter("ddr_dic")) != NULL) {
4370             uint64_t value = strtoul(s, NULL, 0);
4371             for (i=0; i<4; ++i) {
4372                 lmc_modereg_params1.u &= ~((uint64_t)0x3  << (i*12+7));
4373                 lmc_modereg_params1.u |=  ( (value & 0x3) << (i*12+7));
4374             }
4375         }
4376 
4377         for (i=0; i<4; ++i) {
4378             uint64_t value;
4379             if ((s = lookup_env_parameter("ddr_dic_%1d%1d", !!(i&2), !!(i&1))) != NULL) {
4380                 value = strtoul(s, NULL, 0);
4381                 lmc_modereg_params1.u &= ~((uint64_t)0x3  << (i*12+7));
4382                 lmc_modereg_params1.u |=  ( (value & 0x3) << (i*12+7));
4383             }
4384         }
4385 
4386         if ((s = lookup_env_parameter_ull("ddr_modereg_params1")) != NULL) {
4387             lmc_modereg_params1.u    = strtoull(s, NULL, 0);
4388         }
4389 
4390         ddr_print("RTT_NOM     %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
4391                   imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_11],
4392                   imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_10],
4393                   imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_01],
4394                   imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00],
4395                   lmc_modereg_params1.s.rtt_nom_11,
4396                   lmc_modereg_params1.s.rtt_nom_10,
4397                   lmc_modereg_params1.s.rtt_nom_01,
4398                   lmc_modereg_params1.s.rtt_nom_00);
4399 
4400         ddr_print("RTT_WR      %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
4401                   imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 3)],
4402                   imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 2)],
4403                   imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 1)],
4404                   imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 0)],
4405                   EXTR_WR(lmc_modereg_params1.u, 3),
4406                   EXTR_WR(lmc_modereg_params1.u, 2),
4407                   EXTR_WR(lmc_modereg_params1.u, 1),
4408                   EXTR_WR(lmc_modereg_params1.u, 0));
4409 
4410         ddr_print("DIC         %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
4411                   imp_values->dic_ohms[lmc_modereg_params1.s.dic_11],
4412                   imp_values->dic_ohms[lmc_modereg_params1.s.dic_10],
4413                   imp_values->dic_ohms[lmc_modereg_params1.s.dic_01],
4414                   imp_values->dic_ohms[lmc_modereg_params1.s.dic_00],
4415                   lmc_modereg_params1.s.dic_11,
4416                   lmc_modereg_params1.s.dic_10,
4417                   lmc_modereg_params1.s.dic_01,
4418                   lmc_modereg_params1.s.dic_00);
4419 
4420         ddr_print("MODEREG_PARAMS1                               : 0x%016llx\n", lmc_modereg_params1.u);
4421         DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num), lmc_modereg_params1.u);
4422 
4423     } /* LMC(0)_MODEREG_PARAMS1 */
4424 
4425     /* LMC(0)_MODEREG_PARAMS2 */
4426     if (ddr_type == DDR4_DRAM) {
4427         bdk_lmcx_modereg_params2_t lmc_modereg_params2;
4428         lmc_modereg_params2.u = odt_config[odt_idx].odt_mask2.u;
4429 
4430         for (i=0; i<4; ++i) {
4431             uint64_t value;
4432             if ((s = lookup_env_parameter("ddr_rtt_park_%1d%1d", !!(i&2), !!(i&1))) != NULL) {
4433                 value = strtoul(s, NULL, 0);
4434                 lmc_modereg_params2.u &= ~((uint64_t)0x7  << (i*10+0));
4435                 lmc_modereg_params2.u |=  ( (value & 0x7) << (i*10+0));
4436             }
4437         }
4438 
4439         if ((s = lookup_env_parameter("ddr_rtt_park")) != NULL) {
4440             uint64_t value = strtoul(s, NULL, 0);
4441             for (i=0; i<4; ++i) {
4442                 lmc_modereg_params2.u &= ~((uint64_t)0x7  << (i*10+0));
4443                 lmc_modereg_params2.u |=  ( (value & 0x7) << (i*10+0));
4444             }
4445         }
4446 
4447         if ((s = lookup_env_parameter_ull("ddr_modereg_params2")) != NULL) {
4448             lmc_modereg_params2.u    = strtoull(s, NULL, 0);
4449         }
4450 
4451         ddr_print("RTT_PARK    %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
4452                   imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_11],
4453                   imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_10],
4454                   imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_01],
4455                   imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_00],
4456                   lmc_modereg_params2.s.rtt_park_11,
4457                   lmc_modereg_params2.s.rtt_park_10,
4458                   lmc_modereg_params2.s.rtt_park_01,
4459                   lmc_modereg_params2.s.rtt_park_00);
4460 
4461         ddr_print("%-45s :  0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE",
4462                   lmc_modereg_params2.s.vref_range_11,
4463                   lmc_modereg_params2.s.vref_range_10,
4464                   lmc_modereg_params2.s.vref_range_01,
4465                   lmc_modereg_params2.s.vref_range_00);
4466 
4467         ddr_print("%-45s :  0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE",
4468                   lmc_modereg_params2.s.vref_value_11,
4469                   lmc_modereg_params2.s.vref_value_10,
4470                   lmc_modereg_params2.s.vref_value_01,
4471                   lmc_modereg_params2.s.vref_value_00);
4472 
4473         ddr_print("MODEREG_PARAMS2                               : 0x%016llx\n", lmc_modereg_params2.u);
4474         DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num), lmc_modereg_params2.u);
4475 
4476     } /* LMC(0)_MODEREG_PARAMS2 */
4477 
4478     /* LMC(0)_MODEREG_PARAMS3 */
4479     if (ddr_type == DDR4_DRAM) {
4480         bdk_lmcx_modereg_params3_t lmc_modereg_params3;
4481 
4482         lmc_modereg_params3.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS3(ddr_interface_num));
4483 
4484         //lmc_modereg_params3.s.max_pd          =
4485         //lmc_modereg_params3.s.tc_ref          =
4486         //lmc_modereg_params3.s.vref_mon        =
4487         //lmc_modereg_params3.s.cal             =
4488         //lmc_modereg_params3.s.sre_abort       =
4489         //lmc_modereg_params3.s.rd_preamble     =
4490         //lmc_modereg_params3.s.wr_preamble     =
4491         //lmc_modereg_params3.s.par_lat_mode    =
4492         //lmc_modereg_params3.s.odt_pd          =
4493         //lmc_modereg_params3.s.ca_par_pers     =
4494         //lmc_modereg_params3.s.dm              =
4495         //lmc_modereg_params3.s.wr_dbi          =
4496         //lmc_modereg_params3.s.rd_dbi          =
4497         lmc_modereg_params3.s.tccd_l            = max(divide_roundup(ddr4_tCCD_Lmin, tclk_psecs), 5ull) - 4;
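        /* The field holds tCCD_L in nCK minus 4, e.g. with
           ddr4_tCCD_Lmin = 6250 ps at tclk_psecs = 1250:
           max(roundup(6250/1250), 5) - 4 = 5 - 4 = 1. */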
4498         //lmc_modereg_params3.s.lpasr           =
4499         //lmc_modereg_params3.s.crc             =
4500         //lmc_modereg_params3.s.gd              =
4501         //lmc_modereg_params3.s.pda             =
4502         //lmc_modereg_params3.s.temp_sense      =
4503         //lmc_modereg_params3.s.fgrm            =
4504         //lmc_modereg_params3.s.wr_cmd_lat      =
4505         //lmc_modereg_params3.s.mpr_fmt         =
4506 
4507         if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
4508             int delay = 0;
4509             if ((lranks_per_prank == 4) && (ddr_hertz >= 1000000000))
4510                 delay = 1;
4511             lmc_modereg_params3.s.xrank_add_tccd_l = delay;
4512             lmc_modereg_params3.s.xrank_add_tccd_s = delay;
4513         }
4514 
4515         ddr_print("MODEREG_PARAMS3                               : 0x%016llx\n", lmc_modereg_params3.u);
4516         DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS3(ddr_interface_num), lmc_modereg_params3.u);
4517     } /* LMC(0)_MODEREG_PARAMS3 */
4518 
4519     /* LMC(0)_NXM */
4520     {
4521         bdk_lmcx_nxm_t lmc_nxm;
4522         int num_bits = row_lsb + row_bits + lranks_bits - 26;
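        /* num_bits is the per-rank address MSB above bit 26 (64 MB),
           e.g. row_lsb = 16, row_bits = 16, lranks_bits = 0 gives
           num_bits = 6, i.e. a 2^(26+6) = 4 GB rank. */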
4523         lmc_nxm.u = BDK_CSR_READ(node, BDK_LMCX_NXM(ddr_interface_num));
4524 
4525         if (rank_mask & 0x1)
4526             lmc_nxm.s.mem_msb_d0_r0 = num_bits;
4527         if (rank_mask & 0x2)
4528             lmc_nxm.s.mem_msb_d0_r1 = num_bits;
4529         if (rank_mask & 0x4)
4530             lmc_nxm.s.mem_msb_d1_r0 = num_bits;
4531         if (rank_mask & 0x8)
4532             lmc_nxm.s.mem_msb_d1_r1 = num_bits;
4533 
4534         lmc_nxm.s.cs_mask = ~rank_mask & 0xff; /* Set the mask for non-existent ranks. */
4535 
4536         if ((s = lookup_env_parameter_ull("ddr_nxm")) != NULL) {
4537             lmc_nxm.u    = strtoull(s, NULL, 0);
4538         }
4539         ddr_print("LMC_NXM                                       : 0x%016llx\n", lmc_nxm.u);
4540         DRAM_CSR_WRITE(node, BDK_LMCX_NXM(ddr_interface_num), lmc_nxm.u);
4541     }
4542 
4543     /* LMC(0)_WODT_MASK */
4544     {
4545         bdk_lmcx_wodt_mask_t lmc_wodt_mask;
4546         lmc_wodt_mask.u = odt_config[odt_idx].odt_mask;
4547 
4548         if ((s = lookup_env_parameter_ull("ddr_wodt_mask")) != NULL) {
4549             lmc_wodt_mask.u    = strtoull(s, NULL, 0);
4550         }
4551 
4552         ddr_print("WODT_MASK                                     : 0x%016llx\n", lmc_wodt_mask.u);
4553         DRAM_CSR_WRITE(node, BDK_LMCX_WODT_MASK(ddr_interface_num), lmc_wodt_mask.u);
4554     }
4555 
4556     /* LMC(0)_RODT_MASK */
4557     {
4558         int rankx;
4559         bdk_lmcx_rodt_mask_t lmc_rodt_mask;
4560         lmc_rodt_mask.u = odt_config[odt_idx].rodt_ctl;
4561 
4562         if ((s = lookup_env_parameter_ull("ddr_rodt_mask")) != NULL) {
4563             lmc_rodt_mask.u    = strtoull(s, NULL, 0);
4564         }
4565 
4566         ddr_print("%-45s : 0x%016llx\n", "RODT_MASK", lmc_rodt_mask.u);
4567         DRAM_CSR_WRITE(node, BDK_LMCX_RODT_MASK(ddr_interface_num), lmc_rodt_mask.u);
4568 
4569         dyn_rtt_nom_mask = 0;
4570         for (rankx = 0; rankx < dimm_count * 4;rankx++) {
4571             if (!(rank_mask & (1 << rankx)))
4572                 continue;
4573             dyn_rtt_nom_mask |= ((lmc_rodt_mask.u >> (8*rankx)) & 0xff);
4574         }
4575         if (num_ranks == 4) {
4576             /* Normally ODT1 is wired to rank 1. For quad-ranked DIMMs
4577                ODT1 is wired to the third rank (rank 2).  The mask,
4578                dyn_rtt_nom_mask, is used to indicate for which ranks
4579                to sweep RTT_NOM during read-leveling. Shift the bit
4580                from the ODT1 position over to the "ODT2" position so
4581                that the read-leveling analysis comes out right. */
4582             int odt1_bit = dyn_rtt_nom_mask & 2;
4583             dyn_rtt_nom_mask &= ~2;
4584             dyn_rtt_nom_mask |= odt1_bit<<1;
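            /* e.g., dyn_rtt_nom_mask = 0x3: odt1_bit = 0x2 moves to the
               rank-2 position, yielding 0x5. */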
4585         }
4586         ddr_print("%-45s : 0x%02x\n", "DYN_RTT_NOM_MASK", dyn_rtt_nom_mask);
4587     }
4588 
4589     /* LMC(0)_COMP_CTL2 */
4590     {
4591         bdk_lmcx_comp_ctl2_t comp_ctl2;
4592 
4593         comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
4594 
4595         comp_ctl2.s.dqx_ctl     = odt_config[odt_idx].odt_ena;
4596         comp_ctl2.s.ck_ctl      = (custom_lmc_config->ck_ctl  == 0) ? 4 : custom_lmc_config->ck_ctl;  /* Default 4=34.3 ohm */
4597         comp_ctl2.s.cmd_ctl     = (custom_lmc_config->cmd_ctl == 0) ? 4 : custom_lmc_config->cmd_ctl; /* Default 4=34.3 ohm */
4598         comp_ctl2.s.control_ctl = (custom_lmc_config->ctl_ctl == 0) ? 4 : custom_lmc_config->ctl_ctl; /* Default 4=34.3 ohm */
4599 
4600         // NOTE: these are now done earlier, in Step 6.9.3
4601         // comp_ctl2.s.ntune_offset    = 0;
4602         // comp_ctl2.s.ptune_offset    = 0;
4603 
4604         ddr_rodt_ctl_auto = custom_lmc_config->ddr_rodt_ctl_auto;
4605         if ((s = lookup_env_parameter("ddr_rodt_ctl_auto")) != NULL) {
4606             ddr_rodt_ctl_auto = !!strtoul(s, NULL, 0);
4607         }
4608 
4609         default_rodt_ctl = odt_config[odt_idx].qs_dic;
4610         if ((s = lookup_env_parameter("ddr_rodt_ctl")) == NULL)
4611             s = lookup_env_parameter("ddr%d_rodt_ctl", ddr_interface_num);
4612         if (s != NULL) {
4613             default_rodt_ctl    = strtoul(s, NULL, 0);
4614             ddr_rodt_ctl_auto = 0;
4615         }
4616 
4617         comp_ctl2.s.rodt_ctl = default_rodt_ctl;
4618 
4619         // if DDR4, force CK_CTL to 26 ohms if it is currently 34 ohms, and DCLK speed is 1 GHz or more...
4620         if ((ddr_type == DDR4_DRAM) && (comp_ctl2.s.ck_ctl == ddr4_driver_34_ohm) && (ddr_hertz >= 1000000000)) {
4621             comp_ctl2.s.ck_ctl = ddr4_driver_26_ohm; // lowest for DDR4 is 26 ohms
4622             ddr_print("Forcing DDR4 COMP_CTL2[CK_CTL] to %d, %d ohms\n", comp_ctl2.s.ck_ctl,
4623                       imp_values->drive_strength[comp_ctl2.s.ck_ctl]);
4624         }
4625 
4626         if ((s = lookup_env_parameter("ddr_ck_ctl")) != NULL) {
4627             comp_ctl2.s.ck_ctl  = strtoul(s, NULL, 0);
4628         }
4629 
4630         if ((s = lookup_env_parameter("ddr_cmd_ctl")) != NULL) {
4631             comp_ctl2.s.cmd_ctl  = strtoul(s, NULL, 0);
4632         }
4633 
4634         if ((s = lookup_env_parameter("ddr_control_ctl")) != NULL) {
4635             comp_ctl2.s.control_ctl  = strtoul(s, NULL, 0);
4636         }
4637 
4638         if ((s = lookup_env_parameter("ddr_dqx_ctl")) != NULL) {
4639             comp_ctl2.s.dqx_ctl  = strtoul(s, NULL, 0);
4640         }
4641 
4642         ddr_print("%-45s : %d, %d ohms\n", "DQX_CTL           ", comp_ctl2.s.dqx_ctl,
4643                   imp_values->dqx_strength  [comp_ctl2.s.dqx_ctl    ]);
4644         ddr_print("%-45s : %d, %d ohms\n", "CK_CTL            ", comp_ctl2.s.ck_ctl,
4645                   imp_values->drive_strength[comp_ctl2.s.ck_ctl     ]);
4646         ddr_print("%-45s : %d, %d ohms\n", "CMD_CTL           ", comp_ctl2.s.cmd_ctl,
4647                   imp_values->drive_strength[comp_ctl2.s.cmd_ctl    ]);
4648         ddr_print("%-45s : %d, %d ohms\n", "CONTROL_CTL       ", comp_ctl2.s.control_ctl,
4649                   imp_values->drive_strength[comp_ctl2.s.control_ctl]);
4650         ddr_print("Read ODT_CTL                                  : 0x%x (%d ohms)\n",
4651                   comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[comp_ctl2.s.rodt_ctl]);
4652 
4653         DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), comp_ctl2.u);
4654     }
4655 
4656     /* LMC(0)_PHY_CTL */
4657     {
4658         bdk_lmcx_phy_ctl_t lmc_phy_ctl;
4659         lmc_phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(ddr_interface_num));
4660         lmc_phy_ctl.s.ts_stagger           = 0;
4661 
4662         if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (lranks_per_prank > 1)) {
4663             lmc_phy_ctl.cn81xx.c0_sel = lmc_phy_ctl.cn81xx.c1_sel = 2; // C0 is TEN, C1 is A17
4664             ddr_print("N%d.LMC%d: 3DS: setting PHY_CTL[cx_csel] = %d\n",
4665                       node, ddr_interface_num, lmc_phy_ctl.cn81xx.c1_sel);
4666         }
4667 
4668         ddr_print("PHY_CTL                                       : 0x%016llx\n", lmc_phy_ctl.u);
4669         DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(ddr_interface_num), lmc_phy_ctl.u);
4670     }
4671 
4672     /* LMC(0)_DIMM0/1_PARAMS */
4673     if (spd_rdimm) {
4674         bdk_lmcx_dimm_ctl_t lmc_dimm_ctl;
4675 
4676         for (didx = 0; didx < (unsigned)dimm_count; ++didx) {
4677             bdk_lmcx_dimmx_params_t lmc_dimmx_params;
4678             int dimm = didx;
4679             int rc;
4680 
4681             lmc_dimmx_params.u = BDK_CSR_READ(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm));
4682 
4683 
4684             if (ddr_type == DDR4_DRAM) {
4685 
4686                 bdk_lmcx_dimmx_ddr4_params0_t lmc_dimmx_ddr4_params0;
4687                 bdk_lmcx_dimmx_ddr4_params1_t lmc_dimmx_ddr4_params1;
4688                 bdk_lmcx_ddr4_dimm_ctl_t lmc_ddr4_dimm_ctl;
4689 
4690                 lmc_dimmx_params.s.rc0  = 0;
4691                 lmc_dimmx_params.s.rc1  = 0;
4692                 lmc_dimmx_params.s.rc2  = 0;
4693 
4694                 rc = read_spd(node, &dimm_config_table[didx], DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL);
4695                 lmc_dimmx_params.s.rc3  = (rc >> 4) & 0xf;
4696                 lmc_dimmx_params.s.rc4  = ((rc >> 0) & 0x3) << 2;
4697                 lmc_dimmx_params.s.rc4 |= ((rc >> 2) & 0x3) << 0;
4698 
4699                 rc = read_spd(node, &dimm_config_table[didx], DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK);
4700                 lmc_dimmx_params.s.rc5  = ((rc >> 0) & 0x3) << 2;
4701                 lmc_dimmx_params.s.rc5 |= ((rc >> 2) & 0x3) << 0;
4702 
4703                 lmc_dimmx_params.s.rc6  = 0;
4704                 lmc_dimmx_params.s.rc7  = 0;
4705                 lmc_dimmx_params.s.rc8  = 0;
4706                 lmc_dimmx_params.s.rc9  = 0;
4707 
4708                 /*
4709                 ** rc10               DDR4 RDIMM Operating Speed
4710                 ** ====   =========================================================
4711                 **  0                 tclk_psecs >= 1250 psec DDR4-1600 (1250 ps)
4712                 **  1     1250 psec > tclk_psecs >= 1071 psec DDR4-1866 (1071 ps)
4713                 **  2     1071 psec > tclk_psecs >=  938 psec DDR4-2133 ( 938 ps)
4714                 **  3      938 psec > tclk_psecs >=  833 psec DDR4-2400 ( 833 ps)
4715                 **  4      833 psec > tclk_psecs >=  750 psec DDR4-2666 ( 750 ps)
4716                 **  5      750 psec > tclk_psecs >=  625 psec DDR4-3200 ( 625 ps)
4717                 */
4718                 lmc_dimmx_params.s.rc10        = 0;
4719                 if (1250 > tclk_psecs)
4720                     lmc_dimmx_params.s.rc10    = 1;
4721                 if (1071 > tclk_psecs)
4722                     lmc_dimmx_params.s.rc10    = 2;
4723                 if (938 > tclk_psecs)
4724                     lmc_dimmx_params.s.rc10    = 3;
4725                 if (833 > tclk_psecs)
4726                     lmc_dimmx_params.s.rc10    = 4;
4727                 if (750 > tclk_psecs)
4728                     lmc_dimmx_params.s.rc10    = 5;
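                /* e.g., tclk_psecs = 938 (DDR4-2133): 1250 > 938 and
                   1071 > 938 hold but 938 > 938 does not, so rc10 = 2. */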
4729 
4730                 lmc_dimmx_params.s.rc11 = 0;
4731                 lmc_dimmx_params.s.rc12 = 0;
4732                 lmc_dimmx_params.s.rc13 = (spd_dimm_type == 4) ? 0 : 4; /* 0=LRDIMM, 4=RDIMM */
4733                 lmc_dimmx_params.s.rc13 |= (ddr_type == DDR4_DRAM) ? (spd_addr_mirror << 3) : 0;
4734                 lmc_dimmx_params.s.rc14 = 0;
4735                 //lmc_dimmx_params.s.rc15 = 4; /* 0 nCK latency adder */
4736                 lmc_dimmx_params.s.rc15 = 0; /* 1 nCK latency adder */
4737 
4738                 lmc_dimmx_ddr4_params0.u = 0;
4739 
4740                 lmc_dimmx_ddr4_params0.s.rc8x = 0;
4741                 lmc_dimmx_ddr4_params0.s.rc7x = 0;
4742                 lmc_dimmx_ddr4_params0.s.rc6x = 0;
4743                 lmc_dimmx_ddr4_params0.s.rc5x = 0;
4744                 lmc_dimmx_ddr4_params0.s.rc4x = 0;
4745 
4746                 lmc_dimmx_ddr4_params0.s.rc3x = compute_rc3x(tclk_psecs);
4747 
4748                 lmc_dimmx_ddr4_params0.s.rc2x = 0;
4749                 lmc_dimmx_ddr4_params0.s.rc1x = 0;
4750 
4751                 lmc_dimmx_ddr4_params1.u = 0;
4752 
4753                 lmc_dimmx_ddr4_params1.s.rcbx = 0;
4754                 lmc_dimmx_ddr4_params1.s.rcax = 0;
4755                 lmc_dimmx_ddr4_params1.s.rc9x = 0;
4756 
4757                 lmc_ddr4_dimm_ctl.u = 0;
4758                 lmc_ddr4_dimm_ctl.s.ddr4_dimm0_wmask = 0x004;
4759                 lmc_ddr4_dimm_ctl.s.ddr4_dimm1_wmask = (dimm_count > 1) ? 0x004 : 0x0000;
4760 
4761                 /*
4762                  * Handle any overrides from envvars here...
4763                  */
4764                 if ((s = lookup_env_parameter("ddr_ddr4_params0")) != NULL) {
4765                     lmc_dimmx_ddr4_params0.u = strtoul(s, NULL, 0);
4766                 }
4767 
4768                 if ((s = lookup_env_parameter("ddr_ddr4_params1")) != NULL) {
4769                     lmc_dimmx_ddr4_params1.u = strtoul(s, NULL, 0);
4770                 }
4771 
4772                 if ((s = lookup_env_parameter("ddr_ddr4_dimm_ctl")) != NULL) {
4773                     lmc_ddr4_dimm_ctl.u = strtoul(s, NULL, 0);
4774                 }
4775 
4776                 for (i=0; i<11; ++i) {
4777                     uint64_t value;
4778                     if ((s = lookup_env_parameter("ddr_ddr4_rc%1xx", i+1)) != NULL) {
4779                         value = strtoul(s, NULL, 0);
4780                         if (i < 8) {
4781                             lmc_dimmx_ddr4_params0.u &= ~((uint64_t)0xff << (i*8));
4782                             lmc_dimmx_ddr4_params0.u |=           (value << (i*8));
4783                         } else {
4784                             lmc_dimmx_ddr4_params1.u &= ~((uint64_t)0xff << ((i-8)*8));
4785                             lmc_dimmx_ddr4_params1.u |=           (value << ((i-8)*8));
4786                         }
4787                     }
4788                 }
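                /* Example (hypothetical values): "ddr_ddr4_rc3x=0x2e" matches
                   i==2 and replaces byte 2 (bits 23:16) of DDR4_PARAMS0, while
                   "ddr_ddr4_rc9x=1" matches i==8 and patches byte 0 of
                   DDR4_PARAMS1. */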
4789 
4790                 /*
4791                  * write the final CSR values
4792                  */
4793                 DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_DDR4_PARAMS0(ddr_interface_num, dimm), lmc_dimmx_ddr4_params0.u);
4794 
4795                 DRAM_CSR_WRITE(node, BDK_LMCX_DDR4_DIMM_CTL(ddr_interface_num), lmc_ddr4_dimm_ctl.u);
4796 
4797                 DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_DDR4_PARAMS1(ddr_interface_num, dimm), lmc_dimmx_ddr4_params1.u);
4798 
4799                 ddr_print("DIMM%d Register Control Words        RCBx:RC1x : %x %x %x %x %x %x %x %x %x %x %x\n",
4800                           dimm,
4801                           lmc_dimmx_ddr4_params1.s.rcbx,
4802                           lmc_dimmx_ddr4_params1.s.rcax,
4803                           lmc_dimmx_ddr4_params1.s.rc9x,
4804                           lmc_dimmx_ddr4_params0.s.rc8x,
4805                           lmc_dimmx_ddr4_params0.s.rc7x,
4806                           lmc_dimmx_ddr4_params0.s.rc6x,
4807                           lmc_dimmx_ddr4_params0.s.rc5x,
4808                           lmc_dimmx_ddr4_params0.s.rc4x,
4809                           lmc_dimmx_ddr4_params0.s.rc3x,
4810                           lmc_dimmx_ddr4_params0.s.rc2x,
4811                           lmc_dimmx_ddr4_params0.s.rc1x );
4812 
4813             } else { /* if (ddr_type == DDR4_DRAM) */
4814                 rc = read_spd(node, &dimm_config_table[didx], 69);
4815                 lmc_dimmx_params.s.rc0         = (rc >> 0) & 0xf;
4816                 lmc_dimmx_params.s.rc1         = (rc >> 4) & 0xf;
4817 
4818                 rc = read_spd(node, &dimm_config_table[didx], 70);
4819                 lmc_dimmx_params.s.rc2         = (rc >> 0) & 0xf;
4820                 lmc_dimmx_params.s.rc3         = (rc >> 4) & 0xf;
4821 
4822                 rc = read_spd(node, &dimm_config_table[didx], 71);
4823                 lmc_dimmx_params.s.rc4         = (rc >> 0) & 0xf;
4824                 lmc_dimmx_params.s.rc5         = (rc >> 4) & 0xf;
4825 
4826                 rc = read_spd(node, &dimm_config_table[didx], 72);
4827                 lmc_dimmx_params.s.rc6         = (rc >> 0) & 0xf;
4828                 lmc_dimmx_params.s.rc7         = (rc >> 4) & 0xf;
4829 
4830                 rc = read_spd(node, &dimm_config_table[didx], 73);
4831                 lmc_dimmx_params.s.rc8         = (rc >> 0) & 0xf;
4832                 lmc_dimmx_params.s.rc9         = (rc >> 4) & 0xf;
4833 
4834                 rc = read_spd(node, &dimm_config_table[didx], 74);
4835                 lmc_dimmx_params.s.rc10        = (rc >> 0) & 0xf;
4836                 lmc_dimmx_params.s.rc11        = (rc >> 4) & 0xf;
4837 
4838                 rc = read_spd(node, &dimm_config_table[didx], 75);
4839                 lmc_dimmx_params.s.rc12        = (rc >> 0) & 0xf;
4840                 lmc_dimmx_params.s.rc13        = (rc >> 4) & 0xf;
4841 
4842                 rc = read_spd(node, &dimm_config_table[didx], 76);
4843                 lmc_dimmx_params.s.rc14        = (rc >> 0) & 0xf;
4844                 lmc_dimmx_params.s.rc15        = (rc >> 4) & 0xf;
4845 
4846 
4847                 if ((s = lookup_env_parameter("ddr_clk_drive")) != NULL) {
4848                     if (strcmp(s,"light") == 0) {
4849                         lmc_dimmx_params.s.rc5         = 0x0; /* Light Drive */
4850                     }
4851                     if (strcmp(s,"moderate") == 0) {
4852                         lmc_dimmx_params.s.rc5         = 0x5; /* Moderate Drive */
4853                     }
4854                     if (strcmp(s,"strong") == 0) {
4855                         lmc_dimmx_params.s.rc5         = 0xA; /* Strong Drive */
4856                     }
4857                 }
4858 
4859                 if ((s = lookup_env_parameter("ddr_cmd_drive")) != NULL) {
4860                     if (strcmp(s,"light") == 0) {
4861                         lmc_dimmx_params.s.rc3         = 0x0; /* Light Drive */
4862                     }
4863                     if (strcmp(s,"moderate") == 0) {
4864                         lmc_dimmx_params.s.rc3         = 0x5; /* Moderate Drive */
4865                     }
4866                     if (strcmp(s,"strong") == 0) {
4867                         lmc_dimmx_params.s.rc3         = 0xA; /* Strong Drive */
4868                     }
4869                 }
4870 
4871                 if ((s = lookup_env_parameter("ddr_ctl_drive")) != NULL) {
4872                     if (strcmp(s,"light") == 0) {
4873                         lmc_dimmx_params.s.rc4         = 0x0; /* Light Drive */
4874                     }
4875                     if (strcmp(s,"moderate") == 0) {
4876                         lmc_dimmx_params.s.rc4         = 0x5; /* Moderate Drive */
4877                     }
4878                 }
4879 
4880 
4881                 /*
4882                 ** rc10               DDR3 RDIMM Operating Speed
4883                 ** ====   =========================================================
4884                 **  0                 tclk_psecs >= 2500 psec DDR3/DDR3L-800 (default)
4885                 **  1     2500 psec > tclk_psecs >= 1875 psec DDR3/DDR3L-1066
4886                 **  2     1875 psec > tclk_psecs >= 1500 psec DDR3/DDR3L-1333
4887                 **  3     1500 psec > tclk_psecs >= 1250 psec DDR3/DDR3L-1600
4888                 **  4     1250 psec > tclk_psecs >= 1071 psec DDR3-1866
4889                 */
4890                 lmc_dimmx_params.s.rc10        = 0;
4891                 if (2500 > tclk_psecs)
4892                     lmc_dimmx_params.s.rc10    = 1;
4893                 if (1875 > tclk_psecs)
4894                     lmc_dimmx_params.s.rc10    = 2;
4895                 if (1500 > tclk_psecs)
4896                     lmc_dimmx_params.s.rc10    = 3;
4897                 if (1250 > tclk_psecs)
4898                     lmc_dimmx_params.s.rc10    = 4;
4899 
4900             } /* if (ddr_type == DDR4_DRAM) */
4901 
4902             if ((s = lookup_env_parameter("ddr_dimmx_params")) != NULL) {
4903                 lmc_dimmx_params.u = strtoul(s, NULL, 0);
4904             }
4905 
4906             for (i=0; i<16; ++i) {
4907                 uint64_t value;
4908                 if ((s = lookup_env_parameter("ddr_rc%d", i)) != NULL) {
4909                     value = strtoul(s, NULL, 0);
4910                     lmc_dimmx_params.u &= ~((uint64_t)0xf << (i*4));
4911                     lmc_dimmx_params.u |=           (  value << (i*4));
4912                 }
4913             }
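            /* Example (hypothetical value): "ddr_rc10=3" matches i==10 and
               replaces the RC10 nibble (bits 43:40) of DIMMX_PARAMS with 3. */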
4914 
4915             DRAM_CSR_WRITE(node, BDK_LMCX_DIMMX_PARAMS(ddr_interface_num, dimm), lmc_dimmx_params.u);
4916 
4917             ddr_print("DIMM%d Register Control Words         RC15:RC0 : %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",
4918                       dimm,
4919                       lmc_dimmx_params.s.rc15,
4920                       lmc_dimmx_params.s.rc14,
4921                       lmc_dimmx_params.s.rc13,
4922                       lmc_dimmx_params.s.rc12,
4923                       lmc_dimmx_params.s.rc11,
4924                       lmc_dimmx_params.s.rc10,
4925                       lmc_dimmx_params.s.rc9 ,
4926                       lmc_dimmx_params.s.rc8 ,
4927                       lmc_dimmx_params.s.rc7 ,
4928                       lmc_dimmx_params.s.rc6 ,
4929                       lmc_dimmx_params.s.rc5 ,
4930                       lmc_dimmx_params.s.rc4 ,
4931                       lmc_dimmx_params.s.rc3 ,
4932                       lmc_dimmx_params.s.rc2 ,
4933                       lmc_dimmx_params.s.rc1 ,
4934                       lmc_dimmx_params.s.rc0 );
4935         } /* for didx */
4936 
4937         if (ddr_type == DDR4_DRAM) {
4938 
4939             /* LMC0_DIMM_CTL */
4940             lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num));
4941             lmc_dimm_ctl.s.dimm0_wmask         = 0xdf3f;
4942             lmc_dimm_ctl.s.dimm1_wmask         = (dimm_count > 1) ? 0xdf3f : 0x0000;
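            /* Bit i of the write-mask selects control word RC0i (bit 13 = RC0D,
               as the 0x2000 mask below confirms), so 0xdf3f writes everything
               except RC06/RC07 and RC0D; RC0D is deliberately written by itself
               afterwards. */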
4943             lmc_dimm_ctl.s.tcws                = 0x4e0;
4944             lmc_dimm_ctl.cn88xx.parity         = custom_lmc_config->parity;
4945 
4946             if ((s = lookup_env_parameter("ddr_dimm0_wmask")) != NULL) {
4947                 lmc_dimm_ctl.s.dimm0_wmask    = strtoul(s, NULL, 0);
4948             }
4949 
4950             if ((s = lookup_env_parameter("ddr_dimm1_wmask")) != NULL) {
4951                 lmc_dimm_ctl.s.dimm1_wmask    = strtoul(s, NULL, 0);
4952             }
4953 
4954             if ((s = lookup_env_parameter("ddr_dimm_ctl_parity")) != NULL) {
4955                 lmc_dimm_ctl.cn88xx.parity = strtoul(s, NULL, 0);
4956             }
4957 
4958             if ((s = lookup_env_parameter("ddr_dimm_ctl_tcws")) != NULL) {
4959                 lmc_dimm_ctl.s.tcws = strtoul(s, NULL, 0);
4960             }
4961 
4962             ddr_print("LMC DIMM_CTL                                  : 0x%016llx\n", lmc_dimm_ctl.u);
4963             DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
4964 
4965             perform_octeon3_ddr3_sequence(node, rank_mask,
4966                                           ddr_interface_num, 0x7 ); /* Init RCW */
4967 
4968             /* Write RC0D last */
4969             lmc_dimm_ctl.s.dimm0_wmask         = 0x2000;
4970             lmc_dimm_ctl.s.dimm1_wmask         = (dimm_count > 1) ? 0x2000 : 0x0000;
4971             ddr_print("LMC DIMM_CTL                                  : 0x%016llx\n", lmc_dimm_ctl.u);
4972             DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
4973 
4974             /* Don't write any extended registers the second time */
4975             DRAM_CSR_WRITE(node, BDK_LMCX_DDR4_DIMM_CTL(ddr_interface_num), 0);
4976 
4977             perform_octeon3_ddr3_sequence(node, rank_mask,
4978                                           ddr_interface_num, 0x7 ); /* Init RCW */
4979         } else {
4980 
4981             /* LMC0_DIMM_CTL */
4982             lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num));
4983             lmc_dimm_ctl.s.dimm0_wmask         = 0xffff;
4984             lmc_dimm_ctl.s.dimm1_wmask         = (dimm_count > 1) ? 0xffff : 0x0000;
4985             lmc_dimm_ctl.s.tcws                = 0x4e0;
4986             lmc_dimm_ctl.cn88xx.parity         = custom_lmc_config->parity;
4987 
4988             if ((s = lookup_env_parameter("ddr_dimm0_wmask")) != NULL) {
4989                 lmc_dimm_ctl.s.dimm0_wmask    = strtoul(s, NULL, 0);
4990             }
4991 
4992             if ((s = lookup_env_parameter("ddr_dimm1_wmask")) != NULL) {
4993                 lmc_dimm_ctl.s.dimm1_wmask    = strtoul(s, NULL, 0);
4994             }
4995 
4996             if ((s = lookup_env_parameter("ddr_dimm_ctl_parity")) != NULL) {
4997                 lmc_dimm_ctl.cn88xx.parity = strtoul(s, NULL, 0);
4998             }
4999 
5000             if ((s = lookup_env_parameter("ddr_dimm_ctl_tcws")) != NULL) {
5001                 lmc_dimm_ctl.s.tcws = strtoul(s, NULL, 0);
5002             }
5003 
5004             ddr_print("LMC DIMM_CTL                                  : 0x%016llx\n", lmc_dimm_ctl.u);
5005             DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
5006 
5007             perform_octeon3_ddr3_sequence(node, rank_mask,
5008                                           ddr_interface_num, 0x7 ); /* Init RCW */
5009         }
5010     } else { /* if (spd_rdimm) */
5011         /* Disable register control writes for unbuffered */
5012         bdk_lmcx_dimm_ctl_t lmc_dimm_ctl;
5013         lmc_dimm_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DIMM_CTL(ddr_interface_num));
5014         lmc_dimm_ctl.s.dimm0_wmask         = 0;
5015         lmc_dimm_ctl.s.dimm1_wmask         = 0;
5016         DRAM_CSR_WRITE(node, BDK_LMCX_DIMM_CTL(ddr_interface_num), lmc_dimm_ctl.u);
5017     } /* if (spd_rdimm) */
5018 
5019     /*
5020      * Comments (steps 3 through 5) continue in perform_octeon3_ddr3_sequence()
5021      */
5022     {
5023         bdk_lmcx_modereg_params0_t lmc_modereg_params0;
5024 
5025         if (ddr_memory_preserved(node)) {
5026             /* Contents are being preserved. Take DRAM out of
5027                self-refresh first. Then init steps can proceed
5028                normally */
5029             perform_octeon3_ddr3_sequence(node, rank_mask,
5030                                           ddr_interface_num, 3); /* self-refresh exit */
5031         }
5032 
5033         lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
5034 
5035         lmc_modereg_params0.s.dllr = 1; /* Set during first init sequence */
5036         DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
5037 
5038         perform_ddr_init_sequence(node, rank_mask, ddr_interface_num);
5039 
5040         lmc_modereg_params0.s.dllr = 0; /* Clear for normal operation */
5041         DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
5042     }
5043 
5044     // NOTE: this must be done for pass 2.x and pass 1.x
5045     if ((spd_rdimm) && (ddr_type == DDR4_DRAM)) {
5046         VB_PRT(VBL_FAE, "Running init sequence 1\n");
5047         change_rdimm_mpr_pattern(node, rank_mask, ddr_interface_num, dimm_count);
5048     }
5049 
5050 #define DEFAULT_INTERNAL_VREF_TRAINING_LIMIT 5
5051     int internal_retries = 0;
5052     int deskew_training_errors;
5053     int dac_eval_retries;
5054     int dac_settings[9];
5055     int num_samples;
5056     int sample, lane;
5057     int last_lane = ((ddr_interface_64b) ? 8 : 4) + use_ecc;
5058 
5059 #define DEFAULT_DAC_SAMPLES 7 // originally was 5
5060 #define DAC_RETRIES_LIMIT   2
5061 
5062     typedef struct {
5063         int16_t bytes[DEFAULT_DAC_SAMPLES];
5064     } bytelane_sample_t;
5065     bytelane_sample_t lanes[9];
5066 
5067     memset(lanes, 0, sizeof(lanes));
5068 
5069     if ((ddr_type == DDR4_DRAM) && !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) {
5070         num_samples = DEFAULT_DAC_SAMPLES;
5071     } else {
5072         num_samples = 1; // if DDR3 or no ability to write DAC values
5073     }
5074 
5075  perform_internal_vref_training:
5076 
5077     for (sample = 0; sample < num_samples; sample++) {
5078 
5079         dac_eval_retries = 0;
5080 
5081         do { // make offset and internal vref training repeatable
5082 
5083             /* 6.9.8 LMC Offset Training
5084                LMC requires input-receiver offset training. */
5085             Perform_Offset_Training(node, rank_mask, ddr_interface_num);
5086 
5087             /* 6.9.9 LMC Internal Vref Training
5088                LMC requires input-reference-voltage training. */
5089             Perform_Internal_VREF_Training(node, rank_mask, ddr_interface_num);
5090 
5091             // read and maybe display the DAC values for a sample
5092             read_DAC_DBI_settings(node, ddr_interface_num, /*DAC*/1, dac_settings);
5093             if ((num_samples == 1) || dram_is_verbose(VBL_DEV)) {
5094                 display_DAC_DBI_settings(node, ddr_interface_num, /*DAC*/1, use_ecc,
5095                                          dac_settings, (char *)"Internal VREF");
5096             }
5097 
5098             // for DDR4, evaluate the DAC settings and retry if any issues
5099             if (ddr_type == DDR4_DRAM) {
5100                 if (evaluate_DAC_settings(ddr_interface_64b, use_ecc, dac_settings)) {
5101                     if (++dac_eval_retries > DAC_RETRIES_LIMIT) {
5102                         ddr_print("N%d.LMC%d: DDR4 internal VREF DAC settings: retries exhausted; continuing...\n",
5103                                   node, ddr_interface_num);
5104                     } else {
5105                         ddr_print("N%d.LMC%d: DDR4 internal VREF DAC settings inconsistent; retrying....\n",
5106                                   node, ddr_interface_num); // FIXME? verbosity!!!
5107                         continue;
5108                     }
5109                 }
5110                 if (num_samples > 1) { // taking multiple samples, otherwise do nothing
5111                     // good sample or exhausted retries, record it
5112                     for (lane = 0; lane < last_lane; lane++) {
5113                         lanes[lane].bytes[sample] = dac_settings[lane];
5114                     }
5115                 }
5116             }
5117             break; // done if DDR3, or good sample, or exhausted retries
5118 
5119         } while (1);
5120 
5121     } /* for (sample = 0; sample < num_samples; sample++) */
5122 
5123     if (num_samples > 1) {
5124         debug_print("N%d.LMC%d: DDR4 internal VREF DAC settings: processing multiple samples...\n",
5125                     node, ddr_interface_num);
5126 
5127         for (lane = 0; lane < last_lane; lane++) {
5128             dac_settings[lane] = process_samples_average(&lanes[lane].bytes[0], num_samples,
5129                                                          ddr_interface_num, lane);
5130         }
5131         display_DAC_DBI_settings(node, ddr_interface_num, /*DAC*/1, use_ecc, dac_settings, (char *)"Averaged VREF");
5132 
5133         // finally, write the final DAC values
5134         for (lane = 0; lane < last_lane; lane++) {
5135             load_dac_override(node, ddr_interface_num, dac_settings[lane], lane);
5136         }
5137     }
5138 
5139 #if DAC_OVERRIDE_EARLY
5140     // as a second step, after internal VREF training, before starting deskew training:
5141     // for DDR3 and THUNDER pass 2.x, override the DAC setting to 127
5142     if ((ddr_type == DDR3_DRAM) && !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
5143         load_dac_override(node, ddr_interface_num, 127, /* all */0x0A);
5144         ddr_print("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127 (early).\n",
5145                   node, ddr_interface_num);
5146     }
5147 #endif
5148 
5149     /*
5150      * 6.9.10 LMC Read Deskew Training
5151      * LMC requires input-read-data deskew training.
5152      */
5153     if (! disable_deskew_training) {
5154 
5155         deskew_training_errors = Perform_Read_Deskew_Training(node, rank_mask, ddr_interface_num,
5156                                                          spd_rawcard_AorB, 0, ddr_interface_64b);
5157 
5158         // All the Deskew lock and saturation retries (may) have been done,
5159                 //  but we ended up with nibble errors; so, as a last-ditch effort,
5160         //  enable retries of the Internal Vref Training...
5161         if (deskew_training_errors) {
5162             if (internal_retries < DEFAULT_INTERNAL_VREF_TRAINING_LIMIT) {
5163                 internal_retries++;
5164                 VB_PRT(VBL_FAE, "N%d.LMC%d: Deskew training results still unsettled - retrying internal Vref training (%d)\n",
5165                        node, ddr_interface_num, internal_retries);
5166                 goto perform_internal_vref_training;
5167             } else {
5168                 VB_PRT(VBL_FAE, "N%d.LMC%d: Deskew training incomplete - %d retries exhausted, but continuing...\n",
5169                        node, ddr_interface_num, internal_retries);
5170             }
5171         }
5172 
5173         // FIXME: treat this as the final DSK print from now on, and print if VBL_NORM or above
5174         // also, save the results of the original training
5175         Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &deskew_training_results, VBL_NORM);
5176 
5177         // setup write bit-deskew if enabled...
5178         if (enable_write_deskew) {
5179             ddr_print("N%d.LMC%d: WRITE BIT-DESKEW feature enabled- going NEUTRAL.\n",
5180                       node, ddr_interface_num);
5181             Neutral_Write_Deskew_Setup(node, ddr_interface_num);
5182         } /* if (enable_write_deskew) */
5183 
5184     } /* if (! disable_deskew_training) */
5185 
5186 #if !DAC_OVERRIDE_EARLY
5187     // as a final step in internal VREF training, after deskew training but before HW WL:
5188     // for DDR3 and THUNDER pass 2.x, override the DAC setting to 127
5189     if ((ddr_type == DDR3_DRAM) && !CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
5190         load_dac_override(node, ddr_interface_num, 127, /* all */0x0A);
5191         ddr_print("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127 (late).\n",
5192                   node, ddr_interface_num);
5193     }
5194 #endif
5195 
5196 
5197     /* LMC(0)_EXT_CONFIG */
5198     {
5199         bdk_lmcx_ext_config_t ext_config;
5200         ext_config.u = BDK_CSR_READ(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num));
5201         ext_config.s.vrefint_seq_deskew = 0;
5202         ext_config.s.read_ena_bprch = 1;
5203         ext_config.s.read_ena_fprch = 1;
5204         ext_config.s.drive_ena_fprch = 1;
5205         ext_config.s.drive_ena_bprch = 1;
5206         ext_config.s.invert_data = 0; // make sure this is OFF for all current chips
5207 
5208         if ((s = lookup_env_parameter("ddr_read_fprch")) != NULL) {
5209             ext_config.s.read_ena_fprch = strtoul(s, NULL, 0);
5210         }
5211         if ((s = lookup_env_parameter("ddr_read_bprch")) != NULL) {
5212             ext_config.s.read_ena_bprch = strtoul(s, NULL, 0);
5213         }
5214         if ((s = lookup_env_parameter("ddr_drive_fprch")) != NULL) {
5215             ext_config.s.drive_ena_fprch = strtoul(s, NULL, 0);
5216         }
5217         if ((s = lookup_env_parameter("ddr_drive_bprch")) != NULL) {
5218             ext_config.s.drive_ena_bprch = strtoul(s, NULL, 0);
5219         }
5220 
5221         if (!CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X) && (lranks_per_prank > 1)) {
5222             ext_config.s.dimm0_cid = ext_config.s.dimm1_cid = lranks_bits;
5223             ddr_print("N%d.LMC%d: 3DS: setting EXT_CONFIG[dimmx_cid] = %d\n",
5224                       node, ddr_interface_num, ext_config.s.dimm0_cid);
5225         }
5226 
5227         DRAM_CSR_WRITE(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num), ext_config.u);
5228         ddr_print("%-45s : 0x%016llx\n", "EXT_CONFIG", ext_config.u);
5229     }
5230 
5231 
5232     {
5233         int save_ref_zqcs_int;
5234         uint64_t temp_delay_usecs;
5235 
5236         lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
5237 
5238         /* Temporarily select the minimum ZQCS interval and wait
5239            long enough for a few ZQCS calibrations to occur.  This
5240            should ensure that the calibration circuitry is
5241            stabilized before read/write leveling occurs. */
5242         save_ref_zqcs_int         = lmc_config.s.ref_zqcs_int;
5243         lmc_config.s.ref_zqcs_int = 1 | (32<<7); /* set smallest interval */
5244 
5245         DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
5246         BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
5247 
5248         /* Compute an appropriate delay based on the current ZQCS
5249            interval. The delay should be long enough for the
5250            current ZQCS delay counter to expire plus ten of the
5251            minimum intervals to ensure that some calibrations
5252            occur. */
5253         temp_delay_usecs = (((uint64_t)save_ref_zqcs_int >> 7)
5254                             * tclk_psecs * 100 * 512 * 128) / (10000*10000)
5255             + 10 * ((uint64_t)32 * tclk_psecs * 100 * 512 * 128) / (10000*10000);
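        /* Unit sketch for the computation above, assuming the bits above <6:0>
           of REF_ZQCS_INT count ZQCS intervals in units of 512*128 CK cycles:
           the net scale factor 100/(10000*10000) equals 1/1000000, converting
           picoseconds to microseconds, i.e. roughly
             usecs = (saved_interval + 10 * minimum_interval(32))
                     * 512 * 128 * tclk_psecs / 1000000                       */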
5256 
5257         VB_PRT(VBL_FAE, "N%d.LMC%d: Waiting %lld usecs for ZQCS calibrations to start\n",
5258                 node, ddr_interface_num, temp_delay_usecs);
5259         bdk_wait_usec(temp_delay_usecs);
5260 
5261         lmc_config.s.ref_zqcs_int = save_ref_zqcs_int; /* Restore computed interval */
5262 
5263         DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
5264         BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
5265     }
5266 
5267     /*
5268      * 6.9.11 LMC Write Leveling
5269      *
5270      * LMC supports an automatic write leveling like that described in the
5271      * JEDEC DDR3 specifications separately per byte-lane.
5272      *
5273      * All of DDR PLL, LMC CK, LMC DRESET, and early LMC initializations must
5274      * be completed prior to starting this LMC write-leveling sequence.
5275      *
5276      * There are many possible procedures that will write-level all the
5277      * attached DDR3 DRAM parts. One possibility is for software to simply
5278      * write the desired values into LMC(0)_WLEVEL_RANK(0..3). This section
5279      * describes one possible sequence that uses LMC's autowrite-leveling
5280      * capabilities.
5281      *
5282      * 1. If the DQS/DQ delays on the board may be more than the ADD/CMD
5283      *    delays, then ensure that LMC(0)_CONFIG[EARLY_DQX] is set at this
5284      *    point.
5285      *
5286      * Do the remaining steps 2-7 separately for each rank i with attached
5287      * DRAM.
5288      *
5289      * 2. Write LMC(0)_WLEVEL_RANKi = 0.
5290      *
5291      * 3. For ×8 parts:
5292      *
5293      *    Without changing any other fields in LMC(0)_WLEVEL_CTL, write
5294      *    LMC(0)_WLEVEL_CTL[LANEMASK] to select all byte lanes with attached
5295      *    DRAM.
5296      *
5297      *    For ×16 parts:
5298      *
5299      *    Without changing any other fields in LMC(0)_WLEVEL_CTL, write
5300      *    LMC(0)_WLEVEL_CTL[LANEMASK] to select all even byte lanes with
5301      *    attached DRAM.
5302      *
5303      * 4. Without changing any other fields in LMC(0)_CONFIG,
5304      *
5305      *    o write LMC(0)_SEQ_CTL[SEQ_SEL] to select write-leveling
5306      *
5307      *    o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
5308      *
5309      *    o write LMC(0)_SEQ_CTL[INIT_START] = 1
5310      *
5311      *    LMC will initiate write-leveling at this point. Assuming
5312      *    LMC(0)_WLEVEL_CTL [SSET] = 0, LMC first enables write-leveling on
5313      *    the selected DRAM rank via a DDR3 MR1 write, then sequences through
5314      *    and accumulates write-leveling results for eight different delay
5315      *    settings twice, starting at a delay of zero in this case since
5316      *    LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] = 0, increasing by 1/8 CK each
5317      *    setting, covering a total distance of one CK, then disables the
5318      *    write-leveling via another DDR3 MR1 write.
5319      *
5320      *    After the sequence through 16 delay settings is complete:
5321      *
5322      *    o LMC sets LMC(0)_WLEVEL_RANKi[STATUS] = 3
5323      *
5324      *    o LMC sets LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] (for all ranks selected
5325      *      by LMC(0)_WLEVEL_CTL[LANEMASK]) to indicate the first write
5326      *      leveling result of 1 that followed result of 0 during the
5327      *      sequence, except that the LMC always writes
5328      *      LMC(0)_WLEVEL_RANKi[BYTE*<0>]=0.
5329      *
5330      *    o Software can read the eight write-leveling results from the first
5331      *      pass through the delay settings by reading
5332      *      LMC(0)_WLEVEL_DBG[BITMASK] (after writing
5333      *      LMC(0)_WLEVEL_DBG[BYTE]). (LMC does not retain the writeleveling
5334      *      results from the second pass through the eight delay
5335      *      settings. They should often be identical to the
5336      *      LMC(0)_WLEVEL_DBG[BITMASK] results, though.)
5337      *
5338      * 5. Wait until LMC(0)_WLEVEL_RANKi[STATUS] != 2.
5339      *
5340      *    LMC will have updated LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] for all byte
5341      *    lanes selected by LMC(0)_WLEVEL_CTL[LANEMASK] at this point.
5342      *    LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] will still be the value that
5343      *    software wrote in substep 2 above, which is 0.
5344      *
5345      * 6. For ×16 parts:
5346      *
5347      *    Without changing any other fields in LMC(0)_WLEVEL_CTL, write
5348      *    LMC(0)_WLEVEL_CTL[LANEMASK] to select all odd byte lanes with
5349      *    attached DRAM.
5350      *
5351      *    Repeat substeps 4 and 5 with this new LMC(0)_WLEVEL_CTL[LANEMASK]
5352      *    setting. Skip to substep 7 if this has already been done.
5353      *
5354      *    For ×8 parts:
5355      *
5356      *    Skip this substep. Go to substep 7.
5357      *
5358      * 7. Calculate LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings for all byte
5359      *    lanes on all ranks with attached DRAM.
5360      *
5361      *    At this point, all byte lanes on rank i with attached DRAM should
5362      *    have been write-leveled, and LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] has
5363      *    the result for each byte lane.
5364      *
5365      *    But note that the DDR3 write-leveling sequence will only determine
5366      *    the delay modulo the CK cycle time, and cannot determine how many
5367      *    additional CK cycles of delay are present. Software must calculate
5368      *    the number of CK cycles, or equivalently, the
5369      *    LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings.
5370      *
5371      *    This BYTE*<4:3> calculation is system/board specific.
5372      *
5373      * Many techniques can be used to calculate write-leveling BYTE*<4:3> values,
5374      * including:
5375      *
5376      *    o Known values for some byte lanes.
5377      *
5378      *    o Relative values for some byte lanes relative to others.
5379      *
5380      *    For example, suppose lane X is likely to require a larger
5381      *    write-leveling delay than lane Y. A BYTEX<2:0> value that is much
5382      *    smaller than the BYTEY<2:0> value may then indicate that the
5383      *    required lane X delay wrapped into the next CK, so BYTEX<4:3>
5384      *    should be set to BYTEY<4:3>+1.
5385      *
5386      *    When ECC DRAM is not present (i.e. when DRAM is not attached to the
5387      *    DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and
5388      *    DDR_DQ<35:32> chip signals), write LMC(0)_WLEVEL_RANK*[BYTE8] =
5389      *    LMC(0)_WLEVEL_RANK*[BYTE0], using the final calculated BYTE0 value.
5390      *    Write LMC(0)_WLEVEL_RANK*[BYTE4] = LMC(0)_WLEVEL_RANK*[BYTE0],
5391      *    using the final calculated BYTE0 value.
5392      *
5393      * 8. Initialize LMC(0)_WLEVEL_RANK* values for all unused ranks.
5394      *
5395      *    Let rank i be a rank with attached DRAM.
5396      *
5397      *    For all ranks j that do not have attached DRAM, set
5398      *    LMC(0)_WLEVEL_RANKj = LMC(0)_WLEVEL_RANKi.
5399      */
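    /*
     * Illustrative sketch only (not the code used below), with hypothetical
     * names, of the substep-7 wrap heuristic described above: if lane X is
     * expected to need at least as much delay as lane Y but its BYTEX<2:0>
     * result comes back much smaller, assume the lane X delay wrapped into
     * the next CK:
     *
     *   int bytex_4_3(int bytex_2_0, int bytey_2_0, int bytey_4_3)
     *   {
     *       // a result smaller by half a CK or more suggests a wrap
     *       return (bytey_2_0 - bytex_2_0 >= 4) ? bytey_4_3 + 1 : bytey_4_3;
     *   }
     */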
5400     { // Start HW write-leveling block
5401 #pragma pack(push,1)
5402         bdk_lmcx_wlevel_ctl_t wlevel_ctl;
5403         bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank;
5404         int rankx = 0;
5405         int wlevel_bitmask[9];
5406         int byte_idx;
5407         int ecc_ena;
5408         int ddr_wlevel_roundup = 0;
5409         int ddr_wlevel_printall = (dram_is_verbose(VBL_FAE)); // or default to 1 to print all HW WL samples
5410         int disable_hwl_validity = 0;
5411         int default_wlevel_rtt_nom;
5412 #if WODT_MASK_2R_1S
5413         uint64_t saved_wodt_mask = 0;
5414 #endif
5415 #pragma pack(pop)
5416 
5417         if (wlevel_loops)
5418             ddr_print("N%d.LMC%d: Performing Hardware Write-Leveling\n", node, ddr_interface_num);
5419         else {
5420             wlevel_bitmask_errors = 1; /* Force software write-leveling to run */
5421             ddr_print("N%d.LMC%d: Forcing software Write-Leveling\n", node, ddr_interface_num);
5422         }
5423 
5424         default_wlevel_rtt_nom = (ddr_type == DDR3_DRAM) ? rttnom_20ohm : ddr4_rttnom_40ohm ; /* FIXME? */
5425 
5426 #if WODT_MASK_2R_1S
5427         if ((ddr_type == DDR4_DRAM) && (num_ranks == 2) && (dimm_count == 1)) {
5428             /* LMC(0)_WODT_MASK */
5429             bdk_lmcx_wodt_mask_t lmc_wodt_mask;
5430             // always save original so we can always restore later
5431             saved_wodt_mask = BDK_CSR_READ(node, BDK_LMCX_WODT_MASK(ddr_interface_num));
5432             if ((s = lookup_env_parameter_ull("ddr_hwl_wodt_mask")) != NULL) {
5433                 lmc_wodt_mask.u = strtoull(s, NULL, 0);
5434                 if (lmc_wodt_mask.u != saved_wodt_mask) { // print/store only when diff
5435                     ddr_print("WODT_MASK                                     : 0x%016llx\n", lmc_wodt_mask.u);
5436                     DRAM_CSR_WRITE(node, BDK_LMCX_WODT_MASK(ddr_interface_num), lmc_wodt_mask.u);
5437                 }
5438             }
5439         }
5440 #endif /* WODT_MASK_2R_1S */
5441 
5442         lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
5443         ecc_ena = lmc_config.s.ecc_ena;
5444 
5445         if ((s = lookup_env_parameter("ddr_wlevel_roundup")) != NULL) {
5446             ddr_wlevel_roundup = strtoul(s, NULL, 0);
5447         }
5448         if ((s = lookup_env_parameter("ddr_wlevel_printall")) != NULL) {
5449             ddr_wlevel_printall = strtoul(s, NULL, 0);
5450         }
5451 
5452         if ((s = lookup_env_parameter("ddr_disable_hwl_validity")) != NULL) {
5453             disable_hwl_validity = !!strtoul(s, NULL, 0);
5454         }
5455 
5456         if ((s = lookup_env_parameter("ddr_wlevel_rtt_nom")) != NULL) {
5457             default_wlevel_rtt_nom = strtoul(s, NULL, 0);
5458         }
5459 
5460         // For DDR3, we leave the WLEVEL_CTL fields at default settings
5461         // For DDR4, we touch WLEVEL_CTL fields OR_DIS or BITMASK here
5462         if (ddr_type == DDR4_DRAM) {
5463             int default_or_dis  = 1;
5464             int default_bitmask = 0xFF;
5465 
5466             // when x4, use only the lower nibble bits
5467             if (dram_width == 4) {
5468                 default_bitmask = 0x0F;
5469                 VB_PRT(VBL_DEV, "N%d.LMC%d: WLEVEL_CTL: default bitmask is 0x%2x for DDR4 x4\n",
5470                           node, ddr_interface_num, default_bitmask);
5471             }
5472 
5473             wlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num));
5474             wlevel_ctl.s.or_dis  = default_or_dis;
5475             wlevel_ctl.s.bitmask = default_bitmask;
5476 
5477             // allow overrides
5478             if ((s = lookup_env_parameter("ddr_wlevel_ctl_or_dis")) != NULL) {
5479                 wlevel_ctl.s.or_dis = !!strtoul(s, NULL, 0);
5480             }
5481             if ((s = lookup_env_parameter("ddr_wlevel_ctl_bitmask")) != NULL) {
5482                 wlevel_ctl.s.bitmask = strtoul(s, NULL, 0);
5483             }
5484 
5485             // print only if not defaults
5486             if ((wlevel_ctl.s.or_dis != default_or_dis) || (wlevel_ctl.s.bitmask != default_bitmask)) {
5487                 ddr_print("N%d.LMC%d: WLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n",
5488                           node, ddr_interface_num, wlevel_ctl.s.or_dis, wlevel_ctl.s.bitmask);
5489             }
5490             // always write
5491             DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
5492         }
5493 
5494         // Start the hardware write-leveling loop per rank
5495         for (rankx = 0; rankx < dimm_count * 4; rankx++) {
5496 
5497             if (!(rank_mask & (1 << rankx)))
5498                 continue;
5499 
5500 #if HW_WL_MAJORITY
5501             // array to collect counts of byte-lane values
5502             // assume low-order 3 bits and even, so really only 2 bit values
5503             int wlevel_bytes[9][4];
5504             memset(wlevel_bytes, 0, sizeof(wlevel_bytes));
5505 #endif
5506 
5507             // restructure the looping so we can keep trying until we get the samples we want
5508             //for (int wloop = 0; wloop < wlevel_loops; wloop++) {
5509             int wloop = 0;
5510             int wloop_retries = 0; // retries per sample for HW-related issues with bitmasks or values
5511             int wloop_retries_total = 0;
5512             int wloop_retries_exhausted = 0;
5513 #define WLOOP_RETRIES_DEFAULT 5
5514             int wlevel_validity_errors;
5515             int wlevel_bitmask_errors_rank = 0;
5516             int wlevel_validity_errors_rank = 0;
5517 
5518             while (wloop < wlevel_loops) {
5519 
5520                 wlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num));
5521 
5522                 wlevel_ctl.s.rtt_nom = (default_wlevel_rtt_nom > 0) ? (default_wlevel_rtt_nom - 1) : 7;
5523 
5524 
5525                 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), 0); /* Clear write-level delays */
5526 
5527                 wlevel_bitmask_errors = 0; /* Reset error counters */
5528                 wlevel_validity_errors = 0;
5529 
5530                 for (byte_idx=0; byte_idx<9; ++byte_idx) {
5531                     wlevel_bitmask[byte_idx] = 0; /* Reset bitmasks */
5532                 }
5533 
5534 #if HWL_BY_BYTE // FIXME???
5535                 /* Make a separate pass for each byte to reduce power. */
5536                 for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
5537 
5538                     if (!(ddr_interface_bytemask&(1<<byte_idx)))
5539                         continue;
5540 
5541                     wlevel_ctl.s.lanemask = (1<<byte_idx);
5542 
5543                     DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
5544 
5545                     /* Read and write values back in order to update the
5546                       status field. This ensures that we read the updated
5547                        values after write-leveling has completed. */
5548                     DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
5549                                    BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx)));
5550 
5551                     perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 6); /* write-leveling */
5552 
5553                     if (!bdk_is_platform(BDK_PLATFORM_ASIM) &&
5554                         BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
5555                                                status, ==, 3, 1000000))
5556                     {
5557                         error_print("ERROR: Timeout waiting for WLEVEL\n");
5558                     }
5559                     lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
5560 
5561                     wlevel_bitmask[byte_idx] = octeon_read_lmcx_ddr3_wlevel_dbg(node, ddr_interface_num, byte_idx);
5562                     if (wlevel_bitmask[byte_idx] == 0)
5563                         ++wlevel_bitmask_errors;
5564                 } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
5565 
5566                 wlevel_ctl.s.lanemask = /*0x1ff*/ddr_interface_bytemask; // restore for RL
5567                 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
5568 #else
5569                 // do all the byte-lanes at the same time
5570                 wlevel_ctl.s.lanemask = /*0x1ff*/ddr_interface_bytemask; // FIXME?
5571 
5572                 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_CTL(ddr_interface_num), wlevel_ctl.u);
5573 
5574                 /* Read and write values back in order to update the
5575                   status field. This ensures that we read the updated
5576                    values after write-leveling has completed. */
5577                 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
5578                                BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx)));
5579 
5580                 perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 6); /* write-leveling */
5581 
5582                 if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx),
5583                                            status, ==, 3, 1000000))
5584                 {
5585                     error_print("ERROR: Timeout waiting for WLEVEL\n");
5586                 }
5587 
5588                 lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
5589 
5590                 for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
5591                     if (!(ddr_interface_bytemask&(1<<byte_idx)))
5592                         continue;
5593                     wlevel_bitmask[byte_idx] = octeon_read_lmcx_ddr3_wlevel_dbg(node, ddr_interface_num, byte_idx);
5594                     if (wlevel_bitmask[byte_idx] == 0)
5595                         ++wlevel_bitmask_errors;
5596                 } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
5597 #endif
5598 
5599                 // check validity only if no bitmask errors
5600                 if (wlevel_bitmask_errors == 0) {
5601                     if ((spd_dimm_type != 5) &&
5602                         (spd_dimm_type != 6) &&
5603                         (spd_dimm_type != 8) &&
5604                         (spd_dimm_type != 9) &&
5605                         (dram_width != 16)   &&
5606                         (ddr_interface_64b)  &&
5607                         !(disable_hwl_validity))
5608                     { // bypass if mini-[RU]DIMM or x16 or 32-bit or SO-[RU]DIMM
5609                         wlevel_validity_errors =
5610                             Validate_HW_WL_Settings(node, ddr_interface_num,
5611                                                     &lmc_wlevel_rank, ecc_ena);
5612                         wlevel_validity_errors_rank += (wlevel_validity_errors != 0);
5613                     }
5614                 } else
5615                     wlevel_bitmask_errors_rank++;
5616 
5617                 // before we print, if we had bitmask or validity errors, do a retry...
5618                 if ((wlevel_bitmask_errors != 0) || (wlevel_validity_errors != 0)) {
5619                     // VBL must be high to show the bad bitmaps or delays here also
5620                     if (dram_is_verbose(VBL_DEV2)) {
5621                         display_WL_BM(node, ddr_interface_num, rankx, wlevel_bitmask);
5622                         display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx);
5623                     }
5624                     if (wloop_retries < WLOOP_RETRIES_DEFAULT) {
5625                         wloop_retries++;
5626                         wloop_retries_total++;
5627                         // this printout is per-retry: only when VBL is high enough (DEV2?)
5628                         VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: H/W Write-Leveling had %s errors - retrying...\n",
5629                                   node, ddr_interface_num, rankx,
5630                                   (wlevel_bitmask_errors) ? "Bitmask" : "Validity");
5631                         continue; // this takes us back to the top without counting a sample
5632                     } else { // ran out of retries for this sample
5633                         // retries exhausted, do not print at normal VBL
5634                         VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: H/W Write-Leveling issues: %s errors\n",
5635                                   node, ddr_interface_num, rankx,
5636                                   (wlevel_bitmask_errors) ? "Bitmask" : "Validity");
5637                         wloop_retries_exhausted++;
5638                     }
5639                 }
5640                 // no errors or exhausted retries, use this sample
5641                 wloop_retries = 0; //reset for next sample
5642 
5643                 // when only 1 sample or forced, print the bitmasks first and current HW WL
5644                 if ((wlevel_loops == 1) || ddr_wlevel_printall) {
5645                     display_WL_BM(node, ddr_interface_num, rankx, wlevel_bitmask);
5646                     display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx);
5647                 }
5648 
5649                 if (ddr_wlevel_roundup) { /* Round up odd bitmask delays */
5650                     for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
5651                         if (!(ddr_interface_bytemask&(1<<byte_idx)))
5652                             continue;
5653                         update_wlevel_rank_struct(&lmc_wlevel_rank,
5654                                                   byte_idx,
5655                                                   roundup_ddr3_wlevel_bitmask(wlevel_bitmask[byte_idx]));
5656                     } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
5657                     DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
5658                     display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx);
5659                 }
5660 
5661 #if HW_WL_MAJORITY
5662                 // OK, we have a decent sample, no bitmask or validity errors
5663                 for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) {
5664                     if (!(ddr_interface_bytemask&(1<<byte_idx)))
5665                         continue;
5666                     // increment count of byte-lane value
5667                     int ix = (get_wlevel_rank_struct(&lmc_wlevel_rank, byte_idx) >> 1) & 3; // only 4 values
5668                     wlevel_bytes[byte_idx][ix]++;
5669                 } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
5670 #endif
5671 
5672                 wloop++; // if we get here, we have taken a decent sample
5673 
5674             } /* while (wloop < wlevel_loops) */
5675 
5676 #if HW_WL_MAJORITY
5677             // if we did sample more than once, try to pick a majority vote
5678             if (wlevel_loops > 1) {
5679                 // look for the majority in each byte-lane
5680                 for (byte_idx = 0; byte_idx < (8+ecc_ena); ++byte_idx) {
5681                     int mx = -1, mc = 0, xc = 0, cc = 0;
5682                     int ix, ic;
5683                     if (!(ddr_interface_bytemask&(1<<byte_idx)))
5684                         continue;
5685                     for (ix = 0; ix < 4; ix++) {
5686                         ic = wlevel_bytes[byte_idx][ix];
5687                         // make a bitmask of the ones with a count
5688                         if (ic > 0) {
5689                             mc |= (1 << ix);
5690                             cc++; // count how many had non-zero counts
5691                         }
5692                         // find the majority
5693                         if (ic > xc) { // new max?
5694                             xc = ic; // yes
5695                             mx = ix; // set its index
5696                         }
5697                     }
5698 #if SWL_TRY_HWL_ALT
5699                     // see if there was an alternate
5700                     int alts = (mc & ~(1 << mx)); // take out the majority choice
5701                     if (alts != 0) {
5702                         for (ix = 0; ix < 4; ix++) {
5703                             if (alts & (1 << ix)) { // FIXME: could be done multiple times? bad if so
5704                                 hwl_alts[rankx].hwl_alt_mask |= (1 << byte_idx); // set the mask
5705                                 hwl_alts[rankx].hwl_alt_delay[byte_idx] = ix << 1; // record the value
5706                                 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d (%d) alt %d (%d).\n",
5707                                        node, ddr_interface_num, rankx, byte_idx, mx << 1, xc,
5708                                        ix << 1, wlevel_bytes[byte_idx][ix]);
5709                             }
5710                         }
5711                     } else {
5712                         debug_print("N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d alt NONE.\n",
5713                                     node, ddr_interface_num, rankx, byte_idx, mx << 1);
5714                     }
5715 #endif /* SWL_TRY_HWL_ALT */
5716                     if (cc > 2) { // unlikely, but...
5717                         // assume: counts for 3 indices are all 1
5718                         // possibilities are: 0/2/4, 2/4/6, 0/4/6, 0/2/6
5719                         // and the desired?:   2  ,   4  ,     6, 0
5720                         // we choose the middle, treating the delays as wrapping modulo one CK (0/4/6 -> 4/6/0 -> 6; 0/2/6 -> 6/0/2 -> 0)
5721                         // NOTE: this is an ugly hack at the moment; there must be a better way
5722                         switch (mc) {
5723                         case 0x7: mx = 1; break; // was 0/2/4, choose 2
5724                         case 0xb: mx = 0; break; // was 0/2/6, choose 0
5725                         case 0xd: mx = 3; break; // was 0/4/6, choose 6
5726                         case 0xe: mx = 2; break; // was 2/4/6, choose 4
5727                         default:
5728                         case 0xf: mx = 1; break; // was 0/2/4/6, choose 2?
5729                         }
5730                         error_print("N%d.LMC%d.R%d: HW WL MAJORITY: bad byte-lane %d (0x%x), using %d.\n",
5731                                   node, ddr_interface_num, rankx, byte_idx, mc, mx << 1);
5732                     }
5733                     update_wlevel_rank_struct(&lmc_wlevel_rank, byte_idx, mx << 1);
5734                 } /* for (byte_idx=0; byte_idx<(8+ecc_ena); ++byte_idx) */
5735 
5736                 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
5737                 display_WL_with_final(node, ddr_interface_num, lmc_wlevel_rank, rankx);
5738             } /* if (wlevel_loops > 1) */
5739 #endif /* HW_WL_MAJORITY */
5740             // maybe print an error summary for the rank
5741             if ((wlevel_bitmask_errors_rank != 0) || (wlevel_validity_errors_rank != 0)) {
5742                 VB_PRT(VBL_FAE, "N%d.LMC%d.R%d: H/W Write-Leveling errors - %d bitmask, %d validity, %d retries, %d exhausted\n",
5743                        node, ddr_interface_num, rankx,
5744                        wlevel_bitmask_errors_rank, wlevel_validity_errors_rank,
5745                        wloop_retries_total, wloop_retries_exhausted);
5746             }
5747 
5748         } /* for (rankx = 0; rankx < dimm_count * 4;rankx++) */
5749 
5750 #if WODT_MASK_2R_1S
5751         if ((ddr_type == DDR4_DRAM) && (num_ranks == 2) && (dimm_count == 1)) {
5752             /* LMC(0)_WODT_MASK */
5753             bdk_lmcx_wodt_mask_t lmc_wodt_mask;
5754             // always read current so we can see if its different from saved
5755             lmc_wodt_mask.u = BDK_CSR_READ(node, BDK_LMCX_WODT_MASK(ddr_interface_num));
5756             if (lmc_wodt_mask.u != saved_wodt_mask) { // always restore what was saved if diff
5757                 lmc_wodt_mask.u = saved_wodt_mask;
5758                 ddr_print("WODT_MASK                                     : 0x%016llx\n", lmc_wodt_mask.u);
5759                 DRAM_CSR_WRITE(node, BDK_LMCX_WODT_MASK(ddr_interface_num), lmc_wodt_mask.u);
5760             }
5761         }
5762 #endif /* WODT_MASK_2R_1S */
5763 
5764     } // End HW write-leveling block
5765 
5766     // At the end of HW Write Leveling, check on some things...
5767     if (! disable_deskew_training) {
5768 
5769         deskew_counts_t dsk_counts;
5770         int retry_count = 0;
5771 
5772         VB_PRT(VBL_FAE, "N%d.LMC%d: Check Deskew Settings before Read-Leveling.\n", node, ddr_interface_num);
5773 
5774         do {
5775             Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, VBL_FAE);
5776 
5777             // only RAWCARD A or B will not benefit from retraining if there's only saturation
5778             // or any rawcard if there is a nibble error
5779             if ((!spd_rawcard_AorB && dsk_counts.saturated > 0) ||
5780                 ((dsk_counts.nibrng_errs != 0) || (dsk_counts.nibunl_errs != 0)))
5781             {
5782                 retry_count++;
5783                 VB_PRT(VBL_FAE, "N%d.LMC%d: Deskew Status indicates saturation or nibble errors - retry %d Training.\n",
5784                           node, ddr_interface_num, retry_count);
5785                 Perform_Read_Deskew_Training(node, rank_mask, ddr_interface_num,
5786                                              spd_rawcard_AorB, 0, ddr_interface_64b);
5787             } else
5788                 break;
5789         } while (retry_count < 5);
5790 
5791         // print the last setting only if we had to do retries here
5792         if (retry_count > 0)
5793             Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, VBL_NORM);
5794     }
5795 
5796     /*
5797      * 6.9.12 LMC Read Leveling
5798      *
5799      * LMC supports an automatic read-leveling separately per byte-lane using
5800      * the DDR3 multipurpose register predefined pattern for system
5801      * calibration defined in the JEDEC DDR3 specifications.
5802      *
5803      * All of DDR PLL, LMC CK, and LMC DRESET, and early LMC initializations
5804      * must be completed prior to starting this LMC read-leveling sequence.
5805      *
5806      * Software could simply write the desired read-leveling values into
5807      * LMC(0)_RLEVEL_RANK(0..3). This section describes a sequence that uses
5808      * LMC's autoread-leveling capabilities.
5809      *
5810      * When LMC does the read-leveling sequence for a rank, it first enables
5811      * the DDR3 multipurpose register predefined pattern for system
5812      * calibration on the selected DRAM rank via a DDR3 MR3 write, then
5813      * executes 64 RD operations at different internal delay settings, then
5814      * disables the predefined pattern via another DDR3 MR3 write
5815      * operation. LMC determines the pass or fail of each of the 64 settings
5816      * independently for each byte lane, then writes appropriate
5817      * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] values for the rank.
5818      *
5819      * After read-leveling for a rank, software can read the 64 pass/fail
5820      * indications for one byte lane via LMC(0)_RLEVEL_DBG[BITMASK]. Software
5821      * can observe all pass/fail results for all byte lanes in a rank via
5822      * separate read-leveling sequences on the rank with different
5823      * LMC(0)_RLEVEL_CTL[BYTE] values.
5824      *
5825      * The 64 pass/fail results will typically have failures for the low
5826      * delays, followed by a run of some passing settings, followed by more
5827      * failures in the remaining high delays.  LMC sets
5828      * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] to one of the passing settings.
5829      * First, LMC selects the longest run of successes in the 64 results. (In
5830      * the unlikely event that there is more than one longest run, LMC
5831      * selects the first one.) Then if LMC(0)_RLEVEL_CTL[OFFSET_EN] = 1 and
5832      * the selected run has more than LMC(0)_RLEVEL_CTL[OFFSET] successes,
5833      * LMC selects the last passing setting in the run minus
5834      * LMC(0)_RLEVEL_CTL[OFFSET]. Otherwise LMC selects the middle setting in
5835      * the run (rounding earlier when necessary). We expect the read-leveling
5836      * sequence to produce good results with the reset values
5837      * LMC(0)_RLEVEL_CTL [OFFSET_EN]=1, LMC(0)_RLEVEL_CTL[OFFSET] = 2.
5838      *
5839      * The read-leveling sequence has the following steps:
5840      *
5841      * 1. Select desired LMC(0)_RLEVEL_CTL[OFFSET_EN,OFFSET,BYTE] settings.
5842      *    Do the remaining substeps 2-4 separately for each rank i with
5843      *    attached DRAM.
5844      *
5845      * 2. Without changing any other fields in LMC(0)_CONFIG,
5846      *
5847      *    o write LMC(0)_SEQ_CTL[SEQ_SEL] to select read-leveling
5848      *
5849      *    o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
5850      *
5851      *    o write LMC(0)_SEQ_CTL[INIT_START] = 1
5852      *
5853      *    This initiates the previously-described read-leveling.
5854      *
5855      * 3. Wait until LMC(0)_RLEVEL_RANKi[STATUS] != 2
5856      *
5857      *    LMC will have updated LMC(0)_RLEVEL_RANKi[BYTE*] for all byte lanes
5858      *    at this point.
5859      *
5860      *    If ECC DRAM is not present (i.e. when DRAM is not attached to the
5861      *    DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and
5862      *    DDR_DQ<35:32> chip signals), write LMC(0)_RLEVEL_RANK*[BYTE8] =
5863      *    LMC(0)_RLEVEL_RANK*[BYTE0]. Write LMC(0)_RLEVEL_RANK*[BYTE4] =
5864      *    LMC(0)_RLEVEL_RANK*[BYTE0].
5865      *
5866      * 4. If desired, consult LMC(0)_RLEVEL_DBG[BITMASK] and compare to
5867      *    LMC(0)_RLEVEL_RANKi[BYTE*] for the lane selected by
5868      *    LMC(0)_RLEVEL_CTL[BYTE]. If desired, modify LMC(0)_RLEVEL_CTL[BYTE]
5869      *    to a new value and repeat so that all BITMASKs can be observed.
5870      *
5871      * 5. Initialize LMC(0)_RLEVEL_RANK* values for all unused ranks.
5872      *
5873      *    Let rank i be a rank with attached DRAM.
5874      *
5875      *    For all ranks j that do not have attached DRAM, set
5876      *    LMC(0)_RLEVEL_RANKj = LMC(0)_RLEVEL_RANKi.
5877      *
5878      * This read-leveling sequence can help select the proper CN70XX ODT
5879      * resistance value (LMC(0)_COMP_CTL2[RODT_CTL]). A hardware-generated
5880      * LMC(0)_RLEVEL_RANKi[BYTEj] value (for a used byte lane j) that is
5881      * drastically different from a neighboring LMC(0)_RLEVEL_RANKi[BYTEk]
5882      * (for a used byte lane k) can indicate that the CN70XX ODT value is
5883      * bad. It is possible to simultaneously optimize both
5884      * LMC(0)_COMP_CTL2[RODT_CTL] and LMC(0)_RLEVEL_RANKn[BYTE*] values by
5885      * performing this read-leveling sequence for several
5886      * LMC(0)_COMP_CTL2[RODT_CTL] values and selecting the one with the best
5887      * LMC(0)_RLEVEL_RANKn[BYTE*] profile for the ranks.
5888      */
5889 
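    /*
     * Illustrative sketch only, not called by this driver: one way software
     * could map a 64-bit pass/fail bitmask to a delay setting following the
     * selection rules described above (longest run, first run on ties,
     * OFFSET back-off when the run is long enough, otherwise the middle of
     * the run rounding earlier).  The function name is hypothetical; the
     * real selection is performed by the LMC hardware.
     */
#if 0
    static int rlevel_select_from_bitmask(uint64_t bitmask, int offset_en, int offset)
    {
        int best_start = 0, best_len = 0;
        int run_start = -1;

        /* Find the longest run of passing (1) bits; ties keep the earliest run. */
        for (int i = 0; i < 64; i++) {
            if (bitmask & (1ull << i)) {
                if (run_start < 0)
                    run_start = i;
                if ((i - run_start + 1) > best_len) {
                    best_len = i - run_start + 1;
                    best_start = run_start;
                }
            } else {
                run_start = -1;
            }
        }

        if (best_len == 0)
            return -1; /* no passing settings at all */

        /* Back off OFFSET from the last passing setting when enabled and the
           run has more than OFFSET successes; otherwise take the middle of
           the run, rounding earlier. */
        if (offset_en && (best_len > offset))
            return best_start + best_len - 1 - offset;
        return best_start + (best_len - 1) / 2;
    }
#endif
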
5890     {
5891 #pragma pack(push,4)
5892         bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank;
5893         bdk_lmcx_comp_ctl2_t lmc_comp_ctl2;
5894         bdk_lmcx_rlevel_ctl_t rlevel_ctl;
5895         bdk_lmcx_control_t lmc_control;
5896         bdk_lmcx_modereg_params1_t lmc_modereg_params1;
5897         unsigned char rodt_ctl;
5898         unsigned char rankx = 0;
5899         unsigned char ecc_ena;
5900         unsigned char rtt_nom;
5901         unsigned char rtt_idx;
5902         int min_rtt_nom_idx;
5903         int max_rtt_nom_idx;
5904         int min_rodt_ctl;
5905         int max_rodt_ctl;
5906         int rlevel_debug_loops = 1;
5907         unsigned char save_ddr2t;
5908         int rlevel_avg_loops;
5909         int ddr_rlevel_compute;
5910         int saved_ddr__ptune, saved_ddr__ntune, rlevel_comp_offset;
5911         int saved_int_zqcs_dis = 0;
5912         int disable_sequential_delay_check = 0;
5913         int maximum_adjacent_rlevel_delay_increment = 0;
5914         struct {
5915             uint64_t setting;
5916             int      score;
5917         } rlevel_scoreboard[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4];
5918         int print_nom_ohms;
5919 #if PERFECT_BITMASK_COUNTING
5920         typedef struct {
5921             uint8_t count[9][32]; // 8+ECC by 32 values
5922             uint8_t total[9];     // 8+ECC
5923         } rank_perfect_t;
5924         rank_perfect_t rank_perfect_counts[4];
5925 #endif
5926 
5927 #pragma pack(pop)
5928 
5929 #if PERFECT_BITMASK_COUNTING
5930         memset(rank_perfect_counts, 0, sizeof(rank_perfect_counts));
5931 #endif /* PERFECT_BITMASK_COUNTING */
5932 
5933         lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
5934         save_ddr2t    = lmc_control.s.ddr2t;
5935 
5936         lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
5937         ecc_ena      = lmc_config.s.ecc_ena;
5938 
5939 #if 0
5940         {
5941             int save_ref_zqcs_int;
5942             uint64_t temp_delay_usecs;
5943 
5944             /* Temporarily select the minimum ZQCS interval and wait
5945                long enough for a few ZQCS calibrations to occur.  This
5946                should ensure that the calibration circuitry is
5947                stabilized before read-leveling occurs. */
5948             save_ref_zqcs_int         = lmc_config.s.ref_zqcs_int;
5949             lmc_config.s.ref_zqcs_int = 1 | (32<<7); /* set smallest interval */
5950             DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
5951             BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
5952 
5953             /* Compute an appropriate delay based on the current ZQCS
5954                interval. The delay should be long enough for the
5955                current ZQCS delay counter to expire plus ten of the
               minimum intervals to ensure that some calibrations
5957                occur. */
5958             temp_delay_usecs = (((uint64_t)save_ref_zqcs_int >> 7)
5959                                 * tclk_psecs * 100 * 512 * 128) / (10000*10000)
5960                 + 10 * ((uint64_t)32 * tclk_psecs * 100 * 512 * 128) / (10000*10000);
5961 
5962             ddr_print ("Waiting %lu usecs for ZQCS calibrations to start\n",
5963                          temp_delay_usecs);
5964             bdk_wait_usec(temp_delay_usecs);
5965 
5966             lmc_config.s.ref_zqcs_int = save_ref_zqcs_int; /* Restore computed interval */
5967             DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
5968             BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
5969         }
5970 #endif
5971 
5972         if ((s = lookup_env_parameter("ddr_rlevel_2t")) != NULL) {
5973             lmc_control.s.ddr2t = strtoul(s, NULL, 0);
5974         }
5975 
5976         DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
5977 
5978         ddr_print("N%d.LMC%d: Performing Read-Leveling\n", node, ddr_interface_num);
5979 
5980         rlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num));
5981 
5982         rlevel_avg_loops = custom_lmc_config->rlevel_average_loops;
5983         if (rlevel_avg_loops == 0) {
5984             rlevel_avg_loops = RLEVEL_AVG_LOOPS_DEFAULT;
5985             if ((dimm_count == 1) || (num_ranks == 1)) // up the samples for these cases
5986                 rlevel_avg_loops = rlevel_avg_loops * 2 + 1;
5987         }
5988 
5989         ddr_rlevel_compute = custom_lmc_config->rlevel_compute;
5990         rlevel_ctl.s.offset_en = custom_lmc_config->offset_en;
5991         rlevel_ctl.s.offset    = spd_rdimm
5992             ? custom_lmc_config->offset_rdimm
5993             : custom_lmc_config->offset_udimm;
5994 
5995         rlevel_ctl.s.delay_unload_0 = 1; /* should normally be set */
5996         rlevel_ctl.s.delay_unload_1 = 1; /* should normally be set */
5997         rlevel_ctl.s.delay_unload_2 = 1; /* should normally be set */
5998         rlevel_ctl.s.delay_unload_3 = 1; /* should normally be set */
5999 
6000         rlevel_ctl.s.or_dis = 1; // default to get best bitmasks
6001         if ((s = lookup_env_parameter("ddr_rlevel_or_dis")) != NULL) {
6002             rlevel_ctl.s.or_dis = !!strtoul(s, NULL, 0);
6003         }
6004         rlevel_ctl.s.bitmask = 0xff; // should work in 32b mode also
6005         if ((s = lookup_env_parameter("ddr_rlevel_ctl_bitmask")) != NULL) {
6006             rlevel_ctl.s.bitmask = strtoul(s, NULL, 0);
6007         }
6008         debug_print("N%d.LMC%d: RLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n",
6009                     node, ddr_interface_num,
6010                     rlevel_ctl.s.or_dis, rlevel_ctl.s.bitmask);
6011 
6012         rlevel_comp_offset = spd_rdimm
6013             ? custom_lmc_config->rlevel_comp_offset_rdimm
6014             : custom_lmc_config->rlevel_comp_offset_udimm;
6015 
6016         if ((s = lookup_env_parameter("ddr_rlevel_offset")) != NULL) {
6017             rlevel_ctl.s.offset   = strtoul(s, NULL, 0);
6018         }
6019 
6020         if ((s = lookup_env_parameter("ddr_rlevel_offset_en")) != NULL) {
6021             rlevel_ctl.s.offset_en   = strtoul(s, NULL, 0);
6022         }
6023         if ((s = lookup_env_parameter("ddr_rlevel_ctl")) != NULL) {
6024             rlevel_ctl.u   = strtoul(s, NULL, 0);
6025         }
6026 
6027         DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num), rlevel_ctl.u);
6028 
6029         if ((s = lookup_env_parameter("ddr%d_rlevel_debug_loops", ddr_interface_num)) != NULL) {
6030             rlevel_debug_loops = strtoul(s, NULL, 0);
6031         }
6032 
6033         if ((s = lookup_env_parameter("ddr_rtt_nom_auto")) != NULL) {
6034             ddr_rtt_nom_auto = !!strtoul(s, NULL, 0);
6035         }
6036 
6037         if ((s = lookup_env_parameter("ddr_rlevel_average")) != NULL) {
6038             rlevel_avg_loops = strtoul(s, NULL, 0);
6039         }
6040 
6041         if ((s = lookup_env_parameter("ddr_rlevel_compute")) != NULL) {
6042             ddr_rlevel_compute = strtoul(s, NULL, 0);
6043         }
6044 
6045         ddr_print("RLEVEL_CTL                                    : 0x%016llx\n", rlevel_ctl.u);
6046         ddr_print("RLEVEL_OFFSET                                 : %6d\n", rlevel_ctl.s.offset);
6047         ddr_print("RLEVEL_OFFSET_EN                              : %6d\n", rlevel_ctl.s.offset_en);
6048 
        /* The purpose of the indexed table is to sort the settings
6050         ** by the ohm value to simplify the testing when incrementing
6051         ** through the settings.  (index => ohms) 1=120, 2=60, 3=40,
6052         ** 4=30, 5=20 */
6053         min_rtt_nom_idx = (custom_lmc_config->min_rtt_nom_idx == 0) ? 1 : custom_lmc_config->min_rtt_nom_idx;
6054         max_rtt_nom_idx = (custom_lmc_config->max_rtt_nom_idx == 0) ? 5 : custom_lmc_config->max_rtt_nom_idx;
6055 
6056         min_rodt_ctl = (custom_lmc_config->min_rodt_ctl == 0) ? 1 : custom_lmc_config->min_rodt_ctl;
6057         max_rodt_ctl = (custom_lmc_config->max_rodt_ctl == 0) ? 5 : custom_lmc_config->max_rodt_ctl;
6058 
6059         if ((s = lookup_env_parameter("ddr_min_rodt_ctl")) != NULL) {
6060             min_rodt_ctl = strtoul(s, NULL, 0);
6061         }
6062         if ((s = lookup_env_parameter("ddr_max_rodt_ctl")) != NULL) {
6063             max_rodt_ctl = strtoul(s, NULL, 0);
6064         }
6065         if ((s = lookup_env_parameter("ddr_min_rtt_nom_idx")) != NULL) {
6066             min_rtt_nom_idx = strtoul(s, NULL, 0);
6067         }
6068         if ((s = lookup_env_parameter("ddr_max_rtt_nom_idx")) != NULL) {
6069             max_rtt_nom_idx = strtoul(s, NULL, 0);
6070         }
6071 
6072 #ifdef ENABLE_CUSTOM_RLEVEL_TABLE
6073         if (custom_lmc_config->rlevel_table != NULL) {
6074             char part_number[21];
6075             /* Check for hard-coded read-leveling settings */
6076             get_dimm_part_number(part_number, node, &dimm_config_table[0], 0, ddr_type);
6077             for (rankx = 0; rankx < dimm_count * 4;rankx++) {
6078                 if (!(rank_mask & (1 << rankx)))
6079                     continue;
6080 
6081                 lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
6082 
6083                 i = 0;
6084                 while (custom_lmc_config->rlevel_table[i].part != NULL) {
6085                     debug_print("DIMM part number:\"%s\", SPD: \"%s\"\n", custom_lmc_config->rlevel_table[i].part, part_number);
6086                     if ((strcmp(part_number, custom_lmc_config->rlevel_table[i].part) == 0)
6087                         && (_abs(custom_lmc_config->rlevel_table[i].speed - 2*ddr_hertz/(1000*1000)) < 10 ))
6088                     {
6089                         ddr_print("Using hard-coded read leveling for DIMM part number: \"%s\"\n", part_number);
6090                         lmc_rlevel_rank.u = custom_lmc_config->rlevel_table[i].rlevel_rank[ddr_interface_num][rankx];
6091                         DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u);
6092                         lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
6093                         display_RL(node, ddr_interface_num, lmc_rlevel_rank, rankx);
6094                         rlevel_debug_loops = 0; /* Disable h/w read-leveling */
6095                         break;
6096                     }
6097                     ++i;
6098                 }
6099             }
6100         }
6101 #endif /* ENABLE_CUSTOM_RLEVEL_TABLE */
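        /*
         * Example (illustrative): a custom rlevel_table entry with
         * .speed = 1600 matches when 2 * ddr_hertz / (1000 * 1000), i.e. the
         * data rate in MT/s, is within 10 MT/s of that value, so clock rates
         * of roughly 796 to 804 MHz.
         */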
6102 
6103         while(rlevel_debug_loops--) {
6104             /* Initialize the error scoreboard */
6105             memset(rlevel_scoreboard, 0, sizeof(rlevel_scoreboard));
6106 
6107             if ((s = lookup_env_parameter("ddr_rlevel_comp_offset")) != NULL) {
6108                 rlevel_comp_offset = strtoul(s, NULL, 0);
6109             }
6110 
6111             disable_sequential_delay_check = custom_lmc_config->disable_sequential_delay_check;
6112 
6113             if ((s = lookup_env_parameter("ddr_disable_sequential_delay_check")) != NULL) {
6114                 disable_sequential_delay_check = strtoul(s, NULL, 0);
6115             }
6116 
6117             maximum_adjacent_rlevel_delay_increment = custom_lmc_config->maximum_adjacent_rlevel_delay_increment;
6118 
6119             if ((s = lookup_env_parameter("ddr_maximum_adjacent_rlevel_delay_increment")) != NULL) {
6120                 maximum_adjacent_rlevel_delay_increment = strtoul(s, NULL, 0);
6121             }
6122 
6123             lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
6124             saved_ddr__ptune = lmc_comp_ctl2.s.ddr__ptune;
6125             saved_ddr__ntune = lmc_comp_ctl2.s.ddr__ntune;
6126 
6127             /* Disable dynamic compensation settings */
6128             if (rlevel_comp_offset != 0) {
6129                 lmc_comp_ctl2.s.ptune = saved_ddr__ptune;
6130                 lmc_comp_ctl2.s.ntune = saved_ddr__ntune;
6131 
6132                 /* Round up the ptune calculation to bias the odd cases toward ptune */
6133                 lmc_comp_ctl2.s.ptune += divide_roundup(rlevel_comp_offset, 2);
6134                 lmc_comp_ctl2.s.ntune -= rlevel_comp_offset/2;
6135 
6136                 lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
6137                 saved_int_zqcs_dis = lmc_control.s.int_zqcs_dis;
6138                 lmc_control.s.int_zqcs_dis    = 1; /* Disable ZQCS while in bypass. */
6139                 DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
6140 
6141                 lmc_comp_ctl2.s.byp = 1; /* Enable bypass mode */
6142                 DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
6143                 BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
6144                 lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read again */
6145                 ddr_print("DDR__PTUNE/DDR__NTUNE                         : %d/%d\n",
6146                           lmc_comp_ctl2.s.ddr__ptune, lmc_comp_ctl2.s.ddr__ntune);
6147             }
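
            /*
             * Worked example (illustrative): with rlevel_comp_offset = 3, the
             * block above adds divide_roundup(3, 2) = 2 to ptune and subtracts
             * 3 / 2 = 1 from ntune, biasing the odd remainder toward ptune.
             */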
6148 
6149             lmc_modereg_params1.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num));
6150 
6151             for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
6152                 rtt_nom = imp_values->rtt_nom_table[rtt_idx];
6153 
                /* When the read ODT mask is zero, the dyn_rtt_nom_mask is
                   also zero and RTT_NOM will not change during
                   read-leveling.  Since the value is fixed we only need
6157                    to test it once. */
6158                 if (dyn_rtt_nom_mask == 0) {
6159                     print_nom_ohms = -1; // flag not to print NOM ohms
6160                     if (rtt_idx != min_rtt_nom_idx)
6161                         continue;
6162                 } else {
6163                     if (dyn_rtt_nom_mask & 1) lmc_modereg_params1.s.rtt_nom_00 = rtt_nom;
6164                     if (dyn_rtt_nom_mask & 2) lmc_modereg_params1.s.rtt_nom_01 = rtt_nom;
6165                     if (dyn_rtt_nom_mask & 4) lmc_modereg_params1.s.rtt_nom_10 = rtt_nom;
6166                     if (dyn_rtt_nom_mask & 8) lmc_modereg_params1.s.rtt_nom_11 = rtt_nom;
6167                     // FIXME? rank 0 ohms always for the printout?
6168                     print_nom_ohms = imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00];
6169                 }
6170 
6171                 DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num), lmc_modereg_params1.u);
6172                 VB_PRT(VBL_TME, "\n");
6173                 VB_PRT(VBL_TME, "RTT_NOM     %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
6174                           imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_11],
6175                           imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_10],
6176                           imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_01],
6177                           imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00],
6178                           lmc_modereg_params1.s.rtt_nom_11,
6179                           lmc_modereg_params1.s.rtt_nom_10,
6180                           lmc_modereg_params1.s.rtt_nom_01,
6181                           lmc_modereg_params1.s.rtt_nom_00);
6182 
6183                 perform_ddr_init_sequence(node, rank_mask, ddr_interface_num);
6184 
6185                 // Try RANK outside RODT to rearrange the output...
6186                 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
6187                     int byte_idx;
6188                     rlevel_byte_data_t rlevel_byte[9];
6189                     int average_loops;
6190                     int rlevel_rank_errors, rlevel_bitmask_errors, rlevel_nonseq_errors;
6191                     rlevel_bitmask_t rlevel_bitmask[9];
6192 #if PICK_BEST_RANK_SCORE_NOT_AVG
6193                     int rlevel_best_rank_score;
6194 #endif
6195 
6196                     if (!(rank_mask & (1 << rankx)))
6197                         continue;
6198 
6199                     for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
6200 #if PICK_BEST_RANK_SCORE_NOT_AVG
6201                         rlevel_best_rank_score = DEFAULT_BEST_RANK_SCORE;
6202 #endif
6203                         lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
6204                         lmc_comp_ctl2.s.rodt_ctl = rodt_ctl;
6205                         DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
6206                         lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
                        bdk_wait_usec(1); /* Give it a little time to take effect */
6208                         VB_PRT(VBL_DEV, "Read ODT_CTL                                  : 0x%x (%d ohms)\n",
6209                                lmc_comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[lmc_comp_ctl2.s.rodt_ctl]);
6210 
6211                         memset(rlevel_byte, 0, sizeof(rlevel_byte));
6212 
6213                         for (average_loops = 0; average_loops < rlevel_avg_loops; average_loops++) {
6214                             rlevel_bitmask_errors = 0;
6215 
6216                             if (! (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM))) {
6217                                 /* Clear read-level delays */
6218                                 DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), 0);
6219 
6220                                 /* read-leveling */
6221                                 perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 1);
6222 
6223                                 if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx),
6224                                                            status, ==, 3, 1000000))
6225                                 {
6226                                     error_print("ERROR: Timeout waiting for RLEVEL\n");
6227                                 }
6228                             }
6229 
6230                             lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
6231 
6232                             { // start bitmask interpretation block
6233                                 int redoing_nonseq_errs = 0;
6234 
6235                                 memset(rlevel_bitmask, 0, sizeof(rlevel_bitmask));
6236 
6237                                 if (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM)) {
6238                                     bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank_aside;
6239                                     bdk_lmcx_modereg_params0_t lmc_modereg_params0;
6240 
6241                                     /* A-side */
6242                                     lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
6243                                     lmc_modereg_params0.s.mprloc = 0; /* MPR Page 0 Location 0 */
6244                                     DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
6245 
6246                                     /* Clear read-level delays */
6247                                     DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), 0);
6248 
6249                                     perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 1); /* read-leveling */
6250 
6251                                     if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx),
6252                                                                status, ==, 3, 1000000))
6253                                         {
6254                                             error_print("ERROR: Timeout waiting for RLEVEL\n");
6255 
6256                                         }
6257                                     lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
6258 
6259                                     lmc_rlevel_rank_aside.u = lmc_rlevel_rank.u;
6260 
6261                                     rlevel_bitmask[0].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 0);
6262                                     rlevel_bitmask[1].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 1);
6263                                     rlevel_bitmask[2].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 2);
6264                                     rlevel_bitmask[3].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 3);
6265                                     rlevel_bitmask[8].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 8);
6266                                     /* A-side complete */
6267 
6268 
6269                                     /* B-side */
6270                                     lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
6271                                     lmc_modereg_params0.s.mprloc = 3; /* MPR Page 0 Location 3 */
6272                                     DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
6273 
6274                                     /* Clear read-level delays */
6275                                     DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), 0);
6276 
6277                                     perform_octeon3_ddr3_sequence(node, 1 << rankx, ddr_interface_num, 1); /* read-leveling */
6278 
6279                                     if (BDK_CSR_WAIT_FOR_FIELD(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx),
6280                                                                status, ==, 3, 1000000))
6281                                         {
6282                                             error_print("ERROR: Timeout waiting for RLEVEL\n");
6283                                         }
6284                                     lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
6285 
6286                                     rlevel_bitmask[4].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 4);
6287                                     rlevel_bitmask[5].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 5);
6288                                     rlevel_bitmask[6].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 6);
6289                                     rlevel_bitmask[7].bm = octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, 7);
6290                                     /* B-side complete */
6291 
6292 
6293                                     update_rlevel_rank_struct(&lmc_rlevel_rank, 0, lmc_rlevel_rank_aside.cn83xx.byte0);
6294                                     update_rlevel_rank_struct(&lmc_rlevel_rank, 1, lmc_rlevel_rank_aside.cn83xx.byte1);
6295                                     update_rlevel_rank_struct(&lmc_rlevel_rank, 2, lmc_rlevel_rank_aside.cn83xx.byte2);
6296                                     update_rlevel_rank_struct(&lmc_rlevel_rank, 3, lmc_rlevel_rank_aside.cn83xx.byte3);
6297                                     update_rlevel_rank_struct(&lmc_rlevel_rank, 8, lmc_rlevel_rank_aside.cn83xx.byte8); /* ECC A-side */
6298 
6299                                     lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
6300                                     lmc_modereg_params0.s.mprloc = 0; /* MPR Page 0 Location 0 */
6301                                     DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
6302 
6303                                 } /* if (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM)) */
6304 
6305                                 /*
6306                                  * Evaluate the quality of the read-leveling delays from the bitmasks.
6307                                  * Also save off a software computed read-leveling mask that may be
6308                                  * used later to qualify the delay results from Octeon.
6309                                  */
6310                                 for (byte_idx = 0; byte_idx < (8+ecc_ena); ++byte_idx) {
6311                                     int bmerr;
6312                                     if (!(ddr_interface_bytemask&(1<<byte_idx)))
6313                                         continue;
6314                                     if (! (rlevel_separate_ab && spd_rdimm && (ddr_type == DDR4_DRAM))) {
6315                                         rlevel_bitmask[byte_idx].bm =
6316                                             octeon_read_lmcx_ddr3_rlevel_dbg(node, ddr_interface_num, byte_idx);
6317                                     }
6318                                     bmerr = validate_ddr3_rlevel_bitmask(&rlevel_bitmask[byte_idx], ddr_type);
6319                                     rlevel_bitmask[byte_idx].errs = bmerr;
6320                                     rlevel_bitmask_errors += bmerr;
6321 #if PERFECT_BITMASK_COUNTING
6322                                     if ((ddr_type == DDR4_DRAM) && !bmerr) { // count only the "perfect" bitmasks
6323                                         // FIXME: could optimize this a bit?
6324                                         int delay = get_rlevel_rank_struct(&lmc_rlevel_rank, byte_idx);
6325                                         rank_perfect_counts[rankx].count[byte_idx][delay] += 1;
6326                                         rank_perfect_counts[rankx].total[byte_idx] += 1;
6327                                     }
6328 #endif /* PERFECT_BITMASK_COUNTING */
6329                                 }
6330 
6331                                 /* Set delays for unused bytes to match byte 0. */
6332                                 for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
6333                                     if (ddr_interface_bytemask & (1 << byte_idx))
6334                                         continue;
6335                                     update_rlevel_rank_struct(&lmc_rlevel_rank, byte_idx, lmc_rlevel_rank.cn83xx.byte0);
6336                                 }
6337 
6338                                 /* Save a copy of the byte delays in physical
6339                                    order for sequential evaluation. */
6340                                 unpack_rlevel_settings(ddr_interface_bytemask, ecc_ena, rlevel_byte, lmc_rlevel_rank);
6341                             redo_nonseq_errs:
6342 
6343                                 rlevel_nonseq_errors  = 0;
6344 
6345                                 if (! disable_sequential_delay_check) {
6346                                     if ((ddr_interface_bytemask & 0xff) == 0xff) {
6347                                         /* Evaluate delay sequence across the whole range of bytes for standard dimms. */
6348                                         if ((spd_dimm_type == 1) || (spd_dimm_type == 5)) { /* 1=RDIMM, 5=Mini-RDIMM */
6349                                             int register_adjacent_delay = _abs(rlevel_byte[4].delay - rlevel_byte[5].delay);
6350                                             /* Registered dimm topology routes from the center. */
6351                                             rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 0, 3+ecc_ena,
6352                                                                                        maximum_adjacent_rlevel_delay_increment);
6353                                             rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 5, 7+ecc_ena,
6354                                                                                        maximum_adjacent_rlevel_delay_increment);
                                            // byte 5 sqerrs would otherwise never get cleared for RDIMMs, so clear it here
6356                                             rlevel_byte[5].sqerrs = 0;
6357                                             if (register_adjacent_delay > 1) {
6358                                                 /* Assess proximity of bytes on opposite sides of register */
6359                                                 rlevel_nonseq_errors += (register_adjacent_delay-1) * RLEVEL_ADJACENT_DELAY_ERROR;
6360                                                 // update byte 5 error
6361                                                 rlevel_byte[5].sqerrs += (register_adjacent_delay-1) * RLEVEL_ADJACENT_DELAY_ERROR;
6362                                             }
6363                                         }
6364                                         if ((spd_dimm_type == 2) || (spd_dimm_type == 6)) { /* 2=UDIMM, 6=Mini-UDIMM */
6365                                             /* Unbuffered dimm topology routes from end to end. */
6366                                             rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 0, 7+ecc_ena,
6367                                                                                        maximum_adjacent_rlevel_delay_increment);
6368                                         }
6369                                     } else {
6370                                         rlevel_nonseq_errors += nonsequential_delays(rlevel_byte, 0, 3+ecc_ena,
6371                                                                                    maximum_adjacent_rlevel_delay_increment);
6372                                     }
6373                                 } /* if (! disable_sequential_delay_check) */
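
                                /*
                                 * Example (illustrative): on an RDIMM with byte 4
                                 * at delay 8 and byte 5 at delay 11,
                                 * register_adjacent_delay = 3, so the check above
                                 * adds (3 - 1) * RLEVEL_ADJACENT_DELAY_ERROR to
                                 * both rlevel_nonseq_errors and byte 5's sqerrs.
                                 */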
6374 
6375 #if 0
6376                                 // FIXME FIXME: disabled for now, it was too much...
6377 
6378                                 // Calculate total errors for the rank:
6379                                 // we do NOT add nonsequential errors if mini-[RU]DIMM or x16;
6380                                 // mini-DIMMs and x16 devices have unusual sequence geometries.
6381                                 // Make the final scores for them depend only on the bitmasks...
6382                                 rlevel_rank_errors = rlevel_bitmask_errors;
6383                                 if ((spd_dimm_type != 5) &&
6384                                     (spd_dimm_type != 6) &&
6385                                     (dram_width != 16))
6386                                 {
6387                                     rlevel_rank_errors += rlevel_nonseq_errors;
6388                                 }
6389 #else
6390                                 rlevel_rank_errors = rlevel_bitmask_errors + rlevel_nonseq_errors;
6391 #endif
6392 
                                // print the original sample here only if we are not really averaging or picking best;
                                // also do not print if we are redoing the NONSEQ score using the COMPUTED delays
6395                                 if (!redoing_nonseq_errs && ((rlevel_avg_loops < 2) || dram_is_verbose(VBL_DEV2))) {
6396                                     display_RL_BM(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena);
6397                                     display_RL_BM_scores(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena);
6398                                     display_RL_SEQ_scores(node, ddr_interface_num, rankx, rlevel_byte, ecc_ena);
6399                                     display_RL_with_score(node, ddr_interface_num, lmc_rlevel_rank, rankx, rlevel_rank_errors);
6400                                 }
6401 
6402                                 if (ddr_rlevel_compute) {
6403                                     if (!redoing_nonseq_errs) {
6404                                         /* Recompute the delays based on the bitmask */
6405                                         for (byte_idx = 0; byte_idx < (8+ecc_ena); ++byte_idx) {
6406                                             if (!(ddr_interface_bytemask & (1 << byte_idx)))
6407                                                 continue;
6408                                             update_rlevel_rank_struct(&lmc_rlevel_rank, byte_idx,
6409                                                                       compute_ddr3_rlevel_delay(rlevel_bitmask[byte_idx].mstart,
6410                                                                                                 rlevel_bitmask[byte_idx].width,
6411                                                                                                 rlevel_ctl));
6412                                         }
6413 
6414                                         /* Override the copy of byte delays with the computed results. */
6415                                         unpack_rlevel_settings(ddr_interface_bytemask, ecc_ena, rlevel_byte, lmc_rlevel_rank);
6416 
6417                                         redoing_nonseq_errs = 1;
6418                                         goto redo_nonseq_errs;
6419 
6420                                     } else {
6421                                         /* now print this if already printed the original sample */
6422                                         if ((rlevel_avg_loops < 2) || dram_is_verbose(VBL_DEV2)) {
6423                                             display_RL_with_computed(node, ddr_interface_num,
6424                                                                      lmc_rlevel_rank, rankx,
6425                                                                      rlevel_rank_errors);
6426                                         }
6427                                     }
6428                                 } /* if (ddr_rlevel_compute) */
6429 
6430                             } // end bitmask interpretation block
6431 
6432 #if PICK_BEST_RANK_SCORE_NOT_AVG
6433 
                            // if it is a better (lower) score, then keep it
6435                             if (rlevel_rank_errors < rlevel_best_rank_score) {
6436                                 rlevel_best_rank_score = rlevel_rank_errors;
6437 
6438                                 // save the new best delays and best errors
6439                                 for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
6440                                     rlevel_byte[byte_idx].best = rlevel_byte[byte_idx].delay;
6441                                     rlevel_byte[byte_idx].bestsq = rlevel_byte[byte_idx].sqerrs;
6442                                     // save bitmasks and their scores as well
6443                                     // xlate UNPACKED index to PACKED index to get from rlevel_bitmask
6444                                     rlevel_byte[byte_idx].bm     = rlevel_bitmask[XUP(byte_idx, !!ecc_ena)].bm;
6445                                     rlevel_byte[byte_idx].bmerrs = rlevel_bitmask[XUP(byte_idx, !!ecc_ena)].errs;
6446                                 }
6447                             }
6448 #else /* PICK_BEST_RANK_SCORE_NOT_AVG */
6449 
6450                             /* Accumulate the total score across averaging loops for this setting */
6451                             debug_print("rlevel_scoreboard[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score: %d [%d]\n",
6452                                       rtt_nom, rodt_ctl, rankx, rlevel_rank_errors, average_loops);
6453                             rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score += rlevel_rank_errors;
6454 
6455                             /* Accumulate the delay totals and loop counts
6456                                necessary to compute average delay results */
6457                             for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
6458                                 if (rlevel_byte[byte_idx].delay != 0) { /* Don't include delay=0 in the average */
6459                                     ++rlevel_byte[byte_idx].loop_count;
6460                                     rlevel_byte[byte_idx].loop_total += rlevel_byte[byte_idx].delay;
6461                                 }
6462                             } /* for (byte_idx = 0; byte_idx < 9; ++byte_idx) */
6463 #endif /* PICK_BEST_RANK_SCORE_NOT_AVG */
6464 
6465                         } /* for (average_loops = 0; average_loops < rlevel_avg_loops; average_loops++) */
6466 
6467 #if PICK_BEST_RANK_SCORE_NOT_AVG
6468 
6469                         /* We recorded the best score across the averaging loops */
6470                         rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score = rlevel_best_rank_score;
6471 
6472                         /* Restore the delays from the best fields that go with the best score */
6473                         for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
6474                             rlevel_byte[byte_idx].delay = rlevel_byte[byte_idx].best;
6475                             rlevel_byte[byte_idx].sqerrs = rlevel_byte[byte_idx].bestsq;
6476                         }
6477 #else /* PICK_BEST_RANK_SCORE_NOT_AVG */
6478 
6479                         /* Compute the average score across averaging loops */
6480                         rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score =
6481                             divide_nint(rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score, rlevel_avg_loops);
6482 
6483                         /* Compute the average delay results */
6484                         for (byte_idx=0; byte_idx < 9; ++byte_idx) {
6485                             if (rlevel_byte[byte_idx].loop_count == 0)
6486                                 rlevel_byte[byte_idx].loop_count = 1;
6487                             rlevel_byte[byte_idx].delay = divide_nint(rlevel_byte[byte_idx].loop_total,
6488                                                                       rlevel_byte[byte_idx].loop_count);
6489                         }
6490 #endif /* PICK_BEST_RANK_SCORE_NOT_AVG */
6491 
6492                         lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
6493 
6494                         pack_rlevel_settings(ddr_interface_bytemask, ecc_ena, rlevel_byte, &lmc_rlevel_rank);
6495 
6496                         if (rlevel_avg_loops > 1) {
6497 #if PICK_BEST_RANK_SCORE_NOT_AVG
6498                             // restore the "best" bitmasks and their scores for printing
6499                             for (byte_idx = 0; byte_idx < 9; ++byte_idx) {
6500                                 if ((ddr_interface_bytemask & (1 << byte_idx)) == 0)
6501                                     continue;
6502                                 // xlate PACKED index to UNPACKED index to get from rlevel_byte
6503                                 rlevel_bitmask[byte_idx].bm   = rlevel_byte[XPU(byte_idx, !!ecc_ena)].bm;
6504                                 rlevel_bitmask[byte_idx].errs = rlevel_byte[XPU(byte_idx, !!ecc_ena)].bmerrs;
6505                             }
6506                             // print bitmasks/scores here only for DEV // FIXME? lower VBL?
6507                             if (dram_is_verbose(VBL_DEV)) {
6508                                 display_RL_BM(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena);
6509                                 display_RL_BM_scores(node, ddr_interface_num, rankx, rlevel_bitmask, ecc_ena);
6510                                 display_RL_SEQ_scores(node, ddr_interface_num, rankx, rlevel_byte, ecc_ena);
6511                             }
6512 
6513                             display_RL_with_RODT(node, ddr_interface_num, lmc_rlevel_rank, rankx,
6514                                                  rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score,
6515                                                  print_nom_ohms, imp_values->rodt_ohms[rodt_ctl],
6516                                                  WITH_RODT_BESTSCORE);
6517 
6518 #else /* PICK_BEST_RANK_SCORE_NOT_AVG */
6519                             display_RL_with_average(node, ddr_interface_num, lmc_rlevel_rank, rankx,
6520                                                     rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score);
6521 #endif /* PICK_BEST_RANK_SCORE_NOT_AVG */
6522 
6523                         } /* if (rlevel_avg_loops > 1) */
6524 
6525                         rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].setting = lmc_rlevel_rank.u;
6526 
6527                     } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
6528                 } /* for (rankx = 0; rankx < dimm_count*4; rankx++) */
            } /* for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) */
6530 
6531 
6532             /* Re-enable dynamic compensation settings. */
6533             if (rlevel_comp_offset != 0) {
6534                 lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
6535 
6536                 lmc_comp_ctl2.s.ptune = 0;
6537                 lmc_comp_ctl2.s.ntune = 0;
6538                 lmc_comp_ctl2.s.byp = 0; /* Disable bypass mode */
6539                 DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
6540                 BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read once */
6541 
6542                 lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read again */
6543                 ddr_print("DDR__PTUNE/DDR__NTUNE                         : %d/%d\n",
6544                           lmc_comp_ctl2.s.ddr__ptune, lmc_comp_ctl2.s.ddr__ntune);
6545 
6546                 lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
6547                 lmc_control.s.int_zqcs_dis    = saved_int_zqcs_dis; /* Restore original setting */
6548                 DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
6549 
6550             }
6551 
6552 
6553             {
6554                 int override_compensation = 0;
6555                 if ((s = lookup_env_parameter("ddr__ptune")) != NULL) {
6556                     saved_ddr__ptune = strtoul(s, NULL, 0);
6557                     override_compensation = 1;
6558                 }
6559                 if ((s = lookup_env_parameter("ddr__ntune")) != NULL) {
6560                     saved_ddr__ntune = strtoul(s, NULL, 0);
6561                     override_compensation = 1;
6562                 }
6563                 if (override_compensation) {
6564                     lmc_comp_ctl2.s.ptune = saved_ddr__ptune;
6565                     lmc_comp_ctl2.s.ntune = saved_ddr__ntune;
6566 
6567                     lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
6568                     saved_int_zqcs_dis = lmc_control.s.int_zqcs_dis;
6569                     lmc_control.s.int_zqcs_dis    = 1; /* Disable ZQCS while in bypass. */
6570                     DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
6571 
6572                     lmc_comp_ctl2.s.byp = 1; /* Enable bypass mode */
6573                     DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
6574                     lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num)); /* Read again */
6575 
6576                     ddr_print("DDR__PTUNE/DDR__NTUNE                         : %d/%d\n",
6577                               lmc_comp_ctl2.s.ptune, lmc_comp_ctl2.s.ntune);
6578                 }
6579             }
6580             { /* Evaluation block */
6581                 int      best_rodt_score = DEFAULT_BEST_RANK_SCORE; /* Start with an arbitrarily high score */
6582                 int      auto_rodt_ctl = 0;
6583                 int      auto_rtt_nom  = 0;
6584                 int      rodt_score;
6585                 int      rodt_row_skip_mask = 0;
6586 
6587                 // just add specific RODT rows to the skip mask for DDR4 at this time...
6588                 if (ddr_type == DDR4_DRAM) {
6589                     rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_34_ohm); // skip RODT row 34 ohms for all DDR4 types
6590                     rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_40_ohm); // skip RODT row 40 ohms for all DDR4 types
6591 #if ADD_48_OHM_SKIP
6592                     rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_48_ohm); // skip RODT row 48 ohms for all DDR4 types
#endif /* ADD_48_OHM_SKIP */
6594 #if NOSKIP_40_48_OHM
6595                     // For now, do not skip RODT row 40 or 48 ohm when ddr_hertz is above 1075 MHz
6596                     if (ddr_hertz > 1075000000) {
6597                         rodt_row_skip_mask &= ~(1 << ddr4_rodt_ctl_40_ohm); // noskip RODT row 40 ohms
6598                         rodt_row_skip_mask &= ~(1 << ddr4_rodt_ctl_48_ohm); // noskip RODT row 48 ohms
6599                     }
6600 #endif /* NOSKIP_40_48_OHM */
6601 #if NOSKIP_48_STACKED
6602                     // For now, do not skip RODT row 48 ohm for 2Rx4 stacked die DIMMs
6603                     if ((is_stacked_die) && (num_ranks == 2) && (dram_width == 4)) {
6604                         rodt_row_skip_mask &= ~(1 << ddr4_rodt_ctl_48_ohm); // noskip RODT row 48 ohms
6605                     }
6606 #endif /* NOSKIP_48_STACKED */
6607 #if NOSKIP_FOR_MINI
6608                     // for now, leave all rows eligible when we have mini-DIMMs...
6609                     if ((spd_dimm_type == 5) || (spd_dimm_type == 6)) {
6610                         rodt_row_skip_mask = 0;
6611                     }
6612 #endif /* NOSKIP_FOR_MINI */
6613 #if NOSKIP_FOR_2S_1R
6614                     // for now, leave all rows eligible when we have a 2-slot 1-rank config
6615                     if ((dimm_count == 2) && (num_ranks == 1)) {
6616                         rodt_row_skip_mask = 0;
6617                     }
6618 #endif /* NOSKIP_FOR_2S_1R */
6619                 }
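
                /*
                 * Example (illustrative): for a typical DDR4 configuration at
                 * or below 1075 MHz, the mask above skips the 34, 40, and
                 * (with ADD_48_OHM_SKIP) 48 ohm RODT rows, unless one of the
                 * NOSKIP_* exceptions clears those bits again.
                 */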
6620 
6621                 VB_PRT(VBL_DEV, "Evaluating Read-Leveling Scoreboard for AUTO settings.\n");
6622                 for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) {
6623                     rtt_nom = imp_values->rtt_nom_table[rtt_idx];
6624 
                    /* When the read ODT mask is zero, the dyn_rtt_nom_mask is
                       also zero and RTT_NOM will not change during
6627                        read-leveling.  Since the value is fixed we only need
6628                        to test it once. */
6629                     if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
6630                         continue;
6631 
6632                     for (rodt_ctl=max_rodt_ctl; rodt_ctl>=min_rodt_ctl; --rodt_ctl) {
6633                         rodt_score = 0;
6634                         for (rankx = 0; rankx < dimm_count * 4;rankx++) {
6635                             if (!(rank_mask & (1 << rankx)))
6636                                 continue;
6637                             debug_print("rlevel_scoreboard[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score:%d\n",
6638                                         rtt_nom, rodt_ctl, rankx, rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score);
6639                             rodt_score += rlevel_scoreboard[rtt_nom][rodt_ctl][rankx].score;
6640                         }
6641                         // FIXME: do we need to skip RODT rows here, like we do below in the by-RANK settings?
6642 
6643                         /* When using automatic ODT settings use the ODT
6644                            settings associated with the best score for
6645                            all of the tested ODT combinations. */
6646 
6647                         if ((rodt_score < best_rodt_score) || // always take lower score, OR
6648                             ((rodt_score == best_rodt_score) && // take same score if RODT ohms are higher
6649                              (imp_values->rodt_ohms[rodt_ctl] > imp_values->rodt_ohms[auto_rodt_ctl])))
6650                             {
6651                                 debug_print("AUTO: new best score for rodt:%d (%3d), new score:%d, previous score:%d\n",
6652                                             rodt_ctl, imp_values->rodt_ohms[rodt_ctl], rodt_score, best_rodt_score);
6653                                 best_rodt_score = rodt_score;
6654                                 auto_rodt_ctl   = rodt_ctl;
6655                                 auto_rtt_nom    = rtt_nom;
6656                             }
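
                            /*
                             * Example (illustrative): if the 48 ohm and 60 ohm
                             * rows both score 200, the comparison above keeps
                             * the higher-ohm row, since equal scores prefer
                             * larger RODT resistance.
                             */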
6657                     } /* for (rodt_ctl=max_rodt_ctl; rodt_ctl>=min_rodt_ctl; --rodt_ctl) */
6658                 } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
6659 
6660                 lmc_modereg_params1.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num));
6661 
6662                 if (ddr_rtt_nom_auto) {
6663                     /* Store the automatically set RTT_NOM value */
6664                     if (dyn_rtt_nom_mask & 1) lmc_modereg_params1.s.rtt_nom_00 = auto_rtt_nom;
6665                     if (dyn_rtt_nom_mask & 2) lmc_modereg_params1.s.rtt_nom_01 = auto_rtt_nom;
6666                     if (dyn_rtt_nom_mask & 4) lmc_modereg_params1.s.rtt_nom_10 = auto_rtt_nom;
6667                     if (dyn_rtt_nom_mask & 8) lmc_modereg_params1.s.rtt_nom_11 = auto_rtt_nom;
6668                 } else {
6669                     /* restore the manual settings to the register */
6670                     lmc_modereg_params1.s.rtt_nom_00 = default_rtt_nom[0];
6671                     lmc_modereg_params1.s.rtt_nom_01 = default_rtt_nom[1];
6672                     lmc_modereg_params1.s.rtt_nom_10 = default_rtt_nom[2];
6673                     lmc_modereg_params1.s.rtt_nom_11 = default_rtt_nom[3];
6674                 }
6675 
6676                 DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS1(ddr_interface_num), lmc_modereg_params1.u);
6677                 VB_PRT(VBL_DEV, "RTT_NOM     %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
6678                         imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_11],
6679                         imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_10],
6680                         imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_01],
6681                         imp_values->rtt_nom_ohms[lmc_modereg_params1.s.rtt_nom_00],
6682                         lmc_modereg_params1.s.rtt_nom_11,
6683                         lmc_modereg_params1.s.rtt_nom_10,
6684                         lmc_modereg_params1.s.rtt_nom_01,
6685                         lmc_modereg_params1.s.rtt_nom_00);
6686 
6687                 VB_PRT(VBL_DEV, "RTT_WR      %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
6688                        imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 3)],
6689                        imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 2)],
6690                        imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 1)],
6691                        imp_values->rtt_wr_ohms[EXTR_WR(lmc_modereg_params1.u, 0)],
6692                        EXTR_WR(lmc_modereg_params1.u, 3),
6693                        EXTR_WR(lmc_modereg_params1.u, 2),
6694                        EXTR_WR(lmc_modereg_params1.u, 1),
6695                        EXTR_WR(lmc_modereg_params1.u, 0));
6696 
6697                 VB_PRT(VBL_DEV, "DIC         %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
6698                         imp_values->dic_ohms[lmc_modereg_params1.s.dic_11],
6699                         imp_values->dic_ohms[lmc_modereg_params1.s.dic_10],
6700                         imp_values->dic_ohms[lmc_modereg_params1.s.dic_01],
6701                         imp_values->dic_ohms[lmc_modereg_params1.s.dic_00],
6702                         lmc_modereg_params1.s.dic_11,
6703                         lmc_modereg_params1.s.dic_10,
6704                         lmc_modereg_params1.s.dic_01,
6705                         lmc_modereg_params1.s.dic_00);
6706 
6707                 if (ddr_type == DDR4_DRAM) {
6708                     bdk_lmcx_modereg_params2_t lmc_modereg_params2;
6709                     /*
6710                      * We must read the CSR, and not depend on odt_config[odt_idx].odt_mask2,
6711                      * since we could have overridden values with envvars.
6712                      * NOTE: this corrects the printout, since the CSR is not written with the old values...
6713                      */
6714                     lmc_modereg_params2.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num));
6715 
6716                     VB_PRT(VBL_DEV, "RTT_PARK    %3d, %3d, %3d, %3d ohms           :  %x,%x,%x,%x\n",
6717                               imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_11],
6718                               imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_10],
6719                               imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_01],
6720                               imp_values->rtt_nom_ohms[lmc_modereg_params2.s.rtt_park_00],
6721                               lmc_modereg_params2.s.rtt_park_11,
6722                               lmc_modereg_params2.s.rtt_park_10,
6723                               lmc_modereg_params2.s.rtt_park_01,
6724                               lmc_modereg_params2.s.rtt_park_00);
6725 
6726                     VB_PRT(VBL_DEV, "%-45s :  0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE",
6727                               lmc_modereg_params2.s.vref_range_11,
6728                               lmc_modereg_params2.s.vref_range_10,
6729                               lmc_modereg_params2.s.vref_range_01,
6730                               lmc_modereg_params2.s.vref_range_00);
6731 
6732                     VB_PRT(VBL_DEV, "%-45s :  0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE",
6733                               lmc_modereg_params2.s.vref_value_11,
6734                               lmc_modereg_params2.s.vref_value_10,
6735                               lmc_modereg_params2.s.vref_value_01,
6736                               lmc_modereg_params2.s.vref_value_00);
6737                 }
6738 
6739                 lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
6740                 if (ddr_rodt_ctl_auto)
6741                     lmc_comp_ctl2.s.rodt_ctl = auto_rodt_ctl;
6742                 else
6743                     lmc_comp_ctl2.s.rodt_ctl = default_rodt_ctl; // back to the original setting
6744                 DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(ddr_interface_num), lmc_comp_ctl2.u);
6745                 lmc_comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(ddr_interface_num));
6746                 VB_PRT(VBL_DEV, "Read ODT_CTL                                  : 0x%x (%d ohms)\n",
6747                           lmc_comp_ctl2.s.rodt_ctl, imp_values->rodt_ohms[lmc_comp_ctl2.s.rodt_ctl]);
6748 
6749                 ////////////////// this is the start of the RANK MAJOR LOOP
6750 
6751                 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
6752                     int best_rank_score = DEFAULT_BEST_RANK_SCORE; /* Start with an arbitrarily high score */
6753                     int best_rank_rtt_nom = 0;
6754                     //int best_rank_nom_ohms = 0;
6755                     int best_rank_ctl = 0;
6756                     int best_rank_ohms = 0;
6757                     int best_rankx = 0;
6758 
6759                     if (!(rank_mask & (1 << rankx)))
6760                         continue;
6761 
6762                     /* Use the delays associated with the best score for each individual rank */
6763                     VB_PRT(VBL_TME, "Evaluating Read-Leveling Scoreboard for Rank %d settings.\n", rankx);
6764 
6765                     // some of the rank-related loops below need to operate only on the ranks of a single DIMM,
6766                     // so create a mask for their use here
6767                     int dimm_rank_mask;
6768                     if (num_ranks == 4)
6769                         dimm_rank_mask = rank_mask; // should be 1111
6770                     else {
6771                         dimm_rank_mask = rank_mask & 3; // should be 01 or 11
6772                         if (rankx >= 2)
6773                             dimm_rank_mask <<= 2; // doing a rank on the second DIMM, should be 0100 or 1100
6774                     }
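                    // Worked example (values assumed for illustration): with two dual-rank
                    // DIMMs, rank_mask is 0xf; evaluating rankx=2 gives
                    // dimm_rank_mask = (0xf & 3) << 2 = 0xc, so only ranks 2 and 3 (the
                    // second DIMM) participate in the per-DIMM loops below.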
6775                     debug_print("DIMM rank mask: 0x%x, rank mask: 0x%x, rankx: %d\n", dimm_rank_mask, rank_mask, rankx);
6776 
6777                     ////////////////// this is the start of the BEST ROW SCORE LOOP
6778 
6779                     for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
6780                         //int rtt_nom_ohms;
6781                         rtt_nom = imp_values->rtt_nom_table[rtt_idx];
6782                         //rtt_nom_ohms = imp_values->rtt_nom_ohms[rtt_nom];
6783 
6784                         /* When the read ODT mask is zero, the dyn_rtt_nom_mask is
6785                            zero and RTT_NOM will not change during read-leveling.
6786                            Since the value is fixed we only need to test it
6787                            once. */
6788                         if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
6789                             continue;
6790 
6791                         debug_print("N%d.LMC%d.R%d: starting RTT_NOM %d (%d)\n",
6792                                     node, ddr_interface_num, rankx, rtt_nom, imp_values->rtt_nom_ohms[rtt_nom]);
6793 
6794                         for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
6795                             int next_ohms = imp_values->rodt_ohms[rodt_ctl];
6796 
6797                             // skip RODT rows in mask, but *NOT* rows with too high a score;
6798                             // we will not use the skipped ones for printing or evaluating, but
6799                             // we need to allow all the non-skipped ones to be candidates for "best"
6800                             if (((1 << rodt_ctl) & rodt_row_skip_mask) != 0) {
6801                                 debug_print("N%d.LMC%d.R%d: SKIPPING rodt:%d (%d)\n",
6802                                             node, ddr_interface_num, rankx, rodt_ctl, next_ohms);
6803                                 continue;
6804                             }
6805                             for (int orankx = 0; orankx < dimm_count * 4; orankx++) { // this is ROFFIX-0528
6806                                 if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
6807                                     continue;
6808 
6809                                 int next_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
6810 
6811                                 if (next_score > best_rank_score) // always skip a higher score
6812                                     continue;
6813                                 if (next_score == best_rank_score) { // if scores are equal
6814                                     if (next_ohms < best_rank_ohms) // always skip lower ohms
6815                                         continue;
6816                                     if (next_ohms == best_rank_ohms) { // if same ohms
6817                                         if (orankx != rankx) // always skip the other rank(s)
6818                                             continue;
6819                                     }
6820                                     // else next_ohms are greater, always choose it
6821                                 }
6822                                 // else next_score is less than current best, so always choose it
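                                // Selection order, with illustrative numbers: a row scoring 40
                                // beats one scoring 50; at equal scores, a 60-ohm row beats a
                                // 48-ohm row; with score and ohms both equal, the row belonging
                                // to the target rankx wins.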
6823                                 VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: new best score: rank %d, rodt %d(%3d), new best %d, previous best %d(%d)\n",
6824                                         node, ddr_interface_num, rankx, orankx, rodt_ctl, next_ohms, next_score,
6825                                         best_rank_score, best_rank_ohms);
6826                                 best_rank_score     = next_score;
6827                                 best_rank_rtt_nom   = rtt_nom;
6828                                 //best_rank_nom_ohms  = rtt_nom_ohms;
6829                                 best_rank_ctl       = rodt_ctl;
6830                                 best_rank_ohms      = next_ohms;
6831                                 best_rankx          = orankx;
6832                                 lmc_rlevel_rank.u   = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
6833 
6834                             } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) */
6835                         } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
6836                     } /* for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) */
6837 
6838                     ////////////////// this is the end of the BEST ROW SCORE LOOP
6839 
6840                     // DANGER, Will Robinson!! Abort now if we did not find a best score at all...
6841                     if (best_rank_score == DEFAULT_BEST_RANK_SCORE) {
6842                         error_print("WARNING: no best rank score found for N%d.LMC%d.R%d - resetting node...\n",
6843                                     node, ddr_interface_num, rankx);
6844                         bdk_wait_usec(500000);
6845                         bdk_reset_chip(node);
6846                     }
6847 
6848                     // FIXME: relative now, but still arbitrary...
6849                     // halve the range if 2 DIMMs unless they are single rank...
6850                     int MAX_RANK_SCORE = best_rank_score;
6851                     MAX_RANK_SCORE += (MAX_RANK_SCORE_LIMIT / ((num_ranks > 1) ? dimm_count : 1));
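                    // Window example (MAX_RANK_SCORE_LIMIT value assumed for illustration):
                    // with best_rank_score=100, MAX_RANK_SCORE_LIMIT=64 and two dual-rank
                    // DIMMs (num_ranks=2, dimm_count=2), rows scoring above 100 + 64/2 = 132
                    // are excluded from the per-byte evaluation below.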
6852 
6853                     if (!ecc_ena) {
6854                         lmc_rlevel_rank.cn83xx.byte8 = lmc_rlevel_rank.cn83xx.byte0; /* ECC is not used */
6855                     }
6856 
6857                     // at the end, write the best row settings to the current rank
6858                     DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u);
6859                     lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
6860 
6861                     bdk_lmcx_rlevel_rankx_t saved_rlevel_rank;
6862                     saved_rlevel_rank.u = lmc_rlevel_rank.u;
6863 
6864                     ////////////////// this is the start of the PRINT LOOP
6865 
6866                     // for pass==0, print the current rank; for pass==1, print the other rank(s)
6867                     // this is done because we want to show each rank's RODT values together, not interlaced
6868 #if COUNT_RL_CANDIDATES
6869                     // keep separate counts per pass - pass=0 is the target rank, pass=1 the other rank(s) on the DIMM
6870                     int mask_skipped[2] = {0,0};
6871                     int score_skipped[2] = {0,0};
6872                     int selected_rows[2] = {0,0};
6873                     int zero_scores[2] = {0,0};
6874 #endif /* COUNT_RL_CANDIDATES */
6875                     for (int pass = 0; pass < 2; pass++ ) {
6876                         for (int orankx = 0; orankx < dimm_count * 4; orankx++) {
6877                             if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
6878                                 continue;
6879 
6880                             if (((pass == 0) && (orankx != rankx)) || ((pass != 0) && (orankx == rankx)))
6881                                 continue;
6882 
6883                             for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
6884                                 rtt_nom = imp_values->rtt_nom_table[rtt_idx];
6885                                 if (dyn_rtt_nom_mask == 0) {
6886                                     print_nom_ohms = -1;
6887                                     if (rtt_idx != min_rtt_nom_idx)
6888                                         continue;
6889                                 } else {
6890                                     print_nom_ohms = imp_values->rtt_nom_ohms[rtt_nom];
6891                                 }
6892 
6893                                 // cycle through all the RODT values...
6894                                 for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
6895                                     bdk_lmcx_rlevel_rankx_t temp_rlevel_rank;
6896                                     int temp_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
6897                                     temp_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
6898 
6899                                     // skip RODT rows in mask, or rows with too high a score;
6900                                     // we will not use them for printing or evaluating...
6901 #if COUNT_RL_CANDIDATES
6902                                     int skip_row;
6903                                     if ((1 << rodt_ctl) & rodt_row_skip_mask) {
6904                                         skip_row = WITH_RODT_SKIPPING;
6905                                         ++mask_skipped[pass];
6906                                     } else if (temp_score > MAX_RANK_SCORE) {
6907                                         skip_row = WITH_RODT_SKIPPING;
6908                                         ++score_skipped[pass];
6909                                     } else {
6910                                         skip_row = WITH_RODT_BLANK;
6911                                         ++selected_rows[pass];
6912                                         if (temp_score == 0)
6913                                             ++zero_scores[pass];
6914                                     }
6915 
6916 #else /* COUNT_RL_CANDIDATES */
6917                                     int skip_row = (((1 << rodt_ctl) & rodt_row_skip_mask) || (temp_score > MAX_RANK_SCORE))
6918                                                     ? WITH_RODT_SKIPPING : WITH_RODT_BLANK;
6919 #endif /* COUNT_RL_CANDIDATES */
6920 
6921                                     // identify and print the BEST ROW when it comes up
6922                                     if ((skip_row == WITH_RODT_BLANK) &&
6923                                         (best_rankx == orankx) &&
6924                                         (best_rank_rtt_nom == rtt_nom) &&
6925                                         (best_rank_ctl == rodt_ctl))
6926                                     {
6927                                         skip_row = WITH_RODT_BESTROW;
6928                                     }
6929 
6930                                     display_RL_with_RODT(node, ddr_interface_num,
6931                                                          temp_rlevel_rank, orankx, temp_score,
6932                                                          print_nom_ohms,
6933                                                          imp_values->rodt_ohms[rodt_ctl],
6934                                                          skip_row);
6935 
6936                                 } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
6937                             } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
6938                         } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) { */
6939                     } /* for (int pass = 0; pass < 2; pass++ ) */
6940 #if COUNT_RL_CANDIDATES
6941                     VB_PRT(VBL_TME, "N%d.LMC%d.R%d: RLROWS: selected %d+%d, zero_scores %d+%d, mask_skipped %d+%d, score_skipped %d+%d\n",
6942                            node, ddr_interface_num, rankx,
6943                            selected_rows[0], selected_rows[1],
6944                            zero_scores[0], zero_scores[1],
6945                            mask_skipped[0], mask_skipped[1],
6946                            score_skipped[0], score_skipped[1]);
6947 #endif /* COUNT_RL_CANDIDATES */
6948 
6949                     ////////////////// this is the end of the PRINT LOOP
6950 
6951                     // now evaluate which bytes need adjusting
6952                     uint64_t byte_msk = 0x3f; // 6-bit fields
6953                     uint64_t best_byte, new_byte, temp_byte, orig_best_byte;
6954 
6955                     uint64_t rank_best_bytes[9]; // collect the new byte values; first init with current best for neighbor use
6956                     for (int byte_idx = 0, byte_sh = 0; byte_idx < 8+ecc_ena; byte_idx++, byte_sh += 6) {
6957                         rank_best_bytes[byte_idx] = (lmc_rlevel_rank.u >> byte_sh) & byte_msk;
6958                     }
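                    // Each byte-lane delay is a 6-bit field packed into the rlevel CSR:
                    // byte_idx N occupies bits [6N+5:6N], so byte 2, for example, is
                    // extracted as (lmc_rlevel_rank.u >> 12) & 0x3f.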
6959 
6960                     ////////////////// this is the start of the BEST BYTE LOOP
6961 
6962                     for (int byte_idx = 0, byte_sh = 0; byte_idx < 8+ecc_ena; byte_idx++, byte_sh += 6) {
6963                         best_byte = orig_best_byte = rank_best_bytes[byte_idx];
6964 
6965                         ////////////////// this is the start of the BEST BYTE AVERAGING LOOP
6966 
6967                         // validate the initial "best" byte by looking at the average of the unskipped byte-column entries
6968                         // we want to do this before we go further, so we can try to start with a better initial value
6969                         // this is the so-called "BESTBUY" patch set
6970                         int sum = 0, count = 0;
6971 
6972                         for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
6973                             rtt_nom = imp_values->rtt_nom_table[rtt_idx];
6974                             if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
6975                                 continue;
6976 
6977                             for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
6978                                 bdk_lmcx_rlevel_rankx_t temp_rlevel_rank;
6979                                 int temp_score;
6980                                 for (int orankx = 0; orankx < dimm_count * 4; orankx++) { // average over all the ranks
6981                                     if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
6982                                         continue;
6983                                     temp_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
6984                                     // skip RODT rows in mask, or rows with too high a score;
6985                                     // we will not use them for printing or evaluating...
6986 
6987                                     if (!((1 << rodt_ctl) & rodt_row_skip_mask) &&
6988                                         (temp_score <= MAX_RANK_SCORE))
6989                                     {
6990                                         temp_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
6991                                         temp_byte = (temp_rlevel_rank.u >> byte_sh) & byte_msk;
6992                                         sum += temp_byte;
6993                                         count++;
6994                                     }
6995                                 } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) */
6996                             } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
6997                         } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
6998 
6999                         ////////////////// this is the end of the BEST BYTE AVERAGING LOOP
7000 
7001 
7002                         uint64_t avg_byte = divide_nint(sum, count); // FIXME: validate count and sum??
7003                         int avg_diff = (int)best_byte - (int)avg_byte;
7004                         new_byte = best_byte;
7005                         if (avg_diff != 0) {
7006                             // bump best up/dn by 1, not necessarily all the way to avg
7007                             new_byte = best_byte + ((avg_diff > 0) ? -1: 1);
7008                         }
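                        // Illustrative case: best_byte=10 and avg_byte=12 give avg_diff=-2,
                        // so new_byte becomes 11: one step toward the average, never a jump
                        // all the way to it.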
7009 
7010                         VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: START:   Byte %d: best %d is different by %d from average %d, using %d.\n",
7011                                         node, ddr_interface_num, rankx,
7012                                         byte_idx, (int)best_byte, avg_diff, (int)avg_byte, (int)new_byte);
7013                         best_byte = new_byte;
7014 
7015                         // At this point best_byte is either:
7016                         // 1. the original byte-column value from the best scoring RODT row, OR
7017                         // 2. that value bumped toward the average of all the byte-column values
7018                         //
7019                         // best_byte will not change from here on...
7020 
7021                         ////////////////// this is the start of the BEST BYTE COUNTING LOOP
7022 
7023                         // NOTE: we do this next loop separately from above, because we count relative to "best_byte"
7024                         // which may have been modified by the above averaging operation...
7025                         //
7026                         // Also, the above only moves toward the average by +- 1, so that we will always have a count
7027                         // of at least 1 for the original best byte, even if all the others are further away and not counted;
7028                         // this ensures we will go back to the original if no others are counted...
7029                         // FIXME: this could cause issues if the range of values for a byte-lane is too disparate...
7030                         int count_less = 0, count_same = 0, count_more = 0;
7031 #if FAILSAFE_CHECK
7032                         uint64_t count_byte = new_byte; // save the value we will count around
7033 #endif /* FAILSAFE_CHECK */
7034 #if RANK_MAJORITY
7035                         int rank_less = 0, rank_same = 0, rank_more = 0;
7036 #endif /* RANK_MAJORITY */
7037 
7038                         for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
7039                             rtt_nom = imp_values->rtt_nom_table[rtt_idx];
7040                             if ((dyn_rtt_nom_mask == 0) && (rtt_idx != min_rtt_nom_idx))
7041                                 continue;
7042 
7043                             for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
7044                                 bdk_lmcx_rlevel_rankx_t temp_rlevel_rank;
7045                                 int temp_score;
7046                                 for (int orankx = 0; orankx < dimm_count * 4; orankx++) { // count over all the ranks
7047                                     if (!(dimm_rank_mask & (1 << orankx))) // stay on the same DIMM
7048                                         continue;
7049                                     temp_score = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].score;
7050                                     // skip RODT rows in mask, or rows with too high a score;
7051                                     // we will not use them for printing or evaluating...
7052                                     if (((1 << rodt_ctl) & rodt_row_skip_mask) ||
7053                                         (temp_score > MAX_RANK_SCORE))
7054                                     {
7055                                         continue;
7056                                     }
7057                                     temp_rlevel_rank.u = rlevel_scoreboard[rtt_nom][rodt_ctl][orankx].setting;
7058                                     temp_byte = (temp_rlevel_rank.u >> byte_sh) & byte_msk;
7059 
7060                                     if (temp_byte == 0) // do not count it if illegal
7061                                         continue;
7062                                     else if (temp_byte == best_byte)
7063                                         count_same++;
7064                                     else if (temp_byte == best_byte - 1)
7065                                         count_less++;
7066                                     else if (temp_byte == best_byte + 1)
7067                                         count_more++;
7068                                     // else do not count anything more than 1 away from the best
7069 #if RANK_MAJORITY
7070                                     // FIXME? count is relative to best_byte; should it be rank-based?
7071                                     if (orankx != rankx) // rank counts only on main rank
7072                                         continue;
7073                                     else if (temp_byte == best_byte)
7074                                         rank_same++;
7075                                     else if (temp_byte == best_byte - 1)
7076                                         rank_less++;
7077                                     else if (temp_byte == best_byte + 1)
7078                                         rank_more++;
7079 #endif /* RANK_MAJORITY */
7080                                 } /* for (int orankx = 0; orankx < dimm_count * 4; orankx++) */
7081                             } /* for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl; --rodt_ctl) */
7082                         } /* for (rtt_idx=min_rtt_nom_idx; rtt_idx<=max_rtt_nom_idx; ++rtt_idx) */
7083 
7084 #if RANK_MAJORITY
7085                         VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: COUNT:   Byte %d: orig %d now %d, more %d same %d less %d (%d/%d/%d)\n",
7086                                         node, ddr_interface_num, rankx,
7087                                         byte_idx, (int)orig_best_byte, (int)best_byte,
7088                                         count_more, count_same, count_less,
7089                                         rank_more, rank_same, rank_less);
7090 #else /* RANK_MAJORITY */
7091                         VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: COUNT:   Byte %d: orig %d now %d, more %d same %d less %d\n",
7092                                         node, ddr_interface_num, rankx,
7093                                         byte_idx, (int)orig_best_byte, (int)best_byte,
7094                                         count_more, count_same, count_less);
7095 #endif /* RANK_MAJORITY */
7096                         ////////////////// this is the end of the BEST BYTE COUNTING LOOP
7097 
7098                         // choose the new byte value
7099                         // we need to check that there is no gap greater than 2 between adjacent bytes
7100                         //  (adjacency depends on DIMM type)
7101                         // use the neighbor value to help decide
7102                         // initially, the rank_best_bytes[] will contain values from the chosen lowest score rank
7103                         new_byte = 0;
7104 
7105                         // neighbor is index-1 unless we are index 0 or index 8 (ECC)
7106                         // neighbor is byte_idx - 1, except byte 0 uses byte 1, and the ECC byte (8) uses byte 3
7107                         uint64_t neigh_byte = rank_best_bytes[neighbor];
7108 
7109 
7110                         // can go up or down or stay the same, so look at a numeric average to help
7111                         new_byte = divide_nint(((count_more * (best_byte + 1)) +
7112                                                 (count_same * (best_byte + 0)) +
7113                                                 (count_less * (best_byte - 1))),
7114                                                max(1, (count_more + count_same + count_less)));
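                        // Worked example (illustrative counts): with best_byte=10,
                        // count_more=2, count_same=3, count_less=1, this computes
                        // divide_nint(2*11 + 3*10 + 1*9, 6) = divide_nint(61, 6) = 10,
                        // so the weighted average keeps the best value in this case.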
7115 
7116                         // use neighbor to help choose with average
7117                         if ((byte_idx > 0) && (_abs(neigh_byte - new_byte) > 2)) // but not for byte 0
7118                         {
7119                             uint64_t avg_pick = new_byte;
7120                             if ((new_byte - best_byte) != 0)
7121                                 new_byte = best_byte; // back to best, average did not get better
7122                             else // avg was the same, still too far, now move it towards the neighbor
7123                                 new_byte += (neigh_byte > new_byte) ? 1 : -1;
7124 
7125                             VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: AVERAGE: Byte %d: neighbor %d too different %d from average %d, picking %d.\n",
7126                                             node, ddr_interface_num, rankx,
7127                                             byte_idx, neighbor, (int)neigh_byte, (int)avg_pick, (int)new_byte);
7128                         }
7129 #if MAJORITY_OVER_AVG
7130                         // NOTE:
7131                         // For now, we let the neighbor processing above trump the new simple majority processing here.
7132                         // This is mostly because we have seen no smoking gun for a bad neighbor choice (yet?).
7133                         // Also note that we will ALWAYS be using byte 0 majority, because of the if clause above.
7134                         else {
7135                             // majority is dependent on the counts, which are relative to best_byte, so start there
7136                             uint64_t maj_byte = best_byte;
7137                             if ((count_more > count_same) && (count_more > count_less)) {
7138                                 maj_byte++;
7139                             } else if ((count_less > count_same) && (count_less > count_more)) {
7140                                 maj_byte--;
7141                             }
7142                             if (maj_byte != new_byte) {
7143                                 // print only when majority choice is different from average
7144                                 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: MAJORTY: Byte %d: picking majority of %d over average %d.\n",
7145                                                 node, ddr_interface_num, rankx,
7146                                                 byte_idx, (int)maj_byte, (int)new_byte);
7147                                 new_byte = maj_byte;
7148                             } else {
7149                                 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n",
7150                                                 node, ddr_interface_num, rankx,
7151                                                 byte_idx, (int)new_byte);
7152                             }
7153 #if RANK_MAJORITY
7154                             // rank majority is dependent on the rank counts, which are relative to best_byte,
7155                             // so start there, and adjust according to the rank counts majority
7156                             uint64_t rank_maj = best_byte;
7157                             if ((rank_more > rank_same) && (rank_more > rank_less)) {
7158                                 rank_maj++;
7159                             } else if ((rank_less > rank_same) && (rank_less > rank_more)) {
7160                                 rank_maj--;
7161                             }
7162                             int rank_sum = rank_more + rank_same + rank_less;
7163 
7164                             // now, let rank majority possibly rule over the current new_byte however we got it
7165                             if (rank_maj != new_byte) { // only if different
7166                                 // Here is where we decide whether to completely apply RANK_MAJORITY or not
7167                                 // FIXME: For the moment, we do it ONLY when running 2-slot configs
7168                                 // FIXME? or when rank_sum is big enough?
7169                                 if ((dimm_count > 1) || (rank_sum > 2)) {
7170                                     // print only when rank majority choice is selected
7171                                     VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: RANKMAJ: Byte %d: picking %d over %d.\n",
7172                                                     node, ddr_interface_num, rankx,
7173                                                     byte_idx, (int)rank_maj, (int)new_byte);
7174                                     new_byte = rank_maj;
7175                                 } else { // FIXME: print some info when we could have chosen RANKMAJ but did not
7176                                     VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: RANKMAJ: Byte %d: NOT using %d over %d (best=%d,sum=%d).\n",
7177                                                     node, ddr_interface_num, rankx,
7178                                                     byte_idx, (int)rank_maj, (int)new_byte,
7179                                                     (int)best_byte, rank_sum);
7180                                 }
7181                             }
7182 #endif /* RANK_MAJORITY */
7183                         }
7184 #else
7185                         else {
7186                             VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n",
7187                                             node, ddr_interface_num, rankx,
7188                                             byte_idx, (int)new_byte);
7189                         }
7190 #endif
7191 #if FAILSAFE_CHECK
7192                         // one last check:
7193                         // if new_byte is still count_byte, BUT there was no count for that value, DO SOMETHING!!!
7194                         // FIXME: go back to original best byte from the best row
7195                         if ((new_byte == count_byte) && (count_same == 0)) {
7196                             new_byte = orig_best_byte;
7197                             VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: FAILSAF: Byte %d: going back to original %d.\n",
7198                                             node, ddr_interface_num, rankx,
7199                                             byte_idx, (int)new_byte);
7200                         }
7201 #endif /* FAILSAFE_CHECK */
7202 #if PERFECT_BITMASK_COUNTING
7203                         // Look at counts for "perfect" bitmasks if we had any for this byte-lane.
7204                         // Remember, we only counted for DDR4, so zero means none or DDR3, and we bypass this...
7205                         if (rank_perfect_counts[rankx].total[byte_idx] > 0) {
7206                             // FIXME: should be more error checking, look for ties, etc...
7207                             /* FIXME(dhendrix): i shadows another local variable, changed to _i in this block */
7208 //                            int i, delay_count, delay_value, delay_max;
7209                             int _i, delay_count, delay_value, delay_max;
7210                             uint32_t ties;
7211                             delay_value = -1;
7212                             delay_max = 0;
7213                             ties = 0;
7214 
7215                             for (_i = 0; _i < 32; _i++) {
7216                                 delay_count = rank_perfect_counts[rankx].count[byte_idx][_i];
7217                                 if (delay_count > 0) { // only look closer if there are any...
7218                                     if (delay_count > delay_max) {
7219                                         delay_max = delay_count;
7220                                         delay_value = _i;
7221                                         ties = 0; // reset ties to none
7222                                     } else if (delay_count == delay_max) {
7223                                         if (ties == 0)
7224                                             ties = 1UL << delay_value; // put in original value
7225                                         ties |= 1UL << _i; // add new value
7226                                     }
7227                                 }
7228                             } /* for (_i = 0; _i < 32; _i++) */
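                            // Illustrative case: if delays 5 and 9 both reach delay_max,
                            // ties = (1UL << 5) | (1UL << 9) = 0x220, while delay_value
                            // still holds the first tied delay found (5).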
7229 
7230                             if (delay_value >= 0) {
7231                                 if (ties != 0) {
7232                                     if (ties & (1UL << (int)new_byte)) {
7233                                         // leave choice as new_byte if any tied one is the same...
7236                                         delay_value = (int)new_byte;
7237                                         VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: TIES (0x%x) INCLUDED %d (%d)\n",
7238                                                   node, ddr_interface_num, rankx, byte_idx, ties, (int)new_byte, delay_max);
7239                                     } else {
7240                                         // FIXME: should choose a perfect one!!!
7241                                         // FIXME: for now, leave the choice as new_byte
7242                                         delay_value = (int)new_byte;
7243                                         VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: TIES (0x%x) OMITTED %d (%d)\n",
7244                                                   node, ddr_interface_num, rankx, byte_idx, ties, (int)new_byte, delay_max);
7245                                     }
7246                                 } /* if (ties != 0) */
7247 
7248                                 if (delay_value != (int)new_byte) {
7249                                     delay_count = rank_perfect_counts[rankx].count[byte_idx][(int)new_byte];
7250                                     VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: DIFF from %d (%d), USING %d (%d)\n",
7251                                            node, ddr_interface_num, rankx, byte_idx, (int)new_byte,
7252                                            delay_count, delay_value, delay_max);
7253                                     new_byte = (uint64_t)delay_value; // FIXME: make this optional via envvar?
7254                                 } else {
7255                                     debug_print("N%d.LMC%d.R%d: PERFECT: Byte %d: SAME as %d (%d)\n",
7256                                                 node, ddr_interface_num, rankx, byte_idx, new_byte, delay_max);
7257                                 }
7258                             }
7259                         } /* if (rank_perfect_counts[rankx].total[byte_idx] > 0) */
7260                         else {
7261                             if (ddr_type == DDR4_DRAM) { // only report when DDR4
7262                                 // FIXME: remove or increase VBL for this output...
7263                                 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: PERFECT: Byte %d: ZERO perfect bitmasks\n",
7264                                           node, ddr_interface_num, rankx, byte_idx);
7265                             }
7266                         } /* if (rank_perfect_counts[rankx].total[byte_idx] > 0) */
7267 #endif /* PERFECT_BITMASK_COUNTING */
7268 
7269                         VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: SUMMARY: Byte %d: %s: orig %d now %d, more %d same %d less %d, using %d\n",
7270                                         node, ddr_interface_num, rankx,
7271                                         byte_idx, "AVG", (int)orig_best_byte,
7272                                         (int)best_byte, count_more, count_same, count_less, (int)new_byte);
7273 
7274                         // update the byte with the new value (NOTE: orig value in the CSR may not be current "best")
7275                         lmc_rlevel_rank.u &= ~(byte_msk << byte_sh);
7276                         lmc_rlevel_rank.u |= (new_byte << byte_sh);
7277 
7278                         rank_best_bytes[byte_idx] = new_byte; // save new best for neighbor use
7279 
7280                     } /* for (byte_idx = 0; byte_idx < 8+ecc_ena; byte_idx++) */
7281 
7282                     ////////////////// this is the end of the BEST BYTE LOOP
7283 
7284                     if (saved_rlevel_rank.u != lmc_rlevel_rank.u) {
7285                         DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u);
7286                         lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
7287                         debug_print("Adjusting Read-Leveling per-RANK settings.\n");
7288                     } else {
7289                         debug_print("Not Adjusting Read-Leveling per-RANK settings.\n");
7290                     }
7291                     display_RL_with_final(node, ddr_interface_num, lmc_rlevel_rank, rankx);
7292 
7293 #if RLEXTRAS_PATCH
7294 #define RLEVEL_RANKX_EXTRAS_INCR  4
7295                     if ((rank_mask & 0x0F) != 0x0F) { // if there are unused entries to be filled
7296                         bdk_lmcx_rlevel_rankx_t temp_rlevel_rank = lmc_rlevel_rank; // copy the current rank
7297                         int byte, delay;
7298                         if (rankx < 3) {
7299                             debug_print("N%d.LMC%d.R%d: checking for RLEVEL_RANK unused entries.\n",
7300                                       node, ddr_interface_num, rankx);
7301                             for (byte = 0; byte < 9; byte++) { // modify the copy in prep for writing to empty slot(s)
7302                                 delay = get_rlevel_rank_struct(&temp_rlevel_rank, byte) + RLEVEL_RANKX_EXTRAS_INCR;
7303                                 if (delay > (int)RLEVEL_BYTE_MSK) delay = RLEVEL_BYTE_MSK;
7304                                 update_rlevel_rank_struct(&temp_rlevel_rank, byte, delay);
7305                             }
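                            // e.g. a lane delay of 0x3d becomes 0x3f after the +4 increment
                            // (clamped, assuming RLEVEL_BYTE_MSK is the 6-bit mask 0x3f);
                            // the padded copy is only written to rank slots that rank_mask
                            // marks as unpopulated.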
7306                             if (rankx == 0) { // if rank 0, write rank 1 and rank 2 here if empty
7307                                 if (!(rank_mask & (1<<1))) { // check that rank 1 is empty
7308                                     VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7309                                                     node, ddr_interface_num, rankx, 1);
7310                                     DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, 1), temp_rlevel_rank.u);
7311                                 }
7312                                 if (!(rank_mask & (1<<2))) { // check that rank 2 is empty
7313                                     VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7314                                                     node, ddr_interface_num, rankx, 2);
7315                                     DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, 2), temp_rlevel_rank.u);
7316                                 }
7317                             }
7318                             // if ranks 0, 1 or 2, write rank 3 here if empty
7319                             if (!(rank_mask & (1<<3))) { // check that rank 3 is empty
7320                                 VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7321                                           node, ddr_interface_num, rankx, 3);
7322                                 DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, 3), temp_rlevel_rank.u);
7323                             }
7324                         }
7325                     }
7326 #endif /* RLEXTRAS_PATCH */
7327                 } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
7328 
7329                 ////////////////// this is the end of the RANK MAJOR LOOP
7330 
7331             }  /* Evaluation block */
7332         } /* while(rlevel_debug_loops--) */
7333 
7334         lmc_control.s.ddr2t           = save_ddr2t;
7335         DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
7336         lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
7337         ddr_print("%-45s : %6d\n", "DDR2T", lmc_control.s.ddr2t); /* Display final 2T value */
7338 
7339 
7340         perform_ddr_init_sequence(node, rank_mask, ddr_interface_num);
7341 
7342         for (rankx = 0; rankx < dimm_count * 4;rankx++) {
7343             uint64_t value;
7344             int parameter_set = 0;
7345             if (!(rank_mask & (1 << rankx)))
7346                 continue;
7347 
7348             lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
7349 
7350             for (i=0; i<9; ++i) {
7351                 if ((s = lookup_env_parameter("ddr%d_rlevel_rank%d_byte%d", ddr_interface_num, rankx, i)) != NULL) {
7352                     parameter_set |= 1;
7353                     value = strtoul(s, NULL, 0);
7354 
7355                     update_rlevel_rank_struct(&lmc_rlevel_rank, i, value);
7356                 }
7357             }
7358 
7359             if ((s = lookup_env_parameter_ull("ddr%d_rlevel_rank%d", ddr_interface_num, rankx)) != NULL) {
7360                 parameter_set |= 1;
7361                 value = strtoull(s, NULL, 0);
7362                 lmc_rlevel_rank.u = value;
7363             }
7364 
7365             if (parameter_set) {
7366                 DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx), lmc_rlevel_rank.u);
7367                 display_RL(node, ddr_interface_num, lmc_rlevel_rank, rankx);
7368             }
7369         }
7370     }
7371 
7372     /* Workaround Trcd overflow by using Additive latency. */
7373     if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X))
7374     {
7375         bdk_lmcx_modereg_params0_t lmc_modereg_params0;
7376         bdk_lmcx_timing_params1_t lmc_timing_params1;
7377         bdk_lmcx_control_t lmc_control;
7378         int rankx;
7379 
7380         lmc_timing_params1.u = BDK_CSR_READ(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num));
7381         lmc_modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num));
7382         lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
7383 
7384         if (lmc_timing_params1.s.trcd == 0) {
7385             ddr_print("Workaround Trcd overflow by using Additive latency.\n");
7386             lmc_timing_params1.s.trcd     = 12; /* Hard code this to 12 and enable additive latency */
7387             lmc_modereg_params0.s.al      = 2; /* CL-2 */
7388             lmc_control.s.pocas           = 1;
7389 
7390             ddr_print("MODEREG_PARAMS0                               : 0x%016llx\n", lmc_modereg_params0.u);
7391             DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(ddr_interface_num), lmc_modereg_params0.u);
7392             ddr_print("TIMING_PARAMS1                                : 0x%016llx\n", lmc_timing_params1.u);
7393             DRAM_CSR_WRITE(node, BDK_LMCX_TIMING_PARAMS1(ddr_interface_num), lmc_timing_params1.u);
7394 
7395             ddr_print("LMC_CONTROL                                   : 0x%016llx\n", lmc_control.u);
7396             DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
7397 
7398             for (rankx = 0; rankx < dimm_count * 4; rankx++) {
7399                 if (!(rank_mask & (1 << rankx)))
7400                     continue;
7401 
7402                 ddr4_mrw(node, ddr_interface_num, rankx, -1, 1, 0); /* MR1 */
7403             }
7404         }
7405     }
7406 
7407     // this is here just for output, to allow check of the Deskew settings one last time...
7408     if (! disable_deskew_training) {
7409         deskew_counts_t dsk_counts;
7410         VB_PRT(VBL_TME, "N%d.LMC%d: Check Deskew Settings before software Write-Leveling.\n",
7411                   node, ddr_interface_num);
7412         Validate_Read_Deskew_Training(node, rank_mask, ddr_interface_num, &dsk_counts, VBL_TME); // TME for FINAL
7413     }
7414 
7415 
7416     /* Workaround Errata 26304 ([email protected])
7417 
7418        When the CSRs LMCX_DLL_CTL3[WR_DESKEW_ENA] = 1 AND
7419        LMCX_PHY_CTL2[DQS[0..8]_DSK_ADJ] > 4, set
7420        LMCX_EXT_CONFIG[DRIVE_ENA_BPRCH] = 1.
7421     */
7422     if (CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS2_X)) { // only for 88XX pass 2, not 81xx or 83xx
7423         bdk_lmcx_dll_ctl3_t dll_ctl3;
7424         bdk_lmcx_phy_ctl2_t phy_ctl2;
7425         bdk_lmcx_ext_config_t ext_config;
7426         int increased_dsk_adj = 0;
7427         int byte;
7428 
7429         phy_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL2(ddr_interface_num));
7430         ext_config.u = BDK_CSR_READ(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num));
7431         dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
7432 
7433         for (byte = 0; byte < 8; ++byte) {
7434             if (!(ddr_interface_bytemask&(1<<byte)))
7435                 continue;
7436             increased_dsk_adj |= (((phy_ctl2.u >> (byte*3)) & 0x7) > 4);
7437         }
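        // The loop above treats PHY_CTL2 as consecutive 3-bit DQS[n]_DSK_ADJ fields:
        // data byte n (0..7) sits at bits [3n+2:3n], so byte 2 is
        // (phy_ctl2.u >> 6) & 0x7, and any field value above 4 arms the workaround.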
7438 
7439         if ((dll_ctl3.s.wr_deskew_ena == 1) && increased_dsk_adj) {
7440             ext_config.s.drive_ena_bprch = 1;
7441             DRAM_CSR_WRITE(node, BDK_LMCX_EXT_CONFIG(ddr_interface_num),
7442                                       ext_config.u);
7443         }
7444     }
7445 
7446     /*
7447      * 6.9.13 DRAM Vref Training for DDR4
7448      *
7449      * This includes software write-leveling
7450      */
7451 
7452     { // Software Write-Leveling block
7453 
7454         /* Try to determine/optimize write-level delays experimentally. */
7455 #pragma pack(push,1)
7456         bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank;
7457         bdk_lmcx_wlevel_rankx_t lmc_wlevel_rank_hw_results;
7458         int byte;
7459         int delay;
7460         int rankx = 0;
7461         int active_rank;
7462 #if !DISABLE_SW_WL_PASS_2
7463         bdk_lmcx_rlevel_rankx_t lmc_rlevel_rank;
7464         int sw_wlevel_offset = 1;
7465 #endif
7466         int sw_wlevel_enable = 1; /* FIX... Should be customizable. */
7467         int interfaces;
7468         int measured_vref_flag;
7469         typedef enum {
7470             WL_ESTIMATED = 0,   /* HW/SW wleveling failed. Results
7471                                    estimated. */
7472             WL_HARDWARE  = 1,   /* H/W wleveling succeeded */
7473             WL_SOFTWARE  = 2,   /* S/W wleveling passed 2 contiguous
7474                                    settings. */
7475             WL_SOFTWARE1 = 3,   /* S/W wleveling passed 1 marginal
7476                                    setting. */
7477         } sw_wl_status_t;
7478 
7479         static const char *wl_status_strings[] = {
7480             "(e)",
7481             "   ",
7482             "   ",
7483             "(1)"
7484         };
7485         int sw_wlevel_hw_default = 1; // FIXME: make H/W assist the default now
7486 #pragma pack(pop)
7487 
7488         if ((s = lookup_env_parameter("ddr_sw_wlevel_hw")) != NULL) {
7489             sw_wlevel_hw_default = !!strtoul(s, NULL, 0);
7490         }
7491 
7492         // cannot use HW-assist when doing 32-bit
7493         if (! ddr_interface_64b) {
7494             sw_wlevel_hw_default = 0;
7495         }
7496 
7497         if ((s = lookup_env_parameter("ddr_software_wlevel")) != NULL) {
7498             sw_wlevel_enable = strtoul(s, NULL, 0);
7499         }
7500 
7501 #if SWL_WITH_HW_ALTS_CHOOSE_SW
7502         // Choose the SW algo for SWL if any HWL alternates were found
7503         // NOTE: we have to do this here, and for all ranks, since HW-assist including ECC requires ECC to be enabled
7504         for (rankx = 0; rankx < dimm_count * 4; rankx++) {
7505             if (!sw_wlevel_enable)
7506                 break;
7507             if (!(rank_mask & (1 << rankx)))
7508                 continue;
7509 
7510             // if we are doing HW-assist, and there are alternates, switch to SW-algorithm for all
7511             if (sw_wlevel_hw_default && hwl_alts[rankx].hwl_alt_mask) {
7512                 ddr_print("N%d.LMC%d.R%d: Using SW algorithm for write-leveling this rank\n",
7513                           node, ddr_interface_num, rankx);
7514                 sw_wlevel_hw_default = 0;
7515                 break;
7516             }
7517         } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
7518 #endif
7519 
7520         /* Get the measured_vref setting from the config, check for an override... */
7521         /* NOTE: measured_vref=1 (ON) means force use of MEASURED Vref... */
7522         // NOTE: measured VREF can only be done for DDR4
7523         if (ddr_type == DDR4_DRAM) {
7524             measured_vref_flag = custom_lmc_config->measured_vref;
7525             if ((s = lookup_env_parameter("ddr_measured_vref")) != NULL) {
7526                 measured_vref_flag = !!strtoul(s, NULL, 0);
7527             }
7528         } else {
7529             measured_vref_flag = 0; // OFF for DDR3
7530         }
7531 
7532         /* Ensure ECC is disabled for DRAM tests when using the SW algorithm; otherwise leave it untouched */
7533         if (!sw_wlevel_hw_default) {
7534             lmc_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(ddr_interface_num));
7535             lmc_config.s.ecc_ena = 0;
7536             DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
7537         }
7538 
7539 #if USE_L2_WAYS_LIMIT
7540         limit_l2_ways(node, 0, 0);       /* Disable l2 sets for DRAM testing */
7541 #endif
7542 
7543         /* We need to track absolute rank number, as well as how many
7544         ** active ranks we have.  Two single rank DIMMs show up as
7545         ** ranks 0 and 2, but only 2 ranks are active. */
7546         active_rank = 0;
7547 
7548         interfaces = bdk_pop(ddr_interface_mask);
7549 
7550 #define VREF_RANGE1_LIMIT 0x33 // range1 is valid for 0x00 - 0x32
7551 #define VREF_RANGE2_LIMIT 0x18 // range2 is valid for 0x00 - 0x17
7552 // full window is valid for 0x00 to 0x4A
7553 // let 0x00 - 0x17 be range2, 0x18 - 0x4a be range 1
7554 #define VREF_LIMIT        (VREF_RANGE1_LIMIT + VREF_RANGE2_LIMIT)
7555 #define VREF_FINAL        (VREF_LIMIT - 1)
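// The combined sweep index therefore runs 0x00..0x4A (VREF_LIMIT = 0x33 + 0x18 = 0x4B,
// VREF_FINAL = 0x4A): indices 0x00-0x17 select DDR4 Vref range 2 directly, and indices
// 0x18-0x4A select range 1 as (index - 0x18), matching the vrange/vvalue split below.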
7556 
7557         for (rankx = 0; rankx < dimm_count * 4; rankx++) {
7558             uint64_t rank_addr;
7559             int vref_value, final_vref_value, final_vref_range = 0;
7560             int start_vref_value = 0, computed_final_vref_value = -1;
7561             char best_vref_values_count, vref_values_count;
7562             char best_vref_values_start, vref_values_start;
7563 
7564             int bytes_failed;
7565             sw_wl_status_t byte_test_status[9];
7566             sw_wl_status_t sw_wl_rank_status = WL_HARDWARE;
7567             int sw_wl_failed = 0;
7568             int sw_wlevel_hw = sw_wlevel_hw_default;
7569 
7570             if (!sw_wlevel_enable)
7571                 break;
7572 
7573             if (!(rank_mask & (1 << rankx)))
7574                 continue;
7575 
7576             ddr_print("N%d.LMC%d.R%d: Performing Software Write-Leveling %s\n",
7577                       node, ddr_interface_num, rankx,
7578                       (sw_wlevel_hw) ? "with H/W assist" : "with S/W algorithm");
7579 
7580             if ((ddr_type == DDR4_DRAM) && (num_ranks != 4)) {
7581                 // always compute when we can...
7582                 computed_final_vref_value = compute_vref_value(node, ddr_interface_num, rankx,
7583                                                                dimm_count, num_ranks, imp_values,
7584                                                                is_stacked_die);
7585                 if (!measured_vref_flag) // but only use it if allowed
7586                     start_vref_value = VREF_FINAL; // skip all the measured Vref processing, just the final setting
7587             }
7588 
7589             /* Save off the h/w wl results */
7590             lmc_wlevel_rank_hw_results.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
7591 
7592             vref_values_count = 0;
7593             vref_values_start = 0;
7594             best_vref_values_count = 0;
7595             best_vref_values_start = 0;
7596 
7597             /* Loop one extra time using the Final Vref value. */
7598             for (vref_value = start_vref_value; vref_value < VREF_LIMIT; ++vref_value) {
7599                 if (ddr_type == DDR4_DRAM) {
7600                     if (vref_value < VREF_FINAL) {
7601                         int vrange, vvalue;
7602                         if (vref_value < VREF_RANGE2_LIMIT) {
7603                             vrange = 1; vvalue = vref_value;
7604                         } else {
7605                             vrange = 0; vvalue = vref_value - VREF_RANGE2_LIMIT;
7606                         }
7607                         set_vref(node, ddr_interface_num, rankx,
7608                                  vrange, vvalue);
7609                     } else { /* if (vref_value < VREF_FINAL) */
7610                         /* Print the final Vref value first. */
7611 
7612                         /* Always print the computed value first if it's valid */
7613                         if (computed_final_vref_value >= 0) {
7614                             ddr_print("N%d.LMC%d.R%d: Vref Computed Summary                 :"
7615                                       "              %2d (0x%02x)\n",
7616                                       node, ddr_interface_num,
7617                                       rankx, computed_final_vref_value,
7618                                       computed_final_vref_value);
7619                         }
7620                         if (!measured_vref_flag) { // setup to use the computed
7621                             best_vref_values_count = 1;
7622                             final_vref_value = computed_final_vref_value;
7623                         } else { // setup to use the measured
7624                             if (best_vref_values_count > 0) {
7625                                 best_vref_values_count = max(best_vref_values_count, 2);
7626 #if 0
7627                                 // NOTE: this already adjusts VREF via calculating 40% rather than 50%
7628                                 final_vref_value = best_vref_values_start + divide_roundup((best_vref_values_count-1)*4,10);
7629                                 ddr_print("N%d.LMC%d.R%d: Vref Training Summary                 :"
7630                                           "    %2d <----- %2d (0x%02x) -----> %2d range: %2d\n",
7631                                           node, ddr_interface_num, rankx, best_vref_values_start,
7632                                           final_vref_value, final_vref_value,
7633                                           best_vref_values_start+best_vref_values_count-1,
7634                                           best_vref_values_count-1);
7635 #else
7636                                 final_vref_value = best_vref_values_start + divide_nint(best_vref_values_count - 1, 2);
7637                                 if (final_vref_value < VREF_RANGE2_LIMIT) {
7638                                     final_vref_range = 1;
7639                                 } else {
7640                                     final_vref_range = 0; final_vref_value -= VREF_RANGE2_LIMIT;
7641                                 }
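                                // Worked example (illustrative window): a passing window of
                                // 0x20..0x28 gives best_vref_values_start=0x20 and a count
                                // of 9, so final_vref_value = 0x20 + divide_nint(8, 2) = 0x24;
                                // 0x24 >= VREF_RANGE2_LIMIT, so it is reported as range 1
                                // with value 0x24 - 0x18 = 0x0C.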
7642                                 {
7643                                     int vvlo = best_vref_values_start;
7644                                     int vrlo;
7645                                     if (vvlo < VREF_RANGE2_LIMIT) {
7646                                         vrlo = 2;
7647                                     } else {
7648                                         vrlo = 1; vvlo -= VREF_RANGE2_LIMIT;
7649                                     }
7650 
7651                                     int vvhi = best_vref_values_start + best_vref_values_count - 1;
7652                                     int vrhi;
7653                                     if (vvhi < VREF_RANGE2_LIMIT) {
7654                                         vrhi = 2;
7655                                     } else {
7656                                         vrhi = 1; vvhi -= VREF_RANGE2_LIMIT;
7657                                     }
7658                                     ddr_print("N%d.LMC%d.R%d: Vref Training Summary                 :"
7659                                               "  0x%02x/%1d <----- 0x%02x/%1d -----> 0x%02x/%1d, range: %2d\n",
7660                                               node, ddr_interface_num, rankx,
7661                                               vvlo, vrlo,
7662                                               final_vref_value, final_vref_range + 1,
7663                                               vvhi, vrhi,
7664                                               best_vref_values_count-1);
7665                                 }
7666 #endif
7667 
7668                             } else {
7669                                 /* If nothing passed use the default Vref value for this rank */
7670                                 bdk_lmcx_modereg_params2_t lmc_modereg_params2;
7671                                 lmc_modereg_params2.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS2(ddr_interface_num));
7672                                 final_vref_value = (lmc_modereg_params2.u >> (rankx * 10 + 3)) & 0x3f;
7673                                 final_vref_range = (lmc_modereg_params2.u >> (rankx * 10 + 9)) & 0x01;
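                                // The shifts above imply the per-rank layout of MODEREG_PARAMS2: each
                                // rank occupies a 10-bit field, with the 6-bit VREF value at bits
                                // [rankx*10+8 : rankx*10+3] and the range-select bit at [rankx*10+9]
                                // (inferred from this extraction, not from the CSR specification).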
7674 
7675                                 ddr_print("N%d.LMC%d.R%d: Vref Using Default                    :"
7676                                           "    %2d <----- %2d (0x%02x) -----> %2d, range%1d\n",
7677                                           node, ddr_interface_num, rankx,
7678                                           final_vref_value, final_vref_value,
7679                                           final_vref_value, final_vref_value, final_vref_range+1);
7680                             }
7681                         }
7682 
7683                         // allow override
7684                         if ((s = lookup_env_parameter("ddr%d_vref_value_%1d%1d",
7685                                                       ddr_interface_num, !!(rankx&2), !!(rankx&1))) != NULL) {
7686                             final_vref_value = strtoul(s, NULL, 0);
7687                         }
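                        // For example, an envvar "ddr0_vref_value_01=0x33" (hypothetical) would force
                        // the final VREF for LMC0 rank 1: the two trailing digits encode bit 1 and
                        // bit 0 of rankx, respectively.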
7688 
7689                         set_vref(node, ddr_interface_num, rankx, final_vref_range, final_vref_value);
7690 
7691                     } /* if (vref_value < VREF_FINAL) */
7692                 } /* if (ddr_type == DDR4_DRAM) */
7693 
7694                 lmc_wlevel_rank.u = lmc_wlevel_rank_hw_results.u; /* Restore the saved value */
7695 
7696                 for (byte = 0; byte < 9; ++byte)
7697                     byte_test_status[byte] = WL_ESTIMATED;
7698 
7699                 if (wlevel_bitmask_errors == 0) {
7700 
7701                     /* Determine address of DRAM to test for pass 1 of software write leveling. */
7702                     rank_addr  = active_rank * (1ull << (pbank_lsb - bunk_enable + (interfaces/2)));
7703                     // FIXME: these now put in by test_dram_byte()
7704                     //rank_addr |= (ddr_interface_num<<7); /* Map address into proper interface */
7705                     //rank_addr = bdk_numa_get_address(node, rank_addr);
7706                     VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: Active Rank %d Address: 0x%llx\n",
7707                            node, ddr_interface_num, rankx, active_rank, rank_addr);
7708 
7709                     { // start parallel write-leveling block for delay high-order bits
7710                         int errors = 0;
7711                         int byte_delay[9];
7712                         uint64_t bytemask;
7713                         int bytes_todo;
7714 
7715                         if (ddr_interface_64b) {
7716                             bytes_todo = (sw_wlevel_hw) ? ddr_interface_bytemask : 0xFF;
7717                             bytemask = ~0ULL;
7718                         } else { // 32-bit, must be using SW algo, only data bytes
7719                             bytes_todo = 0x0f;
7720                             bytemask = 0x00000000ffffffffULL;
7721                         }
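                        // bytes_todo is a per-lane mask (bit 8 = ECC lane) selecting which byte
                        // lanes this search still adjusts; bytemask is the per-bit data mask handed
                        // to the memory test. With HW-assist, ddr_interface_bytemask presumably
                        // includes the ECC lane, so ECC gets trained here too (see the note below).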
7722 
7723                         for (byte = 0; byte < 9; ++byte) {
7724                             if (!(bytes_todo & (1 << byte))) {
7725                                 byte_delay[byte] = 0;
7726                             } else {
7727                                 byte_delay[byte] = get_wlevel_rank_struct(&lmc_wlevel_rank, byte);
7728                             }
7729                         } /* for (byte = 0; byte < 9; ++byte) */
7730 
7731 #define WL_MIN_NO_ERRORS_COUNT 3  // FIXME? three passes without errors
7732                         int no_errors_count = 0;
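                        // Search sketch: every lane that fails advances its delay in steps of 8
                        // (touching only the high-order, full-clock portion of the WL setting),
                        // while lanes that pass hold their delay but keep being retested; the
                        // do-while below exits only after WL_MIN_NO_ERRORS_COUNT consecutive
                        // error-free runs.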
7733 
7734                         // Change verbosity depending on whether VREF was measured or computed (DDR3 has no VREF training).
7735                         // Measured VREF makes many passes through SWL; computed VREF and DDR3 make only one,
7736                         // so print the EXHAUSTED messages at NORM for computed and DDR3,
7737                         // and at DEV2 for measured, just for completeness.
7738                         int vbl_local = (measured_vref_flag) ? VBL_DEV2 : VBL_NORM;
7739                         uint64_t bad_bits[2];
7740 #if ENABLE_SW_WLEVEL_UTILIZATION
7741                         uint64_t sum_dram_dclk = 0, sum_dram_ops = 0;
7742                         uint64_t start_dram_dclk, stop_dram_dclk;
7743                         uint64_t start_dram_ops, stop_dram_ops;
7744 #endif
7745                         do {
7746                             // write the current set of WL delays
7747                             DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
7748                             lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
7749 
7750                             bdk_watchdog_poke();
7751 
7752                             // do the test
7753                             if (sw_wlevel_hw) {
7754                                 errors = run_best_hw_patterns(node, ddr_interface_num, rank_addr,
7755                                                               DBTRAIN_TEST, bad_bits);
7756                                 errors &= bytes_todo; // keep only the ones we are still doing
7757                             } else {
7758 #if ENABLE_SW_WLEVEL_UTILIZATION
7759                                 start_dram_dclk = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num));
7760                                 start_dram_ops  = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(ddr_interface_num));
7761 #endif
7762 #if USE_ORIG_TEST_DRAM_BYTE
7763                                 errors = test_dram_byte(node, ddr_interface_num, rank_addr, bytemask, bad_bits);
7764 #else
7765                                 errors = dram_tuning_mem_xor(node, ddr_interface_num, rank_addr, bytemask, bad_bits);
7766 #endif
7767 #if ENABLE_SW_WLEVEL_UTILIZATION
7768                                 stop_dram_dclk = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num));
7769                                 stop_dram_ops  = BDK_CSR_READ(node, BDK_LMCX_OPS_CNT(ddr_interface_num));
7770                                 sum_dram_dclk += stop_dram_dclk - start_dram_dclk;
7771                                 sum_dram_ops  += stop_dram_ops  - start_dram_ops;
7772 #endif
7773                             }
7774 
7775                             VB_PRT(VBL_DEV2, "N%d.LMC%d.R%d: S/W write-leveling TEST: returned 0x%x\n",
7776                                    node, ddr_interface_num, rankx, errors);
7777 
7778                             // remember, errors will not be returned for byte-lanes that have maxxed out...
7779                             // remember, errors will not be returned for byte-lanes that have maxed out...
7780                                 no_errors_count++; // bump
7781                                 if (no_errors_count > 1) // bypass check/update completely
7782                                     continue; // to end of do-while
7783                             } else
7784                                 no_errors_count = 0; // reset
7785 
7786                             // check errors by byte
7787                             for (byte = 0; byte < 9; ++byte) {
7788                                 if (!(bytes_todo & (1 << byte)))
7789                                     continue;
7790 
7791                                 delay = byte_delay[byte];
7792                                 if (errors & (1 << byte)) { // yes, an error in this byte lane
7793                                     debug_print("        byte %d delay %2d Errors\n", byte, delay);
7794                                     // since this byte had an error, we move to the next delay value, unless maxed out
7795                                     delay += 8; // incr by 8 to do only delay high-order bits
7796                                     if (delay < 32) {
7797                                         update_wlevel_rank_struct(&lmc_wlevel_rank, byte, delay);
7798                                         debug_print("        byte %d delay %2d New\n", byte, delay);
7799                                         byte_delay[byte] = delay;
7800                                     } else { // reached max delay, maybe really done with this byte
7801 #if SWL_TRY_HWL_ALT
7802                                         if (!measured_vref_flag && // consider an alt only for computed VREF and
7803                                             (hwl_alts[rankx].hwl_alt_mask & (1 << byte))) // if an alt exists...
7804                                         {
7805                                             int bad_delay = delay & 0x6; // original low-order bits <2:1> of the failing delay (for the message below)
7806                                             delay = hwl_alts[rankx].hwl_alt_delay[byte]; // yes, use it
7807                                             hwl_alts[rankx].hwl_alt_mask &= ~(1 << byte); // clear that flag
7808                                             update_wlevel_rank_struct(&lmc_wlevel_rank, byte, delay);
7809                                             byte_delay[byte] = delay;
7810                                             debug_print("        byte %d delay %2d ALTERNATE\n", byte, delay);
7811                                             VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: SWL: Byte %d: %d FAIL, trying ALTERNATE %d\n",
7812                                                    node, ddr_interface_num, rankx, byte, bad_delay, delay);
7813 
7814                                         } else
7815 #endif /* SWL_TRY_HWL_ALT */
7816                                         {
7817                                             unsigned bits_bad;
7818                                             if (byte < 8) {
7819                                                 bytemask &= ~(0xffULL << (8*byte)); // test no longer, remove from byte mask
7820                                                 bits_bad = (unsigned)((bad_bits[0] >> (8 * byte)) & 0xffUL);
7821                                             } else {
7822                                                 bits_bad = (unsigned)(bad_bits[1] & 0xffUL);
7823                                             }
7824                                             bytes_todo &= ~(1 << byte); // remove from bytes to do
7825                                             byte_test_status[byte] = WL_ESTIMATED; // make sure this is set for this case
7826                                             debug_print("        byte %d delay %2d Exhausted\n", byte, delay);
7827                                             VB_PRT(vbl_local, "N%d.LMC%d.R%d: SWL: Byte %d (0x%02x): delay %d EXHAUSTED\n",
7828                                                    node, ddr_interface_num, rankx, byte, bits_bad, delay);
7829                                         }
7830                                     }
7831                                 } else { // no error, stay with current delay, but keep testing it...
7832                                     debug_print("        byte %d delay %2d Passed\n", byte, delay);
7833                                     byte_test_status[byte] = WL_HARDWARE; // change status
7834                                 }
7835 
7836                             } /* for (byte = 0; byte < 9; ++byte) */
7837 
7838                         } while (no_errors_count < WL_MIN_NO_ERRORS_COUNT);
7839 
7840 #if ENABLE_SW_WLEVEL_UTILIZATION
7841                         if (! sw_wlevel_hw) {
7842                             uint64_t percent_x10;
7843                             if (sum_dram_dclk == 0)
7844                                 sum_dram_dclk = 1;
7845                             percent_x10 = sum_dram_ops * 1000 / sum_dram_dclk;
7846                             ddr_print("N%d.LMC%d.R%d: ops %llu, cycles %llu, used %llu.%llu%%\n",
7847                                       node, ddr_interface_num, rankx, sum_dram_ops, sum_dram_dclk,
7848                                       percent_x10 / 10, percent_x10 % 10);
7849                         }
7850 #endif
7851                         if (errors) {
7852                             debug_print("End WLEV_64 while loop: vref_value %d(0x%x), errors 0x%02x\n",
7853                                       vref_value, vref_value, errors);
7854                         }
7855                     } // end parallel write-leveling block for delay high-order bits
7856 
7857                     if (sw_wlevel_hw) { // if we used HW-assist, we already did the ECC byte when appropriate
7858                         VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: HW-assist SWL - no ECC estimate!!!\n",
7859                                node, ddr_interface_num, rankx);
7860                         goto no_ecc_estimate;
7861                     }
7862 
7863                     if ((ddr_interface_bytemask & 0xff) == 0xff) {
7864                         if (use_ecc) {
7865                             int save_byte8 = lmc_wlevel_rank.s.byte8; // save original HW delay
7866                             byte_test_status[8] = WL_HARDWARE; /* say it is H/W delay value */
7867 
7868                             if ((save_byte8 != lmc_wlevel_rank.s.byte3) &&
7869                                 (save_byte8 != lmc_wlevel_rank.s.byte4))
7870                             {
7871                                 // try to calculate a workable delay using SW bytes 3 and 4 and HW byte 8
7872                                 int test_byte8 = save_byte8;
7873                                 int test_byte8_error;
7874                                 int byte8_error = 0x1f;
7875                                 int adder;
7876                                 int avg_bytes = divide_nint(lmc_wlevel_rank.s.byte3+lmc_wlevel_rank.s.byte4, 2);
7877                                 for (adder = 0; adder <= 32; adder += 8) {
7878                                     test_byte8_error = _abs((adder+save_byte8) - avg_bytes);
7879                                     if (test_byte8_error < byte8_error) {
7880                                         byte8_error = test_byte8_error;
7881                                         test_byte8 = save_byte8 + adder;
7882                                     }
7883                                 }
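                                // Illustrative example (hypothetical delays): byte3 = 14 and byte4 = 10
                                // give avg_bytes = 12; with save_byte8 = 4, adder = 8 yields
                                // |4 + 8 - 12| = 0, the smallest error, so test_byte8 becomes 12.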
7884 
7885 #if SW_WL_CHECK_PATCH
7886                                 // only do the check if we are not using measured VREF
7887                                 if (!measured_vref_flag) {
7888                                     test_byte8 &= ~1; /* Use only even settings, rounding down... */
7889 
7890                                     // do validity check on the calculated ECC delay value
7891                                     // this depends on the DIMM type
7892                                     if (spd_rdimm) { // RDIMM
7893                                         if (spd_dimm_type != 5) { // but not mini-RDIMM
7894                                             // it can be > byte4, but should never be > byte3
7895                                             if (test_byte8 > lmc_wlevel_rank.s.byte3) {
7896                                                 byte_test_status[8] = WL_ESTIMATED; /* say it is still estimated */
7897                                             }
7898                                         }
7899                                     } else { // UDIMM
7900                                         if ((test_byte8 < lmc_wlevel_rank.s.byte3) ||
7901                                             (test_byte8 > lmc_wlevel_rank.s.byte4))
7902                                             { // should never be outside the byte 3-4 range
7903                                                 byte_test_status[8] = WL_ESTIMATED; /* say it is still estimated */
7904                                             }
7905                                     }
7906                                     /*
7907                                      * Report whenever the calculation appears bad.
7908                                      * This happens if some of the original values were off, or unexpected geometry
7909                                      * from DIMM type, or custom circuitry (NIC225E, I am looking at you!).
7910                                      * We will trust the calculated value, and depend on later testing to catch
7911                                      * any instances when that value is truly bad.
7912                                      */
7913                                     if (byte_test_status[8] == WL_ESTIMATED) { // ESTIMATED means there may be an issue
7914                                         ddr_print("N%d.LMC%d.R%d: SWL: (%cDIMM): calculated ECC delay unexpected (%d/%d/%d)\n",
7915                                                   node, ddr_interface_num, rankx, (spd_rdimm?'R':'U'),
7916                                                   lmc_wlevel_rank.s.byte4, test_byte8, lmc_wlevel_rank.s.byte3);
7917                                         byte_test_status[8] = WL_HARDWARE;
7918                                     }
7919                                 }
7920 #endif /* SW_WL_CHECK_PATCH */
7921                                 lmc_wlevel_rank.s.byte8 = test_byte8 & ~1; /* Use only even settings */
7922                             }
7923 
7924                             if (lmc_wlevel_rank.s.byte8 != save_byte8) {
7925                                 /* Change the status if s/w adjusted the delay */
7926                                 byte_test_status[8] = WL_SOFTWARE; /* Estimated delay */
7927                             }
7928                         } else {
7929                             byte_test_status[8] = WL_HARDWARE; /* H/W delay value */
7930                             lmc_wlevel_rank.s.byte8 = lmc_wlevel_rank.s.byte0; /* ECC is not used */
7931                         }
7932                     } else { /* if ((ddr_interface_bytemask & 0xff) == 0xff) */
7933                         if (use_ecc) {
7934                             /* Estimate the ECC byte delay  */
7935                             lmc_wlevel_rank.s.byte4 |= (lmc_wlevel_rank.s.byte3 & 0x38); // add hi-order to b4
7936                             if ((lmc_wlevel_rank.s.byte4 & 0x06) < (lmc_wlevel_rank.s.byte3 & 0x06)) // orig b4 < orig b3
7937                                 lmc_wlevel_rank.s.byte4 += 8; // must be next clock
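                            // Illustrative example (hypothetical delays): byte3 = 0x0e, byte4 = 0x02:
                            // OR-ing in byte3's high-order bits (0x38 mask) makes byte4 = 0x0a; its
                            // fine bits (0x0a & 0x06 = 2) are below byte3's (0x0e & 0x06 = 6), so the
                            // +8 above fires and byte4 lands in the next clock at 0x12.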
7938                         } else {
7939                             lmc_wlevel_rank.s.byte4 = lmc_wlevel_rank.s.byte0; /* ECC is not used */
7940                         }
7941                         /* Change the status if s/w adjusted the delay */
7942                         byte_test_status[4] = WL_SOFTWARE; /* Estimated delay */
7943                     } /* if ((ddr_interface_bytemask & 0xff) == 0xff) */
7944                 } /* if (wlevel_bitmask_errors == 0) */
7945 
7946             no_ecc_estimate:
7947 
7948                 bytes_failed = 0;
7949                 for (byte = 0; byte < 9; ++byte) {
7950                     /* Don't accumulate errors for untested bytes. */
7951                     if (!(ddr_interface_bytemask & (1 << byte)))
7952                         continue;
7953                     bytes_failed += (byte_test_status[byte] == WL_ESTIMATED);
7954                 }
7955 
7956                 /* Vref training loop is only used for DDR4 */
7957                 if (ddr_type != DDR4_DRAM)
7958                     break;
7959 
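                // Track the longest contiguous run of passing VREF settings. For example
                // (hypothetical), if settings 0x28..0x2c all pass, the best window starts at
                // 0x28 with count 5, and the final VREF is later chosen near its midpoint.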
7960                 if (bytes_failed == 0) {
7961                     if (vref_values_count == 0) {
7962                         vref_values_start = vref_value;
7963                     }
7964                     ++vref_values_count;
7965                     if (vref_values_count > best_vref_values_count) {
7966                         best_vref_values_count = vref_values_count;
7967                         best_vref_values_start = vref_values_start;
7968                         debug_print("N%d.LMC%d.R%d: Vref Training                    (%2d) :    0x%02x <----- ???? -----> 0x%02x\n",
7969                                     node, ddr_interface_num,
7970                                   rankx, vref_value, best_vref_values_start,
7971                                   best_vref_values_start+best_vref_values_count-1);
7972                     }
7973                 } else {
7974                     vref_values_count = 0;
7975                     debug_print("N%d.LMC%d.R%d: Vref Training                    (%2d) :    failed\n",
7976                                 node, ddr_interface_num,
7977                                 rankx, vref_value);
7978                 }
7979             } /* for (vref_value=0; vref_value<VREF_LIMIT; ++vref_value) */
7980 
7981             /* Determine address of DRAM to test for pass 2 and final test of software write leveling. */
7982             rank_addr  = active_rank * (1ull << (pbank_lsb - bunk_enable + (interfaces/2)));
7983             rank_addr |= (ddr_interface_num<<7); /* Map address into proper interface */
7984             rank_addr = bdk_numa_get_address(node, rank_addr);
7985             debug_print("N%d.LMC%d.R%d: Active Rank %d Address: 0x%llx\n",
7986                         node, ddr_interface_num, rankx, active_rank, rank_addr);
7987 
7988             int errors;
7989 
7990             if (bytes_failed) {
7991 
7992 #if !DISABLE_SW_WL_PASS_2
7993 
7994                 ddr_print("N%d.LMC%d.R%d: Starting SW Write-leveling pass 2\n",
7995                           node, ddr_interface_num, rankx);
7996                 sw_wl_rank_status = WL_SOFTWARE;
7997 
7998                 /* If previous s/w fixups failed then retry using s/w write-leveling. */
7999                 if (wlevel_bitmask_errors == 0) {
8000                     /* h/w succeeded but previous s/w fixups failed. So retry s/w. */
8001                     debug_print("N%d.LMC%d.R%d: Retrying software Write-Leveling.\n",
8002                                 node, ddr_interface_num, rankx);
8003                 }
8004 
8005                 { // start parallel write-leveling block for delay low-order bits
8006                     int byte_delay[8];
8007                     int byte_passed[8];
8008                     uint64_t bytemask;
8009                     uint64_t bitmask;
8010                     int wl_offset;
8011                     int bytes_todo;
8012 
8013                     for (byte = 0; byte < 8; ++byte) {
8014                         byte_passed[byte] = 0;
8015                     }
8016 
8017                     bytes_todo = ddr_interface_bytemask;
8018 
8019                     for (wl_offset = sw_wlevel_offset; wl_offset >= 0; --wl_offset) {
8020                         debug_print("Starting wl_offset for-loop: %d\n", wl_offset);
8021 
8022                         bytemask = 0;
8023 
8024                         for (byte = 0; byte < 8; ++byte) {
8025                             byte_delay[byte] = 0;
8026                             if (!(bytes_todo & (1 << byte))) // this does not contain fully passed bytes
8027                                 continue;
8028 
8029                             byte_passed[byte] = 0; // reset across passes if not fully passed
8030                             update_wlevel_rank_struct(&lmc_wlevel_rank, byte, 0); // all delays start at 0
8031                             bitmask = ((!ddr_interface_64b) && (byte == 4)) ? 0x0f : 0xff;
8032                             bytemask |= bitmask << (8*byte); // set the bytes bits in the bytemask
8033                         } /* for (byte = 0; byte < 8; ++byte) */
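                        // Pass criterion: a byte must pass (1 + wl_offset) consecutive delay
                        // settings (stepped by 2 below). On the first iteration (wl_offset ==
                        // sw_wlevel_offset, typically 1 -- an assumption), two consecutive passes
                        // earn WL_SOFTWARE; the wl_offset == 0 fallback accepts a single passing
                        // setting, marked WL_SOFTWARE1.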
8034 
8035                         while (bytemask != 0) { // start a pass if there is any byte lane to test
8036 
8037                             debug_print("Starting bytemask while-loop: 0x%llx\n", bytemask);
8038 
8039                             // write this set of WL delays
8040                             DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
8041                             lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
8042 
8043                             bdk_watchdog_poke();
8044 
8045                             // do the test
8046                             if (sw_wlevel_hw)
8047                                 errors = run_best_hw_patterns(node, ddr_interface_num, rank_addr,
8048                                                               DBTRAIN_TEST, NULL);
8049                             else
8050                                 errors = test_dram_byte(node, ddr_interface_num, rank_addr, bytemask, NULL);
8051 
8052                             debug_print("SWL pass 2: test_dram_byte returned 0x%x\n", errors);
8053 
8054                             // check errors by byte
8055                             for (byte = 0; byte < 8; ++byte) {
8056                                 if (!(bytes_todo & (1 << byte)))
8057                                     continue;
8058 
8059                                 delay = byte_delay[byte];
8060                                 if (errors & (1 << byte)) { // yes, an error
8061                                     debug_print("        byte %d delay %2d Errors\n", byte, delay);
8062                                     byte_passed[byte] = 0;
8063                                 } else { // no error
8064                                     byte_passed[byte] += 1;
8065                                     if (byte_passed[byte] == (1 + wl_offset)) { /* Look for consecutive working settings */
8066                                         debug_print("        byte %d delay %2d FULLY Passed\n", byte, delay);
8067                                         if (wl_offset == 1) {
8068                                             byte_test_status[byte] = WL_SOFTWARE;
8069                                         } else if (wl_offset == 0) {
8070                                             byte_test_status[byte] = WL_SOFTWARE1;
8071                                         }
8072                                         bytemask &= ~(0xffULL << (8*byte)); // test no longer, remove from byte mask this pass
8073                                         bytes_todo &= ~(1 << byte); // remove completely from concern
8074                                         continue; // on to the next byte, bypass delay updating!!
8075                                     } else {
8076                                         debug_print("        byte %d delay %2d Passed\n", byte, delay);
8077                                     }
8078                                 }
8079                                 // Pass or fail, we move to the next delay value for this byte until all
8080                                 // delays are tried; only a byte that has "fully passed" bypasses this update.
8081                                 delay += 2;
8082                                 if (delay < 32) {
8083                                     update_wlevel_rank_struct(&lmc_wlevel_rank, byte, delay);
8084                                     debug_print("        byte %d delay %2d New\n", byte, delay);
8085                                     byte_delay[byte] = delay;
8086                                 } else {
8087                                     // reached max delay, done with this byte
8088                                     debug_print("        byte %d delay %2d Exhausted\n", byte, delay);
8089                                     bytemask &= ~(0xffULL << (8*byte)); // test no longer, remove from byte mask this pass
8090                                 }
8091                             } /* for (byte = 0; byte < 8; ++byte) */
8092                             debug_print("End of for-loop: bytemask 0x%llx\n", bytemask);
8093 
8094                         } /* while (bytemask != 0) */
8095                     } /* for (wl_offset = sw_wlevel_offset; wl_offset >= 0; --wl_offset) */
8096 
8097                     for (byte = 0; byte < 8; ++byte) {
8098                         // any bytes left in bytes_todo did not pass
8099                         if (bytes_todo & (1 << byte)) {
8100                             /* Last resort. Use Rlevel settings to estimate
8101                                Wlevel if software write-leveling fails */
8102                             debug_print("Using RLEVEL as WLEVEL estimate for byte %d\n", byte);
8103                             lmc_rlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_RANKX(ddr_interface_num, rankx));
8104                             rlevel_to_wlevel(&lmc_rlevel_rank, &lmc_wlevel_rank, byte);
8105                         }
8106                     } /* for (byte = 0; byte < 8; ++byte) */
8107 
8108                     sw_wl_failed = (bytes_todo != 0);
8109 
8110                 } // end parallel write-leveling block for delay low-order bits
8111 
8112                 if (use_ecc) {
8113                     /* ECC byte has to be estimated. Take the average of the two surrounding bytes. */
8114                     int test_byte8 = divide_nint(lmc_wlevel_rank.s.byte3
8115                                                  + lmc_wlevel_rank.s.byte4
8116                                                  + 2 /* round up */, 2);
8117                     lmc_wlevel_rank.s.byte8 = test_byte8 & ~1; /* Use only even settings */
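                    // Illustrative example (hypothetical delays): byte3 = 10 and byte4 = 14 give
                    // divide_nint(10 + 14 + 2, 2) = 13, which & ~1 rounds down to the even
                    // setting 12; the +2 biases the average upward before evenness is forced.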
8118                     byte_test_status[8] = WL_ESTIMATED; /* Estimated delay */
8119                 } else {
8120                     byte_test_status[8] = WL_HARDWARE; /* H/W delay value */
8121                     lmc_wlevel_rank.s.byte8 = lmc_wlevel_rank.s.byte0; /* ECC is not used */
8122                 }
8123 
8124                 /* Set delays for unused bytes to match byte 0. */
8125                 for (byte=0; byte<8; ++byte) {
8126                     if ((ddr_interface_bytemask & (1 << byte)))
8127                         continue;
8128                     update_wlevel_rank_struct(&lmc_wlevel_rank, byte,
8129                                               lmc_wlevel_rank.s.byte0);
8130                     byte_test_status[byte] = WL_SOFTWARE;
8131                 }
8132 #else /* !DISABLE_SW_WL_PASS_2 */
8133                 // FIXME? The big hammer: SW WL pass 2 was not even tried; assume only a chip reset will help
8134                 ddr_print("N%d.LMC%d.R%d: S/W write-leveling pass 1 failed\n",
8135                           node, ddr_interface_num, rankx);
8136                 sw_wl_failed = 1;
8137 #endif /* !DISABLE_SW_WL_PASS_2 */
8138 
8139             } else { /* if (bytes_failed) */
8140 
8141                 // SW WL pass 1 was OK, write the settings
8142                 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
8143                 lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
8144 
8145 #if SW_WL_CHECK_PATCH
8146                 // do validity check on the delay values by running the test 1 more time...
8147                 // FIXME: we really need to check the ECC byte setting here as well,
8148                 //        so we need to enable ECC for this test!!!
8149                 // if there are any errors, claim SW WL failure
8150                 {
8151                     uint64_t datamask = (ddr_interface_64b) ? 0xffffffffffffffffULL : 0x00000000ffffffffULL;
8152 
8153                     // do the test
8154                     if (sw_wlevel_hw) {
8155                         errors = run_best_hw_patterns(node, ddr_interface_num, rank_addr,
8156                                                       DBTRAIN_TEST, NULL) & 0x0ff;
8157                     } else {
8158 #if USE_ORIG_TEST_DRAM_BYTE
8159                         errors = test_dram_byte(node, ddr_interface_num, rank_addr, datamask, NULL);
8160 #else
8161                         errors = dram_tuning_mem_xor(node, ddr_interface_num, rank_addr, datamask, NULL);
8162 #endif
8163                     }
8164 
8165                     if (errors) {
8166                         ddr_print("N%d.LMC%d.R%d: Wlevel Rank Final Test errors 0x%x\n",
8167                               node, ddr_interface_num, rankx, errors);
8168                         sw_wl_failed = 1;
8169                     }
8170                 }
8171 #endif /* SW_WL_CHECK_PATCH */
8172 
8173             } /* if (bytes_failed) */
8174 
8175             // FIXME? dump the WL settings, so we get more of a clue as to what happened where
8176             ddr_print("N%d.LMC%d.R%d: Wlevel Rank %#4x, 0x%016llX  : %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %s\n",
8177                       node, ddr_interface_num, rankx,
8178                       lmc_wlevel_rank.s.status,
8179                       lmc_wlevel_rank.u,
8180                       lmc_wlevel_rank.s.byte8, wl_status_strings[byte_test_status[8]],
8181                       lmc_wlevel_rank.s.byte7, wl_status_strings[byte_test_status[7]],
8182                       lmc_wlevel_rank.s.byte6, wl_status_strings[byte_test_status[6]],
8183                       lmc_wlevel_rank.s.byte5, wl_status_strings[byte_test_status[5]],
8184                       lmc_wlevel_rank.s.byte4, wl_status_strings[byte_test_status[4]],
8185                       lmc_wlevel_rank.s.byte3, wl_status_strings[byte_test_status[3]],
8186                       lmc_wlevel_rank.s.byte2, wl_status_strings[byte_test_status[2]],
8187                       lmc_wlevel_rank.s.byte1, wl_status_strings[byte_test_status[1]],
8188                       lmc_wlevel_rank.s.byte0, wl_status_strings[byte_test_status[0]],
8189                       (sw_wl_rank_status == WL_HARDWARE) ? "" : "(s)"
8190                       );
8191 
8192             // finally, check for fatal conditions: either chip reset right here, or return error flag
8193             if (((ddr_type == DDR4_DRAM) && (best_vref_values_count == 0)) || sw_wl_failed) {
8194                 if (!ddr_disable_chip_reset) { // do chip RESET
8195                     error_print("INFO: Short memory test indicates a retry is needed on N%d.LMC%d.R%d. Resetting node...\n",
8196                                 node, ddr_interface_num, rankx);
8197                     bdk_wait_usec(500000);
8198                     bdk_reset_chip(node);
8199                 } else { // return error flag so LMC init can be retried...
8200                     ddr_print("INFO: Short memory test indicates a retry is needed on N%d.LMC%d.R%d. Restarting LMC init...\n",
8201                               node, ddr_interface_num, rankx);
8202                     return 0; // 0 indicates restart possible...
8203                 }
8204             }
8205 
8206             active_rank++;
8207         } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
8208 
8209         // Finalize the write-leveling settings
8210         for (rankx = 0; rankx < dimm_count * 4; rankx++) {
8211             uint64_t value;
8212             int parameter_set = 0;
8213             if (!(rank_mask & (1 << rankx)))
8214                 continue;
8215 
8216             lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
8217 
8218             for (i=0; i<9; ++i) {
8219                 if ((s = lookup_env_parameter("ddr%d_wlevel_rank%d_byte%d", ddr_interface_num, rankx, i)) != NULL) {
8220                     parameter_set |= 1;
8221                     value = strtoul(s, NULL, 0);
8222 
8223                     update_wlevel_rank_struct(&lmc_wlevel_rank, i, value);
8224                 }
8225             }
8226 
8227             if ((s = lookup_env_parameter_ull("ddr%d_wlevel_rank%d", ddr_interface_num, rankx)) != NULL) {
8228                 parameter_set |= 1;
8229                 value = strtoull(s, NULL, 0);
8230                 lmc_wlevel_rank.u = value;
8231             }
8232 
8233             if (parameter_set) {
8234                 DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx), lmc_wlevel_rank.u);
8235                 lmc_wlevel_rank.u = BDK_CSR_READ(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, rankx));
8236                 display_WL(node, ddr_interface_num, lmc_wlevel_rank, rankx);
8237             }
8238 #if WLEXTRAS_PATCH
8239             if ((rank_mask & 0x0F) != 0x0F) { // if there are unused entries to be filled
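                // Copy this rank's trained settings into any unprogrammed WLEVEL_RANK CSRs
                // (ranks 1-3), presumably so every rank entry holds a sane value even when
                // that rank is not populated (an assumption about the intent).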
8240                 if (rankx < 3) {
8241                     debug_print("N%d.LMC%d.R%d: checking for WLEVEL_RANK unused entries.\n",
8242                               node, ddr_interface_num, rankx);
8243                     if (rankx == 0) { // if rank 0, write ranks 1 and 2 here if empty
8244                         if (!(rank_mask & (1<<1))) { // check that rank 1 is empty
8245                             DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, 1), lmc_wlevel_rank.u);
8246                             VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
8247                                       node, ddr_interface_num, rankx, 1);
8248                         }
8249                         if (!(rank_mask & (1<<2))) { // check that rank 2 is empty
8250                             VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
8251                                       node, ddr_interface_num, rankx, 2);
8252                             DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, 2), lmc_wlevel_rank.u);
8253                         }
8254                     }
8255                     // if rank 0, 1 or 2, write rank 3 here if empty
8256                     if (!(rank_mask & (1<<3))) { // check that rank 3 is empty
8257                         VB_PRT(VBL_DEV, "N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
8258                                   node, ddr_interface_num, rankx, 3);
8259                         DRAM_CSR_WRITE(node, BDK_LMCX_WLEVEL_RANKX(ddr_interface_num, 3), lmc_wlevel_rank.u);
8260                     }
8261                 }
8262             }
8263 #endif /* WLEXTRAS_PATCH */
8264 
8265         } /* for (rankx = 0; rankx < dimm_count * 4; rankx++) */
8266 
8267         /* Restore the ECC configuration */
8268         if (!sw_wlevel_hw_default) {
8269             lmc_config.s.ecc_ena = use_ecc;
8270             DRAM_CSR_WRITE(node, BDK_LMCX_CONFIG(ddr_interface_num), lmc_config.u);
8271         }
8272 
8273 #if USE_L2_WAYS_LIMIT
8274         /* Restore the l2 set configuration */
8275         if ((s = lookup_env_parameter("limit_l2_ways")) != NULL) {
8276             int ways = strtoul(s, NULL, 10);
8277             limit_l2_ways(node, ways, 1);
8278         } else {
8279             limit_l2_ways(node, bdk_l2c_get_num_assoc(node), 0);
8280         }
8281 #endif
8282 
8283     } // End Software Write-Leveling block
8284 
8285 #if ENABLE_DISPLAY_MPR_PAGE
8286     if (ddr_type == DDR4_DRAM) {
8287             Display_MPR_Page(node, rank_mask, ddr_interface_num, dimm_count, 2);
8288             Display_MPR_Page(node, rank_mask, ddr_interface_num, dimm_count, 0);
8289     }
8290 #endif
8291 
8292 #ifdef CAVIUM_ONLY
8293     {
8294         int _i;
8295         int setting[9];
8296         bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
8297         ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
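        // Read-out pattern: select a byte lane via dll90_byte_sel, write DLL_CTL3 back,
        // do a read to flush the write (an assumption about the required CSR ordering),
        // then re-read DLL_CTL3 to capture that lane's 90-degree DLL setting.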
8298 
8299         for (_i=0; _i<9; ++_i) {
8300             SET_DDR_DLL_CTL3(dll90_byte_sel, ENCODE_DLL90_BYTE_SEL(_i));
8301             DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num),        ddr_dll_ctl3.u);
8302             BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
8303             ddr_dll_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(ddr_interface_num));
8304             setting[_i] = GET_DDR_DLL_CTL3(dll90_setting);
8305             debug_print("%d. LMC%d_DLL_CTL3[%d] = %016llx %d\n", _i, ddr_interface_num,
8306                       GET_DDR_DLL_CTL3(dll90_byte_sel), ddr_dll_ctl3.u, setting[_i]);
8307         }
8308 
8309         VB_PRT(VBL_DEV, "N%d.LMC%d: %-36s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
8310                node, ddr_interface_num, "DLL90 Setting 8:0",
8311                setting[8], setting[7], setting[6], setting[5], setting[4],
8312                setting[3], setting[2], setting[1], setting[0]);
8313 
8314         //BDK_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(ddr_interface_num),        save_ddr_dll_ctl3.u);
8315     }
8316 #endif  /* CAVIUM_ONLY */
8317 
8318     // Install any custom DLL read or write offsets.
8319     // FIXME: no need to do these if we are going to auto-tune... ???
8320 
8321     process_custom_dll_offsets(node, ddr_interface_num, "ddr_dll_write_offset",
8322                                custom_lmc_config->dll_write_offset, "ddr%d_dll_write_offset_byte%d", 1);
8323     process_custom_dll_offsets(node, ddr_interface_num, "ddr_dll_read_offset",
8324                                custom_lmc_config->dll_read_offset,  "ddr%d_dll_read_offset_byte%d",  2);
8325 
8326     // we want to train write bit-deskew here...
8327     if (! disable_deskew_training) {
8328         if (enable_write_deskew) {
8329             ddr_print("N%d.LMC%d: WRITE BIT-DESKEW feature training begins.\n",
8330                       node, ddr_interface_num);
8331             Perform_Write_Deskew_Training(node, ddr_interface_num);
8332         } /* if (enable_write_deskew) */
8333     } /* if (! disable_deskew_training) */
8334 
8335     /*
8336      * 6.9.14 Final LMC Initialization
8337      *
8338      * Early LMC initialization, LMC write-leveling, and LMC read-leveling
8339      * must be completed prior to starting this final LMC initialization.
8340      *
8341      * LMC hardware updates the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1,
8342      * LMC(0)_SLOT_CTL2 CSRs with minimum values based on the selected
8343      * readleveling and write-leveling settings. Software should not write
8344      * the final LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and LMC(0)_SLOT_CTL2
8345      * values until after the final read-leveling and write-leveling settings
8346      * are written.
8347      *
8348      * Software must ensure the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and
8349      * LMC(0)_SLOT_CTL2 CSR values are appropriate for this step. These CSRs
8350      * select the minimum gaps between read operations and write operations
8351      * of various types.
8352      *
8353      * Software must not reduce the values in these CSR fields below the
8354      * values previously selected by the LMC hardware (during write-leveling
8355      * and read-leveling steps above).
8356      *
8357      * All sections in this chapter may be used to derive proper settings for
8358      * these registers.
8359      *
8360      * For minimal read latency, L2C_CTL[EF_ENA,EF_CNT] should be programmed
8361      * properly. This should be done prior to the first read.
8362      */
8363 
8364 #if ENABLE_SLOT_CTL_ACCESS
8365     {
8366         bdk_lmcx_slot_ctl0_t lmc_slot_ctl0;
8367         bdk_lmcx_slot_ctl1_t lmc_slot_ctl1;
8368         bdk_lmcx_slot_ctl2_t lmc_slot_ctl2;
8369         bdk_lmcx_slot_ctl3_t lmc_slot_ctl3;
8370 
8371         lmc_slot_ctl0.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL0(ddr_interface_num));
8372         lmc_slot_ctl1.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL1(ddr_interface_num));
8373         lmc_slot_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL2(ddr_interface_num));
8374         lmc_slot_ctl3.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL3(ddr_interface_num));
8375 
8376         ddr_print("%-45s : 0x%016llx\n", "LMC_SLOT_CTL0", lmc_slot_ctl0.u);
8377         ddr_print("%-45s : 0x%016llx\n", "LMC_SLOT_CTL1", lmc_slot_ctl1.u);
8378         ddr_print("%-45s : 0x%016llx\n", "LMC_SLOT_CTL2", lmc_slot_ctl2.u);
8379         ddr_print("%-45s : 0x%016llx\n", "LMC_SLOT_CTL3", lmc_slot_ctl3.u);
8380 
8381         // for now, look only for SLOT_CTL1 envvar for override of contents
8382         if ((s = lookup_env_parameter("ddr%d_slot_ctl1", ddr_interface_num)) != NULL) {
8383             int slot_ctl1_incr = strtoul(s, NULL, 0);
8384             // validate the value
8385             if ((slot_ctl1_incr < 0) || (slot_ctl1_incr > 3)) { // allow 0 for printing only
8386                 error_print("ddr%d_slot_ctl1 illegal value (%d); must be 0-3\n",
8387                             ddr_interface_num, slot_ctl1_incr);
8388             } else {
8389 
8390 #define INCR(csr, chip, field, incr)  \
8391                 csr.chip.field = (csr.chip.field < (64 - incr)) ? (csr.chip.field + incr) : 63
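                // Saturating increment, illustrated with hypothetical values: field = 62,
                // incr = 3 gives 62 < (64 - 3) == false, so the field clamps to the 6-bit
                // maximum 63 instead of wrapping around.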
8392 
8393                 // only print original when we are changing it!
8394                 if (slot_ctl1_incr)
8395                     ddr_print("%-45s : 0x%016llx\n", "LMC_SLOT_CTL1", lmc_slot_ctl1.u);
8396 
8397                 // modify all the SLOT_CTL1 fields by the increment, for now...
8398                 // but make sure the value will not overflow!!!
8399                 INCR(lmc_slot_ctl1, s, r2r_xrank_init, slot_ctl1_incr);
8400                 INCR(lmc_slot_ctl1, s, r2w_xrank_init, slot_ctl1_incr);
8401                 INCR(lmc_slot_ctl1, s, w2r_xrank_init, slot_ctl1_incr);
8402                 INCR(lmc_slot_ctl1, s, w2w_xrank_init, slot_ctl1_incr);
8403                 DRAM_CSR_WRITE(node, BDK_LMCX_SLOT_CTL1(ddr_interface_num), lmc_slot_ctl1.u);
8404                 lmc_slot_ctl1.u = BDK_CSR_READ(node, BDK_LMCX_SLOT_CTL1(ddr_interface_num));
8405 
8406                 // always print when we are changing it!
8407                 printf("%-45s : 0x%016llx\n", "LMC_SLOT_CTL1", lmc_slot_ctl1.u);
8408             }
8409         }
8410     }
8411 #endif /* ENABLE_SLOT_CTL_ACCESS */
8412     {
8413         /* Clear any residual ECC errors */
8414         int num_tads = 1;
8415         int tad;
8416 
8417         DRAM_CSR_WRITE(node, BDK_LMCX_INT(ddr_interface_num), -1ULL);
8418         BDK_CSR_READ(node, BDK_LMCX_INT(ddr_interface_num));
8419 
8420         for (tad=0; tad<num_tads; tad++)
8421             DRAM_CSR_WRITE(node, BDK_L2C_TADX_INT_W1C(tad), BDK_CSR_READ(node, BDK_L2C_TADX_INT_W1C(tad)));
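        // Clearing pattern: writing all-ones to LMC_INT is intended to clear every
        // pending bit, and the L2C TAD registers are write-1-to-clear (the _W1C
        // suffix), so writing back the value just read clears exactly the bits set.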
8422 
8423         ddr_print("%-45s : 0x%08llx\n", "LMC_INT",
8424                   BDK_CSR_READ(node, BDK_LMCX_INT(ddr_interface_num)));
8425 
8426     }
8427 
8428     // Now we can enable scrambling if desired...
8429     {
8430         bdk_lmcx_control_t lmc_control;
8431         bdk_lmcx_scramble_cfg0_t lmc_scramble_cfg0;
8432         bdk_lmcx_scramble_cfg1_t lmc_scramble_cfg1;
8433         bdk_lmcx_scramble_cfg2_t lmc_scramble_cfg2;
8434         bdk_lmcx_ns_ctl_t lmc_ns_ctl;
8435 
8436         lmc_control.u = BDK_CSR_READ(node, BDK_LMCX_CONTROL(ddr_interface_num));
8437         lmc_scramble_cfg0.u = BDK_CSR_READ(node, BDK_LMCX_SCRAMBLE_CFG0(ddr_interface_num));
8438         lmc_scramble_cfg1.u = BDK_CSR_READ(node, BDK_LMCX_SCRAMBLE_CFG1(ddr_interface_num));
8439         lmc_scramble_cfg2.u = BDK_CSR_READ(node, BDK_LMCX_SCRAMBLE_CFG2(ddr_interface_num));
8440         lmc_ns_ctl.u = BDK_CSR_READ(node, BDK_LMCX_NS_CTL(ddr_interface_num));
8441 
8442         /* Read the scramble setting from the config and see if we
8443            need scrambling */
8444         int use_scramble = bdk_config_get_int(BDK_CONFIG_DRAM_SCRAMBLE);
8445         if (use_scramble == 2)
8446         {
8447             if (bdk_trust_get_level() >= BDK_TRUST_LEVEL_SIGNED)
8448                 use_scramble = 1;
8449             else
8450                 use_scramble = 0;
8451         }
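        // BDK_CONFIG_DRAM_SCRAMBLE: 2 means "automatic" -- scramble only when the boot
        // is trusted (signed or better), as decided above; 0/1 are taken as plain
        // off/on (inferred from the boolean test below).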
8452 
8453         /* Generate random values if scrambling is needed */
8454         if (use_scramble)
8455         {
8456             lmc_scramble_cfg0.u = bdk_rng_get_random64();
8457             lmc_scramble_cfg1.u = bdk_rng_get_random64();
8458             lmc_scramble_cfg2.u = bdk_rng_get_random64();
8459             lmc_ns_ctl.s.ns_scramble_dis = 0;
8460             lmc_ns_ctl.s.adr_offset = 0;
8461             lmc_control.s.scramble_ena = 1;
8462         }
8463 
8464         if ((s = lookup_env_parameter_ull("ddr_scramble_cfg0")) != NULL) {
8465             lmc_scramble_cfg0.u    = strtoull(s, NULL, 0);
8466             lmc_control.s.scramble_ena = 1;
8467         }
8468         ddr_print("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG0", lmc_scramble_cfg0.u);
8469 
8470         DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG0(ddr_interface_num), lmc_scramble_cfg0.u);
8471 
8472         if ((s = lookup_env_parameter_ull("ddr_scramble_cfg1")) != NULL) {
8473             lmc_scramble_cfg1.u    = strtoull(s, NULL, 0);
8474             lmc_control.s.scramble_ena = 1;
8475         }
8476         ddr_print("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG1", lmc_scramble_cfg1.u);
8477         DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG1(ddr_interface_num), lmc_scramble_cfg1.u);
8478 
8479         if ((s = lookup_env_parameter_ull("ddr_scramble_cfg2")) != NULL) {
8480             lmc_scramble_cfg2.u    = strtoull(s, NULL, 0);
8481             lmc_control.s.scramble_ena = 1;
8482         }
8483         ddr_print("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG2", lmc_scramble_cfg2.u);
8484         DRAM_CSR_WRITE(node, BDK_LMCX_SCRAMBLE_CFG2(ddr_interface_num), lmc_scramble_cfg2.u);
8485 
8486         if ((s = lookup_env_parameter_ull("ddr_ns_ctl")) != NULL) {
8487             lmc_ns_ctl.u    = strtoull(s, NULL, 0);
8488         }
8489         ddr_print("%-45s : 0x%016llx\n", "LMC_NS_CTL", lmc_ns_ctl.u);
8490         DRAM_CSR_WRITE(node, BDK_LMCX_NS_CTL(ddr_interface_num), lmc_ns_ctl.u);
8491 
8492         DRAM_CSR_WRITE(node, BDK_LMCX_CONTROL(ddr_interface_num), lmc_control.u);
8493 
8494     }
8495 
8496     return(mem_size_mbytes);
8497 }
8498