/***********************license start***********************************
* Copyright (c) 2003-2017  Cavium Inc. (support@cavium.com). All rights
* reserved.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
*   * Redistributions of source code must retain the above copyright
*     notice, this list of conditions and the following disclaimer.
*
*   * Redistributions in binary form must reproduce the above
*     copyright notice, this list of conditions and the following
*     disclaimer in the documentation and/or other materials provided
*     with the distribution.
*
*   * Neither the name of Cavium Inc. nor the names of
*     its contributors may be used to endorse or promote products
*     derived from this software without specific prior written
*     permission.
*
* This Software, including technical data, may be subject to U.S. export
* control laws, including the U.S. Export Administration Act and its
* associated regulations, and may be subject to export or import
* regulations in other countries.
*
* TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS"
* AND WITH ALL FAULTS AND CAVIUM INC. MAKES NO PROMISES, REPRESENTATIONS OR
* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH RESPECT
* TO THE SOFTWARE, INCLUDING ITS CONDITION, ITS CONFORMITY TO ANY
* REPRESENTATION OR DESCRIPTION, OR THE EXISTENCE OF ANY LATENT OR PATENT
* DEFECTS, AND CAVIUM SPECIFICALLY DISCLAIMS ALL IMPLIED (IF ANY) WARRANTIES
* OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS FOR A PARTICULAR
* PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, QUIET ENJOYMENT,
* QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. THE ENTIRE RISK
* ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE LIES WITH YOU.
***********************license end**************************************/

/* $Revision: 102369 $ */

#include <bdk.h>
#include "libbdk-arch/bdk-csrs-l2c.h"
#include "dram-internal.h"

#include "dram-env.h"
#include <libbdk-hal/bdk-rng.h>
#include <lame_string.h>

/* Define DDR_DEBUG to debug the DDR interface.  This also enables the
** output necessary for review by Cavium Inc. */
/* #define DDR_DEBUG */


static int global_ddr_clock_initialized = 0;
static int global_ddr_memory_preserved  = 0;

#if 1
uint64_t max_p1 = 0UL;
#endif

/*
 * SDRAM Physical Address (figure 6-2 from the HRM)
 *                                                                   7 6    3 2   0
 * +---------+----+----------------------+---------------+--------+---+------+-----+
 * |  Dimm   |Rank|         Row          |      Col      |  Bank  | C |  Col | Bus |
 * +---------+----+----------------------+---------------+--------+---+------+-----+
 *     |    ^   |            |          ^        |           |      |
 *   0 or 1 |   |       12-18 bits      |      6-8 bits      |    1 or 2 bits
 *    bit   | 0 or 1 bit           LMC_CONFIG[ROW_LSB]+X     |    (X=1 or 2, resp)
 *          |                                                |
 *   LMC_CONFIG[PBANK_LSB]+X                               3 or 4 bits
 *
 *    Bus     = Selects the byte on the 72-bit DDR3 bus
 *    Col     = Column Address for the memory part (10-12 bits)
 *    C       = Selects the LMC that services the reference
 *              (2 bits for 4 LMC mode, 1 bit for 2 LMC mode; X=width)
 *    Bank    = Bank Address for the memory part (DDR3=3 bits, DDR4=3 or 4 bits)
 *    Row     = Row Address for the memory part (12-18 bits)
 *    Rank    = Optional Rank Address for dual-rank DIMMs
 *              (present when LMC_CONFIG[RANK_ENA] is set)
 *    Dimm    = Optional DIMM address (present with more than 1 DIMM)
 */
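
/*
 * Illustrative only: a minimal sketch of peeling the fields above off a
 * physical address, assuming 2-LMC mode (X=1), a DDR3 part with 3 bank
 * bits, and a single-rank decode. The helper name and the fixed bit
 * positions below bit 8 are assumptions read off the figure, not BDK API.
 */
#if 0
static void sketch_decode_sdram_address(uint64_t pa, int row_lsb, int pbank_lsb)
{
    uint64_t bus    = pa & 0x7;                /* bits 2:0 - byte on the bus */
    uint64_t col_lo = (pa >> 3) & 0xf;         /* bits 6:3 - low column bits */
    uint64_t c      = (pa >> 7) & 0x1;         /* bit 7    - LMC select      */
    uint64_t bank   = (pa >> 8) & 0x7;         /* 3 bank bits (DDR3)         */
    uint64_t col_hi = (pa >> 11) & ((1ULL << (row_lsb - 11)) - 1);
    uint64_t row    = (pa >> row_lsb) & ((1ULL << (pbank_lsb - row_lsb)) - 1);
    uint64_t rank   = (pa >> pbank_lsb) & 0x1; /* when LMC_CONFIG[RANK_ENA]  */
    printf("bus=%llu col=%llu c=%llu bank=%llu row=%llu rank=%llu\n",
           (unsigned long long)bus, (unsigned long long)((col_hi << 4) | col_lo),
           (unsigned long long)c, (unsigned long long)bank,
           (unsigned long long)row, (unsigned long long)rank);
}
#endif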


/**
 * Divide and round results to the nearest integer.
 *
 * @param dividend
 * @param divisor
 *
 * @return Quotient, rounded to the nearest integer (ties round up)
 */
uint64_t divide_nint(uint64_t dividend, uint64_t divisor)
{
    uint64_t quotient, remainder;
    quotient  = dividend / divisor;
    remainder = dividend % divisor;
    return quotient + ((remainder * 2) >= divisor);
}
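
/*
 * A quick illustration of the rounding behavior (not compiled in): ties
 * round up, because (remainder * 2) >= divisor is true at exactly 0.5.
 */
#if 0
    divide_nint(9, 4);   /* 2.25 -> 2 */
    divide_nint(10, 4);  /* 2.50 -> 3 (tie rounds up) */
    divide_nint(7, 2);   /* 3.50 -> 4, where plain division truncates to 3 */
#endif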

/* Sometimes the pass/fail results for all possible delay settings
 * determined by the read-leveling sequence are too forgiving.  This
 * usually occurs for DCLK speeds below 300 MHz. As a result the
 * passing range is exaggerated. This function accepts the bitmask
 * results from the sequence and truncates the passing range to a
 * reasonable range and recomputes the proper deskew setting.
 */
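
/*
 * A minimal sketch of that truncation idea, for illustration only (the
 * helper name, the 64-setting window, and max_width are assumptions; this
 * is not the BDK's actual implementation): find the longest run of
 * passing delay settings, cap an exaggerated run at max_width, and take
 * the midpoint of what remains as the deskew setting.
 */
#if 0
static int sketch_recompute_deskew(uint64_t pass_bitmask, int max_width)
{
    int best_start = 0, best_len = 0, run_start = -1;

    for (int i = 0; i < 64; i++) {
        if ((pass_bitmask >> i) & 1) {
            if (run_start < 0)
                run_start = i;                  /* a passing run begins  */
            if (i - run_start + 1 > best_len) { /* track the longest run */
                best_start = run_start;
                best_len   = i - run_start + 1;
            }
        } else {
            run_start = -1;                     /* the run is broken     */
        }
    }

    if (best_len > max_width) {                 /* truncate symmetrically */
        best_start += (best_len - max_width) / 2;
        best_len = max_width;
    }

    return best_start + best_len / 2;           /* midpoint of the run    */
}
#endif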

/* Default ODT config must disable ODT */
/* Must be const (read only) so that the structure is in flash */
const dimm_odt_config_t disable_odt_config[] = {
	/* DDR4 needs an additional field in the struct (odt_mask2) */
	/* DIMMS   ODT_ENA ODT_MASK   ODT_MASK1      ODT_MASK2     QS_DIC RODT_CTL */
	/* =====   ======= ========   =========      =========     ====== ======== */
	/*   1 */ {   0,    0x0000,   {.u = 0x0000}, {.u = 0x0000},    0,   0x0000  },
	/*   2 */ {   0,    0x0000,   {.u = 0x0000}, {.u = 0x0000},    0,   0x0000  },
	/*   3 */ {   0,    0x0000,   {.u = 0x0000}, {.u = 0x0000},    0,   0x0000  },
	/*   4 */ {   0,    0x0000,   {.u = 0x0000}, {.u = 0x0000},    0,   0x0000  },
};
/* Memory controller setup function */
static int init_octeon_dram_interface(bdk_node_t node,
				      const ddr_configuration_t *ddr_configuration,
				      uint32_t ddr_hertz,
				      uint32_t cpu_hertz,
				      uint32_t ddr_ref_hertz,
				      int board_type,
				      int board_rev_maj,
				      int board_rev_min,
				      int ddr_interface_num,
				      uint32_t ddr_interface_mask)
{
    uint32_t mem_size_mbytes = 0;
    int lmc_restart_retries = 0;

    const char *s;
    if ((s = lookup_env_parameter("ddr_timing_hertz")) != NULL)
	ddr_hertz = strtoul(s, NULL, 0);

 restart_lmc_init:

    /* Poke the watchdog timer so it doesn't expire during DRAM init */
    bdk_watchdog_poke();

    mem_size_mbytes = init_octeon3_ddr3_interface(node,
						  ddr_configuration,
						  ddr_hertz,
						  cpu_hertz,
						  ddr_ref_hertz,
						  board_type,
						  board_rev_maj,
						  board_rev_min,
						  ddr_interface_num,
						  ddr_interface_mask);
#define DEFAULT_RESTART_RETRIES 3
    if (mem_size_mbytes == 0) { // means restart is possible
        if (lmc_restart_retries < DEFAULT_RESTART_RETRIES) {
            lmc_restart_retries++;
            ddr_print("N%d.LMC%d Configuration problem: attempting LMC reset and init restart %d\n",
                        node, ddr_interface_num, lmc_restart_retries);
            // re-assert RESET first, as that is the assumption of the init code
            if (!ddr_memory_preserved(node))
                cn88xx_lmc_ddr3_reset(node, ddr_interface_num, LMC_DDR3_RESET_ASSERT);
            goto restart_lmc_init;
        } else {
            error_print("INFO: N%d.LMC%d Configuration: fatal problem remains after %d LMC init retries - Resetting node...\n",
                        node, ddr_interface_num, lmc_restart_retries);
            bdk_wait_usec(500000);
            bdk_reset_chip(node);
        }
    }

    printf("N%d.LMC%d Configuration Completed: %d MB\n",
           node, ddr_interface_num, mem_size_mbytes);
    return mem_size_mbytes;
}

#define DO_LIKE_RANDOM_XOR 1

#if !DO_LIKE_RANDOM_XOR
/*
 * Suggested testing patterns.
 *
 *  0xFFFF_FFFF_FFFF_FFFF
 *  0xAAAA_AAAA_AAAA_AAAA
 *  0xFFFF_FFFF_FFFF_FFFF
 *  0xAAAA_AAAA_AAAA_AAAA
 *  0x5555_5555_5555_5555
 *  0xAAAA_AAAA_AAAA_AAAA
 *  0xFFFF_FFFF_FFFF_FFFF
 *  0xAAAA_AAAA_AAAA_AAAA
 *  0xFFFF_FFFF_FFFF_FFFF
 *  0x5555_5555_5555_5555
 *  0xFFFF_FFFF_FFFF_FFFF
 *  0x5555_5555_5555_5555
 *  0xAAAA_AAAA_AAAA_AAAA
 *  0x5555_5555_5555_5555
 *  0xFFFF_FFFF_FFFF_FFFF
 *  0x5555_5555_5555_5555
 *
 *  or possibly
 *
 *  0xFDFD_FDFD_FDFD_FDFD
 *  0x8787_8787_8787_8787
 *  0xFEFE_FEFE_FEFE_FEFE
 *  0xC3C3_C3C3_C3C3_C3C3
 *  0x7F7F_7F7F_7F7F_7F7F
 *  0xE1E1_E1E1_E1E1_E1E1
 *  0xBFBF_BFBF_BFBF_BFBF
 *  0xF0F0_F0F0_F0F0_F0F0
 *  0xDFDF_DFDF_DFDF_DFDF
 *  0x7878_7878_7878_7878
 *  0xEFEF_EFEF_EFEF_EFEF
 *  0x3C3C_3C3C_3C3C_3C3C
 *  0xF7F7_F7F7_F7F7_F7F7
 *  0x1E1E_1E1E_1E1E_1E1E
 *  0xFBFB_FBFB_FBFB_FBFB
 *  0x0F0F_0F0F_0F0F_0F0F
 */

static const uint64_t test_pattern[] = {
    0xAAAAAAAAAAAAAAAAULL,
    0x5555555555555555ULL,
    0xAAAAAAAAAAAAAAAAULL,
    0x5555555555555555ULL,
    0xAAAAAAAAAAAAAAAAULL,
    0x5555555555555555ULL,
    0xAAAAAAAAAAAAAAAAULL,
    0x5555555555555555ULL,
    0xAAAAAAAAAAAAAAAAULL,
    0x5555555555555555ULL,
    0xAAAAAAAAAAAAAAAAULL,
    0x5555555555555555ULL,
    0xAAAAAAAAAAAAAAAAULL,
    0x5555555555555555ULL,
    0xAAAAAAAAAAAAAAAAULL,
    0x5555555555555555ULL,
    0xAAAAAAAAAAAAAAAAULL,
    0x5555555555555555ULL,
    0xAAAAAAAAAAAAAAAAULL,
    0x5555555555555555ULL,
    0xAAAAAAAAAAAAAAAAULL,
    0x5555555555555555ULL,
    0xAAAAAAAAAAAAAAAAULL,
    0x5555555555555555ULL,
    0xAAAAAAAAAAAAAAAAULL,
    0x5555555555555555ULL,
    0xAAAAAAAAAAAAAAAAULL,
    0x5555555555555555ULL,
    0xAAAAAAAAAAAAAAAAULL,
    0x5555555555555555ULL,
    0xAAAAAAAAAAAAAAAAULL,
    0x5555555555555555ULL,
};
#endif  /* !DO_LIKE_RANDOM_XOR */

int test_dram_byte(bdk_node_t node, int lmc, uint64_t p, uint64_t bitmask, uint64_t *xor_data)
{
    uint64_t p1, p2, d1, d2;
    uint64_t v, v1;
    uint64_t p2offset = 0x10000000;
    uint64_t datamask;
    uint64_t xor;
    int i, j, k;
    int errors = 0;
    int index;
#if DO_LIKE_RANDOM_XOR
    uint64_t pattern1 = bdk_rng_get_random64();
    uint64_t this_pattern;
#endif
    uint64_t bad_bits[2] = {0,0};

    // When doing in parallel, the caller must provide full 8-byte bitmask.
    // Byte lanes may be clear in the mask to indicate no testing on that lane.
    datamask = bitmask;

    // final address must include LMC and node
    p |= (lmc<<7); /* Map address into proper interface */
    p = bdk_numa_get_address(node, p); /* Map to node */

    // Not on THUNDER:	p |= 1ull<<63;

    /* Add offset to both test regions to not clobber boot stuff
     * when running from L2.
     */
    p += 0x10000000; // FIXME? was: 0x4000000; // make sure base is out of the way of boot

    /* The loop ranges and increments walk through a range of addresses avoiding bits that alias
     * to different memory interfaces (LMCs) on the CN88XX; ie we want to limit activity to a
     * single memory channel.
     */

    /* Store something into each location first */
    // NOTE: the ordering of loops is purposeful: fill full cachelines and flush
    for (k = 0; k < (1 << 20); k += (1 << 14)) {
	for (j = 0; j < (1 << 12); j += (1 << 9)) {
	    for (i = 0; i < (1 << 7); i += 8) {
		index = i + j + k;
		p1 = p + index;
		p2 = p1 + p2offset;

#if DO_LIKE_RANDOM_XOR
		v = pattern1 * p1;
		v1 = v; // write the same thing to both areas
#else
		v = 0ULL;
		v1 = v;
#endif
		__bdk_dram_write64(p1, v);
		__bdk_dram_write64(p2, v1);

		/* Write back and invalidate the cache lines
		 *
		 * For OCX we cannot limit the number of L2 ways
		 * so instead we just write back and invalidate
		 * the L2 cache lines.  This is not possible
		 * when booting remotely, however so this is
		 * only enabled for U-Boot right now.
		 * Potentially the BDK can also take advantage
		 * of this.
		 */
		BDK_CACHE_WBI_L2(p1);
		BDK_CACHE_WBI_L2(p2);
	    }
	}
    }

    BDK_DCACHE_INVALIDATE;

#if DO_LIKE_RANDOM_XOR
    this_pattern = bdk_rng_get_random64();
#endif

    // modify the contents of each location in some way
    // NOTE: the ordering of loops is purposeful: modify full cachelines and flush
    for (k = 0; k < (1 << 20); k += (1 << 14)) {
	for (j = 0; j < (1 << 12); j += (1 << 9)) {
	    for (i = 0; i < (1 << 7); i += 8) {
		index = i + j + k;
		p1 = p + index;
		p2 = p1 + p2offset;
#if DO_LIKE_RANDOM_XOR
		v  = __bdk_dram_read64(p1) ^ this_pattern;
		v1 = __bdk_dram_read64(p2) ^ this_pattern;
#else
		v = test_pattern[index%(sizeof(test_pattern)/sizeof(uint64_t))];
		v &= datamask;
		v1 = ~v;
#endif

		debug_print("[0x%016llX]: 0x%016llX, [0x%016llX]: 0x%016llX\n",
		            p1, v, p2, v1);

		__bdk_dram_write64(p1, v);
		__bdk_dram_write64(p2, v1);

		/* Write back and invalidate the cache lines
		 *
		 * For OCX we cannot limit the number of L2 ways
		 * so instead we just write back and invalidate
		 * the L2 cache lines.  This is not possible
		 * when booting remotely, however so this is
		 * only enabled for U-Boot right now.
		 * Potentially the BDK can also take advantage
		 * of this.
		 */
		BDK_CACHE_WBI_L2(p1);
		BDK_CACHE_WBI_L2(p2);
	    }
	}
    }

    BDK_DCACHE_INVALIDATE;

    // test the contents of each location by predicting what should be there
    // NOTE: the ordering of loops is purposeful: test full cachelines to detect
    //       an error occurring in any slot thereof
    for (k = 0; k < (1 << 20); k += (1 << 14)) {
	for (j = 0; j < (1 << 12); j += (1 << 9)) {
	    for (i = 0; i < (1 << 7); i += 8) {
		index = i + j + k;
		p1 = p + index;
		p2 = p1 + p2offset;
#if DO_LIKE_RANDOM_XOR
		v = (p1 * pattern1) ^ this_pattern; // FIXME: this should predict what we find...???
		d1 = __bdk_dram_read64(p1);
		d2 = __bdk_dram_read64(p2);
#else
		v = test_pattern[index%(sizeof(test_pattern)/sizeof(uint64_t))];
		d1 = __bdk_dram_read64(p1);
		d2 = ~__bdk_dram_read64(p2);
#endif
		debug_print("[0x%016llX]: 0x%016llX, [0x%016llX]: 0x%016llX\n",
                            p1, d1, p2, d2);

		xor = ((d1 ^ v) | (d2 ^ v)) & datamask; // union of error bits only in active byte lanes

                if (!xor)
                    continue;

                // accumulate bad bits
                bad_bits[0] |= xor;
                //bad_bits[1] |= ~mpr_data1 & 0xffUL; // cannot do ECC here

		int bybit = 1;
		uint64_t bymsk = 0xffULL; // start in byte lane 0
		while (xor != 0) {
		    debug_print("ERROR: [0x%016llX] [0x%016llX]  expected 0x%016llX xor %016llX\n",
				p1, p2, v, xor);
		    if (xor & bymsk) { // error(s) in this lane
			errors |= bybit; // set the byte error bit
			xor &= ~bymsk; // clear byte lane in error bits
			datamask &= ~bymsk; // clear the byte lane in the mask
			if (datamask == 0) { // nothing left to do
			    goto done_now; // completely done when errors found in all byte lanes in datamask
			}
		    }
		    bymsk <<= 8; // move mask into next byte lane
		    bybit <<= 1; // move bit into next byte position
		}
	    }
	}
    }

 done_now:
    if (xor_data != NULL) { // send the bad bits back...
        xor_data[0] = bad_bits[0];
        xor_data[1] = bad_bits[1]; // let it be zeroed
    }
    return errors;
}
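
/*
 * Hypothetical caller sketch (illustrative, not part of the driver): the
 * return value is a per-byte-lane error mask, so bit N set means byte
 * lane N of the 64-bit data bus miscompared somewhere in the region.
 */
#if 0
    uint64_t xor_data[2];
    int lane_errs = test_dram_byte(node, 0 /* LMC0 */, 0 /* base */,
                                   0xffffffffffffffffULL /* test all lanes */,
                                   xor_data);
    for (int lane = 0; lane < 8; lane++)
        if (lane_errs & (1 << lane))
            printf("byte lane %d failed; accumulated bad bits 0x%016llx\n",
                   lane, (unsigned long long)xor_data[0]);
#endif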

// NOTE: "mode" argument:
//         DBTRAIN_TEST: for testing using GP patterns, includes ECC
//         DBTRAIN_DBI:  for DBI deskew training behavior (uses GP patterns)
//         DBTRAIN_LFSR: for testing using LFSR patterns, includes ECC
// NOTE: trust the caller to specify the correct/supported mode
//
int test_dram_byte_hw(bdk_node_t node, int ddr_interface_num,
                      uint64_t p, int mode, uint64_t *xor_data)
{
    uint64_t p1;
    uint64_t k;
    int errors = 0;

    uint64_t mpr_data0, mpr_data1;
    uint64_t bad_bits[2] = {0,0};

    int node_address, lmc, dimm;
    int prank, lrank;
    int bank, row, col;
    int save_or_dis;
    int byte;
    int ba_loop, ba_bits;

    bdk_lmcx_rlevel_ctl_t rlevel_ctl;
    bdk_lmcx_dbtrain_ctl_t dbtrain_ctl;

    int bank_errs;

    // FIXME: K iterations set to 4 for now.
    // FIXME: decrement to increase iterations.
    // FIXME: must be no less than 22 to stay above an LMC hash field.
    int kshift = 26;
    const char *s;

    // allow override default setting for kshift
    if ((s = getenv("ddr_tune_set_kshift")) != NULL) {
        int temp = strtoul(s, NULL, 0);
        if ((temp < 22) || (temp > 27)) {
            ddr_print("N%d.LMC%d: ILLEGAL override of kshift to %d, using default %d\n",
                      node, ddr_interface_num, temp, kshift);
        } else {
            VB_PRT(VBL_DEV2, "N%d.LMC%d: overriding kshift (%d) to %d\n",
                   node, ddr_interface_num, kshift, temp);
            kshift = temp;
        }
    }

    /*
      1) Make sure that RLEVEL_CTL[OR_DIS] = 0.
    */
    rlevel_ctl.u = BDK_CSR_READ(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num));
    save_or_dis = rlevel_ctl.s.or_dis;
    rlevel_ctl.s.or_dis = 0;    /* or_dis must be disabled for this sequence */
    DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num), rlevel_ctl.u);

    /*
      NOTE: this step done in the calling routine(s)
      3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern of choice.
        a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower (rising edge) 64 bits of data.
        b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper (falling edge) 64 bits of data.
        c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower (rising edge <7:0>) and upper
           (falling edge <15:8>) ECC data.
     */

    // final address must include LMC and node
    p |= (ddr_interface_num << 7); /* Map address into proper interface */
    p = bdk_numa_get_address(node, p); /* Map to node */

    /*
     * Add base offset to both test regions to not clobber u-boot stuff
     * when running from L2 for NAND boot.
     */
    p += 0x10000000; // offset to 256MB

    errors = 0;

    bdk_dram_address_extract_info(p, &node_address, &lmc, &dimm, &prank, &lrank, &bank, &row, &col);
    VB_PRT(VBL_DEV2, "test_dram_byte_hw: START at A:0x%012llx, N%d L%d D%d R%d/%d B%1x Row:%05x Col:%05x\n",
           p, node_address, lmc, dimm, prank, lrank, bank, row, col);

    // only check once per call, and ignore if no match...
    if ((int)node != node_address) {
        error_print("ERROR: Node address mismatch; ignoring...\n");
        return 0;
    }
    if (lmc != ddr_interface_num) {
        error_print("ERROR: LMC address mismatch\n");
        return 0;
    }

    /*
      7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as it’s a one-shot operation).
         This is to get into the habit of resetting PHY’s SILO to the original 0 location.
    */
    BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
		   phy_ctl.s.phy_reset = 1);

    /* Walk through a range of addresses avoiding bits that alias
     * interfaces on the CN88XX.
     */

    // FIXME: want to try to keep the K increment from affecting the LMC via hash,
    // FIXME: so keep it above bit 21
    // NOTE:  we also want to keep k less than the base offset of bit 28 (256MB)

    for (k = 0; k < (1UL << 28); k += (1UL << kshift)) {

	// FIXME: the sequence will iterate over 1/2 cacheline
	// FIXME: for each unit specified in "read_cmd_count",
	// FIXME: so, we setup each sequence to do the max cachelines it can

	p1 = p + k;

	bdk_dram_address_extract_info(p1, &node_address, &lmc, &dimm, &prank, &lrank, &bank, &row, &col);
        VB_PRT(VBL_DEV3, "test_dram_byte_hw: NEXT iteration at A:0x%012llx, N%d L%d D%d R%d/%d B%1x Row:%05x Col:%05x\n",
               p1, node_address, lmc, dimm, prank, lrank, bank, row, col);

        /*
          2) Setup the fields of the CSR DBTRAIN_CTL as follows:
            a. COL, ROW, BA, BG, PRANK point to the starting point of the address.
               You can just set them to all 0.
            b. RW_TRAIN – set this to 1.
            c. TCCD_L – set this to 0.
            d. READ_CMD_COUNT – instructs the sequence how many writes/reads to perform.
               It is a 5-bit field, so set it to 31 for the maximum number of r/w.
        */
        dbtrain_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DBTRAIN_CTL(ddr_interface_num));
        dbtrain_ctl.s.column_a       = col;
        dbtrain_ctl.s.row_a          = row;
        dbtrain_ctl.s.bg             = (bank >> 2) & 3;
        dbtrain_ctl.s.prank          = (dimm * 2) + prank; // FIXME?
        dbtrain_ctl.s.lrank          = lrank; // FIXME?
        dbtrain_ctl.s.activate       = (mode == DBTRAIN_DBI);
        dbtrain_ctl.s.write_ena      = 1;
        dbtrain_ctl.s.read_cmd_count = 31; // max count pass 1.x
        if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) // added 81xx and 83xx
            dbtrain_ctl.s.cmd_count_ext = 3; // max count pass 2.x
        else
            dbtrain_ctl.s.cmd_count_ext = 0; // max count pass 1.x
        dbtrain_ctl.s.rw_train       = 1;
        dbtrain_ctl.s.tccd_sel       = (mode == DBTRAIN_DBI);

        // LFSR should only be on when chip supports it...
        dbtrain_ctl.s.lfsr_pattern_sel = (mode == DBTRAIN_LFSR) ? 1 : 0;

        bank_errs = 0;

	// for each address, iterate over the 4 "banks" in the BA
	for (ba_loop = 0, ba_bits = bank & 3;
	     ba_loop < 4;
	     ba_loop++, ba_bits = (ba_bits + 1) & 3)
	{
            dbtrain_ctl.s.ba = ba_bits;
            DRAM_CSR_WRITE(node, BDK_LMCX_DBTRAIN_CTL(ddr_interface_num), dbtrain_ctl.u);

            VB_PRT(VBL_DEV3, "test_dram_byte_hw: DBTRAIN: Pr:%d Lr:%d Bg:%d Ba:%d Row:%05x Col:%05x\n",
                   dbtrain_ctl.s.prank, dbtrain_ctl.s.lrank,
                   dbtrain_ctl.s.bg, dbtrain_ctl.s.ba, row, col);
	    /*
	      4) Kick off the sequence (SEQ_CTL[SEQ_SEL] = 14, SEQ_CTL[INIT_START] = 1).
	      5) Poll on SEQ_CTL[SEQ_COMPLETE] for completion.
	    */
	    perform_octeon3_ddr3_sequence(node, prank, ddr_interface_num, 14);

	    /*
	      6) Read MPR_DATA0 and MPR_DATA1 for results:
	        a. MPR_DATA0[MPR_DATA<63:0>] – comparison results for DQ63:DQ0.
	           (1 means MATCH, 0 means FAIL).
	        b. MPR_DATA1[MPR_DATA<7:0>] – comparison results for ECC bit7:0.
	    */
	    mpr_data0 = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA0(ddr_interface_num));
	    mpr_data1 = BDK_CSR_READ(node, BDK_LMCX_MPR_DATA1(ddr_interface_num));

	    /*
	      7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as it’s a one-shot operation).
	      This is to get into the habit of resetting PHY’s SILO to the original 0 location.
	    */
	    BDK_CSR_MODIFY(phy_ctl, node, BDK_LMCX_PHY_CTL(ddr_interface_num),
			   phy_ctl.s.phy_reset = 1);

            if (mode == DBTRAIN_DBI)
                continue; // bypass any error checking or updating when DBI mode

            // data bytes
            if (~mpr_data0) {
                for (byte = 0; byte < 8; byte++) {
                    if ((~mpr_data0 >> (8 * byte)) & 0xffUL)
                        bank_errs |= (1 << byte);
                }
                // accumulate bad bits
                bad_bits[0] |= ~mpr_data0;
            }

            // include ECC byte errors
            if (~mpr_data1 & 0xffUL) {
                bank_errs |= (1 << 8);
                bad_bits[1] |= ~mpr_data1 & 0xffUL;
            }

	} /* for (int ba_loop = 0; ba_loop < 4; ba_loop++) */

        errors |= bank_errs;

    } /* end for (k=...) */

    rlevel_ctl.s.or_dis = save_or_dis;
    DRAM_CSR_WRITE(node, BDK_LMCX_RLEVEL_CTL(ddr_interface_num), rlevel_ctl.u);

    if ((mode != DBTRAIN_DBI) && (xor_data != NULL)) { // send the bad bits back...
        xor_data[0] = bad_bits[0];
        xor_data[1] = bad_bits[1];
    }

    return errors;
}
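
/*
 * Hypothetical caller sketch (illustrative only): bits 0..7 of the return
 * value flag failing data byte lanes and bit 8 flags the ECC lane,
 * mirroring how bank_errs is accumulated above.
 */
#if 0
    int errs = test_dram_byte_hw(node, 0 /* LMC0 */, 0 /* base */,
                                 DBTRAIN_TEST, NULL);
    for (int lane = 0; lane < 8; lane++)
        if (errs & (1 << lane))
            printf("data byte lane %d failed\n", lane);
    if (errs & (1 << 8))
        printf("ECC byte lane failed\n");
#endif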

static void set_ddr_memory_preserved(bdk_node_t node)
{
    global_ddr_memory_preserved |= 0x1 << node;
}

int ddr_memory_preserved(bdk_node_t node)
{
    return (global_ddr_memory_preserved & (0x1 << node)) != 0;
}

void perform_ddr_init_sequence(bdk_node_t node, int rank_mask,
				       int ddr_interface_num)
{
    const char *s;
    int ddr_init_loops = 1;
    int rankx;

    if ((s = lookup_env_parameter("ddr%d_init_loops", ddr_interface_num)) != NULL)
	ddr_init_loops = strtoul(s, NULL, 0);

    while (ddr_init_loops--) {
	for (rankx = 0; rankx < 8; rankx++) {
	    if (!(rank_mask & (1 << rankx)))
		continue;

            perform_octeon3_ddr3_sequence(node, (1 << rankx),
                                          ddr_interface_num, 0); /* power-up/init */

	    bdk_wait_usec(1000);   /* Wait a while. */

	    if ((s = lookup_env_parameter("ddr_sequence1")) != NULL) {
		int sequence1;
		sequence1 = strtoul(s, NULL, 0);
		perform_octeon3_ddr3_sequence(node, (1 << rankx),
					      ddr_interface_num, sequence1);
	    }

	    if ((s = lookup_env_parameter("ddr_sequence2")) != NULL) {
		int sequence2;
		sequence2 = strtoul(s, NULL, 0);
		perform_octeon3_ddr3_sequence(node, (1 << rankx),
					      ddr_interface_num, sequence2);
	    }
	}
    }
}

static void set_ddr_clock_initialized(bdk_node_t node, int ddr_interface, int inited_flag)
{
	int bit = node * 8 + ddr_interface;
	if (inited_flag)
		global_ddr_clock_initialized |= (0x1 << bit);
	else
		global_ddr_clock_initialized &= ~(0x1 << bit);
}

static int ddr_clock_initialized(bdk_node_t node, int ddr_interface)
{
	int bit = node * 8 + ddr_interface;
	return (!!(global_ddr_clock_initialized & (0x1 << bit)));
}


static void cn78xx_lmc_dreset_init (bdk_node_t node, int ddr_interface_num)
{
        /*
	 * This is the embodiment of the 6.9.4 LMC DRESET Initialization section below.
	 *
         * The remainder of this section describes the sequence for LMCn.
         *
         * 1. If not done already, write LMC(0..3)_DLL_CTL2 to its reset value
         *    (except without changing the LMC(0..3)_DLL_CTL2[INTF_EN] value from
         *    that set in the prior Step 3), including LMC(0..3)_DLL_CTL2[DRESET] = 1.
         *
         * 2. Without changing any other LMC(0..3)_DLL_CTL2 fields, write
         *    LMC(0..3)_DLL_CTL2[DLL_BRINGUP] = 1.
         */

        DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(ddr_interface_num),
			c.s.dll_bringup = 1);

        /*
         * 3. Read LMC(0..3)_DLL_CTL2 and wait for the result.
         */

        BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(ddr_interface_num));

        /*
         * 4. Wait for a minimum of 10 LMC CK cycles.
         */

        bdk_wait_usec(1);

        /*
         * 5. Without changing any other fields in LMC(0..3)_DLL_CTL2, write
         *    LMC(0..3)_DLL_CTL2[QUAD_DLL_ENA] = 1.
         *    LMC(0..3)_DLL_CTL2[QUAD_DLL_ENA] must not change after this point
         *    without restarting the LMCn DRESET initialization sequence.
         */

        DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(ddr_interface_num),
			c.s.quad_dll_ena = 1);

        /*
         * 6. Read LMC(0..3)_DLL_CTL2 and wait for the result.
         */

        BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(ddr_interface_num));

        /*
         * 7. Wait a minimum of 10 us.
         */

        bdk_wait_usec(10);

        /*
         * 8. Without changing any other fields in LMC(0..3)_DLL_CTL2, write
         *    LMC(0..3)_DLL_CTL2[DLL_BRINGUP] = 0.
         *    LMC(0..3)_DLL_CTL2[DLL_BRINGUP] must not change after this point
         *    without restarting the LMCn DRESET initialization sequence.
         */

        DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(ddr_interface_num),
			c.s.dll_bringup = 0);

        /*
         * 9. Read LMC(0..3)_DLL_CTL2 and wait for the result.
         */

        BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(ddr_interface_num));

        /*
         * 10. Without changing any other fields in LMC(0..3)_DLL_CTL2, write
         *    LMC(0..3)_DLL_CTL2[DRESET] = 0.
         *    LMC(0..3)_DLL_CTL2[DRESET] must not change after this point without
         *    restarting the LMCn DRESET initialization sequence.
         *
         *    After completing LMCn DRESET initialization, all LMC CSRs may be
         *    accessed.  Prior to completing LMC DRESET initialization, only
         *    LMC(0..3)_DDR_PLL_CTL, LMC(0..3)_DLL_CTL2, LMC(0..3)_RESET_CTL, and
         *    LMC(0..3)_COMP_CTL2 LMC CSRs can be accessed.
         */

        DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(ddr_interface_num),
			c.s.dreset = 0);

        /*
	 * NEW STEP - necessary for O73, O78 P2.0, O75, and T88 P2.0
	 * McBuggin: #24821
	 *
         * 11. Wait for a minimum of 10 LMC CK cycles.
         */

        bdk_wait_usec(1);
}

/*static*/ void cn88xx_lmc_ddr3_reset(bdk_node_t node, int ddr_interface_num, int reset)
{
    /*
     * 4. Deassert DDRn_RESET_L pin by writing LMC(0..3)_RESET_CTL[DDR3RST] = 1
     *    without modifying any other LMC(0..3)_RESET_CTL fields.
     * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
     * 6. Wait a minimum of 500us. This guarantees the necessary T = 500us
     *    delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE* assertion.
     */
    ddr_print("LMC%d %s DDR_RESET_L\n", ddr_interface_num,
	      (reset == LMC_DDR3_RESET_DEASSERT) ? "De-asserting" : "Asserting");
    DRAM_CSR_MODIFY(c, node, BDK_LMCX_RESET_CTL(ddr_interface_num),
		    c.cn8.ddr3rst = reset);
    BDK_CSR_READ(node, BDK_LMCX_RESET_CTL(ddr_interface_num));
    bdk_wait_usec(500);
}

int initialize_ddr_clock(bdk_node_t node,
			 const ddr_configuration_t *ddr_configuration,
                         uint32_t cpu_hertz,
                         uint32_t ddr_hertz,
                         uint32_t ddr_ref_hertz,
                         int ddr_interface_num,
                         uint32_t ddr_interface_mask
                         )
{
    const char *s;

    if (ddr_clock_initialized(node, ddr_interface_num))
        return 0;

    if (!ddr_clock_initialized(node, 0)) { /* Do this once */
        int i;
        bdk_lmcx_reset_ctl_t reset_ctl;
        /* Check to see if memory is to be preserved and set global flag */
        for (i=3; i>=0; --i) {
            if ((ddr_interface_mask & (1 << i)) == 0)
                continue;
            reset_ctl.u = BDK_CSR_READ(node, BDK_LMCX_RESET_CTL(i));
            if (reset_ctl.s.ddr3psv == 1) {
                ddr_print("LMC%d Preserving memory\n", i);
                set_ddr_memory_preserved(node);

                /* Re-initialize flags */
                reset_ctl.cn8.ddr3pwarm = 0;
                reset_ctl.cn8.ddr3psoft = 0;
                reset_ctl.s.ddr3psv   = 0;
                DRAM_CSR_WRITE(node, BDK_LMCX_RESET_CTL(i), reset_ctl.u);
            }
        }
    }

    if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX)) {

        bdk_lmcx_ddr_pll_ctl_t ddr_pll_ctl;
        const dimm_config_t *dimm_config_table = ddr_configuration->dimm_config_table;

        /* ddr_type only indicates DDR4 or DDR3 */
        int ddr_type = get_ddr_type(node, &dimm_config_table[0]);

        /*
         * 6.9 LMC Initialization Sequence
         *
         * There are 14 parts to the LMC initialization procedure:
         *
         * 1. LMC interface enable initialization
         *
         * 2. DDR PLL initialization
         *
         * 3. LMC CK initialization
         *
         * 4. LMC DRESET initialization
         *
         * 5. LMC CK local initialization
         *
         * 6. LMC RESET initialization
         *
         * 7. Early LMC initialization
         *
         * 8. LMC offset training
         *
         * 9. LMC internal Vref training
         *
         * 10. LMC deskew training
         *
         * 11. LMC write leveling
         *
         * 12. LMC read leveling
         *
         * 13. DRAM Vref Training for DDR4
         *
         * 14. Final LMC initialization
         *
         * CN88XX supports two modes:
         *
         * - two-LMC mode: both LMCs 2/3 must not be enabled
         *   (LMC2/3_DLL_CTL2[DRESET] must be set to 1 and LMC2/3_DLL_CTL2[INTF_EN]
         *   must be set to 0) and both LMCs 0/1 must be enabled.
         *
         * - four-LMC mode: all four LMCs 0..3 must be enabled.
         *
         * Steps 4 and 6..14 should each be performed for each enabled LMC (either
         * twice or four times). Steps 1..3 and 5 are more global in nature and
         * each must be executed exactly once (not once per LMC) each time the
         * DDR PLL changes or is first brought up. Steps 1..3 and 5 need not be
         * performed if the DDR PLL is stable.
         *
         * Generally, the steps are performed in order. The exception is that the
         * CK local initialization (step 5) must be performed after some DRESET
         * initializations (step 4) and before other DRESET initializations when
         * the DDR PLL is brought up or changed. (The CK local initialization
         * uses information from some LMCs to bring up the other local CKs.) The
         * following text describes these ordering requirements in more detail.
         *
         * Following any chip reset, the DDR PLL must be brought up, and all 14
         * steps should be executed. Subsequently, it is possible to execute only
         * steps 4 and 6..14, or to execute only steps 8..14.
         *
         * The remainder of this section covers these initialization steps in
         * sequence.
         */

        if (ddr_interface_num == 0) { /* Do this once */
            bdk_lmcx_dll_ctl2_t	dll_ctl2;
            int loop_interface_num;

            /*
             * 6.9.1 LMC Interface-Enable Initialization
             *
             * LMC interface-enable initialization (Step 1) must be performed only
             * once, not once per LMC in four-LMC mode. This step is not required
	     * in two-LMC mode.
	     *
             * Perform the following three substeps for the LMC interface-enable
             * initialization:
             *
             * 1. Without changing any other LMC2_DLL_CTL2 fields (LMC(0..3)_DLL_CTL2
             *    should be at their reset values after Step 1), write
             *    LMC2_DLL_CTL2[INTF_EN] = 1 if four-LMC mode is desired.
             *
             * 2. Without changing any other LMC3_DLL_CTL2 fields, write
             *    LMC3_DLL_CTL2[INTF_EN] = 1 if four-LMC mode is desired.
             *
             * 3. Read LMC2_DLL_CTL2 and wait for the result.
             *
             * The LMC2_DLL_CTL2[INTF_EN] and LMC3_DLL_CTL2[INTF_EN] values should
             * not be changed by software from this point.
             *
             */

	    /* Put all LMCs into DRESET here; these are the reset values... */
            for (loop_interface_num = 0; loop_interface_num < 4; ++loop_interface_num) {
                if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
                    continue;

                dll_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(loop_interface_num));

                dll_ctl2.s.byp_setting          = 0;
                dll_ctl2.s.byp_sel              = 0;
                dll_ctl2.s.quad_dll_ena         = 0;
                dll_ctl2.s.dreset               = 1;
                dll_ctl2.s.dll_bringup          = 0;
                dll_ctl2.s.intf_en              = 0;

                DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL2(loop_interface_num), dll_ctl2.u);
            }

	    /* Now set INTF_EN for *ONLY* LMC2/3 if they are to be active on 88XX. */
	    /* Do *NOT* touch LMC0/1 INTF_EN=0 setting on 88XX. */
            /* But we do have to set LMC1 INTF_EN=1 on 83XX if we want it active... */
            /* Note that 81xx has only LMC0 so the mask should reflect that. */
            for (loop_interface_num = (CAVIUM_IS_MODEL(CAVIUM_CN83XX)) ? 1 : 2;
                 loop_interface_num < 4; ++loop_interface_num) {
                if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
                    continue;

                DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL2(loop_interface_num),
				c.s.intf_en = 1);
                BDK_CSR_READ(node, BDK_LMCX_DLL_CTL2(loop_interface_num));
            }

            /*
             * 6.9.2 DDR PLL Initialization
             *
             * DDR PLL initialization (Step 2) must be performed for each chip reset
             * and whenever the DDR clock speed changes. This step needs to be
             * performed only once, not once per LMC.
             *
             * Perform the following eight substeps to initialize the DDR PLL:
             *
             * 1. If not done already, write all fields in LMC(0..1)_DDR_PLL_CTL and
             *    LMC(0..1)_DLL_CTL2 to their reset values, including:
             *
             * .. LMC0_DDR_PLL_CTL[DDR_DIV_RESET] = 1
             * .. LMC0_DLL_CTL2[DRESET] = 1
             *
             * This substep is not necessary after a chip reset.
             *
             */

            ddr_pll_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(0));

            ddr_pll_ctl.cn83xx.reset_n           = 0;
            ddr_pll_ctl.cn83xx.ddr_div_reset     = 1;
            ddr_pll_ctl.cn83xx.phy_dcok          = 0;
            ddr_pll_ctl.cn83xx.dclk_invert       = 0;

            // allow override of LMC0 desired setting for DCLK_INVERT
            if ((s = lookup_env_parameter("ddr0_set_dclk_invert")) != NULL) {
                ddr_pll_ctl.cn83xx.dclk_invert = !!strtoul(s, NULL, 0);
                ddr_print("LMC0: override DDR_PLL_CTL[dclk_invert] to %d\n",
                          ddr_pll_ctl.cn83xx.dclk_invert);
            }

            // always write LMC0 CSR, it must be active
            DRAM_CSR_WRITE(node, BDK_LMCX_DDR_PLL_CTL(0), ddr_pll_ctl.u);
            ddr_print("%-45s : 0x%016llx\n", "LMC0: DDR_PLL_CTL", ddr_pll_ctl.u);

            // only when LMC1 is active
            // NOTE: 81xx has only 1 LMC, and 83xx can operate in 1-LMC mode
            if (ddr_interface_mask & 0x2) {

                ddr_pll_ctl.cn83xx.dclk_invert       ^= 1; /* DEFAULT: Toggle dclk_invert from LMC0 */

                // allow override of LMC1 desired setting for DCLK_INVERT
                if ((s = lookup_env_parameter("ddr1_set_dclk_invert")) != NULL) {
                    ddr_pll_ctl.cn83xx.dclk_invert = !!strtoul(s, NULL, 0);
                    ddr_print("LMC1: override DDR_PLL_CTL[dclk_invert] to %d\n",
                              ddr_pll_ctl.cn83xx.dclk_invert);
                }

                // always write LMC1 CSR when it is active
                DRAM_CSR_WRITE(node, BDK_LMCX_DDR_PLL_CTL(1), ddr_pll_ctl.u);
                ddr_print("%-45s : 0x%016llx\n", "LMC1: DDR_PLL_CTL", ddr_pll_ctl.u);
            }

            /*
             * 2. If the current DRAM contents are not preserved (see
             *    LMC(0..3)_RESET_CTL[DDR3PSV]), this is also an appropriate time to
             *    assert the RESET# pin of the DDR3/DDR4 DRAM parts. If desired, write
             *    LMC0_RESET_CTL[DDR3RST] = 0 without modifying any other
             *    LMC0_RESET_CTL fields to assert the DDR_RESET_L pin. No action is
             *    required here to assert DDR_RESET_L following a chip reset. Refer to
             *    Section 6.9.6. Do this for all enabled LMCs.
             */

            for (loop_interface_num = 0;
                 ( !ddr_memory_preserved(node)) && loop_interface_num < 4;
                 ++loop_interface_num)
	    {

                if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
                    continue;

		cn88xx_lmc_ddr3_reset(node, loop_interface_num, LMC_DDR3_RESET_ASSERT);
            }

            /*
             * 3. Without changing any other LMC0_DDR_PLL_CTL values, write
             *    LMC0_DDR_PLL_CTL[CLKF] with a value that gives a desired DDR PLL speed. The
             *    LMC0_DDR_PLL_CTL[CLKF] value should be selected in conjunction with
             *    the post-scalar divider values for LMC (LMC0_DDR_PLL_CTL[DDR_PS_EN])
             *    so that the desired LMC CK speeds are produced (all enabled LMCs
             *    must run the same speed). Section 5.14 describes
             *    LMC0_DDR_PLL_CTL[CLKF] and LMC0_DDR_PLL_CTL[DDR_PS_EN] programmings
             *    that produce the desired LMC CK speed. Section 6.9.3 describes LMC CK
             *    initialization, which can be done separately from the DDR PLL
             *    initialization described in this section.
             *
             * The LMC0_DDR_PLL_CTL[CLKF] value must not change after this point
             * without restarting this SDRAM PLL initialization sequence.
             */

            {
                /* CLKF = (DCLK * (CLKR+1) * EN(1, 2, 3, 4, 5, 6, 7, 8, 10, 12))/DREF - 1 */
                int en_idx, save_en_idx, best_en_idx=0;
                uint64_t clkf, clkr, max_clkf = 127;
                uint64_t best_clkf=0, best_clkr=0;
                uint64_t best_pll_MHz = 0;
                uint64_t pll_MHz;
                uint64_t min_pll_MHz = 800;
                uint64_t max_pll_MHz = 5000;
                uint64_t error;
                uint64_t best_error;
                uint64_t best_calculated_ddr_hertz = 0;
                uint64_t calculated_ddr_hertz = 0;
		uint64_t orig_ddr_hertz = ddr_hertz;
                static const int _en[] = {1, 2, 3, 4, 5, 6, 7, 8, 10, 12};
                int override_pll_settings;
                int new_bwadj;

                error = best_error = ddr_hertz;  /* Init to max error */

                ddr_print("DDR Reference Hertz = %d\n", ddr_ref_hertz);

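                /*
                 * Worked example (illustrative values, assuming a 50 MHz
                 * reference clock and an 800 MHz DCLK target): with clkr=0
                 * and a post-scalar divider of _en[]=2,
                 *   clkf    = 800e6 * (0+1) * 2 / 50e6 - 1 = 31
                 *   pll_MHz = 50e6 * (31+1) / (0+1) / 1e6  = 1600
                 *   dclk    = 50e6 * (31+1) / ((0+1) * 2)  = 800e6  (error 0)
                 * 1600 MHz falls inside the 800..5000 MHz PLL window, so the
                 * search below would accept this candidate.
                 */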
                while (best_error == ddr_hertz) {

		    for (clkr = 0; clkr < 4; ++clkr) {
			for (en_idx=sizeof(_en)/sizeof(int)-1; en_idx>=0; --en_idx) {
			    save_en_idx = en_idx;
			    clkf = ((ddr_hertz) * (clkr+1) * (_en[save_en_idx]));
			    clkf = divide_nint(clkf, ddr_ref_hertz) - 1;
			    pll_MHz = ddr_ref_hertz * (clkf+1) / (clkr+1) / 1000000;
			    calculated_ddr_hertz = ddr_ref_hertz * (clkf + 1) / ((clkr + 1) * (_en[save_en_idx]));
			    error = ddr_hertz - calculated_ddr_hertz;

			    if ((pll_MHz < min_pll_MHz) || (pll_MHz > max_pll_MHz)) continue;
			    if (clkf > max_clkf) continue; /* PLL requires clkf to be limited */
			    if (_abs(error) > _abs(best_error)) continue;

			    VB_PRT(VBL_TME, "clkr: %2llu, en[%d]: %2d, clkf: %4llu, pll_MHz: %4llu, ddr_hertz: %8llu, error: %8lld\n",
                                    clkr, save_en_idx, _en[save_en_idx], clkf, pll_MHz, calculated_ddr_hertz, error);

			    /* Favor the highest PLL frequency. */
			    if ((_abs(error) < _abs(best_error)) || (pll_MHz > best_pll_MHz)) {
				best_pll_MHz = pll_MHz;
				best_calculated_ddr_hertz = calculated_ddr_hertz;
				best_error = error;
				best_clkr = clkr;
				best_clkf = clkf;
				best_en_idx = save_en_idx;
			    }
			}
		    }

		    override_pll_settings = 0;

		    if ((s = lookup_env_parameter("ddr_pll_clkr")) != NULL) {
			best_clkr = strtoul(s, NULL, 0);
			override_pll_settings = 1;
		    }
		    if ((s = lookup_env_parameter("ddr_pll_clkf")) != NULL) {
			best_clkf = strtoul(s, NULL, 0);
			override_pll_settings = 1;
		    }
		    if ((s = lookup_env_parameter("ddr_pll_en_idx")) != NULL) {
			best_en_idx = strtoul(s, NULL, 0);
			override_pll_settings = 1;
		    }

		    if (best_en_idx >= ARRAY_SIZE(_en)) {
			error_print("ERROR: best_en_idx %u exceeds _en array size\n", best_en_idx);
			return -1;
		    }

		    if (override_pll_settings) {
			best_pll_MHz = ddr_ref_hertz * (best_clkf+1) / (best_clkr+1) / 1000000;
			best_calculated_ddr_hertz = ddr_ref_hertz * (best_clkf + 1) / ((best_clkr + 1) * (_en[best_en_idx]));
			best_error = ddr_hertz - best_calculated_ddr_hertz;
		    }

		    ddr_print("clkr: %2llu, en[%d]: %2d, clkf: %4llu, pll_MHz: %4llu, ddr_hertz: %8llu, error: %8lld <==\n",
			      best_clkr, best_en_idx, _en[best_en_idx], best_clkf, best_pll_MHz,
			      best_calculated_ddr_hertz, best_error);

		    /* Try lowering the frequency if we can't get a working configuration */
		    if (best_error == ddr_hertz) {
			if (ddr_hertz < orig_ddr_hertz - 10000000)
			    break;
			ddr_hertz -= 1000000;
			best_error = ddr_hertz;
		    }

		} /* while (best_error == ddr_hertz) */


                if (best_error == ddr_hertz) {
                    error_print("ERROR: Can not compute a legal DDR clock speed configuration.\n");
                    return(-1);
                }

                new_bwadj = (best_clkf + 1) / 10;
                VB_PRT(VBL_TME, "bwadj: %2d\n", new_bwadj);

                if ((s = lookup_env_parameter("ddr_pll_bwadj")) != NULL) {
                    new_bwadj = strtoul(s, NULL, 0);
                    VB_PRT(VBL_TME, "bwadj: %2d\n", new_bwadj);
                }

                for (loop_interface_num = 0; loop_interface_num<2; ++loop_interface_num) {
                    if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
                        continue;

                    // make sure we preserve any settings already there
                    ddr_pll_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
                    ddr_print("LMC%d: DDR_PLL_CTL                             : 0x%016llx\n",
                              loop_interface_num, ddr_pll_ctl.u);

                    ddr_pll_ctl.cn83xx.ddr_ps_en = best_en_idx;
                    ddr_pll_ctl.cn83xx.clkf = best_clkf;
                    ddr_pll_ctl.cn83xx.clkr = best_clkr;
                    ddr_pll_ctl.cn83xx.reset_n = 0;
                    ddr_pll_ctl.cn83xx.bwadj = new_bwadj;

                    DRAM_CSR_WRITE(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num), ddr_pll_ctl.u);
                    ddr_print("LMC%d: DDR_PLL_CTL                             : 0x%016llx\n",
                              loop_interface_num, ddr_pll_ctl.u);
                }
            }


            for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) {
                if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
                    continue;

		/*
		 * 4. Read LMC0_DDR_PLL_CTL and wait for the result.
		 */

		BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));

		/*
		 * 5. Wait a minimum of 3 us.
		 */

		bdk_wait_usec(3);          /* Wait 3 us */

		/*
		 * 6. Write LMC0_DDR_PLL_CTL[RESET_N] = 1 without changing any other
		 *    LMC0_DDR_PLL_CTL values.
		 */

		DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num),
				c.cn83xx.reset_n = 1);

		/*
		 * 7. Read LMC0_DDR_PLL_CTL and wait for the result.
		 */

		BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));

		/*
		 * 8. Wait a minimum of 25 us.
		 */

		bdk_wait_usec(25);          /* Wait 25 us */

            } /* for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) */

            for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) {
                if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
                    continue;
		/*
		 * 6.9.3 LMC CK Initialization
		 *
		 * DDR PLL initialization must be completed prior to starting LMC CK
		 * initialization.
		 *
		 * Perform the following substeps to initialize the LMC CK. Perform
		 * substeps 1..3 for both LMC0 and LMC1.
		 *
		 * 1. Without changing any other LMC(0..3)_DDR_PLL_CTL values, write
		 *    LMC(0..3)_DDR_PLL_CTL[DDR_DIV_RESET] = 1 and
		 *    LMC(0..3)_DDR_PLL_CTL[DDR_PS_EN] with the appropriate value to get the
		 *    desired LMC CK speed. Section 5.14 discusses CLKF and DDR_PS_EN
		 *    programmings.  The LMC(0..3)_DDR_PLL_CTL[DDR_PS_EN] must not change
		 *    after this point without restarting this LMC CK initialization
		 *    sequence.
		 */
1262 		 */
1263 
1264 		DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num),
1265 				c.cn83xx.ddr_div_reset = 1);
1266 
1267 		/*
1268 		 * 2. Without changing any other fields in LMC(0..3)_DDR_PLL_CTL, write
1269 		 *    LMC(0..3)_DDR_PLL_CTL[DDR4_MODE] = 0.
1270 		 */
1271 
1272 		DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num),
1273 				c.cn83xx.ddr4_mode = (ddr_type == DDR4_DRAM) ? 1 : 0);
1274 
1275 		/*
1276 		 * 3. Read LMC(0..3)_DDR_PLL_CTL and wait for the result.
1277 		 */
1278 
1279 		BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
1280 
1281 		/*
1282 		 * 4. Wait a minimum of 1 us.
1283 		 */
1284 
1285 		bdk_wait_usec(1);          /* Wait 1 us */
1286 
1287 		/*
1288 		 * 5. Without changing any other fields in LMC(0..3)_DDR_PLL_CTL, write
1289 		 *    LMC(0..3)_DDR_PLL_CTL[PHY_DCOK] = 1.
1290 		 */
1291 
1292 		DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num),
1293 				c.cn83xx.phy_dcok = 1);
1294 
1295 		/*
1296 		 * 6. Read LMC(0..3)_DDR_PLL_CTL and wait for the result.
1297 		 */
1298 
1299 		BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
1300 
1301 		/*
1302 		 * 7. Wait a minimum of 20 us.
1303 		 */
1304 
1305 		bdk_wait_usec(20);          /* Wait 20 us */
1306 
1307 		/*
1308 		 * 8. Without changing any other LMC(0..3)_COMP_CTL2 values, write
1309 		 *    LMC(0..3)_COMP_CTL2[CK_CTL,CONTROL_CTL,CMD_CTL] to the desired
1310 		 *    DDR*_CK_*_P control and command signals drive strength.
1311 		 */
1312 
1313 		{
1314 		    bdk_lmcx_comp_ctl2_t comp_ctl2;
1315 		    const ddr3_custom_config_t *custom_lmc_config = &ddr_configuration->custom_lmc_config;
1316 
1317 		    comp_ctl2.u = BDK_CSR_READ(node, BDK_LMCX_COMP_CTL2(loop_interface_num));
1318 
1319 		    comp_ctl2.s.dqx_ctl  = 4; /* Default 4=34.3 ohm */
1320 		    comp_ctl2.s.ck_ctl   =
1321 			(custom_lmc_config->ck_ctl  == 0) ? 4 : custom_lmc_config->ck_ctl;  /* Default 4=34.3 ohm */
1322 		    comp_ctl2.s.cmd_ctl  =
1323 			(custom_lmc_config->cmd_ctl == 0) ? 4 : custom_lmc_config->cmd_ctl; /* Default 4=34.3 ohm */
1324 
1325 		    comp_ctl2.s.rodt_ctl           = 0x4; /* 60 ohm */
1326 
1327 		    // These need to be done here, not later in Step 6.9.7.
1328 		    // NOTE: these are/will be specific to a chip; for now, set to 0
1329 		    // should we provide overrides for these?
1330 		    comp_ctl2.s.ntune_offset    = 0;
1331 		    comp_ctl2.s.ptune_offset    = 0;
1332 
1333 		    // now do any overrides...
1334 		    if ((s = lookup_env_parameter("ddr_ck_ctl")) != NULL) {
1335 			comp_ctl2.s.ck_ctl  = strtoul(s, NULL, 0);
1336 		    }
1337 
1338 		    if ((s = lookup_env_parameter("ddr_cmd_ctl")) != NULL) {
1339 			comp_ctl2.s.cmd_ctl  = strtoul(s, NULL, 0);
1340 		    }
1341 
1342 		    if ((s = lookup_env_parameter("ddr_dqx_ctl")) != NULL) {
1343 			comp_ctl2.s.dqx_ctl  = strtoul(s, NULL, 0);
1344 		    }
1345 
1346 		    DRAM_CSR_WRITE(node, BDK_LMCX_COMP_CTL2(loop_interface_num), comp_ctl2.u);
1347 		}
1348 
1349 		/*
1350 		 * 9. Read LMC(0..3)_DDR_PLL_CTL and wait for the result.
1351 		 */
1352 
1353 		BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
1354 
1355 		/*
1356 		 * 10. Wait a minimum of 200 ns.
1357 		 */
1358 
1359 		bdk_wait_usec(1);          /* Wait 1 us */
1360 
1361 		/*
1362 		 * 11. Without changing any other LMC(0..3)_DDR_PLL_CTL values, write
1363 		 *     LMC(0..3)_DDR_PLL_CTL[DDR_DIV_RESET] = 0.
1364 		 */
1365 
1366 		DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num),
1367 				c.cn83xx.ddr_div_reset = 0);
1368 
1369 		/*
1370 		 * 12. Read LMC(0..3)_DDR_PLL_CTL and wait for the result.
1371 		 */
1372 
1373 		BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(loop_interface_num));
1374 
1375 		/*
1376 		 * 13. Wait a minimum of 200 ns.
1377 		 */
1378 		bdk_wait_usec(1);          /* Wait 1 us, exceeding the 200 ns minimum */
1379 
1380 	    } /* for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) */
1381 
1382         }  /* if (ddr_interface_num == 0) */ /* Do this once */
1383 
1384         if (ddr_interface_num == 0) { /* Do this once */
1385             bdk_lmcx_dll_ctl3_t ddr_dll_ctl3;
1386 
1387             /*
1388              * 6.9.4 LMC DRESET Initialization
1389              *
1390              * All of the DDR PLL, LMC global CK, and LMC interface enable
1391              * initializations must be completed prior to starting this LMC DRESET
1392              * initialization (Step 4).
1393              *
1394              * This LMC DRESET step is done for all enabled LMCs.
1395              *
1396              * There are special constraints on the ordering of DRESET initialization
1397              * (Step 4) and CK local initialization (Step 5) whenever CK local
1398              * initialization must be executed.  CK local initialization must be
1399              * executed whenever the DDR PLL is being brought up (for each chip reset
1400              * and whenever the DDR clock speed changes).
1401              *
1402              * When Step 5 must be executed in the two-LMC mode case:
1403              * - LMC0 DRESET initialization must occur before Step 5.
1404              * - LMC1 DRESET initialization must occur after Step 5.
1405              *
1406              * When Step 5 must be executed in the four-LMC mode case:
1407              * - LMC2 and LMC3 DRESET initialization must occur before Step 5.
1408              * - LMC0 and LMC1 DRESET initialization must occur after Step 5.
1409              */
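            /*
             * In short, the code below implements the ordering above:
             *   one-/two-LMC mode: DRESET(LMC0), then Step 5, then DRESET(LMC1) if present.
             *   four-LMC mode:     DRESET(LMC2) and DRESET(LMC3), then Step 5,
             *                      then DRESET(LMC0) and DRESET(LMC1).
             */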
1410 
1411             if ((ddr_interface_mask == 0x1) || (ddr_interface_mask == 0x3)) {
1412                 /* ONE-LMC MODE FOR 81XX AND 83XX BEFORE STEP 5 */
1413                 /* TWO-LMC MODE BEFORE STEP 5 */
1414                 cn78xx_lmc_dreset_init(node, 0);
1415 
1416             } else if (ddr_interface_mask == 0xf) {
1417                 /* FOUR-LMC MODE BEFORE STEP 5 */
1418                 cn78xx_lmc_dreset_init(node, 2);
1419                 cn78xx_lmc_dreset_init(node, 3);
1420             }
1421 
1422             /*
1423              * 6.9.5 LMC CK Local Initialization
1424              *
1425              * All of DDR PLL, LMC global CK, and LMC interface-enable
1426              * initializations must be completed prior to starting this LMC CK local
1427              * initialization (Step 5).
1428              *
1429              * LMC CK Local initialization must be performed for each chip reset and
1430              * whenever the DDR clock speed changes. This step needs to be performed
1431              * only once, not once per LMC.
1432              *
1433              * There are special constraints on the ordering of DRESET initialization
1434              * (Step 4) and CK local initialization (Step 5) whenever CK local
1435              * initialization must be executed.  CK local initialization must be
1436              * executed whenever the DDR PLL is being brought up (for each chip reset
1437              * and whenever the DDR clock speed changes).
1438              *
1439              * When Step 5 must be executed in the two-LMC mode case:
1440              * - LMC0 DRESET initialization must occur before Step 5.
1441              * - LMC1 DRESET initialization must occur after Step 5.
1442              *
1443              * When Step 5 must be executed in the four-LMC mode case:
1444              * - LMC2 and LMC3 DRESET initialization must occur before Step 5.
1445              * - LMC0 and LMC1 DRESET initialization must occur after Step 5.
1446              *
1447              * LMC CK local initialization is different depending on whether two-LMC
1448              * or four-LMC modes are desired.
1449              */
1450 
1451             if (ddr_interface_mask == 0x3) {
1452 		/*
1453 		 * 6.9.5.1 LMC CK Local Initialization for Two-LMC Mode
1454 		 *
1455 		 * 1. Write LMC0_DLL_CTL3 to its reset value. (Note that
1456 		 *    LMC0_DLL_CTL3[DLL90_BYTE_SEL] = 0x2 .. 0x8 should also work.)
1457 		 */
1458 
1459 		ddr_dll_ctl3.u = 0;
1460 		ddr_dll_ctl3.s.dclk90_recal_dis = 1;
1461 		ddr_dll_ctl3.s.dll90_byte_sel = 1;
1462 		DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(0),	ddr_dll_ctl3.u);
1463 
1464 		/*
1465 		 * 2. Read LMC0_DLL_CTL3 and wait for the result.
1466 		 */
1467 
1468 		BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(0));
1469 
1470 		/*
1471 		 * 3. Without changing any other fields in LMC0_DLL_CTL3, write
1472 		 *    LMC0_DLL_CTL3[DCLK90_FWD] = 1.  Writing LMC0_DLL_CTL3[DCLK90_FWD] = 1
1473 		 *    causes clock-delay information to be forwarded from LMC0 to LMC1.
1474 		 */
1475 
1476 		ddr_dll_ctl3.s.dclk90_fwd = 1;
1477 		DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(0),	ddr_dll_ctl3.u);
1478 
1479 		/*
1480 		 * 4. Read LMC0_DLL_CTL3 and wait for the result.
1481 		 */
1482 
1483 		BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(0));
1484             } /* if (ddr_interface_mask == 0x3) */
1485 
1486             if (ddr_interface_mask == 0xf) {
1487 		/*
1488 		 * 6.9.5.2 LMC CK Local Initialization for Four-LMC Mode
1489 		 *
1490 		 * 1. Write LMC2_DLL_CTL3 to its reset value except
1491 		 *    LMC2_DLL_CTL3[DLL90_BYTE_SEL] = 0x7.
1492 		 */
1493 
1494 		ddr_dll_ctl3.u = 0;
1495 		ddr_dll_ctl3.s.dclk90_recal_dis = 1;
1496 		ddr_dll_ctl3.s.dll90_byte_sel = 7;
1497 		DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(2),	ddr_dll_ctl3.u);
1498 
1499 		/*
1500 		 * 2. Write LMC3_DLL_CTL3 to its reset value except
1501 		 *    LMC3_DLL_CTL3[DLL90_BYTE_SEL] = 0x0.
1502 		 */
1503 
1504 		ddr_dll_ctl3.u = 0;
1505 		ddr_dll_ctl3.s.dclk90_recal_dis = 1;
1506 		ddr_dll_ctl3.s.dll90_byte_sel = 0; /* HRM wants 0, not 2 */
1507 		DRAM_CSR_WRITE(node, BDK_LMCX_DLL_CTL3(3),	ddr_dll_ctl3.u); /* HRM wants LMC3 */
1508 
1509 		/*
1510 		 * 3. Read LMC3_DLL_CTL3 and wait for the result.
1511 		 */
1512 
1513 		BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(3));
1514 
1515 		/*
1516 		 * 4. Without changing any other fields in LMC2_DLL_CTL3, write
1517 		 *    LMC2_DLL_CTL3[DCLK90_FWD] = 1 and LMC2_DLL_CTL3[DCLK90_RECAL_DIS] = 1.
1518 		 *    Writing LMC2_DLL_CTL3[DCLK90_FWD] = 1 causes LMC 2 to forward
1519 		 *    clock-delay information to LMC0. Setting
1520 		 *    LMC2_DLL_CTL3[DCLK90_RECAL_DIS] to 1 prevents LMC2 from periodically
1521 		 *    recalibrating this delay information.
1522 		 */
1523 
1524 		DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(2),
1525 				c.s.dclk90_fwd = 1;
1526 				c.s.dclk90_recal_dis = 1);
1527 
1528 		/*
1529 		 * 5. Without changing any other fields in LMC3_DLL_CTL3, write
1530 		 *    LMC3_DLL_CTL3[DCLK90_FWD] = 1 and LMC3_DLL_CTL3[DCLK90_RECAL_DIS] = 1.
1531 		 *    Writing LMC3_DLL_CTL3[DCLK90_FWD] = 1 causes LMC3 to forward
1532 		 *    clock-delay information to LMC1. Setting
1533 		 *    LMC3_DLL_CTL3[DCLK90_RECAL_DIS] to 1 prevents LMC3 from periodically
1534 		 *    recalibrating this delay information.
1535 		 */
1536 
1537 		DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(3),
1538 				c.s.dclk90_fwd = 1;
1539 				c.s.dclk90_recal_dis = 1);
1540 
1541 		/*
1542 		 * 6. Read LMC3_DLL_CTL3 and wait for the result.
1543 		 */
1544 
1545 		BDK_CSR_READ(node, BDK_LMCX_DLL_CTL3(3));
1546             } /* if (ddr_interface_mask == 0xf) */
1547 
1548 
1549             /* ONE-LMC MODE AFTER STEP 5 - NOTHING */
1550 
1551             /* TWO-LMC MODE AFTER STEP 5 */
1552             if (ddr_interface_mask == 0x3) {
1553                 cn78xx_lmc_dreset_init(node, 1);
1554             }
1555 
1556             /* FOUR-LMC MODE AFTER STEP 5 */
1557             if (ddr_interface_mask == 0xf) {
1558                 cn78xx_lmc_dreset_init(node, 0);
1559                 cn78xx_lmc_dreset_init(node, 1);
1560 
1561 		/* Enable periodic recalibration of the DDR90 delay line in LMC0 and LMC1. */
1562 		DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(0),
1563 				c.s.dclk90_recal_dis = 0);
1564 		DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(1),
1565 				c.s.dclk90_recal_dis = 0);
1566             }
1567 
1568 
1569             /* Enable fine tune mode for all LMCs */
1570             for (int lmc = 0; lmc<4; ++lmc) {
1571                 if ((ddr_interface_mask & (1 << lmc)) == 0)
1572                     continue;
1573 		DRAM_CSR_MODIFY(c, node, BDK_LMCX_DLL_CTL3(lmc),
1574 				c.s.fine_tune_mode = 1);
1575             }
1576 
1577             /* Enable the trim circuit on the appropriate channels to
1578                adjust the DDR clock duty cycle for chips that support
1579                it. */
1580             if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
1581                 bdk_lmcx_phy_ctl_t lmc_phy_ctl;
1582                 int loop_interface_num;
1583 
1584                 for (loop_interface_num = 0; loop_interface_num<4; ++loop_interface_num) {
1585                     if ((ddr_interface_mask & (1 << loop_interface_num)) == 0)
1586                         continue;
1587 
1588                     lmc_phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(loop_interface_num));
1589                     lmc_phy_ctl.cn83xx.lv_mode = (~loop_interface_num) & 1; /* Odd LMCs = 0, Even LMCs = 1 */
1590 
1591                     ddr_print("LMC%d: PHY_CTL                                 : 0x%016llx\n",
1592                               loop_interface_num, lmc_phy_ctl.u);
1593                     DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(loop_interface_num), lmc_phy_ctl.u);
1594                 }
1595             }
1596 
1597         } /* Do this once */
1598 
1599     } /* if (CAVIUM_IS_MODEL(CAVIUM_CN8XXX)) */
1600 
1601     set_ddr_clock_initialized(node, ddr_interface_num, 1);
1602     return(0);
1603 }
1604 void
1605 perform_lmc_reset(bdk_node_t node, int ddr_interface_num)
1606 {
1607     /*
1608      * 6.9.6 LMC RESET Initialization
1609      *
1610      * The purpose of this step is to assert/deassert the RESET# pin at the
1611      * DDR3/DDR4 parts.
1612      *
1613      * This LMC RESET step is done for all enabled LMCs.
1614      *
1615      * It may be appropriate to skip this step if the DDR3/DDR4 DRAM parts
1616      * are in self refresh and are currently preserving their
1617      * contents. (Software can determine this via
1618      * LMC(0..3)_RESET_CTL[DDR3PSV] in some circumstances.) The remainder of
1619      * this section assumes that the DRAM contents need not be preserved.
1620      *
1621      * The remainder of this section assumes that the CN78XX DDRn_RESET_L pin
1622      * is attached to the RESET# pin of the attached DDR3/DDR4 parts, as will
1623      * be appropriate in many systems.
1624      *
1625      * (In other systems, such as ones that can preserve DDR3/DDR4 part
1626      * contents while CN78XX is powered down, it will not be appropriate to
1627      * directly attach the CN78XX DDRn_RESET_L pin to RESET# of the
1628      * DDR3/DDR4 parts, and this section may not apply.)
1629      *
1630      * The remainder of this section describes the sequence for LMCn.
1631      *
1632      * Perform the following six substeps for LMC reset initialization:
1633      *
1634      * 1. If not done already, assert DDRn_RESET_L pin by writing
1635      *    LMC(0..3)_RESET_CTL[DDR3RST] = 0 without modifying any other
1636      *    LMC(0..3)_RESET_CTL fields.
1637      */
1638 
1639     if ( !ddr_memory_preserved(node)) {
1640         /*
1641          * 2. Read LMC(0..3)_RESET_CTL and wait for the result.
1642          */
1643 
1644         BDK_CSR_READ(node, BDK_LMCX_RESET_CTL(ddr_interface_num));
1645 
1646         /*
1647          * 3. Wait until RESET# assertion-time requirement from JEDEC DDR3/DDR4
1648          *    specification is satisfied (200 us during a power-on ramp, 100 ns when
1649          *    power is already stable).
1650          */
1651 
1652         bdk_wait_usec(200);
1653 
1654         /*
1655          * 4. Deassert DDRn_RESET_L pin by writing LMC(0..3)_RESET_CTL[DDR3RST] = 1
1656          *    without modifying any other LMC(0..3)_RESET_CTL fields.
1657          * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
1658          * 6. Wait a minimum of 500 us. This guarantees the necessary T = 500 us
1659          *    delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE* assertion.
1660          */
1661         cn88xx_lmc_ddr3_reset(node, ddr_interface_num, LMC_DDR3_RESET_DEASSERT);
1662 
1663         /* Toggle Reset Again */
1664         /* That is, assert, then de-assert, one more time */
1665         cn88xx_lmc_ddr3_reset(node, ddr_interface_num, LMC_DDR3_RESET_ASSERT);
1666         cn88xx_lmc_ddr3_reset(node, ddr_interface_num, LMC_DDR3_RESET_DEASSERT);
1667 
1668     } /* if ( !ddr_memory_preserved(node)) */
1669 }
1670 
1671 ///////////////////////////////////////////////////////////
1672 // start of DBI switchover
1673 
1674 /* first pattern example:
1675    GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
1676    GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
1677    GENERAL_PURPOSE2.DATA == 16'h0000;
1678 */
1679 const uint64_t dbi_pattern[3] = { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000ULL };
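// A clk/2 pattern: every data byte toggles 0x00 <-> 0xff on successive beats,
// which should exercise DBI on all lanes; the third, 16-bit word presumably
// feeds the remaining (ECC) lanes.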
1680 
1681 // Perform switchover to DBI
1682 static void dbi_switchover_interface(int node, int lmc)
1683 {
1684     bdk_lmcx_modereg_params0_t modereg_params0;
1685     bdk_lmcx_modereg_params3_t modereg_params3;
1686     bdk_lmcx_phy_ctl_t phy_ctl;
1687     bdk_lmcx_config_t lmcx_config;
1688     bdk_lmcx_ddr_pll_ctl_t ddr_pll_ctl;
1689     int rank_mask, rankx, active_ranks;
1690     uint64_t phys_addr, rank_offset;
1691     int num_lmcs, errors;
1692     int dbi_settings[9], byte, unlocked, retries;
1693     int ecc_ena;
1694     int rank_max = 1; // FIXME: make this 4 to try all the ranks
1695 
1696     ddr_pll_ctl.u = BDK_CSR_READ(node, BDK_LMCX_DDR_PLL_CTL(0));
1697 
1698     lmcx_config.u = BDK_CSR_READ(node, BDK_LMCX_CONFIG(lmc));
1699     rank_mask = lmcx_config.s.init_status;
1700     ecc_ena = lmcx_config.s.ecc_ena;
1701 
1702     // FIXME: must filter out any non-supported configs
1703     //        ie, no DDR3, no x4 devices, no 81XX
1704     if ((ddr_pll_ctl.cn83xx.ddr4_mode == 0)  ||
1705         (lmcx_config.s.mode_x4dev == 1) ||
1706         CAVIUM_IS_MODEL(CAVIUM_CN81XX)      )
1707     {
1708         ddr_print("N%d.LMC%d: DBI switchover: inappropriate device; EXITING...\n",
1709                   node, lmc);
1710         return;
1711     }
1712 
1713     // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
1714     num_lmcs = __bdk_dram_get_num_lmc(node);
1715     rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena + (num_lmcs/2));
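    // Illustrative arithmetic (hypothetical field values): with pbank_lsb = 5,
    // rank_ena = 1 and num_lmcs = 2, the shift is 28 + 5 - 1 + 1 = 33, so
    // rank_offset = 1ull << 33 = 8 GB per rank.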
1716 
1717     ddr_print("N%d.LMC%d: DBI switchover: rank mask 0x%x, rank size 0x%016llx.\n",
1718 	      node, lmc, rank_mask, (unsigned long long)rank_offset);
1719 
1720     /* 1. Conduct the current init sequence as usual, all the way
1721          through software write leveling.
1722      */
1723 
1724     read_DAC_DBI_settings(node, lmc, /*DBI*/0, dbi_settings);
1725 
1726     display_DAC_DBI_settings(node, lmc, /* DBI */0, ecc_ena, dbi_settings, " INIT");
1727 
1728    /* 2. Set the DBI-related CSRs as below and issue an MR write.
1729          MODEREG_PARAMS3.WR_DBI=1
1730          MODEREG_PARAMS3.RD_DBI=1
1731          PHY_CTL.DBI_MODE_ENA=1
1732     */
1733     modereg_params0.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS0(lmc));
1734 
1735     modereg_params3.u = BDK_CSR_READ(node, BDK_LMCX_MODEREG_PARAMS3(lmc));
1736     modereg_params3.s.wr_dbi = 1;
1737     modereg_params3.s.rd_dbi = 1;
1738     DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS3(lmc), modereg_params3.u);
1739 
1740     phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(lmc));
1741     phy_ctl.s.dbi_mode_ena = 1;
1742     DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(lmc), phy_ctl.u);
1743 
1744     /*
1745         There are two options for the data to send.  Let's start with (1); we could move to (2) in the future:
1746 
1747         1) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 0 (or for older chips where this does not exist)
1748            set the data directly in these registers.  This will yield a clk/2 pattern:
1749            GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
1750            GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
1751            GENERAL_PURPOSE2.DATA == 16'h0000;
1752         2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
1753            here data comes from the LFSR generating a PRBS pattern
1754            CHAR_CTL.EN = 0
1755            CHAR_CTL.SEL = 0; // for PRBS
1756            CHAR_CTL.DR = 1;
1757            CHAR_CTL.PRBS = setup for whatever type of PRBS to send
1758            CHAR_CTL.SKEW_ON = 1;
1759     */
1760     DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE0(lmc), dbi_pattern[0]);
1761     DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE1(lmc), dbi_pattern[1]);
1762     DRAM_CSR_WRITE(node, BDK_LMCX_GENERAL_PURPOSE2(lmc), dbi_pattern[2]);
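    // GENERAL_PURPOSE0/1 carry the two 64-bit data words and GENERAL_PURPOSE2
    // the 16-bit word (presumably the ECC lanes), matching option (1) above.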
1763 
1764     /*
1765       3. Adjust cas_latency (only necessary if RD_DBI is set).
1766          Here is the simulation-model code for doing this:
1767 
1768          if (csr_model.MODEREG_PARAMS3.RD_DBI.value == 1) begin
1769            case (csr_model.MODEREG_PARAMS0.CL.value)
1770              0,1,2,3,4: csr_model.MODEREG_PARAMS0.CL.value += 2; // CL 9-13 -> 11-15
1771              5: begin
1772                 // CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3
1773                 if((csr_model.MODEREG_PARAMS0.CWL.value==1 || csr_model.MODEREG_PARAMS0.CWL.value==3))
1774                   csr_model.MODEREG_PARAMS0.CL.value = 7; // 14->16
1775                 else
1776                   csr_model.MODEREG_PARAMS0.CL.value = 13; // 14->17
1777                 end
1778              6: csr_model.MODEREG_PARAMS0.CL.value = 8; // 15->18
1779              7: csr_model.MODEREG_PARAMS0.CL.value = 14; // 16->19
1780              8: csr_model.MODEREG_PARAMS0.CL.value = 15; // 18->21
1781              default:
1782              `cn_fatal(("Error mem_cfg (%s) CL (%d) with RD_DBI=1, I am not sure what to do.",
1783                         mem_cfg, csr_model.MODEREG_PARAMS3.RD_DBI.value))
1784            endcase
1785         end
1786     */
1787     if (modereg_params3.s.rd_dbi == 1) {
1788         int old_cl, new_cl, old_cwl;
1789 
1790         old_cl  = modereg_params0.s.cl;
1791         old_cwl = modereg_params0.s.cwl;
1792 
1793         switch (old_cl) {
1794         case 0: case 1: case 2: case 3: case 4: new_cl = old_cl + 2; break; // 9-13->11-15
1795         // CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3
1796         case 5: new_cl = ((old_cwl == 1) || (old_cwl == 3)) ? 7 : 13; break;
1797         case 6: new_cl =  8; break; // 15->18
1798         case 7: new_cl = 14; break; // 16->19
1799         case 8: new_cl = 15; break; // 18->21
1800         default:
1801             error_print("ERROR: Bad CL value (%d) for DBI switchover.\n", old_cl);
1802             // FIXME: need to error exit here...
1803             old_cl = -1;
1804             new_cl = -1;
1805             break;
1806         }
1807         ddr_print("N%d.LMC%d: DBI switchover: CL ADJ: old_cl 0x%x, old_cwl 0x%x, new_cl 0x%x.\n",
1808                   node, lmc, old_cl, old_cwl, new_cl);
1809         modereg_params0.s.cl = new_cl;
1810         DRAM_CSR_WRITE(node, BDK_LMCX_MODEREG_PARAMS0(lmc), modereg_params0.u);
1811     }
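    // Worked example of the mapping above: old_cl = 5 (CL 14) with old_cwl = 1
    // or 3 (CWL 10 or 12) yields new_cl = 7 (CL 16); any other CWL for CL 14
    // yields new_cl = 13 (CL 17).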
1812 
1813     /*
1814       4. Issue an MRW to MR0 (CL) and MR5 (DBI), using LMC sequence SEQ_CTL[SEQ_SEL] = MRW.
1815      */
1816     // Use the default values from the CSR fields;
1817     // also, do the B-sides for RDIMMs...
1818 
1819     for (rankx = 0; rankx < 4; rankx++) {
1820         if (!(rank_mask & (1 << rankx)))
1821             continue;
1822 
1823         // for RDIMMs, B-side writes should get done automatically when the A-side is written
1824         ddr4_mrw(node, lmc, rankx, -1/* use_default*/,   0/*MRreg*/, 0 /*A-side*/); /* MR0 */
1825         ddr4_mrw(node, lmc, rankx, -1/* use_default*/,   5/*MRreg*/, 0 /*A-side*/); /* MR5 */
1826 
1827     } /* for (rankx = 0; rankx < 4; rankx++) */
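    // Per step 4 above: MR0 carries the (possibly adjusted) CL and MR5 the DBI
    // enables; -1 is taken to tell ddr4_mrw() to use the values already in the
    // CSR fields.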
1828 
1829     /*
1830       5. Conduct DBI bit-deskew training via the General Purpose R/W sequence (dbtrain).
1831          We may need to run this repeatedly to get a lock (up to 5 passes were needed in simulation):
1832          SEQ_CTL[SEQ_SEL] = RW_TRAINING (15)
1833          DBTRAIN_CTL.CMD_COUNT_EXT = all 1's
1834          DBTRAIN_CTL.READ_CMD_COUNT = all 1's
1835          DBTRAIN_CTL.TCCD_SEL = set according to MODEREG_PARAMS3[TCCD_L]
1836          DBTRAIN_CTL.RW_TRAIN = 1
1837          DBTRAIN_CTL.READ_DQ_COUNT = don't care
1838          DBTRAIN_CTL.WRITE_ENA = 1;
1839          DBTRAIN_CTL.ACTIVATE = 1;
1840          DBTRAIN_CTL LRANK, PRANK, ROW_A, BG, BA, COLUMN_A = set to a valid address
1841      */
1842 
1843     // NOW - do the training
1844     ddr_print("N%d.LMC%d: DBI switchover: TRAINING begins...\n",
1845                   node, lmc);
1846 
1847     active_ranks = 0;
1848     for (rankx = 0; rankx < rank_max; rankx++) {
1849         if (!(rank_mask & (1 << rankx)))
1850             continue;
1851 
1852         phys_addr = rank_offset * active_ranks;
1853         // FIXME: now done by test_dram_byte_hw()
1854         //phys_addr |= (lmc << 7);
1855         //phys_addr = bdk_numa_get_address(node, phys_addr); // map to node
1856 
1857         active_ranks++;
1858 
1859         retries = 0;
1860 
1861 #if 0
1862         phy_ctl.u = BDK_CSR_READ(node, BDK_LMCX_PHY_CTL(lmc));
1863         phy_ctl.s.phy_reset = 1; // FIXME: this may reset too much?
1864         DRAM_CSR_WRITE(node, BDK_LMCX_PHY_CTL(lmc), phy_ctl.u);
1865 #endif
1866 
1867 restart_training:
1868 
1869         // NOTE: the return value is a bitmask of the erroring byte lanes; we only print it
1870         errors = test_dram_byte_hw(node, lmc, phys_addr, DBTRAIN_DBI, NULL);
1871 
1872         ddr_print("N%d.LMC%d: DBI switchover: TEST: rank %d, phys_addr 0x%llx, errors 0x%x.\n",
1873                   node, lmc, rankx, phys_addr, errors);
1874 
1875         // NEXT - check for locking
1876         unlocked = 0;
1877         read_DAC_DBI_settings(node, lmc, /*DBI*/0, dbi_settings);
1878 
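        // Bit 0 of each per-byte DBI setting is taken as the lock indicator
        // (1 = locked); count the byte lanes that are still unlocked.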
1879         for (byte = 0; byte < (8+ecc_ena); byte++) {
1880             unlocked += (dbi_settings[byte] & 1) ^ 1;
1881         }
1882 
1883         // FIXME: print out the DBI settings array after each rank?
1884         if (rank_max > 1) // only when doing more than 1 rank
1885             display_DAC_DBI_settings(node, lmc, /* DBI */0, ecc_ena, dbi_settings, " RANK");
1886 
1887         if (unlocked > 0) {
1888             ddr_print("N%d.LMC%d: DBI switchover: LOCK: %d still unlocked.\n",
1889                   node, lmc, unlocked);
1890 
1891             retries++;
1892             if (retries < 10) {
1893                 goto restart_training;
1894             } else {
1895                 ddr_print("N%d.LMC%d: DBI switchover: LOCK: %d retries exhausted.\n",
1896                           node, lmc, retries);
1897             }
1898         }
1899     } /* for (rankx = 0; rankx < rank_max; rankx++) */
1900 
1901     // print out the final DBI settings array
1902     display_DAC_DBI_settings(node, lmc, /* DBI */0, ecc_ena, dbi_settings, "FINAL");
1903 }
1904 // end of DBI switchover
1905 ///////////////////////////////////////////////////////////
1906 
1907 static uint32_t measure_octeon_ddr_clock(bdk_node_t node,
1908 				  const ddr_configuration_t *ddr_configuration,
1909 				  uint32_t cpu_hertz,
1910 				  uint32_t ddr_hertz,
1911 				  uint32_t ddr_ref_hertz,
1912 				  int ddr_interface_num,
1913 				  uint32_t ddr_interface_mask)
1914 {
1915 	uint64_t core_clocks;
1916 	uint64_t ddr_clocks;
1917 	uint64_t calc_ddr_hertz;
1918 
1919 	if (ddr_configuration) {
1920 	    if (initialize_ddr_clock(node,
1921 				     ddr_configuration,
1922 				     cpu_hertz,
1923 				     ddr_hertz,
1924 				     ddr_ref_hertz,
1925 				     ddr_interface_num,
1926 				     ddr_interface_mask) != 0)
1927 		return 0;
1928 	}
1929 
1930 	/* Dynamically determine the DDR clock speed */
1931         core_clocks = bdk_clock_get_count(BDK_CLOCK_TIME);
1932         ddr_clocks = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num));
1933         bdk_wait_usec(100000); /* 100ms */
1934         ddr_clocks = BDK_CSR_READ(node, BDK_LMCX_DCLK_CNT(ddr_interface_num)) - ddr_clocks;
1935         core_clocks = bdk_clock_get_count(BDK_CLOCK_TIME) - core_clocks;
1936         calc_ddr_hertz = ddr_clocks * bdk_clock_get_rate(bdk_numa_local(), BDK_CLOCK_TIME) / core_clocks;
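        /* Illustrative numbers: with BDK_CLOCK_TIME at 100 MHz, the ~100 ms
           window gives core_clocks ~= 10,000,000; an 800 MHz DCLK counts
           ddr_clocks ~= 80,000,000, so calc_ddr_hertz ~= 80e6 * 100e6 / 10e6
           = 800 MHz. */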
1937 
1938 	ddr_print("LMC%d: Measured DDR clock: %llu, cpu clock: %u, ddr clocks: %llu\n",
1939 		  ddr_interface_num, calc_ddr_hertz, cpu_hertz, ddr_clocks);
1940 
1941 	/* Check for unreasonable settings. */
1942 	if (calc_ddr_hertz == 0) {
1943 	    error_print("DDR clock misconfigured. Exiting.\n");
1944 	    /* FIXME(dhendrix): We don't exit() in coreboot */
1945 //	    exit(1);
1946 	}
1947 	return calc_ddr_hertz;
1948 }
1949 
1950 int octeon_ddr_initialize(bdk_node_t node,
1951 			  uint32_t cpu_hertz,
1952 			  uint32_t ddr_hertz,
1953 			  uint32_t ddr_ref_hertz,
1954 			  uint32_t ddr_interface_mask,
1955 			  const ddr_configuration_t *ddr_configuration,
1956 			  uint32_t *measured_ddr_hertz,
1957 			  int board_type,
1958 			  int board_rev_maj,
1959 			  int board_rev_min)
1960 {
1961     uint32_t ddr_config_valid_mask = 0;
1962     int memsize_mbytes = 0;
1963     const char *s;
1964     int retval;
1965     int interface_index;
1966     uint32_t ddr_max_speed = 1210000000; /* needs to be this high for DDR4 */
1967     uint32_t calc_ddr_hertz = -1;
1968 
1969 #ifndef OCTEON_SDK_VERSION_STRING
1970 # define OCTEON_SDK_VERSION_STRING "Development Build"
1971 #endif
1972 
1973     ddr_print(OCTEON_SDK_VERSION_STRING": $Revision: 102369 $\n");
1974 
1975 #ifdef CAVIUM_ONLY
1976     /* Override speed restrictions to support internal testing. */
1977     ddr_max_speed = 1210000000;
1978 #endif  /* CAVIUM_ONLY */
1979 
1980     if (ddr_hertz > ddr_max_speed) {
1981 	error_print("DDR clock speed %u exceeds maximum speed supported by "
1982 		    "processor, reducing to %uHz\n",
1983 		    ddr_hertz, ddr_max_speed);
1984 	ddr_hertz = ddr_max_speed;
1985     }
1986 
1987     // Do this earlier so we can return without doing unnecessary things...
1988     /* Check for DIMM 0 socket populated for each LMC present */
1989     for (interface_index = 0; interface_index < 4; ++interface_index) {
1990 	if ((ddr_interface_mask & (1 << interface_index)) &&
1991 	    (validate_dimm(node, &ddr_configuration[interface_index].dimm_config_table[0])) == 1)
1992 	{
1993 	    ddr_config_valid_mask |= (1 << interface_index);
1994 	}
1995     }
1996 
1997     if (CAVIUM_IS_MODEL(CAVIUM_CN88XX)) {
1998 	int four_lmc_mode = 1;
1999 
2000         // Validate that it can only be 2-LMC mode or 4-LMC mode
2001         if ((ddr_config_valid_mask != 0x03) && (ddr_config_valid_mask != 0x0f)) {
2002             puts("ERROR: Invalid LMC configuration detected.\n");
2003             return -1;
2004         }
2005 
2006 	if ((s = lookup_env_parameter("ddr_four_lmc")) != NULL)
2007 	    four_lmc_mode = !!strtoul(s, NULL, 0);
2008 
2009 	if (!four_lmc_mode) {
2010 	    puts("Forcing two-LMC Mode.\n");
2011 	    ddr_config_valid_mask &= ~(3<<2); /* Invalidate LMC[2:3] */
2012 	}
2013     }
2014 
2015     if (!ddr_config_valid_mask) {
2016 	puts("ERROR: No valid DIMMs detected on any DDR interface.\n");
2017 	return -1;
2018     }
2019 
2020     {
2021 	/*
2022 	 * rdf_cnt: Defines the sample point of the LMC response data in
2023 	 * the DDR-clock/core-clock crossing.  For optimal performance
2024 	 * set to 10 * (DDR-clock period / core-clock period) - 1.
2025 	 * To disable set to 0.  All other values are reserved.
2026 	 */
2029 
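	/* Illustrative: cpu_hertz = 1,800,000,000 and ddr_hertz = 900,000,000
	   give rdf_cnt = (10 * 1.8e9) / 9e8 - 1 = 19 with the integer math
	   below. */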
2030 	uint64_t rdf_cnt;
2031 	BDK_CSR_INIT(l2c_ctl, node, BDK_L2C_CTL);
2032 	/* It is more convenient to compute the ratio using clock
2033 	   frequencies rather than clock periods. */
2034 	rdf_cnt = (((uint64_t) 10 * cpu_hertz) / ddr_hertz) - 1;
2035 	rdf_cnt = rdf_cnt<256 ? rdf_cnt : 255;
2036 	l2c_ctl.s.rdf_cnt = rdf_cnt;
2037 
2038 	if ((s = lookup_env_parameter("early_fill_count")) != NULL)
2039 	    l2c_ctl.s.rdf_cnt = strtoul(s, NULL, 0);
2040 
2041 	ddr_print("%-45s : %d, cpu_hertz:%u, ddr_hertz:%u\n", "EARLY FILL COUNT  ",
2042 		  l2c_ctl.s.rdf_cnt, cpu_hertz, ddr_hertz);
2043 	DRAM_CSR_WRITE(node, BDK_L2C_CTL, l2c_ctl.u);
2044     }
2045 
2046     /* Check to see if we should limit the number of L2 ways. */
2047     if ((s = lookup_env_parameter("limit_l2_ways")) != NULL) {
2048         int ways = strtoul(s, NULL, 10);
2049 	limit_l2_ways(node, ways, 1);
2050     }
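    /* For example (hypothetical), setting "limit_l2_ways=8" in the environment
       would restrict the L2 cache to 8 ways via limit_l2_ways(). */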
2051 
2052     /* We measure the DDR frequency by counting DDR clocks.  We can
2053      * confirm or adjust the expected frequency as necessary.  We use
2054      * the measured frequency to make accurate timing calculations
2055      * used to configure the controller.
2056      */
2057     for (interface_index = 0; interface_index < 4; ++interface_index) {
2058 	uint32_t tmp_hertz;
2059 
2060 	if (! (ddr_config_valid_mask & (1 << interface_index)))
2061 	    continue;
2062 
2063     try_again:
2064         // if we are LMC0
2065         if (interface_index == 0) {
2066             // if we are asking for 100 MHz refclk, we can only get it via alternate, so switch to it
2067             if (ddr_ref_hertz == 100000000) {
2068                 DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(0), c.s.dclk_alt_refclk_sel = 1);
2069                 bdk_wait_usec(1000); // wait 1 msec
2070             } else {
2071                 // if we are NOT asking for 100MHz, then reset to (assumed) 50MHz and go on
2072                 DRAM_CSR_MODIFY(c, node, BDK_LMCX_DDR_PLL_CTL(0), c.s.dclk_alt_refclk_sel = 0);
2073                 bdk_wait_usec(1000); // wait 1 msec
2074             }
2075         }
2076 
2077 	tmp_hertz = measure_octeon_ddr_clock(node,
2078 					     &ddr_configuration[interface_index],
2079 					     cpu_hertz,
2080 					     ddr_hertz,
2081 					     ddr_ref_hertz,
2082 					     interface_index,
2083 					     ddr_config_valid_mask);
2084 
2085         // if we are LMC0 and we are asked for 100 MHz refclk,
2086         // we must be sure it is available
2087         // If not, we print an error message, set to 50MHz, and go on...
2088         if ((interface_index == 0) && (ddr_ref_hertz == 100000000)) {
2089             // validate that the clock returned is close enough to the clock desired
2090             // FIXME: is 5% close enough?
2091             int hertz_diff = _abs((int)tmp_hertz - (int)ddr_hertz);
2092             if (hertz_diff > ((int)ddr_hertz * 5 / 100)) { // nope, diff is greater than 5%
2093                 ddr_print("N%d: DRAM init: requested 100 MHz refclk NOT FOUND\n", node);
2094                 ddr_ref_hertz = bdk_clock_get_rate(node, BDK_CLOCK_MAIN_REF);
2095                 set_ddr_clock_initialized(node, 0, 0); // clear the flag before trying again!!
2096                 goto try_again;
2097             } else {
2098                 ddr_print("N%d: DRAM Init: requested 100 MHz refclk FOUND and SELECTED.\n", node);
2099             }
2100         }
2101 
2102 	if (tmp_hertz > 0)
2103 	    calc_ddr_hertz = tmp_hertz;
2104 
2105     } /* for (interface_index = 0; interface_index < 4; ++interface_index) */
2106 
2107     if (measured_ddr_hertz)
2108 	*measured_ddr_hertz = calc_ddr_hertz;
2109 
2110     memsize_mbytes = 0;
2111     for (interface_index = 0; interface_index < 4; ++interface_index) {
2112 	if (! (ddr_config_valid_mask & (1 << interface_index))) { // if LMC has no DIMMs found
2113             if (ddr_interface_mask & (1 << interface_index)) { // but the LMC is present
2114                 for (int i = 0; i < DDR_CFG_T_MAX_DIMMS; i++) {
2115                     // check for slot presence
2116                     if (validate_dimm(node, &ddr_configuration[interface_index].dimm_config_table[i]) == 0)
2117                         printf("N%d.LMC%d.DIMM%d: Not Present\n", node, interface_index, i);
2118                 }
2119                 error_print("N%d.LMC%d Configuration Completed: 0 MB\n", node, interface_index);
2120             }
2121 	    continue;
2122         }
2123 
2124 	retval = init_octeon_dram_interface(node,
2125 					    &ddr_configuration[interface_index],
2126 					    calc_ddr_hertz, /* Configure using measured value */
2127 					    cpu_hertz,
2128 					    ddr_ref_hertz,
2129 					    board_type,
2130 					    board_rev_maj,
2131 					    board_rev_min,
2132 					    interface_index,
2133 					    ddr_config_valid_mask);
2134 	if (retval > 0)
2135 	    memsize_mbytes += retval;
2136     }
2137 
2138     if (memsize_mbytes == 0)
2139 	/* All interfaces failed to initialize, so return error */
2140 	return -1;
2141 
2142     // switch over to DBI mode only for chips that support it, and only when enabled by envvar
2143     if (! CAVIUM_IS_MODEL(CAVIUM_CN88XX_PASS1_X)) { // added 81xx and 83xx
2144         int do_dbi = 0;
2145         if ((s = lookup_env_parameter("ddr_dbi_switchover")) != NULL) {
2146             do_dbi = !!strtoul(s, NULL, 10);
2147         }
2148         if (do_dbi) {
2149             ddr_print("DBI Switchover starting...\n");
2150             for (interface_index = 0; interface_index < 4; ++interface_index) {
2151                 if (! (ddr_config_valid_mask & (1 << interface_index)))
2152                     continue;
2153                 dbi_switchover_interface(node, interface_index);
2154             }
2155             printf("DBI Switchover finished.\n");
2156         }
2157     }
2158 
2159     // limit memory size if desired...
2160     if ((s = lookup_env_parameter("limit_dram_mbytes")) != NULL) {
2161 	unsigned int mbytes = strtoul(s, NULL, 10);
2162 	if (mbytes > 0) {
2163 	    memsize_mbytes = mbytes;
2164 	    printf("Limiting DRAM size to %d MBytes based on limit_dram_mbytes env. variable\n",
2165 		   mbytes);
2166 	}
2167     }
2168 
2169     return memsize_mbytes;
2170 }
2171 
2172