1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Driver for Intel client SoC with integrated memory controller using IBECC
4 *
5 * Copyright (C) 2020 Intel Corporation
6 *
7 * The In-Band ECC (IBECC) IP provides ECC protection to all or specific
8 * regions of the physical memory space. It's used for memory controllers
9 * that don't support the out-of-band ECC which often needs an additional
10 * storage device to each channel for storing ECC data.
11 */
12
13 #include <linux/module.h>
14 #include <linux/init.h>
15 #include <linux/pci.h>
16 #include <linux/slab.h>
17 #include <linux/irq_work.h>
18 #include <linux/llist.h>
19 #include <linux/genalloc.h>
20 #include <linux/edac.h>
21 #include <linux/bits.h>
22 #include <linux/io.h>
23 #include <asm/mach_traps.h>
24 #include <asm/nmi.h>
25 #include <asm/mce.h>
26
27 #include "edac_mc.h"
28 #include "edac_module.h"
29
#define IGEN6_REVISION	"v2.5.1"

#define EDAC_MOD_STR	"igen6_edac"
#define IGEN6_NMI_NAME	"igen6_ibecc"

/* Debug macros: forward to the EDAC print helpers with the "igen6" tag */
#define igen6_printk(level, fmt, arg...)		\
	edac_printk(level, "igen6", fmt, ##arg)

#define igen6_mc_printk(mci, level, fmt, arg...)	\
	edac_mc_chipset_printk(mci, level, "igen6", fmt, ##arg)

/*
 * Extract bits [hi:lo] (both inclusive) of v and shift them down so that
 * bit 'lo' of v becomes bit 0 of the result. The mask is built with
 * GENMASK_ULL(), so the result is a 64-bit unsigned value.
 */
#define GET_BITFIELD(v, lo, hi) (((v) & GENMASK_ULL(hi, lo)) >> (lo))

#define NUM_IMC		2 /* Max memory controllers */
#define NUM_CHANNELS	2 /* Max channels */
#define NUM_DIMMS	2 /* Max DIMMs per channel */

#define _4GB		BIT_ULL(32)
49
/*
 * Offsets and bit definitions of registers in PCI configuration space.
 * In this file CAPID_C/CAPID_E are read with pci_read_config_dword() and
 * ERRSTS/ERRCMD are accessed with pci_{read,write}_config_word().
 */

/* Size of physical memory */
#define TOM_OFFSET		0xa0
/* Top of low usable DRAM */
#define TOLUD_OFFSET		0xbc
/*
 * Capability register C. Note the polarity of CAPID_C_IBECC differs per
 * SoC: ehl_ibecc_available() requires it set, icl_ibecc_available()
 * requires it clear.
 */
#define CAPID_C_OFFSET		0xec
#define CAPID_C_IBECC		BIT(15)

/* Capability register E: IBECC is treated as available when the bit is clear */
#define CAPID_E_OFFSET		0xf0
#define CAPID_E_IBECC		BIT(12)
#define CAPID_E_IBECC_BIT18	BIT(18)

/* Error Status: CE/UE bits are cleared by writing 1s */
#define ERRSTS_OFFSET		0xc8
#define ERRSTS_CE		BIT_ULL(6)
#define ERRSTS_UE		BIT_ULL(7)

/* Error Command: CE/UE bits are set/cleared to enable/disable reporting */
#define ERRCMD_OFFSET		0xca
#define ERRCMD_CE		BIT_ULL(6)
#define ERRCMD_UE		BIT_ULL(7)
72
/*
 * IBECC MMIO base address.
 * The value comes from the active res_config, so res_cfg must be valid
 * before any IBECC_* / ECC_ERROR_LOG_OFFSET macro is evaluated. The
 * result is used as an offset from the per-IMC MMIO window.
 */
#define IBECC_BASE			(res_cfg->ibecc_base)
#define IBECC_ACTIVATE_OFFSET		IBECC_BASE
#define IBECC_ACTIVATE_EN		BIT(0)

/* IBECC error log (64-bit register, accessed with readq()/writeq()) */
#define ECC_ERROR_LOG_OFFSET		(IBECC_BASE + res_cfg->ibecc_error_log_offset)
#define ECC_ERROR_LOG_CE		BIT_ULL(62)
#define ECC_ERROR_LOG_UE		BIT_ULL(63)
/* The address fields below start at bit 5; shift left by this to rebuild the address */
#define ECC_ERROR_LOG_ADDR_SHIFT	5
#define ECC_ERROR_LOG_ADDR(v)		GET_BITFIELD(v, 5, 38)
#define ECC_ERROR_LOG_ADDR45(v)		GET_BITFIELD(v, 5, 45)
#define ECC_ERROR_LOG_SYND(v)		GET_BITFIELD(v, 46, 61)

/* Host MMIO base address (64-bit register in PCI configuration space) */
#define MCHBAR_OFFSET			0x48
#define MCHBAR_EN			BIT_ULL(0)
#define MCHBAR_BASE(v)			(GET_BITFIELD(v, 16, 38) << 16)
#define MCHBAR_SIZE			0x10000
92
/*
 * Parameters for the channel decode stage.
 * IMC_BASE comes from the active res_config; all offsets derived from it
 * are relative to the per-IMC MMIO window.
 */
#define IMC_BASE			(res_cfg->imc_base)
#define MAD_INTER_CHANNEL_OFFSET	IMC_BASE
#define MAD_INTER_CHANNEL_DDR_TYPE(v)	GET_BITFIELD(v, 0, 2)
#define MAD_INTER_CHANNEL_ECHM(v)	GET_BITFIELD(v, 3, 3)
#define MAD_INTER_CHANNEL_CH_L_MAP(v)	GET_BITFIELD(v, 4, 4)
/* Field is scaled by 2^29 (512 MiB units) to yield a byte count */
#define MAD_INTER_CHANNEL_CH_S_SIZE(v)	((u64)GET_BITFIELD(v, 12, 19) << 29)

/* Parameters for DRAM decode stage (one 32-bit register per channel) */
#define MAD_INTRA_CH0_OFFSET		(IMC_BASE + 4)
#define MAD_INTRA_CH_DIMM_L_MAP(v)	GET_BITFIELD(v, 0, 0)

/* DIMM characteristics (one 32-bit register per channel) */
#define MAD_DIMM_CH0_OFFSET		(IMC_BASE + 0xc)
/* Sizes are scaled by 2^29 (512 MiB units) to yield a byte count */
#define MAD_DIMM_CH_DIMM_L_SIZE(v)	((u64)GET_BITFIELD(v, 0, 6) << 29)
#define MAD_DIMM_CH_DLW(v)		GET_BITFIELD(v, 7, 8)
#define MAD_DIMM_CH_DIMM_S_SIZE(v)	((u64)GET_BITFIELD(v, 16, 22) << 29)
#define MAD_DIMM_CH_DSW(v)		GET_BITFIELD(v, 24, 25)

/* Hash for memory controller selection */
#define MAD_MC_HASH_OFFSET		(IMC_BASE + 0x1b8)
#define MAC_MC_HASH_LSB(v)		GET_BITFIELD(v, 1, 3)

/* Hash for channel selection */
#define CHANNEL_HASH_OFFSET		(IMC_BASE + 0x24)
/* Hash for enhanced channel selection */
#define CHANNEL_EHASH_OFFSET		(IMC_BASE + 0x28)
/* Mask is returned in place (address bits 19:6) */
#define CHANNEL_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
/* Users add 6 to this field to obtain the interleave bit position */
#define CHANNEL_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)
#define CHANNEL_HASH_MODE(v)		GET_BITFIELD(v, 28, 28)

/* Parameters for memory slice decode stage */
#define MEM_SLICE_HASH_MASK(v)		(GET_BITFIELD(v, 6, 19) << 6)
#define MEM_SLICE_HASH_LSB_MASK_BIT(v)	GET_BITFIELD(v, 24, 26)
127
/*
 * Per-SoC resource configuration. One static instance is defined for each
 * supported SoC family; res_cfg points at the instance in use and is
 * dereferenced by the IMC_BASE / IBECC_BASE / ECC_ERROR_LOG_OFFSET macros.
 */
static struct res_config {
	/* Set by the TGL and newer configs; the consumer is outside this chunk */
	bool machine_check;
	/* Number of memory controllers iterated by the error handlers */
	int num_imc;
	/* Offset of the IMC register block (see IMC_BASE) */
	u32 imc_base;
	/* Only set by tgl_cfg; readers are outside this chunk */
	u32 cmf_base;
	u32 cmf_size;
	u32 ms_hash_offset;
	/* Offset of the IBECC register block (see IBECC_BASE) */
	u32 ibecc_base;
	/* Offset of ECC_ERROR_LOG relative to ibecc_base */
	u32 ibecc_error_log_offset;
	/* Report whether IBECC is usable on the given device */
	bool (*ibecc_available)(struct pci_dev *pdev);
	/*
	 * Extract error address logged in IBECC.
	 * Optional: when NULL, ecclog_work_cb() falls back to
	 * ECC_ERROR_LOG_ADDR() << ECC_ERROR_LOG_ADDR_SHIFT.
	 */
	u64 (*err_addr)(u64 ecclog);
	/* Convert error address logged in IBECC to system physical address */
	u64 (*err_addr_to_sys_addr)(u64 eaddr, int mc);
	/* Convert error address logged in IBECC to integrated memory controller address */
	u64 (*err_addr_to_imc_addr)(u64 eaddr, int mc);
} *res_cfg;
145
/* State of one integrated memory controller (IMC) and its IBECC instance */
struct igen6_imc {
	/* Index of this controller within igen6_pvt->imc[] */
	int mc;
	/* EDAC memory controller object that errors are reported against */
	struct mem_ctl_info *mci;
	/* PCI device used for ERRSTS/ERRCMD configuration-space accesses */
	struct pci_dev *pdev;
	struct device dev;
	/* MMIO mapping; IMC_BASE/IBECC_BASE offsets are added to this */
	void __iomem *window;
	/* Sum of all DIMM sizes, accumulated by igen6_get_dimm_config() */
	u64 size;
	/* Channel decode parameters from MAD_INTER_CHANNEL */
	u64 ch_s_size;
	int ch_l_map;
	/* Per-channel DIMM decode parameters from MAD_DIMM / MAD_INTRA */
	u64 dimm_s_size[NUM_CHANNELS];
	u64 dimm_l_size[NUM_CHANNELS];
	int dimm_l_map[NUM_CHANNELS];
};
159
/* Driver-wide private data; igen6_pvt is assigned outside this chunk */
static struct igen6_pvt {
	struct igen6_imc imc[NUM_IMC];
	/* Memory slice hash register value (used by tgl_err_addr_to_mem_addr()) */
	u64 ms_hash;
	/* Memory slice size parameter used by the TGL/ADL address conversions */
	u64 ms_s_size;
	int ms_l_map;
} *igen6_pvt;
166
/*
 * Both values are read by the address-conversion helpers below; they are
 * initialized outside this chunk.
 */
/* The top of low usable DRAM */
static u32 igen6_tolud;
/* The size of physical memory */
static u64 igen6_tom;
171
/*
 * Working record for decoding one error address. The caller fills in mc,
 * imc_addr and sys_addr; igen6_decode() fills in the channel and
 * sub-channel fields.
 */
struct decoded_addr {
	int mc;
	u64 imc_addr;
	u64 sys_addr;
	int channel_idx;
	u64 channel_addr;
	int sub_channel_idx;
	u64 sub_channel_addr;
};
181
/* One captured IBECC error log, queued on ecclog_llist until processed */
struct ecclog_node {
	struct llist_node llnode;
	/* Index of the memory controller the log was read from */
	int mc;
	/* Raw ECC_ERROR_LOG register value */
	u64 ecclog;
};
187
/*
 * In the NMI handler, the driver uses the lock-less memory allocator
 * to allocate memory to store the IBECC error logs and links the logs
 * to the lock-less list. Delay printk() and the work of error reporting
 * to EDAC core in a worker.
 */
#define ECCLOG_POOL_SIZE	PAGE_SIZE
/* Pending logs: pushed by ecclog_gen_pool_add(), drained by ecclog_work_cb() */
static LLIST_HEAD(ecclog_llist);
/* Allocator for struct ecclog_node; assigned outside this chunk */
static struct gen_pool *ecclog_pool;
/* Static backing storage handed to the pool by ecclog_gen_pool_create() */
static char ecclog_buf[ECCLOG_POOL_SIZE];
/* Queued by ecclog_handler() whenever a log was successfully stored */
static struct irq_work ecclog_irq_work;
/* Scheduled by ecclog_irq_work_cb() when the list is not empty */
static struct work_struct ecclog_work;
200
/*
 * PCI device IDs of the supported compute dies. Each ID is bound to a
 * res_config instance in igen6_pci_tbl[].
 */

/* Compute die IDs for Elkhart Lake with IBECC */
#define DID_EHL_SKU5	0x4514
#define DID_EHL_SKU6	0x4528
#define DID_EHL_SKU7	0x452a
#define DID_EHL_SKU8	0x4516
#define DID_EHL_SKU9	0x452c
#define DID_EHL_SKU10	0x452e
#define DID_EHL_SKU11	0x4532
#define DID_EHL_SKU12	0x4518
#define DID_EHL_SKU13	0x451a
#define DID_EHL_SKU14	0x4534
#define DID_EHL_SKU15	0x4536

/* Compute die IDs for ICL-NNPI with IBECC */
#define DID_ICL_SKU8	0x4581
#define DID_ICL_SKU10	0x4585
#define DID_ICL_SKU11	0x4589
#define DID_ICL_SKU12	0x458d

/* Compute die IDs for Tiger Lake with IBECC */
#define DID_TGL_SKU	0x9a14

/* Compute die IDs for Alder Lake with IBECC */
#define DID_ADL_SKU1	0x4601
#define DID_ADL_SKU2	0x4602
#define DID_ADL_SKU3	0x4621
#define DID_ADL_SKU4	0x4641

/* Compute die IDs for Alder Lake-N with IBECC */
#define DID_ADL_N_SKU1	0x4614
#define DID_ADL_N_SKU2	0x4617
#define DID_ADL_N_SKU3	0x461b
#define DID_ADL_N_SKU4	0x461c
#define DID_ADL_N_SKU5	0x4673
#define DID_ADL_N_SKU6	0x4674
#define DID_ADL_N_SKU7	0x4675
#define DID_ADL_N_SKU8	0x4677
#define DID_ADL_N_SKU9	0x4678
#define DID_ADL_N_SKU10	0x4679
#define DID_ADL_N_SKU11	0x467c
#define DID_ADL_N_SKU12	0x4632

/* Compute die IDs for Raptor Lake-P with IBECC */
#define DID_RPL_P_SKU1	0xa706
#define DID_RPL_P_SKU2	0xa707
#define DID_RPL_P_SKU3	0xa708
#define DID_RPL_P_SKU4	0xa716
#define DID_RPL_P_SKU5	0xa718

/* Compute die IDs for Meteor Lake-PS with IBECC */
#define DID_MTL_PS_SKU1	0x7d21
#define DID_MTL_PS_SKU2	0x7d22
#define DID_MTL_PS_SKU3	0x7d23
#define DID_MTL_PS_SKU4	0x7d24

/* Compute die IDs for Meteor Lake-P with IBECC */
#define DID_MTL_P_SKU1	0x7d01
#define DID_MTL_P_SKU2	0x7d02
#define DID_MTL_P_SKU3	0x7d14

/* Compute die IDs for Arrow Lake-UH with IBECC */
#define DID_ARL_UH_SKU1	0x7d06
#define DID_ARL_UH_SKU2	0x7d20
#define DID_ARL_UH_SKU3	0x7d30

/* Compute die IDs for Panther Lake-H with IBECC */
#define DID_PTL_H_SKU1	0xb000
#define DID_PTL_H_SKU2	0xb001
#define DID_PTL_H_SKU3	0xb002
270
/*
 * Read the 64-bit MCHBAR register from PCI configuration space and
 * extract the host MMIO base address.
 *
 * @pdev:   device whose configuration space holds MCHBAR
 * @mchbar: output; written only on success
 *
 * Returns 0 on success, -ENODEV if either half of the register cannot be
 * read or if the MCHBAR enable bit is clear.
 */
static int get_mchbar(struct pci_dev *pdev, u64 *mchbar)
{
	u32 lo, hi;
	u64 bar;

	if (pci_read_config_dword(pdev, MCHBAR_OFFSET, &lo)) {
		igen6_printk(KERN_ERR, "Failed to read lower MCHBAR\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, MCHBAR_OFFSET + 4, &hi)) {
		igen6_printk(KERN_ERR, "Failed to read upper MCHBAR\n");
		return -ENODEV;
	}

	/* Low dword sits at the lower offset, as on little-endian x86 */
	bar = ((u64)hi << 32) | lo;

	if (!(bar & MCHBAR_EN)) {
		igen6_printk(KERN_ERR, "MCHBAR is disabled\n");
		return -ENODEV;
	}

	*mchbar = MCHBAR_BASE(bar);

	return 0;
}
300
ehl_ibecc_available(struct pci_dev * pdev)301 static bool ehl_ibecc_available(struct pci_dev *pdev)
302 {
303 u32 v;
304
305 if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
306 return false;
307
308 return !!(CAPID_C_IBECC & v);
309 }
310
/*
 * Elkhart Lake / ICL-NNPI: the address logged by IBECC is used as the
 * system address unchanged. @mc is unused.
 */
static u64 ehl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return eaddr;
}
315
/*
 * Elkhart Lake / ICL-NNPI: convert the address logged by IBECC into the
 * address as seen by the memory controller.
 *
 * Addresses below TOLUD are returned unchanged. Otherwise the address is
 * rebased onto TOLUD: from 4GB when TOM <= 4GB, or from TOM when the
 * address is at or above TOM. Anything else is returned unchanged.
 * @mc is unused.
 */
static u64 ehl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	u64 remap_base;

	if (eaddr < igen6_tolud)
		return eaddr;

	if (igen6_tom <= _4GB)
		remap_base = _4GB;
	else if (eaddr >= igen6_tom)
		remap_base = igen6_tom;
	else
		return eaddr;

	return eaddr + igen6_tolud - remap_base;
}
329
icl_ibecc_available(struct pci_dev * pdev)330 static bool icl_ibecc_available(struct pci_dev *pdev)
331 {
332 u32 v;
333
334 if (pci_read_config_dword(pdev, CAPID_C_OFFSET, &v))
335 return false;
336
337 return !(CAPID_C_IBECC & v) &&
338 (boot_cpu_data.x86_stepping >= 1);
339 }
340
tgl_ibecc_available(struct pci_dev * pdev)341 static bool tgl_ibecc_available(struct pci_dev *pdev)
342 {
343 u32 v;
344
345 if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
346 return false;
347
348 return !(CAPID_E_IBECC & v);
349 }
350
mtl_p_ibecc_available(struct pci_dev * pdev)351 static bool mtl_p_ibecc_available(struct pci_dev *pdev)
352 {
353 u32 v;
354
355 if (pci_read_config_dword(pdev, CAPID_E_OFFSET, &v))
356 return false;
357
358 return !(CAPID_E_IBECC_BIT18 & v);
359 }
360
mtl_ps_ibecc_available(struct pci_dev * pdev)361 static bool mtl_ps_ibecc_available(struct pci_dev *pdev)
362 {
363 #define MCHBAR_MEMSS_IBECCDIS 0x13c00
364 void __iomem *window;
365 u64 mchbar;
366 u32 val;
367
368 if (get_mchbar(pdev, &mchbar))
369 return false;
370
371 window = ioremap(mchbar, MCHBAR_SIZE * 2);
372 if (!window) {
373 igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
374 return false;
375 }
376
377 val = readl(window + MCHBAR_MEMSS_IBECCDIS);
378 iounmap(window);
379
380 /* Bit6: 1 - IBECC is disabled, 0 - IBECC isn't disabled */
381 return !GET_BITFIELD(val, 6, 6);
382 }
383
/*
 * Convert a physical memory address into a system address.
 *
 * Addresses below TOLUD are returned unchanged. Otherwise the offset above
 * TOLUD is rebased: onto 4GB when TOM <= 4GB, or onto TOM when the address
 * is below 4GB. Anything else is returned unchanged.
 */
static u64 mem_addr_to_sys_addr(u64 maddr)
{
	u64 remap_base;

	if (maddr < igen6_tolud)
		return maddr;

	if (igen6_tom <= _4GB)
		remap_base = _4GB;
	else if (maddr < _4GB)
		remap_base = igen6_tom;
	else
		return maddr;

	return maddr - igen6_tolud + remap_base;
}
397
/*
 * Compute the one-bit memory slice selector.
 *
 * Starting from @hash_init, XOR in address bits 6..19 of (@addr & @mask),
 * then XOR in bit @intlv_bit of @addr.
 */
static u64 mem_slice_hash(u64 addr, u64 mask, u64 hash_init, int intlv_bit)
{
	u64 masked = addr & mask;
	u64 parity = hash_init;
	int bit = 6;

	while (bit < 20) {
		parity ^= (masked >> bit) & 1;
		bit++;
	}

	return parity ^ ((addr >> intlv_bit) & 1);
}
409
/*
 * Tiger Lake: convert the address logged by IBECC into a physical memory
 * address.
 *
 * At or above ms_s_size, the address is simply offset by ms_s_size.
 * Below it, a zero bit is re-inserted at the interleave position and then
 * replaced by the slice hash, seeded with the controller index @mc.
 */
static u64 tgl_err_addr_to_mem_addr(u64 eaddr, int mc)
{
	u64 slice_s_size = igen6_pvt->ms_s_size;
	u64 upper, lower, maddr, hash_mask, slice;
	int intlv_bit;
	u32 ms_hash;

	if (eaddr >= slice_s_size)
		return eaddr + slice_s_size;

	ms_hash = igen6_pvt->ms_hash;
	hash_mask = MEM_SLICE_HASH_MASK(ms_hash);
	intlv_bit = MEM_SLICE_HASH_LSB_MASK_BIT(ms_hash) + 6;

	/* Open a one-bit gap at intlv_bit */
	upper = GET_BITFIELD(eaddr, intlv_bit, 63) << (intlv_bit + 1);
	lower = GET_BITFIELD(eaddr, 0, intlv_bit - 1);
	maddr = upper | lower;

	slice = mem_slice_hash(maddr, hash_mask, mc, intlv_bit);

	return maddr | (slice << intlv_bit);
}
432
/*
 * Tiger Lake: convert the address logged by IBECC into a system address
 * by first converting it to a physical memory address.
 */
static u64 tgl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return mem_addr_to_sys_addr(tgl_err_addr_to_mem_addr(eaddr, mc));
}
439
/*
 * Tiger Lake: the address logged by IBECC is used as the memory
 * controller address unchanged. @mc is unused.
 */
static u64 tgl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	return eaddr;
}
444
/*
 * Alder Lake and newer: the address logged by IBECC is treated as a
 * physical memory address and converted with mem_addr_to_sys_addr().
 * @mc is unused.
 */
static u64 adl_err_addr_to_sys_addr(u64 eaddr, int mc)
{
	return mem_addr_to_sys_addr(eaddr);
}
449
/*
 * Alder Lake and newer: convert the address logged by IBECC into the
 * address as seen by memory controller @mc.
 *
 * At or above 2 * ms_s_size, ms_s_size is subtracted. Below that, the
 * interleave bit (position taken from the MAD_MC_HASH register of @mc)
 * is removed and the upper bits are shifted down to close the gap.
 */
static u64 adl_err_addr_to_imc_addr(u64 eaddr, int mc)
{
	struct igen6_imc *imc = &igen6_pvt->imc[mc];
	u64 slice_s_size = igen6_pvt->ms_s_size;
	u64 upper, lower;
	int intlv_bit;
	u32 mc_hash;

	if (eaddr >= 2 * slice_s_size)
		return eaddr - slice_s_size;

	mc_hash = readl(imc->window + MAD_MC_HASH_OFFSET);
	intlv_bit = MAC_MC_HASH_LSB(mc_hash) + 6;

	upper = GET_BITFIELD(eaddr, intlv_bit + 1, 63) << intlv_bit;
	lower = GET_BITFIELD(eaddr, 0, intlv_bit - 1);

	return upper | lower;
}
469
/*
 * Raptor Lake-P: extract the error address from an ECC_ERROR_LOG value.
 *
 * The address field spans register bits 45:5 here (38:5 on the other
 * SoCs). ECC_ERROR_LOG_ADDR45() returns the field shifted down to bit 0,
 * so it has to be shifted back up by ECC_ERROR_LOG_ADDR_SHIFT to form an
 * address, exactly as the default path in ecclog_work_cb() does for
 * ECC_ERROR_LOG_ADDR(). Without the shift the returned address is too
 * small by a factor of 32.
 *
 * NOTE(review): based on the field layout visible in this file; confirm
 * against the RPL-P register specification.
 */
static u64 rpl_p_err_addr(u64 ecclog)
{
	return ECC_ERROR_LOG_ADDR45(ecclog) << ECC_ERROR_LOG_ADDR_SHIFT;
}
474
475 static struct res_config ehl_cfg = {
476 .num_imc = 1,
477 .imc_base = 0x5000,
478 .ibecc_base = 0xdc00,
479 .ibecc_available = ehl_ibecc_available,
480 .ibecc_error_log_offset = 0x170,
481 .err_addr_to_sys_addr = ehl_err_addr_to_sys_addr,
482 .err_addr_to_imc_addr = ehl_err_addr_to_imc_addr,
483 };
484
/* ICL-NNPI: single memory controller; shares the EHL address conversions */
static struct res_config icl_cfg = {
	.num_imc		= 1,
	.imc_base		= 0x5000,
	.ibecc_base		= 0xd800,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= icl_ibecc_available,
	.err_addr_to_sys_addr	= ehl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= ehl_err_addr_to_imc_addr,
};
494
/* Tiger Lake: two memory controllers; the only config that sets the cmf_* and ms_hash_offset fields */
static struct res_config tgl_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0x5000,
	.cmf_base		= 0x11000,
	.cmf_size		= 0x800,
	.ms_hash_offset		= 0xac,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= tgl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= tgl_err_addr_to_imc_addr,
};
508
/* Alder Lake: two memory controllers; IBECC detection shared with TGL */
static struct res_config adl_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
519
/* Alder Lake-N: same as adl_cfg but with a single memory controller */
static struct res_config adl_n_cfg = {
	.machine_check		= true,
	.num_imc		= 1,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
530
/* Raptor Lake-P: same as adl_cfg plus a custom error-address extractor */
static struct res_config rpl_p_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x68,
	.ibecc_available	= tgl_ibecc_available,
	.err_addr		= rpl_p_err_addr,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
542
/* Meteor Lake-PS: IBECC availability is detected through an MMIO register */
static struct res_config mtl_ps_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_ps_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
553
/* Meteor Lake-P; also bound to the Arrow Lake-UH and Panther Lake-H IDs in igen6_pci_tbl[] */
static struct res_config mtl_p_cfg = {
	.machine_check		= true,
	.num_imc		= 2,
	.imc_base		= 0xd800,
	.ibecc_base		= 0xd400,
	.ibecc_error_log_offset	= 0x170,
	.ibecc_available	= mtl_p_ibecc_available,
	.err_addr_to_sys_addr	= adl_err_addr_to_sys_addr,
	.err_addr_to_imc_addr	= adl_err_addr_to_imc_addr,
};
564
/*
 * Supported devices. The driver_data of each entry carries a pointer to
 * the res_config describing that SoC family. The table is terminated by
 * an empty entry and exported for module autoloading.
 */
static const struct pci_device_id igen6_pci_tbl[] = {
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU5), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU6), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU7), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU8), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU9), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU10), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU11), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU12), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU13), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU14), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_EHL_SKU15), (kernel_ulong_t)&ehl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU8), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU10), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU11), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ICL_SKU12), (kernel_ulong_t)&icl_cfg },
	{ PCI_VDEVICE(INTEL, DID_TGL_SKU), (kernel_ulong_t)&tgl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU1), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU2), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU3), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_SKU4), (kernel_ulong_t)&adl_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU1), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU2), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU3), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU4), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU5), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU6), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU7), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU8), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU9), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU4), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_RPL_P_SKU5), (kernel_ulong_t)&rpl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU1), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU2), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU3), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_PS_SKU4), (kernel_ulong_t)&mtl_ps_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_MTL_P_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_ARL_UH_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU1), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU2), (kernel_ulong_t)&mtl_p_cfg },
	{ PCI_VDEVICE(INTEL, DID_PTL_H_SKU3), (kernel_ulong_t)&mtl_p_cfg },
	{ },
};
MODULE_DEVICE_TABLE(pci, igen6_pci_tbl);
619
/*
 * Translate the device-width field of a MAD_DIMM register value into an
 * EDAC device type.
 *
 * @dimm_l:   non-zero selects the DIMM L width field, zero the DIMM S one
 * @mad_dimm: raw MAD_DIMM register value
 *
 * Field values 0, 1 and 2 map to x8, x16 and x32; anything else yields
 * DEV_UNKNOWN.
 */
static enum dev_type get_width(int dimm_l, u32 mad_dimm)
{
	u32 width;

	if (dimm_l)
		width = MAD_DIMM_CH_DLW(mad_dimm);
	else
		width = MAD_DIMM_CH_DSW(mad_dimm);

	if (width == 0)
		return DEV_X8;
	if (width == 1)
		return DEV_X16;
	if (width == 2)
		return DEV_X32;

	return DEV_UNKNOWN;
}
636
/*
 * Translate the DDR type field of a MAD_INTER_CHANNEL register value into
 * an EDAC memory type. Unrecognized field values yield MEM_UNKNOWN.
 */
static enum mem_type get_memory_type(u32 mad_inter)
{
	enum mem_type mtype = MEM_UNKNOWN;

	switch (MAD_INTER_CHANNEL_DDR_TYPE(mad_inter)) {
	case 0:
		mtype = MEM_DDR4;
		break;
	case 1:
		mtype = MEM_DDR3;
		break;
	case 2:
		mtype = MEM_LPDDR3;
		break;
	case 3:
		mtype = MEM_LPDDR4;
		break;
	case 4:
		mtype = MEM_WIO2;
		break;
	default:
		break;
	}

	return mtype;
}
656
/*
 * Compute the hashed channel (or sub-channel) index: the XOR of address
 * bits 6..19 of (@addr & @mask) and of bit @intlv_bit of @addr.
 * The result is 0 or 1.
 */
static int decode_chan_idx(u64 addr, u64 mask, int intlv_bit)
{
	u64 masked = addr & mask;
	u64 intlv = (addr >> intlv_bit) & 1;
	u64 parity = 0;
	int bit = 6;

	while (bit < 20) {
		parity ^= (masked >> bit) & 1;
		bit++;
	}

	return (int)(parity ^ intlv);
}
668
/*
 * Remove bit @intlv_bit from @addr: the bits below it are kept in place
 * and the bits above it are shifted down by one to fill the gap.
 */
static u64 decode_channel_addr(u64 addr, int intlv_bit)
{
	u64 upper = GET_BITFIELD(addr, intlv_bit + 1, 63);
	u64 lower = GET_BITFIELD(addr, 0, intlv_bit - 1);

	return (upper << intlv_bit) | lower;
}
679
/*
 * Decode one interleave stage (channel, or sub-channel/DIMM).
 *
 * @addr:     address entering this stage
 * @hash:     raw hash register value for this stage
 * @s_size:   size of the smaller of the two targets
 * @l_map:    index of the larger target
 * @idx:      output, selected target index
 * @sub_addr: output, address within the selected target
 *
 * The range [0, 2 * s_size) is interleaved between both targets; every
 * address from 2 * s_size upwards belongs to the larger target only.
 * The boundary test therefore has to be inclusive: with '>' the first
 * address of the non-interleaved zone (and address 0 when s_size is 0,
 * i.e. only one target populated) was wrongly sent through the interleave
 * decoding and could be attributed to the wrong target. This also matches
 * the '>=' boundary checks used by the other address decoders in this
 * file.
 */
static void decode_addr(u64 addr, u32 hash, u64 s_size, int l_map,
			int *idx, u64 *sub_addr)
{
	int intlv_bit = CHANNEL_HASH_LSB_MASK_BIT(hash) + 6;

	if (addr >= 2 * s_size) {
		*sub_addr = addr - s_size;
		*idx = l_map;
		return;
	}

	if (CHANNEL_HASH_MODE(hash)) {
		/* Hash mode: interleave bit and mask come from the register */
		*sub_addr = decode_channel_addr(addr, intlv_bit);
		*idx = decode_chan_idx(addr, CHANNEL_HASH_MASK(hash), intlv_bit);
	} else {
		/* Non-hash mode: plain interleave on address bit 6 */
		*sub_addr = decode_channel_addr(addr, 6);
		*idx = GET_BITFIELD(addr, 6, 6);
	}
}
699
igen6_decode(struct decoded_addr * res)700 static int igen6_decode(struct decoded_addr *res)
701 {
702 struct igen6_imc *imc = &igen6_pvt->imc[res->mc];
703 u64 addr = res->imc_addr, sub_addr, s_size;
704 int idx, l_map;
705 u32 hash;
706
707 if (addr >= igen6_tom) {
708 edac_dbg(0, "Address 0x%llx out of range\n", addr);
709 return -EINVAL;
710 }
711
712 /* Decode channel */
713 hash = readl(imc->window + CHANNEL_HASH_OFFSET);
714 s_size = imc->ch_s_size;
715 l_map = imc->ch_l_map;
716 decode_addr(addr, hash, s_size, l_map, &idx, &sub_addr);
717 res->channel_idx = idx;
718 res->channel_addr = sub_addr;
719
720 /* Decode sub-channel/DIMM */
721 hash = readl(imc->window + CHANNEL_EHASH_OFFSET);
722 s_size = imc->dimm_s_size[idx];
723 l_map = imc->dimm_l_map[idx];
724 decode_addr(res->channel_addr, hash, s_size, l_map, &idx, &sub_addr);
725 res->sub_channel_idx = idx;
726 res->sub_channel_addr = sub_addr;
727
728 return 0;
729 }
730
/*
 * Report one decoded error to the EDAC core.
 *
 * The error is reported as uncorrected when the UE bit is set in @ecclog
 * and as corrected otherwise. The system address is split into page frame
 * number and in-page offset; the syndrome is taken from @ecclog.
 */
static void igen6_output_error(struct decoded_addr *res,
			       struct mem_ctl_info *mci, u64 ecclog)
{
	unsigned long pfn = res->sys_addr >> PAGE_SHIFT;
	unsigned long offset = res->sys_addr & ~PAGE_MASK;
	enum hw_event_mc_err_type type;

	if (ecclog & ECC_ERROR_LOG_UE)
		type = HW_EVENT_ERR_UNCORRECTED;
	else
		type = HW_EVENT_ERR_CORRECTED;

	edac_mc_handle_error(type, mci, 1, pfn, offset,
			     ECC_ERROR_LOG_SYND(ecclog),
			     res->channel_idx, res->sub_channel_idx,
			     -1, "", "");
}
745
ecclog_gen_pool_create(void)746 static struct gen_pool *ecclog_gen_pool_create(void)
747 {
748 struct gen_pool *pool;
749
750 pool = gen_pool_create(ilog2(sizeof(struct ecclog_node)), -1);
751 if (!pool)
752 return NULL;
753
754 if (gen_pool_add(pool, (unsigned long)ecclog_buf, ECCLOG_POOL_SIZE, -1)) {
755 gen_pool_destroy(pool);
756 return NULL;
757 }
758
759 return pool;
760 }
761
/*
 * Store one error log in a node allocated from ecclog_pool and push it
 * onto ecclog_llist.
 *
 * @mc:     index of the memory controller the log belongs to
 * @ecclog: raw ECC_ERROR_LOG value
 *
 * Returns 0 on success, -ENOMEM if the pool is exhausted.
 */
static int ecclog_gen_pool_add(int mc, u64 ecclog)
{
	struct ecclog_node *entry;

	entry = (struct ecclog_node *)gen_pool_alloc(ecclog_pool, sizeof(*entry));
	if (!entry)
		return -ENOMEM;

	entry->ecclog = ecclog;
	entry->mc = mc;
	llist_add(&entry->llnode, &ecclog_llist);

	return 0;
}
776
777 /*
778 * Either the memory-mapped I/O status register ECC_ERROR_LOG or the PCI
779 * configuration space status register ERRSTS can indicate whether a
780 * correctable error or an uncorrectable error occurred. We only use the
781 * ECC_ERROR_LOG register to check error type, but need to clear both
782 * registers to enable future error events.
783 */
ecclog_read_and_clear(struct igen6_imc * imc)784 static u64 ecclog_read_and_clear(struct igen6_imc *imc)
785 {
786 u64 ecclog = readq(imc->window + ECC_ERROR_LOG_OFFSET);
787
788 /*
789 * Quirk: The ECC_ERROR_LOG register of certain SoCs may contain
790 * the invalid value ~0. This will result in a flood of invalid
791 * error reports in polling mode. Skip it.
792 */
793 if (ecclog == ~0)
794 return 0;
795
796 /* Neither a CE nor a UE. Skip it.*/
797 if (!(ecclog & (ECC_ERROR_LOG_CE | ECC_ERROR_LOG_UE)))
798 return 0;
799
800 /* Clear CE/UE bits by writing 1s */
801 writeq(ecclog, imc->window + ECC_ERROR_LOG_OFFSET);
802
803 return ecclog;
804 }
805
errsts_clear(struct igen6_imc * imc)806 static void errsts_clear(struct igen6_imc *imc)
807 {
808 u16 errsts;
809
810 if (pci_read_config_word(imc->pdev, ERRSTS_OFFSET, &errsts)) {
811 igen6_printk(KERN_ERR, "Failed to read ERRSTS\n");
812 return;
813 }
814
815 /* Clear CE/UE bits by writing 1s */
816 if (errsts & (ERRSTS_CE | ERRSTS_UE))
817 pci_write_config_word(imc->pdev, ERRSTS_OFFSET, errsts);
818 }
819
/*
 * Enable or disable CE/UE error reporting by setting or clearing the
 * corresponding bits in the ERRCMD register. The register is accessed
 * through the PCI device of memory controller 0.
 *
 * @enable: true to set the CE/UE bits, false to clear them
 *
 * Returns 0 on success or a negative errno converted from the PCI config
 * access error.
 */
static int errcmd_enable_error_reporting(bool enable)
{
	struct igen6_imc *imc = &igen6_pvt->imc[0];
	u16 errcmd;
	int rc;

	rc = pci_read_config_word(imc->pdev, ERRCMD_OFFSET, &errcmd);
	if (rc)
		return pcibios_err_to_errno(rc);

	/*
	 * Use the ERRCMD_* bit definitions for the ERRCMD register. The
	 * previous code used ERRSTS_UE here, which only worked because both
	 * macros happen to have the same value.
	 */
	if (enable)
		errcmd |= ERRCMD_CE | ERRCMD_UE;
	else
		errcmd &= ~(ERRCMD_CE | ERRCMD_UE);

	rc = pci_write_config_word(imc->pdev, ERRCMD_OFFSET, errcmd);
	if (rc)
		return pcibios_err_to_errno(rc);

	return 0;
}
841
ecclog_handler(void)842 static int ecclog_handler(void)
843 {
844 struct igen6_imc *imc;
845 int i, n = 0;
846 u64 ecclog;
847
848 for (i = 0; i < res_cfg->num_imc; i++) {
849 imc = &igen6_pvt->imc[i];
850
851 /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
852
853 ecclog = ecclog_read_and_clear(imc);
854 if (!ecclog)
855 continue;
856
857 if (!ecclog_gen_pool_add(i, ecclog))
858 irq_work_queue(&ecclog_irq_work);
859
860 n++;
861 }
862
863 return n;
864 }
865
ecclog_work_cb(struct work_struct * work)866 static void ecclog_work_cb(struct work_struct *work)
867 {
868 struct ecclog_node *node, *tmp;
869 struct mem_ctl_info *mci;
870 struct llist_node *head;
871 struct decoded_addr res;
872 u64 eaddr;
873
874 head = llist_del_all(&ecclog_llist);
875 if (!head)
876 return;
877
878 llist_for_each_entry_safe(node, tmp, head, llnode) {
879 memset(&res, 0, sizeof(res));
880 if (res_cfg->err_addr)
881 eaddr = res_cfg->err_addr(node->ecclog);
882 else
883 eaddr = ECC_ERROR_LOG_ADDR(node->ecclog) <<
884 ECC_ERROR_LOG_ADDR_SHIFT;
885 res.mc = node->mc;
886 res.sys_addr = res_cfg->err_addr_to_sys_addr(eaddr, res.mc);
887 res.imc_addr = res_cfg->err_addr_to_imc_addr(eaddr, res.mc);
888
889 mci = igen6_pvt->imc[res.mc].mci;
890
891 edac_dbg(2, "MC %d, ecclog = 0x%llx\n", node->mc, node->ecclog);
892 igen6_mc_printk(mci, KERN_DEBUG, "HANDLING IBECC MEMORY ERROR\n");
893 igen6_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", res.sys_addr);
894
895 if (!igen6_decode(&res))
896 igen6_output_error(&res, mci, node->ecclog);
897
898 gen_pool_free(ecclog_pool, (unsigned long)node, sizeof(*node));
899 }
900 }
901
ecclog_irq_work_cb(struct irq_work * irq_work)902 static void ecclog_irq_work_cb(struct irq_work *irq_work)
903 {
904 int i;
905
906 for (i = 0; i < res_cfg->num_imc; i++)
907 errsts_clear(&igen6_pvt->imc[i]);
908
909 if (!llist_empty(&ecclog_llist))
910 schedule_work(&ecclog_work);
911 }
912
ecclog_nmi_handler(unsigned int cmd,struct pt_regs * regs)913 static int ecclog_nmi_handler(unsigned int cmd, struct pt_regs *regs)
914 {
915 unsigned char reason;
916
917 if (!ecclog_handler())
918 return NMI_DONE;
919
920 /*
921 * Both In-Band ECC correctable error and uncorrectable error are
922 * reported by SERR# NMI. The NMI generic code (see pci_serr_error())
923 * doesn't clear the bit NMI_REASON_CLEAR_SERR (in port 0x61) to
924 * re-enable the SERR# NMI after NMI handling. So clear this bit here
925 * to re-enable SERR# NMI for receiving future In-Band ECC errors.
926 */
927 reason = x86_platform.get_nmi_reason() & NMI_REASON_CLEAR_MASK;
928 reason |= NMI_REASON_CLEAR_SERR;
929 outb(reason, NMI_REASON_PORT);
930 reason &= ~NMI_REASON_CLEAR_SERR;
931 outb(reason, NMI_REASON_PORT);
932
933 return NMI_HANDLED;
934 }
935
ecclog_mce_handler(struct notifier_block * nb,unsigned long val,void * data)936 static int ecclog_mce_handler(struct notifier_block *nb, unsigned long val,
937 void *data)
938 {
939 struct mce *mce = (struct mce *)data;
940 char *type;
941
942 if (mce->kflags & MCE_HANDLED_CEC)
943 return NOTIFY_DONE;
944
945 /*
946 * Ignore unless this is a memory related error.
947 * We don't check the bit MCI_STATUS_ADDRV of MCi_STATUS here,
948 * since this bit isn't set on some CPU (e.g., Tiger Lake UP3).
949 */
950 if ((mce->status & 0xefff) >> 7 != 1)
951 return NOTIFY_DONE;
952
953 if (mce->mcgstatus & MCG_STATUS_MCIP)
954 type = "Exception";
955 else
956 type = "Event";
957
958 edac_dbg(0, "CPU %d: Machine Check %s: 0x%llx Bank %d: 0x%llx\n",
959 mce->extcpu, type, mce->mcgstatus,
960 mce->bank, mce->status);
961 edac_dbg(0, "TSC 0x%llx\n", mce->tsc);
962 edac_dbg(0, "ADDR 0x%llx\n", mce->addr);
963 edac_dbg(0, "MISC 0x%llx\n", mce->misc);
964 edac_dbg(0, "PROCESSOR %u:0x%x TIME %llu SOCKET %u APIC 0x%x\n",
965 mce->cpuvendor, mce->cpuid, mce->time,
966 mce->socketid, mce->apicid);
967 /*
968 * We just use the Machine Check for the memory error notification.
969 * Each memory controller is associated with an IBECC instance.
970 * Directly read and clear the error information(error address and
971 * error type) on all the IBECC instances so that we know on which
972 * memory controller the memory error(s) occurred.
973 */
974 if (!ecclog_handler())
975 return NOTIFY_DONE;
976
977 mce->kflags |= MCE_HANDLED_EDAC;
978
979 return NOTIFY_DONE;
980 }
981
/* Notifier block hooking ecclog_mce_handler() in at EDAC priority */
static struct notifier_block ecclog_mce_dec = {
	.notifier_call	= ecclog_mce_handler,
	.priority	= MCE_PRIO_EDAC,
};
986
igen6_check_ecc(struct igen6_imc * imc)987 static bool igen6_check_ecc(struct igen6_imc *imc)
988 {
989 u32 activate = readl(imc->window + IBECC_ACTIVATE_OFFSET);
990
991 return !!(activate & IBECC_ACTIVATE_EN);
992 }
993
igen6_get_dimm_config(struct mem_ctl_info * mci)994 static int igen6_get_dimm_config(struct mem_ctl_info *mci)
995 {
996 struct igen6_imc *imc = mci->pvt_info;
997 u32 mad_inter, mad_intra, mad_dimm;
998 int i, j, ndimms, mc = imc->mc;
999 struct dimm_info *dimm;
1000 enum mem_type mtype;
1001 enum dev_type dtype;
1002 u64 dsize;
1003 bool ecc;
1004
1005 edac_dbg(2, "\n");
1006
1007 mad_inter = readl(imc->window + MAD_INTER_CHANNEL_OFFSET);
1008 mtype = get_memory_type(mad_inter);
1009 ecc = igen6_check_ecc(imc);
1010 imc->ch_s_size = MAD_INTER_CHANNEL_CH_S_SIZE(mad_inter);
1011 imc->ch_l_map = MAD_INTER_CHANNEL_CH_L_MAP(mad_inter);
1012
1013 for (i = 0; i < NUM_CHANNELS; i++) {
1014 mad_intra = readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4);
1015 mad_dimm = readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4);
1016
1017 imc->dimm_l_size[i] = MAD_DIMM_CH_DIMM_L_SIZE(mad_dimm);
1018 imc->dimm_s_size[i] = MAD_DIMM_CH_DIMM_S_SIZE(mad_dimm);
1019 imc->dimm_l_map[i] = MAD_INTRA_CH_DIMM_L_MAP(mad_intra);
1020 imc->size += imc->dimm_s_size[i];
1021 imc->size += imc->dimm_l_size[i];
1022 ndimms = 0;
1023
1024 for (j = 0; j < NUM_DIMMS; j++) {
1025 dimm = edac_get_dimm(mci, i, j, 0);
1026
1027 if (j ^ imc->dimm_l_map[i]) {
1028 dtype = get_width(0, mad_dimm);
1029 dsize = imc->dimm_s_size[i];
1030 } else {
1031 dtype = get_width(1, mad_dimm);
1032 dsize = imc->dimm_l_size[i];
1033 }
1034
1035 if (!dsize)
1036 continue;
1037
1038 dimm->grain = 64;
1039 dimm->mtype = mtype;
1040 dimm->dtype = dtype;
1041 dimm->nr_pages = MiB_TO_PAGES(dsize >> 20);
1042 dimm->edac_mode = EDAC_SECDED;
1043 snprintf(dimm->label, sizeof(dimm->label),
1044 "MC#%d_Chan#%d_DIMM#%d", mc, i, j);
1045 edac_dbg(0, "MC %d, Channel %d, DIMM %d, Size %llu MiB (%u pages)\n",
1046 mc, i, j, dsize >> 20, dimm->nr_pages);
1047
1048 ndimms++;
1049 }
1050
1051 if (ndimms && !ecc) {
1052 igen6_printk(KERN_ERR, "MC%d In-Band ECC is disabled\n", mc);
1053 return -ENODEV;
1054 }
1055 }
1056
1057 edac_dbg(0, "MC %d, total size %llu MiB\n", mc, imc->size >> 20);
1058
1059 return 0;
1060 }
1061
1062 #ifdef CONFIG_EDAC_DEBUG
1063 /* Top of upper usable DRAM */
1064 static u64 igen6_touud;
1065 #define TOUUD_OFFSET 0xa8
1066
igen6_reg_dump(struct igen6_imc * imc)1067 static void igen6_reg_dump(struct igen6_imc *imc)
1068 {
1069 int i;
1070
1071 edac_dbg(2, "CHANNEL_HASH : 0x%x\n",
1072 readl(imc->window + CHANNEL_HASH_OFFSET));
1073 edac_dbg(2, "CHANNEL_EHASH : 0x%x\n",
1074 readl(imc->window + CHANNEL_EHASH_OFFSET));
1075 edac_dbg(2, "MAD_INTER_CHANNEL: 0x%x\n",
1076 readl(imc->window + MAD_INTER_CHANNEL_OFFSET));
1077 edac_dbg(2, "ECC_ERROR_LOG : 0x%llx\n",
1078 readq(imc->window + ECC_ERROR_LOG_OFFSET));
1079
1080 for (i = 0; i < NUM_CHANNELS; i++) {
1081 edac_dbg(2, "MAD_INTRA_CH%d : 0x%x\n", i,
1082 readl(imc->window + MAD_INTRA_CH0_OFFSET + i * 4));
1083 edac_dbg(2, "MAD_DIMM_CH%d : 0x%x\n", i,
1084 readl(imc->window + MAD_DIMM_CH0_OFFSET + i * 4));
1085 }
1086 edac_dbg(2, "TOLUD : 0x%x", igen6_tolud);
1087 edac_dbg(2, "TOUUD : 0x%llx", igen6_touud);
1088 edac_dbg(2, "TOM : 0x%llx", igen6_tom);
1089 }
1090
/* debugfs directory "igen6_test"; NULL when it has not been created. */
static struct dentry *igen6_test;
1092
debugfs_u64_set(void * data,u64 val)1093 static int debugfs_u64_set(void *data, u64 val)
1094 {
1095 u64 ecclog;
1096
1097 if ((val >= igen6_tolud && val < _4GB) || val >= igen6_touud) {
1098 edac_dbg(0, "Address 0x%llx out of range\n", val);
1099 return 0;
1100 }
1101
1102 pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
1103
1104 val >>= ECC_ERROR_LOG_ADDR_SHIFT;
1105 ecclog = (val << ECC_ERROR_LOG_ADDR_SHIFT) | ECC_ERROR_LOG_CE;
1106
1107 if (!ecclog_gen_pool_add(0, ecclog))
1108 irq_work_queue(&ecclog_irq_work);
1109
1110 return 0;
1111 }
1112 DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
1113
igen6_debug_setup(void)1114 static void igen6_debug_setup(void)
1115 {
1116 igen6_test = edac_debugfs_create_dir("igen6_test");
1117 if (!igen6_test)
1118 return;
1119
1120 if (!edac_debugfs_create_file("addr", 0200, igen6_test,
1121 NULL, &fops_u64_wo)) {
1122 debugfs_remove(igen6_test);
1123 igen6_test = NULL;
1124 }
1125 }
1126
igen6_debug_teardown(void)1127 static void igen6_debug_teardown(void)
1128 {
1129 debugfs_remove_recursive(igen6_test);
1130 }
1131 #else
/* No-op stand-ins used when CONFIG_EDAC_DEBUG is not set. */
static void igen6_reg_dump(struct igen6_imc *imc)
{
}

static void igen6_debug_setup(void)
{
}

static void igen6_debug_teardown(void)
{
}
1135 #endif
1136
/*
 * Check that the device provides In-Band ECC and read the address-map
 * registers from its PCI configuration space.
 *
 * On success igen6_tolud and igen6_tom are updated and *@mchbar is filled
 * in by get_mchbar(). With CONFIG_EDAC_DEBUG, igen6_touud is read as well;
 * a failure to read it is only logged.
 *
 * Return: 0 on success, -ENODEV on any failure.
 */
static int igen6_pci_setup(struct pci_dev *pdev, u64 *mchbar)
{
	/* Assemble a 64-bit value from two 32-bit reads (lo first in memory). */
	union {
		u64 v;
		struct {
			u32 lo;
			u32 hi;
		};
	} reg;

	edac_dbg(2, "\n");

	if (!res_cfg->ibecc_available(pdev)) {
		edac_dbg(2, "No In-Band ECC IP\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, TOLUD_OFFSET, &igen6_tolud)) {
		igen6_printk(KERN_ERR, "Failed to read TOLUD\n");
		return -ENODEV;
	}

	/* Only bits 31:20 are kept. */
	igen6_tolud &= GENMASK(31, 20);

	if (pci_read_config_dword(pdev, TOM_OFFSET, &reg.lo)) {
		igen6_printk(KERN_ERR, "Failed to read lower TOM\n");
		return -ENODEV;
	}

	if (pci_read_config_dword(pdev, TOM_OFFSET + 4, &reg.hi)) {
		igen6_printk(KERN_ERR, "Failed to read upper TOM\n");
		return -ENODEV;
	}

	/* Only bits 38:20 are kept. */
	igen6_tom = reg.v & GENMASK_ULL(38, 20);

	if (get_mchbar(pdev, mchbar))
		return -ENODEV;

#ifdef CONFIG_EDAC_DEBUG
	if (pci_read_config_dword(pdev, TOUUD_OFFSET, &reg.lo))
		edac_dbg(2, "Failed to read lower TOUUD\n");
	else if (pci_read_config_dword(pdev, TOUUD_OFFSET + 4, &reg.hi))
		edac_dbg(2, "Failed to read upper TOUUD\n");
	else
		igen6_touud = reg.v & GENMASK_ULL(38, 20);
#endif

	return 0;
}
1189
igen6_check(struct mem_ctl_info * mci)1190 static void igen6_check(struct mem_ctl_info *mci)
1191 {
1192 struct igen6_imc *imc = mci->pvt_info;
1193 u64 ecclog;
1194
1195 /* errsts_clear() isn't NMI-safe. Delay it in the IRQ context */
1196 ecclog = ecclog_read_and_clear(imc);
1197 if (!ecclog)
1198 return;
1199
1200 if (!ecclog_gen_pool_add(imc->mc, ecclog))
1201 irq_work_queue(&ecclog_irq_work);
1202 }
1203
/*
 * Map the register window of memory controller @mc, allocate and set up
 * its mem_ctl_info and register it with the EDAC core.
 *
 * @mchbar is the base address handed in by the caller; the window of
 * controller @mc starts @mc * MCHBAR_SIZE above it.
 *
 * On success imc->mci is set for the controller. On failure everything
 * acquired here (name string, mci, mapping) is released again.
 *
 * Return: 0 on success, a negative errno otherwise.
 */
static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
{
	struct edac_mc_layer layers[2] = {
		{
			.type		= EDAC_MC_LAYER_CHANNEL,
			.size		= NUM_CHANNELS,
			.is_virt_csrow	= false,
		},
		{
			.type		= EDAC_MC_LAYER_SLOT,
			.size		= NUM_DIMMS,
			.is_virt_csrow	= true,
		},
	};
	struct mem_ctl_info *mci;
	struct igen6_imc *imc;
	void __iomem *regs;
	int rc;

	edac_dbg(2, "\n");

	mchbar += mc * MCHBAR_SIZE;
	regs = ioremap(mchbar, MCHBAR_SIZE);
	if (!regs) {
		igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
		return -ENODEV;
	}

	mci = edac_mc_alloc(mc, ARRAY_SIZE(layers), layers, 0);
	if (!mci) {
		rc = -ENOMEM;
		goto err_unmap;
	}

	mci->ctl_name = kasprintf(GFP_KERNEL, "Intel_client_SoC MC#%d", mc);
	if (!mci->ctl_name) {
		rc = -ENOMEM;
		goto err_free_mci;
	}

	mci->mtype_cap = MEM_FLAG_LPDDR4 | MEM_FLAG_DDR4;
	mci->edac_ctl_cap = EDAC_FLAG_SECDED;
	mci->edac_cap = EDAC_FLAG_SECDED;
	mci->mod_name = EDAC_MOD_STR;
	mci->dev_name = pci_name(pdev);
	/* The check callback is only needed when errors are polled for. */
	if (edac_op_state == EDAC_OPSTATE_POLL)
		mci->edac_check = igen6_check;

	/* The private data lives in igen6_pvt, not in the mci allocation. */
	imc = &igen6_pvt->imc[mc];
	mci->pvt_info = imc;
	device_initialize(&imc->dev);
	/*
	 * EDAC core uses mci->pdev (a struct device pointer) as the memory
	 * controller ID. The client SoCs attach one or more memory
	 * controllers to a single pci_dev, so pci_dev->dev alone cannot
	 * identify them all.
	 *
	 * To keep mci->pdev unique, the first memory controller uses
	 * pci_dev->dev and every other one uses its own imc->dev.
	 */
	mci->pdev = mc ? &imc->dev : &pdev->dev;
	imc->mc = mc;
	imc->pdev = pdev;
	imc->window = regs;

	igen6_reg_dump(imc);

	rc = igen6_get_dimm_config(mci);
	if (rc)
		goto err_free_name;

	rc = edac_mc_add_mc(mci);
	if (rc) {
		igen6_printk(KERN_ERR, "Failed to register mci#%d\n", mc);
		goto err_free_name;
	}

	imc->mci = mci;
	return 0;

err_free_name:
	/* Detach the private data before the mci is freed. */
	mci->pvt_info = NULL;
	kfree(mci->ctl_name);
err_free_mci:
	edac_mc_free(mci);
err_unmap:
	iounmap(regs);
	return rc;
}
1289
igen6_unregister_mcis(void)1290 static void igen6_unregister_mcis(void)
1291 {
1292 struct mem_ctl_info *mci;
1293 struct igen6_imc *imc;
1294 int i;
1295
1296 edac_dbg(2, "\n");
1297
1298 for (i = 0; i < res_cfg->num_imc; i++) {
1299 imc = &igen6_pvt->imc[i];
1300 mci = imc->mci;
1301 if (!mci)
1302 continue;
1303
1304 edac_mc_del_mc(mci->pdev);
1305 kfree(mci->ctl_name);
1306 mci->pvt_info = NULL;
1307 edac_mc_free(mci);
1308 iounmap(imc->window);
1309 }
1310 }
1311
/*
 * Record the memory-slice parameters for a two-controller configuration.
 *
 * ms_s_size is the size of the smaller of the two controllers and
 * ms_l_map is the index of the other (larger or equal) one. When the
 * configuration has a non-zero cmf_size, the memory slice hash register
 * is read from a temporary mapping at @mchbar + cmf_base and cached in
 * igen6_pvt->ms_hash.
 *
 * Return: 0 on success, -ENODEV if the mapping fails.
 */
static int igen6_mem_slice_setup(u64 mchbar)
{
	struct igen6_imc *imc = &igen6_pvt->imc[0];
	bool mc0_smaller = imc[0].size < imc[1].size;
	u64 cmf_addr = mchbar + res_cfg->cmf_base;
	u32 hash_off = res_cfg->ms_hash_offset;
	u32 cmf_len = res_cfg->cmf_size;
	void __iomem *cmf;

	edac_dbg(2, "\n");

	igen6_pvt->ms_s_size = mc0_smaller ? imc[0].size : imc[1].size;
	igen6_pvt->ms_l_map = mc0_smaller ? 1 : 0;

	edac_dbg(0, "ms_s_size: %llu MiB, ms_l_map %d\n",
		 igen6_pvt->ms_s_size >> 20, igen6_pvt->ms_l_map);

	/* Nothing to map when the configuration describes no CMF region. */
	if (!cmf_len)
		return 0;

	cmf = ioremap(cmf_addr, cmf_len);
	if (!cmf) {
		igen6_printk(KERN_ERR, "Failed to ioremap cmf 0x%llx\n", cmf_addr);
		return -ENODEV;
	}

	igen6_pvt->ms_hash = readq(cmf + hash_off);

	edac_dbg(0, "MEM_SLICE_HASH: 0x%llx\n", igen6_pvt->ms_hash);

	iounmap(cmf);

	return 0;
}
1356
register_err_handler(void)1357 static int register_err_handler(void)
1358 {
1359 int rc;
1360
1361 if (res_cfg->machine_check) {
1362 mce_register_decode_chain(&ecclog_mce_dec);
1363 return 0;
1364 }
1365
1366 rc = register_nmi_handler(NMI_SERR, ecclog_nmi_handler,
1367 0, IGEN6_NMI_NAME);
1368 if (rc) {
1369 igen6_printk(KERN_ERR, "Failed to register NMI handler\n");
1370 return rc;
1371 }
1372
1373 return 0;
1374 }
1375
unregister_err_handler(void)1376 static void unregister_err_handler(void)
1377 {
1378 if (res_cfg->machine_check) {
1379 mce_unregister_decode_chain(&ecclog_mce_dec);
1380 return;
1381 }
1382
1383 unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
1384 }
1385
opstate_set(struct res_config * cfg,const struct pci_device_id * ent)1386 static void opstate_set(struct res_config *cfg, const struct pci_device_id *ent)
1387 {
1388 /*
1389 * Quirk: Certain SoCs' error reporting interrupts don't work.
1390 * Force polling mode for them to ensure that memory error
1391 * events can be handled.
1392 */
1393 if (ent->device == DID_ADL_N_SKU4) {
1394 edac_op_state = EDAC_OPSTATE_POLL;
1395 return;
1396 }
1397
1398 /* Set the mode according to the configuration data. */
1399 if (cfg->machine_check)
1400 edac_op_state = EDAC_OPSTATE_INT;
1401 else
1402 edac_op_state = EDAC_OPSTATE_NMI;
1403 }
1404
igen6_probe(struct pci_dev * pdev,const struct pci_device_id * ent)1405 static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1406 {
1407 u64 mchbar;
1408 int i, rc;
1409
1410 edac_dbg(2, "\n");
1411
1412 igen6_pvt = kzalloc(sizeof(*igen6_pvt), GFP_KERNEL);
1413 if (!igen6_pvt)
1414 return -ENOMEM;
1415
1416 res_cfg = (struct res_config *)ent->driver_data;
1417
1418 rc = igen6_pci_setup(pdev, &mchbar);
1419 if (rc)
1420 goto fail;
1421
1422 opstate_set(res_cfg, ent);
1423
1424 for (i = 0; i < res_cfg->num_imc; i++) {
1425 rc = igen6_register_mci(i, mchbar, pdev);
1426 if (rc)
1427 goto fail2;
1428 }
1429
1430 if (res_cfg->num_imc > 1) {
1431 rc = igen6_mem_slice_setup(mchbar);
1432 if (rc)
1433 goto fail2;
1434 }
1435
1436 ecclog_pool = ecclog_gen_pool_create();
1437 if (!ecclog_pool) {
1438 rc = -ENOMEM;
1439 goto fail2;
1440 }
1441
1442 INIT_WORK(&ecclog_work, ecclog_work_cb);
1443 init_irq_work(&ecclog_irq_work, ecclog_irq_work_cb);
1444
1445 rc = register_err_handler();
1446 if (rc)
1447 goto fail3;
1448
1449 /* Enable error reporting */
1450 rc = errcmd_enable_error_reporting(true);
1451 if (rc) {
1452 igen6_printk(KERN_ERR, "Failed to enable error reporting\n");
1453 goto fail4;
1454 }
1455
1456 /* Check if any pending errors before/during the registration of the error handler */
1457 ecclog_handler();
1458
1459 igen6_debug_setup();
1460 return 0;
1461 fail4:
1462 unregister_nmi_handler(NMI_SERR, IGEN6_NMI_NAME);
1463 fail3:
1464 gen_pool_destroy(ecclog_pool);
1465 fail2:
1466 igen6_unregister_mcis();
1467 fail:
1468 kfree(igen6_pvt);
1469 return rc;
1470 }
1471
igen6_remove(struct pci_dev * pdev)1472 static void igen6_remove(struct pci_dev *pdev)
1473 {
1474 edac_dbg(2, "\n");
1475
1476 igen6_debug_teardown();
1477 errcmd_enable_error_reporting(false);
1478 unregister_err_handler();
1479 irq_work_sync(&ecclog_irq_work);
1480 flush_work(&ecclog_work);
1481 gen_pool_destroy(ecclog_pool);
1482 igen6_unregister_mcis();
1483 kfree(igen6_pvt);
1484 }
1485
1486 static struct pci_driver igen6_driver = {
1487 .name = EDAC_MOD_STR,
1488 .probe = igen6_probe,
1489 .remove = igen6_remove,
1490 .id_table = igen6_pci_tbl,
1491 };
1492
igen6_init(void)1493 static int __init igen6_init(void)
1494 {
1495 const char *owner;
1496 int rc;
1497
1498 edac_dbg(2, "\n");
1499
1500 if (ghes_get_devices())
1501 return -EBUSY;
1502
1503 owner = edac_get_owner();
1504 if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
1505 return -EBUSY;
1506
1507 rc = pci_register_driver(&igen6_driver);
1508 if (rc)
1509 return rc;
1510
1511 igen6_printk(KERN_INFO, "%s\n", IGEN6_REVISION);
1512
1513 return 0;
1514 }
1515
igen6_exit(void)1516 static void __exit igen6_exit(void)
1517 {
1518 edac_dbg(2, "\n");
1519
1520 pci_unregister_driver(&igen6_driver);
1521 }
1522
1523 module_init(igen6_init);
1524 module_exit(igen6_exit);
1525
1526 MODULE_LICENSE("GPL v2");
1527 MODULE_AUTHOR("Qiuxu Zhuo");
1528 MODULE_DESCRIPTION("MC Driver for Intel client SoC using In-Band ECC");
1529