1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * FRU (Field-Replaceable Unit) Memory Poison Manager
4  *
5  * Copyright (c) 2024, Advanced Micro Devices, Inc.
6  * All Rights Reserved.
7  *
8  * Authors:
9  *	Naveen Krishna Chatradhi <[email protected]>
10  *	Muralidhara M K <[email protected]>
11  *	Yazen Ghannam <[email protected]>
12  *
13  * Implementation notes, assumptions, and limitations:
14  *
15  * - FRU memory poison section and memory poison descriptor definitions are not yet
16  *   included in the UEFI specification. So they are defined here. Afterwards, they
17  *   may be moved to linux/cper.h, if appropriate.
18  *
19  * - Platforms based on AMD MI300 systems will be the first to use these structures.
20  *   There are a number of assumptions made here that will need to be generalized
21  *   to support other platforms.
22  *
23  *   AMD MI300-based platform(s) assumptions:
24  *   - Memory errors are reported through x86 MCA.
25  *   - The entire DRAM row containing a memory error should be retired.
26  *   - There will be (1) FRU memory poison section per CPER.
27  *   - The FRU will be the CPU package (processor socket).
28  *   - The default number of memory poison descriptor entries should be (8).
29  *   - The platform will use ACPI ERST for persistent storage.
30  *   - All FRU records should be saved to persistent storage. Module init will
31  *     fail if any FRU record is not successfully written.
32  *
33  * - Boot time memory retirement may occur later than ideal due to dependencies
34  *   on other libraries and drivers. This leaves a gap where bad memory may be
35  *   accessed during early boot stages.
36  *
37  * - Enough memory should be pre-allocated for each FRU record to be able to hold
38  *   the expected number of descriptor entries. This, mostly empty, record is
39  *   written to storage during init time. Subsequent writes to the same record
40  *   should allow the Platform to update the stored record in-place. Otherwise,
41  *   if the record is extended, then the Platform may need to perform costly memory
42  *   management operations on the storage. For example, the Platform may spend time
43  *   in Firmware copying and invalidating memory on a relatively slow SPI ROM.
44  */
45 
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bitmap.h>
#include <linux/cper.h>
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/ras.h>

#include <acpi/apei.h>

#include <asm/cpu_device_id.h>
#include <asm/mce.h>

#include "../debugfs.h"

#include "atl/internal.h"
60 
/* Sentinel CPU number: no matching CPU was found. */
#define INVALID_CPU			UINT_MAX

/* Validation Bits: which cper_sec_fru_mem_poison fields hold valid data. */
#define FMP_VALID_ARCH_TYPE		BIT_ULL(0)
#define FMP_VALID_ARCH			BIT_ULL(1)
#define FMP_VALID_ID_TYPE		BIT_ULL(2)
#define FMP_VALID_ID			BIT_ULL(3)
#define FMP_VALID_LIST_ENTRIES		BIT_ULL(4)
#define FMP_VALID_LIST			BIT_ULL(5)

/* FRU Architecture Types */
#define FMP_ARCH_TYPE_X86_CPUID_1_EAX	0

/* FRU ID Types */
#define FMP_ID_TYPE_X86_PPIN		0
76 
/* FRU Memory Poison Section */
struct cper_sec_fru_mem_poison {
	u32 checksum;		/* Complement of the section byte-sum; valid section sums to zero. */
	u64 validation_bits;	/* FMP_VALID_* flags for the fields below. */
	u32 fru_arch_type;	/* FMP_ARCH_TYPE_*: how to interpret fru_arch. */
	u64 fru_arch;		/* e.g. x86 CPUID leaf 1 EAX value. */
	u32 fru_id_type;	/* FMP_ID_TYPE_*: how to interpret fru_id. */
	u64 fru_id;		/* e.g. x86 PPIN of the FRU (CPU package). */
	u32 nr_entries;		/* Number of memory poison descriptors that follow. */
} __packed;
87 
/* FRU Descriptor ID Types */
#define FPD_HW_ID_TYPE_MCA_IPID		0

/* FRU Descriptor Address Types */
#define FPD_ADDR_TYPE_MCA_ADDR		0

/* Memory Poison Descriptor: one logged memory error within a FRU. */
struct cper_fru_poison_desc {
	u64 timestamp;		/* Time of the error (struct mce::time). */
	u32 hw_id_type;		/* FPD_HW_ID_TYPE_*: how to interpret hw_id. */
	u64 hw_id;		/* e.g. MCA_IPID of the reporting bank. */
	u32 addr_type;		/* FPD_ADDR_TYPE_*: how to interpret addr. */
	u64 addr;		/* e.g. MCA_ADDR value of the error. */
} __packed;
102 
/*
 * Collection of headers and sections for easy pointer use.
 *
 * Laid out exactly as stored in ERST: record header, one section
 * descriptor, the FMP section, then a flexible array of descriptors.
 */
struct fru_rec {
	struct cper_record_header	hdr;
	struct cper_section_descriptor	sec_desc;
	struct cper_sec_fru_mem_poison	fmp;
	struct cper_fru_poison_desc	entries[];
} __packed;
110 
/*
 * Pointers to the complete CPER record of each FRU.
 *
 * Memory allocation will include padded space for descriptor entries.
 */
static struct fru_rec **fru_records;

/* system physical addresses array; one slot per descriptor entry per FRU */
static u64 *spa_entries;

/* debugfs handles: <ras>/fmpm directory and its "entries" file */
static struct dentry *fmpm_dfs_dir;
static struct dentry *fmpm_dfs_entries;

/* GUID identifying records created by this module in ERST. */
#define CPER_CREATOR_FMP						\
	GUID_INIT(0xcd5c2993, 0xf4b2, 0x41b2, 0xb5, 0xd4, 0xf9, 0xc3,	\
		  0xa0, 0x33, 0x08, 0x75)

/* GUID identifying the FRU Memory Poison section type. */
#define CPER_SECTION_TYPE_FMP						\
	GUID_INIT(0x5e4706c1, 0x5356, 0x48c6, 0x93, 0x0b, 0x52, 0xf2,	\
		  0x12, 0x0a, 0x44, 0x58)

/**
 * DOC: max_nr_entries (byte)
 * Maximum number of descriptor entries possible for each FRU.
 *
 * Values between '1' and '255' are valid.
 * No input or '0' will default to FMPM_DEFAULT_MAX_NR_ENTRIES.
 */
static u8 max_nr_entries;
module_param(max_nr_entries, byte, 0644);
MODULE_PARM_DESC(max_nr_entries,
		 "Maximum number of memory poison descriptor entries per FRU");

#define FMPM_DEFAULT_MAX_NR_ENTRIES	8

/* Maximum number of FRUs in the system. */
#define FMPM_MAX_NR_FRU			256
static unsigned int max_nr_fru;

/* Total length of record including headers and list of descriptor entries. */
static size_t max_rec_len;

/* Upper bound on a record: 255 entries is the largest a u8 max_nr_entries allows. */
#define FMPM_MAX_REC_LEN (sizeof(struct fru_rec) + (sizeof(struct cper_fru_poison_desc) * 255))

/* Total number of SPA entries across all FRUs. */
static unsigned int spa_nr_entries;

/*
 * Protect the local records cache in fru_records and prevent concurrent
 * writes to storage. This is only needed after init once notifier block
 * registration is done.
 *
 * The majority of a record is fixed at module init and will not change
 * during run time. The entries within a record will be updated as new
 * errors are reported. The mutex should be held whenever the entries are
 * accessed during run time.
 */
static DEFINE_MUTEX(fmpm_update_mutex);
169 
/*
 * Iterate over each cached FRU record.
 *
 * Check the index bound *before* loading fru_records[i]: the original
 * comma expression evaluated 'rec = fru_records[i]' first, so the
 * terminating loop test read one element past the end of the array.
 */
#define for_each_fru(i, rec) \
	for (i = 0; i < max_nr_fru && (rec = fru_records[i], true); i++)
172 
get_fmp_len(struct fru_rec * rec)173 static inline u32 get_fmp_len(struct fru_rec *rec)
174 {
175 	return rec->sec_desc.section_length - sizeof(struct cper_section_descriptor);
176 }
177 
/*
 * Look up the cached FRU record whose FRU ID (x86 PPIN) matches fru_id.
 * Returns NULL when no such record exists.
 */
static struct fru_rec *get_fru_record(u64 fru_id)
{
	struct fru_rec *found = NULL;
	struct fru_rec *rec;
	unsigned int i;

	for_each_fru(i, rec) {
		if (rec->fmp.fru_id != fru_id)
			continue;

		found = rec;
		break;
	}

	if (!found)
		pr_debug("Record not found for FRU 0x%016llx\n", fru_id);

	return found;
}
192 
/*
 * Sum up all bytes within the FRU Memory Poison Section including the Memory
 * Poison Descriptor entries.
 *
 * Don't include the old checksum here. It's a u32 value, so summing each of its
 * bytes will give the wrong total.
 */
static u32 do_fmp_checksum(struct cper_sec_fru_mem_poison *fmp, u32 len)
{
	u32 checksum = 0;
	u8 *buf, *end;

	/* Skip old checksum. */
	buf = (u8 *)fmp + sizeof(u32);
	end = buf + len;

	/*
	 * NOTE(review): 'end' is 'len' bytes past the checksum field, i.e.
	 * sizeof(u32) bytes past the end of the section itself, so this loop
	 * appears to read 4 bytes beyond the section/record buffer. Writer
	 * and verifier use the same range, so stored checksums still match,
	 * but confirm the intended summation range (changing it would
	 * invalidate previously saved records).
	 */
	while (buf < end)
		checksum += (u8)(*(buf++));

	return checksum;
}
214 
/*
 * Recompute the section checksum and write the complete CPER record to
 * persistent storage (ACPI ERST).
 *
 * Returns 0 on success or the error from erst_write().
 */
static int update_record_on_storage(struct fru_rec *rec)
{
	u32 len, checksum;
	int ret;

	/* Calculate a new checksum. */
	len = get_fmp_len(rec);

	/* Get the current total. */
	checksum = do_fmp_checksum(&rec->fmp, len);

	/* Use the complement value so a valid stored section sums to zero. */
	rec->fmp.checksum = -checksum;

	pr_debug("Writing to storage\n");

	ret = erst_write(&rec->hdr);
	if (ret) {
		pr_warn("Storage update failed for FRU 0x%016llx\n", rec->fmp.fru_id);

		if (ret == -ENOSPC)
			pr_warn("Not enough space on storage\n");
	}

	return ret;
}
241 
rec_has_valid_entries(struct fru_rec * rec)242 static bool rec_has_valid_entries(struct fru_rec *rec)
243 {
244 	if (!(rec->fmp.validation_bits & FMP_VALID_LIST_ENTRIES))
245 		return false;
246 
247 	if (!(rec->fmp.validation_bits & FMP_VALID_LIST))
248 		return false;
249 
250 	return true;
251 }
252 
253 /*
254  * Row retirement is done on MI300 systems, and some bits are 'don't
255  * care' for comparing addresses with unique physical rows.  This
256  * includes all column bits and the row[13] bit.
257  */
258 #define MASK_ADDR(addr)	((addr) & ~(MI300_UMC_MCA_ROW13 | MI300_UMC_MCA_COL))
259 
fpds_equal(struct cper_fru_poison_desc * old,struct cper_fru_poison_desc * new)260 static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_desc *new)
261 {
262 	/*
263 	 * Ignore timestamp field.
264 	 * The same physical error may be reported multiple times due to stuck bits, etc.
265 	 *
266 	 * Also, order the checks from most->least likely to fail to shortcut the code.
267 	 */
268 	if (MASK_ADDR(old->addr) != MASK_ADDR(new->addr))
269 		return false;
270 
271 	if (old->hw_id != new->hw_id)
272 		return false;
273 
274 	if (old->addr_type != new->addr_type)
275 		return false;
276 
277 	if (old->hw_id_type != new->hw_id_type)
278 		return false;
279 
280 	return true;
281 }
282 
rec_has_fpd(struct fru_rec * rec,struct cper_fru_poison_desc * fpd)283 static bool rec_has_fpd(struct fru_rec *rec, struct cper_fru_poison_desc *fpd)
284 {
285 	unsigned int i;
286 
287 	for (i = 0; i < rec->fmp.nr_entries; i++) {
288 		struct cper_fru_poison_desc *fpd_i = &rec->entries[i];
289 
290 		if (fpds_equal(fpd_i, fpd)) {
291 			pr_debug("Found duplicate record\n");
292 			return true;
293 		}
294 	}
295 
296 	return false;
297 }
298 
/*
 * Record the system physical address (SPA) of a descriptor entry in the
 * flat spa_entries[] table, which holds max_nr_entries slots per FRU.
 *
 * @rec:   FRU record the error belongs to
 * @entry: descriptor index within the record
 * @addr:  MCA error address
 * @id:    MCA IPID identifying the reporting hardware
 * @cpu:   logical CPU used for the address translation
 *
 * Fix: the warning format specifiers used %d for unsigned arguments;
 * use %u to match 'entry', 'max_nr_entries', and 'i'.
 */
static void save_spa(struct fru_rec *rec, unsigned int entry,
		     u64 addr, u64 id, unsigned int cpu)
{
	unsigned int i, fru_idx, spa_entry;
	struct atl_err a_err;
	unsigned long spa;

	if (entry >= max_nr_entries) {
		pr_warn_once("FRU descriptor entry %u out-of-bounds (max: %u)\n",
			     entry, max_nr_entries);
		return;
	}

	/* spa_nr_entries is always multiple of max_nr_entries */
	for (i = 0; i < spa_nr_entries; i += max_nr_entries) {
		fru_idx = i / max_nr_entries;
		if (fru_records[fru_idx] == rec)
			break;
	}

	if (i >= spa_nr_entries) {
		pr_warn_once("FRU record %u not found\n", i);
		return;
	}

	spa_entry = i + entry;
	if (spa_entry >= spa_nr_entries) {
		pr_warn_once("spa_entries[] index out-of-bounds\n");
		return;
	}

	memset(&a_err, 0, sizeof(struct atl_err));

	a_err.addr = addr;
	a_err.ipid = id;
	a_err.cpu  = cpu;

	/* Translate the UMC MCA address to a system physical address. */
	spa = amd_convert_umc_mca_addr_to_sys_addr(&a_err);
	if (IS_ERR_VALUE(spa)) {
		pr_debug("Failed to get system address\n");
		return;
	}

	spa_entries[spa_entry] = spa;
	pr_debug("fru_idx: %u, entry: %u, spa_entry: %u, spa: 0x%016llx\n",
		 fru_idx, entry, spa_entry, spa_entries[spa_entry]);
}
346 
/*
 * Add a poison descriptor for an MCE to the FRU record and write the
 * updated record to persistent storage.
 *
 * Duplicate errors (matching per fpds_equal()) are ignored so a stuck
 * bit reported repeatedly does not consume extra entries.
 */
static void update_fru_record(struct fru_rec *rec, struct mce *m)
{
	struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
	struct cper_fru_poison_desc fpd, *fpd_dest;
	u32 entry = 0;

	mutex_lock(&fmpm_update_mutex);

	memset(&fpd, 0, sizeof(struct cper_fru_poison_desc));

	fpd.timestamp	= m->time;
	fpd.hw_id_type = FPD_HW_ID_TYPE_MCA_IPID;
	fpd.hw_id	= m->ipid;
	fpd.addr_type	= FPD_ADDR_TYPE_MCA_ADDR;
	fpd.addr	= m->addr;

	/* This is the first entry, so just save it. */
	if (!rec_has_valid_entries(rec))
		goto save_fpd;

	/* Ignore already recorded errors. */
	if (rec_has_fpd(rec, &fpd))
		goto out_unlock;

	if (rec->fmp.nr_entries >= max_nr_entries) {
		pr_warn("Exceeded number of entries for FRU 0x%016llx\n", rec->fmp.fru_id);
		goto out_unlock;
	}

	/* Append after the existing entries. */
	entry  = fmp->nr_entries;

save_fpd:
	save_spa(rec, entry, m->addr, m->ipid, m->extcpu);
	fpd_dest  = &rec->entries[entry];
	memcpy(fpd_dest, &fpd, sizeof(struct cper_fru_poison_desc));

	fmp->nr_entries		 = entry + 1;
	fmp->validation_bits	|= FMP_VALID_LIST_ENTRIES;
	fmp->validation_bits	|= FMP_VALID_LIST;

	pr_debug("Updated FRU 0x%016llx entry #%u\n", fmp->fru_id, entry);

	update_record_on_storage(rec);

out_unlock:
	mutex_unlock(&fmpm_update_mutex);
}
394 
/*
 * Ask the address translation library to retire the DRAM row
 * containing the given MCA address.
 */
static void retire_dram_row(u64 addr, u64 id, u32 cpu)
{
	/* Designated initializer zero-fills the remaining fields. */
	struct atl_err a_err = {
		.addr = addr,
		.ipid = id,
		.cpu  = cpu,
	};

	amd_retire_dram_row(&a_err);
}
407 
/*
 * MCE decode chain callback: retire the affected DRAM row and log the
 * error in the owning FRU's poison record.
 */
static int fru_handle_mem_poison(struct notifier_block *nb, unsigned long val, void *data)
{
	struct mce *m = (struct mce *)data;
	struct fru_rec *rec;

	if (!mce_is_memory_error(m))
		return NOTIFY_DONE;

	/* Retire first; recording the error is best-effort on top of that. */
	retire_dram_row(m->addr, m->ipid, m->extcpu);

	/*
	 * An invalid FRU ID should not happen on real errors. But it
	 * could happen from software error injection, etc.
	 */
	rec = get_fru_record(m->ppin);
	if (!rec)
		return NOTIFY_DONE;

	update_fru_record(rec, m);

	return NOTIFY_OK;
}
430 
/* Lowest priority: runs after the other MCE decoders in the chain. */
static struct notifier_block fru_mem_poison_nb = {
	.notifier_call  = fru_handle_mem_poison,
	.priority	= MCE_PRIO_LOWEST,
};
435 
retire_mem_fmp(struct fru_rec * rec)436 static void retire_mem_fmp(struct fru_rec *rec)
437 {
438 	struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
439 	unsigned int i, cpu;
440 
441 	for (i = 0; i < fmp->nr_entries; i++) {
442 		struct cper_fru_poison_desc *fpd = &rec->entries[i];
443 		unsigned int err_cpu = INVALID_CPU;
444 
445 		if (fpd->hw_id_type != FPD_HW_ID_TYPE_MCA_IPID)
446 			continue;
447 
448 		if (fpd->addr_type != FPD_ADDR_TYPE_MCA_ADDR)
449 			continue;
450 
451 		cpus_read_lock();
452 		for_each_online_cpu(cpu) {
453 			if (topology_ppin(cpu) == fmp->fru_id) {
454 				err_cpu = cpu;
455 				break;
456 			}
457 		}
458 		cpus_read_unlock();
459 
460 		if (err_cpu == INVALID_CPU)
461 			continue;
462 
463 		retire_dram_row(fpd->addr, fpd->hw_id, err_cpu);
464 		save_spa(rec, i, fpd->addr, fpd->hw_id, err_cpu);
465 	}
466 }
467 
retire_mem_records(void)468 static void retire_mem_records(void)
469 {
470 	struct fru_rec *rec;
471 	unsigned int i;
472 
473 	for_each_fru(i, rec) {
474 		if (!rec_has_valid_entries(rec))
475 			continue;
476 
477 		retire_mem_fmp(rec);
478 	}
479 }
480 
/* Set the CPER Record Header and CPER Section Descriptor fields. */
static void set_rec_fields(struct fru_rec *rec)
{
	struct cper_section_descriptor	*sec_desc = &rec->sec_desc;
	struct cper_record_header	*hdr	  = &rec->hdr;

	/*
	 * This is a saved record created with fewer max_nr_entries.
	 * Update the record lengths and keep everything else as-is.
	 */
	if (hdr->record_length && hdr->record_length < max_rec_len) {
		pr_debug("Growing record 0x%016llx from %u to %zu bytes\n",
			 hdr->record_id, hdr->record_length, max_rec_len);
		goto update_lengths;
	}

	/* Fresh record: fill in every header field. */
	memcpy(hdr->signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
	hdr->revision			= CPER_RECORD_REV;
	hdr->signature_end		= CPER_SIG_END;

	/*
	 * Currently, it is assumed that there is one FRU Memory Poison
	 * section per CPER. But this may change for other implementations.
	 */
	hdr->section_count		= 1;

	/* The logged errors are recoverable. Otherwise, they'd never make it here. */
	hdr->error_severity		= CPER_SEV_RECOVERABLE;

	hdr->validation_bits		= 0;
	hdr->creator_id			= CPER_CREATOR_FMP;
	hdr->notification_type		= CPER_NOTIFY_MCE;
	hdr->record_id			= cper_next_record_id();
	hdr->flags			= CPER_HW_ERROR_FLAGS_PREVERR;

	sec_desc->section_offset	= sizeof(struct cper_record_header);
	sec_desc->revision		= CPER_SEC_REV;
	sec_desc->validation_bits	= 0;
	sec_desc->flags			= CPER_SEC_PRIMARY;
	sec_desc->section_type		= CPER_SECTION_TYPE_FMP;
	sec_desc->section_severity	= CPER_SEV_RECOVERABLE;

update_lengths:
	/* Lengths always reflect the current max_nr_entries-sized record. */
	hdr->record_length		= max_rec_len;
	sec_desc->section_length	= max_rec_len - sizeof(struct cper_record_header);
}
527 
save_new_records(void)528 static int save_new_records(void)
529 {
530 	DECLARE_BITMAP(new_records, FMPM_MAX_NR_FRU);
531 	struct fru_rec *rec;
532 	unsigned int i;
533 	int ret = 0;
534 
535 	for_each_fru(i, rec) {
536 		/* No need to update saved records that match the current record size. */
537 		if (rec->hdr.record_length == max_rec_len)
538 			continue;
539 
540 		if (!rec->hdr.record_length)
541 			set_bit(i, new_records);
542 
543 		set_rec_fields(rec);
544 
545 		ret = update_record_on_storage(rec);
546 		if (ret)
547 			goto out_clear;
548 	}
549 
550 	return ret;
551 
552 out_clear:
553 	for_each_fru(i, rec) {
554 		if (!test_bit(i, new_records))
555 			continue;
556 
557 		erst_clear(rec->hdr.record_id);
558 	}
559 
560 	return ret;
561 }
562 
/* Check that the record matches expected types for the current system.*/
static bool fmp_is_usable(struct fru_rec *rec)
{
	struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
	u64 cpuid;

	pr_debug("Validation bits: 0x%016llx\n", fmp->validation_bits);

	/* Each field is checked in pairs: its valid bit, then its value. */
	if (!(fmp->validation_bits & FMP_VALID_ARCH_TYPE)) {
		pr_debug("Arch type unknown\n");
		return false;
	}

	if (fmp->fru_arch_type != FMP_ARCH_TYPE_X86_CPUID_1_EAX) {
		pr_debug("Arch type not 'x86 Family/Model/Stepping'\n");
		return false;
	}

	if (!(fmp->validation_bits & FMP_VALID_ARCH)) {
		pr_debug("Arch value unknown\n");
		return false;
	}

	/* The record must come from a matching CPU (family/model/stepping). */
	cpuid = cpuid_eax(1);
	if (fmp->fru_arch != cpuid) {
		pr_debug("Arch value mismatch: record = 0x%016llx, system = 0x%016llx\n",
			 fmp->fru_arch, cpuid);
		return false;
	}

	if (!(fmp->validation_bits & FMP_VALID_ID_TYPE)) {
		pr_debug("FRU ID type unknown\n");
		return false;
	}

	if (fmp->fru_id_type != FMP_ID_TYPE_X86_PPIN) {
		pr_debug("FRU ID type is not 'x86 PPIN'\n");
		return false;
	}

	if (!(fmp->validation_bits & FMP_VALID_ID)) {
		pr_debug("FRU ID value unknown\n");
		return false;
	}

	return true;
}
610 
fmp_is_valid(struct fru_rec * rec)611 static bool fmp_is_valid(struct fru_rec *rec)
612 {
613 	struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
614 	u32 checksum, len;
615 
616 	len = get_fmp_len(rec);
617 	if (len < sizeof(struct cper_sec_fru_mem_poison)) {
618 		pr_debug("fmp length is too small\n");
619 		return false;
620 	}
621 
622 	/* Checksum must sum to zero for the entire section. */
623 	checksum = do_fmp_checksum(fmp, len) + fmp->checksum;
624 	if (checksum) {
625 		pr_debug("fmp checksum failed: sum = 0x%x\n", checksum);
626 		print_hex_dump_debug("fmp record: ", DUMP_PREFIX_NONE, 16, 1, fmp, len, false);
627 		return false;
628 	}
629 
630 	if (!fmp_is_usable(rec))
631 		return false;
632 
633 	return true;
634 }
635 
get_valid_record(struct fru_rec * old)636 static struct fru_rec *get_valid_record(struct fru_rec *old)
637 {
638 	struct fru_rec *new;
639 
640 	if (!fmp_is_valid(old)) {
641 		pr_debug("Ignoring invalid record\n");
642 		return NULL;
643 	}
644 
645 	new = get_fru_record(old->fmp.fru_id);
646 	if (!new)
647 		pr_debug("Ignoring record for absent FRU\n");
648 
649 	return new;
650 }
651 
/*
 * Fetch saved records from persistent storage.
 *
 * For each found record:
 * - If it was not created by this module, then ignore it.
 * - If it is valid, then copy its data to the local cache.
 * - If it is not valid, then erase it.
 */
static int get_saved_records(void)
{
	struct fru_rec *old, *new;
	u64 record_id;
	int ret, pos;
	ssize_t len;

	/* Scratch buffer large enough for the biggest possible record. */
	old = kmalloc(FMPM_MAX_REC_LEN, GFP_KERNEL);
	if (!old) {
		ret = -ENOMEM;
		goto out;
	}

	ret = erst_get_record_id_begin(&pos);
	if (ret < 0)
		goto out_end;

	while (!erst_get_record_id_next(&pos, &record_id)) {
		if (record_id == APEI_ERST_INVALID_RECORD_ID)
			goto out_end;
		/*
		 * Make sure to clear temporary buffer between reads to avoid
		 * leftover data from records of various sizes.
		 */
		memset(old, 0, FMPM_MAX_REC_LEN);

		/* Only records created by this module (CPER_CREATOR_FMP) are read. */
		len = erst_read_record(record_id, &old->hdr, FMPM_MAX_REC_LEN,
				       sizeof(struct fru_rec), &CPER_CREATOR_FMP);
		if (len < 0)
			continue;

		new = get_valid_record(old);
		if (!new) {
			erst_clear(record_id);
			continue;
		}

		/*
		 * A stored record larger than the current max can't be cached
		 * in-place; require the admin to raise max_nr_entries.
		 */
		if (len > max_rec_len) {
			unsigned int saved_nr_entries;

			saved_nr_entries  = len - sizeof(struct fru_rec);
			saved_nr_entries /= sizeof(struct cper_fru_poison_desc);

			pr_warn("Saved record found with %u entries.\n", saved_nr_entries);
			pr_warn("Please increase max_nr_entries to %u.\n", saved_nr_entries);

			ret = -EINVAL;
			goto out_end;
		}

		/* Restore the record */
		memcpy(new, old, len);
	}

out_end:
	erst_get_record_id_end();
	kfree(old);
out:
	return ret;
}
720 
set_fmp_fields(struct fru_rec * rec,unsigned int cpu)721 static void set_fmp_fields(struct fru_rec *rec, unsigned int cpu)
722 {
723 	struct cper_sec_fru_mem_poison *fmp = &rec->fmp;
724 
725 	fmp->fru_arch_type    = FMP_ARCH_TYPE_X86_CPUID_1_EAX;
726 	fmp->validation_bits |= FMP_VALID_ARCH_TYPE;
727 
728 	/* Assume all CPUs in the system have the same value for now. */
729 	fmp->fru_arch	      = cpuid_eax(1);
730 	fmp->validation_bits |= FMP_VALID_ARCH;
731 
732 	fmp->fru_id_type      = FMP_ID_TYPE_X86_PPIN;
733 	fmp->validation_bits |= FMP_VALID_ID_TYPE;
734 
735 	fmp->fru_id	      = topology_ppin(cpu);
736 	fmp->validation_bits |= FMP_VALID_ID;
737 }
738 
init_fmps(void)739 static int init_fmps(void)
740 {
741 	struct fru_rec *rec;
742 	unsigned int i, cpu;
743 	int ret = 0;
744 
745 	for_each_fru(i, rec) {
746 		unsigned int fru_cpu = INVALID_CPU;
747 
748 		cpus_read_lock();
749 		for_each_online_cpu(cpu) {
750 			if (topology_physical_package_id(cpu) == i) {
751 				fru_cpu = cpu;
752 				break;
753 			}
754 		}
755 		cpus_read_unlock();
756 
757 		if (fru_cpu == INVALID_CPU) {
758 			pr_debug("Failed to find matching CPU for FRU #%u\n", i);
759 			ret = -ENODEV;
760 			break;
761 		}
762 
763 		set_fmp_fields(rec, fru_cpu);
764 	}
765 
766 	return ret;
767 }
768 
get_system_info(void)769 static int get_system_info(void)
770 {
771 	/* Only load on MI300A systems for now. */
772 	if (!(boot_cpu_data.x86_model >= 0x90 &&
773 	      boot_cpu_data.x86_model <= 0x9f))
774 		return -ENODEV;
775 
776 	if (!cpu_feature_enabled(X86_FEATURE_AMD_PPIN)) {
777 		pr_debug("PPIN feature not available\n");
778 		return -ENODEV;
779 	}
780 
781 	/* Use CPU socket as FRU for MI300 systems. */
782 	max_nr_fru = topology_max_packages();
783 	if (!max_nr_fru)
784 		return -ENODEV;
785 
786 	if (max_nr_fru > FMPM_MAX_NR_FRU) {
787 		pr_warn("Too many FRUs to manage: found: %u, max: %u\n",
788 			max_nr_fru, FMPM_MAX_NR_FRU);
789 		return -ENODEV;
790 	}
791 
792 	if (!max_nr_entries)
793 		max_nr_entries = FMPM_DEFAULT_MAX_NR_ENTRIES;
794 
795 	spa_nr_entries = max_nr_fru * max_nr_entries;
796 
797 	max_rec_len  = sizeof(struct fru_rec);
798 	max_rec_len += sizeof(struct cper_fru_poison_desc) * max_nr_entries;
799 
800 	pr_info("max FRUs: %u, max entries: %u, max record length: %lu\n",
801 		 max_nr_fru, max_nr_entries, max_rec_len);
802 
803 	return 0;
804 }
805 
free_records(void)806 static void free_records(void)
807 {
808 	struct fru_rec *rec;
809 	int i;
810 
811 	for_each_fru(i, rec)
812 		kfree(rec);
813 
814 	kfree(fru_records);
815 	kfree(spa_entries);
816 }
817 
/*
 * Allocate the FRU record cache and the SPA table.
 *
 * Each record is kzalloc'd at max_rec_len so it already has room for
 * max_nr_entries descriptors; see the in-place storage update note in
 * the header comment at the top of this file.
 */
static int allocate_records(void)
{
	int i, ret = 0;

	fru_records = kcalloc(max_nr_fru, sizeof(struct fru_rec *), GFP_KERNEL);
	if (!fru_records) {
		ret = -ENOMEM;
		goto out;
	}

	for (i = 0; i < max_nr_fru; i++) {
		fru_records[i] = kzalloc(max_rec_len, GFP_KERNEL);
		if (!fru_records[i]) {
			ret = -ENOMEM;
			goto out_free;
		}
	}

	spa_entries = kcalloc(spa_nr_entries, sizeof(u64), GFP_KERNEL);
	if (!spa_entries) {
		ret = -ENOMEM;
		goto out_free;
	}

	/* No addresses have been translated yet. */
	for (i = 0; i < spa_nr_entries; i++)
		spa_entries[i] = INVALID_SPA;

	return ret;

out_free:
	/* 'i' is the first index that failed to allocate; free those before it. */
	while (--i >= 0)
		kfree(fru_records[i]);

	kfree(fru_records);
out:
	return ret;
}
855 
/* seq_file start: one header line plus one line per SPA entry. */
static void *fmpm_start(struct seq_file *f, loff_t *pos)
{
	return (*pos < spa_nr_entries + 1) ? pos : NULL;
}
862 
/* seq_file next: advance to the following line, if any remain. */
static void *fmpm_next(struct seq_file *f, void *data, loff_t *pos)
{
	++*pos;

	return (*pos < spa_nr_entries + 1) ? pos : NULL;
}
869 
/* seq_file stop: nothing to clean up; iteration state lives entirely in *pos. */
static void fmpm_stop(struct seq_file *f, void *data)
{
}
873 
/* Column widths for the debugfs "entries" table. */
#define SHORT_WIDTH	8
#define U64_WIDTH	18
#define TIMESTAMP_WIDTH	19
#define LONG_WIDTH	24
#define U64_PAD		(LONG_WIDTH - U64_WIDTH)
#define TS_PAD		(LONG_WIDTH - TIMESTAMP_WIDTH)
/*
 * Print one line of the table: line 0 is the header, line N+1 shows
 * SPA slot N. Unused descriptor slots and untranslated SPAs print '*'.
 */
static int fmpm_show(struct seq_file *f, void *data)
{
	unsigned int fru_idx, entry, spa_entry, line;
	struct cper_fru_poison_desc *fpd;
	struct fru_rec *rec;

	line = *(loff_t *)data;
	if (line == 0) {
		seq_printf(f, "%-*s", SHORT_WIDTH, "fru_idx");
		seq_printf(f, "%-*s", LONG_WIDTH,  "fru_id");
		seq_printf(f, "%-*s", SHORT_WIDTH, "entry");
		seq_printf(f, "%-*s", LONG_WIDTH,  "timestamp");
		seq_printf(f, "%-*s", LONG_WIDTH,  "hw_id");
		seq_printf(f, "%-*s", LONG_WIDTH,  "addr");
		seq_printf(f, "%-*s", LONG_WIDTH,  "spa");
		goto out_newline;
	}

	/* Map the line number back to a (FRU, entry) pair. */
	spa_entry = line - 1;
	fru_idx	  = spa_entry / max_nr_entries;
	entry	  = spa_entry % max_nr_entries;

	rec = fru_records[fru_idx];
	if (!rec)
		goto out;

	seq_printf(f, "%-*u",		SHORT_WIDTH, fru_idx);
	seq_printf(f, "0x%016llx%-*s",	rec->fmp.fru_id, U64_PAD, "");
	seq_printf(f, "%-*u",		SHORT_WIDTH, entry);

	/* The entry list can change at run time; hold the update mutex. */
	mutex_lock(&fmpm_update_mutex);

	if (entry >= rec->fmp.nr_entries) {
		seq_printf(f, "%-*s", LONG_WIDTH, "*");
		seq_printf(f, "%-*s", LONG_WIDTH, "*");
		seq_printf(f, "%-*s", LONG_WIDTH, "*");
		seq_printf(f, "%-*s", LONG_WIDTH, "*");
		goto out_unlock;
	}

	fpd = &rec->entries[entry];

	seq_printf(f, "%ptT%-*s",	&fpd->timestamp, TS_PAD,  "");
	seq_printf(f, "0x%016llx%-*s",	fpd->hw_id,	 U64_PAD, "");
	seq_printf(f, "0x%016llx%-*s",	fpd->addr,	 U64_PAD, "");

	if (spa_entries[spa_entry] == INVALID_SPA)
		seq_printf(f, "%-*s", LONG_WIDTH, "*");
	else
		seq_printf(f, "0x%016llx%-*s", spa_entries[spa_entry], U64_PAD, "");

out_unlock:
	mutex_unlock(&fmpm_update_mutex);
out_newline:
	seq_putc(f, '\n');
out:
	return 0;
}
938 
/* seq_file iterator for the debugfs "entries" file. */
static const struct seq_operations fmpm_seq_ops = {
	.start	= fmpm_start,
	.next	= fmpm_next,
	.stop	= fmpm_stop,
	.show	= fmpm_show,
};
945 
/* Bind the seq_file iterator to an opened "entries" file. */
static int fmpm_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &fmpm_seq_ops);
}
950 
/* Read-only seq_file plumbing for the debugfs "entries" file. */
static const struct file_operations fmpm_fops = {
	.open		= fmpm_open,
	.release	= seq_release,
	.read		= seq_read,
	.llseek		= seq_lseek,
};
957 
setup_debugfs(void)958 static void setup_debugfs(void)
959 {
960 	struct dentry *dfs = ras_get_debugfs_root();
961 
962 	if (!dfs)
963 		return;
964 
965 	fmpm_dfs_dir = debugfs_create_dir("fmpm", dfs);
966 	if (!fmpm_dfs_dir)
967 		return;
968 
969 	fmpm_dfs_entries = debugfs_create_file("entries", 0400, fmpm_dfs_dir, NULL, &fmpm_fops);
970 	if (!fmpm_dfs_entries)
971 		debugfs_remove(fmpm_dfs_dir);
972 }
973 
/* Bind the module to AMD family 0x19 CPUs; MI300A models are checked later. */
static const struct x86_cpu_id fmpm_cpuids[] = {
	X86_MATCH_VENDOR_FAM(AMD, 0x19, NULL),
	{ }
};
MODULE_DEVICE_TABLE(x86cpu, fmpm_cpuids);
979 
/*
 * Module init: verify the platform, build the record cache, reconcile it
 * with persistent storage, replay boot-time retirement, then start
 * listening for new memory errors.
 *
 * Init fails if any FRU record cannot be written to storage (see the
 * assumptions in the header comment).
 */
static int __init fru_mem_poison_init(void)
{
	int ret;

	if (!x86_match_cpu(fmpm_cpuids)) {
		ret = -ENODEV;
		goto out;
	}

	if (erst_disable) {
		pr_debug("ERST not available\n");
		ret = -ENODEV;
		goto out;
	}

	ret = get_system_info();
	if (ret)
		goto out;

	ret = allocate_records();
	if (ret)
		goto out;

	ret = init_fmps();
	if (ret)
		goto out_free;

	/* Merge previously saved records into the cache... */
	ret = get_saved_records();
	if (ret)
		goto out_free;

	/* ...then push any new or resized records back to storage. */
	ret = save_new_records();
	if (ret)
		goto out_free;

	setup_debugfs();

	/* Re-apply retirement for errors recorded on earlier boots. */
	retire_mem_records();

	/* Only now can new errors arrive; the cache is fully initialized. */
	mce_register_decode_chain(&fru_mem_poison_nb);

	pr_info("FRU Memory Poison Manager initialized\n");
	return 0;

out_free:
	free_records();
out:
	return ret;
}
1029 
/* Module exit: stop receiving MCEs before tearing down the state they use. */
static void __exit fru_mem_poison_exit(void)
{
	mce_unregister_decode_chain(&fru_mem_poison_nb);
	debugfs_remove(fmpm_dfs_dir);
	free_records();
}
1036 
1037 module_init(fru_mem_poison_init);
1038 module_exit(fru_mem_poison_exit);
1039 
1040 MODULE_LICENSE("GPL");
1041 MODULE_DESCRIPTION("FRU Memory Poison Manager");
1042