Lines Matching +full:row +full:- +full:hold

1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * FRU (Field-Replaceable Unit) Memory Poison Manager
15 * - FRU memory poison section and memory poison descriptor definitions are not yet
19 * - Platforms based on AMD MI300 systems will be the first to use these structures.
23 * AMD MI300-based platform(s) assumptions:
24 * - Memory errors are reported through x86 MCA.
25 * - The entire DRAM row containing a memory error should be retired.
26 * - There will be (1) FRU memory poison section per CPER.
27 * - The FRU will be the CPU package (processor socket).
28 * - The default number of memory poison descriptor entries should be (8).
29 * - The platform will use ACPI ERST for persistent storage.
30 * - All FRU records should be saved to persistent storage. Module init will
33 * - Boot time memory retirement may occur later than ideal due to dependencies
37 * - Enough memory should be pre-allocated for each FRU record to be able to hold
40 * should allow the Platform to update the stored record in-place. Otherwise,
175 return rec->sec_desc.section_length - sizeof(struct cper_section_descriptor); in get_fmp_len()
184 if (rec->fmp.fru_id == fru_id) in get_fru_record()
224 checksum = do_fmp_checksum(&rec->fmp, len); in update_record_on_storage()
227 rec->fmp.checksum = -checksum; in update_record_on_storage()
231 ret = erst_write(&rec->hdr); in update_record_on_storage()
233 pr_warn("Storage update failed for FRU 0x%016llx\n", rec->fmp.fru_id); in update_record_on_storage()
235 if (ret == -ENOSPC) in update_record_on_storage()
244 if (!(rec->fmp.validation_bits & FMP_VALID_LIST_ENTRIES)) in rec_has_valid_entries()
247 if (!(rec->fmp.validation_bits & FMP_VALID_LIST)) in rec_has_valid_entries()
254 * Row retirement is done on MI300 systems, and some bits are 'don't
256 * includes all column bits and the row[13] bit.
266 * Also, order the checks from most->least likely to fail to shortcut the code. in fpds_equal()
268 if (MASK_ADDR(old->addr) != MASK_ADDR(new->addr)) in fpds_equal()
271 if (old->hw_id != new->hw_id) in fpds_equal()
274 if (old->addr_type != new->addr_type) in fpds_equal()
277 if (old->hw_id_type != new->hw_id_type) in fpds_equal()
287 for (i = 0; i < rec->fmp.nr_entries; i++) { in rec_has_fpd()
288 struct cper_fru_poison_desc *fpd_i = &rec->entries[i]; in rec_has_fpd()
307 pr_warn_once("FRU descriptor entry %d out-of-bounds (max: %d)\n", in save_spa()
326 pr_warn_once("spa_entries[] index out-of-bounds\n"); in save_spa()
349 struct cper_sec_fru_mem_poison *fmp = &rec->fmp; in update_fru_record()
357 fpd.timestamp = m->time; in update_fru_record()
359 fpd.hw_id = m->ipid; in update_fru_record()
361 fpd.addr = m->addr; in update_fru_record()
371 if (rec->fmp.nr_entries >= max_nr_entries) { in update_fru_record()
372 pr_warn("Exceeded number of entries for FRU 0x%016llx\n", rec->fmp.fru_id); in update_fru_record()
376 entry = fmp->nr_entries; in update_fru_record()
379 save_spa(rec, entry, m->addr, m->ipid, m->extcpu); in update_fru_record()
380 fpd_dest = &rec->entries[entry]; in update_fru_record()
383 fmp->nr_entries = entry + 1; in update_fru_record()
384 fmp->validation_bits |= FMP_VALID_LIST_ENTRIES; in update_fru_record()
385 fmp->validation_bits |= FMP_VALID_LIST; in update_fru_record()
387 pr_debug("Updated FRU 0x%016llx entry #%u\n", fmp->fru_id, entry); in update_fru_record()
416 retire_dram_row(m->addr, m->ipid, m->extcpu); in fru_handle_mem_poison()
422 rec = get_fru_record(m->ppin); in fru_handle_mem_poison()
438 struct cper_sec_fru_mem_poison *fmp = &rec->fmp; in retire_mem_fmp()
441 for (i = 0; i < fmp->nr_entries; i++) { in retire_mem_fmp()
442 struct cper_fru_poison_desc *fpd = &rec->entries[i]; in retire_mem_fmp()
445 if (fpd->hw_id_type != FPD_HW_ID_TYPE_MCA_IPID) in retire_mem_fmp()
448 if (fpd->addr_type != FPD_ADDR_TYPE_MCA_ADDR) in retire_mem_fmp()
453 if (topology_ppin(cpu) == fmp->fru_id) { in retire_mem_fmp()
463 retire_dram_row(fpd->addr, fpd->hw_id, err_cpu); in retire_mem_fmp()
464 save_spa(rec, i, fpd->addr, fpd->hw_id, err_cpu); in retire_mem_fmp()
484 struct cper_section_descriptor *sec_desc = &rec->sec_desc; in set_rec_fields()
485 struct cper_record_header *hdr = &rec->hdr; in set_rec_fields()
489 * Update the record lengths and keep everything else as-is. in set_rec_fields()
491 if (hdr->record_length && hdr->record_length < max_rec_len) { in set_rec_fields()
493 hdr->record_id, hdr->record_length, max_rec_len); in set_rec_fields()
497 memcpy(hdr->signature, CPER_SIG_RECORD, CPER_SIG_SIZE); in set_rec_fields()
498 hdr->revision = CPER_RECORD_REV; in set_rec_fields()
499 hdr->signature_end = CPER_SIG_END; in set_rec_fields()
505 hdr->section_count = 1; in set_rec_fields()
508 hdr->error_severity = CPER_SEV_RECOVERABLE; in set_rec_fields()
510 hdr->validation_bits = 0; in set_rec_fields()
511 hdr->creator_id = CPER_CREATOR_FMP; in set_rec_fields()
512 hdr->notification_type = CPER_NOTIFY_MCE; in set_rec_fields()
513 hdr->record_id = cper_next_record_id(); in set_rec_fields()
514 hdr->flags = CPER_HW_ERROR_FLAGS_PREVERR; in set_rec_fields()
516 sec_desc->section_offset = sizeof(struct cper_record_header); in set_rec_fields()
517 sec_desc->revision = CPER_SEC_REV; in set_rec_fields()
518 sec_desc->validation_bits = 0; in set_rec_fields()
519 sec_desc->flags = CPER_SEC_PRIMARY; in set_rec_fields()
520 sec_desc->section_type = CPER_SECTION_TYPE_FMP; in set_rec_fields()
521 sec_desc->section_severity = CPER_SEV_RECOVERABLE; in set_rec_fields()
524 hdr->record_length = max_rec_len; in set_rec_fields()
525 sec_desc->section_length = max_rec_len - sizeof(struct cper_record_header); in set_rec_fields()
537 if (rec->hdr.record_length == max_rec_len) in save_new_records()
540 if (!rec->hdr.record_length) in save_new_records()
557 erst_clear(rec->hdr.record_id); in save_new_records()
566 struct cper_sec_fru_mem_poison *fmp = &rec->fmp; in fmp_is_usable()
569 pr_debug("Validation bits: 0x%016llx\n", fmp->validation_bits); in fmp_is_usable()
571 if (!(fmp->validation_bits & FMP_VALID_ARCH_TYPE)) { in fmp_is_usable()
576 if (fmp->fru_arch_type != FMP_ARCH_TYPE_X86_CPUID_1_EAX) { in fmp_is_usable()
581 if (!(fmp->validation_bits & FMP_VALID_ARCH)) { in fmp_is_usable()
587 if (fmp->fru_arch != cpuid) { in fmp_is_usable()
589 fmp->fru_arch, cpuid); in fmp_is_usable()
593 if (!(fmp->validation_bits & FMP_VALID_ID_TYPE)) { in fmp_is_usable()
598 if (fmp->fru_id_type != FMP_ID_TYPE_X86_PPIN) { in fmp_is_usable()
603 if (!(fmp->validation_bits & FMP_VALID_ID)) { in fmp_is_usable()
613 struct cper_sec_fru_mem_poison *fmp = &rec->fmp; in fmp_is_valid()
623 checksum = do_fmp_checksum(fmp, len) + fmp->checksum; in fmp_is_valid()
645 new = get_fru_record(old->fmp.fru_id); in get_valid_record()
656 * - If it was not created by this module, then ignore it.
657 * - If it is valid, then copy its data to the local cache.
658 * - If it is not valid, then erase it.
669 ret = -ENOMEM; in get_saved_records()
686 len = erst_read_record(record_id, &old->hdr, FMPM_MAX_REC_LEN, in get_saved_records()
700 saved_nr_entries = len - sizeof(struct fru_rec); in get_saved_records()
706 ret = -EINVAL; in get_saved_records()
723 struct cper_sec_fru_mem_poison *fmp = &rec->fmp; in set_fmp_fields()
725 fmp->fru_arch_type = FMP_ARCH_TYPE_X86_CPUID_1_EAX; in set_fmp_fields()
726 fmp->validation_bits |= FMP_VALID_ARCH_TYPE; in set_fmp_fields()
729 fmp->fru_arch = cpuid_eax(1); in set_fmp_fields()
730 fmp->validation_bits |= FMP_VALID_ARCH; in set_fmp_fields()
732 fmp->fru_id_type = FMP_ID_TYPE_X86_PPIN; in set_fmp_fields()
733 fmp->validation_bits |= FMP_VALID_ID_TYPE; in set_fmp_fields()
735 fmp->fru_id = topology_ppin(cpu); in set_fmp_fields()
736 fmp->validation_bits |= FMP_VALID_ID; in set_fmp_fields()
759 ret = -ENODEV; in init_fmps()
774 return -ENODEV; in get_system_info()
778 return -ENODEV; in get_system_info()
784 return -ENODEV; in get_system_info()
789 return -ENODEV; in get_system_info()
824 ret = -ENOMEM; in allocate_records()
831 ret = -ENOMEM; in allocate_records()
838 ret = -ENOMEM; in allocate_records()
848 while (--i >= 0) in allocate_records()
878 #define U64_PAD (LONG_WIDTH - U64_WIDTH)
879 #define TS_PAD (LONG_WIDTH - TIMESTAMP_WIDTH)
888 seq_printf(f, "%-*s", SHORT_WIDTH, "fru_idx"); in fmpm_show()
889 seq_printf(f, "%-*s", LONG_WIDTH, "fru_id"); in fmpm_show()
890 seq_printf(f, "%-*s", SHORT_WIDTH, "entry"); in fmpm_show()
891 seq_printf(f, "%-*s", LONG_WIDTH, "timestamp"); in fmpm_show()
892 seq_printf(f, "%-*s", LONG_WIDTH, "hw_id"); in fmpm_show()
893 seq_printf(f, "%-*s", LONG_WIDTH, "addr"); in fmpm_show()
894 seq_printf(f, "%-*s", LONG_WIDTH, "spa"); in fmpm_show()
898 spa_entry = line - 1; in fmpm_show()
906 seq_printf(f, "%-*u", SHORT_WIDTH, fru_idx); in fmpm_show()
907 seq_printf(f, "0x%016llx%-*s", rec->fmp.fru_id, U64_PAD, ""); in fmpm_show()
908 seq_printf(f, "%-*u", SHORT_WIDTH, entry); in fmpm_show()
912 if (entry >= rec->fmp.nr_entries) { in fmpm_show()
913 seq_printf(f, "%-*s", LONG_WIDTH, "*"); in fmpm_show()
914 seq_printf(f, "%-*s", LONG_WIDTH, "*"); in fmpm_show()
915 seq_printf(f, "%-*s", LONG_WIDTH, "*"); in fmpm_show()
916 seq_printf(f, "%-*s", LONG_WIDTH, "*"); in fmpm_show()
920 fpd = &rec->entries[entry]; in fmpm_show()
922 seq_printf(f, "%ptT%-*s", &fpd->timestamp, TS_PAD, ""); in fmpm_show()
923 seq_printf(f, "0x%016llx%-*s", fpd->hw_id, U64_PAD, ""); in fmpm_show()
924 seq_printf(f, "0x%016llx%-*s", fpd->addr, U64_PAD, ""); in fmpm_show()
927 seq_printf(f, "%-*s", LONG_WIDTH, "*"); in fmpm_show()
929 seq_printf(f, "0x%016llx%-*s", spa_entries[spa_entry], U64_PAD, ""); in fmpm_show()
985 ret = -ENODEV; in fru_mem_poison_init()
991 ret = -ENODEV; in fru_mem_poison_init()