Merge tag 'please-pull-apei' into x86/ras
APEI is currently implemented so that it depends on x86 hardware.
The primary dependency is that GHES uses the x86 NMI for hardware
error notification and MCE for memory error handling. These patches
remove that dependency.
Other APEI features such as error reporting via external IRQ, error
serialization, or error injection, do not require changes to use them
on non-x86 architectures.
The following patch set eliminates the APEI Kconfig x86 dependency
by making these changes:
- treat NMI notification as an architecture-specific GHES feature, selected
via HAVE_ACPI_APEI_NMI
- group the code that is used only on the NMI path and wrap it in
#ifdef CONFIG_HAVE_ACPI_APEI_NMI
- identify the architecture-specific pieces (TLB flush and MCE) and abstract them accordingly
- rework ioremap for both IRQ and NMI context
The NMI code is kept in ghes.c since the NMI and IRQ contexts are tightly coupled.
Note, these patches introduce no functional changes for x86. The NMI notification
feature is hard selected for x86. Architectures that want to use this
feature should also provide NMI code infrastructure.
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index bb92f38..4fc5797 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2385,6 +2385,10 @@
threshold_cpu_callback(action, cpu);
mce_device_remove(cpu);
mce_intel_hcpu_update(cpu);
+
+ /* intentionally ignoring frozen here */
+ if (!(action & CPU_TASKS_FROZEN))
+ cmci_rediscover();
break;
case CPU_DOWN_PREPARE:
smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
@@ -2396,11 +2400,6 @@
break;
}
- if (action == CPU_POST_DEAD) {
- /* intentionally ignoring frozen here */
- cmci_rediscover();
- }
-
return NOTIFY_OK;
}
@@ -2451,6 +2450,12 @@
for_each_online_cpu(i) {
err = mce_device_create(i);
if (err) {
+ /*
+ * Register notifier anyway (and do not unreg it) so
+ * that we don't leave undeleted timers, see notifier
+ * callback above.
+ */
+ __register_hotcpu_notifier(&mce_cpu_notifier);
cpu_notifier_register_done();
goto err_device_create;
}
@@ -2471,10 +2476,6 @@
err_register:
unregister_syscore_ops(&mce_syscore_ops);
- cpu_notifier_register_begin();
- __unregister_hotcpu_notifier(&mce_cpu_notifier);
- cpu_notifier_register_done();
-
err_device_create:
/*
* We didn't keep track of which devices were created above, but
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 0e87a34..4e6e66c 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -176,4 +176,6 @@
source "drivers/mcb/Kconfig"
+source "drivers/ras/Kconfig"
+
endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index f98b50d..65c32b1 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -158,3 +158,4 @@
obj-$(CONFIG_FMC) += fmc/
obj-$(CONFIG_POWERCAP) += powercap/
obj-$(CONFIG_MCB) += mcb/
+obj-$(CONFIG_RAS) += ras/
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index a34a228..206942b 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -370,6 +370,7 @@
tristate "Extended Error Log support"
depends on X86_MCE && X86_LOCAL_APIC
select UEFI_CPER
+ select RAS
default n
help
Certain usages such as Predictive Failure Analysis (PFA) require
@@ -384,6 +385,7 @@
Enhanced MCA Logging allows firmware to provide additional error
information to system software, synchronous with MCE or CMCI. This
- driver adds support for that functionality.
+ driver adds support for that functionality with corresponding
+ tracepoint which carries that information to userspace.
endif # ACPI
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index 1853341..0ad6f38 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -12,10 +12,12 @@
#include <linux/cper.h>
#include <linux/ratelimit.h>
#include <linux/edac.h>
+#include <linux/ras.h>
#include <asm/cpu.h>
#include <asm/mce.h>
#include "apei/apei-internal.h"
+#include <ras/ras_event.h>
#define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */
@@ -137,8 +139,12 @@
struct mce *mce = (struct mce *)data;
int bank = mce->bank;
int cpu = mce->extcpu;
- struct acpi_generic_status *estatus;
- int rc;
+ struct acpi_generic_status *estatus, *tmp;
+ struct acpi_generic_data *gdata;
+ const uuid_le *fru_id = &NULL_UUID_LE;
+ char *fru_text = "";
+ uuid_le *sec_type;
+ static u32 err_seq;
estatus = extlog_elog_entry_check(cpu, bank);
if (estatus == NULL)
@@ -148,8 +154,29 @@
/* clear record status to enable BIOS to update it again */
estatus->block_status = 0;
- rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
+ tmp = (struct acpi_generic_status *)elog_buf;
+ if (!ras_userspace_consumers()) {
+ print_extlog_rcd(NULL, tmp, cpu);
+ goto out;
+ }
+
+ /* log event via trace */
+ err_seq++;
+ gdata = (struct acpi_generic_data *)(tmp + 1);
+ if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+ fru_id = (uuid_le *)gdata->fru_id;
+ if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+ fru_text = gdata->fru_text;
+ sec_type = (uuid_le *)gdata->section_type;
+ if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
+ struct cper_sec_mem_err *mem = (void *)(gdata + 1);
+ if (gdata->error_data_length >= sizeof(*mem))
+ trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
+ (u8)gdata->error_severity);
+ }
+
+out:
return NOTIFY_STOP;
}
@@ -196,19 +223,16 @@
u64 cap;
int rc;
+ rdmsrl(MSR_IA32_MCG_CAP, cap);
+
+ if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr())
+ return -ENODEV;
+
if (get_edac_report_status() == EDAC_REPORTING_FORCE) {
pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n");
return -EPERM;
}
- rc = -ENODEV;
- rdmsrl(MSR_IA32_MCG_CAP, cap);
- if (!(cap & MCG_ELOG_P))
- return rc;
-
- if (!extlog_get_l1addr())
- return rc;
-
rc = -EINVAL;
/* get L1 header to fetch necessary information */
l1_hdr_size = sizeof(struct extlog_l1_head);
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 878f090..d3c0465 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -72,6 +72,7 @@
config EDAC_MM_EDAC
tristate "Main Memory EDAC (Error Detection And Correction) reporting"
+ select RAS
help
Some systems are able to detect and correct errors in main
memory. EDAC can report statistics on memory error
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 2c694b5..9f134823 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -33,9 +33,6 @@
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"
-
-#define CREATE_TRACE_POINTS
-#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>
/* lock to memory controller's control array */
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 1491dd4..437e6fd 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -34,6 +34,9 @@
#include <linux/aer.h>
#define INDENT_SP " "
+
+static char rcd_decode_str[CPER_REC_LEN];
+
/*
* CPER record ID need to be unique even after reboot, because record
* ID is used as index for ERST storage, while CPER records from
@@ -50,18 +53,19 @@
}
EXPORT_SYMBOL_GPL(cper_next_record_id);
-static const char *cper_severity_strs[] = {
+static const char * const severity_strs[] = {
"recoverable",
"fatal",
"corrected",
"info",
};
-static const char *cper_severity_str(unsigned int severity)
+const char *cper_severity_str(unsigned int severity)
{
- return severity < ARRAY_SIZE(cper_severity_strs) ?
- cper_severity_strs[severity] : "unknown";
+ return severity < ARRAY_SIZE(severity_strs) ?
+ severity_strs[severity] : "unknown";
}
+EXPORT_SYMBOL_GPL(cper_severity_str);
/*
* cper_print_bits - print strings for set bits
@@ -100,32 +104,32 @@
printk("%s\n", buf);
}
-static const char * const cper_proc_type_strs[] = {
+static const char * const proc_type_strs[] = {
"IA32/X64",
"IA64",
};
-static const char * const cper_proc_isa_strs[] = {
+static const char * const proc_isa_strs[] = {
"IA32",
"IA64",
"X64",
};
-static const char * const cper_proc_error_type_strs[] = {
+static const char * const proc_error_type_strs[] = {
"cache error",
"TLB error",
"bus error",
"micro-architectural error",
};
-static const char * const cper_proc_op_strs[] = {
+static const char * const proc_op_strs[] = {
"unknown or generic",
"data read",
"data write",
"instruction execution",
};
-static const char * const cper_proc_flag_strs[] = {
+static const char * const proc_flag_strs[] = {
"restartable",
"precise IP",
"overflow",
@@ -137,26 +141,26 @@
{
if (proc->validation_bits & CPER_PROC_VALID_TYPE)
printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
- proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
- cper_proc_type_strs[proc->proc_type] : "unknown");
+ proc->proc_type < ARRAY_SIZE(proc_type_strs) ?
+ proc_type_strs[proc->proc_type] : "unknown");
if (proc->validation_bits & CPER_PROC_VALID_ISA)
printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
- proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
- cper_proc_isa_strs[proc->proc_isa] : "unknown");
+ proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ?
+ proc_isa_strs[proc->proc_isa] : "unknown");
if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) {
printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
cper_print_bits(pfx, proc->proc_error_type,
- cper_proc_error_type_strs,
- ARRAY_SIZE(cper_proc_error_type_strs));
+ proc_error_type_strs,
+ ARRAY_SIZE(proc_error_type_strs));
}
if (proc->validation_bits & CPER_PROC_VALID_OPERATION)
printk("%s""operation: %d, %s\n", pfx, proc->operation,
- proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
- cper_proc_op_strs[proc->operation] : "unknown");
+ proc->operation < ARRAY_SIZE(proc_op_strs) ?
+ proc_op_strs[proc->operation] : "unknown");
if (proc->validation_bits & CPER_PROC_VALID_FLAGS) {
printk("%s""flags: 0x%02x\n", pfx, proc->flags);
- cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
- ARRAY_SIZE(cper_proc_flag_strs));
+ cper_print_bits(pfx, proc->flags, proc_flag_strs,
+ ARRAY_SIZE(proc_flag_strs));
}
if (proc->validation_bits & CPER_PROC_VALID_LEVEL)
printk("%s""level: %d\n", pfx, proc->level);
@@ -177,7 +181,7 @@
printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
}
-static const char *cper_mem_err_type_strs[] = {
+static const char * const mem_err_type_strs[] = {
"unknown",
"no error",
"single-bit ECC",
@@ -196,8 +200,115 @@
"physical memory map-out event",
};
+/*
+ * Translate a CPER memory error type code into its printable name.
+ * Codes that fall outside mem_err_type_strs[] are reported as "unknown".
+ */
+const char *cper_mem_err_type_str(unsigned int etype)
+{
+	if (etype >= ARRAY_SIZE(mem_err_type_strs))
+		return "unknown";
+
+	return mem_err_type_strs[etype];
+}
+EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
+
+/*
+ * cper_mem_err_location - format the valid location fields of a memory error
+ * @mem: compact memory error record to decode
+ * @msg: output buffer; written with a limit of CPER_REC_LEN - 1 characters
+ *       plus the terminating NUL, so it must hold CPER_REC_LEN bytes
+ *
+ * Appends one "name: value " token for every field whose bit is set in
+ * @mem->validation_bits, in a fixed order.
+ *
+ * Returns the number of characters placed in @msg (excluding the NUL), or 0
+ * when @msg is NULL or no location field is valid.
+ */
+static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
+{
+	u32 len, n;
+
+	if (!msg)
+		return 0;
+
+	n = 0;
+	len = CPER_REC_LEN - 1;
+	if (mem->validation_bits & CPER_MEM_VALID_NODE)
+		n += scnprintf(msg + n, len - n, "node: %d ", mem->node);
+	if (mem->validation_bits & CPER_MEM_VALID_CARD)
+		n += scnprintf(msg + n, len - n, "card: %d ", mem->card);
+	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
+		n += scnprintf(msg + n, len - n, "module: %d ", mem->module);
+	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
+		n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank);
+	if (mem->validation_bits & CPER_MEM_VALID_BANK)
+		n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank);
+	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
+		n += scnprintf(msg + n, len - n, "device: %d ", mem->device);
+	if (mem->validation_bits & CPER_MEM_VALID_ROW)
+		n += scnprintf(msg + n, len - n, "row: %d ", mem->row);
+	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
+		n += scnprintf(msg + n, len - n, "column: %d ", mem->column);
+	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
+		n += scnprintf(msg + n, len - n, "bit_position: %d ",
+			       mem->bit_pos);
+	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
+		n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ",
+			       mem->requestor_id);
+	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
+		n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ",
+			       mem->responder_id);
+	/*
+	 * The length must be accumulated here as well: otherwise the
+	 * terminator below lands on the first character of the target_id
+	 * text and the field is silently dropped from the output.
+	 */
+	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
+		n += scnprintf(msg + n, len - n, "target_id: 0x%016llx ",
+			       mem->target_id);
+
+	msg[n] = '\0';
+	return n;
+}
+
+/*
+ * cper_dimm_err_location - format the DIMM location of a memory error
+ * @mem: compact memory error record to decode
+ * @msg: output buffer; written with a limit of CPER_REC_LEN - 1 bytes
+ *
+ * Looks up the bank and device names for @mem->mem_dev_handle through
+ * dmi_memdev_name(). If both are found they are printed; otherwise the raw
+ * DMI handle is printed instead.
+ *
+ * Returns the number of characters placed in @msg (excluding the NUL), or 0
+ * when @msg is NULL or the module handle is not flagged valid.
+ */
+static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
+{
+	u32 len, n;
+	const char *bank = NULL, *device = NULL;
+
+	if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
+		return 0;
+
+	len = CPER_REC_LEN - 1;
+	dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
+	/*
+	 * scnprintf() returns what was actually stored and always terminates
+	 * the buffer. snprintf() would return the untruncated length, which
+	 * must not be used as an index into msg or reported to the caller.
+	 */
+	if (bank && device)
+		n = scnprintf(msg, len, "DIMM location: %s %s ", bank, device);
+	else
+		n = scnprintf(msg, len,
+			      "DIMM location: not present. DMI handle: 0x%.4x ",
+			      mem->mem_dev_handle);
+
+	return n;
+}
+
+/*
+ * cper_mem_err_pack - copy the location fields of a memory error section
+ * @mem:  source CPER memory error section
+ * @cmem: destination compact record
+ *
+ * Copies the validation bits and every location-related field of @mem into
+ * @cmem, one to one.
+ */
+void cper_mem_err_pack(const struct cper_sec_mem_err *mem,
+		       struct cper_mem_err_compact *cmem)
+{
+	cmem->validation_bits = mem->validation_bits;
+
+	/* physical placement of the failing cell */
+	cmem->node = mem->node;
+	cmem->card = mem->card;
+	cmem->module = mem->module;
+	cmem->rank = mem->rank;
+	cmem->bank = mem->bank;
+	cmem->device = mem->device;
+	cmem->row = mem->row;
+	cmem->column = mem->column;
+	cmem->bit_pos = mem->bit_pos;
+
+	/* bus agents involved in the transaction */
+	cmem->requestor_id = mem->requestor_id;
+	cmem->responder_id = mem->responder_id;
+	cmem->target_id = mem->target_id;
+
+	/* handles of the memory array and memory device */
+	cmem->mem_array_handle = mem->mem_array_handle;
+	cmem->mem_dev_handle = mem->mem_dev_handle;
+}
+
+/*
+ * cper_mem_err_unpack - render a compact memory error record into a trace_seq
+ * @p:    trace sequence that receives the text
+ * @cmem: compact memory error record to decode
+ *
+ * Decodes the memory location and then the DIMM location through the
+ * file-scope rcd_decode_str scratch buffer, appends whichever parts are
+ * non-empty to @p and finishes with a NUL byte.
+ *
+ * Returns a pointer to the position in @p's buffer where this output starts.
+ */
+const char *cper_mem_err_unpack(struct trace_seq *p,
+				struct cper_mem_err_compact *cmem)
+{
+	const char *start = p->buffer + p->len;
+	int decoded;
+
+	decoded = cper_mem_err_location(cmem, rcd_decode_str);
+	if (decoded)
+		trace_seq_printf(p, "%s", rcd_decode_str);
+
+	decoded = cper_dimm_err_location(cmem, rcd_decode_str);
+	if (decoded)
+		trace_seq_printf(p, "%s", rcd_decode_str);
+
+	trace_seq_putc(p, '\0');
+
+	return start;
+}
+
static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
{
+ struct cper_mem_err_compact cmem;
+
if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
if (mem->validation_bits & CPER_MEM_VALID_PA)
@@ -206,48 +317,19 @@
if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
printk("%s""physical_address_mask: 0x%016llx\n",
pfx, mem->physical_addr_mask);
- if (mem->validation_bits & CPER_MEM_VALID_NODE)
- pr_debug("node: %d\n", mem->node);
- if (mem->validation_bits & CPER_MEM_VALID_CARD)
- pr_debug("card: %d\n", mem->card);
- if (mem->validation_bits & CPER_MEM_VALID_MODULE)
- pr_debug("module: %d\n", mem->module);
- if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
- pr_debug("rank: %d\n", mem->rank);
- if (mem->validation_bits & CPER_MEM_VALID_BANK)
- pr_debug("bank: %d\n", mem->bank);
- if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
- pr_debug("device: %d\n", mem->device);
- if (mem->validation_bits & CPER_MEM_VALID_ROW)
- pr_debug("row: %d\n", mem->row);
- if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
- pr_debug("column: %d\n", mem->column);
- if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
- pr_debug("bit_position: %d\n", mem->bit_pos);
- if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
- pr_debug("requestor_id: 0x%016llx\n", mem->requestor_id);
- if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
- pr_debug("responder_id: 0x%016llx\n", mem->responder_id);
- if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
- pr_debug("target_id: 0x%016llx\n", mem->target_id);
+ cper_mem_err_pack(mem, &cmem);
+ if (cper_mem_err_location(&cmem, rcd_decode_str))
+ printk("%s%s\n", pfx, rcd_decode_str);
if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
u8 etype = mem->error_type;
printk("%s""error_type: %d, %s\n", pfx, etype,
- etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
- cper_mem_err_type_strs[etype] : "unknown");
+ cper_mem_err_type_str(etype));
}
- if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
- const char *bank = NULL, *device = NULL;
- dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
- if (bank != NULL && device != NULL)
- printk("%s""DIMM location: %s %s", pfx, bank, device);
- else
- printk("%s""DIMM DMI handle: 0x%.4x",
- pfx, mem->mem_dev_handle);
- }
+ if (cper_dimm_err_location(&cmem, rcd_decode_str))
+ printk("%s%s\n", pfx, rcd_decode_str);
}
-static const char *cper_pcie_port_type_strs[] = {
+static const char * const pcie_port_type_strs[] = {
"PCIe end point",
"legacy PCI end point",
"unknown",
@@ -266,8 +348,8 @@
{
if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE)
printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
- pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
- cper_pcie_port_type_strs[pcie->port_type] : "unknown");
+ pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ?
+ pcie_port_type_strs[pcie->port_type] : "unknown");
if (pcie->validation_bits & CPER_PCIE_VALID_VERSION)
printk("%s""version: %d.%d\n", pfx,
pcie->version.major, pcie->version.minor);
diff --git a/drivers/pci/pcie/aer/Kconfig b/drivers/pci/pcie/aer/Kconfig
index 50e94e0..3894402 100644
--- a/drivers/pci/pcie/aer/Kconfig
+++ b/drivers/pci/pcie/aer/Kconfig
@@ -5,6 +5,7 @@
config PCIEAER
boolean "Root Port Advanced Error Reporting support"
depends on PCIEPORTBUS
+ select RAS
default y
help
This enables PCI Express Root Port Advanced Error Reporting
diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c
index 36ed31b5..35d06e1 100644
--- a/drivers/pci/pcie/aer/aerdrv_errprint.c
+++ b/drivers/pci/pcie/aer/aerdrv_errprint.c
@@ -22,9 +22,7 @@
#include <linux/cper.h>
#include "aerdrv.h"
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/ras.h>
+#include <ras/ras_event.h>
#define AER_AGENT_RECEIVER 0
#define AER_AGENT_REQUESTER 1
diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig
new file mode 100644
index 0000000..f9da613
--- /dev/null
+++ b/drivers/ras/Kconfig
@@ -0,0 +1,2 @@
+config RAS
+ bool
diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile
new file mode 100644
index 0000000..d7f7334
--- /dev/null
+++ b/drivers/ras/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_RAS) += ras.o debugfs.o
diff --git a/drivers/ras/debugfs.c b/drivers/ras/debugfs.c
new file mode 100644
index 0000000..0322acf
--- /dev/null
+++ b/drivers/ras/debugfs.c
@@ -0,0 +1,56 @@
+#include <linux/debugfs.h>
+
+static struct dentry *ras_debugfs_dir;
+
+static atomic_t trace_count = ATOMIC_INIT(0);
+
+/*
+ * Return the current value of trace_count, i.e. how many opens of the
+ * "daemon_active" debugfs file have not been released yet. Callers treat a
+ * non-zero result as "a userspace consumer is present".
+ */
+int ras_userspace_consumers(void)
+{
+ return atomic_read(&trace_count);
+}
+EXPORT_SYMBOL_GPL(ras_userspace_consumers);
+
+/*
+ * seq_file show callback for "daemon_active". It writes nothing to @m and
+ * returns the current open count.
+ * NOTE(review): show callbacks normally return 0 or a negative errno; confirm
+ * that returning a positive count here is intended.
+ */
+static int trace_show(struct seq_file *m, void *v)
+{
+ return atomic_read(&trace_count);
+}
+
+/*
+ * Open handler for "daemon_active": sets up the seq_file and counts the
+ * opener as a userspace consumer.
+ *
+ * The counter is raised only after single_open() has succeeded. A failed
+ * open never reaches trace_release(), so counting it up front would leave
+ * trace_count permanently elevated.
+ *
+ * Returns 0 on success or the error reported by single_open().
+ */
+static int trace_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	ret = single_open(file, trace_show, NULL);
+	if (ret)
+		return ret;
+
+	atomic_inc(&trace_count);
+	return 0;
+}
+
+/*
+ * Release handler for "daemon_active": drops the consumer count taken in
+ * trace_open() and tears down the seq_file state.
+ */
+static int trace_release(struct inode *inode, struct file *file)
+{
+ atomic_dec(&trace_count);
+ return single_release(inode, file);
+}
+
+/*
+ * File operations for "daemon_active". Reads and seeks go through the
+ * generic seq_file helpers; open and release maintain trace_count.
+ */
+static const struct file_operations trace_fops = {
+ .open = trace_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = trace_release,
+};
+
+/*
+ * Create the owner-readable "daemon_active" file inside the RAS debugfs
+ * directory.
+ *
+ * Returns 0 on success, -ENOENT if the directory has not been created, or
+ * -ENODEV if the file could not be created.
+ */
+int __init ras_add_daemon_trace(void)
+{
+	struct dentry *entry;
+
+	if (!ras_debugfs_dir)
+		return -ENOENT;
+
+	entry = debugfs_create_file("daemon_active", S_IRUSR, ras_debugfs_dir,
+				    NULL, &trace_fops);
+
+	return entry ? 0 : -ENODEV;
+}
+
+/*
+ * Create the top-level "ras" debugfs directory and remember it in
+ * ras_debugfs_dir. The result is not checked here; ras_add_daemon_trace()
+ * tests ras_debugfs_dir before using it.
+ */
+void __init ras_debugfs_init(void)
+{
+ ras_debugfs_dir = debugfs_create_dir("ras", NULL);
+}
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
new file mode 100644
index 0000000..b67dd36
--- /dev/null
+++ b/drivers/ras/ras.c
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2014 Intel Corporation
+ *
+ * Authors:
+ * Chen, Gong <gong.chen@linux.intel.com>
+ */
+
+#include <linux/init.h>
+#include <linux/ras.h>
+
+#define CREATE_TRACE_POINTS
+#define TRACE_INCLUDE_PATH ../../include/ras
+#include <ras/ras_event.h>
+
+/*
+ * Subsystem initcall: set up the RAS debugfs directory, then register the
+ * daemon trace file and hand its status back as the initcall result.
+ */
+static int __init ras_init(void)
+{
+	ras_debugfs_init();
+
+	return ras_add_daemon_trace();
+}
+subsys_initcall(ras_init);
+
+#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE)
+EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
+#endif
+EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 4dbaa70..c826d1c 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -11,6 +11,8 @@
#define AER_FATAL 1
#define AER_CORRECTABLE 2
+struct pci_dev;
+
struct aer_header_log_regs {
unsigned int dw0;
unsigned int dw1;
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 2fc0ec3..76abba4 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -22,6 +22,7 @@
#define LINUX_CPER_H
#include <linux/uuid.h>
+#include <linux/trace_seq.h>
/* CPER record signature and the size */
#define CPER_SIG_RECORD "CPER"
@@ -36,6 +37,13 @@
#define CPER_RECORD_REV 0x0100
/*
+ * CPER record length contains the CPER fields which are relevant for further
+ * handling of a memory error in userspace (we don't carry all the fields
+ * defined in the UEFI spec because some of them don't make any sense.)
+ * Currently, a length of 256 should be more than enough.
+ */
+#define CPER_REC_LEN 256
+/*
* Severity difinition for error_severity in struct cper_record_header
* and section_severity in struct cper_section_descriptor
*/
@@ -356,6 +364,24 @@
__u16 mem_dev_handle; /* module handle in UEFI 2.4 */
};
+/*
+ * Reduced form of struct cper_sec_mem_err: the validation bits plus the
+ * location-related fields. It is filled by cper_mem_err_pack() and stored
+ * in the extlog_mem_event trace record.
+ */
+struct cper_mem_err_compact {
+ __u64 validation_bits;
+ __u16 node;
+ __u16 card;
+ __u16 module;
+ __u16 bank;
+ __u16 device;
+ __u16 row;
+ __u16 column;
+ __u16 bit_pos;
+ __u64 requestor_id;
+ __u64 responder_id;
+ __u64 target_id;
+ __u16 rank;
+ __u16 mem_array_handle;
+ __u16 mem_dev_handle;
+};
+
struct cper_sec_pcie {
__u64 validation_bits;
__u32 port_type;
@@ -395,7 +421,13 @@
#pragma pack()
u64 cper_next_record_id(void);
+const char *cper_severity_str(unsigned int);
+const char *cper_mem_err_type_str(unsigned int);
void cper_print_bits(const char *prefix, unsigned int bits,
const char * const strs[], unsigned int strs_size);
+void cper_mem_err_pack(const struct cper_sec_mem_err *,
+ struct cper_mem_err_compact *);
+const char *cper_mem_err_unpack(struct trace_seq *,
+ struct cper_mem_err_compact *);
#endif
diff --git a/include/linux/ras.h b/include/linux/ras.h
new file mode 100644
index 0000000..2aceeaf
--- /dev/null
+++ b/include/linux/ras.h
@@ -0,0 +1,14 @@
+#ifndef __RAS_H__
+#define __RAS_H__
+
+/*
+ * RAS debugfs interface. With CONFIG_DEBUG_FS these are real functions;
+ * without it they collapse to inline stubs, so ras_userspace_consumers()
+ * always reports 0 and the two init helpers do nothing.
+ */
+#ifdef CONFIG_DEBUG_FS
+int ras_userspace_consumers(void);
+void ras_debugfs_init(void);
+int ras_add_daemon_trace(void);
+#else
+static inline int ras_userspace_consumers(void) { return 0; }
+static inline void ras_debugfs_init(void) { return; }
+static inline int ras_add_daemon_trace(void) { return 0; }
+#endif
+
+#endif
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 21cdb0b..47da53c 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -8,6 +8,71 @@
#include <linux/tracepoint.h>
#include <linux/edac.h>
#include <linux/ktime.h>
+#include <linux/aer.h>
+#include <linux/cper.h>
+
+/*
+ * MCE Extended Error Log trace event
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event.
+ */
+
+/* memory trace event */
+
+#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE)
+/*
+ * extlog_mem_event - one memory error taken from the extended error log.
+ *
+ * Arguments: the CPER memory error section, a caller-supplied sequence
+ * number, the FRU id and FRU text of the record, and the error severity.
+ * The record stores the error type, physical address and the index of the
+ * lowest set bit of the physical address mask, plus a compact copy of the
+ * location fields that is decoded to text only at print time.
+ */
+TRACE_EVENT(extlog_mem_event,
+ TP_PROTO(struct cper_sec_mem_err *mem,
+ u32 err_seq,
+ const uuid_le *fru_id,
+ const char *fru_text,
+ u8 sev),
+
+ TP_ARGS(mem, err_seq, fru_id, fru_text, sev),
+
+ TP_STRUCT__entry(
+ __field(u32, err_seq)
+ __field(u8, etype)
+ __field(u8, sev)
+ __field(u64, pa)
+ __field(u8, pa_mask_lsb)
+ __field_struct(uuid_le, fru_id)
+ __string(fru_text, fru_text)
+ __field_struct(struct cper_mem_err_compact, data)
+ ),
+
+ TP_fast_assign(
+ __entry->err_seq = err_seq;
+ /* fields not flagged valid in the section are stored as all-ones */
+ if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
+ __entry->etype = mem->error_type;
+ else
+ __entry->etype = ~0;
+ __entry->sev = sev;
+ if (mem->validation_bits & CPER_MEM_VALID_PA)
+ __entry->pa = mem->physical_addr;
+ else
+ __entry->pa = ~0ull;
+
+ if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
+ __entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask);
+ else
+ __entry->pa_mask_lsb = ~0;
+ __entry->fru_id = *fru_id;
+ __assign_str(fru_text, fru_text);
+ cper_mem_err_pack(mem, &__entry->data);
+ ),
+
+ /* the %s before "FRU:" is the location text from cper_mem_err_unpack() */
+ TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s",
+ __entry->err_seq,
+ cper_severity_str(__entry->sev),
+ cper_mem_err_type_str(__entry->etype),
+ __entry->pa,
+ __entry->pa_mask_lsb,
+ cper_mem_err_unpack(p, &__entry->data),
+ &__entry->fru_id,
+ __get_str(fru_text))
+);
+#endif
/*
* Hardware Events Report
@@ -94,6 +159,69 @@
__get_str(driver_detail))
);
+/*
+ * PCIe AER Trace event
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event on a PCIe device. The event report has
+ * the following structure:
+ *
+ * char * dev_name - The name of the slot where the device resides
+ * ([domain:]bus:device.function).
+ * u32 status - Either the correctable or uncorrectable register
+ * indicating what error or errors have been seen
+ * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED
+ */
+
+#define aer_correctable_errors \
+ {BIT(0), "Receiver Error"}, \
+ {BIT(6), "Bad TLP"}, \
+ {BIT(7), "Bad DLLP"}, \
+ {BIT(8), "RELAY_NUM Rollover"}, \
+ {BIT(12), "Replay Timer Timeout"}, \
+ {BIT(13), "Advisory Non-Fatal"}
+
+#define aer_uncorrectable_errors \
+ {BIT(4), "Data Link Protocol"}, \
+ {BIT(12), "Poisoned TLP"}, \
+ {BIT(13), "Flow Control Protocol"}, \
+ {BIT(14), "Completion Timeout"}, \
+ {BIT(15), "Completer Abort"}, \
+ {BIT(16), "Unexpected Completion"}, \
+ {BIT(17), "Receiver Overflow"}, \
+ {BIT(18), "Malformed TLP"}, \
+ {BIT(19), "ECRC"}, \
+ {BIT(20), "Unsupported Request"}
+
+/*
+ * aer_event records the device name, the raw status word and the severity.
+ * At print time the severity selects both the label ("Corrected", "Fatal" or
+ * "Uncorrected, non-fatal") and the table used to decode the status bits:
+ * aer_correctable_errors for AER_CORRECTABLE, aer_uncorrectable_errors
+ * otherwise.
+ */
+TRACE_EVENT(aer_event,
+ TP_PROTO(const char *dev_name,
+ const u32 status,
+ const u8 severity),
+
+ TP_ARGS(dev_name, status, severity),
+
+ TP_STRUCT__entry(
+ __string( dev_name, dev_name )
+ __field( u32, status )
+ __field( u8, severity )
+ ),
+
+ TP_fast_assign(
+ __assign_str(dev_name, dev_name);
+ __entry->status = status;
+ __entry->severity = severity;
+ ),
+
+ TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
+ __get_str(dev_name),
+ __entry->severity == AER_CORRECTABLE ? "Corrected" :
+ __entry->severity == AER_FATAL ?
+ "Fatal" : "Uncorrected, non-fatal",
+ __entry->severity == AER_CORRECTABLE ?
+ __print_flags(__entry->status, "|", aer_correctable_errors) :
+ __print_flags(__entry->status, "|", aer_uncorrectable_errors))
+);
+
#endif /* _TRACE_HW_EVENT_MC_H */
/* This part must be outside protection */
diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h
deleted file mode 100644
index 1c875ad..0000000
--- a/include/trace/events/ras.h
+++ /dev/null
@@ -1,77 +0,0 @@
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM ras
-
-#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_AER_H
-
-#include <linux/tracepoint.h>
-#include <linux/aer.h>
-
-
-/*
- * PCIe AER Trace event
- *
- * These events are generated when hardware detects a corrected or
- * uncorrected event on a PCIe device. The event report has
- * the following structure:
- *
- * char * dev_name - The name of the slot where the device resides
- * ([domain:]bus:device.function).
- * u32 status - Either the correctable or uncorrectable register
- * indicating what error or errors have been seen
- * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED
- */
-
-#define aer_correctable_errors \
- {BIT(0), "Receiver Error"}, \
- {BIT(6), "Bad TLP"}, \
- {BIT(7), "Bad DLLP"}, \
- {BIT(8), "RELAY_NUM Rollover"}, \
- {BIT(12), "Replay Timer Timeout"}, \
- {BIT(13), "Advisory Non-Fatal"}
-
-#define aer_uncorrectable_errors \
- {BIT(4), "Data Link Protocol"}, \
- {BIT(12), "Poisoned TLP"}, \
- {BIT(13), "Flow Control Protocol"}, \
- {BIT(14), "Completion Timeout"}, \
- {BIT(15), "Completer Abort"}, \
- {BIT(16), "Unexpected Completion"}, \
- {BIT(17), "Receiver Overflow"}, \
- {BIT(18), "Malformed TLP"}, \
- {BIT(19), "ECRC"}, \
- {BIT(20), "Unsupported Request"}
-
-TRACE_EVENT(aer_event,
- TP_PROTO(const char *dev_name,
- const u32 status,
- const u8 severity),
-
- TP_ARGS(dev_name, status, severity),
-
- TP_STRUCT__entry(
- __string( dev_name, dev_name )
- __field( u32, status )
- __field( u8, severity )
- ),
-
- TP_fast_assign(
- __assign_str(dev_name, dev_name);
- __entry->status = status;
- __entry->severity = severity;
- ),
-
- TP_printk("%s PCIe Bus Error: severity=%s, %s\n",
- __get_str(dev_name),
- __entry->severity == AER_CORRECTABLE ? "Corrected" :
- __entry->severity == AER_FATAL ?
- "Fatal" : "Uncorrected, non-fatal",
- __entry->severity == AER_CORRECTABLE ?
- __print_flags(__entry->status, "|", aer_correctable_errors) :
- __print_flags(__entry->status, "|", aer_uncorrectable_errors))
-);
-
-#endif /* _TRACE_AER_H */
-
-/* This part must be outside protection */
-#include <trace/define_trace.h>