powerpc/eeh: Dump PHB diag-data early
On PowerNV platform, PHB diag-data is dumped after stopping device
drivers. In case of recursive EEH errors, the kernel is usually
crashed before dumping PHB diag-data for the second EEH error. It's
hard to locate the root cause of the second EEH error without PHB
diag-data.
The patch adds one more EEH option "eeh=early_log", which helps
dumping PHB diag-data immediately once frozen PE is detected, in
order to get the PHB diag-data for the second EEH error.
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index fb38fe4..2809c98 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -353,6 +353,9 @@
} else if (!(pe->state & EEH_PE_ISOLATED)) {
eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
ioda_eeh_phb_diag(pe);
+
+ if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+ pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
}
return result;
@@ -451,6 +454,9 @@
eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
ioda_eeh_phb_diag(pe);
+
+ if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+ pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
}
return result;
@@ -730,7 +736,8 @@
static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
char *drv_log, unsigned long len)
{
- pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
+ if (!eeh_has_flag(EEH_EARLY_DUMP_LOG))
+ pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
return 0;
}
@@ -1086,6 +1093,10 @@
!((*pe)->state & EEH_PE_ISOLATED)) {
eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
ioda_eeh_phb_diag(*pe);
+
+ if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
+ pnv_pci_dump_phb_diag_data((*pe)->phb,
+ (*pe)->data);
}
/*