On Sat, Feb 22, 2014 at 07:05:15AM +1100, Benjamin Herrenschmidt wrote: >On Fri, 2014-02-21 at 19:53 +0800, Gavin Shan wrote: >> According to Ben's suggestion, the patch makes the PHB diag-data >> dump look a bit shorter by printing multiple values in one line >> and outputting "-" for zero fields. >> >> After the patch is applied, the PHB diag-data dump looks like: > >Actually, I wouldn't do that "-" thing, I would leave zeros as >zeros but I would remove lines that have all zeros. >
Ok. I'll change it in the next revision :-) >Additionally, we might want to consider whether we can get rid >of more fields for INF, or maybe even not dump them by default >and just count them (should we have counters in sysfs ?) > Yes, I'll remove dumping for INF and have a sysfs entry for the INF counter, which would be a separate patch in the next revision. >One thing I'm tempted to do is turn the full logs into actual >error logs (sent to FSP) and only display an "analyzed" version >in the kernel, something that decodes the PEST for example >and indicates if it's a DMA or MMIO error, the address, etc... > Ok. I'll try to do it in the next revision :-) Thanks, Gavin >> PHB3 PHB#3 Diag-data (Version: 1) >> >> brdgCtl: 00000002 >> UtlSts: - - - >> RootSts: 0000000f 00400000 b0830008 00100147 00002000 >> RootErrSts: - - - >> RootErrLog: - - - - >> RootErrLog1: - - - >> nFir: - 0030006e00000000 - >> PhbSts: 0000001c00000000 - >> Lem: 0000000000100000 42498e327f502eae - >> PhbErr: - - - - >> OutErr: - - - - >> InAErr: 8000000000000000 8000000000000000 0402030000000000 - >> InBErr: - - - - >> PE[ 8] A/B: 8480002b00000000 8000000000000000 >> >> Signed-off-by: Gavin Shan <sha...@linux.vnet.ibm.com> >> --- >> arch/powerpc/platforms/powernv/pci.c | 238 >> ++++++++++++++++++++-------------- >> 1 file changed, 143 insertions(+), 95 deletions(-) >> >> diff --git a/arch/powerpc/platforms/powernv/pci.c >> b/arch/powerpc/platforms/powernv/pci.c >> index 67b2254..a5f236a 100644 >> --- a/arch/powerpc/platforms/powernv/pci.c >> +++ b/arch/powerpc/platforms/powernv/pci.c >> @@ -124,67 +124,103 @@ static void pnv_teardown_msi_irqs(struct pci_dev >> *pdev) >> } >> #endif /* CONFIG_PCI_MSI */ >> >> +static char *pnv_pci_diag_field(char *buf, int fmt, u64 val64) >> +{ >> + u32 val32 = (u32)val64; >> + >> + memset(buf, 0, 24); >> + switch (fmt) { >> + case 8: >> + if (val32) >> + sprintf(buf, "%08x", val32); >> + else >> + sprintf(buf, "%s", "-"); >> + break; >> + case 16: >> + if (val64) >> + 
sprintf(buf, "%016llx", val64); >> + else >> + sprintf(buf, "%s", "-"); >> + break; >> + default: >> + sprintf(buf, "%s", "-"); >> + } >> + >> + return buf; >> +} >> + >> static void pnv_pci_dump_p7ioc_diag_data(struct pci_controller *hose, >> struct OpalIoPhbErrorCommon *common) >> { >> struct OpalIoP7IOCPhbErrorData *data; >> + char buf[120]; >> int i; >> >> data = (struct OpalIoP7IOCPhbErrorData *)common; >> pr_info("P7IOC PHB#%d Diag-data (Version: %d)\n\n", >> hose->global_number, common->version); >> >> - pr_info(" brdgCtl: %08x\n", data->brdgCtl); >> - >> - pr_info(" portStatusReg: %08x\n", data->portStatusReg); >> - pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus); >> - pr_info(" busAgentStatus: %08x\n", data->busAgentStatus); >> - >> - pr_info(" deviceStatus: %08x\n", data->deviceStatus); >> - pr_info(" slotStatus: %08x\n", data->slotStatus); >> - pr_info(" linkStatus: %08x\n", data->linkStatus); >> - pr_info(" devCmdStatus: %08x\n", data->devCmdStatus); >> - pr_info(" devSecStatus: %08x\n", data->devSecStatus); >> - >> - pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus); >> - pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus); >> - pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus); >> - pr_info(" tlpHdr1: %08x\n", data->tlpHdr1); >> - pr_info(" tlpHdr2: %08x\n", data->tlpHdr2); >> - pr_info(" tlpHdr3: %08x\n", data->tlpHdr3); >> - pr_info(" tlpHdr4: %08x\n", data->tlpHdr4); >> - pr_info(" sourceId: %08x\n", data->sourceId); >> - pr_info(" errorClass: %016llx\n", data->errorClass); >> - pr_info(" correlator: %016llx\n", data->correlator); >> - pr_info(" p7iocPlssr: %016llx\n", data->p7iocPlssr); >> - pr_info(" p7iocCsr: %016llx\n", data->p7iocCsr); >> - pr_info(" lemFir: %016llx\n", data->lemFir); >> - pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask); >> - pr_info(" lemWOF: %016llx\n", data->lemWOF); >> - pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus); >> - pr_info(" phbFirstErrorStatus: 
%016llx\n", data->phbFirstErrorStatus); >> - pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0); >> - pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1); >> - pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus); >> - pr_info(" mmioFirstErrorStatus: %016llx\n", >> data->mmioFirstErrorStatus); >> - pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0); >> - pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1); >> - pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus); >> - pr_info(" dma0FirstErrorStatus: %016llx\n", >> data->dma0FirstErrorStatus); >> - pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0); >> - pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1); >> - pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus); >> - pr_info(" dma1FirstErrorStatus: %016llx\n", >> data->dma1FirstErrorStatus); >> - pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0); >> - pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1); >> + pr_info(" brdgCtl: %s\n", >> + pnv_pci_diag_field(&buf[0], 8, data->brdgCtl)); >> + pr_info(" UtlSts: %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 8, data->portStatusReg), >> + pnv_pci_diag_field(&buf[1 * 24], 8, data->rootCmplxStatus), >> + pnv_pci_diag_field(&buf[2 * 24], 8, data->busAgentStatus)); >> + pr_info(" RootSts: %s %s %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 8, data->deviceStatus), >> + pnv_pci_diag_field(&buf[1 * 24], 8, data->slotStatus), >> + pnv_pci_diag_field(&buf[2 * 24], 8, data->linkStatus), >> + pnv_pci_diag_field(&buf[3 * 24], 8, data->devCmdStatus), >> + pnv_pci_diag_field(&buf[4 * 24], 8, data->devSecStatus)); >> + pr_info(" RootErrSts: %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 8, data->rootErrorStatus), >> + pnv_pci_diag_field(&buf[1 * 24], 8, data->uncorrErrorStatus), >> + pnv_pci_diag_field(&buf[2 * 24], 8, data->corrErrorStatus)); >> + pr_info(" RootErrLog: %s %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 8, data->tlpHdr1), >> + 
pnv_pci_diag_field(&buf[1 * 24], 8, data->tlpHdr2), >> + pnv_pci_diag_field(&buf[2 * 24], 8, data->tlpHdr3), >> + pnv_pci_diag_field(&buf[3 * 24], 8, data->tlpHdr4)); >> + pr_info(" RootErrLog1: %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 8, data->sourceId), >> + pnv_pci_diag_field(&buf[1 * 24], 16, data->errorClass), >> + pnv_pci_diag_field(&buf[2 * 24], 16, data->correlator)); >> + pr_info(" PhbSts: %s %s\n", >> + pnv_pci_diag_field(&buf[0], 16, data->p7iocPlssr), >> + pnv_pci_diag_field(&buf[1 * 24], 16, data->p7iocCsr)); >> + pr_info(" Lem: %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 16, data->lemFir), >> + pnv_pci_diag_field(&buf[1 * 24], 16, data->lemErrorMask), >> + pnv_pci_diag_field(&buf[2 * 24], 16, data->lemWOF)); >> + pr_info(" PhbErr: %s %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 16, data->phbErrorStatus), >> + pnv_pci_diag_field(&buf[1 * 24], 16, data->phbFirstErrorStatus), >> + pnv_pci_diag_field(&buf[2 * 24], 16, data->phbErrorLog0), >> + pnv_pci_diag_field(&buf[3 * 24], 16, data->phbErrorLog1)); >> + pr_info(" OutErr: %s %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 16, data->mmioErrorStatus), >> + pnv_pci_diag_field(&buf[1 * 24], 16, >> data->mmioFirstErrorStatus), >> + pnv_pci_diag_field(&buf[2 * 24], 16, data->mmioErrorLog0), >> + pnv_pci_diag_field(&buf[3 * 24], 16, data->mmioErrorLog1)); >> + pr_info(" InAErr: %s %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 16, data->dma0ErrorStatus), >> + pnv_pci_diag_field(&buf[1 * 24], 16, >> data->dma0FirstErrorStatus), >> + pnv_pci_diag_field(&buf[2 * 24], 16, data->dma0ErrorLog0), >> + pnv_pci_diag_field(&buf[3 * 24], 16, data->dma0ErrorLog1)); >> + pr_info(" InBErr: %s %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 16, data->dma1ErrorStatus), >> + pnv_pci_diag_field(&buf[1 * 24], 16, >> data->dma1FirstErrorStatus), >> + pnv_pci_diag_field(&buf[2 * 24], 16, data->dma1ErrorLog0), >> + pnv_pci_diag_field(&buf[3 * 24], 16, data->dma1ErrorLog1)); >> >> for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) { 
>> if ((data->pestA[i] >> 63) == 0 && >> (data->pestB[i] >> 63) == 0) >> continue; >> >> - pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]); >> - pr_info(" PESTB: %016llx\n", data->pestB[i]); >> + pr_info(" PE[%3d] A/B: %s %s\n", >> + i, pnv_pci_diag_field(&buf[0], 16, data->pestA[i]), >> + pnv_pci_diag_field(&buf[1 * 24], 16, data->pestB[i])); >> } >> } >> >> @@ -192,67 +228,79 @@ static void pnv_pci_dump_phb3_diag_data(struct >> pci_controller *hose, >> struct OpalIoPhbErrorCommon *common) >> { >> struct OpalIoPhb3ErrorData *data; >> - int i; >> + char buf[120]; >> + int i = 0; >> >> + memset(buf, 0, 120); >> data = (struct OpalIoPhb3ErrorData*)common; >> pr_info("PHB3 PHB#%d Diag-data (Version: %d)\n\n", >> hose->global_number, common->version); >> >> - pr_info(" brdgCtl: %08x\n", data->brdgCtl); >> - >> - pr_info(" portStatusReg: %08x\n", data->portStatusReg); >> - pr_info(" rootCmplxStatus: %08x\n", data->rootCmplxStatus); >> - pr_info(" busAgentStatus: %08x\n", data->busAgentStatus); >> - >> - pr_info(" deviceStatus: %08x\n", data->deviceStatus); >> - pr_info(" slotStatus: %08x\n", data->slotStatus); >> - pr_info(" linkStatus: %08x\n", data->linkStatus); >> - pr_info(" devCmdStatus: %08x\n", data->devCmdStatus); >> - pr_info(" devSecStatus: %08x\n", data->devSecStatus); >> - >> - pr_info(" rootErrorStatus: %08x\n", data->rootErrorStatus); >> - pr_info(" uncorrErrorStatus: %08x\n", data->uncorrErrorStatus); >> - pr_info(" corrErrorStatus: %08x\n", data->corrErrorStatus); >> - pr_info(" tlpHdr1: %08x\n", data->tlpHdr1); >> - pr_info(" tlpHdr2: %08x\n", data->tlpHdr2); >> - pr_info(" tlpHdr3: %08x\n", data->tlpHdr3); >> - pr_info(" tlpHdr4: %08x\n", data->tlpHdr4); >> - pr_info(" sourceId: %08x\n", data->sourceId); >> - pr_info(" errorClass: %016llx\n", data->errorClass); >> - pr_info(" correlator: %016llx\n", data->correlator); >> - >> - pr_info(" nFir: %016llx\n", data->nFir); >> - pr_info(" nFirMask: %016llx\n", data->nFirMask); >> - pr_info(" nFirWOF: 
%016llx\n", data->nFirWOF); >> - pr_info(" PhbPlssr: %016llx\n", data->phbPlssr); >> - pr_info(" PhbCsr: %016llx\n", data->phbCsr); >> - pr_info(" lemFir: %016llx\n", data->lemFir); >> - pr_info(" lemErrorMask: %016llx\n", data->lemErrorMask); >> - pr_info(" lemWOF: %016llx\n", data->lemWOF); >> - pr_info(" phbErrorStatus: %016llx\n", data->phbErrorStatus); >> - pr_info(" phbFirstErrorStatus: %016llx\n", data->phbFirstErrorStatus); >> - pr_info(" phbErrorLog0: %016llx\n", data->phbErrorLog0); >> - pr_info(" phbErrorLog1: %016llx\n", data->phbErrorLog1); >> - pr_info(" mmioErrorStatus: %016llx\n", data->mmioErrorStatus); >> - pr_info(" mmioFirstErrorStatus: %016llx\n", >> data->mmioFirstErrorStatus); >> - pr_info(" mmioErrorLog0: %016llx\n", data->mmioErrorLog0); >> - pr_info(" mmioErrorLog1: %016llx\n", data->mmioErrorLog1); >> - pr_info(" dma0ErrorStatus: %016llx\n", data->dma0ErrorStatus); >> - pr_info(" dma0FirstErrorStatus: %016llx\n", >> data->dma0FirstErrorStatus); >> - pr_info(" dma0ErrorLog0: %016llx\n", data->dma0ErrorLog0); >> - pr_info(" dma0ErrorLog1: %016llx\n", data->dma0ErrorLog1); >> - pr_info(" dma1ErrorStatus: %016llx\n", data->dma1ErrorStatus); >> - pr_info(" dma1FirstErrorStatus: %016llx\n", >> data->dma1FirstErrorStatus); >> - pr_info(" dma1ErrorLog0: %016llx\n", data->dma1ErrorLog0); >> - pr_info(" dma1ErrorLog1: %016llx\n", data->dma1ErrorLog1); >> + pr_info(" brdgCtl: %s\n", >> + pnv_pci_diag_field(&buf[0], 8, data->brdgCtl)); >> + pr_info(" UtlSts: %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 8, data->portStatusReg), >> + pnv_pci_diag_field(&buf[1 * 24], 8, data->rootCmplxStatus), >> + pnv_pci_diag_field(&buf[2 * 24], 8, data->busAgentStatus)); >> + pr_info(" RootSts: %s %s %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 8, data->deviceStatus), >> + pnv_pci_diag_field(&buf[1 * 24], 8, data->slotStatus), >> + pnv_pci_diag_field(&buf[2 * 24], 8, data->linkStatus), >> + pnv_pci_diag_field(&buf[3 * 24], 8, data->devCmdStatus), >> + 
pnv_pci_diag_field(&buf[4 * 24], 8, data->devSecStatus)); >> + pr_info(" RootErrSts: %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 8, data->rootErrorStatus), >> + pnv_pci_diag_field(&buf[1 * 24], 8, data->uncorrErrorStatus), >> + pnv_pci_diag_field(&buf[2 * 24], 8, data->corrErrorStatus)); >> + pr_info(" RootErrLog: %s %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 8, data->tlpHdr1), >> + pnv_pci_diag_field(&buf[1 * 24], 8, data->tlpHdr2), >> + pnv_pci_diag_field(&buf[2 * 24], 8, data->tlpHdr3), >> + pnv_pci_diag_field(&buf[3 * 24], 8, data->tlpHdr4)); >> + pr_info(" RootErrLog1: %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 8, data->sourceId), >> + pnv_pci_diag_field(&buf[1 * 24], 16, data->errorClass), >> + pnv_pci_diag_field(&buf[2 * 24], 16, data->correlator)); >> + pr_info(" nFir: %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 16, data->nFir), >> + pnv_pci_diag_field(&buf[1 * 24], 16, data->nFirMask), >> + pnv_pci_diag_field(&buf[2 * 24], 16, data->nFirWOF)); >> + pr_info(" PhbSts: %s %s\n", >> + pnv_pci_diag_field(&buf[0], 16, data->phbPlssr), >> + pnv_pci_diag_field(&buf[1 * 24], 16, data->phbCsr)); >> + pr_info(" Lem: %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 16, data->lemFir), >> + pnv_pci_diag_field(&buf[1 * 24], 16, data->lemErrorMask), >> + pnv_pci_diag_field(&buf[2 * 24], 16, data->lemWOF)); >> + pr_info(" PhbErr: %s %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 16, data->phbErrorStatus), >> + pnv_pci_diag_field(&buf[1 * 24], 16, data->phbFirstErrorStatus), >> + pnv_pci_diag_field(&buf[2 * 24], 16, data->phbErrorLog0), >> + pnv_pci_diag_field(&buf[3 * 24], 16, data->phbErrorLog1)); >> + pr_info(" OutErr: %s %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 16, data->mmioErrorStatus), >> + pnv_pci_diag_field(&buf[1 * 24], 16, >> data->mmioFirstErrorStatus), >> + pnv_pci_diag_field(&buf[2 * 24], 16, data->mmioErrorLog0), >> + pnv_pci_diag_field(&buf[3 * 24], 16, data->mmioErrorLog1)); >> + pr_info(" InAErr: %s %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 
16, data->dma0ErrorStatus), >> + pnv_pci_diag_field(&buf[1 * 24], 16, >> data->dma0FirstErrorStatus), >> + pnv_pci_diag_field(&buf[2 * 24], 16, data->dma0ErrorLog0), >> + pnv_pci_diag_field(&buf[3 * 24], 16, data->dma0ErrorLog1)); >> + pr_info(" InBErr: %s %s %s %s\n", >> + pnv_pci_diag_field(&buf[0], 16, data->dma1ErrorStatus), >> + pnv_pci_diag_field(&buf[1 * 24], 16, >> data->dma1FirstErrorStatus), >> + pnv_pci_diag_field(&buf[2 * 24], 16, data->dma1ErrorLog0), >> + pnv_pci_diag_field(&buf[3 * 24], 16, data->dma1ErrorLog1)); >> >> for (i = 0; i < OPAL_PHB3_NUM_PEST_REGS; i++) { >> if ((data->pestA[i] >> 63) == 0 && >> (data->pestB[i] >> 63) == 0) >> continue; >> >> - pr_info(" PE[%3d] PESTA: %016llx\n", i, data->pestA[i]); >> - pr_info(" PESTB: %016llx\n", data->pestB[i]); >> + pr_info(" PE[%3d] A/B: %s %s\n", >> + i, pnv_pci_diag_field(&buf[0], 16, data->pestA[i]), >> + pnv_pci_diag_field(&buf[1 * 24], 16, data->pestB[i])); >> } >> } >> > > _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev