On 04.09.14 13:13, Aravinda Prasad wrote: > Whenever there is a physical memory error due to bit > flips, which cannot be corrected by hardware, the error > is passed on to the kernel. If the memory address in > error belongs to guest address space then guest kernel > is responsible to take action. Hence the error is passed > on to guest via KVM by invoking 0x200 NMI vector. > > However, guest OS, as per PAPR, expects an error log > upon such error. This patch registers a new hcall > which is issued from 0x200 interrupt vector and builds > the error log, copies the error log to rtas space and > passes the address of the error log to guest > > Enhancement to KVM to perform above functionality is > already in upstream kernel. > > Signed-off-by: Aravinda Prasad <aravi...@linux.vnet.ibm.com> > --- > hw/ppc/spapr_hcall.c | 154 > ++++++++++++++++++++++++++++++++++++++++++++++++ > include/hw/ppc/spapr.h | 4 + > 2 files changed, 157 insertions(+), 1 deletion(-) > > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c > index 01650ba..c3aa448 100644 > --- a/hw/ppc/spapr_hcall.c > +++ b/hw/ppc/spapr_hcall.c > @@ -14,6 +14,88 @@ struct SPRSyncState { > target_ulong mask; > }; > > +/* Offset from rtas-base where error log is placed */ > +#define RTAS_ERROR_OFFSET (TARGET_PAGE_SIZE)
You can't assume this. Please compute the value at the same place you compute the rtas-size. > + > +#define RTAS_ELOG_SEVERITY_SHIFT 0x5 > +#define RTAS_ELOG_DISPOSITION_SHIFT 0x3 > +#define RTAS_ELOG_INITIATOR_SHIFT 0x4 > + > +/* > + * Only required RTAS event severity, disposition, initiator > + * target and type are copied from arch/powerpc/include/asm/rtas.h > + */ > + > +/* RTAS event severity */ > +#define RTAS_SEVERITY_ERROR_SYNC 0x3 > + > +/* RTAS event disposition */ > +#define RTAS_DISP_NOT_RECOVERED 0x2 > + > +/* RTAS event initiator */ > +#define RTAS_INITIATOR_MEMORY 0x4 > + > +/* RTAS event target */ > +#define RTAS_TARGET_MEMORY 0x4 > + > +/* RTAS event type */ > +#define RTAS_TYPE_ECC_UNCORR 0x09 > + > +/* > + * Currently KVM only passes on the uncorrected machine > + * check memory error to guest. Other machine check errors > + * such as SLB multi-hit and TLB multi-hit are recovered > + * in KVM and are not passed on to guest. > + * > + * DSISR Bit for uncorrected machine check error. Based > + * on arch/powerpc/include/asm/mce.h Please don't include Linux code. This file is GPLv2+ licensed and I don't want to taint it as GPLv2 only just for the sake of mce. > + */ > +#define PPC_BIT(bit) (0x8000000000000000ULL >> bit) > +#define P7_DSISR_MC_UE (PPC_BIT(48)) /* P8 too */ > + > +/* Adopted from kernel source arch/powerpc/include/asm/rtas.h */ > +struct rtas_error_log { > + /* Byte 0 */ > + uint8_t byte0; /* Architectural version */ > + > + /* Byte 1 */ > + uint8_t byte1; > + /* XXXXXXXX > + * XXX 3: Severity level of error > + * XX 2: Degree of recovery > + * X 1: Extended log present? > + * XX 2: Reserved > + */ > + > + /* Byte 2 */ > + uint8_t byte2; > + /* XXXXXXXX > + * XXXX 4: Initiator of event > + * XXXX 4: Target of failed operation > + */ > + uint8_t byte3; /* General event or error*/ > +}; > + > +/* > + * Data format in RTAS-Blob > + * > + * This structure contains error information related to Machine > + * Check exception. 
> This is filled up and copied to rtas-blob > + * upon machine check exception. > + */ > +struct rtas_mc_log { > + target_ulong srr0; > + target_ulong srr1; > + /* > + * Beginning of error log address. This is properly > + * populated and passed on to OS registered machine > + * check notification routine upon machine check > + * exception > + */ > + target_ulong r3; > + struct rtas_error_log err_log; > +}; > + > static void do_spr_sync(void *arg) > { > struct SPRSyncState *s = arg; > @@ -586,6 +668,77 @@ static target_ulong h_rtas_update(PowerPCCPU *cpu, > sPAPREnvironment *spapr, > return 0; > } > > +static target_ulong h_report_mc_err(PowerPCCPU *cpu, sPAPREnvironment *spapr, > + target_ulong opcode, target_ulong *args) > +{ > + struct rtas_mc_log mc_log; > + CPUPPCState *env = &cpu->env; > + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); > + > + /* > + * We save the original r3 register in SPRG2 in 0x200 vector, > + * which is patched during call to ibm.nmi-register. Original > + * r3 is required to be included in error log > + */ > + mc_log.r3 = env->spr[SPR_SPRG2]; Don't you have to call cpu_synchronize_registers() before you access SPRG2? Otherwise the value may be stale. > + > + /* > + * SRR0 and SRR1, containing nip and msr at the time of exception, > + * are clobbered when we return from this hcall. Hence they > + * need to be properly saved and restored. 
> We save srr0 > + * and srr1 in rtas blob and restore it in 0x200 vector > + * before branching to OS registered machine check handler > + */ > + mc_log.srr0 = env->spr[SPR_SRR0]; > + mc_log.srr1 = env->spr[SPR_SRR1]; > + > + /* Set error log fields */ > + mc_log.err_log.byte0 = 0x00; > + mc_log.err_log.byte1 = > + (RTAS_SEVERITY_ERROR_SYNC << RTAS_ELOG_SEVERITY_SHIFT); > + mc_log.err_log.byte1 |= > + (RTAS_DISP_NOT_RECOVERED << RTAS_ELOG_DISPOSITION_SHIFT); > + mc_log.err_log.byte2 = > + (RTAS_INITIATOR_MEMORY << RTAS_ELOG_INITIATOR_SHIFT); > + mc_log.err_log.byte2 |= RTAS_TARGET_MEMORY; > + > + if (env->spr[SPR_DSISR] & P7_DSISR_MC_UE) { > + mc_log.err_log.byte3 = RTAS_TYPE_ECC_UNCORR; > + } else { > + mc_log.err_log.byte3 = 0x0; > + } > + > + /* Handle all Host/Guest LE/BE combinations */ > + if ((*pcc->interrupts_big_endian)(cpu)) { This should check MSR.LE, not ILE. Alex