[PATCH] target/ppc: Fix wrong interpretation of the disposition flag.

2020-04-08 Thread Ganesh Goudar
Using a bitwise AND on kvm_run->flags to evaluate whether we recovered
from an MCE is not correct, as the disposition in kvm_run->flags is a
two-bit integer value and not a bitmap. So check for equality
instead of using a bitwise AND.

Without the fix, QEMU treats any unrecoverable MCE as recoverable
and ends up in an MCE loop inside the guest. Below are the MCE logs before
and after the fix.

Before fix:

[   66.775757] MCE: CPU0: Initiator CPU
[   66.775891] MCE: CPU0: Unknown
[   66.776587] MCE: CPU0: machine check (Harmless) Host UE Indeterminate 
[Recovered]
[   66.776857] MCE: CPU0: NIP: [c008000e00b8] mcetest_tlbie+0xb0/0x128 
[mcetest_tlbie]

After fix:

[ 20.650577] CPU: 0 PID: 1415 Comm: insmod Tainted: G M O 5.6.0-fwnmi-arv+ #11
[ 20.650618] NIP: c008023a00e8 LR: c008023a00d8 CTR: c0021fe0
[ 20.650660] REGS: c001fffd3d70 TRAP: 0200 Tainted: G M O (5.6.0-fwnmi-arv+)
[ 20.650708] MSR: 82a0b033  CR: 
42000222 XER: 2004
[ 20.650758] CFAR: c000b940 DAR: c008025e00e0 DSISR: 0200 
IRQMASK: 0
[ 20.650758] GPR00: c008023a00d8 c001fddd79a0 c008023a8500 
0039
[ 20.650758] GPR04: 0001   
0007
[ 20.650758] GPR08: 0007 c008025e00e0  
00f7
[ 20.650758] GPR12:  c190 c101f398 
c008025c052f
[ 20.650758] GPR16: 03a8 c008025c c001fddd7d70 
c15b7940
[ 20.650758] GPR20: fff1 c0f72c28 c008025a0988 

[ 20.650758] GPR24: 0100 c008023a05d0 c01f1d70 

[ 20.650758] GPR28: c001fde2 c001fd02b2e0 c008023a 
c008025e
[ 20.651178] NIP [c008023a00e8] mcetest_tlbie+0xe8/0xf0 [mcetest_tlbie]
[ 20.651220] LR [c008023a00d8] mcetest_tlbie+0xd8/0xf0 [mcetest_tlbie]
[ 20.651262] Call Trace:
[ 20.651280] [c001fddd79a0] [c008023a00d8] mcetest_tlbie+0xd8/0xf0 
[mcetest_tlbie] (unreliable)
[ 20.651340] [c001fddd7a10] [c001091c] do_one_initcall+0x6c/0x2c0
[ 20.651390] [c001fddd7af0] [c01f7998] do_init_module+0x90/0x298
[ 20.651433] [c001fddd7b80] [c01f61a8] load_module+0x1f58/0x27a0
[ 20.651476] [c001fddd7d40] [c01f6c70] 
__do_sys_finit_module+0xe0/0x100
[ 20.651526] [c001fddd7e20] [c000b9d0] system_call+0x5c/0x68
[ 20.651567] Instruction dump:
[ 20.651594] e8410018 3c62 e8638020 48cd e8410018 3c62 e8638028 
48bd
[ 20.651646] e8410018 7be904e4 3940 612900e0 <7d434a64> 4b74 3c4c0001 
38428410
[ 20.651699] ---[ end trace 4c40897f016b4340 ]---
[ 20.653310]
Bus error
[ 20.655575] MCE: CPU0: machine check (Harmless) Host UE Indeterminate [Not 
recovered]
[ 20.655575] MCE: CPU0: NIP: [c008023a00e8] mcetest_tlbie+0xe8/0xf0 
[mcetest_tlbie]
[ 20.655576] MCE: CPU0: Initiator CPU
[ 20.655576] MCE: CPU0: Unknown

Signed-off-by: Ganesh Goudar 
---
 target/ppc/kvm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 03d0667e8f..2692f76130 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2816,11 +2816,11 @@ int kvm_arch_msi_data_to_gsi(uint32_t data)
 #if defined(TARGET_PPC64)
 int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run)
 {
-bool recovered = run->flags & KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
+uint16_t flags = run->flags & KVM_RUN_PPC_NMI_DISP_MASK;
 
 cpu_synchronize_state(CPU(cpu));
 
-spapr_mce_req_event(cpu, recovered);
+spapr_mce_req_event(cpu, flags == KVM_RUN_PPC_NMI_DISP_FULLY_RECOV);
 
 return 0;
 }
-- 
2.17.2




[PATCH v21 5/7] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls

2020-01-30 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch adds support in QEMU to handle "ibm,nmi-register"
and "ibm,nmi-interlock" RTAS calls.

The machine check notification address is saved when the
OS issues "ibm,nmi-register" RTAS call.

This patch also handles the case when multiple processors
experience machine check at or about the same time by
handling "ibm,nmi-interlock" call. In such cases, as per
PAPR, subsequent processors serialize waiting for the first
processor to issue the "ibm,nmi-interlock" call. The second
processor that also received a machine check error waits
till the first processor is done reading the error log.
The first processor issues "ibm,nmi-interlock" call
when the error log is consumed.

Signed-off-by: Aravinda Prasad 
[Register fwnmi RTAS calls in core_rtas_register_types()
 where other RTAS calls are registered]
Signed-off-by: Ganesh Goudar 
---
 hw/ppc/spapr_caps.c| 10 +++
 hw/ppc/spapr_rtas.c| 59 ++
 include/hw/ppc/spapr.h |  4 ++-
 3 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 3001098601..76c628ad22 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -502,6 +502,16 @@ static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, 
uint8_t val,
 if (!val) {
 return; /* Disabled by default */
 }
+
+if (tcg_enabled()) {
+warn_report("Firmware Assisted Non-Maskable Interrupts(FWNMI) not "
+"supported in TCG");
+} else if (kvm_enabled()) {
+if (kvmppc_set_fwnmi() < 0) {
+error_setg(errp, "Firmware Assisted Non-Maskable Interrupts(FWNMI) 
"
+ "not supported by KVM");
+}
+}
 }
 
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 89b7eb6c54..35d91260e6 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -399,6 +399,61 @@ static void rtas_get_power_level(PowerPCCPU *cpu, 
SpaprMachineState *spapr,
 rtas_st(rets, 1, 100);
 }
 
+static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
+  SpaprMachineState *spapr,
+  uint32_t token, uint32_t nargs,
+  target_ulong args,
+  uint32_t nret, target_ulong rets)
+{
+hwaddr rtas_addr;
+
+if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+rtas_addr = spapr_get_rtas_addr();
+if (!rtas_addr) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+spapr->guest_machine_check_addr = rtas_ld(args, 1);
+rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
+   SpaprMachineState *spapr,
+   uint32_t token, uint32_t nargs,
+   target_ulong args,
+   uint32_t nret, target_ulong rets)
+{
+if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+if (spapr->guest_machine_check_addr == -1) {
+/* NMI register not called */
+rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+return;
+}
+
+if (spapr->mc_status != cpu->vcpu_id) {
+/* The vCPU that hit the NMI should invoke "ibm,nmi-interlock" */
+rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+return;
+}
+
+/*
+ * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
+ * hence unset mc_status.
+ */
+spapr->mc_status = -1;
+qemu_cond_signal(&spapr->mc_delivery_cond);
+rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
 static struct rtas_call {
 const char *name;
 spapr_rtas_fn fn;
@@ -527,6 +582,10 @@ static void core_rtas_register_types(void)
 rtas_set_power_level);
 spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level",
 rtas_get_power_level);
+spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
+rtas_ibm_nmi_register);
+spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
+rtas_ibm_nmi_interlock);
 }
 
 type_init(core_rtas_register_types)
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 652a5514e8..f6f82d88aa 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -656,8 +656,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong 
opcode,
 #define RTAS_IBM_REMOVE_PE_DMA_WINDOW   (RTAS_TOKEN_BASE + 0x28)
 #define RTAS_IBM_RESET_PE_DMA_WINDOW(RTAS_TOKEN_BASE + 0x29)
 #define

[PATCH v21 7/7] ppc: spapr: Activate the FWNMI functionality

2020-01-30 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch sets the default value of SPAPR_CAP_FWNMI_MCE
to SPAPR_CAP_ON for machine type 5.0.

Signed-off-by: Aravinda Prasad 
Signed-off-by: Ganesh Goudar 
---
 hw/ppc/spapr.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index b2cde5f143..d4da8b29bd 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4454,7 +4454,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
 smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
 smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
-smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
 spapr_caps_add_properties(smc, _abort);
 smc->irq = _irq_dual;
 smc->dr_phb_enabled = true;
@@ -4522,8 +4522,11 @@ DEFINE_SPAPR_MACHINE(5_0, "5.0", true);
  */
 static void spapr_machine_4_2_class_options(MachineClass *mc)
 {
+SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
 spapr_machine_5_0_class_options(mc);
 compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len);
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 }
 
 DEFINE_SPAPR_MACHINE(4_2, "4.2", false);
-- 
2.17.2




[PATCH v21 4/7] target/ppc: Build rtas error log upon an MCE

2020-01-30 Thread Ganesh Goudar
From: Aravinda Prasad 

Upon a machine check exception (MCE) in a guest address space,
KVM causes a guest exit to enable QEMU to build and pass the
error to the guest in the PAPR defined rtas error log format.

This patch builds the rtas error log, copies it to the rtas_addr
and then invokes the guest registered machine check handler. The
handler in the guest takes suitable action(s) depending on the type
and criticality of the error. For example, if an error is
unrecoverable memory corruption in an application inside the
guest, then the guest kernel sends a SIGBUS to the application.
For recoverable errors, the guest performs recovery actions and
logs the error.

Signed-off-by: Aravinda Prasad 
[Assume SLOF has allocated enough room for rtas error log]
Signed-off-by: Ganesh Goudar 
Reviewed-by: David Gibson 
---
 hw/ppc/spapr_events.c  | 220 -
 hw/ppc/spapr_rtas.c|  26 +
 include/hw/ppc/spapr.h |   6 +-
 target/ppc/kvm.c   |   4 +-
 4 files changed, 253 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index dfc0de840a..54eaf28a9e 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -214,6 +214,104 @@ struct hp_extended_log {
 struct rtas_event_log_v6_hp hp;
 } QEMU_PACKED;
 
+struct rtas_event_log_v6_mc {
+#define RTAS_LOG_V6_SECTION_ID_MC   0x4D43 /* MC */
+struct rtas_event_log_v6_section_header hdr;
+uint32_t fru_id;
+uint32_t proc_id;
+uint8_t error_type;
+#define RTAS_LOG_V6_MC_TYPE_UE   0
+#define RTAS_LOG_V6_MC_TYPE_SLB  1
+#define RTAS_LOG_V6_MC_TYPE_ERAT 2
+#define RTAS_LOG_V6_MC_TYPE_TLB  4
+#define RTAS_LOG_V6_MC_TYPE_D_CACHE  5
+#define RTAS_LOG_V6_MC_TYPE_I_CACHE  7
+uint8_t sub_err_type;
+#define RTAS_LOG_V6_MC_UE_INDETERMINATE  0
+#define RTAS_LOG_V6_MC_UE_IFETCH 1
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
+#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+#define RTAS_LOG_V6_MC_SLB_PARITY0
+#define RTAS_LOG_V6_MC_SLB_MULTIHIT  1
+#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2
+#define RTAS_LOG_V6_MC_ERAT_PARITY   1
+#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2
+#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE3
+#define RTAS_LOG_V6_MC_TLB_PARITY1
+#define RTAS_LOG_V6_MC_TLB_MULTIHIT  2
+#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3
+uint8_t reserved_1[6];
+uint64_t effective_address;
+uint64_t logical_address;
+} QEMU_PACKED;
+
+struct mc_extended_log {
+struct rtas_event_log_v6 v6hdr;
+struct rtas_event_log_v6_mc mc;
+} QEMU_PACKED;
+
+struct MC_ierror_table {
+unsigned long srr1_mask;
+unsigned long srr1_value;
+bool nip_valid; /* nip is a valid indicator of faulting address */
+uint8_t error_type;
+uint8_t error_subtype;
+unsigned int initiator;
+unsigned int severity;
+};
+
+static const struct MC_ierror_table mc_ierror_table[] = {
+{ 0x081c, 0x0004, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0008, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x000c, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0010, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0014, true,
+  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0018, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } };
+
+struct MC_derror_table {
+unsigned long dsisr_value;
+bool dar_valid; /* dar is a valid indicator of faulting address */
+uint8_t error_type;
+uint8_t error_subtype;
+unsigned int initiator;
+unsigned int severity;
+};
+
+static const struct MC_derror_table mc_derror_table[] = {
+{ 0x8000, false,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x4000, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x0800, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT

[PATCH v21 1/7] Wrapper function to wait on condition for the main loop mutex

2020-01-30 Thread Ganesh Goudar
From: Aravinda Prasad 

Introduce a wrapper function to wait on condition for
the main loop mutex. This function atomically releases
the main loop mutex and causes the calling thread to
block on the condition. This wrapper is required because
qemu_global_mutex is a static variable.

Signed-off-by: Aravinda Prasad 
Signed-off-by: Ganesh Goudar 
Reviewed-by: David Gibson 
Reviewed-by: Greg Kurz 
---
 cpus.c   | 5 +
 include/qemu/main-loop.h | 8 
 2 files changed, 13 insertions(+)

diff --git a/cpus.c b/cpus.c
index b612116f95..b4f8b84b61 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1839,6 +1839,11 @@ void qemu_mutex_unlock_iothread(void)
 qemu_mutex_unlock(&qemu_global_mutex);
 }
 
+void qemu_cond_wait_iothread(QemuCond *cond)
+{
+qemu_cond_wait(cond, &qemu_global_mutex);
+}
+
 static bool all_vcpus_paused(void)
 {
 CPUState *cpu;
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index f6ba78ea73..a6d20b0719 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -295,6 +295,14 @@ void qemu_mutex_lock_iothread_impl(const char *file, int 
line);
  */
 void qemu_mutex_unlock_iothread(void);
 
+/*
+ * qemu_cond_wait_iothread: Wait on condition for the main loop mutex
+ *
+ * This function atomically releases the main loop mutex and causes
+ * the calling thread to block on the condition.
+ */
+void qemu_cond_wait_iothread(QemuCond *cond);
+
 /* internal interfaces */
 
 void qemu_fd_register(int fd);
-- 
2.17.2




[PATCH v21 2/7] ppc: spapr: Introduce FWNMI capability

2020-01-30 Thread Ganesh Goudar
From: Aravinda Prasad 

Introduce fwnmi as an spapr capability and add a helper function
which tries to enable it; the helper will be used by a following patch
in the series. This patch by itself does not change the existing
behavior.

Signed-off-by: Aravinda Prasad 
[eliminate cap_ppc_fwnmi, add fwnmi cap to migration state
 and rephrase the commit message]
Signed-off-by: Ganesh Goudar 
Reviewed-by: David Gibson 
---
 hw/ppc/spapr.c |  2 ++
 hw/ppc/spapr_caps.c| 18 ++
 include/hw/ppc/spapr.h |  5 -
 target/ppc/kvm.c   |  8 
 target/ppc/kvm_ppc.h   |  6 ++
 5 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 02cf53fc5b..d819decffa 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1992,6 +1992,7 @@ static const VMStateDescription vmstate_spapr = {
 &vmstate_spapr_dtb,
 &vmstate_spapr_cap_large_decr,
 &vmstate_spapr_cap_ccf_assist,
+&vmstate_spapr_cap_fwnmi,
 NULL
 }
 };
@@ -4398,6 +4399,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
 smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
 smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 spapr_caps_add_properties(smc, _abort);
 smc->irq = _irq_dual;
 smc->dr_phb_enabled = true;
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 481dfd2a27..3001098601 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -496,6 +496,14 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, 
uint8_t val,
 }
 }
 
+static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
+Error **errp)
+{
+if (!val) {
+return; /* Disabled by default */
+}
+}
+
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
 [SPAPR_CAP_HTM] = {
 .name = "htm",
@@ -595,6 +603,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
 .type = "bool",
 .apply = cap_ccf_assist_apply,
 },
+[SPAPR_CAP_FWNMI_MCE] = {
+.name = "fwnmi-mce",
+.description = "Handle fwnmi machine check exceptions",
+.index = SPAPR_CAP_FWNMI_MCE,
+.get = spapr_cap_get_bool,
+.set = spapr_cap_set_bool,
+.type = "bool",
+.apply = cap_fwnmi_mce_apply,
+},
 };
 
 static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -734,6 +751,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, 
SPAPR_CAP_HPT_MAXPAGESIZE);
 SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
 SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
+SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
 
 void spapr_caps_init(SpaprMachineState *spapr)
 {
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 61f005c6f6..7bc5fc3a9e 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -79,8 +79,10 @@ typedef enum {
 #define SPAPR_CAP_LARGE_DECREMENTER 0x08
 /* Count Cache Flush Assist HW Instruction */
 #define SPAPR_CAP_CCF_ASSIST0x09
+/* FWNMI machine check handling */
+#define SPAPR_CAP_FWNMI_MCE 0x0A
 /* Num Caps */
-#define SPAPR_CAP_NUM   (SPAPR_CAP_CCF_ASSIST + 1)
+#define SPAPR_CAP_NUM   (SPAPR_CAP_FWNMI_MCE + 1)
 
 /*
  * Capability Values
@@ -869,6 +871,7 @@ extern const VMStateDescription 
vmstate_spapr_cap_hpt_maxpagesize;
 extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
 extern const VMStateDescription vmstate_spapr_cap_large_decr;
 extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
+extern const VMStateDescription vmstate_spapr_cap_fwnmi;
 
 static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
 {
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 06fd0cc162..87e00cd5d7 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2054,6 +2054,14 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int 
mpic_proxy)
 }
 }
 
+int kvmppc_set_fwnmi(void)
+{
+PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+CPUState *cs = CPU(cpu);
+
+return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
+}
+
 int kvmppc_smt_threads(void)
 {
 return cap_ppc_smt ? cap_ppc_smt : 1;
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index b713097bfb..2c60dedd0d 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -27,6 +27,7 @@ void kvmppc_enable_h_page_init(void);
 void kvmppc_set_papr(PowerPCCPU *cpu);
 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
+int kvmppc_set_fwnmi(void);
 int kvmppc_smt_threads(void);
 void kvmppc_error_append_smt_possible_hint(Error *const *errp);
 int kvmppc_set_smt_threads(int smt);
@@ -

[PATCH v21 6/7] migration: Include migration support for machine check handling

2020-01-30 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch includes migration support for machine check
handling. In particular, this patch blocks VM migration
requests until the machine check error handling is
complete, as these errors are specific to the source
hardware and are irrelevant on the target hardware.

Signed-off-by: Aravinda Prasad 
[Do not set FWNMI cap in post_load, now it's done in .apply hook]
Signed-off-by: Ganesh Goudar 
---
 hw/ppc/spapr.c | 47 ++
 hw/ppc/spapr_events.c  | 16 +-
 hw/ppc/spapr_rtas.c|  2 ++
 include/hw/ppc/spapr.h |  2 ++
 4 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 1e56617a49..b2cde5f143 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -46,6 +46,7 @@
 #include "migration/qemu-file-types.h"
 #include "migration/global_state.h"
 #include "migration/register.h"
+#include "migration/blocker.h"
 #include "mmu-hash64.h"
 #include "mmu-book3s-v3.h"
 #include "cpu-models.h"
@@ -1683,6 +1684,8 @@ static void spapr_machine_reset(MachineState *machine)
 
 /* Signal all vCPUs waiting on this condition */
 qemu_cond_broadcast(&spapr->mc_delivery_cond);
+
+migrate_del_blocker(spapr->fwnmi_migration_blocker);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -1965,6 +1968,42 @@ static const VMStateDescription vmstate_spapr_dtb = {
 },
 };
 
+static bool spapr_fwnmi_needed(void *opaque)
+{
+SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+return spapr->guest_machine_check_addr != -1;
+}
+
+static int spapr_fwnmi_pre_save(void *opaque)
+{
+SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+/*
+ * Check if machine check handling is in progress and print a
+ * warning message.
+ */
+if (spapr->mc_status != -1) {
+warn_report("A machine check is being handled during migration. The"
+"handler may run and log hardware error on the destination");
+}
+
+return 0;
+}
+
+static const VMStateDescription vmstate_spapr_machine_check = {
+.name = "spapr_machine_check",
+.version_id = 1,
+.minimum_version_id = 1,
+.needed = spapr_fwnmi_needed,
+.pre_save = spapr_fwnmi_pre_save,
+.fields = (VMStateField[]) {
+VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
+VMSTATE_INT32(mc_status, SpaprMachineState),
+VMSTATE_END_OF_LIST()
+},
+};
+
 static const VMStateDescription vmstate_spapr = {
 .name = "spapr",
 .version_id = 3,
@@ -1999,6 +2038,7 @@ static const VMStateDescription vmstate_spapr = {
 &vmstate_spapr_cap_large_decr,
 &vmstate_spapr_cap_ccf_assist,
 &vmstate_spapr_cap_fwnmi,
+&vmstate_spapr_machine_check,
 NULL
 }
 };
@@ -2814,6 +2854,13 @@ static void spapr_machine_init(MachineState *machine)
 spapr_create_lmb_dr_connectors(spapr);
 }
 
+if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
+/* Create the error string for live migration blocker */
+error_setg(&spapr->fwnmi_migration_blocker,
+"A machine check is being handled during migration. The handler"
+"may run and log hardware error on the destination");
+}
+
 /* Set up RTAS event infrastructure */
 spapr_events_init(spapr);
 
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 54eaf28a9e..884e455f02 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -43,6 +43,7 @@
 #include "qemu/main-loop.h"
 #include "hw/ppc/spapr_ovec.h"
 #include 
+#include "migration/blocker.h"
 
 #define RTAS_LOG_VERSION_MASK   0xff00
 #define   RTAS_LOG_VERSION_60x0600
@@ -843,6 +844,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 {
 SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 CPUState *cs = CPU(cpu);
+int ret;
+Error *local_err = NULL;
 
 if (spapr->guest_machine_check_addr == -1) {
 /*
@@ -872,8 +875,19 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 return;
 }
 }
-spapr->mc_status = cpu->vcpu_id;
 
+ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err);
+if (ret == -EBUSY) {
+/*
+ * We don't want to abort so we let the migration to continue.
+ * In a rare case, the machine check handler will run on the target.
+ * Though this is not preferable, it is better than aborting
+ * the migration or killing the VM.
+ */
+warn_report("Received a fwnmi while migration was in progress");
+}
+
+spapr->mc_status = cpu->vcpu_id;
 spapr_mce_dispatch_elog(cpu, recovered);
 }
 
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 35d91260e6..883fe28465 1006

[PATCH v21 3/7] target/ppc: Handle NMI guest exit

2020-01-30 Thread Ganesh Goudar
From: Aravinda Prasad 

Memory errors such as bit flips that cannot be corrected
by hardware are passed on to the kernel for handling.
If the memory address in error belongs to a guest, then
the guest kernel is responsible for taking suitable action.
Patch [1] enhances KVM to exit the guest with the exit reason
set to KVM_EXIT_NMI in such cases. This patch handles
the KVM_EXIT_NMI exit.

[1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
(e20bbd3d and related commits)

Signed-off-by: Aravinda Prasad 
Signed-off-by: Ganesh Goudar 
Reviewed-by: David Gibson 
Reviewed-by: Greg Kurz 
---
 hw/ppc/spapr.c  |  8 
 hw/ppc/spapr_events.c   | 37 +
 include/hw/ppc/spapr.h  | 10 ++
 target/ppc/kvm.c| 14 ++
 target/ppc/kvm_ppc.h|  2 ++
 target/ppc/trace-events |  1 +
 6 files changed, 72 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index d819decffa..1e56617a49 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1677,6 +1677,12 @@ static void spapr_machine_reset(MachineState *machine)
 first_ppc_cpu->env.gpr[5] = 0;
 
 spapr->cas_reboot = false;
+
+spapr->mc_status = -1;
+spapr->guest_machine_check_addr = -1;
+
+/* Signal all vCPUs waiting on this condition */
+qemu_cond_broadcast(&spapr->mc_delivery_cond);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -2971,6 +2977,8 @@ static void spapr_machine_init(MachineState *machine)
 
 kvmppc_spapr_enable_inkernel_multitce();
 }
+
+qemu_cond_init(&spapr->mc_delivery_cond);
 }
 
 static int spapr_kvm_type(MachineState *machine, const char *vm_type)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index e355e000d0..dfc0de840a 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -40,6 +40,7 @@
 #include "hw/ppc/spapr_drc.h"
 #include "qemu/help_option.h"
 #include "qemu/bcd.h"
+#include "qemu/main-loop.h"
 #include "hw/ppc/spapr_ovec.h"
 #include 
 
@@ -622,6 +623,42 @@ void 
spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
 RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, _id);
 }
 
+void spapr_mce_req_event(PowerPCCPU *cpu)
+{
+SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+CPUState *cs = CPU(cpu);
+
+if (spapr->guest_machine_check_addr == -1) {
+/*
+ * This implies that we have hit a machine check either when the
+ * guest has not registered FWNMI (i.e., "ibm,nmi-register" not
+ * called) or between system reset and "ibm,nmi-register".
+ * Fall back to the old machine check behavior in such cases.
+ */
+cs->exception_index = POWERPC_EXCP_MCHECK;
+ppc_cpu_do_interrupt(cs);
+return;
+}
+
+while (spapr->mc_status != -1) {
+/*
+ * Check whether the same CPU got machine check error
+ * while still handling the mc error (i.e., before
+ * that CPU called "ibm,nmi-interlock")
+ */
+if (spapr->mc_status == cpu->vcpu_id) {
+qemu_system_guest_panicked(NULL);
+return;
+}
+qemu_cond_wait_iothread(&spapr->mc_delivery_cond);
+/* Meanwhile if the system is reset, then just return */
+if (spapr->guest_machine_check_addr == -1) {
+return;
+}
+}
+spapr->mc_status = cpu->vcpu_id;
+}
+
 static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
 uint32_t token, uint32_t nargs,
 target_ulong args,
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7bc5fc3a9e..909d3976f9 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -191,6 +191,15 @@ struct SpaprMachineState {
  * occurs during the unplug process. */
 QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs;
 
+/* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */
+target_ulong guest_machine_check_addr;
+/*
+ * mc_status is set to -1 if mc is not in progress, else is set to the CPU
+ * handling the mc.
+ */
+int mc_status;
+QemuCond mc_delivery_cond;
+
 /*< public >*/
 char *kvm_type;
 char *host_model;
@@ -804,6 +813,7 @@ void spapr_clear_pending_events(SpaprMachineState *spapr);
 int spapr_max_server_number(SpaprMachineState *spapr);
 void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
   uint64_t pte0, uint64_t pte1);
+void spapr_mce_req_event(PowerPCCPU *cpu);
 
 /* DRC callbacks. */
 void spapr_core_release(DeviceState *dev);
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 87e00cd5d7..1ccf725aed 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -1702,6 +1702,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run 
*run)
 ret = 0;
 break;
 
+case KV

[PATCH v21 0/7]target-ppc/spapr: Add FWNMI support in QEMU for PowerKVM guests

2020-01-30 Thread Ganesh Goudar
This patch set adds support for FWNMI in PowerKVM guests.

System errors such as SLB multihit and memory errors
that cannot be corrected by hardware are passed on to
the kernel for handling by raising a machine check
exception (an NMI). Upon such machine check exceptions,
if the address in error belongs to a guest, then KVM
invokes the guest's 0x200 interrupt vector if the guest
is not FWNMI capable. For an FWNMI-capable guest,
KVM passes control to QEMU by exiting the guest.

This patch series adds functionality to QEMU to pass
on such machine check exceptions to the FWNMI capable
guest kernel by building an error log and invoking
the guest registered machine check handling routine.

The KVM changes are now part of the upstream kernel
(commit e20bbd3d). This series contains the QEMU changes.

Change Log v21:
  - Use error_setg() for failure handling in apply hook.
  - Report warning if FWNMI enabled for TCG.
  - Enable FWNMI by default for machine type 5.0.

Change Log v20:
  - Remove code left over from previous version.

Change Log v19:
  - Create error object for migration blocker in machine_init().
  - Remove the check to see if fwnmi calls are already registered,
which is no longer needed.
  - Register fwnmi RTAS calls in core_rtas_register_types() where
other RTAS calls are registered.
  - Bail out from interlock call if the cap is not set.
  - Reorder and add missing S-O-Bs.

Change Log v18:
  - Dynamically create the Error object before adding it as blocker
  - In apply hook check if the fwnmi calls are already registered and
if kvm supports fwnmi before registering the fwnmi calls.
  - In rtas_ibm_nmi_register() test the feature flag before attempting
to get the RTAS address
  - Introduce a bool member "fwnmi_calls_registered" to check if the
fwnmi calls are registered and use the same in needed hook to save
the state during migration. 

Change Log v17:
  - Add fwnmi cap to migration state
  - Rephrase the commit message in patch 2/7

Change Log v16:
  - Fixed coding style problems

Change Log v15:
  - Removed cap_ppc_fwnmi
  - Moved fwnmi registration to .apply hook
  - Assume SLOF has allocated enough room for rtas error log
  - Using ARRAY_SIZE to end the loop
  - Do not set FWNMI cap in post_load, now it's done in .apply hook

Change Log v14:
  - Feature activation moved to a separate patch
  - Fixed issues with migration blocker

Change Log v13:
  - Minor fixes (mostly nits)
  - Moved FWNMI guest registration check from patch 4 to 3.

Change Log v12:
  - Rebased to latest ppc-for-4.2 (SHA b1e8156743)

Change Log v11:
  - Moved FWNMI SPAPR cap defaults to 4.2 class option
  - Fixed issues with handling fwnmi KVM capability

---

Aravinda Prasad (7):
  Wrapper function to wait on condition for the main loop mutex
  ppc: spapr: Introduce FWNMI capability
  target/ppc: Handle NMI guest exit
  target/ppc: Build rtas error log upon an MCE
  ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS
calls
  migration: Include migration support for machine check handling
  ppc: spapr: Activate the FWNMI functionality

 cpus.c   |   5 +
 hw/ppc/spapr.c   |  60 +
 hw/ppc/spapr_caps.c  |  28 
 hw/ppc/spapr_events.c| 269 +++
 hw/ppc/spapr_rtas.c  |  87 +
 include/hw/ppc/spapr.h   |  25 +++-
 include/qemu/main-loop.h |   8 ++
 target/ppc/kvm.c |  24 
 target/ppc/kvm_ppc.h |   8 ++
 target/ppc/trace-events  |   1 +
 10 files changed, 513 insertions(+), 2 deletions(-)

-- 
2.17.2




[PATCH v20 5/7] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls

2020-01-17 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch adds support in QEMU to handle "ibm,nmi-register"
and "ibm,nmi-interlock" RTAS calls.

The machine check notification address is saved when the
OS issues "ibm,nmi-register" RTAS call.

This patch also handles the case when multiple processors
experience machine check at or about the same time by
handling "ibm,nmi-interlock" call. In such cases, as per
PAPR, subsequent processors serialize waiting for the first
processor to issue the "ibm,nmi-interlock" call. The second
processor that also received a machine check error waits
till the first processor is done reading the error log.
The first processor issues "ibm,nmi-interlock" call
when the error log is consumed.

Signed-off-by: Aravinda Prasad 
[Register fwnmi RTAS calls in core_rtas_register_types()
 where other RTAS calls are registered]
Signed-off-by: Ganesh Goudar 
---
 hw/ppc/spapr_caps.c|  7 +
 hw/ppc/spapr_rtas.c| 59 ++
 include/hw/ppc/spapr.h |  4 ++-
 3 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 3001098601..c43498da49 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -502,6 +502,13 @@ static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, 
uint8_t val,
 if (!val) {
 return; /* Disabled by default */
 }
+
+if (kvm_enabled()) {
+if (kvmppc_set_fwnmi() < 0) {
+error_report("Could not enable fwnmi capability");
+exit(1);
+}
+}
 }
 
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 2c066a372d..3f162d82f5 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -400,6 +400,61 @@ static void rtas_get_power_level(PowerPCCPU *cpu, 
SpaprMachineState *spapr,
 rtas_st(rets, 1, 100);
 }
 
+static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
+  SpaprMachineState *spapr,
+  uint32_t token, uint32_t nargs,
+  target_ulong args,
+  uint32_t nret, target_ulong rets)
+{
+hwaddr rtas_addr;
+
+if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+rtas_addr = spapr_get_rtas_addr();
+if (!rtas_addr) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+spapr->guest_machine_check_addr = rtas_ld(args, 1);
+rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
+   SpaprMachineState *spapr,
+   uint32_t token, uint32_t nargs,
+   target_ulong args,
+   uint32_t nret, target_ulong rets)
+{
+if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+if (spapr->guest_machine_check_addr == -1) {
+/* NMI register not called */
+rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+return;
+}
+
+if (spapr->mc_status != cpu->vcpu_id) {
+/* The vCPU that hit the NMI should invoke "ibm,nmi-interlock" */
+rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+return;
+}
+
+/*
+ * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
+ * hence unset mc_status.
+ */
+spapr->mc_status = -1;
+qemu_cond_signal(>mc_delivery_cond);
+rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
 static struct rtas_call {
 const char *name;
 spapr_rtas_fn fn;
@@ -528,6 +583,10 @@ static void core_rtas_register_types(void)
 rtas_set_power_level);
 spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level",
 rtas_get_power_level);
+spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
+rtas_ibm_nmi_register);
+spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
+rtas_ibm_nmi_interlock);
 }
 
 type_init(core_rtas_register_types)
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 652a5514e8..f6f82d88aa 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -656,8 +656,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong 
opcode,
 #define RTAS_IBM_REMOVE_PE_DMA_WINDOW   (RTAS_TOKEN_BASE + 0x28)
 #define RTAS_IBM_RESET_PE_DMA_WINDOW(RTAS_TOKEN_BASE + 0x29)
 #define RTAS_IBM_SUSPEND_ME (RTAS_TOKEN_BASE + 0x2A)
+#define RTAS_IBM_NMI_REGISTER   (RTAS_TOKEN_BASE + 0x2B)
+#define RTAS_IBM_NMI_INTERLOCK  (RTAS_TOKEN_BASE + 0x2C)
 
-#define RTAS_TOK

[PATCH v20 4/7] target/ppc: Build rtas error log upon an MCE

2020-01-17 Thread Ganesh Goudar
From: Aravinda Prasad 

Upon a machine check exception (MCE) in a guest address space,
KVM causes a guest exit to enable QEMU to build and pass the
error to the guest in the PAPR defined rtas error log format.

This patch builds the rtas error log, copies it to the rtas_addr
and then invokes the guest registered machine check handler. The
handler in the guest takes suitable action(s) depending on the type
and criticality of the error. For example, if an error is
unrecoverable memory corruption in an application inside the
guest, then the guest kernel sends a SIGBUS to the application.
For recoverable errors, the guest performs recovery actions and
logs the error.

Signed-off-by: Aravinda Prasad 
[Assume SLOF has allocated enough room for rtas error log]
Signed-off-by: Ganesh Goudar 
Reviewed-by: David Gibson 
---
 hw/ppc/spapr_events.c  | 220 -
 hw/ppc/spapr_rtas.c|  26 +
 include/hw/ppc/spapr.h |   6 +-
 target/ppc/kvm.c   |   4 +-
 4 files changed, 253 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index dfc0de840a..54eaf28a9e 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -214,6 +214,104 @@ struct hp_extended_log {
 struct rtas_event_log_v6_hp hp;
 } QEMU_PACKED;
 
+struct rtas_event_log_v6_mc {
+#define RTAS_LOG_V6_SECTION_ID_MC   0x4D43 /* MC */
+struct rtas_event_log_v6_section_header hdr;
+uint32_t fru_id;
+uint32_t proc_id;
+uint8_t error_type;
+#define RTAS_LOG_V6_MC_TYPE_UE   0
+#define RTAS_LOG_V6_MC_TYPE_SLB  1
+#define RTAS_LOG_V6_MC_TYPE_ERAT 2
+#define RTAS_LOG_V6_MC_TYPE_TLB  4
+#define RTAS_LOG_V6_MC_TYPE_D_CACHE  5
+#define RTAS_LOG_V6_MC_TYPE_I_CACHE  7
+uint8_t sub_err_type;
+#define RTAS_LOG_V6_MC_UE_INDETERMINATE  0
+#define RTAS_LOG_V6_MC_UE_IFETCH 1
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
+#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+#define RTAS_LOG_V6_MC_SLB_PARITY0
+#define RTAS_LOG_V6_MC_SLB_MULTIHIT  1
+#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2
+#define RTAS_LOG_V6_MC_ERAT_PARITY   1
+#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2
+#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE3
+#define RTAS_LOG_V6_MC_TLB_PARITY1
+#define RTAS_LOG_V6_MC_TLB_MULTIHIT  2
+#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3
+uint8_t reserved_1[6];
+uint64_t effective_address;
+uint64_t logical_address;
+} QEMU_PACKED;
+
+struct mc_extended_log {
+struct rtas_event_log_v6 v6hdr;
+struct rtas_event_log_v6_mc mc;
+} QEMU_PACKED;
+
+struct MC_ierror_table {
+unsigned long srr1_mask;
+unsigned long srr1_value;
+bool nip_valid; /* nip is a valid indicator of faulting address */
+uint8_t error_type;
+uint8_t error_subtype;
+unsigned int initiator;
+unsigned int severity;
+};
+
+static const struct MC_ierror_table mc_ierror_table[] = {
+{ 0x081c, 0x0004, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0008, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x000c, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0010, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0014, true,
+  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0018, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } };
+
+struct MC_derror_table {
+unsigned long dsisr_value;
+bool dar_valid; /* dar is a valid indicator of faulting address */
+uint8_t error_type;
+uint8_t error_subtype;
+unsigned int initiator;
+unsigned int severity;
+};
+
+static const struct MC_derror_table mc_derror_table[] = {
+{ 0x8000, false,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x4000, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x0800, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT

[PATCH v20 3/7] target/ppc: Handle NMI guest exit

2020-01-17 Thread Ganesh Goudar
From: Aravinda Prasad 

Memory errors such as bit flips that cannot be corrected
by hardware are passed on to the kernel for handling.
If the memory address in error belongs to a guest, then
the guest kernel is responsible for taking suitable action.
Patch [1] enhances KVM to exit the guest with the exit reason
set to KVM_EXIT_NMI in such cases. This patch handles
the KVM_EXIT_NMI exit.

[1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
(e20bbd3d and related commits)

Signed-off-by: Aravinda Prasad 
Signed-off-by: Ganesh Goudar 
Reviewed-by: David Gibson 
Reviewed-by: Greg Kurz 
---
 hw/ppc/spapr.c  |  8 
 hw/ppc/spapr_events.c   | 37 +
 include/hw/ppc/spapr.h  | 10 ++
 target/ppc/kvm.c| 14 ++
 target/ppc/kvm_ppc.h|  2 ++
 target/ppc/trace-events |  1 +
 6 files changed, 72 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index f5c036d3d9..46bac1a83c 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1677,6 +1677,12 @@ static void spapr_machine_reset(MachineState *machine)
 first_ppc_cpu->env.gpr[5] = 0;
 
 spapr->cas_reboot = false;
+
+spapr->mc_status = -1;
+spapr->guest_machine_check_addr = -1;
+
+/* Signal all vCPUs waiting on this condition */
+qemu_cond_broadcast(>mc_delivery_cond);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -2971,6 +2977,8 @@ static void spapr_machine_init(MachineState *machine)
 
 kvmppc_spapr_enable_inkernel_multitce();
 }
+
+qemu_cond_init(>mc_delivery_cond);
 }
 
 static int spapr_kvm_type(MachineState *machine, const char *vm_type)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index e355e000d0..dfc0de840a 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -40,6 +40,7 @@
 #include "hw/ppc/spapr_drc.h"
 #include "qemu/help_option.h"
 #include "qemu/bcd.h"
+#include "qemu/main-loop.h"
 #include "hw/ppc/spapr_ovec.h"
 #include 
 
@@ -622,6 +623,42 @@ void 
spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
 RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, _id);
 }
 
+void spapr_mce_req_event(PowerPCCPU *cpu)
+{
+SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+CPUState *cs = CPU(cpu);
+
+if (spapr->guest_machine_check_addr == -1) {
+/*
+ * This implies that we have hit a machine check either when the
+ * guest has not registered FWNMI (i.e., "ibm,nmi-register" not
+ * called) or between system reset and "ibm,nmi-register".
+ * Fall back to the old machine check behavior in such cases.
+ */
+cs->exception_index = POWERPC_EXCP_MCHECK;
+ppc_cpu_do_interrupt(cs);
+return;
+}
+
+while (spapr->mc_status != -1) {
+/*
+ * Check whether the same CPU got machine check error
+ * while still handling the mc error (i.e., before
+ * that CPU called "ibm,nmi-interlock")
+ */
+if (spapr->mc_status == cpu->vcpu_id) {
+qemu_system_guest_panicked(NULL);
+return;
+}
+qemu_cond_wait_iothread(>mc_delivery_cond);
+/* Meanwhile if the system is reset, then just return */
+if (spapr->guest_machine_check_addr == -1) {
+return;
+}
+}
+spapr->mc_status = cpu->vcpu_id;
+}
+
 static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
 uint32_t token, uint32_t nargs,
 target_ulong args,
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7bc5fc3a9e..909d3976f9 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -191,6 +191,15 @@ struct SpaprMachineState {
  * occurs during the unplug process. */
 QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs;
 
+/* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */
+target_ulong guest_machine_check_addr;
+/*
+ * mc_status is set to -1 if mc is not in progress, else is set to the CPU
+ * handling the mc.
+ */
+int mc_status;
+QemuCond mc_delivery_cond;
+
 /*< public >*/
 char *kvm_type;
 char *host_model;
@@ -804,6 +813,7 @@ void spapr_clear_pending_events(SpaprMachineState *spapr);
 int spapr_max_server_number(SpaprMachineState *spapr);
 void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
   uint64_t pte0, uint64_t pte1);
+void spapr_mce_req_event(PowerPCCPU *cpu);
 
 /* DRC callbacks. */
 void spapr_core_release(DeviceState *dev);
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 9161eea865..21a4c79d41 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -1702,6 +1702,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run 
*run)
 ret = 0;
 break;
 
+case KV

[PATCH v20 2/7] ppc: spapr: Introduce FWNMI capability

2020-01-17 Thread Ganesh Goudar
From: Aravinda Prasad 

Introduce fwnmi as an spapr capability and add a helper function
that tries to enable it; the helper will be used by a following
patch of the series. This patch by itself does not change the
existing behavior.

Signed-off-by: Aravinda Prasad 
[eliminate cap_ppc_fwnmi, add fwnmi cap to migration state
 and rephrase the commit message]
Signed-off-by: Ganesh Goudar 
---
 hw/ppc/spapr.c |  2 ++
 hw/ppc/spapr_caps.c| 18 ++
 include/hw/ppc/spapr.h |  5 -
 target/ppc/kvm.c   |  8 
 target/ppc/kvm_ppc.h   |  6 ++
 5 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 30a5fbd3be..f5c036d3d9 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1992,6 +1992,7 @@ static const VMStateDescription vmstate_spapr = {
 _spapr_dtb,
 _spapr_cap_large_decr,
 _spapr_cap_ccf_assist,
+_spapr_cap_fwnmi,
 NULL
 }
 };
@@ -4398,6 +4399,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
 smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
 smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 spapr_caps_add_properties(smc, _abort);
 smc->irq = _irq_dual;
 smc->dr_phb_enabled = true;
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 481dfd2a27..3001098601 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -496,6 +496,14 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, 
uint8_t val,
 }
 }
 
+static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
+Error **errp)
+{
+if (!val) {
+return; /* Disabled by default */
+}
+}
+
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
 [SPAPR_CAP_HTM] = {
 .name = "htm",
@@ -595,6 +603,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
 .type = "bool",
 .apply = cap_ccf_assist_apply,
 },
+[SPAPR_CAP_FWNMI_MCE] = {
+.name = "fwnmi-mce",
+.description = "Handle fwnmi machine check exceptions",
+.index = SPAPR_CAP_FWNMI_MCE,
+.get = spapr_cap_get_bool,
+.set = spapr_cap_set_bool,
+.type = "bool",
+.apply = cap_fwnmi_mce_apply,
+},
 };
 
 static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -734,6 +751,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, 
SPAPR_CAP_HPT_MAXPAGESIZE);
 SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
 SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
+SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
 
 void spapr_caps_init(SpaprMachineState *spapr)
 {
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 61f005c6f6..7bc5fc3a9e 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -79,8 +79,10 @@ typedef enum {
 #define SPAPR_CAP_LARGE_DECREMENTER 0x08
 /* Count Cache Flush Assist HW Instruction */
 #define SPAPR_CAP_CCF_ASSIST0x09
+/* FWNMI machine check handling */
+#define SPAPR_CAP_FWNMI_MCE 0x0A
 /* Num Caps */
-#define SPAPR_CAP_NUM   (SPAPR_CAP_CCF_ASSIST + 1)
+#define SPAPR_CAP_NUM   (SPAPR_CAP_FWNMI_MCE + 1)
 
 /*
  * Capability Values
@@ -869,6 +871,7 @@ extern const VMStateDescription 
vmstate_spapr_cap_hpt_maxpagesize;
 extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
 extern const VMStateDescription vmstate_spapr_cap_large_decr;
 extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
+extern const VMStateDescription vmstate_spapr_cap_fwnmi;
 
 static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
 {
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index b5799e62b4..9161eea865 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2054,6 +2054,14 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int 
mpic_proxy)
 }
 }
 
+int kvmppc_set_fwnmi(void)
+{
+PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+CPUState *cs = CPU(cpu);
+
+return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
+}
+
 int kvmppc_smt_threads(void)
 {
 return cap_ppc_smt ? cap_ppc_smt : 1;
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index b713097bfb..2c60dedd0d 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -27,6 +27,7 @@ void kvmppc_enable_h_page_init(void);
 void kvmppc_set_papr(PowerPCCPU *cpu);
 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
+int kvmppc_set_fwnmi(void);
 int kvmppc_smt_threads(void);
 void kvmppc_error_append_smt_possible_hint(Error *const *errp);
 int kvmppc_set_smt_threads(int smt);
@@ -160,6 +161,11 @@ stati

[PATCH v20 7/7] ppc: spapr: Activate the FWNMI functionality

2020-01-17 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch sets the default value of SPAPR_CAP_FWNMI_MCE
to SPAPR_CAP_ON for machine type 4.2.

Signed-off-by: Aravinda Prasad 
Signed-off-by: Ganesh Goudar 
---
 hw/ppc/spapr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index c8bc2fa9f3..a81c18b6b6 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4454,7 +4454,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
 smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
 smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
-smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
 spapr_caps_add_properties(smc, _abort);
 smc->irq = _irq_dual;
 smc->dr_phb_enabled = true;
@@ -4544,6 +4544,7 @@ static void spapr_machine_4_1_class_options(MachineClass 
*mc)
 smc->smp_threads_vsmt = false;
 compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
 compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 }
 
 DEFINE_SPAPR_MACHINE(4_1, "4.1", false);
-- 
2.17.2




[PATCH v20 6/7] migration: Include migration support for machine check handling

2020-01-17 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch includes migration support for machine check
handling. In particular, this patch blocks VM migration
requests until the machine check error handling is
complete, as these errors are specific to the source
hardware and are irrelevant on the target hardware.

Signed-off-by: Aravinda Prasad 
[Do not set FWNMI cap in post_load, now it's done in .apply hook]
Signed-off-by: Ganesh Goudar 
---
 hw/ppc/spapr.c | 47 ++
 hw/ppc/spapr_events.c  | 16 +-
 hw/ppc/spapr_rtas.c|  2 ++
 include/hw/ppc/spapr.h |  2 ++
 4 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 46bac1a83c..c8bc2fa9f3 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -46,6 +46,7 @@
 #include "migration/qemu-file-types.h"
 #include "migration/global_state.h"
 #include "migration/register.h"
+#include "migration/blocker.h"
 #include "mmu-hash64.h"
 #include "mmu-book3s-v3.h"
 #include "cpu-models.h"
@@ -1683,6 +1684,8 @@ static void spapr_machine_reset(MachineState *machine)
 
 /* Signal all vCPUs waiting on this condition */
 qemu_cond_broadcast(>mc_delivery_cond);
+
+migrate_del_blocker(spapr->fwnmi_migration_blocker);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -1965,6 +1968,42 @@ static const VMStateDescription vmstate_spapr_dtb = {
 },
 };
 
+static bool spapr_fwnmi_needed(void *opaque)
+{
+SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+return spapr->guest_machine_check_addr != -1;
+}
+
+static int spapr_fwnmi_pre_save(void *opaque)
+{
+SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+/*
+ * Check if machine check handling is in progress and print a
+ * warning message.
+ */
+if (spapr->mc_status != -1) {
+warn_report("A machine check is being handled during migration. The"
+"handler may run and log hardware error on the destination");
+}
+
+return 0;
+}
+
+static const VMStateDescription vmstate_spapr_machine_check = {
+.name = "spapr_machine_check",
+.version_id = 1,
+.minimum_version_id = 1,
+.needed = spapr_fwnmi_needed,
+.pre_save = spapr_fwnmi_pre_save,
+.fields = (VMStateField[]) {
+VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
+VMSTATE_INT32(mc_status, SpaprMachineState),
+VMSTATE_END_OF_LIST()
+},
+};
+
 static const VMStateDescription vmstate_spapr = {
 .name = "spapr",
 .version_id = 3,
@@ -1999,6 +2038,7 @@ static const VMStateDescription vmstate_spapr = {
 _spapr_cap_large_decr,
 _spapr_cap_ccf_assist,
 _spapr_cap_fwnmi,
+_spapr_machine_check,
 NULL
 }
 };
@@ -2814,6 +2854,13 @@ static void spapr_machine_init(MachineState *machine)
 spapr_create_lmb_dr_connectors(spapr);
 }
 
+if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
+/* Create the error string for live migration blocker */
+error_setg(>fwnmi_migration_blocker,
+"A machine check is being handled during migration. The handler"
+"may run and log hardware error on the destination");
+}
+
 /* Set up RTAS event infrastructure */
 spapr_events_init(spapr);
 
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 54eaf28a9e..884e455f02 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -43,6 +43,7 @@
 #include "qemu/main-loop.h"
 #include "hw/ppc/spapr_ovec.h"
 #include 
+#include "migration/blocker.h"
 
 #define RTAS_LOG_VERSION_MASK   0xff00
 #define   RTAS_LOG_VERSION_60x0600
@@ -843,6 +844,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 {
 SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 CPUState *cs = CPU(cpu);
+int ret;
+Error *local_err = NULL;
 
 if (spapr->guest_machine_check_addr == -1) {
 /*
@@ -872,8 +875,19 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 return;
 }
 }
-spapr->mc_status = cpu->vcpu_id;
 
+ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, _err);
+if (ret == -EBUSY) {
+/*
+ * We don't want to abort so we let the migration to continue.
+ * In a rare case, the machine check handler will run on the target.
+ * Though this is not preferable, it is better than aborting
+ * the migration or killing the VM.
+ */
+warn_report("Received a fwnmi while migration was in progress");
+}
+
+spapr->mc_status = cpu->vcpu_id;
 spapr_mce_dispatch_elog(cpu, recovered);
 }
 
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 3f162d82f5..4ce8e48d2a 1006

[PATCH v20 1/7] Wrapper function to wait on condition for the main loop mutex

2020-01-17 Thread Ganesh Goudar
From: Aravinda Prasad 

Introduce a wrapper function to wait on condition for
the main loop mutex. This function atomically releases
the main loop mutex and causes the calling thread to
block on the condition. This wrapper is required because
qemu_global_mutex is a static variable.

Signed-off-by: Aravinda Prasad 
Signed-off-by: Ganesh Goudar 
Reviewed-by: David Gibson 
Reviewed-by: Greg Kurz 
---
 cpus.c   | 5 +
 include/qemu/main-loop.h | 8 
 2 files changed, 13 insertions(+)

diff --git a/cpus.c b/cpus.c
index be2d655f37..c06a2ad20b 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1839,6 +1839,11 @@ void qemu_mutex_unlock_iothread(void)
 qemu_mutex_unlock(_global_mutex);
 }
 
+void qemu_cond_wait_iothread(QemuCond *cond)
+{
+qemu_cond_wait(cond, _global_mutex);
+}
+
 static bool all_vcpus_paused(void)
 {
 CPUState *cpu;
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index f6ba78ea73..a6d20b0719 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -295,6 +295,14 @@ void qemu_mutex_lock_iothread_impl(const char *file, int 
line);
  */
 void qemu_mutex_unlock_iothread(void);
 
+/*
+ * qemu_cond_wait_iothread: Wait on condition for the main loop mutex
+ *
+ * This function atomically releases the main loop mutex and causes
+ * the calling thread to block on the condition.
+ */
+void qemu_cond_wait_iothread(QemuCond *cond);
+
 /* internal interfaces */
 
 void qemu_fd_register(int fd);
-- 
2.17.2




[PATCH v20 0/7] target-ppc/spapr: Add FWNMI support in QEMU for PowerKVM guests

2020-01-17 Thread Ganesh Goudar
This patch set adds support for FWNMI in PowerKVM guests.

System errors such as SLB multihit and memory errors
that cannot be corrected by hardware are passed on to
the kernel for handling by raising a machine check
exception (an NMI). Upon such machine check exceptions,
if the address in error belongs to a guest, then KVM
invokes the guest's 0x200 interrupt vector if the guest
is not FWNMI capable. For an FWNMI-capable guest,
KVM passes control to QEMU by exiting the guest.

This patch series adds functionality to QEMU to pass
on such machine check exceptions to the FWNMI capable
guest kernel by building an error log and invoking
the guest registered machine check handling routine.

The KVM changes are now part of the upstream kernel
(commit e20bbd3d). This series contains the QEMU changes.

Change Log v20:
  - Remove code left over from previous version.

Change Log v19:
  - Create error object for migration blocker in machine_init().
  - Remove the check to see whether fwnmi calls are already registered,
which is no longer needed.
  - Register fwnmi RTAS calls in core_rtas_register_types() where
other RTAS calls are registered.
  - Bail out from interlock call if the cap is not set.
  - Reorder and add missing S-O-Bs.

Change Log v18:
  - Dynamically create the Error object before adding it as blocker
  - In apply hook check if the fwnmi calls are already registered and
if kvm supports fwnmi before registering the fwnmi calls.
  - In rtas_ibm_nmi_register() test the feature flag before attempting
to get the RTAS address
  - Introduce a bool member "fwnmi_calls_registered" to check if the
fwnmi calls are registered and use the same in needed hook to save
the state during migration. 

Change Log v17:
  - Add fwnmi cap to migration state
  - Rephrase the commit message in patch 2/7

Change Log v16:
  - Fixed coding style problems

Change Log v15:
  - Removed cap_ppc_fwnmi
  - Moved fwnmi registration to .apply hook
  - Assume SLOF has allocated enough room for rtas error log
  - Using ARRAY_SIZE to end the loop
  - Do not set FWNMI cap in post_load, now it's done in .apply hook

Change Log v14:
  - Feature activation moved to a separate patch
  - Fixed issues with migration blocker

Change Log v13:
  - Minor fixes (mostly nits)
  - Moved FWNMI guest registration check from patch 4 to 3.

Change Log v12:
  - Rebased to latest ppc-for-4.2 (SHA b1e8156743)

Change Log v11:
  - Moved FWNMI SPAPR cap defaults to 4.2 class option
  - Fixed issues with handling fwnmi KVM capability

---

Aravinda Prasad (7):
  Wrapper function to wait on condition for the main loop mutex
  ppc: spapr: Introduce FWNMI capability
  target/ppc: Handle NMI guest exit
  target/ppc: Build rtas error log upon an MCE
  ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS
calls
  migration: Include migration support for machine check handling
  ppc: spapr: Activate the FWNMI functionality

 cpus.c   |   5 +
 hw/ppc/spapr.c   |  58 +
 hw/ppc/spapr_caps.c  |  25 
 hw/ppc/spapr_events.c| 269 +++
 hw/ppc/spapr_rtas.c  |  87 +
 include/hw/ppc/spapr.h   |  25 +++-
 include/qemu/main-loop.h |   8 ++
 target/ppc/kvm.c |  24 
 target/ppc/kvm_ppc.h |   8 ++
 target/ppc/trace-events  |   1 +
 10 files changed, 508 insertions(+), 2 deletions(-)

-- 
2.17.2




[PATCH v19 7/7] ppc: spapr: Activate the FWNMI functionality

2020-01-16 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch sets the default value of SPAPR_CAP_FWNMI_MCE
to SPAPR_CAP_ON for machine type 4.2.

Signed-off-by: Aravinda Prasad 
Signed-off-by: Ganesh Goudar 
---
 hw/ppc/spapr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index c8bc2fa9f3..a81c18b6b6 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4454,7 +4454,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
 smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
 smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
-smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
 spapr_caps_add_properties(smc, _abort);
 smc->irq = _irq_dual;
 smc->dr_phb_enabled = true;
@@ -4544,6 +4544,7 @@ static void spapr_machine_4_1_class_options(MachineClass 
*mc)
 smc->smp_threads_vsmt = false;
 compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
 compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 }
 
 DEFINE_SPAPR_MACHINE(4_1, "4.1", false);
-- 
2.17.2




[PATCH v19 6/7] migration: Include migration support for machine check handling

2020-01-16 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch includes migration support for machine check
handling. In particular, this patch blocks VM migration
requests until the machine check error handling is
complete, as these errors are specific to the source
hardware and are irrelevant on the target hardware.

Signed-off-by: Aravinda Prasad 
[Do not set FWNMI cap in post_load, now it's done in .apply hook]
Signed-off-by: Ganesh Goudar 
---
 hw/ppc/spapr.c | 47 ++
 hw/ppc/spapr_events.c  | 16 +-
 hw/ppc/spapr_rtas.c|  4 
 include/hw/ppc/spapr.h |  2 +-
 4 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 46bac1a83c..c8bc2fa9f3 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -46,6 +46,7 @@
 #include "migration/qemu-file-types.h"
 #include "migration/global_state.h"
 #include "migration/register.h"
+#include "migration/blocker.h"
 #include "mmu-hash64.h"
 #include "mmu-book3s-v3.h"
 #include "cpu-models.h"
@@ -1683,6 +1684,8 @@ static void spapr_machine_reset(MachineState *machine)
 
 /* Signal all vCPUs waiting on this condition */
 qemu_cond_broadcast(>mc_delivery_cond);
+
+migrate_del_blocker(spapr->fwnmi_migration_blocker);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -1965,6 +1968,42 @@ static const VMStateDescription vmstate_spapr_dtb = {
 },
 };
 
+static bool spapr_fwnmi_needed(void *opaque)
+{
+SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+return spapr->guest_machine_check_addr != -1;
+}
+
+static int spapr_fwnmi_pre_save(void *opaque)
+{
+SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+/*
+ * Check if machine check handling is in progress and print a
+ * warning message.
+ */
+if (spapr->mc_status != -1) {
+warn_report("A machine check is being handled during migration. The"
+"handler may run and log hardware error on the destination");
+}
+
+return 0;
+}
+
+static const VMStateDescription vmstate_spapr_machine_check = {
+.name = "spapr_machine_check",
+.version_id = 1,
+.minimum_version_id = 1,
+.needed = spapr_fwnmi_needed,
+.pre_save = spapr_fwnmi_pre_save,
+.fields = (VMStateField[]) {
+VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
+VMSTATE_INT32(mc_status, SpaprMachineState),
+VMSTATE_END_OF_LIST()
+},
+};
+
 static const VMStateDescription vmstate_spapr = {
 .name = "spapr",
 .version_id = 3,
@@ -1999,6 +2038,7 @@ static const VMStateDescription vmstate_spapr = {
 _spapr_cap_large_decr,
 _spapr_cap_ccf_assist,
 _spapr_cap_fwnmi,
+_spapr_machine_check,
 NULL
 }
 };
@@ -2814,6 +2854,13 @@ static void spapr_machine_init(MachineState *machine)
 spapr_create_lmb_dr_connectors(spapr);
 }
 
+if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
+/* Create the error string for live migration blocker */
+error_setg(>fwnmi_migration_blocker,
+"A machine check is being handled during migration. The handler"
+"may run and log hardware error on the destination");
+}
+
 /* Set up RTAS event infrastructure */
 spapr_events_init(spapr);
 
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 54eaf28a9e..884e455f02 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -43,6 +43,7 @@
 #include "qemu/main-loop.h"
 #include "hw/ppc/spapr_ovec.h"
 #include 
+#include "migration/blocker.h"
 
 #define RTAS_LOG_VERSION_MASK   0xff00
 #define   RTAS_LOG_VERSION_60x0600
@@ -843,6 +844,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 {
 SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 CPUState *cs = CPU(cpu);
+int ret;
+Error *local_err = NULL;
 
 if (spapr->guest_machine_check_addr == -1) {
 /*
@@ -872,8 +875,19 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 return;
 }
 }
-spapr->mc_status = cpu->vcpu_id;
 
+ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err);
+if (ret == -EBUSY) {
+/*
+ * We don't want to abort, so we let the migration continue.
+ * In a rare case, the machine check handler will run on the target.
+ * Though this is not preferable, it is better than aborting
+ * the migration or killing the VM.
+ */
+warn_report("Received a fwnmi while migration was in progress");
+}
+
+spapr->mc_status = cpu->vcpu_id;
 spapr_mce_dispatch_elog(cpu, recovered);
 }
 
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 3f162d82f5..b1594443c7 1006

[PATCH v19 5/7] ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS calls

2020-01-16 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch adds support in QEMU to handle "ibm,nmi-register"
and "ibm,nmi-interlock" RTAS calls.

The machine check notification address is saved when the
OS issues "ibm,nmi-register" RTAS call.

This patch also handles the case when multiple processors
experience machine check at or about the same time by
handling "ibm,nmi-interlock" call. In such cases, as per
PAPR, subsequent processors serialize waiting for the first
processor to issue the "ibm,nmi-interlock" call. The second
processor that also received a machine check error waits
till the first processor is done reading the error log.
The first processor issues "ibm,nmi-interlock" call
when the error log is consumed.

Signed-off-by: Aravinda Prasad 
[Register fwnmi RTAS calls in core_rtas_register_types()
 where other RTAS calls are registered]
Signed-off-by: Ganesh Goudar 
---
 hw/ppc/spapr_caps.c|  7 +
 hw/ppc/spapr_rtas.c| 59 ++
 include/hw/ppc/spapr.h |  7 -
 3 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 3001098601..c43498da49 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -502,6 +502,13 @@ static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, 
uint8_t val,
 if (!val) {
 return; /* Disabled by default */
 }
+
+if (kvm_enabled()) {
+if (kvmppc_set_fwnmi() < 0) {
+error_report("Could not enable fwnmi capability");
+exit(1);
+}
+}
 }
 
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 2c066a372d..3f162d82f5 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -400,6 +400,61 @@ static void rtas_get_power_level(PowerPCCPU *cpu, 
SpaprMachineState *spapr,
 rtas_st(rets, 1, 100);
 }
 
+static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
+  SpaprMachineState *spapr,
+  uint32_t token, uint32_t nargs,
+  target_ulong args,
+  uint32_t nret, target_ulong rets)
+{
+hwaddr rtas_addr;
+
+if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+rtas_addr = spapr_get_rtas_addr();
+if (!rtas_addr) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+spapr->guest_machine_check_addr = rtas_ld(args, 1);
+rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
+   SpaprMachineState *spapr,
+   uint32_t token, uint32_t nargs,
+   target_ulong args,
+   uint32_t nret, target_ulong rets)
+{
+if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+if (spapr->guest_machine_check_addr == -1) {
+/* NMI register not called */
+rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+return;
+}
+
+if (spapr->mc_status != cpu->vcpu_id) {
+/* The vCPU that hit the NMI should invoke "ibm,nmi-interlock" */
+rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+return;
+}
+
+/*
+ * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
+ * hence unset mc_status.
+ */
+spapr->mc_status = -1;
+qemu_cond_signal(&spapr->mc_delivery_cond);
+rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
 static struct rtas_call {
 const char *name;
 spapr_rtas_fn fn;
@@ -528,6 +583,10 @@ static void core_rtas_register_types(void)
 rtas_set_power_level);
 spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level",
 rtas_get_power_level);
+spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
+rtas_ibm_nmi_register);
+spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
+rtas_ibm_nmi_interlock);
 }
 
 type_init(core_rtas_register_types)
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 652a5514e8..a90e677cc3 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -218,6 +218,8 @@ struct SpaprMachineState {
 
 unsigned gpu_numa_id;
 SpaprTpmProxy *tpm_proxy;
+
+bool fwnmi_calls_registered;
 };
 
 #define H_SUCCESS 0
@@ -656,8 +658,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong 
opcode,
 #define RTAS_IBM_REMOVE_PE_DMA_WINDOW   (RTAS_TOKEN_BASE + 0x28)
 #define RTAS_IBM_RESET_PE_DMA_WINDOW(RTAS_TOKEN_BASE + 0x29)
 #define RTAS_IBM_SUSPEND_ME  

[PATCH v19 1/7] Wrapper function to wait on condition for the main loop mutex

2020-01-16 Thread Ganesh Goudar
From: Aravinda Prasad 

Introduce a wrapper function to wait on condition for
the main loop mutex. This function atomically releases
the main loop mutex and causes the calling thread to
block on the condition. This wrapper is required because
qemu_global_mutex is a static variable.

Signed-off-by: Aravinda Prasad 
Signed-off-by: Ganesh Goudar 
Reviewed-by: David Gibson 
Reviewed-by: Greg Kurz 
---
 cpus.c   | 5 +
 include/qemu/main-loop.h | 8 
 2 files changed, 13 insertions(+)

diff --git a/cpus.c b/cpus.c
index be2d655f37..c06a2ad20b 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1839,6 +1839,11 @@ void qemu_mutex_unlock_iothread(void)
 qemu_mutex_unlock(&qemu_global_mutex);
 }
 
+void qemu_cond_wait_iothread(QemuCond *cond)
+{
+qemu_cond_wait(cond, &qemu_global_mutex);
+}
+
 static bool all_vcpus_paused(void)
 {
 CPUState *cpu;
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index f6ba78ea73..a6d20b0719 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -295,6 +295,14 @@ void qemu_mutex_lock_iothread_impl(const char *file, int 
line);
  */
 void qemu_mutex_unlock_iothread(void);
 
+/*
+ * qemu_cond_wait_iothread: Wait on condition for the main loop mutex
+ *
+ * This function atomically releases the main loop mutex and causes
+ * the calling thread to block on the condition.
+ */
+void qemu_cond_wait_iothread(QemuCond *cond);
+
 /* internal interfaces */
 
 void qemu_fd_register(int fd);
-- 
2.17.2




[PATCH v19 3/7] target/ppc: Handle NMI guest exit

2020-01-16 Thread Ganesh Goudar
From: Aravinda Prasad 

Memory errors such as bit flips that cannot be corrected
by hardware are passed on to the kernel for handling.
If the memory address in error belongs to the guest, then
the guest kernel is responsible for taking suitable action.
Patch [1] enhances KVM to exit guest with exit reason
set to KVM_EXIT_NMI in such cases. This patch handles
KVM_EXIT_NMI exit.

[1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
(e20bbd3d and related commits)

Signed-off-by: Aravinda Prasad 
Signed-off-by: Ganesh Goudar 
Reviewed-by: David Gibson 
Reviewed-by: Greg Kurz 
---
 hw/ppc/spapr.c  |  8 
 hw/ppc/spapr_events.c   | 37 +
 include/hw/ppc/spapr.h  | 10 ++
 target/ppc/kvm.c| 14 ++
 target/ppc/kvm_ppc.h|  2 ++
 target/ppc/trace-events |  1 +
 6 files changed, 72 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index f5c036d3d9..46bac1a83c 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1677,6 +1677,12 @@ static void spapr_machine_reset(MachineState *machine)
 first_ppc_cpu->env.gpr[5] = 0;
 
 spapr->cas_reboot = false;
+
+spapr->mc_status = -1;
+spapr->guest_machine_check_addr = -1;
+
+/* Signal all vCPUs waiting on this condition */
+qemu_cond_broadcast(&spapr->mc_delivery_cond);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -2971,6 +2977,8 @@ static void spapr_machine_init(MachineState *machine)
 
 kvmppc_spapr_enable_inkernel_multitce();
 }
+
+qemu_cond_init(&spapr->mc_delivery_cond);
 }
 
 static int spapr_kvm_type(MachineState *machine, const char *vm_type)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index e355e000d0..dfc0de840a 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -40,6 +40,7 @@
 #include "hw/ppc/spapr_drc.h"
 #include "qemu/help_option.h"
 #include "qemu/bcd.h"
+#include "qemu/main-loop.h"
 #include "hw/ppc/spapr_ovec.h"
 #include <libfdt.h>
 
@@ -622,6 +623,42 @@ void 
spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
 RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
 }
 
+void spapr_mce_req_event(PowerPCCPU *cpu)
+{
+SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+CPUState *cs = CPU(cpu);
+
+if (spapr->guest_machine_check_addr == -1) {
+/*
+ * This implies that we have hit a machine check either when the
+ * guest has not registered FWNMI (i.e., "ibm,nmi-register" not
+ * called) or between system reset and "ibm,nmi-register".
+ * Fall back to the old machine check behavior in such cases.
+ */
+cs->exception_index = POWERPC_EXCP_MCHECK;
+ppc_cpu_do_interrupt(cs);
+return;
+}
+
+while (spapr->mc_status != -1) {
+/*
+ * Check whether the same CPU got machine check error
+ * while still handling the mc error (i.e., before
+ * that CPU called "ibm,nmi-interlock")
+ */
+if (spapr->mc_status == cpu->vcpu_id) {
+qemu_system_guest_panicked(NULL);
+return;
+}
+qemu_cond_wait_iothread(&spapr->mc_delivery_cond);
+/* Meanwhile if the system is reset, then just return */
+if (spapr->guest_machine_check_addr == -1) {
+return;
+}
+}
+spapr->mc_status = cpu->vcpu_id;
+}
+
 static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
 uint32_t token, uint32_t nargs,
 target_ulong args,
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7bc5fc3a9e..909d3976f9 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -191,6 +191,15 @@ struct SpaprMachineState {
  * occurs during the unplug process. */
 QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs;
 
+/* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */
+target_ulong guest_machine_check_addr;
+/*
+ * mc_status is set to -1 if mc is not in progress, else is set to the CPU
+ * handling the mc.
+ */
+int mc_status;
+QemuCond mc_delivery_cond;
+
 /*< public >*/
 char *kvm_type;
 char *host_model;
@@ -804,6 +813,7 @@ void spapr_clear_pending_events(SpaprMachineState *spapr);
 int spapr_max_server_number(SpaprMachineState *spapr);
 void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
   uint64_t pte0, uint64_t pte1);
+void spapr_mce_req_event(PowerPCCPU *cpu);
 
 /* DRC callbacks. */
 void spapr_core_release(DeviceState *dev);
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 9161eea865..21a4c79d41 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -1702,6 +1702,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run 
*run)
 ret = 0;
 break;
 
+case KV

[PATCH v19 4/7] target/ppc: Build rtas error log upon an MCE

2020-01-16 Thread Ganesh Goudar
From: Aravinda Prasad 

Upon a machine check exception (MCE) in a guest address space,
KVM causes a guest exit to enable QEMU to build and pass the
error to the guest in the PAPR defined rtas error log format.

This patch builds the rtas error log, copies it to the rtas_addr
and then invokes the guest registered machine check handler. The
handler in the guest takes suitable action(s) depending on the type
and criticality of the error. For example, if an error is
unrecoverable memory corruption in an application inside the
guest, then the guest kernel sends a SIGBUS to the application.
For recoverable errors, the guest performs recovery actions and
logs the error.

Signed-off-by: Aravinda Prasad 
[Assume SLOF has allocated enough room for rtas error log]
Signed-off-by: Ganesh Goudar 
Reviewed-by: David Gibson 
---
 hw/ppc/spapr_events.c  | 220 -
 hw/ppc/spapr_rtas.c|  26 +
 include/hw/ppc/spapr.h |   6 +-
 target/ppc/kvm.c   |   4 +-
 4 files changed, 253 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index dfc0de840a..54eaf28a9e 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -214,6 +214,104 @@ struct hp_extended_log {
 struct rtas_event_log_v6_hp hp;
 } QEMU_PACKED;
 
+struct rtas_event_log_v6_mc {
+#define RTAS_LOG_V6_SECTION_ID_MC   0x4D43 /* MC */
+struct rtas_event_log_v6_section_header hdr;
+uint32_t fru_id;
+uint32_t proc_id;
+uint8_t error_type;
+#define RTAS_LOG_V6_MC_TYPE_UE   0
+#define RTAS_LOG_V6_MC_TYPE_SLB  1
+#define RTAS_LOG_V6_MC_TYPE_ERAT 2
+#define RTAS_LOG_V6_MC_TYPE_TLB  4
+#define RTAS_LOG_V6_MC_TYPE_D_CACHE  5
+#define RTAS_LOG_V6_MC_TYPE_I_CACHE  7
+uint8_t sub_err_type;
+#define RTAS_LOG_V6_MC_UE_INDETERMINATE  0
+#define RTAS_LOG_V6_MC_UE_IFETCH 1
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
+#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+#define RTAS_LOG_V6_MC_SLB_PARITY0
+#define RTAS_LOG_V6_MC_SLB_MULTIHIT  1
+#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2
+#define RTAS_LOG_V6_MC_ERAT_PARITY   1
+#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2
+#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE3
+#define RTAS_LOG_V6_MC_TLB_PARITY1
+#define RTAS_LOG_V6_MC_TLB_MULTIHIT  2
+#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3
+uint8_t reserved_1[6];
+uint64_t effective_address;
+uint64_t logical_address;
+} QEMU_PACKED;
+
+struct mc_extended_log {
+struct rtas_event_log_v6 v6hdr;
+struct rtas_event_log_v6_mc mc;
+} QEMU_PACKED;
+
+struct MC_ierror_table {
+unsigned long srr1_mask;
+unsigned long srr1_value;
+bool nip_valid; /* nip is a valid indicator of faulting address */
+uint8_t error_type;
+uint8_t error_subtype;
+unsigned int initiator;
+unsigned int severity;
+};
+
+static const struct MC_ierror_table mc_ierror_table[] = {
+{ 0x081c, 0x0004, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0008, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x000c, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0010, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0014, true,
+  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0018, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } };
+
+struct MC_derror_table {
+unsigned long dsisr_value;
+bool dar_valid; /* dar is a valid indicator of faulting address */
+uint8_t error_type;
+uint8_t error_subtype;
+unsigned int initiator;
+unsigned int severity;
+};
+
+static const struct MC_derror_table mc_derror_table[] = {
+{ 0x8000, false,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x4000, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x0800, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT

[PATCH v19 2/7] ppc: spapr: Introduce FWNMI capability

2020-01-16 Thread Ganesh Goudar
From: Aravinda Prasad 

Introduce fwnmi as an spapr capability and add a helper function
that tries to enable it; the helper will be used by a following
patch of the series. This patch by itself does not change the
existing behavior.

Signed-off-by: Aravinda Prasad 
[eliminate cap_ppc_fwnmi, add fwnmi cap to migration state
 and rephrase the commit message]
Signed-off-by: Ganesh Goudar 
---
 hw/ppc/spapr.c |  2 ++
 hw/ppc/spapr_caps.c| 18 ++
 include/hw/ppc/spapr.h |  5 -
 target/ppc/kvm.c   |  8 
 target/ppc/kvm_ppc.h   |  6 ++
 5 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 30a5fbd3be..f5c036d3d9 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1992,6 +1992,7 @@ static const VMStateDescription vmstate_spapr = {
 &vmstate_spapr_dtb,
 &vmstate_spapr_cap_large_decr,
 &vmstate_spapr_cap_ccf_assist,
+&vmstate_spapr_cap_fwnmi,
 NULL
 }
 };
@@ -4398,6 +4399,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
 smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
 smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 spapr_caps_add_properties(smc, &error_abort);
 smc->irq = &spapr_irq_dual;
 smc->dr_phb_enabled = true;
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 481dfd2a27..3001098601 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -496,6 +496,14 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, 
uint8_t val,
 }
 }
 
+static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
+Error **errp)
+{
+if (!val) {
+return; /* Disabled by default */
+}
+}
+
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
 [SPAPR_CAP_HTM] = {
 .name = "htm",
@@ -595,6 +603,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
 .type = "bool",
 .apply = cap_ccf_assist_apply,
 },
+[SPAPR_CAP_FWNMI_MCE] = {
+.name = "fwnmi-mce",
+.description = "Handle fwnmi machine check exceptions",
+.index = SPAPR_CAP_FWNMI_MCE,
+.get = spapr_cap_get_bool,
+.set = spapr_cap_set_bool,
+.type = "bool",
+.apply = cap_fwnmi_mce_apply,
+},
 };
 
 static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -734,6 +751,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, 
SPAPR_CAP_HPT_MAXPAGESIZE);
 SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
 SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
+SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
 
 void spapr_caps_init(SpaprMachineState *spapr)
 {
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 61f005c6f6..7bc5fc3a9e 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -79,8 +79,10 @@ typedef enum {
 #define SPAPR_CAP_LARGE_DECREMENTER 0x08
 /* Count Cache Flush Assist HW Instruction */
 #define SPAPR_CAP_CCF_ASSIST0x09
+/* FWNMI machine check handling */
+#define SPAPR_CAP_FWNMI_MCE 0x0A
 /* Num Caps */
-#define SPAPR_CAP_NUM   (SPAPR_CAP_CCF_ASSIST + 1)
+#define SPAPR_CAP_NUM   (SPAPR_CAP_FWNMI_MCE + 1)
 
 /*
  * Capability Values
@@ -869,6 +871,7 @@ extern const VMStateDescription 
vmstate_spapr_cap_hpt_maxpagesize;
 extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
 extern const VMStateDescription vmstate_spapr_cap_large_decr;
 extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
+extern const VMStateDescription vmstate_spapr_cap_fwnmi;
 
 static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
 {
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index b5799e62b4..9161eea865 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2054,6 +2054,14 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int 
mpic_proxy)
 }
 }
 
+int kvmppc_set_fwnmi(void)
+{
+PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+CPUState *cs = CPU(cpu);
+
+return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
+}
+
 int kvmppc_smt_threads(void)
 {
 return cap_ppc_smt ? cap_ppc_smt : 1;
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index b713097bfb..2c60dedd0d 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -27,6 +27,7 @@ void kvmppc_enable_h_page_init(void);
 void kvmppc_set_papr(PowerPCCPU *cpu);
 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
+int kvmppc_set_fwnmi(void);
 int kvmppc_smt_threads(void);
 void kvmppc_error_append_smt_possible_hint(Error *const *errp);
 int kvmppc_set_smt_threads(int smt);
@@ -160,6 +161,11 @@ stati

[PATCH v19 0/7] target-ppc/spapr: Add FWNMI support in QEMU for PowerKVM guests

2020-01-16 Thread Ganesh Goudar
This patch set adds support for FWNMI in PowerKVM guests.

System errors such as SLB multihit and memory errors
that cannot be corrected by hardware are passed on to
the kernel for handling by raising a machine check
exception (an NMI). Upon such machine check exceptions,
if the address in error belongs to a guest, then KVM
invokes the guest's 0x200 interrupt vector if the guest
is not FWNMI capable. For an FWNMI-capable guest,
KVM passes control to QEMU by exiting the guest.

This patch series adds functionality to QEMU to pass
on such machine check exceptions to the FWNMI capable
guest kernel by building an error log and invoking
the guest registered machine check handling routine.

The KVM changes are now part of the upstream kernel
(commit e20bbd3d). This series contains the QEMU changes.

Change Log v19:
  - Create error object for migration blocker in machine_init().
  - Remove the check to see whether fwnmi calls are already registered,
which is no longer needed.
  - Register fwnmi RTAS calls in core_rtas_register_types() where
other RTAS calls are registered.
  - Bail out from interlock call if the cap is not set.
  - Reorder and add missing S-O-Bs.

Change Log v18:
  - Dynamically create the Error object before adding it as blocker
  - In apply hook check if the fwnmi calls are already registered and
if kvm supports fwnmi before registering the fwnmi calls.
  - In rtas_ibm_nmi_register() test the feature flag before attempting
to get the RTAS address
  - Introduce a bool member "fwnmi_calls_registered" to check if the
fwnmi calls are registered and use the same in needed hook to save
the state during migration. 

Change Log v17:
  - Add fwnmi cap to migration state
  - Rephrase the commit message in patch 2/7

Change Log v16:
  - Fixed coding style problems

Change Log v15:
  - Removed cap_ppc_fwnmi
  - Moved fwnmi registration to .apply hook
  - Assume SLOF has allocated enough room for rtas error log
  - Using ARRAY_SIZE to end the loop
  - Do not set FWNMI cap in post_load; it is now done in the .apply hook

Change Log v14:
  - Feature activation moved to a separate patch
  - Fixed issues with migration blocker

Change Log v13:
  - Minor fixes (mostly nits)
  - Moved FWNMI guest registration check from patch 4 to 3.

Change Log v12:
  - Rebased to latest ppc-for-4.2 (SHA b1e8156743)

Change Log v11:
  - Moved FWNMI SPAPR cap defaults to 4.2 class option
  - Fixed issues with handling fwnmi KVM capability

---

Aravinda Prasad (7):
  Wrapper function to wait on condition for the main loop mutex
  ppc: spapr: Introduce FWNMI capability
  target/ppc: Handle NMI guest exit
  target/ppc: Build rtas error log upon an MCE
  ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS
calls
  migration: Include migration support for machine check handling
  ppc: spapr: Activate the FWNMI functionality

 cpus.c   |   5 +
 hw/ppc/spapr.c   |  58 +
 hw/ppc/spapr_caps.c  |  25 
 hw/ppc/spapr_events.c| 269 +++
 hw/ppc/spapr_rtas.c  |  89 +
 include/hw/ppc/spapr.h   |  26 +++-
 include/qemu/main-loop.h |   8 ++
 target/ppc/kvm.c |  24 
 target/ppc/kvm_ppc.h |   8 ++
 target/ppc/trace-events  |   1 +
 10 files changed, 511 insertions(+), 2 deletions(-)

-- 
2.17.2




[PATCH v18 3/7] target/ppc: Handle NMI guest exit

2020-01-01 Thread Ganesh Goudar
From: Aravinda Prasad 

Memory errors such as bit flips that cannot be corrected
by hardware are passed on to the kernel for handling.
If the memory address in error belongs to the guest, then
the guest kernel is responsible for taking suitable action.
Patch [1] enhances KVM to exit guest with exit reason
set to KVM_EXIT_NMI in such cases. This patch handles
KVM_EXIT_NMI exit.

[1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
(e20bbd3d and related commits)

Signed-off-by: Aravinda Prasad 
Reviewed-by: David Gibson 
Reviewed-by: Greg Kurz 
---
 hw/ppc/spapr.c  |  8 
 hw/ppc/spapr_events.c   | 37 +
 include/hw/ppc/spapr.h  | 10 ++
 target/ppc/kvm.c| 14 ++
 target/ppc/kvm_ppc.h|  2 ++
 target/ppc/trace-events |  1 +
 6 files changed, 72 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index c91e64aad0..975d7da734 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1679,6 +1679,12 @@ static void spapr_machine_reset(MachineState *machine)
 first_ppc_cpu->env.gpr[5] = 0;
 
 spapr->cas_reboot = false;
+
+spapr->mc_status = -1;
+spapr->guest_machine_check_addr = -1;
+
+/* Signal all vCPUs waiting on this condition */
+qemu_cond_broadcast(&spapr->mc_delivery_cond);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -2973,6 +2979,8 @@ static void spapr_machine_init(MachineState *machine)
 
 kvmppc_spapr_enable_inkernel_multitce();
 }
+
+qemu_cond_init(&spapr->mc_delivery_cond);
 }
 
 static int spapr_kvm_type(MachineState *machine, const char *vm_type)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index e355e000d0..dfc0de840a 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -40,6 +40,7 @@
 #include "hw/ppc/spapr_drc.h"
 #include "qemu/help_option.h"
 #include "qemu/bcd.h"
+#include "qemu/main-loop.h"
 #include "hw/ppc/spapr_ovec.h"
 #include <libfdt.h>
 
@@ -622,6 +623,42 @@ void 
spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
 RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
 }
 
+void spapr_mce_req_event(PowerPCCPU *cpu)
+{
+SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+CPUState *cs = CPU(cpu);
+
+if (spapr->guest_machine_check_addr == -1) {
+/*
+ * This implies that we have hit a machine check either when the
+ * guest has not registered FWNMI (i.e., "ibm,nmi-register" not
+ * called) or between system reset and "ibm,nmi-register".
+ * Fall back to the old machine check behavior in such cases.
+ */
+cs->exception_index = POWERPC_EXCP_MCHECK;
+ppc_cpu_do_interrupt(cs);
+return;
+}
+
+while (spapr->mc_status != -1) {
+/*
+ * Check whether the same CPU got machine check error
+ * while still handling the mc error (i.e., before
+ * that CPU called "ibm,nmi-interlock")
+ */
+if (spapr->mc_status == cpu->vcpu_id) {
+qemu_system_guest_panicked(NULL);
+return;
+}
+qemu_cond_wait_iothread(&spapr->mc_delivery_cond);
+/* Meanwhile if the system is reset, then just return */
+if (spapr->guest_machine_check_addr == -1) {
+return;
+}
+}
+spapr->mc_status = cpu->vcpu_id;
+}
+
 static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
 uint32_t token, uint32_t nargs,
 target_ulong args,
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7bc5fc3a9e..909d3976f9 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -191,6 +191,15 @@ struct SpaprMachineState {
  * occurs during the unplug process. */
 QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs;
 
+/* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */
+target_ulong guest_machine_check_addr;
+/*
+ * mc_status is set to -1 if mc is not in progress, else is set to the CPU
+ * handling the mc.
+ */
+int mc_status;
+QemuCond mc_delivery_cond;
+
 /*< public >*/
 char *kvm_type;
 char *host_model;
@@ -804,6 +813,7 @@ void spapr_clear_pending_events(SpaprMachineState *spapr);
 int spapr_max_server_number(SpaprMachineState *spapr);
 void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
   uint64_t pte0, uint64_t pte1);
+void spapr_mce_req_event(PowerPCCPU *cpu);
 
 /* DRC callbacks. */
 void spapr_core_release(DeviceState *dev);
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 518de7e4b7..2aebb44e2f 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -1708,6 +1708,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run 
*run)
 ret = 0;
 break;
 
+case KVM_EXIT_NMI:
+trace_kvm_handle_nmi_exception();
+ret = kvm_handle_nmi(cpu, run);
+break;
+
 default:
 fprintf(stderr, "KVM: unknown exit reason %d\n", 

[PATCH v18 7/7] ppc: spapr: Activate the FWNMI functionality

2020-01-01 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch sets the default value of SPAPR_CAP_FWNMI_MCE
to SPAPR_CAP_ON for machine type 4.2.

Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 4acdc30100..e705b0f9e9 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4450,7 +4450,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
 smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
 smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
-smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
 spapr_caps_add_properties(smc, &error_abort);
 smc->irq = &spapr_irq_dual;
 smc->dr_phb_enabled = true;
@@ -4540,6 +4540,7 @@ static void spapr_machine_4_1_class_options(MachineClass 
*mc)
 smc->smp_threads_vsmt = false;
 compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
 compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 }
 
 DEFINE_SPAPR_MACHINE(4_1, "4.1", false);
-- 
2.17.2




[PATCH v18 5/7] ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS calls

2020-01-01 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch adds support in QEMU to handle "ibm,nmi-register"
and "ibm,nmi-interlock" RTAS calls.

The machine check notification address is saved when the
OS issues "ibm,nmi-register" RTAS call.

This patch also handles the case when multiple processors
experience machine check at or about the same time by
handling "ibm,nmi-interlock" call. In such cases, as per
PAPR, subsequent processors serialize waiting for the first
processor to issue the "ibm,nmi-interlock" call. The second
processor that also received a machine check error waits
till the first processor is done reading the error log.
The first processor issues "ibm,nmi-interlock" call
when the error log is consumed.

[Move fwnmi registration to .apply hook]
Signed-off-by: Ganesh Goudar 
Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr_caps.c|  6 +
 hw/ppc/spapr_rtas.c| 58 ++
 include/hw/ppc/spapr.h |  7 -
 3 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 3001098601..e922419cfb 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -502,6 +502,12 @@ static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, 
uint8_t val,
 if (!val) {
 return; /* Disabled by default */
 }
+
+if (!spapr->fwnmi_calls_registered && !kvmppc_set_fwnmi()) {
+/* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
+spapr_fwnmi_register();
+spapr->fwnmi_calls_registered = true;
+}
 }
 
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 2c066a372d..54b142f35b 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -400,6 +400,56 @@ static void rtas_get_power_level(PowerPCCPU *cpu, 
SpaprMachineState *spapr,
 rtas_st(rets, 1, 100);
 }
 
+static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
+  SpaprMachineState *spapr,
+  uint32_t token, uint32_t nargs,
+  target_ulong args,
+  uint32_t nret, target_ulong rets)
+{
+hwaddr rtas_addr;
+
+if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+rtas_addr = spapr_get_rtas_addr();
+if (!rtas_addr) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+spapr->guest_machine_check_addr = rtas_ld(args, 1);
+rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
+   SpaprMachineState *spapr,
+   uint32_t token, uint32_t nargs,
+   target_ulong args,
+   uint32_t nret, target_ulong rets)
+{
+if (spapr->guest_machine_check_addr == -1) {
+/* NMI register not called */
+rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+return;
+}
+
+if (spapr->mc_status != cpu->vcpu_id) {
+/* The vCPU that hit the NMI should invoke "ibm,nmi-interlock" */
+rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+return;
+}
+
+/*
+ * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
+ * hence unset mc_status.
+ */
+spapr->mc_status = -1;
+qemu_cond_signal(&spapr->mc_delivery_cond);
+rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
 static struct rtas_call {
 const char *name;
 spapr_rtas_fn fn;
@@ -503,6 +553,14 @@ hwaddr spapr_get_rtas_addr(void)
 return (hwaddr)fdt32_to_cpu(*rtas_data);
 }
 
+void spapr_fwnmi_register(void)
+{
+spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
+rtas_ibm_nmi_register);
+spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
+rtas_ibm_nmi_interlock);
+}
+
 static void core_rtas_register_types(void)
 {
 spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 652a5514e8..a90e677cc3 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -218,6 +218,8 @@ struct SpaprMachineState {
 
 unsigned gpu_numa_id;
 SpaprTpmProxy *tpm_proxy;
+
+bool fwnmi_calls_registered;
 };
 
 #define H_SUCCESS 0
@@ -656,8 +658,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong 
opcode,
 #define RTAS_IBM_REMOVE_PE_DMA_WINDOW   (RTAS_TOKEN_BASE + 0x28)
 #define RTAS_IBM_RESET_PE_DMA_WINDOW(RTAS_TOKEN_BASE + 0x29)
 #define RTAS_IBM_SUSPEND_ME (RTAS_TOKEN_BASE + 0x2A)
+#define RTAS_IBM_NMI_REGISTER   (RTAS_TOKEN_BASE + 0x2B)
+#define RTAS_IBM_NMI_INTERLOCK

[PATCH v18 1/7] Wrapper function to wait on condition for the main loop mutex

2020-01-01 Thread Ganesh Goudar
From: Aravinda Prasad 

Introduce a wrapper function to wait on condition for
the main loop mutex. This function atomically releases
the main loop mutex and causes the calling thread to
block on the condition. This wrapper is required because
qemu_global_mutex is a static variable.

Signed-off-by: Aravinda Prasad 
Reviewed-by: David Gibson 
Reviewed-by: Greg Kurz 
---
 cpus.c   | 5 +
 include/qemu/main-loop.h | 8 
 2 files changed, 13 insertions(+)

diff --git a/cpus.c b/cpus.c
index b472378b70..79388d2b0f 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1835,6 +1835,11 @@ void qemu_mutex_unlock_iothread(void)
 qemu_mutex_unlock(_global_mutex);
 }
 
+void qemu_cond_wait_iothread(QemuCond *cond)
+{
+qemu_cond_wait(cond, _global_mutex);
+}
+
 static bool all_vcpus_paused(void)
 {
 CPUState *cpu;
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index f6ba78ea73..a6d20b0719 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -295,6 +295,14 @@ void qemu_mutex_lock_iothread_impl(const char *file, int 
line);
  */
 void qemu_mutex_unlock_iothread(void);
 
+/*
+ * qemu_cond_wait_iothread: Wait on condition for the main loop mutex
+ *
+ * This function atomically releases the main loop mutex and causes
+ * the calling thread to block on the condition.
+ */
+void qemu_cond_wait_iothread(QemuCond *cond);
+
 /* internal interfaces */
 
 void qemu_fd_register(int fd);
-- 
2.17.2




[PATCH v18 6/7] migration: Include migration support for machine check handling

2020-01-01 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch includes migration support for machine check
handling. In particular, this patch blocks VM migration
requests until the machine check error handling is
complete, as these errors are specific to the source
hardware and are irrelevant on the target hardware.

[Do not set FWNMI cap in post_load, now it's done in .apply hook]
Signed-off-by: Ganesh Goudar 
Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr.c | 41 +
 hw/ppc/spapr_events.c  | 20 +++-
 hw/ppc/spapr_rtas.c|  4 
 include/hw/ppc/spapr.h |  1 +
 4 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 975d7da734..4acdc30100 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -46,6 +46,7 @@
 #include "migration/qemu-file-types.h"
 #include "migration/global_state.h"
 #include "migration/register.h"
+#include "migration/blocker.h"
 #include "mmu-hash64.h"
 #include "mmu-book3s-v3.h"
 #include "cpu-models.h"
@@ -1685,6 +1686,8 @@ static void spapr_machine_reset(MachineState *machine)
 
 /* Signal all vCPUs waiting on this condition */
 qemu_cond_broadcast(>mc_delivery_cond);
+
+migrate_del_blocker(spapr->fwnmi_migration_blocker);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -1967,6 +1970,43 @@ static const VMStateDescription vmstate_spapr_dtb = {
 },
 };
 
+static bool spapr_fwnmi_needed(void *opaque)
+{
+SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+return spapr->fwnmi_calls_registered;
+}
+
+static int spapr_fwnmi_pre_save(void *opaque)
+{
+SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+/*
+ * Check if machine check handling is in progress and print a
+ * warning message.
+ */
+if (spapr->mc_status != -1) {
+warn_report("A machine check is being handled during migration. The"
+"handler may run and log hardware error on the destination");
+}
+
+return 0;
+}
+
+static const VMStateDescription vmstate_spapr_machine_check = {
+.name = "spapr_machine_check",
+.version_id = 1,
+.minimum_version_id = 1,
+.needed = spapr_fwnmi_needed,
+.pre_save = spapr_fwnmi_pre_save,
+.fields = (VMStateField[]) {
+VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
+VMSTATE_INT32(mc_status, SpaprMachineState),
+VMSTATE_BOOL(fwnmi_calls_registered, SpaprMachineState),
+VMSTATE_END_OF_LIST()
+},
+};
+
 static const VMStateDescription vmstate_spapr = {
 .name = "spapr",
 .version_id = 3,
@@ -2001,6 +2041,7 @@ static const VMStateDescription vmstate_spapr = {
 _spapr_cap_large_decr,
 _spapr_cap_ccf_assist,
 _spapr_cap_fwnmi,
+_spapr_machine_check,
 NULL
 }
 };
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 54eaf28a9e..7092687fa0 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -43,6 +43,7 @@
 #include "qemu/main-loop.h"
 #include "hw/ppc/spapr_ovec.h"
 #include 
+#include "migration/blocker.h"
 
 #define RTAS_LOG_VERSION_MASK   0xff00
 #define   RTAS_LOG_VERSION_60x0600
@@ -843,6 +844,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 {
 SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 CPUState *cs = CPU(cpu);
+int ret;
+Error *local_err = NULL;
 
 if (spapr->guest_machine_check_addr == -1) {
 /*
@@ -872,8 +875,23 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 return;
 }
 }
-spapr->mc_status = cpu->vcpu_id;
 
+error_setg(>fwnmi_migration_blocker,
+   "Live migration not supported during machine check handling");
+ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, _err);
+if (ret == -EBUSY) {
+/*
+ * We don't want to abort so we let the migration to continue.
+ * In a rare case, the machine check handler will run on the target.
+ * Though this is not preferable, it is better than aborting
+ * the migration or killing the VM.
+ */
+error_free(spapr->fwnmi_migration_blocker);
+spapr->fwnmi_migration_blocker = NULL;
+warn_report("Received a fwnmi while migration was in progress");
+}
+
+spapr->mc_status = cpu->vcpu_id;
 spapr_mce_dispatch_elog(cpu, recovered);
 }
 
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 54b142f35b..3409f6b896 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -50,6 +50,7 @@
 #include "hw/ppc/fdt.h"
 #include "target/ppc/mmu-hash64.h"
 #include "target/ppc/mmu-book3s-v3.h"
+#include "migration/blocker.h"
 
 stati

[PATCH v18 2/7] ppc: spapr: Introduce FWNMI capability

2020-01-01 Thread Ganesh Goudar
From: Aravinda Prasad 

Introduce fwnmi as an spapr capability and validate it against
the kernel's existing capability by trying to enable it.

[eliminate cap_ppc_fwnmi, add fwnmi cap to migration state
 and rephrase the commit message]
Signed-off-by: Ganesh Goudar 
Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr.c |  2 ++
 hw/ppc/spapr_caps.c| 18 ++
 include/hw/ppc/spapr.h |  5 -
 target/ppc/kvm.c   |  8 
 target/ppc/kvm_ppc.h   |  6 ++
 5 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index f11422fc41..c91e64aad0 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1994,6 +1994,7 @@ static const VMStateDescription vmstate_spapr = {
 _spapr_dtb,
 _spapr_cap_large_decr,
 _spapr_cap_ccf_assist,
+_spapr_cap_fwnmi,
 NULL
 }
 };
@@ -4400,6 +4401,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
 smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
 smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 spapr_caps_add_properties(smc, _abort);
 smc->irq = _irq_dual;
 smc->dr_phb_enabled = true;
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 481dfd2a27..3001098601 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -496,6 +496,14 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, 
uint8_t val,
 }
 }
 
+static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
+Error **errp)
+{
+if (!val) {
+return; /* Disabled by default */
+}
+}
+
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
 [SPAPR_CAP_HTM] = {
 .name = "htm",
@@ -595,6 +603,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
 .type = "bool",
 .apply = cap_ccf_assist_apply,
 },
+[SPAPR_CAP_FWNMI_MCE] = {
+.name = "fwnmi-mce",
+.description = "Handle fwnmi machine check exceptions",
+.index = SPAPR_CAP_FWNMI_MCE,
+.get = spapr_cap_get_bool,
+.set = spapr_cap_set_bool,
+.type = "bool",
+.apply = cap_fwnmi_mce_apply,
+},
 };
 
 static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -734,6 +751,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, 
SPAPR_CAP_HPT_MAXPAGESIZE);
 SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
 SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
+SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
 
 void spapr_caps_init(SpaprMachineState *spapr)
 {
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 61f005c6f6..7bc5fc3a9e 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -79,8 +79,10 @@ typedef enum {
 #define SPAPR_CAP_LARGE_DECREMENTER 0x08
 /* Count Cache Flush Assist HW Instruction */
 #define SPAPR_CAP_CCF_ASSIST0x09
+/* FWNMI machine check handling */
+#define SPAPR_CAP_FWNMI_MCE 0x0A
 /* Num Caps */
-#define SPAPR_CAP_NUM   (SPAPR_CAP_CCF_ASSIST + 1)
+#define SPAPR_CAP_NUM   (SPAPR_CAP_FWNMI_MCE + 1)
 
 /*
  * Capability Values
@@ -869,6 +871,7 @@ extern const VMStateDescription 
vmstate_spapr_cap_hpt_maxpagesize;
 extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
 extern const VMStateDescription vmstate_spapr_cap_large_decr;
 extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
+extern const VMStateDescription vmstate_spapr_cap_fwnmi;
 
 static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
 {
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index d1c334f0e3..518de7e4b7 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2060,6 +2060,14 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int 
mpic_proxy)
 }
 }
 
+int kvmppc_set_fwnmi(void)
+{
+PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+CPUState *cs = CPU(cpu);
+
+return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
+}
+
 int kvmppc_smt_threads(void)
 {
 return cap_ppc_smt ? cap_ppc_smt : 1;
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index f22daabf51..eedb15d48e 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -27,6 +27,7 @@ void kvmppc_enable_h_page_init(void);
 void kvmppc_set_papr(PowerPCCPU *cpu);
 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
+int kvmppc_set_fwnmi(void);
 int kvmppc_smt_threads(void);
 void kvmppc_error_append_smt_possible_hint(Error *const *errp);
 int kvmppc_set_smt_threads(int smt);
@@ -159,6 +160,11 @@ static inline void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, 
int mpic_proxy)
 {
 }
 
+static inli

[PATCH v18 0/7] target-ppc/spapr: Add FWNMI support in QEMU for PowerKVM guests

2020-01-01 Thread Ganesh Goudar
This patch set adds support for FWNMI in PowerKVM guests.

System errors such as SLB multihit and memory errors
that cannot be corrected by hardware are passed on to
the kernel for handling by raising a machine check
exception (an NMI). Upon such machine check exceptions,
if the address in error belongs to a guest, then KVM
invokes the guest's 0x200 interrupt vector if the guest
is not FWNMI capable. For an FWNMI-capable guest,
KVM passes control to QEMU by exiting the guest.

This patch series adds functionality to QEMU to pass
on such machine check exceptions to the FWNMI capable
guest kernel by building an error log and invoking
the guest registered machine check handling routine.

The KVM changes are now part of the upstream kernel
(commit e20bbd3d). This series contains QEMU changes.

Change Log v18:
  - Dynamically create the Error object before adding it as blocker
  - In apply hook check if the fwnmi calls are already registered and
if kvm supports fwnmi before registering the fwnmi calls.
  - In rtas_ibm_nmi_register() test the feature flag before attempting
to get the RTAS address
  - Introduce a bool member "fwnmi_calls_registered" to check if the
fwnmi calls are registered and use the same in needed hook to save
the state during migration. 

Change Log v17:
  - Add fwnmi cap to migration state
  - Rephrase the commit message in patch 2/7

Change Log v16:
  - Fixed coding style problems

Change Log v15:
  - Removed cap_ppc_fwnmi
  - Moved fwnmi registration to .apply hook
  - Assume SLOF has allocated enough room for rtas error log
  - Using ARRAY_SIZE to end the loop
  - Do not set FWNMI cap in post_load, now it's done in .apply hook

Change Log v14:
  - Feature activation moved to a separate patch
  - Fixed issues with migration blocker

Change Log v13:
  - Minor fixes (mostly nits)
  - Moved FWNMI guest registration check from patch 4 to 3.

Change Log v12:
  - Rebased to latest ppc-for-4.2 (SHA b1e8156743)

Change Log v11:
  - Moved FWNMI SPAPR cap defaults to 4.2 class option
  - Fixed issues with handling fwnmi KVM capability

Change Log v10:
  - Reshuffled the patch sequence + minor fixes

Change Log v9:
  - Fixed kvm cap and spapr cap issues

Change Log v8:
  - Added functionality to check FWNMI capability during
VM migration

---

Aravinda Prasad (7):
  Wrapper function to wait on condition for the main loop mutex
  ppc: spapr: Introduce FWNMI capability
  target/ppc: Handle NMI guest exit
  target/ppc: Build rtas error log upon an MCE
  ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS
calls
  migration: Include migration support for machine check handling
  ppc: spapr: Activate the FWNMI functionality

 cpus.c   |   5 +
 hw/ppc/spapr.c   |  52 
 hw/ppc/spapr_caps.c  |  24 
 hw/ppc/spapr_events.c| 273 +++
 hw/ppc/spapr_rtas.c  |  88 +
 include/hw/ppc/spapr.h   |  27 +++-
 include/qemu/main-loop.h |   8 ++
 target/ppc/kvm.c |  24 
 target/ppc/kvm_ppc.h |   8 ++
 target/ppc/trace-events  |   1 +
 10 files changed, 508 insertions(+), 2 deletions(-)

-- 
2.24.0.155.gd9f6f3b619




[PATCH v18 4/7] target/ppc: Build rtas error log upon an MCE

2020-01-01 Thread Ganesh Goudar
From: Aravinda Prasad 

Upon a machine check exception (MCE) in a guest address space,
KVM causes a guest exit to enable QEMU to build and pass the
error to the guest in the PAPR defined rtas error log format.

This patch builds the rtas error log, copies it to the rtas_addr
and then invokes the guest registered machine check handler. The
handler in the guest takes suitable action(s) depending on the type
and criticality of the error. For example, if an error is
unrecoverable memory corruption in an application inside the
guest, then the guest kernel sends a SIGBUS to the application.
For recoverable errors, the guest performs recovery actions and
logs the error.

[Assume SLOF has allocated enough room for rtas error log]
Signed-off-by: Ganesh Goudar 
Signed-off-by: Aravinda Prasad 
Reviewed-by: David Gibson 
---
 hw/ppc/spapr_events.c  | 220 -
 hw/ppc/spapr_rtas.c|  26 +
 include/hw/ppc/spapr.h |   6 +-
 target/ppc/kvm.c   |   4 +-
 4 files changed, 253 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index dfc0de840a..54eaf28a9e 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -214,6 +214,104 @@ struct hp_extended_log {
 struct rtas_event_log_v6_hp hp;
 } QEMU_PACKED;
 
+struct rtas_event_log_v6_mc {
+#define RTAS_LOG_V6_SECTION_ID_MC   0x4D43 /* MC */
+struct rtas_event_log_v6_section_header hdr;
+uint32_t fru_id;
+uint32_t proc_id;
+uint8_t error_type;
+#define RTAS_LOG_V6_MC_TYPE_UE   0
+#define RTAS_LOG_V6_MC_TYPE_SLB  1
+#define RTAS_LOG_V6_MC_TYPE_ERAT 2
+#define RTAS_LOG_V6_MC_TYPE_TLB  4
+#define RTAS_LOG_V6_MC_TYPE_D_CACHE  5
+#define RTAS_LOG_V6_MC_TYPE_I_CACHE  7
+uint8_t sub_err_type;
+#define RTAS_LOG_V6_MC_UE_INDETERMINATE  0
+#define RTAS_LOG_V6_MC_UE_IFETCH 1
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
+#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+#define RTAS_LOG_V6_MC_SLB_PARITY0
+#define RTAS_LOG_V6_MC_SLB_MULTIHIT  1
+#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2
+#define RTAS_LOG_V6_MC_ERAT_PARITY   1
+#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2
+#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE3
+#define RTAS_LOG_V6_MC_TLB_PARITY1
+#define RTAS_LOG_V6_MC_TLB_MULTIHIT  2
+#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3
+uint8_t reserved_1[6];
+uint64_t effective_address;
+uint64_t logical_address;
+} QEMU_PACKED;
+
+struct mc_extended_log {
+struct rtas_event_log_v6 v6hdr;
+struct rtas_event_log_v6_mc mc;
+} QEMU_PACKED;
+
+struct MC_ierror_table {
+unsigned long srr1_mask;
+unsigned long srr1_value;
+bool nip_valid; /* nip is a valid indicator of faulting address */
+uint8_t error_type;
+uint8_t error_subtype;
+unsigned int initiator;
+unsigned int severity;
+};
+
+static const struct MC_ierror_table mc_ierror_table[] = {
+{ 0x081c, 0x0004, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0008, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x000c, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0010, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0014, true,
+  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0018, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } };
+
+struct MC_derror_table {
+unsigned long dsisr_value;
+bool dar_valid; /* dar is a valid indicator of faulting address */
+uint8_t error_type;
+uint8_t error_subtype;
+unsigned int initiator;
+unsigned int severity;
+};
+
+static const struct MC_derror_table mc_derror_table[] = {
+{ 0x8000, false,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x4000, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x0800, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT

[PATCH v17 5/7] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls

2019-10-24 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch adds support in QEMU to handle "ibm,nmi-register"
and "ibm,nmi-interlock" RTAS calls.

The machine check notification address is saved when the
OS issues "ibm,nmi-register" RTAS call.

This patch also handles the case when multiple processors
experience machine check at or about the same time by
handling "ibm,nmi-interlock" call. In such cases, as per
PAPR, subsequent processors serialize waiting for the first
processor to issue the "ibm,nmi-interlock" call. The second
processor that also received a machine check error waits
till the first processor is done reading the error log.
The first processor issues "ibm,nmi-interlock" call
when the error log is consumed.

[Move fwnmi registration to .apply hook]
Signed-off-by: Ganesh Goudar 
Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr_caps.c|  9 +--
 hw/ppc/spapr_rtas.c| 57 ++
 include/hw/ppc/spapr.h |  5 +++-
 3 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 976d709210..1675ebd45e 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -509,9 +509,14 @@ static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, 
uint8_t val,
  * of software injected faults like duplicate SLBs).
  */
 warn_report("Firmware Assisted Non-Maskable Interrupts not supported 
in TCG");
-} else if (kvm_enabled() && (kvmppc_set_fwnmi() != 0)) {
-error_setg(errp,
+} else if (kvm_enabled()) {
+if (!kvmppc_set_fwnmi()) {
+/* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
+spapr_fwnmi_register();
+} else {
+error_setg(errp,
 "Firmware Assisted Non-Maskable Interrupts not supported by KVM, try 
cap-fwnmi-mce=off");
+}
 }
 }
 
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 2c066a372d..0328b1f341 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -400,6 +400,55 @@ static void rtas_get_power_level(PowerPCCPU *cpu, 
SpaprMachineState *spapr,
 rtas_st(rets, 1, 100);
 }
 
+static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
+  SpaprMachineState *spapr,
+  uint32_t token, uint32_t nargs,
+  target_ulong args,
+  uint32_t nret, target_ulong rets)
+{
+hwaddr rtas_addr = spapr_get_rtas_addr();
+
+if (!rtas_addr) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+spapr->guest_machine_check_addr = rtas_ld(args, 1);
+rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
+   SpaprMachineState *spapr,
+   uint32_t token, uint32_t nargs,
+   target_ulong args,
+   uint32_t nret, target_ulong rets)
+{
+if (spapr->guest_machine_check_addr == -1) {
+/* NMI register not called */
+rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+return;
+}
+
+if (spapr->mc_status != cpu->vcpu_id) {
+/* The vCPU that hit the NMI should invoke "ibm,nmi-interlock" */
+rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+return;
+}
+
+/*
+ * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
+ * hence unset mc_status.
+ */
+spapr->mc_status = -1;
+qemu_cond_signal(>mc_delivery_cond);
+rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
 static struct rtas_call {
 const char *name;
 spapr_rtas_fn fn;
@@ -503,6 +552,14 @@ hwaddr spapr_get_rtas_addr(void)
 return (hwaddr)fdt32_to_cpu(*rtas_data);
 }
 
+void spapr_fwnmi_register(void)
+{
+spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
+rtas_ibm_nmi_register);
+spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
+rtas_ibm_nmi_interlock);
+}
+
 static void core_rtas_register_types(void)
 {
 spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 4afa8d4d09..86f0fc8fdd 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -653,8 +653,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong 
opcode,
 #define RTAS_IBM_REMOVE_PE_DMA_WINDOW   (RTAS_TOKEN_BASE + 0x28)
 #define RTAS_IBM_RESET_PE_DMA_WINDOW(RTAS_TOKEN_BASE + 0x29)
 #define RTAS_IBM_SUSPEND_ME (RTAS_TOKEN_BASE + 0x2A)
+#define RTAS_IBM_NMI_REGISTER   (RTAS_TOKEN_BASE + 0x2B)
+#

[PATCH v17 2/7] ppc: spapr: Introduce FWNMI capability

2019-10-24 Thread Ganesh Goudar
From: Aravinda Prasad 

Introduce fwnmi as an spapr capability and validate it against
the kernel's existing capability by trying to enable it.

[eliminate cap_ppc_fwnmi, add fwnmi cap to migration state
 and rephrase the commit message]
Signed-off-by: Ganesh Goudar 
Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr.c |  2 ++
 hw/ppc/spapr_caps.c| 29 +
 include/hw/ppc/spapr.h |  5 -
 target/ppc/kvm.c   |  8 
 target/ppc/kvm_ppc.h   |  6 ++
 5 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 4eb97d3a9b..31a3fb72d4 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2068,6 +2068,7 @@ static const VMStateDescription vmstate_spapr = {
 _spapr_dtb,
 _spapr_cap_large_decr,
 _spapr_cap_ccf_assist,
+_spapr_cap_fwnmi,
 NULL
 }
 };
@@ -4434,6 +4435,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
 smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
 smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 spapr_caps_add_properties(smc, _abort);
 smc->irq = _irq_dual;
 smc->dr_phb_enabled = true;
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 481dfd2a27..976d709210 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -496,6 +496,25 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, 
uint8_t val,
 }
 }
 
+static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
+Error **errp)
+{
+if (!val) {
+return; /* Disabled by default */
+}
+
+if (tcg_enabled()) {
+/*
+ * TCG support may not be correct in some conditions (e.g., in case
+ * of software injected faults like duplicate SLBs).
+ */
+warn_report("Firmware Assisted Non-Maskable Interrupts not supported 
in TCG");
+} else if (kvm_enabled() && (kvmppc_set_fwnmi() != 0)) {
+error_setg(errp,
+"Firmware Assisted Non-Maskable Interrupts not supported by KVM, try 
cap-fwnmi-mce=off");
+}
+}
+
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
 [SPAPR_CAP_HTM] = {
 .name = "htm",
@@ -595,6 +614,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
 .type = "bool",
 .apply = cap_ccf_assist_apply,
 },
+[SPAPR_CAP_FWNMI_MCE] = {
+.name = "fwnmi-mce",
+.description = "Handle fwnmi machine check exceptions",
+.index = SPAPR_CAP_FWNMI_MCE,
+.get = spapr_cap_get_bool,
+.set = spapr_cap_set_bool,
+.type = "bool",
+.apply = cap_fwnmi_mce_apply,
+},
 };
 
 static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -734,6 +762,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, 
SPAPR_CAP_HPT_MAXPAGESIZE);
 SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
 SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
+SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
 
 void spapr_caps_init(SpaprMachineState *spapr)
 {
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index cbd1a4c9f3..af9a9ce6f2 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -79,8 +79,10 @@ typedef enum {
 #define SPAPR_CAP_LARGE_DECREMENTER 0x08
 /* Count Cache Flush Assist HW Instruction */
 #define SPAPR_CAP_CCF_ASSIST0x09
+/* FWNMI machine check handling */
+#define SPAPR_CAP_FWNMI_MCE 0x0A
 /* Num Caps */
-#define SPAPR_CAP_NUM   (SPAPR_CAP_CCF_ASSIST + 1)
+#define SPAPR_CAP_NUM   (SPAPR_CAP_FWNMI_MCE + 1)
 
 /*
  * Capability Values
@@ -868,6 +870,7 @@ extern const VMStateDescription 
vmstate_spapr_cap_hpt_maxpagesize;
 extern const VMStateDescription vmstate_spapr_cap_nested_kvm_hv;
 extern const VMStateDescription vmstate_spapr_cap_large_decr;
 extern const VMStateDescription vmstate_spapr_cap_ccf_assist;
+extern const VMStateDescription vmstate_spapr_cap_fwnmi;
 
 static inline uint8_t spapr_get_cap(SpaprMachineState *spapr, int cap)
 {
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 820724cc7d..d56f11a883 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2060,6 +2060,14 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int 
mpic_proxy)
 }
 }
 
+int kvmppc_set_fwnmi(void)
+{
+PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+CPUState *cs = CPU(cpu);
+
+return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
+}
+
 int kvmppc_smt_threads(void)
 {
 return cap_ppc_smt ? cap_ppc_smt : 1;
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 98bd7d5da6..5727a5025f 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kv

[PATCH v17 1/7] Wrapper function to wait on condition for the main loop mutex

2019-10-24 Thread Ganesh Goudar
From: Aravinda Prasad 

Introduce a wrapper function to wait on condition for
the main loop mutex. This function atomically releases
the main loop mutex and causes the calling thread to
block on the condition. This wrapper is required because
qemu_global_mutex is a static variable.

Signed-off-by: Aravinda Prasad 
Reviewed-by: David Gibson 
Reviewed-by: Greg Kurz 
---
 cpus.c   | 5 +
 include/qemu/main-loop.h | 8 
 2 files changed, 13 insertions(+)

diff --git a/cpus.c b/cpus.c
index fabbeca6f4..3c8e423f74 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1897,6 +1897,11 @@ void qemu_mutex_unlock_iothread(void)
 qemu_mutex_unlock(_global_mutex);
 }
 
+void qemu_cond_wait_iothread(QemuCond *cond)
+{
+qemu_cond_wait(cond, _global_mutex);
+}
+
 static bool all_vcpus_paused(void)
 {
 CPUState *cpu;
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index f6ba78ea73..a6d20b0719 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -295,6 +295,14 @@ void qemu_mutex_lock_iothread_impl(const char *file, int 
line);
  */
 void qemu_mutex_unlock_iothread(void);
 
+/*
+ * qemu_cond_wait_iothread: Wait on condition for the main loop mutex
+ *
+ * This function atomically releases the main loop mutex and causes
+ * the calling thread to block on the condition.
+ */
+void qemu_cond_wait_iothread(QemuCond *cond);
+
 /* internal interfaces */
 
 void qemu_fd_register(int fd);
-- 
2.17.2




[PATCH v17 7/7] ppc: spapr: Activate the FWNMI functionality

2019-10-24 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch sets the default value of SPAPR_CAP_FWNMI_MCE
to SPAPR_CAP_ON for machine type 4.2.

Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index e0d0f95ec0..4e7def1bdb 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4484,7 +4484,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
 smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
 smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
-smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
 spapr_caps_add_properties(smc, _abort);
 smc->irq = _irq_dual;
 smc->dr_phb_enabled = true;
@@ -4558,6 +4558,7 @@ static void spapr_machine_4_1_class_options(MachineClass 
*mc)
 smc->linux_pci_probe = false;
 compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
 compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 }
 
 DEFINE_SPAPR_MACHINE(4_1, "4.1", false);
-- 
2.17.2




[PATCH v17 3/7] target/ppc: Handle NMI guest exit

2019-10-24 Thread Ganesh Goudar
From: Aravinda Prasad 

Memory errors such as bit flips that cannot be corrected
by hardware are passed on to the kernel for handling.
If the memory address in error belongs to a guest, then
the guest kernel is responsible for taking suitable action.
Patch [1] enhances KVM to exit guest with exit reason
set to KVM_EXIT_NMI in such cases. This patch handles
KVM_EXIT_NMI exit.

[1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
(e20bbd3d and related commits)

Signed-off-by: Aravinda Prasad 
Reviewed-by: David Gibson 
Reviewed-by: Greg Kurz 
---
 hw/ppc/spapr.c  |  8 
 hw/ppc/spapr_events.c   | 37 +
 include/hw/ppc/spapr.h  | 10 ++
 target/ppc/kvm.c| 14 ++
 target/ppc/kvm_ppc.h|  2 ++
 target/ppc/trace-events |  1 +
 6 files changed, 72 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 31a3fb72d4..346ec5ba6c 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1745,6 +1745,12 @@ static void spapr_machine_reset(MachineState *machine)
 first_ppc_cpu->env.gpr[5] = 0;
 
 spapr->cas_reboot = false;
+
+spapr->mc_status = -1;
+spapr->guest_machine_check_addr = -1;
+
+/* Signal all vCPUs waiting on this condition */
+qemu_cond_broadcast(>mc_delivery_cond);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -3044,6 +3050,8 @@ static void spapr_machine_init(MachineState *machine)
 
 kvmppc_spapr_enable_inkernel_multitce();
 }
+
+qemu_cond_init(>mc_delivery_cond);
 }
 
 static int spapr_kvm_type(MachineState *machine, const char *vm_type)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 0e4c19523a..0ce96b86be 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -40,6 +40,7 @@
 #include "hw/ppc/spapr_drc.h"
 #include "qemu/help_option.h"
 #include "qemu/bcd.h"
+#include "qemu/main-loop.h"
 #include "hw/ppc/spapr_ovec.h"
 #include 
 
@@ -621,6 +622,42 @@ void 
spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
 RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, _id);
 }
 
+void spapr_mce_req_event(PowerPCCPU *cpu)
+{
+SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+CPUState *cs = CPU(cpu);
+
+if (spapr->guest_machine_check_addr == -1) {
+/*
+ * This implies that we have hit a machine check either when the
+ * guest has not registered FWNMI (i.e., "ibm,nmi-register" not
+ * called) or between system reset and "ibm,nmi-register".
+ * Fall back to the old machine check behavior in such cases.
+ */
+cs->exception_index = POWERPC_EXCP_MCHECK;
+ppc_cpu_do_interrupt(cs);
+return;
+}
+
+while (spapr->mc_status != -1) {
+/*
+ * Check whether the same CPU got machine check error
+ * while still handling the mc error (i.e., before
+ * that CPU called "ibm,nmi-interlock")
+ */
+if (spapr->mc_status == cpu->vcpu_id) {
+qemu_system_guest_panicked(NULL);
+return;
+}
+qemu_cond_wait_iothread(>mc_delivery_cond);
+/* Meanwhile if the system is reset, then just return */
+if (spapr->guest_machine_check_addr == -1) {
+return;
+}
+}
+spapr->mc_status = cpu->vcpu_id;
+}
+
 static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
 uint32_t token, uint32_t nargs,
 target_ulong args,
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index af9a9ce6f2..1f5eb8c856 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -190,6 +190,15 @@ struct SpaprMachineState {
  * occurs during the unplug process. */
 QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs;
 
+/* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */
+target_ulong guest_machine_check_addr;
+/*
+ * mc_status is set to -1 if mc is not in progress, else is set to the CPU
+ * handling the mc.
+ */
+int mc_status;
+QemuCond mc_delivery_cond;
+
 /*< public >*/
 char *kvm_type;
 char *host_model;
@@ -803,6 +812,7 @@ void spapr_clear_pending_events(SpaprMachineState *spapr);
 int spapr_max_server_number(SpaprMachineState *spapr);
 void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
   uint64_t pte0, uint64_t pte1);
+void spapr_mce_req_event(PowerPCCPU *cpu);
 
 /* DRC callbacks. */
 void spapr_core_release(DeviceState *dev);
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index d56f11a883..2d8db6d832 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -1708,6 +1708,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run 
*run)
 ret = 0;
 break;
 
+case KVM_EXIT_NMI:
+trace_kvm_handle_nmi_exception();
+ret = kvm_handle_nmi(cpu, run);
+break;
+
 default:
 fprintf(stderr, "KVM: unknown exit reason %d\n", 

[PATCH v17 6/7] migration: Include migration support for machine check handling

2019-10-24 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch includes migration support for machine check
handling. In particular, this patch blocks VM migration
requests until the machine check error handling is
complete, as these errors are specific to the source
hardware and are irrelevant on the target hardware.

[Do not set FWNMI cap in post_load, now its done in .apply hook]
Signed-off-by: Ganesh Goudar 
Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr.c | 41 +
 hw/ppc/spapr_events.c  | 16 +++-
 hw/ppc/spapr_rtas.c|  2 ++
 include/hw/ppc/spapr.h |  2 ++
 4 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 346ec5ba6c..e0d0f95ec0 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -46,6 +46,7 @@
 #include "migration/qemu-file-types.h"
 #include "migration/global_state.h"
 #include "migration/register.h"
+#include "migration/blocker.h"
 #include "mmu-hash64.h"
 #include "mmu-book3s-v3.h"
 #include "cpu-models.h"
@@ -1751,6 +1752,8 @@ static void spapr_machine_reset(MachineState *machine)
 
 /* Signal all vCPUs waiting on this condition */
 qemu_cond_broadcast(>mc_delivery_cond);
+
+migrate_del_blocker(spapr->fwnmi_migration_blocker);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -2041,6 +2044,43 @@ static const VMStateDescription vmstate_spapr_dtb = {
 },
 };
 
+static bool spapr_fwnmi_needed(void *opaque)
+{
+SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+return spapr->guest_machine_check_addr != -1;
+}
+
+static int spapr_fwnmi_pre_save(void *opaque)
+{
+SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+/*
+ * With -only-migratable QEMU option, we cannot block migration.
+ * Hence check if machine check handling is in progress and print
+ * a warning message.
+ */
+if (spapr->mc_status != -1) {
+warn_report("A machine check is being handled during migration. The"
+"handler may run and log hardware error on the destination");
+}
+
+return 0;
+}
+
+static const VMStateDescription vmstate_spapr_machine_check = {
+.name = "spapr_machine_check",
+.version_id = 1,
+.minimum_version_id = 1,
+.needed = spapr_fwnmi_needed,
+.pre_save = spapr_fwnmi_pre_save,
+.fields = (VMStateField[]) {
+VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
+VMSTATE_INT32(mc_status, SpaprMachineState),
+VMSTATE_END_OF_LIST()
+},
+};
+
 static const VMStateDescription vmstate_spapr = {
 .name = "spapr",
 .version_id = 3,
@@ -2075,6 +2115,7 @@ static const VMStateDescription vmstate_spapr = {
 _spapr_cap_large_decr,
 _spapr_cap_ccf_assist,
 _spapr_cap_fwnmi,
+_spapr_machine_check,
 NULL
 }
 };
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index db44e09154..30d9371c88 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -43,6 +43,7 @@
 #include "qemu/main-loop.h"
 #include "hw/ppc/spapr_ovec.h"
 #include 
+#include "migration/blocker.h"
 
 #define RTAS_LOG_VERSION_MASK   0xff00
 #define   RTAS_LOG_VERSION_60x0600
@@ -842,6 +843,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 {
 SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 CPUState *cs = CPU(cpu);
+int ret;
+Error *local_err = NULL;
 
 if (spapr->guest_machine_check_addr == -1) {
 /*
@@ -871,8 +874,19 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 return;
 }
 }
-spapr->mc_status = cpu->vcpu_id;
 
+ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, _err);
+if (ret == -EBUSY) {
+/*
+ * We don't want to abort so we let the migration to continue.
+ * In a rare case, the machine check handler will run on the target.
+ * Though this is not preferable, it is better than aborting
+ * the migration or killing the VM.
+ */
+warn_report_err(local_err);
+}
+
+spapr->mc_status = cpu->vcpu_id;
 spapr_mce_dispatch_elog(cpu, recovered);
 }
 
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 0328b1f341..c78d96ee7e 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -50,6 +50,7 @@
 #include "hw/ppc/fdt.h"
 #include "target/ppc/mmu-hash64.h"
 #include "target/ppc/mmu-book3s-v3.h"
+#include "migration/blocker.h"
 
 static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
uint32_t token, uint32_t nargs,
@@ -446,6 +447,7 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
  */
 spapr->mc_status = -1;
 qemu_cond_signal(>mc_

[PATCH v17 4/7] target/ppc: Build rtas error log upon an MCE

2019-10-24 Thread Ganesh Goudar
From: Aravinda Prasad 

Upon a machine check exception (MCE) in a guest address space,
KVM causes a guest exit to enable QEMU to build and pass the
error to the guest in the PAPR defined rtas error log format.

This patch builds the rtas error log, copies it to the rtas_addr
and then invokes the guest registered machine check handler. The
handler in the guest takes suitable action(s) depending on the type
and criticality of the error. For example, if an error is
unrecoverable memory corruption in an application inside the
guest, then the guest kernel sends a SIGBUS to the application.
For recoverable errors, the guest performs recovery actions and
logs the error.

[Assume SLOF has allocated enough room for rtas error log]
Signed-off-by: Ganesh Goudar 
Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr_events.c  | 220 -
 hw/ppc/spapr_rtas.c|  26 +
 include/hw/ppc/spapr.h |   6 +-
 target/ppc/kvm.c   |   4 +-
 4 files changed, 253 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 0ce96b86be..db44e09154 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -214,6 +214,104 @@ struct hp_extended_log {
 struct rtas_event_log_v6_hp hp;
 } QEMU_PACKED;
 
+struct rtas_event_log_v6_mc {
+#define RTAS_LOG_V6_SECTION_ID_MC   0x4D43 /* MC */
+struct rtas_event_log_v6_section_header hdr;
+uint32_t fru_id;
+uint32_t proc_id;
+uint8_t error_type;
+#define RTAS_LOG_V6_MC_TYPE_UE   0
+#define RTAS_LOG_V6_MC_TYPE_SLB  1
+#define RTAS_LOG_V6_MC_TYPE_ERAT 2
+#define RTAS_LOG_V6_MC_TYPE_TLB  4
+#define RTAS_LOG_V6_MC_TYPE_D_CACHE  5
+#define RTAS_LOG_V6_MC_TYPE_I_CACHE  7
+uint8_t sub_err_type;
+#define RTAS_LOG_V6_MC_UE_INDETERMINATE  0
+#define RTAS_LOG_V6_MC_UE_IFETCH 1
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
+#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+#define RTAS_LOG_V6_MC_SLB_PARITY0
+#define RTAS_LOG_V6_MC_SLB_MULTIHIT  1
+#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2
+#define RTAS_LOG_V6_MC_ERAT_PARITY   1
+#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2
+#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE3
+#define RTAS_LOG_V6_MC_TLB_PARITY1
+#define RTAS_LOG_V6_MC_TLB_MULTIHIT  2
+#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3
+uint8_t reserved_1[6];
+uint64_t effective_address;
+uint64_t logical_address;
+} QEMU_PACKED;
+
+struct mc_extended_log {
+struct rtas_event_log_v6 v6hdr;
+struct rtas_event_log_v6_mc mc;
+} QEMU_PACKED;
+
+struct MC_ierror_table {
+unsigned long srr1_mask;
+unsigned long srr1_value;
+bool nip_valid; /* nip is a valid indicator of faulting address */
+uint8_t error_type;
+uint8_t error_subtype;
+unsigned int initiator;
+unsigned int severity;
+};
+
+static const struct MC_ierror_table mc_ierror_table[] = {
+{ 0x081c, 0x0004, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0008, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x000c, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0010, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0014, true,
+  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0018, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } };
+
+struct MC_derror_table {
+unsigned long dsisr_value;
+bool dar_valid; /* dar is a valid indicator of faulting address */
+uint8_t error_type;
+uint8_t error_subtype;
+unsigned int initiator;
+unsigned int severity;
+};
+
+static const struct MC_derror_table mc_derror_table[] = {
+{ 0x8000, false,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x4000, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x0800, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU

[PATCH v17 0/7] target-ppc/spapr: Add FWNMI support in QEMU for PowerKVM guests

2019-10-24 Thread Ganesh Goudar
This patch set adds support for FWNMI in PowerKVM guests.

System errors such as SLB multihit and memory errors
that cannot be corrected by hardware are passed on to
the kernel for handling by raising a machine check
exception (an NMI). Upon such machine check exceptions,
if the address in error belongs to a guest, then KVM
invokes the guest's 0x200 interrupt vector if the guest
is not FWNMI capable. For an FWNMI-capable guest,
KVM passes control to QEMU by exiting the guest.

This patch series adds functionality to QEMU to pass
on such machine check exceptions to the FWNMI capable
guest kernel by building an error log and invoking
the guest registered machine check handling routine.

The KVM changes are now part of the upstream kernel
(commit e20bbd3d). This series contains the QEMU changes.

Change Log v17:
  - Add fwnmi cap to migration state
  - Rephrase the commit message in patch 2/7

Change Log v16:
  - Fixed coding style problems

Change Log v15:
  - Removed cap_ppc_fwnmi
  - Moved fwnmi registration to .apply hook
  - Assume SLOF has allocated enough room for rtas error log
  - Using ARRAY_SIZE to end the loop
  - Do not set FWNMI cap in post_load, now it's done in .apply hook

Change Log v14:
  - Feature activation moved to a separate patch
  - Fixed issues with migration blocker

Change Log v13:
  - Minor fixes (mostly nits)
  - Moved FWNMI guest registration check from patch 4 to 3.

Change Log v12:
  - Rebased to latest ppc-for-4.2 (SHA b1e8156743)

Change Log v11:
  - Moved FWNMI SPAPR cap defaults to 4.2 class option
  - Fixed issues with handling fwnmi KVM capability

Change Log v10:
  - Reshuffled the patch sequence + minor fixes

Change Log v9:
  - Fixed kvm cap and spapr cap issues

Change Log v8:
  - Added functionality to check FWNMI capability during
VM migration

---

Aravinda Prasad (7):
  Wrapper function to wait on condition for the main loop mutex
  ppc: spapr: Introduce FWNMI capability
  target/ppc: Handle NMI guest exit
  target/ppc: Build rtas error log upon an MCE
  ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS
calls
  migration: Include migration support for machine check handling
  ppc: spapr: Activate the FWNMI functionality

 cpus.c   |   5 +
 hw/ppc/spapr.c   |  52 
 hw/ppc/spapr_caps.c  |  34 +
 hw/ppc/spapr_events.c| 269 +++
 hw/ppc/spapr_rtas.c  |  85 +
 include/hw/ppc/spapr.h   |  26 +++-
 include/qemu/main-loop.h |   8 ++
 target/ppc/kvm.c |  24 
 target/ppc/kvm_ppc.h |   8 ++
 target/ppc/trace-events  |   1 +
 10 files changed, 510 insertions(+), 2 deletions(-)

-- 
2.17.2




[PATCH v16 6/7] migration: Include migration support for machine check handling

2019-10-10 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch includes migration support for machine check
handling. In particular, this patch blocks VM migration
requests until the machine check error handling is
complete, as these errors are specific to the source
hardware and are irrelevant on the target hardware.

[Do not set FWNMI cap in post_load, now its done in .apply hook]
Signed-off-by: Ganesh Goudar 
Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr.c | 41 +
 hw/ppc/spapr_events.c  | 16 +++-
 hw/ppc/spapr_rtas.c|  2 ++
 include/hw/ppc/spapr.h |  2 ++
 4 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 328da3f88a..eac3123d83 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -46,6 +46,7 @@
 #include "migration/qemu-file-types.h"
 #include "migration/global_state.h"
 #include "migration/register.h"
+#include "migration/blocker.h"
 #include "mmu-hash64.h"
 #include "mmu-book3s-v3.h"
 #include "cpu-models.h"
@@ -1759,6 +1760,8 @@ static void spapr_machine_reset(MachineState *machine)
 
 /* Signal all vCPUs waiting on this condition */
 qemu_cond_broadcast(>mc_delivery_cond);
+
+migrate_del_blocker(spapr->fwnmi_migration_blocker);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -2049,6 +2052,43 @@ static const VMStateDescription vmstate_spapr_dtb = {
 },
 };
 
+static bool spapr_fwnmi_needed(void *opaque)
+{
+SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+return spapr->guest_machine_check_addr != -1;
+}
+
+static int spapr_fwnmi_pre_save(void *opaque)
+{
+SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+/*
+ * With -only-migratable QEMU option, we cannot block migration.
+ * Hence check if machine check handling is in progress and print
+ * a warning message.
+ */
+if (spapr->mc_status != -1) {
+warn_report("A machine check is being handled during migration. The"
+"handler may run and log hardware error on the destination");
+}
+
+return 0;
+}
+
+static const VMStateDescription vmstate_spapr_machine_check = {
+.name = "spapr_machine_check",
+.version_id = 1,
+.minimum_version_id = 1,
+.needed = spapr_fwnmi_needed,
+.pre_save = spapr_fwnmi_pre_save,
+.fields = (VMStateField[]) {
+VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
+VMSTATE_INT32(mc_status, SpaprMachineState),
+VMSTATE_END_OF_LIST()
+},
+};
+
 static const VMStateDescription vmstate_spapr = {
 .name = "spapr",
 .version_id = 3,
@@ -2082,6 +2122,7 @@ static const VMStateDescription vmstate_spapr = {
 _spapr_dtb,
 _spapr_cap_large_decr,
 _spapr_cap_ccf_assist,
+_spapr_machine_check,
 NULL
 }
 };
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index db44e09154..30d9371c88 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -43,6 +43,7 @@
 #include "qemu/main-loop.h"
 #include "hw/ppc/spapr_ovec.h"
 #include 
+#include "migration/blocker.h"
 
 #define RTAS_LOG_VERSION_MASK   0xff00
 #define   RTAS_LOG_VERSION_60x0600
@@ -842,6 +843,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 {
 SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 CPUState *cs = CPU(cpu);
+int ret;
+Error *local_err = NULL;
 
 if (spapr->guest_machine_check_addr == -1) {
 /*
@@ -871,8 +874,19 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 return;
 }
 }
-spapr->mc_status = cpu->vcpu_id;
 
+ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, _err);
+if (ret == -EBUSY) {
+/*
+ * We don't want to abort so we let the migration to continue.
+ * In a rare case, the machine check handler will run on the target.
+ * Though this is not preferable, it is better than aborting
+ * the migration or killing the VM.
+ */
+warn_report_err(local_err);
+}
+
+spapr->mc_status = cpu->vcpu_id;
 spapr_mce_dispatch_elog(cpu, recovered);
 }
 
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 0328b1f341..c78d96ee7e 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -50,6 +50,7 @@
 #include "hw/ppc/fdt.h"
 #include "target/ppc/mmu-hash64.h"
 #include "target/ppc/mmu-book3s-v3.h"
+#include "migration/blocker.h"
 
 static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
uint32_t token, uint32_t nargs,
@@ -446,6 +447,7 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
  */
 spapr->mc_status = -1;
 qemu_cond_signal(>mc_delivery_cond);
+

[PATCH v16 7/7] ppc: spapr: Activate the FWNMI functionality

2019-10-10 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch sets the default value of SPAPR_CAP_FWNMI_MCE
to SPAPR_CAP_ON for machine type 4.2.

Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index eac3123d83..d27c1f1cf0 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4490,7 +4490,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
 smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
 smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
-smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
 spapr_caps_add_properties(smc, _abort);
 smc->irq = _irq_dual;
 smc->dr_phb_enabled = true;
@@ -4564,6 +4564,7 @@ static void spapr_machine_4_1_class_options(MachineClass 
*mc)
 smc->linux_pci_probe = false;
 compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
 compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 }
 
 DEFINE_SPAPR_MACHINE(4_1, "4.1", false);
-- 
2.17.2




[PATCH v16 3/7] target/ppc: Handle NMI guest exit

2019-10-10 Thread Ganesh Goudar
From: Aravinda Prasad 

Memory errors such as bit flips that cannot be corrected
by hardware are passed on to the kernel for handling.
If the memory address in error belongs to a guest, then
the guest kernel is responsible for taking suitable action.
Patch [1] enhances KVM to exit the guest with the exit reason
set to KVM_EXIT_NMI in such cases. This patch handles the
KVM_EXIT_NMI exit.

[1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
(e20bbd3d and related commits)

Signed-off-by: Aravinda Prasad 
Reviewed-by: David Gibson 
Reviewed-by: Greg Kurz 
---
 hw/ppc/spapr.c  |  8 
 hw/ppc/spapr_events.c   | 37 +
 include/hw/ppc/spapr.h  | 10 ++
 target/ppc/kvm.c| 14 ++
 target/ppc/kvm_ppc.h|  2 ++
 target/ppc/trace-events |  1 +
 6 files changed, 72 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 7e6a15c9b4..328da3f88a 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1753,6 +1753,12 @@ static void spapr_machine_reset(MachineState *machine)
 first_ppc_cpu->env.gpr[5] = 0;
 
 spapr->cas_reboot = false;
+
+spapr->mc_status = -1;
+spapr->guest_machine_check_addr = -1;
+
+/* Signal all vCPUs waiting on this condition */
+qemu_cond_broadcast(>mc_delivery_cond);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -3051,6 +3057,8 @@ static void spapr_machine_init(MachineState *machine)
 
 kvmppc_spapr_enable_inkernel_multitce();
 }
+
+qemu_cond_init(>mc_delivery_cond);
 }
 
 static int spapr_kvm_type(MachineState *machine, const char *vm_type)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 0e4c19523a..0ce96b86be 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -40,6 +40,7 @@
 #include "hw/ppc/spapr_drc.h"
 #include "qemu/help_option.h"
 #include "qemu/bcd.h"
+#include "qemu/main-loop.h"
 #include "hw/ppc/spapr_ovec.h"
 #include 
 
@@ -621,6 +622,42 @@ void 
spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
 RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, _id);
 }
 
+void spapr_mce_req_event(PowerPCCPU *cpu)
+{
+SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+CPUState *cs = CPU(cpu);
+
+if (spapr->guest_machine_check_addr == -1) {
+/*
+ * This implies that we have hit a machine check either when the
+ * guest has not registered FWNMI (i.e., "ibm,nmi-register" not
+ * called) or between system reset and "ibm,nmi-register".
+ * Fall back to the old machine check behavior in such cases.
+ */
+cs->exception_index = POWERPC_EXCP_MCHECK;
+ppc_cpu_do_interrupt(cs);
+return;
+}
+
+while (spapr->mc_status != -1) {
+/*
+ * Check whether the same CPU got machine check error
+ * while still handling the mc error (i.e., before
+ * that CPU called "ibm,nmi-interlock")
+ */
+if (spapr->mc_status == cpu->vcpu_id) {
+qemu_system_guest_panicked(NULL);
+return;
+}
+qemu_cond_wait_iothread(>mc_delivery_cond);
+/* Meanwhile if the system is reset, then just return */
+if (spapr->guest_machine_check_addr == -1) {
+return;
+}
+}
+spapr->mc_status = cpu->vcpu_id;
+}
+
 static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
 uint32_t token, uint32_t nargs,
 target_ulong args,
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index dcd2e7d0cc..412f48c07b 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -190,6 +190,15 @@ struct SpaprMachineState {
  * occurs during the unplug process. */
 QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs;
 
+/* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */
+target_ulong guest_machine_check_addr;
+/*
+ * mc_status is set to -1 if mc is not in progress, else is set to the CPU
+ * handling the mc.
+ */
+int mc_status;
+QemuCond mc_delivery_cond;
+
 /*< public >*/
 char *kvm_type;
 char *host_model;
@@ -803,6 +812,7 @@ void spapr_clear_pending_events(SpaprMachineState *spapr);
 int spapr_max_server_number(SpaprMachineState *spapr);
 void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
   uint64_t pte0, uint64_t pte1);
+void spapr_mce_req_event(PowerPCCPU *cpu);
 
 /* DRC callbacks. */
 void spapr_core_release(DeviceState *dev);
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index d56f11a883..2d8db6d832 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -1708,6 +1708,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run 
*run)
 ret = 0;
 break;
 
+case KVM_EXIT_NMI:
+trace_kvm_handle_nmi_exception();
+ret = kvm_handle_nmi(cpu, run);
+break;
+
 default:
 fprintf(stderr, "KVM: unknown exit reason %d\n", 

[PATCH v16 2/7] ppc: spapr: Introduce FWNMI capability

2019-10-10 Thread Ganesh Goudar
From: Aravinda Prasad 

Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
KVM causes a guest exit with NMI as the exit reason
when it encounters a machine check exception on an
address belonging to a guest. Without this capability
enabled, KVM redirects machine check exceptions to
the guest's 0x200 vector.

This patch also introduces the fwnmi-mce capability to
deal with the case where a guest with the
KVM_CAP_PPC_FWNMI capability enabled is migrated
to a host that does not support this
capability.

[eliminate cap_ppc_fwnmi]
Signed-off-by: Ganesh Goudar 
Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr.c |  1 +
 hw/ppc/spapr_caps.c| 29 +
 include/hw/ppc/spapr.h |  4 +++-
 target/ppc/kvm.c   |  8 
 target/ppc/kvm_ppc.h   |  6 ++
 5 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 514a17ae74..7e6a15c9b4 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4441,6 +4441,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
 smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
 smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 spapr_caps_add_properties(smc, _abort);
 smc->irq = _irq_dual;
 smc->dr_phb_enabled = true;
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 481dfd2a27..778bf32181 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -496,6 +496,25 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, 
uint8_t val,
 }
 }
 
+static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
+Error **errp)
+{
+if (!val) {
+return; /* Disabled by default */
+}
+
+if (tcg_enabled()) {
+/*
+ * TCG support may not be correct in some conditions (e.g., in case
+ * of software injected faults like duplicate SLBs).
+ */
+warn_report("Firmware Assisted Non-Maskable Interrupts not supported 
in TCG");
+} else if (kvm_enabled() && kvmppc_set_fwnmi()) {
+error_setg(errp,
+"Firmware Assisted Non-Maskable Interrupts not supported by KVM, try 
cap-fwnmi-mce=off");
+}
+}
+
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
 [SPAPR_CAP_HTM] = {
 .name = "htm",
@@ -595,6 +614,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
 .type = "bool",
 .apply = cap_ccf_assist_apply,
 },
+[SPAPR_CAP_FWNMI_MCE] = {
+.name = "fwnmi-mce",
+.description = "Handle fwnmi machine check exceptions",
+.index = SPAPR_CAP_FWNMI_MCE,
+.get = spapr_cap_get_bool,
+.set = spapr_cap_set_bool,
+.type = "bool",
+.apply = cap_fwnmi_mce_apply,
+},
 };
 
 static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -734,6 +762,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, 
SPAPR_CAP_HPT_MAXPAGESIZE);
 SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
 SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
+SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
 
 void spapr_caps_init(SpaprMachineState *spapr)
 {
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index cbd1a4c9f3..dcd2e7d0cc 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -79,8 +79,10 @@ typedef enum {
 #define SPAPR_CAP_LARGE_DECREMENTER 0x08
 /* Count Cache Flush Assist HW Instruction */
 #define SPAPR_CAP_CCF_ASSIST0x09
+/* FWNMI machine check handling */
+#define SPAPR_CAP_FWNMI_MCE 0x0A
 /* Num Caps */
-#define SPAPR_CAP_NUM   (SPAPR_CAP_CCF_ASSIST + 1)
+#define SPAPR_CAP_NUM   (SPAPR_CAP_FWNMI_MCE + 1)
 
 /*
  * Capability Values
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 820724cc7d..d56f11a883 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2060,6 +2060,14 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int 
mpic_proxy)
 }
 }
 
+int kvmppc_set_fwnmi(void)
+{
+PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+CPUState *cs = CPU(cpu);
+
+return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
+}
+
 int kvmppc_smt_threads(void)
 {
 return cap_ppc_smt ? cap_ppc_smt : 1;
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 98bd7d5da6..5727a5025f 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -27,6 +27,7 @@ void kvmppc_enable_h_page_init(void);
 void kvmppc_set_papr(PowerPCCPU *cpu);
 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
+int kvmppc_set_fwnmi(void);
 int kvmppc_smt_threads(void);
 void kvmppc_hint_smt_possi

[PATCH v16 4/7] target/ppc: Build rtas error log upon an MCE

2019-10-10 Thread Ganesh Goudar
From: Aravinda Prasad 

Upon a machine check exception (MCE) in a guest address space,
KVM causes a guest exit to enable QEMU to build and pass the
error to the guest in the PAPR defined rtas error log format.

This patch builds the rtas error log, copies it to the rtas_addr
and then invokes the guest registered machine check handler. The
handler in the guest takes suitable action(s) depending on the type
and criticality of the error. For example, if an error is
unrecoverable memory corruption in an application inside the
guest, then the guest kernel sends a SIGBUS to the application.
For recoverable errors, the guest performs recovery actions and
logs the error.

[Assume SLOF has allocated enough room for rtas error log]
Signed-off-by: Ganesh Goudar 
Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr_events.c  | 220 -
 hw/ppc/spapr_rtas.c|  26 +
 include/hw/ppc/spapr.h |   6 +-
 target/ppc/kvm.c   |   4 +-
 4 files changed, 253 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 0ce96b86be..db44e09154 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -214,6 +214,104 @@ struct hp_extended_log {
 struct rtas_event_log_v6_hp hp;
 } QEMU_PACKED;
 
+struct rtas_event_log_v6_mc {
+#define RTAS_LOG_V6_SECTION_ID_MC   0x4D43 /* MC */
+struct rtas_event_log_v6_section_header hdr;
+uint32_t fru_id;
+uint32_t proc_id;
+uint8_t error_type;
+#define RTAS_LOG_V6_MC_TYPE_UE   0
+#define RTAS_LOG_V6_MC_TYPE_SLB  1
+#define RTAS_LOG_V6_MC_TYPE_ERAT 2
+#define RTAS_LOG_V6_MC_TYPE_TLB  4
+#define RTAS_LOG_V6_MC_TYPE_D_CACHE  5
+#define RTAS_LOG_V6_MC_TYPE_I_CACHE  7
+uint8_t sub_err_type;
+#define RTAS_LOG_V6_MC_UE_INDETERMINATE  0
+#define RTAS_LOG_V6_MC_UE_IFETCH 1
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
+#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+#define RTAS_LOG_V6_MC_SLB_PARITY0
+#define RTAS_LOG_V6_MC_SLB_MULTIHIT  1
+#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2
+#define RTAS_LOG_V6_MC_ERAT_PARITY   1
+#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2
+#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE3
+#define RTAS_LOG_V6_MC_TLB_PARITY1
+#define RTAS_LOG_V6_MC_TLB_MULTIHIT  2
+#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3
+uint8_t reserved_1[6];
+uint64_t effective_address;
+uint64_t logical_address;
+} QEMU_PACKED;
+
+struct mc_extended_log {
+struct rtas_event_log_v6 v6hdr;
+struct rtas_event_log_v6_mc mc;
+} QEMU_PACKED;
+
+struct MC_ierror_table {
+unsigned long srr1_mask;
+unsigned long srr1_value;
+bool nip_valid; /* nip is a valid indicator of faulting address */
+uint8_t error_type;
+uint8_t error_subtype;
+unsigned int initiator;
+unsigned int severity;
+};
+
+static const struct MC_ierror_table mc_ierror_table[] = {
+{ 0x081c, 0x0004, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0008, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x000c, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0010, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0014, true,
+  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0018, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } };
+
+struct MC_derror_table {
+unsigned long dsisr_value;
+bool dar_valid; /* dar is a valid indicator of faulting address */
+uint8_t error_type;
+uint8_t error_subtype;
+unsigned int initiator;
+unsigned int severity;
+};
+
+static const struct MC_derror_table mc_derror_table[] = {
+{ 0x8000, false,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x4000, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x0800, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU

[PATCH v16 5/7] ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS calls

2019-10-10 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch adds support in QEMU to handle "ibm,nmi-register"
and "ibm,nmi-interlock" RTAS calls.

The machine check notification address is saved when the
OS issues "ibm,nmi-register" RTAS call.

This patch also handles the case when multiple processors
experience machine check at or about the same time by
handling "ibm,nmi-interlock" call. In such cases, as per
PAPR, subsequent processors serialize waiting for the first
processor to issue the "ibm,nmi-interlock" call. The second
processor that also received a machine check error waits
till the first processor is done reading the error log.
The first processor issues "ibm,nmi-interlock" call
when the error log is consumed.

[Move fwnmi registration to .apply hook]
Signed-off-by: Ganesh Goudar 
Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr_caps.c|  9 +--
 hw/ppc/spapr_rtas.c| 57 ++
 include/hw/ppc/spapr.h |  5 +++-
 3 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 778bf32181..1675ebd45e 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -509,9 +509,14 @@ static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, 
uint8_t val,
  * of software injected faults like duplicate SLBs).
  */
 warn_report("Firmware Assisted Non-Maskable Interrupts not supported 
in TCG");
-} else if (kvm_enabled() && kvmppc_set_fwnmi()) {
-error_setg(errp,
+} else if (kvm_enabled()) {
+if (!kvmppc_set_fwnmi()) {
+/* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
+spapr_fwnmi_register();
+} else {
+error_setg(errp,
 "Firmware Assisted Non-Maskable Interrupts not supported by KVM, try 
cap-fwnmi-mce=off");
+}
 }
 }
 
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 2c066a372d..0328b1f341 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -400,6 +400,55 @@ static void rtas_get_power_level(PowerPCCPU *cpu, 
SpaprMachineState *spapr,
 rtas_st(rets, 1, 100);
 }
 
+static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
+  SpaprMachineState *spapr,
+  uint32_t token, uint32_t nargs,
+  target_ulong args,
+  uint32_t nret, target_ulong rets)
+{
+hwaddr rtas_addr = spapr_get_rtas_addr();
+
+if (!rtas_addr) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+spapr->guest_machine_check_addr = rtas_ld(args, 1);
+rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
+   SpaprMachineState *spapr,
+   uint32_t token, uint32_t nargs,
+   target_ulong args,
+   uint32_t nret, target_ulong rets)
+{
+if (spapr->guest_machine_check_addr == -1) {
+/* NMI register not called */
+rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+return;
+}
+
+if (spapr->mc_status != cpu->vcpu_id) {
+/* The vCPU that hit the NMI should invoke "ibm,nmi-interlock" */
+rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+return;
+}
+
+/*
+ * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
+ * hence unset mc_status.
+ */
+spapr->mc_status = -1;
+qemu_cond_signal(&spapr->mc_delivery_cond);
+rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
 static struct rtas_call {
 const char *name;
 spapr_rtas_fn fn;
@@ -503,6 +552,14 @@ hwaddr spapr_get_rtas_addr(void)
 return (hwaddr)fdt32_to_cpu(*rtas_data);
 }
 
+void spapr_fwnmi_register(void)
+{
+spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
+rtas_ibm_nmi_register);
+spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
+rtas_ibm_nmi_interlock);
+}
+
 static void core_rtas_register_types(void)
 {
 spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 8049f06d1c..fa280c956e 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -653,8 +653,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong 
opcode,
 #define RTAS_IBM_REMOVE_PE_DMA_WINDOW   (RTAS_TOKEN_BASE + 0x28)
 #define RTAS_IBM_RESET_PE_DMA_WINDOW(RTAS_TOKEN_BASE + 0x29)
 #define RTAS_IBM_SUSPEND_ME (RTAS_TOKEN_BASE + 0x2A)
+#define RTAS_IBM_NMI_REGISTER   (RTAS_TOKEN_BASE + 0x2B)
+#

[PATCH v16 0/7] target-ppc/spapr: Add FWNMI support in QEMU for PowerKVM guests

2019-10-10 Thread Ganesh Goudar
This patch set adds support for FWNMI in PowerKVM guests.

System errors such as SLB multihit and memory errors
that cannot be corrected by hardware are passed on to
the kernel for handling by raising a machine check
exception (an NMI). Upon such a machine check exception,
if the address in error belongs to a guest, then KVM
invokes the guest's 0x200 interrupt vector if the guest
is not FWNMI capable. For an FWNMI-capable guest,
KVM passes control to QEMU by exiting the guest.

This patch series adds functionality to QEMU to pass
on such machine check exceptions to the FWNMI capable
guest kernel by building an error log and invoking
the guest registered machine check handling routine.

The KVM changes are now part of the upstream kernel
(commit e20bbd3d). This series contains the QEMU changes.

Change Log v16:
  - Fixed coding style problems

Change Log v15:
  - Removed cap_ppc_fwnmi
  - Moved fwnmi registration to .apply hook
  - Assume SLOF has allocated enough room for rtas error log
  - Using ARRAY_SIZE to end the loop
  - Do not set FWNMI cap in post_load, now it's done in .apply hook

Change Log v14:
  - Feature activation moved to a separate patch
  - Fixed issues with migration blocker

Change Log v13:
  - Minor fixes (mostly nits)
  - Moved FWNMI guest registration check from patch 4 to 3.

Change Log v12:
  - Rebased to latest ppc-for-4.2 (SHA b1e8156743)

Change Log v11:
  - Moved FWNMI SPAPR cap defaults to 4.2 class option
  - Fixed issues with handling fwnmi KVM capability

Change Log v10:
  - Reshuffled the patch sequence + minor fixes

Change Log v9:
  - Fixed kvm cap and spapr cap issues

Change Log v8:
  - Added functionality to check FWNMI capability during
VM migration

---

Aravinda Prasad (7):
  Wrapper function to wait on condition for the main loop mutex
  ppc: spapr: Introduce FWNMI capability
  target/ppc: Handle NMI guest exit
  target/ppc: Build rtas error log upon an MCE
  ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS
calls
  migration: Include migration support for machine check handling
  ppc: spapr: Activate the FWNMI functionality

 cpus.c   |   5 +
 hw/ppc/spapr.c   |  51 
 hw/ppc/spapr_caps.c  |  34 +
 hw/ppc/spapr_events.c| 269 +++
 hw/ppc/spapr_rtas.c  |  85 +
 include/hw/ppc/spapr.h   |  25 +++-
 include/qemu/main-loop.h |   8 ++
 target/ppc/kvm.c |  24 
 target/ppc/kvm_ppc.h |   8 ++
 target/ppc/trace-events  |   1 +
 10 files changed, 508 insertions(+), 2 deletions(-)

-- 
2.17.2




[PATCH v16 1/7] Wrapper function to wait on condition for the main loop mutex

2019-10-10 Thread Ganesh Goudar
From: Aravinda Prasad 

Introduce a wrapper function to wait on condition for
the main loop mutex. This function atomically releases
the main loop mutex and causes the calling thread to
block on the condition. This wrapper is required because
qemu_global_mutex is a static variable.

Signed-off-by: Aravinda Prasad 
Reviewed-by: David Gibson 
Reviewed-by: Greg Kurz 
---
 cpus.c   | 5 +
 include/qemu/main-loop.h | 8 
 2 files changed, 13 insertions(+)

diff --git a/cpus.c b/cpus.c
index d2c61ff155..f84b54943d 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1886,6 +1886,11 @@ void qemu_mutex_unlock_iothread(void)
 qemu_mutex_unlock(&qemu_global_mutex);
 }
 
+void qemu_cond_wait_iothread(QemuCond *cond)
+{
+qemu_cond_wait(cond, &qemu_global_mutex);
+}
+
 static bool all_vcpus_paused(void)
 {
 CPUState *cpu;
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index f6ba78ea73..a6d20b0719 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -295,6 +295,14 @@ void qemu_mutex_lock_iothread_impl(const char *file, int 
line);
  */
 void qemu_mutex_unlock_iothread(void);
 
+/*
+ * qemu_cond_wait_iothread: Wait on condition for the main loop mutex
+ *
+ * This function atomically releases the main loop mutex and causes
+ * the calling thread to block on the condition.
+ */
+void qemu_cond_wait_iothread(QemuCond *cond);
+
 /* internal interfaces */
 
 void qemu_fd_register(int fd);
-- 
2.17.2




[PATCH v15 7/7] ppc: spapr: Activate the FWNMI functionality

2019-10-09 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch sets the default value of SPAPR_CAP_FWNMI_MCE
to SPAPR_CAP_ON for machine type 4.2.

Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index eb267d4c43..a1ed36db77 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4537,7 +4537,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
 smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
 smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
-smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
 spapr_caps_add_properties(smc, &error_abort);
 smc->irq = &spapr_irq_dual;
 smc->dr_phb_enabled = true;
@@ -4611,6 +4611,7 @@ static void spapr_machine_4_1_class_options(MachineClass 
*mc)
 smc->linux_pci_probe = false;
 compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
 compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 }
 
 DEFINE_SPAPR_MACHINE(4_1, "4.1", false);
-- 
2.17.2




[PATCH v15 1/7] Wrapper function to wait on condition for the main loop mutex

2019-10-09 Thread Ganesh Goudar
From: Aravinda Prasad 

Introduce a wrapper function to wait on condition for
the main loop mutex. This function atomically releases
the main loop mutex and causes the calling thread to
block on the condition. This wrapper is required because
qemu_global_mutex is a static variable.

Signed-off-by: Aravinda Prasad 
Reviewed-by: David Gibson 
Reviewed-by: Greg Kurz 
---
 cpus.c   | 5 +
 include/qemu/main-loop.h | 8 
 2 files changed, 13 insertions(+)

diff --git a/cpus.c b/cpus.c
index d2c61ff155..f84b54943d 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1886,6 +1886,11 @@ void qemu_mutex_unlock_iothread(void)
 qemu_mutex_unlock(&qemu_global_mutex);
 }
 
+void qemu_cond_wait_iothread(QemuCond *cond)
+{
+qemu_cond_wait(cond, &qemu_global_mutex);
+}
+
 static bool all_vcpus_paused(void)
 {
 CPUState *cpu;
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
index f6ba78ea73..a6d20b0719 100644
--- a/include/qemu/main-loop.h
+++ b/include/qemu/main-loop.h
@@ -295,6 +295,14 @@ void qemu_mutex_lock_iothread_impl(const char *file, int 
line);
  */
 void qemu_mutex_unlock_iothread(void);
 
+/*
+ * qemu_cond_wait_iothread: Wait on condition for the main loop mutex
+ *
+ * This function atomically releases the main loop mutex and causes
+ * the calling thread to block on the condition.
+ */
+void qemu_cond_wait_iothread(QemuCond *cond);
+
 /* internal interfaces */
 
 void qemu_fd_register(int fd);
-- 
2.17.2




[PATCH v15 0/7] target-ppc/spapr: Add FWNMI support in QEMU for PowerKVM guests

2019-10-09 Thread Ganesh Goudar
This patch set adds support for FWNMI in PowerKVM guests.

System errors such as SLB multihit and memory errors
that cannot be corrected by hardware are passed on to
the kernel for handling by raising a machine check
exception (an NMI). Upon such a machine check exception,
if the address in error belongs to a guest, then KVM
invokes the guest's 0x200 interrupt vector if the guest
is not FWNMI capable. For an FWNMI-capable guest,
KVM passes control to QEMU by exiting the guest.

This patch series adds functionality to QEMU to pass
on such machine check exceptions to the FWNMI capable
guest kernel by building an error log and invoking
the guest registered machine check handling routine.

The KVM changes are now part of the upstream kernel
(commit e20bbd3d). This series contains the QEMU changes.

Change Log v15:
  - Removed cap_ppc_fwnmi
  - Moved fwnmi registration to .apply hook
  - Assume SLOF has allocated enough room for rtas error log
  - Using ARRAY_SIZE to end the loop
  - Do not set FWNMI cap in post_load, now it's done in .apply hook

Change Log v14:
  - Feature activation moved to a separate patch
  - Fixed issues with migration blocker

Change Log v13:
  - Minor fixes (mostly nits)
  - Moved FWNMI guest registration check from patch 4 to 3.

Change Log v12:
  - Rebased to latest ppc-for-4.2 (SHA b1e8156743)

Change Log v11:
  - Moved FWNMI SPAPR cap defaults to 4.2 class option
  - Fixed issues with handling fwnmi KVM capability

Change Log v10:
  - Reshuffled the patch sequence + minor fixes

Change Log v9:
  - Fixed kvm cap and spapr cap issues

Change Log v8:
  - Added functionality to check FWNMI capability during
VM migration

---

Aravinda Prasad (7):
  Wrapper function to wait on condition for the main loop mutex
  ppc: spapr: Introduce FWNMI capability
  target/ppc: Handle NMI guest exit
  target/ppc: Build rtas error log upon an MCE
  ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS
calls
  migration: Include migration support for machine check handling
  ppc: spapr: Activate the FWNMI functionality

 cpus.c   |   5 +
 hw/ppc/spapr.c   |  51 
 hw/ppc/spapr_caps.c  |  34 +
 hw/ppc/spapr_events.c| 269 +++
 hw/ppc/spapr_rtas.c  |  85 +
 include/hw/ppc/spapr.h   |  25 +++-
 include/qemu/main-loop.h |   8 ++
 target/ppc/kvm.c |  24 
 target/ppc/kvm_ppc.h |   8 ++
 target/ppc/trace-events  |   1 +
 10 files changed, 508 insertions(+), 2 deletions(-)

-- 
2.17.2




[PATCH v15 5/7] ppc: spapr: Handle "ibm,nmi-register" and "ibm,nmi-interlock" RTAS calls

2019-10-09 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch adds support in QEMU to handle "ibm,nmi-register"
and "ibm,nmi-interlock" RTAS calls.

The machine check notification address is saved when the
OS issues "ibm,nmi-register" RTAS call.

This patch also handles the case when multiple processors
experience machine check at or about the same time by
handling "ibm,nmi-interlock" call. In such cases, as per
PAPR, subsequent processors serialize waiting for the first
processor to issue the "ibm,nmi-interlock" call. The second
processor that also received a machine check error waits
till the first processor is done reading the error log.
The first processor issues "ibm,nmi-interlock" call
when the error log is consumed.

[Move fwnmi registration to .apply hook]
Signed-off-by: Ganesh Goudar 
Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr_caps.c|  9 +--
 hw/ppc/spapr_rtas.c| 57 ++
 include/hw/ppc/spapr.h |  5 +++-
 3 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 778bf32181..b8876f2c2e 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -509,9 +509,14 @@ static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, 
uint8_t val,
  * of software injected faults like duplicate SLBs).
  */
 warn_report("Firmware Assisted Non-Maskable Interrupts not supported 
in TCG");
-} else if (kvm_enabled() && kvmppc_set_fwnmi()) {
-error_setg(errp,
+} else if (kvm_enabled()) {
+   if (!kvmppc_set_fwnmi()) {
+   /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
+   spapr_fwnmi_register();
+   } else {
+   error_setg(errp,
 "Firmware Assisted Non-Maskable Interrupts not supported by KVM, try 
cap-fwnmi-mce=off");
+   }
 }
 }
 
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index d8fb8a8443..b56953841a 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -400,6 +400,55 @@ static void rtas_get_power_level(PowerPCCPU *cpu, 
SpaprMachineState *spapr,
 rtas_st(rets, 1, 100);
 }
 
+static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
+  SpaprMachineState *spapr,
+  uint32_t token, uint32_t nargs,
+  target_ulong args,
+  uint32_t nret, target_ulong rets)
+{
+hwaddr rtas_addr = spapr_get_rtas_addr();
+
+if (!rtas_addr) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+return;
+}
+
+spapr->guest_machine_check_addr = rtas_ld(args, 1);
+rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
+   SpaprMachineState *spapr,
+   uint32_t token, uint32_t nargs,
+   target_ulong args,
+   uint32_t nret, target_ulong rets)
+{
+if (spapr->guest_machine_check_addr == -1) {
+/* NMI register not called */
+rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+return;
+}
+
+if (spapr->mc_status != cpu->vcpu_id) {
+/* The vCPU that hit the NMI should invoke "ibm,nmi-interlock" */
+rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+return;
+}
+
+/*
+ * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
+ * hence unset mc_status.
+ */
+spapr->mc_status = -1;
+qemu_cond_signal(&spapr->mc_delivery_cond);
+rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
 static struct rtas_call {
 const char *name;
 spapr_rtas_fn fn;
@@ -544,6 +593,14 @@ hwaddr spapr_get_rtas_addr(void)
 return (hwaddr)fdt32_to_cpu(*rtas_data);
 }
 
+void spapr_fwnmi_register(void)
+{
+spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
+rtas_ibm_nmi_register);
+spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
+rtas_ibm_nmi_interlock);
+}
+
 static void core_rtas_register_types(void)
 {
 spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index ffefde77d0..dada821d21 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -655,8 +655,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong 
opcode,
 #define RTAS_IBM_REMOVE_PE_DMA_WINDOW   (RTAS_TOKEN_BASE + 0x28)
 #define RTAS_IBM_RESET_PE_DMA_WINDOW(RTAS_TOKEN_BASE + 0x29)
 #define RTAS_IBM_SUSPEND_ME (RTAS_TOKEN_BASE + 0x2A)
+#define RTAS_IBM_NMI_REGISTER   (RTAS_TOKEN_BASE + 0x2B)
+#

[PATCH v15 4/7] target/ppc: Build rtas error log upon an MCE

2019-10-09 Thread Ganesh Goudar
From: Aravinda Prasad 

Upon a machine check exception (MCE) in a guest address space,
KVM causes a guest exit to enable QEMU to build and pass the
error to the guest in the PAPR defined rtas error log format.

This patch builds the rtas error log, copies it to the rtas_addr
and then invokes the guest registered machine check handler. The
handler in the guest takes suitable action(s) depending on the type
and criticality of the error. For example, if an error is
unrecoverable memory corruption in an application inside the
guest, then the guest kernel sends a SIGBUS to the application.
For recoverable errors, the guest performs recovery actions and
logs the error.

[Assume SLOF has allocated enough room for rtas error log]
Signed-off-by: Ganesh Goudar 
Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr_events.c  | 220 -
 hw/ppc/spapr_rtas.c|  26 +
 include/hw/ppc/spapr.h |   6 +-
 target/ppc/kvm.c   |   4 +-
 4 files changed, 253 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 0ce96b86be..5624bdac6c 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -214,6 +214,104 @@ struct hp_extended_log {
 struct rtas_event_log_v6_hp hp;
 } QEMU_PACKED;
 
+struct rtas_event_log_v6_mc {
+#define RTAS_LOG_V6_SECTION_ID_MC   0x4D43 /* MC */
+struct rtas_event_log_v6_section_header hdr;
+uint32_t fru_id;
+uint32_t proc_id;
+uint8_t error_type;
+#define RTAS_LOG_V6_MC_TYPE_UE   0
+#define RTAS_LOG_V6_MC_TYPE_SLB  1
+#define RTAS_LOG_V6_MC_TYPE_ERAT 2
+#define RTAS_LOG_V6_MC_TYPE_TLB  4
+#define RTAS_LOG_V6_MC_TYPE_D_CACHE  5
+#define RTAS_LOG_V6_MC_TYPE_I_CACHE  7
+uint8_t sub_err_type;
+#define RTAS_LOG_V6_MC_UE_INDETERMINATE  0
+#define RTAS_LOG_V6_MC_UE_IFETCH 1
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
+#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
+#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+#define RTAS_LOG_V6_MC_SLB_PARITY0
+#define RTAS_LOG_V6_MC_SLB_MULTIHIT  1
+#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2
+#define RTAS_LOG_V6_MC_ERAT_PARITY   1
+#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2
+#define RTAS_LOG_V6_MC_ERAT_INDETERMINATE3
+#define RTAS_LOG_V6_MC_TLB_PARITY1
+#define RTAS_LOG_V6_MC_TLB_MULTIHIT  2
+#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3
+uint8_t reserved_1[6];
+uint64_t effective_address;
+uint64_t logical_address;
+} QEMU_PACKED;
+
+struct mc_extended_log {
+struct rtas_event_log_v6 v6hdr;
+struct rtas_event_log_v6_mc mc;
+} QEMU_PACKED;
+
+struct MC_ierror_table {
+unsigned long srr1_mask;
+unsigned long srr1_value;
+bool nip_valid; /* nip is a valid indicator of faulting address */
+uint8_t error_type;
+uint8_t error_subtype;
+unsigned int initiator;
+unsigned int severity;
+};
+
+static const struct MC_ierror_table mc_ierror_table[] = {
+{ 0x081c, 0x0004, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0008, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x000c, true,
+  RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0010, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0014, true,
+  RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x081c, 0x0018, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }};
+
+struct MC_derror_table {
+unsigned long dsisr_value;
+bool dar_valid; /* dar is a valid indicator of faulting address */
+uint8_t error_type;
+uint8_t error_subtype;
+unsigned int initiator;
+unsigned int severity;
+};
+
+static const struct MC_derror_table mc_derror_table[] = {
+{ 0x8000, false,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x4000, true,
+  RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
+  RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+{ 0x0800, true,
+  RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+  RTAS_LOG_INITIATOR_CPU

[PATCH v15 2/7] ppc: spapr: Introduce FWNMI capability

2019-10-09 Thread Ganesh Goudar
From: Aravinda Prasad 

Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
KVM causes a guest exit with NMI as the exit reason
when it encounters a machine check exception on an
address belonging to a guest. Without this capability
enabled, KVM redirects machine check exceptions to the
guest's 0x200 vector.

This patch also introduces the fwnmi-mce capability to
deal with the case where a guest with the
KVM_CAP_PPC_FWNMI capability enabled is being migrated
to a host that does not support this
capability.

[eliminate cap_ppc_fwnmi]
Signed-off-by: Ganesh Goudar 
Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr.c |  1 +
 hw/ppc/spapr_caps.c| 29 +
 include/hw/ppc/spapr.h |  4 +++-
 target/ppc/kvm.c   |  8 
 target/ppc/kvm_ppc.h   |  6 ++
 5 files changed, 47 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 08a2a5a770..9b5f65c655 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4488,6 +4488,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
 smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
 smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 spapr_caps_add_properties(smc, &error_abort);
 smc->irq = &spapr_irq_dual;
 smc->dr_phb_enabled = true;
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 481dfd2a27..778bf32181 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -496,6 +496,25 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, 
uint8_t val,
 }
 }
 
+static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
+Error **errp)
+{
+if (!val) {
+return; /* Disabled by default */
+}
+
+if (tcg_enabled()) {
+/*
+ * TCG support may not be correct in some conditions (e.g., in case
+ * of software injected faults like duplicate SLBs).
+ */
+warn_report("Firmware Assisted Non-Maskable Interrupts not supported 
in TCG");
+} else if (kvm_enabled() && kvmppc_set_fwnmi()) {
+error_setg(errp,
+"Firmware Assisted Non-Maskable Interrupts not supported by KVM, try 
cap-fwnmi-mce=off");
+}
+}
+
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
 [SPAPR_CAP_HTM] = {
 .name = "htm",
@@ -595,6 +614,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
 .type = "bool",
 .apply = cap_ccf_assist_apply,
 },
+[SPAPR_CAP_FWNMI_MCE] = {
+.name = "fwnmi-mce",
+.description = "Handle fwnmi machine check exceptions",
+.index = SPAPR_CAP_FWNMI_MCE,
+.get = spapr_cap_get_bool,
+.set = spapr_cap_set_bool,
+.type = "bool",
+.apply = cap_fwnmi_mce_apply,
+},
 };
 
 static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -734,6 +762,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, 
SPAPR_CAP_HPT_MAXPAGESIZE);
 SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
 SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
+SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
 
 void spapr_caps_init(SpaprMachineState *spapr)
 {
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 03111fd55b..66049ac611 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -79,8 +79,10 @@ typedef enum {
 #define SPAPR_CAP_LARGE_DECREMENTER 0x08
 /* Count Cache Flush Assist HW Instruction */
 #define SPAPR_CAP_CCF_ASSIST0x09
+/* FWNMI machine check handling */
+#define SPAPR_CAP_FWNMI_MCE 0x0A
 /* Num Caps */
-#define SPAPR_CAP_NUM   (SPAPR_CAP_CCF_ASSIST + 1)
+#define SPAPR_CAP_NUM   (SPAPR_CAP_FWNMI_MCE + 1)
 
 /*
  * Capability Values
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 8c5b1f25cc..e9cae0ded6 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2055,6 +2055,14 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int 
mpic_proxy)
 }
 }
 
+int kvmppc_set_fwnmi(void)
+{
+PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+CPUState *cs = CPU(cpu);
+
+return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
+}
+
 int kvmppc_smt_threads(void)
 {
 return cap_ppc_smt ? cap_ppc_smt : 1;
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 98bd7d5da6..5727a5025f 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -27,6 +27,7 @@ void kvmppc_enable_h_page_init(void);
 void kvmppc_set_papr(PowerPCCPU *cpu);
 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
+int kvmppc_set_fwnmi(void);
 int kvmppc_smt_threads(void);
 void kvmppc_hint_smt_possi

[PATCH v15 6/7] migration: Include migration support for machine check handling

2019-10-09 Thread Ganesh Goudar
From: Aravinda Prasad 

This patch includes migration support for machine check
handling. In particular, this patch blocks VM migration
requests until the machine check error handling is
complete, as these errors are specific to the source
hardware and are irrelevant on the target hardware.

[Do not set FWNMI cap in post_load, now it's done in .apply hook]
Signed-off-by: Ganesh Goudar 
Signed-off-by: Aravinda Prasad 
---
 hw/ppc/spapr.c | 41 +
 hw/ppc/spapr_events.c  | 16 +++-
 hw/ppc/spapr_rtas.c|  2 ++
 include/hw/ppc/spapr.h |  2 ++
 4 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index b7b4196545..eb267d4c43 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -46,6 +46,7 @@
 #include "migration/qemu-file-types.h"
 #include "migration/global_state.h"
 #include "migration/register.h"
+#include "migration/blocker.h"
 #include "mmu-hash64.h"
 #include "mmu-book3s-v3.h"
 #include "cpu-models.h"
@@ -1830,6 +1831,8 @@ static void spapr_machine_reset(MachineState *machine)
 
 /* Signal all vCPUs waiting on this condition */
 qemu_cond_broadcast(&spapr->mc_delivery_cond);
+
+migrate_del_blocker(spapr->fwnmi_migration_blocker);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -2120,6 +2123,43 @@ static const VMStateDescription vmstate_spapr_dtb = {
 },
 };
 
+static bool spapr_fwnmi_needed(void *opaque)
+{
+SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+return spapr->guest_machine_check_addr != -1;
+}
+
+static int spapr_fwnmi_pre_save(void *opaque)
+{
+    SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+    /*
+     * With -only-migratable QEMU option, we cannot block migration.
+     * Hence check if machine check handling is in progress (mc_status
+     * is not -1) and print a warning message; the save itself proceeds.
+     */
+    if (spapr->mc_status != -1) {
+        warn_report("A machine check is being handled during migration. The "
+                    "handler may run and log hardware error on the destination");
+    }
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_spapr_machine_check = {
+.name = "spapr_machine_check",
+.version_id = 1,
+.minimum_version_id = 1,
+.needed = spapr_fwnmi_needed,
+.pre_save = spapr_fwnmi_pre_save,
+.fields = (VMStateField[]) {
+VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState),
+VMSTATE_INT32(mc_status, SpaprMachineState),
+VMSTATE_END_OF_LIST()
+},
+};
+
 static const VMStateDescription vmstate_spapr = {
 .name = "spapr",
 .version_id = 3,
@@ -2153,6 +2193,7 @@ static const VMStateDescription vmstate_spapr = {
 &vmstate_spapr_dtb,
 &vmstate_spapr_cap_large_decr,
 &vmstate_spapr_cap_ccf_assist,
+&vmstate_spapr_machine_check,
 NULL
 }
 };
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 5624bdac6c..ff34c2645a 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -43,6 +43,7 @@
 #include "qemu/main-loop.h"
 #include "hw/ppc/spapr_ovec.h"
 #include <libfdt.h>
+#include "migration/blocker.h"
 
 #define RTAS_LOG_VERSION_MASK                   0xff000000
 #define   RTAS_LOG_VERSION_6                    0x06000000
@@ -842,6 +843,8 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 {
 SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 CPUState *cs = CPU(cpu);
+int ret;
+Error *local_err = NULL;
 
 if (spapr->guest_machine_check_addr == -1) {
 /*
@@ -871,8 +874,19 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 return;
 }
 }
-spapr->mc_status = cpu->vcpu_id;
 
+ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err);
+if (ret == -EBUSY) {
+/*
+ * We don't want to abort so we let the migration to continue.
+ * In a rare case, the machine check handler will run on the target.
+ * Though this is not preferable, it is better than aborting
+ * the migration or killing the VM.
+ */
+warn_report_err(local_err);
+}
+
+spapr->mc_status = cpu->vcpu_id;
 spapr_mce_dispatch_elog(cpu, recovered);
 }
 
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index b56953841a..c652ec3caa 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -50,6 +50,7 @@
 #include "hw/ppc/fdt.h"
 #include "target/ppc/mmu-hash64.h"
 #include "target/ppc/mmu-book3s-v3.h"
+#include "migration/blocker.h"
 
 static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
uint32_t token, uint32_t nargs,
@@ -446,6 +447,7 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
  */
 spapr->mc_status = -1;
 qemu_cond_signal(&spapr->mc_delivery_cond);
+

[PATCH v15 3/7] target/ppc: Handle NMI guest exit

2019-10-09 Thread Ganesh Goudar
From: Aravinda Prasad 

Memory errors such as bit flips that cannot be corrected
by hardware are passed on to the kernel for handling.
If the memory address in error belongs to a guest, then
the guest kernel is responsible for taking suitable action.
Patch [1] enhances KVM to exit the guest with the exit reason
set to KVM_EXIT_NMI in such cases. This patch handles
the KVM_EXIT_NMI exit.

[1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
(e20bbd3d and related commits)

Signed-off-by: Aravinda Prasad 
Reviewed-by: David Gibson 
Reviewed-by: Greg Kurz 
---
 hw/ppc/spapr.c  |  8 
 hw/ppc/spapr_events.c   | 37 +
 include/hw/ppc/spapr.h  | 10 ++
 target/ppc/kvm.c| 14 ++
 target/ppc/kvm_ppc.h|  2 ++
 target/ppc/trace-events |  1 +
 6 files changed, 72 insertions(+)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 9b5f65c655..b7b4196545 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1824,6 +1824,12 @@ static void spapr_machine_reset(MachineState *machine)
 first_ppc_cpu->env.gpr[5] = 0;
 
 spapr->cas_reboot = false;
+
+spapr->mc_status = -1;
+spapr->guest_machine_check_addr = -1;
+
+/* Signal all vCPUs waiting on this condition */
+qemu_cond_broadcast(&spapr->mc_delivery_cond);
 }
 
 static void spapr_create_nvram(SpaprMachineState *spapr)
@@ -3100,6 +3106,8 @@ static void spapr_machine_init(MachineState *machine)
 
 kvmppc_spapr_enable_inkernel_multitce();
 }
+
+qemu_cond_init(&spapr->mc_delivery_cond);
 }
 
 static int spapr_kvm_type(MachineState *machine, const char *vm_type)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 0e4c19523a..0ce96b86be 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -40,6 +40,7 @@
 #include "hw/ppc/spapr_drc.h"
 #include "qemu/help_option.h"
 #include "qemu/bcd.h"
+#include "qemu/main-loop.h"
 #include "hw/ppc/spapr_ovec.h"
 #include <libfdt.h>
 
@@ -621,6 +622,42 @@ void 
spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
 RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
 }
 
+void spapr_mce_req_event(PowerPCCPU *cpu)
+{
+SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+CPUState *cs = CPU(cpu);
+
+if (spapr->guest_machine_check_addr == -1) {
+/*
+ * This implies that we have hit a machine check either when the
+ * guest has not registered FWNMI (i.e., "ibm,nmi-register" not
+ * called) or between system reset and "ibm,nmi-register".
+ * Fall back to the old machine check behavior in such cases.
+ */
+cs->exception_index = POWERPC_EXCP_MCHECK;
+ppc_cpu_do_interrupt(cs);
+return;
+}
+
+while (spapr->mc_status != -1) {
+/*
+ * Check whether the same CPU got machine check error
+ * while still handling the mc error (i.e., before
+ * that CPU called "ibm,nmi-interlock")
+ */
+if (spapr->mc_status == cpu->vcpu_id) {
+qemu_system_guest_panicked(NULL);
+return;
+}
+qemu_cond_wait_iothread(&spapr->mc_delivery_cond);
+/* Meanwhile if the system is reset, then just return */
+if (spapr->guest_machine_check_addr == -1) {
+return;
+}
+}
+spapr->mc_status = cpu->vcpu_id;
+}
+
 static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
 uint32_t token, uint32_t nargs,
 target_ulong args,
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 66049ac611..99a29668b0 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -192,6 +192,15 @@ struct SpaprMachineState {
  * occurs during the unplug process. */
 QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs;
 
+/* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */
+target_ulong guest_machine_check_addr;
+/*
+ * mc_status is set to -1 if mc is not in progress, else is set to the CPU
+ * handling the mc.
+ */
+int mc_status;
+QemuCond mc_delivery_cond;
+
 /*< public >*/
 char *kvm_type;
 char *host_model;
@@ -805,6 +814,7 @@ void spapr_clear_pending_events(SpaprMachineState *spapr);
 int spapr_max_server_number(SpaprMachineState *spapr);
 void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
   uint64_t pte0, uint64_t pte1);
+void spapr_mce_req_event(PowerPCCPU *cpu);
 
 /* DRC callbacks. */
 void spapr_core_release(DeviceState *dev);
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index e9cae0ded6..5b97eadc3a 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -1703,6 +1703,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run 
*run)
 ret = 0;
 break;
 
+case KVM_EXIT_NMI:
+trace_kvm_handle_nmi_exception();
+ret = kvm_handle_nmi(cpu, run);
+break;
+
 default:
 fprintf(stderr, "KVM: unknown exit reason %d\n",