Re: [PATCH] KVM: arm/arm64: vgic: Fix deadlock on error handling

2017-01-12 Thread Christoffer Dall
On Thu, Jan 12, 2017 at 04:16:14PM +, Marc Zyngier wrote:
> Dmitry Vyukov reported that the syzkaller fuzzer triggered a
> deadlock in the vgic setup code when an error was detected, as
> the cleanup code tries to take a lock that is already held by
> the setup code.
> 
> The fix is to avoid retaking the lock when cleaning up, by
> telling the cleanup function that we already hold it.
> 
> Cc: sta...@vger.kernel.org
> Signed-off-by: Marc Zyngier 

Reviewed-by: Christoffer Dall 

> ---
>  virt/kvm/arm/vgic/vgic-init.c | 18 +-
>  virt/kvm/arm/vgic/vgic-v2.c   |  2 --
>  virt/kvm/arm/vgic/vgic-v3.c   |  2 --
>  3 files changed, 13 insertions(+), 9 deletions(-)
> 
> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
> index 5114391..c737ea0 100644
> --- a/virt/kvm/arm/vgic/vgic-init.c
> +++ b/virt/kvm/arm/vgic/vgic-init.c
> @@ -268,15 +268,11 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
>  {
>   struct vgic_dist *dist = >arch.vgic;
>  
> - mutex_lock(>lock);
> -
>   dist->ready = false;
>   dist->initialized = false;
>  
>   kfree(dist->spis);
>   dist->nr_spis = 0;
> -
> - mutex_unlock(>lock);
>  }
>  
>  void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
> @@ -286,7 +282,8 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
>   INIT_LIST_HEAD(_cpu->ap_list_head);
>  }
>  
> -void kvm_vgic_destroy(struct kvm *kvm)
> +/* To be called with kvm->lock held */
> +static void __kvm_vgic_destroy(struct kvm *kvm)
>  {
>   struct kvm_vcpu *vcpu;
>   int i;
> @@ -297,6 +294,13 @@ void kvm_vgic_destroy(struct kvm *kvm)
>   kvm_vgic_vcpu_destroy(vcpu);
>  }
>  
> +void kvm_vgic_destroy(struct kvm *kvm)
> +{
> + mutex_lock(>lock);
> + __kvm_vgic_destroy(kvm);
> + mutex_unlock(>lock);
> +}
> +
>  /**
>   * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest
>   * is a GICv2. A GICv3 must be explicitly initialized by the guest using the
> @@ -348,6 +352,10 @@ int kvm_vgic_map_resources(struct kvm *kvm)
>   ret = vgic_v2_map_resources(kvm);
>   else
>   ret = vgic_v3_map_resources(kvm);
> +
> + if (ret)
> + __kvm_vgic_destroy(kvm);
> +
>  out:
>   mutex_unlock(>lock);
>   return ret;
> diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
> index 9bab867..834137e 100644
> --- a/virt/kvm/arm/vgic/vgic-v2.c
> +++ b/virt/kvm/arm/vgic/vgic-v2.c
> @@ -293,8 +293,6 @@ int vgic_v2_map_resources(struct kvm *kvm)
>   dist->ready = true;
>  
>  out:
> - if (ret)
> - kvm_vgic_destroy(kvm);
>   return ret;
>  }
>  
> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
> index 7df1b90..a4c7fff 100644
> --- a/virt/kvm/arm/vgic/vgic-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-v3.c
> @@ -308,8 +308,6 @@ int vgic_v3_map_resources(struct kvm *kvm)
>   dist->ready = true;
>  
>  out:
> - if (ret)
> - kvm_vgic_destroy(kvm);
>   return ret;
>  }
>  
> -- 
> 2.1.4
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH V7 10/10] arm/arm64: KVM: add guest SEA support

2017-01-12 Thread Tyler Baicar
Currently external aborts are unsupported by the guest abort
handling. Add handling for SEAs so that the host kernel reports
SEAs which occur in the guest kernel.

Signed-off-by: Tyler Baicar 
---
 arch/arm/include/asm/kvm_arm.h   |  1 +
 arch/arm/include/asm/system_misc.h   |  5 +
 arch/arm/kvm/mmu.c   | 18 --
 arch/arm64/include/asm/kvm_arm.h |  1 +
 arch/arm64/include/asm/system_misc.h |  2 ++
 arch/arm64/mm/fault.c| 13 +
 6 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index e22089f..33a77509 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -187,6 +187,7 @@
 #define FSC_FAULT  (0x04)
 #define FSC_ACCESS (0x08)
 #define FSC_PERM   (0x0c)
+#define FSC_EXTABT (0x10)
 
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
 #define HPFAR_MASK (~0xf)
diff --git a/arch/arm/include/asm/system_misc.h 
b/arch/arm/include/asm/system_misc.h
index a3d61ad..ea45d94 100644
--- a/arch/arm/include/asm/system_misc.h
+++ b/arch/arm/include/asm/system_misc.h
@@ -24,4 +24,9 @@ extern unsigned int user_debug;
 
 #endif /* !__ASSEMBLY__ */
 
+static inline int handle_guest_sea(unsigned long addr, unsigned int esr)
+{
+   return -1;
+}
+
 #endif /* __ASM_ARM_SYSTEM_MISC_H */
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index e9a5c0e..1152966 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "trace.h"
 
@@ -1441,8 +1442,21 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
 
/* Check the stage-2 fault is trans. fault or write fault */
fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
-   if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
-   fault_status != FSC_ACCESS) {
+
+   /* The host kernel will handle the synchronous external abort. There
+* is no need to pass the error into the guest.
+*/
+   if (fault_status == FSC_EXTABT) {
+   if(handle_guest_sea((unsigned long)fault_ipa,
+   kvm_vcpu_get_hsr(vcpu))) {
+   kvm_err("Failed to handle guest SEA, FSC: EC=%#x 
xFSC=%#lx ESR_EL2=%#lx\n",
+   kvm_vcpu_trap_get_class(vcpu),
+   (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
+   (unsigned long)kvm_vcpu_get_hsr(vcpu));
+   return -EFAULT;
+   }
+   } else if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
+  fault_status != FSC_ACCESS) {
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
kvm_vcpu_trap_get_class(vcpu),
(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 4b5c977..be0efb6 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -201,6 +201,7 @@
 #define FSC_FAULT  ESR_ELx_FSC_FAULT
 #define FSC_ACCESS ESR_ELx_FSC_ACCESS
 #define FSC_PERM   ESR_ELx_FSC_PERM
+#define FSC_EXTABT ESR_ELx_FSC_EXTABT
 
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
 #define HPFAR_MASK (~UL(0xf))
diff --git a/arch/arm64/include/asm/system_misc.h 
b/arch/arm64/include/asm/system_misc.h
index e7f3440..27816cb 100644
--- a/arch/arm64/include/asm/system_misc.h
+++ b/arch/arm64/include/asm/system_misc.h
@@ -77,4 +77,6 @@ extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, 
const char *cmd);
 int register_sea_notifier(struct notifier_block *nb);
 void unregister_sea_notifier(struct notifier_block *nb);
 
+int handle_guest_sea(unsigned long addr, unsigned int esr);
+
 #endif /* __ASM_SYSTEM_MISC_H */
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 81039c7..fa8d4d7 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -597,6 +597,19 @@ static const char *fault_name(unsigned int esr)
 }
 
 /*
+ * Handle Synchronous External Aborts that occur in a guest kernel.
+ */
+int handle_guest_sea(unsigned long addr, unsigned int esr)
+{
+   atomic_notifier_call_chain(_handler_chain, 0, NULL);
+
+   pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
+   fault_name(esr), esr, addr);
+
+   return 0;
+}
+
+/*
  * Dispatch a data abort to the relevant handler.
  */
 asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm 
Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu

[PATCH V7 08/10] ras: acpi / apei: generate trace event for unrecognized CPER section

2017-01-12 Thread Tyler Baicar
UEFI spec allows for non-standard section in Common Platform Error
Record. This is defined in section N.2.3 of UEFI version 2.5.

Currently if the CPER section's type (UUID) does not match with
any section type that the kernel knows how to parse, trace event
is not generated for such section. And thus user is not able to know
happening of such hardware error, including error record of
non-standard section.

This commit generates a trace event which contains raw error data
for unrecognized CPER section.

Signed-off-by: Tyler Baicar 
Signed-off-by: Jonathan (Zhixiong) Zhang 
---
 drivers/acpi/apei/ghes.c | 22 --
 drivers/ras/ras.c|  1 +
 include/ras/ras_event.h  | 45 +
 3 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 4e9a20d..a32c1b1 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -45,11 +45,13 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_HAVE_ACPI_APEI_SEA
 #include 
@@ -458,11 +460,21 @@ static void ghes_do_proc(struct ghes *ghes,
 {
int sev, sec_sev;
struct acpi_hest_generic_data *gdata;
+   uuid_le sec_type;
+   uuid_le *fru_id = _UUID_LE;
+   char *fru_text = "";
 
sev = ghes_severity(estatus->error_severity);
apei_estatus_for_each_section(estatus, gdata) {
sec_sev = ghes_severity(gdata->error_severity);
-   if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+   sec_type = *(uuid_le *)gdata->section_type;
+
+   if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+   fru_id = (uuid_le *)gdata->fru_id;
+   if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+   fru_text = gdata->fru_text;
+
+   if (!uuid_le_cmp(sec_type,
 CPER_SEC_PLATFORM_MEM)) {
struct cper_sec_mem_err *mem_err;
 
@@ -473,7 +485,7 @@ static void ghes_do_proc(struct ghes *ghes,
ghes_handle_memory_failure(gdata, sev);
}
 #ifdef CONFIG_ACPI_APEI_PCIEAER
-   else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+   else if (!uuid_le_cmp(sec_type,
  CPER_SEC_PCIE)) {
struct cper_sec_pcie *pcie_err;
 
@@ -506,6 +518,12 @@ static void ghes_do_proc(struct ghes *ghes,
 
}
 #endif
+   else {
+   void *unknown_err = 
acpi_hest_generic_data_payload(gdata);
+   trace_unknown_sec_event(_type,
+   fru_id, fru_text, sec_sev,
+   unknown_err, gdata->error_data_length);
+   }
}
 }
 
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index b67dd36..fb2500b 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -27,3 +27,4 @@ subsys_initcall(ras_init);
 EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
 #endif
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(unknown_sec_event);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 1791a12..5861b6f 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -162,6 +162,51 @@ TRACE_EVENT(mc_event,
 );
 
 /*
+ * Unknown Section Report
+ *
+ * This event is generated when hardware detected a hardware
+ * error event, which may be of non-standard section as defined
+ * in UEFI spec appendix "Common Platform Error Record", or may
+ * be of sections for which TRACE_EVENT is not defined.
+ *
+ */
+TRACE_EVENT(unknown_sec_event,
+
+   TP_PROTO(const uuid_le *sec_type,
+const uuid_le *fru_id,
+const char *fru_text,
+const u8 sev,
+const u8 *err,
+const u32 len),
+
+   TP_ARGS(sec_type, fru_id, fru_text, sev, err, len),
+
+   TP_STRUCT__entry(
+   __array(char, sec_type, 16)
+   __array(char, fru_id, 16)
+   __string(fru_text, fru_text)
+   __field(u8, sev)
+   __field(u32, len)
+   __dynamic_array(u8, buf, len)
+   ),
+
+   TP_fast_assign(
+   memcpy(__entry->sec_type, sec_type, sizeof(uuid_le));
+   memcpy(__entry->fru_id, fru_id, sizeof(uuid_le));
+   __assign_str(fru_text, fru_text);
+   __entry->sev = sev;
+   __entry->len = len;
+   memcpy(__get_dynamic_array(buf), err, len);
+   ),
+
+   TP_printk("severity: %d; sec type:%pU; FRU: %pU %s; data len:%d; raw 
data:%s",
+ __entry->sev, __entry->sec_type,
+ __entry->fru_id, __get_str(fru_text),
+ __entry->len,
+ 

[PATCH V7 05/10] acpi: apei: handle SEA notification type for ARMv8

2017-01-12 Thread Tyler Baicar
ARM APEI extension proposal added SEA (Synchrounous External
Abort) notification type for ARMv8.
Add a new GHES error source handling function for SEA. If an error
source's notification type is SEA, then this function can be registered
into the SEA exception handler. That way GHES will parse and report
SEA exceptions when they occur.

Signed-off-by: Tyler Baicar 
Signed-off-by: Jonathan (Zhixiong) Zhang 
Signed-off-by: Naveen Kaje 
---
 arch/arm64/Kconfig|  2 ++
 drivers/acpi/apei/Kconfig | 14 
 drivers/acpi/apei/ghes.c  | 84 +++
 3 files changed, 100 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b380c87..0465601 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -53,6 +53,8 @@ config ARM64
select HANDLE_DOMAIN_IRQ
select HARDIRQS_SW_RESEND
select HAVE_ACPI_APEI if (ACPI && EFI)
+   select HAVE_ACPI_APEI_SEA if (ACPI && EFI)
+   select HAVE_NMI if HAVE_ACPI_APEI_SEA
select HAVE_ALIGNED_STRUCT_PAGE if SLUB
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_BITREVERSE
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index b0140c8..3786ff1 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -4,6 +4,20 @@ config HAVE_ACPI_APEI
 config HAVE_ACPI_APEI_NMI
bool
 
+config HAVE_ACPI_APEI_SEA
+   bool "APEI Synchronous External Abort logging/recovering support"
+   depends on ARM64
+   help
+ This option should be enabled if the system supports
+ firmware first handling of SEA (Synchronous External Abort).
+ SEA happens with certain faults of data abort or instruction
+ abort synchronous exceptions on ARMv8 systems. If a system
+ supports firmware first handling of SEA, the platform analyzes
+ and handles hardware error notifications with SEA, and it may then
+ form a HW error record for the OS to parse and handle. This
+ option allows the OS to look for such HW error record, and
+ take appropriate action.
+
 config ACPI_APEI
bool "ACPI Platform Error Interface (APEI)"
select MISC_FILESYSTEMS
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 2acbc60..87efe26 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -44,12 +44,17 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
 #include 
 #include 
 
+#ifdef CONFIG_HAVE_ACPI_APEI_SEA
+#include 
+#endif
+
 #include "apei-internal.h"
 
 #define GHES_PFX   "GHES: "
@@ -767,6 +772,62 @@ static struct notifier_block ghes_notifier_sci = {
.notifier_call = ghes_notify_sci,
 };
 
+#ifdef CONFIG_HAVE_ACPI_APEI_SEA
+static LIST_HEAD(ghes_sea);
+
+static int ghes_notify_sea(struct notifier_block *this,
+ unsigned long event, void *data)
+{
+   struct ghes *ghes;
+   int ret = NOTIFY_DONE;
+
+   nmi_enter();
+   list_for_each_entry_rcu(ghes, _sea, list) {
+   if (!ghes_proc(ghes))
+   ret = NOTIFY_OK;
+   }
+   nmi_exit();
+
+   return ret;
+}
+
+static struct notifier_block ghes_notifier_sea = {
+   .notifier_call = ghes_notify_sea,
+};
+
+static int ghes_sea_add(struct ghes *ghes)
+{
+   mutex_lock(_list_mutex);
+   if (list_empty(_sea))
+   register_sea_notifier(_notifier_sea);
+   list_add_rcu(>list, _sea);
+   mutex_unlock(_list_mutex);
+   return 0;
+}
+
+static void ghes_sea_remove(struct ghes *ghes)
+{
+   mutex_lock(_list_mutex);
+   list_del_rcu(>list);
+   if (list_empty(_sea))
+   unregister_sea_notifier(_notifier_sea);
+   mutex_unlock(_list_mutex);
+}
+#else /* CONFIG_HAVE_ACPI_APEI_SEA */
+static inline int ghes_sea_add(struct ghes *ghes)
+{
+   pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not 
supported\n",
+  ghes->generic->header.source_id);
+   return -ENOTSUPP;
+}
+
+static inline void ghes_sea_remove(struct ghes *ghes)
+{
+   pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not 
supported\n",
+  ghes->generic->header.source_id);
+}
+#endif /* CONFIG_HAVE_ACPI_APEI_SEA */
+
 #ifdef CONFIG_HAVE_ACPI_APEI_NMI
 /*
  * printk is not safe in NMI context.  So in NMI handler, we allocate
@@ -1011,6 +1072,14 @@ static int ghes_probe(struct platform_device *ghes_dev)
case ACPI_HEST_NOTIFY_EXTERNAL:
case ACPI_HEST_NOTIFY_SCI:
break;
+   case ACPI_HEST_NOTIFY_SEA:
+   if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_SEA)) {
+   pr_warn(GHES_PFX "Generic hardware error source: %d 
notified via SEA is not supported\n",
+   generic->header.source_id);
+   rc = -ENOTSUPP;
+   goto err;
+ 

[PATCH V7 09/10] trace, ras: add ARM processor error trace event

2017-01-12 Thread Tyler Baicar
Currently there are trace events for the various RAS
errors with the exception of ARM processor type errors.
Add a new trace event for such errors so that the user
will know when they occur. These trace events are
consistent with the ARM processor error section type
defined in UEFI 2.6 spec section N.2.4.4.

Signed-off-by: Tyler Baicar 
Acked-by: Steven Rostedt 
---
 drivers/acpi/apei/ghes.c|  7 ++-
 drivers/firmware/efi/cper.c |  1 +
 drivers/ras/ras.c   |  1 +
 include/ras/ras_event.h | 34 ++
 4 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index a32c1b1..324a032 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -518,7 +518,12 @@ static void ghes_do_proc(struct ghes *ghes,
 
}
 #endif
-   else {
+   else if (!uuid_le_cmp(sec_type, CPER_SEC_PROC_ARM)) {
+   struct cper_sec_proc_arm *arm_err;
+
+   arm_err = acpi_hest_generic_data_payload(gdata);
+   trace_arm_event(arm_err);
+   } else {
void *unknown_err = 
acpi_hest_generic_data_payload(gdata);
trace_unknown_sec_event(_type,
fru_id, fru_text, sec_sev,
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 48cb8ee..0ec678e 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define INDENT_SP  " "
 
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index fb2500b..8ba5a94 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -28,3 +28,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
 #endif
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
 EXPORT_TRACEPOINT_SYMBOL_GPL(unknown_sec_event);
+EXPORT_TRACEPOINT_SYMBOL_GPL(arm_event);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 5861b6f..b36db48 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -162,6 +162,40 @@ TRACE_EVENT(mc_event,
 );
 
 /*
+ * ARM Processor Events Report
+ *
+ * This event is generated when hardware detects an ARM processor error
+ * has occurred. UEFI 2.6 spec section N.2.4.4.
+ */
+TRACE_EVENT(arm_event,
+
+   TP_PROTO(const struct cper_sec_proc_arm *proc),
+
+   TP_ARGS(proc),
+
+   TP_STRUCT__entry(
+   __field(u64, mpidr)
+   __field(u64, midr)
+   __field(u32, running_state)
+   __field(u32, psci_state)
+   __field(u8, affinity)
+   ),
+
+   TP_fast_assign(
+   __entry->affinity = proc->affinity_level;
+   __entry->mpidr = proc->mpidr;
+   __entry->midr = proc->midr;
+   __entry->running_state = proc->running_state;
+   __entry->psci_state = proc->psci_state;
+   ),
+
+   TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; "
+ "running state: %d; PSCI state: %d",
+ __entry->affinity, __entry->mpidr, __entry->midr,
+ __entry->running_state, __entry->psci_state)
+);
+
+/*
  * Unknown Section Report
  *
  * This event is generated when hardware detected a hardware
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm 
Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH V7 07/10] efi: print unrecognized CPER section

2017-01-12 Thread Tyler Baicar
UEFI spec allows for non-standard section in Common Platform Error
Record. This is defined in section N.2.3 of UEFI version 2.5.

Currently if the CPER section's type (UUID) does not match with
one of the section types that the kernel knows how to parse, the
section is skipped. Therefore, user is not able to see
such CPER data, for instance, error record of non-standard section.

For above mentioned case, this change prints out the raw data in
hex in dmesg buffer. Data length is taken from Error Data length
field of Generic Error Data Entry.

Following is a sample output from dmesg:
[  115.771702] {1}[Hardware Error]: Hardware error from APEI Generic Hardware 
Error Source: 2
[  115.779042] {1}[Hardware Error]: It has been corrected by h/w and requires 
no further action
[  115.787456] {1}[Hardware Error]: event severity: corrected
[  115.792927] {1}[Hardware Error]:  Error 0, type: corrected
[  115.798415] {1}[Hardware Error]:  fru_id: 
----
[  115.805596] {1}[Hardware Error]:  fru_text:
[  115.816105] {1}[Hardware Error]:  section type: 
d2e2621c-f936-468d-0d84-15a4ed015c8b
[  115.823880] {1}[Hardware Error]:  section length: 88
[  115.828779] {1}[Hardware Error]:   : 0101 0002 5f434345 
525f4543
[  115.836153] {1}[Hardware Error]:   0010: 574d   

[  115.843531] {1}[Hardware Error]:   0020:    

[  115.850908] {1}[Hardware Error]:   0030:    

[  115.858288] {1}[Hardware Error]:   0040: fe80  0004 
5f434345
[  115.865665] {1}[Hardware Error]:   0050: 525f4543 574d

Signed-off-by: Tyler Baicar 
Signed-off-by: Jonathan (Zhixiong) Zhang 
---
 drivers/firmware/efi/cper.c | 12 ++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index c2b0a12..48cb8ee 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -591,8 +591,16 @@ static void cper_estatus_print_section(
cper_print_proc_arm(newpfx, arm_err);
else
goto err_section_too_small;
-   } else
-   printk("%s""section type: unknown, %pUl\n", newpfx, sec_type);
+   } else {
+   const void *unknown_err;
+
+   unknown_err = acpi_hest_generic_data_payload(gdata);
+   printk("%ssection type: %pUl\n", newpfx, sec_type);
+   printk("%ssection length: %d\n", newpfx,
+  gdata->error_data_length);
+   print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4,
+  unknown_err, gdata->error_data_length, 0);
+   }
 
return;
 
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm 
Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH V7 04/10] arm64: exception: handle Synchronous External Abort

2017-01-12 Thread Tyler Baicar
SEA exceptions are often caused by an uncorrected hardware
error, and are handled when data abort and instruction abort
exception classes have specific values for their Fault Status
Code.
When SEA occurs, before killing the process, go through
the handlers registered in the notification list.
Update fault_info[] with specific SEA faults so that the
new SEA handler is used.

Signed-off-by: Tyler Baicar 
Signed-off-by: Jonathan (Zhixiong) Zhang 
Signed-off-by: Naveen Kaje 
---
 arch/arm64/include/asm/system_misc.h | 13 
 arch/arm64/mm/fault.c| 58 +---
 2 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/system_misc.h 
b/arch/arm64/include/asm/system_misc.h
index 57f110b..e7f3440 100644
--- a/arch/arm64/include/asm/system_misc.h
+++ b/arch/arm64/include/asm/system_misc.h
@@ -64,4 +64,17 @@ extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, 
const char *cmd);
 
 #endif /* __ASSEMBLY__ */
 
+/*
+ * The functions below are used to register and unregister callbacks
+ * that are to be invoked when a Synchronous External Abort (SEA)
+ * occurs. An SEA is raised by certain fault status codes that have
+ * either data or instruction abort as the exception class, and
+ * callbacks may be registered to parse or handle such hardware errors.
+ *
+ * Registered callbacks are run in an interrupt/atomic context. They
+ * are not allowed to block or sleep.
+ */
+int register_sea_notifier(struct notifier_block *nb);
+void unregister_sea_notifier(struct notifier_block *nb);
+
 #endif /* __ASM_SYSTEM_MISC_H */
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 05d2bd7..81039c7 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -39,6 +39,22 @@
 #include 
 #include 
 
+/*
+ * GHES SEA handler code may register a notifier call here to
+ * handle HW error record passed from platform.
+ */
+static ATOMIC_NOTIFIER_HEAD(sea_handler_chain);
+
+int register_sea_notifier(struct notifier_block *nb)
+{
+   return atomic_notifier_chain_register(_handler_chain, nb);
+}
+
+void unregister_sea_notifier(struct notifier_block *nb)
+{
+   atomic_notifier_chain_unregister(_handler_chain, nb);
+}
+
 static const char *fault_name(unsigned int esr);
 
 #ifdef CONFIG_KPROBES
@@ -480,6 +496,28 @@ static int do_bad(unsigned long addr, unsigned int esr, 
struct pt_regs *regs)
return 1;
 }
 
+/*
+ * This abort handler deals with Synchronous External Abort.
+ * It calls notifiers, and then returns "fault".
+ */
+static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs)
+{
+   struct siginfo info;
+
+   atomic_notifier_call_chain(_handler_chain, 0, NULL);
+
+   pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n",
+fault_name(esr), esr, addr);
+
+   info.si_signo = SIGBUS;
+   info.si_errno = 0;
+   info.si_code  = 0;
+   info.si_addr  = (void __user *)addr;
+   arm64_notify_die("", regs, , esr);
+
+   return 0;
+}
+
 static const struct fault_info {
int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs 
*regs);
int sig;
@@ -502,22 +540,22 @@ static const struct fault_info {
{ do_page_fault,SIGSEGV, SEGV_ACCERR,   "level 1 permission 
fault"  },
{ do_page_fault,SIGSEGV, SEGV_ACCERR,   "level 2 permission 
fault"  },
{ do_page_fault,SIGSEGV, SEGV_ACCERR,   "level 3 permission 
fault"  },
-   { do_bad,   SIGBUS,  0, "synchronous external 
abort"},
+   { do_sea,   SIGBUS,  0, "synchronous external 
abort"},
{ do_bad,   SIGBUS,  0, "unknown 17"
},
{ do_bad,   SIGBUS,  0, "unknown 18"
},
{ do_bad,   SIGBUS,  0, "unknown 19"
},
-   { do_bad,   SIGBUS,  0, "synchronous abort 
(translation table walk)" },
-   { do_bad,   SIGBUS,  0, "synchronous abort 
(translation table walk)" },
-   { do_bad,   SIGBUS,  0, "synchronous abort 
(translation table walk)" },
-   { do_bad,   SIGBUS,  0, "synchronous abort 
(translation table walk)" },
-   { do_bad,   SIGBUS,  0, "synchronous parity 
error"  },
+   { do_sea,   SIGBUS,  0, "level 0 SEA 
(translation table walk)"  },
+   { do_sea,   SIGBUS,  0, "level 1 SEA 
(translation table walk)"  },
+   { do_sea,   SIGBUS,  0, "level 2 SEA 
(translation table walk)"  },
+   { do_sea,   SIGBUS,  0, "level 3 SEA 
(translation table walk)"  },
+   { do_sea,   SIGBUS,  

[PATCH V7 06/10] acpi: apei: panic OS with fatal error status block

2017-01-12 Thread Tyler Baicar
From: "Jonathan (Zhixiong) Zhang" 

Even if an error status block's severity is fatal, the kernel does not
honor the severity level and panic.

With the firmware first model, the platform could inform the OS about a
fatal hardware error through the non-NMI GHES notification type. The OS
should panic when a hardware error record is received with this
severity.

Call panic() after CPER data in error status block is printed if
severity is fatal, before each error section is handled.

Signed-off-by: Jonathan (Zhixiong) Zhang 
Signed-off-by: Tyler Baicar 
---
 drivers/acpi/apei/ghes.c | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 87efe26..4e9a20d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -142,6 +142,8 @@ static unsigned long ghes_estatus_pool_size_request;
 static struct ghes_estatus_cache 
*ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
 static atomic_t ghes_estatus_cache_alloced;
 
+static int ghes_panic_timeout __read_mostly = 30;
+
 static int ghes_ioremap_init(void)
 {
ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
@@ -693,6 +695,13 @@ static int ghes_ack_error(struct acpi_hest_generic_v2 
*generic_v2)
return rc;
 }
 
+static void __ghes_call_panic(void)
+{
+   if (panic_timeout == 0)
+   panic_timeout = ghes_panic_timeout;
+   panic("Fatal hardware error!");
+}
+
 static int ghes_proc(struct ghes *ghes)
 {
int rc;
@@ -704,6 +713,10 @@ static int ghes_proc(struct ghes *ghes)
if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
ghes_estatus_cache_add(ghes->generic, ghes->estatus);
}
+   if (ghes_severity(ghes->estatus->error_severity) >= GHES_SEV_PANIC) {
+   __ghes_call_panic();
+   }
+
ghes_do_proc(ghes, ghes->estatus);
 
if (IS_HEST_TYPE_GENERIC_V2(ghes)) {
@@ -848,8 +861,6 @@ static atomic_t ghes_in_nmi = ATOMIC_INIT(0);
 
 static LIST_HEAD(ghes_nmi);
 
-static int ghes_panic_timeout  __read_mostly = 30;
-
 static void ghes_proc_in_irq(struct irq_work *irq_work)
 {
struct llist_node *llnode, *next;
@@ -942,9 +953,7 @@ static void __ghes_panic(struct ghes *ghes)
__ghes_print_estatus(KERN_EMERG, ghes->generic, ghes->estatus);
 
/* reboot to log the error! */
-   if (panic_timeout == 0)
-   panic_timeout = ghes_panic_timeout;
-   panic("Fatal hardware error!");
+   __ghes_call_panic();
 }
 
 static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm 
Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH V7 03/10] efi: parse ARM processor error

2017-01-12 Thread Tyler Baicar
Add support for ARM Common Platform Error Record (CPER).
UEFI 2.6 specification adds support for ARM specific
processor error information to be reported as part of the
CPER records. This provides more detail on for processor error logs.

Signed-off-by: Tyler Baicar 
Signed-off-by: Jonathan (Zhixiong) Zhang 
Signed-off-by: Naveen Kaje 
Reviewed-by: James Morse 
---
 drivers/firmware/efi/cper.c | 133 
 include/linux/cper.h|  54 ++
 2 files changed, 187 insertions(+)

diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 8fa4e23..c2b0a12 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -110,12 +110,15 @@ void cper_print_bits(const char *pfx, unsigned int bits,
 static const char * const proc_type_strs[] = {
"IA32/X64",
"IA64",
+   "ARM",
 };
 
 static const char * const proc_isa_strs[] = {
"IA32",
"IA64",
"X64",
+   "ARM A32/T32",
+   "ARM A64",
 };
 
 static const char * const proc_error_type_strs[] = {
@@ -139,6 +142,18 @@ static const char * const proc_flag_strs[] = {
"corrected",
 };
 
+static const char * const arm_reg_ctx_strs[] = {
+   "AArch32 general purpose registers",
+   "AArch32 EL1 context registers",
+   "AArch32 EL2 context registers",
+   "AArch32 secure context registers",
+   "AArch64 general purpose registers",
+   "AArch64 EL1 context registers",
+   "AArch64 EL2 context registers",
+   "AArch64 EL3 context registers",
+   "Misc. system register structure",
+};
+
 static void cper_print_proc_generic(const char *pfx,
const struct cper_sec_proc_generic *proc)
 {
@@ -184,6 +199,114 @@ static void cper_print_proc_generic(const char *pfx,
printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
 }
 
+static void cper_print_proc_arm(const char *pfx,
+   const struct cper_sec_proc_arm *proc)
+{
+   int i, len, max_ctx_type;
+   struct cper_arm_err_info *err_info;
+   struct cper_arm_ctx_info *ctx_info;
+   char newpfx[64];
+
+   printk("%s""section length: %d\n", pfx, proc->section_length);
+   printk("%s""MIDR: 0x%016llx\n", pfx, proc->midr);
+
+   len = proc->section_length - (sizeof(*proc) +
+   proc->err_info_num * (sizeof(*err_info)));
+   if (len < 0) {
+   printk("%s""section length is too small\n", pfx);
+   printk("%s""firmware-generated error record is incorrect\n", 
pfx);
+   printk("%s""ERR_INFO_NUM is %d\n", pfx, proc->err_info_num);
+   return;
+   }
+
+   if (proc->validation_bits & CPER_ARM_VALID_MPIDR)
+   printk("%s""MPIDR: 0x%016llx\n", pfx, proc->mpidr);
+   if (proc->validation_bits & CPER_ARM_VALID_AFFINITY_LEVEL)
+   printk("%s""error affinity level: %d\n", pfx,
+   proc->affinity_level);
+   if (proc->validation_bits & CPER_ARM_VALID_RUNNING_STATE) {
+   printk("%s""running state: 0x%x\n", pfx, proc->running_state);
+   printk("%s""PSCI state: %d\n", pfx, proc->psci_state);
+   }
+
+   snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
+
+   err_info = (struct cper_arm_err_info *)(proc + 1);
+   for (i = 0; i < proc->err_info_num; i++) {
+   printk("%s""Error info structure %d:\n", pfx, i);
+   printk("%s""version:%d\n", newpfx, err_info->version);
+   printk("%s""length:%d\n", newpfx, err_info->length);
+   if (err_info->validation_bits &
+   CPER_ARM_INFO_VALID_MULTI_ERR) {
+   if (err_info->multiple_error == 0)
+   printk("%s""single error\n", newpfx);
+   else if (err_info->multiple_error == 1)
+   printk("%s""multiple errors\n", newpfx);
+   else
+   printk("%s""multiple errors count:%u\n",
+   newpfx, err_info->multiple_error);
+   }
+   if (err_info->validation_bits & CPER_ARM_INFO_VALID_FLAGS) {
+   if (err_info->flags & CPER_ARM_INFO_FLAGS_FIRST)
+   printk("%s""first error captured\n", newpfx);
+   if (err_info->flags & CPER_ARM_INFO_FLAGS_LAST)
+   printk("%s""last error captured\n", newpfx);
+   if (err_info->flags & CPER_ARM_INFO_FLAGS_PROPAGATED)
+   printk("%s""propagated error captured\n",
+  newpfx);
+   if (err_info->flags & CPER_ARM_INFO_FLAGS_OVERFLOW)
+   printk("%s""overflow occurred, error info is 

[PATCH V7 02/10] ras: acpi/apei: cper: generic error data entry v3 per ACPI 6.1

2017-01-12 Thread Tyler Baicar
Currently when a RAS error is reported it is not timestamped.
The ACPI 6.1 spec adds the timestamp field to the generic error
data entry v3 structure. The timestamp of when the firmware
generated the error is now being reported.

Signed-off-by: Tyler Baicar 
Signed-off-by: Jonathan (Zhixiong) Zhang 
Signed-off-by: Richard Ruigrok 
Signed-off-by: Naveen Kaje 
Reviewed-by: James Morse 
---
 drivers/acpi/apei/ghes.c|  9 ---
 drivers/firmware/efi/cper.c | 63 +++--
 include/acpi/ghes.h | 22 
 3 files changed, 77 insertions(+), 17 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index b23160d..2acbc60 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -420,7 +420,8 @@ static void ghes_handle_memory_failure(struct 
acpi_hest_generic_data *gdata, int
int flags = -1;
int sec_sev = ghes_severity(gdata->error_severity);
struct cper_sec_mem_err *mem_err;
-   mem_err = (struct cper_sec_mem_err *)(gdata + 1);
+
+   mem_err = acpi_hest_generic_data_payload(gdata);
 
if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
return;
@@ -457,7 +458,8 @@ static void ghes_do_proc(struct ghes *ghes,
if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
 CPER_SEC_PLATFORM_MEM)) {
struct cper_sec_mem_err *mem_err;
-   mem_err = (struct cper_sec_mem_err *)(gdata+1);
+
+   mem_err = acpi_hest_generic_data_payload(gdata);
ghes_edac_report_mem_error(ghes, sev, mem_err);
 
arch_apei_report_mem_error(sev, mem_err);
@@ -467,7 +469,8 @@ static void ghes_do_proc(struct ghes *ghes,
else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
  CPER_SEC_PCIE)) {
struct cper_sec_pcie *pcie_err;
-   pcie_err = (struct cper_sec_pcie *)(gdata+1);
+
+   pcie_err = acpi_hest_generic_data_payload(gdata);
if (sev == GHES_SEV_RECOVERABLE &&
sec_sev == GHES_SEV_RECOVERABLE &&
pcie_err->validation_bits & 
CPER_PCIE_VALID_DEVICE_ID &&
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index d425374..8fa4e23 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -32,6 +32,9 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 #define INDENT_SP  " "
 
@@ -386,13 +389,37 @@ static void cper_print_pcie(const char *pfx, const struct 
cper_sec_pcie *pcie,
pfx, pcie->bridge.secondary_status, pcie->bridge.control);
 }
 
+static void cper_estatus_print_section_v300(const char *pfx,
+   const struct acpi_hest_generic_data_v300 *gdata)
+{
+   __u8 hour, min, sec, day, mon, year, century, *timestamp;
+
+   if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
+   timestamp = (__u8 *)&(gdata->time_stamp);
+   sec = bcd2bin(timestamp[0]);
+   min = bcd2bin(timestamp[1]);
+   hour = bcd2bin(timestamp[2]);
+   day = bcd2bin(timestamp[4]);
+   mon = bcd2bin(timestamp[5]);
+   year = bcd2bin(timestamp[6]);
+   century = bcd2bin(timestamp[7]);
+   printk("%stime: %7s %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,
+   0x01 & *(timestamp + 3) ? "precise" : "", century,
+   year, mon, day, hour, min, sec);
+   }
+}
+
 static void cper_estatus_print_section(
-   const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
+   const char *pfx, struct acpi_hest_generic_data *gdata, int sec_no)
 {
uuid_le *sec_type = (uuid_le *)gdata->section_type;
__u16 severity;
char newpfx[64];
 
+   if (acpi_hest_generic_data_version(gdata) >= 3)
+   cper_estatus_print_section_v300(pfx,
+   (const struct acpi_hest_generic_data_v300 *)gdata);
+
severity = gdata->error_severity;
printk("%s""Error %d, type: %s\n", pfx, sec_no,
   cper_severity_str(severity));
@@ -403,14 +430,18 @@ static void cper_estatus_print_section(
 
snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
-   struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
+   struct cper_sec_proc_generic *proc_err;
+
+   proc_err = acpi_hest_generic_data_payload(gdata);
printk("%s""section_type: general processor error\n", newpfx);
if (gdata->error_data_length >= sizeof(*proc_err))

[PATCH V7 01/10] acpi: apei: read ack upon ghes record consumption

2017-01-12 Thread Tyler Baicar
A RAS (Reliability, Availability, Serviceability) controller
may be a separate processor running in parallel with OS
execution, and may generate error records for consumption by
the OS. If the RAS controller produces multiple error records,
then they may be overwritten before the OS has consumed them.

The Generic Hardware Error Source (GHES) v2 structure
introduces the capability for the OS to acknowledge the
consumption of the error record generated by the RAS
controller. A RAS controller supporting GHESv2 shall wait for
the acknowledgment before writing a new error record, thus
eliminating the race condition.

Add support for parsing of GHESv2 sub-tables as well.

Signed-off-by: Tyler Baicar 
Signed-off-by: Jonathan (Zhixiong) Zhang 
Signed-off-by: Richard Ruigrok 
Signed-off-by: Naveen Kaje 
Reviewed-by: James Morse 
---
 drivers/acpi/apei/ghes.c | 49 +---
 drivers/acpi/apei/hest.c |  7 +--
 include/acpi/ghes.h  |  5 -
 3 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 60746ef..b23160d 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -45,6 +45,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -79,6 +80,10 @@
((struct acpi_hest_generic_status *)\
 ((struct ghes_estatus_node *)(estatus_node) + 1))
 
+#define IS_HEST_TYPE_GENERIC_V2(ghes)  \
+   ((struct acpi_hest_header *)ghes->generic)->type == \
+ACPI_HEST_TYPE_GENERIC_ERROR_V2
+
 /*
  * This driver isn't really modular, however for the time being,
  * continuing to use module_param is the easiest way to remain
@@ -248,10 +253,18 @@ static struct ghes *ghes_new(struct acpi_hest_generic 
*generic)
ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
if (!ghes)
return ERR_PTR(-ENOMEM);
+
ghes->generic = generic;
+   if (IS_HEST_TYPE_GENERIC_V2(ghes)) {
+   rc = apei_map_generic_address(
+   >generic_v2->read_ack_register);
+   if (rc)
+   goto err_free;
+   }
+
rc = apei_map_generic_address(>error_status_address);
if (rc)
-   goto err_free;
+   goto err_unmap_read_ack_addr;
error_block_length = generic->error_block_length;
if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
pr_warning(FW_WARN GHES_PFX
@@ -263,13 +276,17 @@ static struct ghes *ghes_new(struct acpi_hest_generic 
*generic)
ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
if (!ghes->estatus) {
rc = -ENOMEM;
-   goto err_unmap;
+   goto err_unmap_status_addr;
}
 
return ghes;
 
-err_unmap:
+err_unmap_status_addr:
apei_unmap_generic_address(>error_status_address);
+err_unmap_read_ack_addr:
+   if (IS_HEST_TYPE_GENERIC_V2(ghes))
+   apei_unmap_generic_address(
+   >generic_v2->read_ack_register);
 err_free:
kfree(ghes);
return ERR_PTR(rc);
@@ -279,6 +296,9 @@ static void ghes_fini(struct ghes *ghes)
 {
kfree(ghes->estatus);
apei_unmap_generic_address(>generic->error_status_address);
+   if (IS_HEST_TYPE_GENERIC_V2(ghes))
+   apei_unmap_generic_address(
+   >generic_v2->read_ack_register);
 }
 
 static inline int ghes_severity(int severity)
@@ -648,6 +668,23 @@ static void ghes_estatus_cache_add(
rcu_read_unlock();
 }
 
+static int ghes_ack_error(struct acpi_hest_generic_v2 *generic_v2)
+{
+   int rc;
+   u64 val = 0;
+
+   rc = apei_read(, _v2->read_ack_register);
+   if (rc)
+   return rc;
+   val &= generic_v2->read_ack_preserve <<
+   generic_v2->read_ack_register.bit_offset;
+   val |= generic_v2->read_ack_write <<
+   generic_v2->read_ack_register.bit_offset;
+   rc = apei_write(val, _v2->read_ack_register);
+
+   return rc;
+}
+
 static int ghes_proc(struct ghes *ghes)
 {
int rc;
@@ -660,6 +697,12 @@ static int ghes_proc(struct ghes *ghes)
ghes_estatus_cache_add(ghes->generic, ghes->estatus);
}
ghes_do_proc(ghes, ghes->estatus);
+
+   if (IS_HEST_TYPE_GENERIC_V2(ghes)) {
+   rc = ghes_ack_error(ghes->generic_v2);
+   if (rc)
+   return rc;
+   }
 out:
ghes_clear_estatus(ghes);
return 0;
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 792a0d9..ef725a9 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -52,6 +52,7 @@ static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = 
{
[ACPI_HEST_TYPE_AER_ENDPOINT] = 

[PATCH V7 00/10] Add UEFI 2.6 and ACPI 6.1 updates for RAS on ARM64

2017-01-12 Thread Tyler Baicar
When a memory error, CPU error, PCIe error, or other type of hardware error
that's covered by RAS occurs, firmware should populate the shared GHES memory
location with the proper GHES structures to notify the OS of the error.
For example, platforms that implement firmware first handling may implement
separate GHES sources for corrected errors and uncorrected errors. If the
error is an uncorrectable error, then the firmware will notify the OS
immediately since the error needs to be handled ASAP. The OS will then be able
to take the appropriate action needed such as offlining a page. If the error
is a corrected error, then the firmware will not interrupt the OS immediately.
Instead, the OS will see and report the error the next time it's GHES timer
expires. The kernel will first parse the GHES structures and report the errors
through the kernel logs and then notify the user space through RAS trace
events. This allows user space applications such as RAS Daemon to see the
errors and report them however the user desires. This patchset extends the
kernel functionality for RAS errors based on updates in the UEFI 2.6 and
ACPI 6.1 specifications.

An example flow from firmware to user space could be:

 +---+
   +>|   |
   | |  GHES polling |--+
+-+  |source |  |   +---+   ++
| |  +---+  |   |  Kernel GHES  |   ||
|  Firmware   | +-->|  CPER AER and |-->|  RAS trace |
| |  +---+  |   |  EDAC drivers |   |   event|
+-+  |   |  |   +---+   ++
   | |  GHES sci |--+
   +>|   source  |
 +---+

Add support for Generic Hardware Error Source (GHES) v2, which introduces the
capability for the OS to acknowledge the consumption of the error record
generated by the Reliability, Availability and Serviceability (RAS) controller.
This eliminates potential race conditions between the OS and the RAS controller.

Add support for the timestamp field added to the Generic Error Data Entry v3,
allowing the OS to log the time that the error is generated by the firmware,
rather than the time the error is consumed. This improves the correctness of
event sequences when analyzing error logs. The timestamp is added in
ACPI 6.1, reference Table 18-343 Generic Error Data Entry.

Add support for ARMv8 Common Platform Error Record (CPER) per UEFI 2.6
specification. ARMv8 specific processor error information is reported as part of
the CPER records.  This provides more detail on for processor error logs. This
can help describe ARMv8 cache, tlb, and bus errors.

Synchronous External Abort (SEA) represents a specific processor error condition
in ARM systems. A handler is added to recognize SEA errors, and a notifier is
added to parse and report the errors before the process is killed. Refer to
section N.2.1.1 in the Common Platform Error Record appendix of the UEFI 2.6
specification.

Currently the kernel ignores CPER records that are unrecognized.
On the other hand, UEFI spec allows for non-standard (eg. vendor
proprietary) error section type in CPER (Common Platform Error Record),
as defined in section N2.3 of UEFI version 2.5. Therefore, user
is not able to see hardware error data of non-standard section.

If section Type field of Generic Error Data Entry is unrecognized,
prints out the raw data in dmesg buffer, and also adds a tracepoint
for reporting such hardware errors.

Currently even if an error status block's severity is fatal, the kernel
does not honor the severity level and panic. With the firmware first
model, the platform could inform the OS about a fatal hardware error
through the non-NMI GHES notification type. The OS should panic when a
hardware error record is received with this severity.

Add support to handle SEAs that occur while a KVM guest kernel is
running. Currently these are unsupported by the guest abort handling.

V7: Update a couple prints for ARM processor errors
Add Print notifying if overflow occurred for ARM processor errors
Check for ARM configuration to allow the compiler to ignore ARM code
 on non-ARM systems
Use SEA acronym instead of spelling it out
Update fault_info prints to be more clear
Add NMI locking to SEA notification
Remove error info structure from ARM trace event since there can be
 a variable amount of these structures

V6: Change HEST_TYPE_GENERIC_V2 to IS_HEST_TYPE_GENERIC_V2 for readability
Move APEI helper defines from cper.h to ghes.h
Add data_len decrement back into print loop
Change references to ARMv8 to just ARM
Rewrite ARM processor context info parsing
Check valid bit of ARM error info field before printing it
Add include of linux/uuid.h in ghes.c

V5: Fix GHES goto logic for error conditions
Change ghes_do_read_ack to ghes_ack_error

Re: [kvm-unit-tests PATCH 1/6] libcflat: add PRI(dux)32 format types

2017-01-12 Thread Alex Bennée

Paolo Bonzini  writes:

> On 12/01/2017 18:18, Alex Bennée wrote:
>>> Although I feel there should be a compiler macro way to do this without
>>> a need for configure/makefile trickery at all...
>>
>> I did ask our toolchain bods. They started going on about potential
>> solutions using _Generic but I fear that might be worse in this case!
>
> I don't think _Generic can do string concatenation, can it?
>
> inttypes.h is not part of the set of freestanding headers, only stdint.h
> is.  Who is providing stdint.h in your case?

/usr/lib/gcc/arm-none-eabi/5.4.1/include/stdint.h

is part of the compiler package although it can just include the lib
stdint.h if it is there:

  #ifndef _GCC_WRAP_STDINT_H
  #if __STDC_HOSTED__
  # if defined __cplusplus && __cplusplus >= 201103L
  #  undef __STDC_LIMIT_MACROS
  #  define __STDC_LIMIT_MACROS
  #  undef __STDC_CONSTANT_MACROS
  #  define __STDC_CONSTANT_MACROS
  # endif
  # include_next 
  #else
  # include "stdint-gcc.h"
  #endif
  #define _GCC_WRAP_STDINT_H
  #endif

So using inttypes we would get:

>arm-none-eabi-gcc ./test.c -E | grep typedef | grep uint32
typedef long unsigned int __uint32_t;
typedef __uint32_t uint32_t ;


--
Alex Bennée
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH 6/6] run_tests: allow passing of options to QEMU

2017-01-12 Thread Paolo Bonzini


On 12/01/2017 18:32, Andrew Jones wrote:
>>  
>> +# Any options left for QEMU?
>> +shift $((OPTIND-1))
>> +if [ "$#" -gt  0 ]; then
>> +extra_opts="$@"
>> +fi
> We can unconditionally do the extra_opts="$@", extra_opts will just
> be null in the case there aren't more args, like it was before.

extra_opts is not an array, so this would mess up options that contain
spaces.  (Alex's patch in general, not your tweak).

Paolo

>> +
>>  RUNTIME_log_stderr () { cat >> test.log; }
>>  RUNTIME_log_stdout () {
>>  if [ "$PRETTY_PRINT_STACKS" = "yes" ]; then
>> @@ -59,4 +68,4 @@ RUNTIME_log_stdout () {
>>  config=$TEST_DIR/unittests.cfg
>>  rm -f test.log
>>  printf "BUILD_HEAD=$(cat build-head)\n\n" > test.log
>> -for_each_unittest $config run
>> +for_each_unittest $config run "$extra_opts"
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH 6/6] run_tests: allow passing of options to QEMU

2017-01-12 Thread Andrew Jones
On Wed, Jan 11, 2017 at 04:28:41PM +, Alex Bennée wrote:
> This allows additional options to be passed to QEMU. It follows the
> convention of passing parameters after a -- to the child process. In
> my case I'm using it to toggle MTTCG on an off:
> 
>   ./run_tests.sh -- --accel tcg,thread=multi
> 
> Signed-off-by: Alex Bennée 
> 
> ---
> v1
>   - changes from -o to --
>   - fixed whitespace damage
> ---
>  README.md  |  6 ++
>  run_tests.sh   | 13 +++--
>  scripts/functions.bash |  7 ---
>  3 files changed, 21 insertions(+), 5 deletions(-)
> 
> diff --git a/README.md b/README.md
> index fa3a445..1bd6dcb 100644
> --- a/README.md
> +++ b/README.md
> @@ -55,6 +55,12 @@ To extend or disable the timeouts:
>  
>  TIMEOUT=0 ./run_tests.sh
>  
> +Any arguments past the end-of-arguments marker (--) is passed on down
> +to the QEMU invocation. This can of course be combined with the other
> +modifiers:
> +
> +ACCEL=tcg ./run_tests.sh -v -- --accel tcg,thread=multi
> +
>  # Contributing
>  
>  ## Directory structure
> diff --git a/run_tests.sh b/run_tests.sh
> index 254129d..3270fba 100755
> --- a/run_tests.sh
> +++ b/run_tests.sh
> @@ -13,7 +13,7 @@ function usage()
>  {
>  cat <  
> -Usage: $0 [-g group] [-h] [-v]
> +Usage: $0 [-g group] [-h] [-v] [-- QEMU options]
>  
>  -g: Only execute tests in the given group
>  -h: Output this help text
> @@ -22,6 +22,8 @@ Usage: $0 [-g group] [-h] [-v]
>  Set the environment variable QEMU=/path/to/qemu-system-ARCH to
>  specify the appropriate qemu binary for ARCH-run.
>  
> +All options specified after -- are passed on to QEMU.
> +
>  EOF
>  }
>  
> @@ -29,6 +31,7 @@ RUNTIME_arch_run="./$TEST_DIR/run"
>  source scripts/runtime.bash
>  
>  while getopts "g:hv" opt; do
> +

stray blank line added?

>  case $opt in
>  g)
>  only_group=$OPTARG
> @@ -46,6 +49,12 @@ while getopts "g:hv" opt; do
>  esac
>  done
>  
> +# Any options left for QEMU?
> +shift $((OPTIND-1))
> +if [ "$#" -gt  0 ]; then
> +extra_opts="$@"
> +fi

We can unconditionally do the extra_opts="$@", extra_opts will just
be null in the case there aren't more args, like it was before.

> +
>  RUNTIME_log_stderr () { cat >> test.log; }
>  RUNTIME_log_stdout () {
>  if [ "$PRETTY_PRINT_STACKS" = "yes" ]; then
> @@ -59,4 +68,4 @@ RUNTIME_log_stdout () {
>  config=$TEST_DIR/unittests.cfg
>  rm -f test.log
>  printf "BUILD_HEAD=$(cat build-head)\n\n" > test.log
> -for_each_unittest $config run
> +for_each_unittest $config run "$extra_opts"
> diff --git a/scripts/functions.bash b/scripts/functions.bash
> index ee9143c..60fbc6a 100644
> --- a/scripts/functions.bash
> +++ b/scripts/functions.bash
> @@ -3,10 +3,11 @@ function for_each_unittest()
>  {
>   local unittests="$1"
>   local cmd="$2"
> + local extra_opts=$3
>   local testname
>   local smp
>   local kernel
> - local opts
> + local opts=$extra_opts
>   local groups
>   local arch
>   local check
> @@ -21,7 +22,7 @@ function for_each_unittest()
>   testname=${BASH_REMATCH[1]}
>   smp=1
>   kernel=""
> - opts=""
> + opts=$extra_opts
>   groups=""
>   arch=""
>   check=""
> @@ -32,7 +33,7 @@ function for_each_unittest()
>   elif [[ $line =~ ^smp\ *=\ *(.*)$ ]]; then
>   smp=${BASH_REMATCH[1]}
>   elif [[ $line =~ ^extra_params\ *=\ *(.*)$ ]]; then
> - opts=${BASH_REMATCH[1]}
> + opts="$opts ${BASH_REMATCH[1]}"

How do QEMU opts work with respect to precedence? If the later
opts override the earlier (I think they do), then we should put
opts after rematch here, because the user explicitly added those
options to the command line, and therefore probably prefers them.

>   elif [[ $line =~ ^groups\ *=\ *(.*)$ ]]; then
>   groups=${BASH_REMATCH[1]}
>   elif [[ $line =~ ^arch\ *=\ *(.*)$ ]]; then
> -- 
> 2.11.0
>

Thanks for this patch! I'm looking forward to making use of it for
testing Peter's EL2 series with/without "-machine virtualization=on"

drew
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH 1/6] libcflat: add PRI(dux)32 format types

2017-01-12 Thread Alex Bennée

Andrew Jones  writes:

> On Thu, Jan 12, 2017 at 01:29:24PM +0100, Paolo Bonzini wrote:
>>
>>
>> On 11/01/2017 17:28, Alex Bennée wrote:
>> > So we can have portable formatting of uint32_t types. However there is
>> > a catch. Different compilers can use legally subtly different types
>> > though so we need to probe the compiler defined intdef.h first.
>>
>> Interesting, what platform has long uint32_t?  I thought the issue was
>> whether 64-bit is long or "long long".
>>
>> Paolo
>>
>> > Signed-off-by: Alex Bennée 
>> > ---
>> >  Makefile   |  1 +
>> >  configure  | 13 +
>> >  lib/libcflat.h |  9 +
>> >  3 files changed, 23 insertions(+)
>> >
>> > diff --git a/Makefile b/Makefile
>> > index a32333b..9822d9a 100644
>> > --- a/Makefile
>> > +++ b/Makefile
>> > @@ -55,6 +55,7 @@ CFLAGS += $(fomit_frame_pointer)
>> >  CFLAGS += $(fno_stack_protector)
>> >  CFLAGS += $(fno_stack_protector_all)
>> >  CFLAGS += $(wno_frame_address)
>> > +CFLAGS += $(if $(U32_LONG_FMT),-D__U32_LONG_FMT__,)
>> >
>> >  CXXFLAGS += $(CFLAGS)
>> >
>> > diff --git a/configure b/configure
>> > index 995c8fa..127868c 100755
>> > --- a/configure
>> > +++ b/configure
>> > @@ -109,6 +109,18 @@ if [ -f $testdir/run ]; then
>> >  ln -fs $testdir/run $testdir-run
>> >  fi
>> >
>> > +# check if uint32_t needs a long format modifier
>> > +cat << EOF > lib_test.c
>> > +#include 
>> > +EOF
>> > +
>> > +$cross_prefix$cc lib_test.c -E | grep "typedef" | grep "long" | grep 
>> > "uint32_t" &> /dev/null
>
> This won't work with cross compilers that don't have inttypes.h in their
> path (like mine). How about something like

Hmm good point, in my case inttypes.h came from the libnewlib-dev package.

>
>  cat << EOF > lib_test.c
>  __UINT32_TYPE__
>  EOF
>  u32_long="`aarch64-linux-gnu-gcc lib_test.c -E | awk '/unsigned/ && $2 == 
> "long"'`"

Hmm it is not clear from the docs if this is a GCCism. If it is do we
care? Also the docs do say:

"Some of these macros may not be defined on particular systems if GCC does not 
provide a stdint.h header on those systems. "

> Although I feel there should be a compiler macro way to do this without
> a need for configure/makefile trickery at all...

I did ask our toolchain bods. They started going on about potential
solutions using _Generic but I fear that might be worse in this case!

>
> Thanks,
> drew
>
>
>> > +exit=$?
>> > +if [ $exit -eq 0 ]; then
>> > +u32_long=true
>> > +fi
>> > +rm -f lib_test.c
>> > +
>> >  # check for dependent 32 bit libraries
>> >  if [ "$arch" != "arm" ]; then
>> >  cat << EOF > lib_test.c
>> > @@ -155,4 +167,5 @@ TEST_DIR=$testdir
>> >  FIRMWARE=$firmware
>> >  ENDIAN=$endian
>> >  PRETTY_PRINT_STACKS=$pretty_print_stacks
>> > +U32_LONG_FMT=$u32_long
>> >  EOF
>> > diff --git a/lib/libcflat.h b/lib/libcflat.h
>> > index 380395f..e80fc50 100644
>> > --- a/lib/libcflat.h
>> > +++ b/lib/libcflat.h
>> > @@ -58,12 +58,21 @@ typedef _Bool  bool;
>> >  #define true  1
>> >
>> >  #if __SIZEOF_LONG__ == 8
>> > +#  define __PRI32_PREFIX
>> >  #  define __PRI64_PREFIX  "l"
>> >  #  define __PRIPTR_PREFIX "l"
>> >  #else
>> > +#if defined(__U32_LONG_FMT__)
>> > +#  define __PRI32_PREFIX"l"
>> > +#else
>> > +#  define __PRI32_PREFIX
>> > +#endif
>> >  #  define __PRI64_PREFIX  "ll"
>> >  #  define __PRIPTR_PREFIX
>> >  #endif
>> > +#define PRId32  __PRI32_PREFIX"d"
>> > +#define PRIu32  __PRI32_PREFIX"u"
>> > +#define PRIx32  __PRI32_PREFIX"x"
>> >  #define PRId64  __PRI64_PREFIX"d"
>> >  #define PRIu64  __PRI64_PREFIX"u"
>> >  #define PRIx64  __PRI64_PREFIX"x"
>> >
>> --
>> To unsubscribe from this list: send the line "unsubscribe kvm" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
Alex Bennée
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH 5/6] docs: mention modifying env vars in README

2017-01-12 Thread Andrew Jones
On Wed, Jan 11, 2017 at 04:28:40PM +, Alex Bennée wrote:
> I had started adding a series of flags to control the run-time
> behaviour of the tests but it was pointed out env vars can already do
> that. Mention them in the README so others can find out to.
> 
> Signed-off-by: Alex Bennée 
> ---
>  README.md | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/README.md b/README.md
> index 9462824..fa3a445 100644
> --- a/README.md
> +++ b/README.md
> @@ -47,6 +47,14 @@ environment variable:
>  
>  QEMU=/tmp/qemu/x86_64-softmmu/qemu-system-x86_64 ./x86-run ./x86/msr.flat
>  
> +To force the acceleration mode:
> +
> +ACCEL=tcg ./run_tests.sh
> +
> +To extend or disable the timeouts:
> +
> +TIMEOUT=0 ./run_tests.sh
> +

This is a nice addition to the README, but please add more detail.

To force the use of TCG:
 ACCEL=tcg ./run_tests.sh

To force failure when KVM is not present:
 ACCEL=kvm ./run_tests.sh

To modify the timeout:
 TIMEOUT=$DURATION ./run_tests.sh # man timeout(1) for duration format
 TIMEOUT=0 ./run_tests.sh # disable the timeout

or something like that...

thanks,
drew


>  # Contributing
>  
>  ## Directory structure
> -- 
> 2.11.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH 4/6] docs: mention checkpatch in the README

2017-01-12 Thread Andrew Jones
On Thu, Jan 12, 2017 at 12:35:03PM +, Alex Bennée wrote:
> 
> Paolo Bonzini  writes:
> 
> > On 11/01/2017 17:28, Alex Bennée wrote:
> >> Signed-off-by: Alex Bennée 
> >> ---
> >>  README.md | 1 +
> >>  1 file changed, 1 insertion(+)
> >>
> >> diff --git a/README.md b/README.md
> >> index 5027b62..9462824 100644
> >> --- a/README.md
> >> +++ b/README.md
> >> @@ -79,3 +79,4 @@ You can add the following to .git/config to do this 
> >> automatically for you:
> >>  [format]
> >>  subjectprefix = kvm-unit-tests PATCH
> >>
> >> +Please run the kernel's ./scripts/checkpatch.pl on new patches

s/patches/files

All patches should be new :-)

drew

> >
> > Does it really work well on kvm-unit-tests?
> 
> Well I keep getting pulled up on kernel coding style on my reviews so if
> we mean it we should enforce it.
> 
> --
> Alex Bennée
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH 4/6] docs: mention checkpatch in the README

2017-01-12 Thread Andrew Jones
On Thu, Jan 12, 2017 at 01:29:52PM +0100, Paolo Bonzini wrote:
> 
> 
> On 11/01/2017 17:28, Alex Bennée wrote:
> > Signed-off-by: Alex Bennée 
> > ---
> >  README.md | 1 +
> >  1 file changed, 1 insertion(+)
> > 
> > diff --git a/README.md b/README.md
> > index 5027b62..9462824 100644
> > --- a/README.md
> > +++ b/README.md
> > @@ -79,3 +79,4 @@ You can add the following to .git/config to do this 
> > automatically for you:
> >  [format]
> >  subjectprefix = kvm-unit-tests PATCH
> >  
> > +Please run the kernel's ./scripts/checkpatch.pl on new patches
> 
> Does it really work well on kvm-unit-tests?

I use it on new code (liberally ignoring stuff I don't think
is worth conforming to), but there's no chance it'll work on
old files (many lib files and pretty much all x86 files). So
we can't really enforce for x86 developers, as we prefer
consistency over kernel style. That said, I think mentioning
in the README that new files could use it (at least for guidance)
is a good idea.

Thanks,
drew
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC 07/13] KVM: arm64: ITS: Change entry_size and indirect bit in BASER

2017-01-12 Thread Marc Zyngier
On 12/01/17 15:56, Eric Auger wrote:
> Change the device table entry_size to 16 bytes instead of 8.
> We also Store the device and collection device in the its
> struct.
> 
> The patch also clears the indirect bit for the device BASER.
> The indirect bit is set as read-only.

Err... Why? We *really* want to continue supporting indirect tables, as
this is a massive memory saver for the guest.

> 
> Signed-off-by: Eric Auger 
> 
> ---
> 
> TODO: investigate support of 2 level tables, ie. enabling
> Indirect = 1. Support of 2 level tables is implementation
> defined.

Clearly, that's a regression. What exactly is the issue that decided you
to disable it?

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC 06/13] KVM: arm64: ITS: Expose ITT_Entry_Size in GITS_TYPER

2017-01-12 Thread Andre Przywara
Hi Eric,

On 12/01/17 15:56, Eric Auger wrote:
> An ITT_Entry_Size of 2x8Bytes is reported to the guest
> to provision for ITTE state storage.
> 
> Signed-off-by: Eric Auger 
> ---
>  include/linux/irqchip/arm-gic-v3.h | 1 +
>  virt/kvm/arm/vgic/vgic-its.c   | 3 +++
>  2 files changed, 4 insertions(+)
> 
> diff --git a/include/linux/irqchip/arm-gic-v3.h 
> b/include/linux/irqchip/arm-gic-v3.h
> index 170e00a..8cfd81bc 100644
> --- a/include/linux/irqchip/arm-gic-v3.h
> +++ b/include/linux/irqchip/arm-gic-v3.h
> @@ -233,6 +233,7 @@
>  #define GITS_CTLR_QUIESCENT  (1U << 31)
>  
>  #define GITS_TYPER_PLPIS (1UL << 0)
> +#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT  4
>  #define GITS_TYPER_IDBITS_SHIFT  8
>  #define GITS_TYPER_DEVBITS_SHIFT 13
>  #define GITS_TYPER_DEVBITS(r)r) >> 
> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1)
> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
> index e174220..96378b8 100644
> --- a/virt/kvm/arm/vgic/vgic-its.c
> +++ b/virt/kvm/arm/vgic/vgic-its.c
> @@ -33,6 +33,8 @@
>  #include "vgic.h"
>  #include "vgic-mmio.h"
>  
> +#define ITTE_SIZE 16
> +
>  /*
>   * Creates a new (reference to a) struct vgic_irq for a given LPI.
>   * If this LPI is already mapped on another ITS, we increase its refcount
> @@ -399,6 +401,7 @@ static unsigned long vgic_mmio_read_its_typer(struct kvm 
> *kvm,
>*/
>   reg |= 0x0f << GITS_TYPER_DEVBITS_SHIFT;
>   reg |= 0x0f << GITS_TYPER_IDBITS_SHIFT;
> + reg |= ITTE_SIZE << GITS_TYPER_ITT_ENTRY_SIZE_SHIFT;

The field is defined as the "... number of bytes per translation table
entry, minus one.". So it should be: (ITTE_SIZE - 1) << ...

Cheers,
Andre.

>  
>   return extract_bytes(reg, addr & 7, len);
>  }
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH 3/6] docs: move README to README.md and symlink

2017-01-12 Thread Andrew Jones
On Wed, Jan 11, 2017 at 04:28:38PM +, Alex Bennée wrote:
> This allows a slightly nicer formatting of the text when displayed on
> some repository hosts. We keep a symlink from README for the
> old-school purists.
> 
> Signed-off-by: Alex Bennée 
> ---
>  README| 69 +
>  README.md | 81 
> +++
>  2 files changed, 82 insertions(+), 68 deletions(-)
>  mode change 100644 => 12 README
>  create mode 100644 README.md

Please use 'format-patch -M' to create patches when renaming files.
I'm not sure I care about having a .md version or not, but only
one README* file in the root dir would be my preference, i.e. if we
want a .md version, then let's just have that, i.e no symlink.

Thanks,
drew
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 3/5] arm64: Create and use __tlbi_dsb() macros

2017-01-12 Thread Will Deacon
Hi Christopher,

On Wed, Jan 11, 2017 at 09:41:16AM -0500, Christopher Covington wrote:
> This refactoring will allow an errata workaround that repeats tlbi dsb
> sequences to only change one location. This is not intended to change the
> generated assembly and comparison of before and after preprocessor output
> of arch/arm64/mm/mmu.c and vmlinux objdump shows no functional changes.
> 
> Signed-off-by: Christopher Covington 
> ---
>  arch/arm64/include/asm/tlbflush.h | 104 
> +-
>  1 file changed, 69 insertions(+), 35 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/tlbflush.h 
> b/arch/arm64/include/asm/tlbflush.h
> index deab523..f28813c 100644
> --- a/arch/arm64/include/asm/tlbflush.h
> +++ b/arch/arm64/include/asm/tlbflush.h
> @@ -25,22 +25,69 @@
>  #include 
>  
>  /*
> - * Raw TLBI operations.
> + * Raw TLBI, DSB operations
>   *
> - * Where necessary, use the __tlbi() macro to avoid asm()
> - * boilerplate. Drivers and most kernel code should use the TLB
> - * management routines in preference to the macro below.
> + * Where necessary, use __tlbi_*dsb() macros to avoid asm() boilerplate.
> + * Drivers and most kernel code should use the TLB management routines in
> + * preference to the macros below.
>   *
> - * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending
> - * on whether a particular TLBI operation takes an argument or
> - * not. The macros handles invoking the asm with or without the
> - * register argument as appropriate.
> + * The __tlbi_dsb() macro handles invoking the asm without any register
> + * argument, with a single register argument, and with start (included)
> + * and end (excluded) range of register arguments. For example:
> + *
> + * __tlbi_dsb(op, attr)
> + *
> + *   tlbi op
> + *   dsb attr
> + *
> + * __tlbi_dsb(op, attr, addr)
> + *
> + *   mov %[addr], =addr
> + *   tlbi op, %[addr]
> + *   dsb attr
> + *
> + * __tlbi_range_dsb(op, attr, start, end)
> + *
> + *   mov %[arg], =start
> + *   mov %[end], =end
> + * for:
> + *   tlbi op, %[addr]
> + *   add %[addr], %[addr], #(1 << (PAGE_SHIFT - 12))
> + *   cmp %[addr], %[end]
> + *   b.ne for
> + *   dsb attr
>   */
> -#define __TLBI_0(op, arg)asm ("tlbi " #op)
> -#define __TLBI_1(op, arg)asm ("tlbi " #op ", %0" : : "r" (arg))
> -#define __TLBI_N(op, arg, n, ...)__TLBI_##n(op, arg)
>  
> -#define __tlbi(op, ...)  __TLBI_N(op, ##__VA_ARGS__, 1, 0)
> +#define __TLBI_FOR_0(ig0, ig1, ig2)
> +#define __TLBI_INSTR_0(op, ig1, ig2) "tlbi " #op
> +#define __TLBI_IO_0(ig0, ig1, ig2)   : :
> +
> +#define __TLBI_FOR_1(ig0, ig1, ig2)
> +#define __TLBI_INSTR_1(op, ig0, ig1) "tlbi " #op ", %0"
> +#define __TLBI_IO_1(ig0, arg, ig1)   : : "r" (arg)
> +
> +#define __TLBI_FOR_2(ig0, start, ig1)unsigned long addr; 
>\
> + for (addr = start; addr < end; \
> + addr += 1 << (PAGE_SHIFT - 12))
> +#define __TLBI_INSTR_2(op, ig0, ig1) "tlbi " #op ", %0"
> +#define __TLBI_IO_2(ig0, ig1, ig2)   : : "r" (addr)
> +
> +#define __TLBI_FOR_N(op, a1, a2, n, ...) __TLBI_FOR_##n(op, a1, a2)
> +#define __TLBI_INSTR_N(op, a1, a2, n, ...)   __TLBI_INSTR_##n(op, a1, a2)
> +#define __TLBI_IO_N(op, a1, a2, n, ...)  __TLBI_IO_##n(op, a1, a2)
> +
> +#define __TLBI_FOR(op, ...)  __TLBI_FOR_N(op, ##__VA_ARGS__, 2, 1, 0)
> +#define __TLBI_INSTR(op, ...)__TLBI_INSTR_N(op, 
> ##__VA_ARGS__, 2, 1, 0)
> +#define __TLBI_IO(op, ...)   __TLBI_IO_N(op, ##__VA_ARGS__, 2, 1, 0)
> +
> +#define __tlbi_asm_dsb(as, op, attr, ...) do {   
>\
> + __TLBI_FOR(op, ##__VA_ARGS__)  \
> + asm (__TLBI_INSTR(op, ##__VA_ARGS__)   \
> + __TLBI_IO(op, ##__VA_ARGS__)); \
> + asm volatile (   as "\ndsb " #attr "\n"\
> + : : : "memory"); } while (0)
> +
> +#define __tlbi_dsb(...)  __tlbi_asm_dsb("", ##__VA_ARGS__)

I can't deny that this is cool, but ultimately it's completely unreadable.
What I was thinking you'd do would be make __tlbi expand to:

  tlbi
  dsb
  tlbi
  dsb

for Falkor, and:

  tlbi
  nop
  nop
  nop

for everybody else.

Wouldn't that localise this change sufficiently that you wouldn't need
to change all the callers and encode the looping in your cpp macros?

I realise you get an extra dsb in some places with that change, but I'd
like to see numbers for the impact of that on top of the workaround. If
it's an issue, then an alternative sequence would be:

  tlbi
  dsb
  tlbi

and you'd rely on the existing dsb to complete that.

Having said that, I don't understand how your current loop code works
when the workaround is applied. AFAICT, you end up emitting something
like:

dsb ishst
for i in 0 to n
  

Re: [kvm-unit-tests PATCH 1/6] libcflat: add PRI(dux)32 format types

2017-01-12 Thread Andrew Jones
On Thu, Jan 12, 2017 at 01:29:24PM +0100, Paolo Bonzini wrote:
> 
> 
> On 11/01/2017 17:28, Alex Bennée wrote:
> > So we can have portable formatting of uint32_t types. However there is
> > a catch. Different compilers can use legally subtly different types
> > though so we need to probe the compiler defined intdef.h first.
> 
> Interesting, what platform has long uint32_t?  I thought the issue was
> whether 64-bit is long or "long long".
> 
> Paolo
> 
> > Signed-off-by: Alex Bennée 
> > ---
> >  Makefile   |  1 +
> >  configure  | 13 +
> >  lib/libcflat.h |  9 +
> >  3 files changed, 23 insertions(+)
> > 
> > diff --git a/Makefile b/Makefile
> > index a32333b..9822d9a 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -55,6 +55,7 @@ CFLAGS += $(fomit_frame_pointer)
> >  CFLAGS += $(fno_stack_protector)
> >  CFLAGS += $(fno_stack_protector_all)
> >  CFLAGS += $(wno_frame_address)
> > +CFLAGS += $(if $(U32_LONG_FMT),-D__U32_LONG_FMT__,)
> >  
> >  CXXFLAGS += $(CFLAGS)
> >  
> > diff --git a/configure b/configure
> > index 995c8fa..127868c 100755
> > --- a/configure
> > +++ b/configure
> > @@ -109,6 +109,18 @@ if [ -f $testdir/run ]; then
> >  ln -fs $testdir/run $testdir-run
> >  fi
> >  
> > +# check if uint32_t needs a long format modifier
> > +cat << EOF > lib_test.c
> > +#include 
> > +EOF
> > +
> > +$cross_prefix$cc lib_test.c -E | grep "typedef" | grep "long" | grep 
> > "uint32_t" &> /dev/null

This won't work with cross compilers that don't have inttypes.h in their
path (like mine). How about something like

 cat << EOF > lib_test.c
 __UINT32_TYPE__
 EOF
 u32_long="`aarch64-linux-gnu-gcc lib_test.c -E | awk '/unsigned/ && $2 == 
"long"'`"

Although I feel there should be a compiler macro way to do this without
a need for configure/makefile trickery at all...

Thanks,
drew


> > +exit=$?
> > +if [ $exit -eq 0 ]; then
> > +u32_long=true
> > +fi
> > +rm -f lib_test.c
> > +
> >  # check for dependent 32 bit libraries
> >  if [ "$arch" != "arm" ]; then
> >  cat << EOF > lib_test.c
> > @@ -155,4 +167,5 @@ TEST_DIR=$testdir
> >  FIRMWARE=$firmware
> >  ENDIAN=$endian
> >  PRETTY_PRINT_STACKS=$pretty_print_stacks
> > +U32_LONG_FMT=$u32_long
> >  EOF
> > diff --git a/lib/libcflat.h b/lib/libcflat.h
> > index 380395f..e80fc50 100644
> > --- a/lib/libcflat.h
> > +++ b/lib/libcflat.h
> > @@ -58,12 +58,21 @@ typedef _Bool   bool;
> >  #define true  1
> >  
> >  #if __SIZEOF_LONG__ == 8
> > +#  define __PRI32_PREFIX
> >  #  define __PRI64_PREFIX   "l"
> >  #  define __PRIPTR_PREFIX  "l"
> >  #else
> > +#if defined(__U32_LONG_FMT__)
> > +#  define __PRI32_PREFIX"l"
> > +#else
> > +#  define __PRI32_PREFIX
> > +#endif
> >  #  define __PRI64_PREFIX   "ll"
> >  #  define __PRIPTR_PREFIX
> >  #endif
> > +#define PRId32  __PRI32_PREFIX "d"
> > +#define PRIu32  __PRI32_PREFIX "u"
> > +#define PRIx32  __PRI32_PREFIX "x"
> >  #define PRId64  __PRI64_PREFIX "d"
> >  #define PRIu64  __PRI64_PREFIX "u"
> >  #define PRIx64  __PRI64_PREFIX "x"
> > 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC 01/13] KVM: arm/arm64: Add vITS save/restore API documentation

2017-01-12 Thread Marc Zyngier
Hi Eric,

On 12/01/17 15:56, Eric Auger wrote:
> Add description for how to access vITS registers and how to flush/restore
> vITS tables into/from memory
> 
> Signed-off-by: Eric Auger 
> ---
>  Documentation/virtual/kvm/devices/arm-vgic-its.txt | 70 
> ++
>  1 file changed, 70 insertions(+)
> 
> diff --git a/Documentation/virtual/kvm/devices/arm-vgic-its.txt 
> b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
> index 6081a5b..bd74613 100644
> --- a/Documentation/virtual/kvm/devices/arm-vgic-its.txt
> +++ b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
> @@ -36,3 +36,73 @@ Groups:
>  -ENXIO:  ITS not properly configured as required prior to setting
>   this attribute
>  -ENOMEM: Memory shortage when allocating ITS internal data
> +
> +  KVM_DEV_ARM_VGIC_GRP_ITS_REGS
> +  Attributes:
> +  The attr field of kvm_device_attr encodes the offset of the
> +  ITS register, relative to the ITS control frame base address
> +  (ITS_base).
> +
> +  kvm_device_attr.addr points to a __u64 value whatever the width
> +  of the addressed register (32/64 bits).
> +
> +  Writes to read-only registers are ignored by the kernel except
> +  for a single register, GITS_READR. Normally this register is RO
> +  but it needs to be restored otherwise commands in the queue will
> +  be re-executed after CWRITER setting.
> +
> +  For other registers, Getting or setting a register has the same
> +  effect as reading/writing the register on real hardware.
> +  Errors:
> +-ENXIO: Offset does not correspond to any supported register
> +-EFAULT: Invalid user pointer for attr->addr
> +-EINVAL: Offset is not 64-bit aligned
> +
> +  KVM_DEV_ARM_VGIC_GRP_ITS_TABLES
> +  Attributes
> +   The attr field of kvm_device_attr is not used.
> +
> +   request the flush-save/restore of the ITS tables, namely
> +   the device table, the collection table, all the ITT tables,
> +   the LPI pending tables. On save, the tables are flushed
> +   into guest memory at the location provisionned by the guest

provisioned

> +   in GITS_BASER (device and collection tables), on MAPD command
> +   (ITT_addr), GICR_PENDBASERs (pending tables).
> +
> +   This means the GIC should be restored before the ITS and all
> +   ITS registers but the GITS_CTRL must be restored before
> +   restoring the ITS tables.
> +
> +   Note the LPI configuration table is read-only for the
> +   in-kernel ITS and its save/restore goes through the standard
> +   RAM save/restore.
> +
> +   The layout of the tables in guest memory defines an ABI.
> +   The entries are laid out in memory as follows;
> +
> +Device Table Entry (DTE) layout: entry size = 16 bytes
> +
> +bits: | 63   ...  32  | 31 ... 6 | 5 | 4   ...   0 |
> +values:   |   DeviceID|   Resv   | V |Size |
> +
> +bits: | 63 ... 44 | 43  ...  0  |
> +values:   |Resv   |  ITT_addr   |

While I appreciate this layout represents the absolute maximum an ITS
could implement, I'm a bit concerned about the amount of memory we may
end-up requiring here. All the ITSs implementations I know of seem to
get away with 8 bytes per entry. Maybe I'm just too worried.

Also, please mention that ITT_addr is actually ITT_addr[51:8], as we're
guaranteed to have an ITT that is 256 byte aligned.

> +
> +Collection Table Entry (CTE) layout: entry size = 8 bytes
> +
> +bits: | 63| 62 ..  52  | 51 ... 16 | 15  ...   0 |
> +values:   | V |RES0|  RDBase   |ICID |
> +
> +Interrupt Translation Table Entry (ITTE) layout: entry size = 16 bytes

The actual name is Interrupt Translation Entry (ITE). I have a patch
renaming this all over the vgic-its.c file...

> +
> +bits: | 63   ...  32  | 31 ... 17 | 16 | 15  ...  0 |
> +values:   |   DeviceID|RES0   | V  |ICID|
> +
> +bits: | 63 ...  32| 31  ...0 |
> +values:   |   pINTID  |EventID   |

Same concern here. 32bit DevID, EventID and INTID seem a bit over the
top. But maybe we shouldn't be concerned about memory... ;-)

> +
> +LPI Pending Table layout:
> +
> +As specified in the ARM Generic Interrupt Controller Architecture
> +Specification GIC Architecture version 3.0 and version 4. The first
> +1kB contains only zeros.
> 

You definitely want to relax this. An ITS implementation is allowed (and
actually encouraged) to maintain a coarse map in the first kB, and use
this to quickly scan the table, which would be very useful on restore.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH] KVM: arm/arm64: vgic: Fix deadlock on error handling

2017-01-12 Thread Marc Zyngier
Dmitry Vyukov reported that the syzkaller fuzzer triggered a
deadlock in the vgic setup code when an error was detected, as
the cleanup code tries to take a lock that is already held by
the setup code.

The fix is to avoid retaking the lock when cleaning up, by
telling the cleanup function that we already hold it.

Cc: sta...@vger.kernel.org
Signed-off-by: Marc Zyngier 
---
 virt/kvm/arm/vgic/vgic-init.c | 18 +-
 virt/kvm/arm/vgic/vgic-v2.c   |  2 --
 virt/kvm/arm/vgic/vgic-v3.c   |  2 --
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 5114391..c737ea0 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -268,15 +268,11 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
 {
struct vgic_dist *dist = >arch.vgic;
 
-   mutex_lock(>lock);
-
dist->ready = false;
dist->initialized = false;
 
kfree(dist->spis);
dist->nr_spis = 0;
-
-   mutex_unlock(>lock);
 }
 
 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -286,7 +282,8 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
INIT_LIST_HEAD(_cpu->ap_list_head);
 }
 
-void kvm_vgic_destroy(struct kvm *kvm)
+/* To be called with kvm->lock held */
+static void __kvm_vgic_destroy(struct kvm *kvm)
 {
struct kvm_vcpu *vcpu;
int i;
@@ -297,6 +294,13 @@ void kvm_vgic_destroy(struct kvm *kvm)
kvm_vgic_vcpu_destroy(vcpu);
 }
 
+void kvm_vgic_destroy(struct kvm *kvm)
+{
+   mutex_lock(>lock);
+   __kvm_vgic_destroy(kvm);
+   mutex_unlock(>lock);
+}
+
 /**
  * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest
  * is a GICv2. A GICv3 must be explicitly initialized by the guest using the
@@ -348,6 +352,10 @@ int kvm_vgic_map_resources(struct kvm *kvm)
ret = vgic_v2_map_resources(kvm);
else
ret = vgic_v3_map_resources(kvm);
+
+   if (ret)
+   __kvm_vgic_destroy(kvm);
+
 out:
mutex_unlock(>lock);
return ret;
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 9bab867..834137e 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -293,8 +293,6 @@ int vgic_v2_map_resources(struct kvm *kvm)
dist->ready = true;
 
 out:
-   if (ret)
-   kvm_vgic_destroy(kvm);
return ret;
 }
 
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 7df1b90..a4c7fff 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -308,8 +308,6 @@ int vgic_v3_map_resources(struct kvm *kvm)
dist->ready = true;
 
 out:
-   if (ret)
-   kvm_vgic_destroy(kvm);
return ret;
 }
 
-- 
2.1.4

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 2/5] arm64: Work around Falkor erratum 1003

2017-01-12 Thread Mark Rutland
On Thu, Jan 12, 2017 at 03:45:48PM +, Catalin Marinas wrote:
> On Wed, Jan 11, 2017 at 06:40:52PM +, Mark Rutland wrote:

> > Likewise, I beleive we may need to modify cpu_set_reserved_ttbr0().
> 
> This may be fine if my assumptions about this erratum are correct. In
> the cpu_set_reserved_ttbr0() case we set TTBR0_EL1 to a table without
> any entries, so no new entries could be tagged with the old ASID.

For some reason, I was under the impression that the issue was old table
entries being allocated to the new ASID. Looking over the series again,
it's not clear to me precisely which cases can occur.

It would be good to see that clarified.

Thanks,
Mark.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 2/5] arm64: Work around Falkor erratum 1003

2017-01-12 Thread Will Deacon
On Thu, Jan 12, 2017 at 03:55:58PM +, Catalin Marinas wrote:
> On Wed, Jan 11, 2017 at 06:22:08PM +, Marc Zyngier wrote:
> > On 11/01/17 18:06, Catalin Marinas wrote:
> > > On Wed, Jan 11, 2017 at 09:41:15AM -0500, Christopher Covington wrote:
> > >> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> > >> index 32682be..9ee46df 100644
> > >> --- a/arch/arm64/mm/proc.S
> > >> +++ b/arch/arm64/mm/proc.S
> > >> @@ -23,6 +23,7 @@
> > >>  #include 
> > >>  #include 
> > >>  #include 
> > >> +#include 
> > >>  #include 
> > >>  #include 
> > >>  #include 
> > >> @@ -140,6 +141,18 @@ ENDPROC(cpu_do_resume)
> > >>  ENTRY(cpu_do_switch_mm)
> > >>  mmidx1, x1  // get mm->context.id
> > >>  bfi x0, x1, #48, #16// set the ASID
> > >> +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
> > >> +alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003
> > >> +mrs x2, ttbr0_el1
> > >> +mov x3, #FALKOR_RESERVED_ASID
> > >> +bfi x2, x3, #48, #16// reserved ASID + old 
> > >> BADDR
> > >> +msr ttbr0_el1, x2
> > >> +isb
> > >> +bfi x2, x0, #0, #48 // reserved ASID + new 
> > >> BADDR
> > >> +msr ttbr0_el1, x2
> > >> +isb
> > >> +alternative_else_nop_endif
> > >> +#endif
> > >>  msr ttbr0_el1, x0   // set TTBR0
> > >>  isb
> > >>  post_ttbr0_update_workaround
> > > 
> > > Please move the above hunk to a pre_ttbr0_update_workaround macro for
> > > consistency with post_ttbr0_update_workaround.
> > 
> > In which case (and also for consistency), should we add that pre_ttbr0
> > macro to entry.S, just before __uaccess_ttbr0_enable? It may not be
> > needed in the SW pan case, but it is probably worth entertaining the
> > idea that there may be something to do there...
> 
> It may actually be needed in entry.S as well. With SW PAN, we move the
> context switching from cpu_do_switch_mm to the kernel_exit macro when
> returning to user. In this case we are switching from the reserved ASID
> 0 and reserved TTBR0_EL1 (pointing to a zeroed page) to the user's
> TTBR0_EL1 and ASID. If the ASID switch isn't taken into account, we may
> end up with new TLB entries being tagged with the reserved ASID. Apart
> from a potential loss of protection with TTBR0 PAN, is there anything
> else that could go wrong? Maybe a TLB conflict if we mix TLBs from
> multiple address spaces tagged with the same reserved ASID.
> 
> If the above is an issue, we would need to patch
> __uaccess_ttbr0_enable() as well, though I'm more inclined to make this
> erratum not selectable when TTBR0 PAN is enabled.

I don't think that's a reasonable approach. By all means change the
default, but we need to support kernel images with both of these kconfig
options enabled.

Will
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC 10/13] KVM: arm64: ITS: vgic_its_alloc_itte/device

2017-01-12 Thread Eric Auger
Add two new helpers to allocate an its itte and an its device.
This will avoid duplication on restore path.

Signed-off-by: Eric Auger 
---
 virt/kvm/arm/vgic/vgic-its.c | 71 ++--
 1 file changed, 48 insertions(+), 23 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index aeb9d08..07bf180 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -732,6 +732,26 @@ static void vgic_its_free_collection(struct vgic_its *its, 
u32 coll_id)
kfree(collection);
 }
 
+static int vgic_its_alloc_itte(struct its_device *device,
+  struct its_itte **ittep,
+  struct its_collection *collection,
+  u32 lpi_id, u32 event_id)
+{
+   struct its_itte *itte;
+
+   itte = kzalloc(sizeof(itte), GFP_KERNEL);
+   if (!itte)
+   return -ENOMEM;
+
+   itte->event_id  = event_id;
+   itte->collection = collection;
+   itte->lpi = lpi_id;
+
+   list_add_tail(>itte_list, >itt_head);
+   *ittep = itte;
+   return 0;
+}
+
 /*
  * The MAPTI and MAPI commands map LPIs to ITTEs.
  * Must be called with its_lock mutex held.
@@ -745,7 +765,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct 
vgic_its *its,
struct its_itte *itte;
struct its_device *device;
struct its_collection *collection, *new_coll = NULL;
-   int lpi_nr;
+   int lpi_nr, ret;
struct vgic_irq *irq;
 
device = find_its_device(its, device_id);
@@ -772,19 +792,13 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, 
struct vgic_its *its,
new_coll = collection;
}
 
-   itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL);
-   if (!itte) {
+   ret = vgic_its_alloc_itte(device, , collection, lpi_nr, event_id);
+   if (ret) {
if (new_coll)
vgic_its_free_collection(its, coll_id);
-   return -ENOMEM;
+   return ret;
}
 
-   itte->event_id  = event_id;
-   list_add_tail(>itte_list, >itt_head);
-
-   itte->collection = collection;
-   itte->lpi = lpi_nr;
-
irq = vgic_add_lpi(kvm, lpi_nr);
if (IS_ERR(irq)) {
if (new_coll)
@@ -823,6 +837,29 @@ static void vgic_its_unmap_device(struct kvm *kvm, struct 
its_device *device)
kfree(device);
 }
 
+static int vgic_its_alloc_device(struct vgic_its *its,
+struct its_device **devp,
+u32 device_id, gpa_t itt_addr_field,
+size_t size_field)
+{
+   struct its_device *device;
+
+   device = kzalloc(sizeof(*device), GFP_KERNEL);
+   if (!device)
+   return -ENOMEM;
+
+   device->device_id = device_id;
+   device->itt_addr = itt_addr_field << 8;
+   device->nb_eventid_bits = size_field + 1;
+   device->itt_size = ((2 << (size_field + 1)) - 1) * 16;
+   INIT_LIST_HEAD(>itt_head);
+
+   list_add_tail(>dev_list, >device_list);
+   *devp = device;
+
+   return 0;
+}
+
 /*
  * MAPD maps or unmaps a device ID to Interrupt Translation Tables (ITTs).
  * Must be called with the its_lock mutex held.
@@ -856,19 +893,7 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, 
struct vgic_its *its,
if (!valid)
return 0;
 
-   device = kzalloc(sizeof(struct its_device), GFP_KERNEL);
-   if (!device)
-   return -ENOMEM;
-
-   device->device_id = device_id;
-   device->itt_addr = itt_addr << 8;
-   device->nb_eventid_bits = size + 1;
-   device->itt_size = ((2 << (size + 1)) - 1) * 16;
-   INIT_LIST_HEAD(>itt_head);
-
-   list_add_tail(>dev_list, >device_list);
-
-   return 0;
+   return vgic_its_alloc_device(its, , device_id, itt_addr, size);
 }
 
 /*
-- 
2.5.5

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC 11/13] KVM: arm64: ITS: Collection table save/restore

2017-01-12 Thread Eric Auger
The flush path copies the collection entries into guest RAM
at the GPA specified in the BASER register. This obviously
requires the BASER to be set. The last written element
is a dummy collection table entry with valid bit set to 0.

On restore path we re-allocate the collection objects.

Signed-off-by: Eric Auger 

---
---
 virt/kvm/arm/vgic/vgic-its.c | 66 ++--
 1 file changed, 64 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 07bf180..a1861b8 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1618,7 +1618,39 @@ static int vgic_its_restore_device_tables(struct kvm 
*kvm,
 static int vgic_its_flush_collection_table(struct kvm *kvm,
   struct vgic_its *its)
 {
-   return -ENXIO;
+   struct its_collection *collection;
+   u64 val, *ptr;
+   size_t max_size, filled = 0;
+   int ret;
+
+   ptr = (u64 *)(its->baser_coll_table & 0xF000);
+   if (!ptr)
+   return 0;
+
+   max_size = its->collection_table_size;
+
+   list_for_each_entry(collection, >collection_list, coll_list) {
+   if (filled == max_size)
+   return -ENOSPC;
+   val = ((u64)1 << 63 |
+  ((u64)collection->target_addr << 16) |
+  collection->collection_id);
+   ret = kvm_write_guest(kvm, (gpa_t)ptr++, , 8);
+   if (ret)
+   return ret;
+   filled += COLL_ENTRY_SIZE;
+   }
+
+   if (filled == max_size)
+   return 0;
+
+   /*
+* table is not fully filled, add a last dummy element
+* with valid bit unset
+*/
+   val = 0;
+   ret = kvm_write_guest(kvm, (gpa_t)ptr, , 8);
+   return ret;
 }
 
 /**
@@ -1629,7 +1661,37 @@ static int vgic_its_flush_collection_table(struct kvm 
*kvm,
 static int vgic_its_restore_collection_table(struct kvm *kvm,
 struct vgic_its *its)
 {
-   return -ENXIO;
+   struct its_collection *collection;
+   size_t max_size, read = 0;
+   u64 val, *ptr;
+   bool valid = true;
+   int ret;
+
+   ptr = (u64 *)(its->baser_coll_table & 0xF000);
+   if (!ptr)
+   return 0;
+
+   max_size = its->collection_table_size;
+
+   while (read < max_size) {
+   u32 target_addr;
+   u32 coll_id;
+
+   ret = kvm_read_guest(kvm, (gpa_t)ptr++, , 8);
+   if (ret)
+   return ret;
+   valid = val >> 63;
+   if (!valid)
+   break;
+   target_addr = (u32)(val >> 16);
+   coll_id = val & 0x;
+   ret = vgic_its_alloc_collection(its, , coll_id);
+   if (ret)
+   return ret;
+   collection->target_addr = target_addr;
+   read += COLL_ENTRY_SIZE;
+   }
+   return 0;
 }
 
 /**
-- 
2.5.5

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC 13/13] KVM: arm64: ITS: Pending table save/restore

2017-01-12 Thread Eric Auger
Save and restore the pending tables.

Pending table restore obviously requires the pendbaser to be
already set.

We also make sure the first kB of the pending table is zeroed.

Signed-off-by: Eric Auger 

---

Not sure what to do with this 1st kB. Should we really care?

Maybe we should read it instead from guest RAM and return
an error if it is non zero, as the spec says on init it should be
zero. Then we do not touch it.
---
 virt/kvm/arm/vgic/vgic-its.c | 94 +++-
 1 file changed, 92 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 16a9aac..4a2b4de 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -316,6 +316,29 @@ static u32 max_lpis_propbaser(u64 propbaser)
return 1U << min(nr_idbits, INTERRUPT_ID_BITS_ITS);
 }
 
+/**
+ * its_zero_pending_table_first1kB - Sets the first kB of the
+ * pending table to 0
+ * @vcpu: vcpu handle the pending table is attached to
+ *
+ * The content of the 1st kB is implementation defined. We force
+ * it to 0. Note the spec says it should already contain zeros on
+ * initial allocation and it must be visible to redist, else the
+ * behavior is unpredicatable
+ */
+static int its_zero_pending_table_first1kB(struct kvm_vcpu *vcpu)
+{
+   gpa_t pendbase = PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
+   u8 *tmp;
+   int ret;
+
+   tmp = kzalloc(1024, GFP_KERNEL);
+   ret = kvm_write_guest(vcpu->kvm, (gpa_t)pendbase, tmp, 1024);
+   kfree(tmp);
+
+   return ret;
+}
+
 /*
  * Scan the whole LPI pending table and sync the pending bit in there
  * with our own data structures. This relies on the LPI being
@@ -1399,6 +1422,8 @@ void vgic_enable_lpis(struct kvm_vcpu *vcpu)
 {
if (!(vcpu->arch.vgic_cpu.pendbaser & GICR_PENDBASER_PTZ))
its_sync_lpi_pending_table(vcpu);
+
+   its_zero_pending_table_first1kB(vcpu);
 }
 
 static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
@@ -1579,7 +1604,47 @@ int vgic_its_attr_regs_access(struct kvm_device *dev,
 static int vgic_its_flush_pending_tables(struct kvm *kvm,
 struct vgic_its *its)
 {
-   return -ENXIO;
+   struct vgic_dist *dist = >arch.vgic;
+   struct vgic_irq *irq;
+   int ret;
+
+   /**
+* we do not take the dist->lpi_list_lock this we have a garantee
+* the LPI list is not touched while the cmd and its lock are held
+*/
+   list_for_each_entry(irq, >lpi_list_head, lpi_list) {
+   struct kvm_vcpu *vcpu;
+   gpa_t pendbase, ptr;
+   bool stored;
+   u8 val;
+
+   vcpu = irq->target_vcpu;
+   if (!vcpu)
+   return -EINVAL;
+
+   pendbase = PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
+
+   ptr = pendbase + (irq->intid / BITS_PER_BYTE);
+
+   ret = kvm_read_guest(kvm, (gpa_t)ptr, , 1);
+   if (ret)
+   return ret;
+
+   stored = val & (irq->intid % BITS_PER_BYTE);
+   if (stored == irq->pending)
+   continue;
+
+   if (irq->pending)
+   val |= 1 << (irq->intid % BITS_PER_BYTE);
+   else
+   val &= ~(1 << (irq->intid % BITS_PER_BYTE));
+
+   ret = kvm_write_guest(kvm, (gpa_t)ptr, , 1);
+   if (ret)
+   return ret;
+   }
+
+   return 0;
 }
 
 /**
@@ -1589,7 +1654,32 @@ static int vgic_its_flush_pending_tables(struct kvm *kvm,
 static int vgic_its_restore_pending_tables(struct kvm *kvm,
   struct vgic_its *its)
 {
-   return -ENXIO;
+   struct vgic_irq *irq;
+   struct vgic_dist *dist = >arch.vgic;
+   int ret;
+
+   list_for_each_entry(irq, >lpi_list_head, lpi_list) {
+   struct kvm_vcpu *vcpu;
+   gpa_t pendbase, ptr;
+   u8 val;
+
+   vcpu = irq->target_vcpu;
+   if (!vcpu)
+   return -EINVAL;
+
+   if (!(vcpu->arch.vgic_cpu.pendbaser & GICR_PENDBASER_PTZ))
+   return 0;
+
+   pendbase = PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
+
+   ptr = pendbase + (irq->intid / BITS_PER_BYTE);
+
+   ret = kvm_read_guest(kvm, (gpa_t)ptr, , 1);
+   if (ret)
+   return ret;
+   irq->pending = val & (1 << (irq->intid % BITS_PER_BYTE));
+   }
+   return 0;
 }
 
 static int vgic_its_flush_itt(struct kvm *kvm, struct its_device *device)
-- 
2.5.5

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC 12/13] KVM: arm64: ITS: Device and translation table flush

2017-01-12 Thread Eric Auger
This patch flushes the device table into guest RAM, at
the address specified by the BASER register. The last
written element is a dummy element with valid bit set to
zero.

For each device listed in the device table, we also flush
the translation table into the address specific in the
corresponding device table entry.

On restore, devices are re-allocated and their itte are
re-built. LPI is re-created and its config gets updated.

Signed-off-by: Eric Auger 

---
---
 virt/kvm/arm/vgic/vgic-its.c | 158 ++-
 1 file changed, 156 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index a1861b8..16a9aac 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1592,13 +1592,129 @@ static int vgic_its_restore_pending_tables(struct kvm 
*kvm,
return -ENXIO;
 }
 
+static int vgic_its_flush_itt(struct kvm *kvm, struct its_device *device)
+{
+   struct its_itte *itte;
+   size_t max_size, filled = 0;
+   u64 val, *ptr;
+   int ret;
+
+   ptr = (u64 *)device->itt_addr;
+   max_size = device->itt_size;
+
+   list_for_each_entry(itte, >itt_head, itte_list) {
+   if (filled == max_size)
+   return -ENOSPC;
+   val = ((u64)device->device_id << 32) | (1ULL << 16) |
+   itte->collection->collection_id;
+   ret = kvm_write_guest(kvm, (gpa_t)ptr++, , 8);
+   if (ret)
+   return ret;
+   val = (u64)itte->lpi << 32 | itte->event_id;
+   ret = kvm_write_guest(kvm, (gpa_t)ptr++, , 8);
+   if (ret)
+   return ret;
+   filled += ITTE_SIZE;
+   }
+   return 0;
+}
+
+static int vgic_its_restore_itt(struct kvm *kvm, struct vgic_its *its,
+   struct its_device *dev)
+{
+   u64 val, *ptr = (u64 *)dev->itt_addr;
+   size_t max_size, read = 0;
+   bool valid;
+
+   max_size = dev->itt_size;
+
+   while (read < max_size) {
+   u32 coll_id, device_id, lpi_id, event_id;
+   struct its_collection *collection;
+   struct its_itte *itte;
+   struct vgic_irq *irq;
+   int ret;
+
+   ret = kvm_read_guest(kvm, (gpa_t)ptr++, , 8);
+   if (ret)
+   return ret;
+   valid = val & 0x1;
+   if (!valid)
+   break;
+   coll_id = val & 0x;
+   device_id = val >> 32;
+   ret = kvm_read_guest(kvm, (gpa_t)ptr++, , 8);
+   if (ret)
+   return ret;
+   lpi_id = val >> 32;
+   event_id = val & 0x;
+   collection = find_collection(its, coll_id);
+   if (!collection)
+   return -EINVAL;
+   ret = vgic_its_alloc_itte(dev, , collection,
+ lpi_id, event_id);
+   if (ret)
+   return ret;
+
+   irq = vgic_add_lpi(kvm, lpi_id);
+   if (IS_ERR(irq))
+   return PTR_ERR(irq);
+   itte->irq = irq;
+
+   /* restores the configuration of the LPI */
+   ret = update_lpi_config(kvm, irq, NULL);
+   if (ret)
+   return ret;
+
+   update_affinity_itte(kvm, itte);
+   read += ITTE_SIZE;
+   }
+   return 0;
+}
+
 /**
  * vgic_its_flush_device_tables - flush the device table and all ITT
  * into guest RAM
  */
 static int vgic_its_flush_device_tables(struct kvm *kvm, struct vgic_its *its)
 {
-   return -ENXIO;
+   struct its_device *dev;
+   size_t max_size, filled = 0;
+   u64 val, *ptr;
+   int ret;
+
+   ptr = (u64 *)(its->baser_device_table & 0xF000);
+
+   /* max size of the device table (64kB page size) */
+   max_size = its->device_table_size;
+
+   list_for_each_entry(dev, >device_list, dev_list) {
+   if (filled == max_size)
+   return -ENOSPC;
+   ret = vgic_its_flush_itt(kvm, dev);
+   if (ret)
+   return ret;
+   val = ((u64)dev->device_id << 32) | (1 << 5) |
+   (dev->nb_eventid_bits - 1);
+   ret = kvm_write_guest(kvm, (gpa_t)ptr++, , 8);
+   if (ret)
+   return ret;
+   val = dev->itt_addr >> 8;
+   ret = kvm_write_guest(kvm, (gpa_t)ptr++, , 8);
+   if (ret)
+   return ret;
+   filled += DEV_ENTRY_SIZE;
+   }
+   if (filled == max_size)
+   return 0;
+
+   /**
+* table is not fully filled, add a last dummy element
+* element with 

[RFC 09/13] KVM: arm64: ITS: KVM_DEV_ARM_VGIC_GRP_ITS_TABLES group

2017-01-12 Thread Eric Auger
Introduce a new group aiming at saving/restoring the ITS
tables to/from the guest memory.

We hold the its lock during the save and restore to prevent
any command from being executed. This also garantees the LPI
list is not going to change and no MSI injection can happen
during the operation.

At this stage the functionality is not yet implemented. Only
the skeleton is put in place.

Signed-off-by: Eric Auger 

---

At the end of the restore I trigger a map_resources. This aims
at accomodating the virtio-net-pci guest use case. On restore I
can see the virtio-net-pci device sends MSI before the first
VCPU run. The fact we are not able to handle MSIs at that stage
stalls the virtio-net-pci device. We may fix this issue at QEMU
level instead.
---
 arch/arm/include/uapi/asm/kvm.h   |   1 +
 arch/arm64/include/uapi/asm/kvm.h |   1 +
 virt/kvm/arm/vgic/vgic-its.c  | 120 +-
 3 files changed, 121 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index f6a167c..84fdfb0 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -193,6 +193,7 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_CPU_SYSREGS6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS  8
+#define KVM_DEV_ARM_VGIC_GRP_ITS_TABLES9
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
(0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
diff --git a/arch/arm64/include/uapi/asm/kvm.h 
b/arch/arm64/include/uapi/asm/kvm.h
index 8850e9b..31e1137 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -213,6 +213,7 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_CPU_SYSREGS6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
+#define KVM_DEV_ARM_VGIC_GRP_ITS_TABLES 9
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
(0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 32429fd..aeb9d08 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1548,6 +1548,117 @@ int vgic_its_attr_regs_access(struct kvm_device *dev,
return 0;
 }
 
+/**
+ * vgic_its_flush_pending_tables - Flush the pending tables into guest RAM
+ */
+static int vgic_its_flush_pending_tables(struct kvm *kvm,
+struct vgic_its *its)
+{
+   return -ENXIO;
+}
+
+/**
+ * vgic_its_restore_pending_tables - Restore the pending tables from guest
+ * RAM to internal data structs
+ */
+static int vgic_its_restore_pending_tables(struct kvm *kvm,
+  struct vgic_its *its)
+{
+   return -ENXIO;
+}
+
+/**
+ * vgic_its_flush_device_tables - flush the device table and all ITT
+ * into guest RAM
+ */
+static int vgic_its_flush_device_tables(struct kvm *kvm, struct vgic_its *its)
+{
+   return -ENXIO;
+}
+
+/**
+ * vgic_its_restore_device_tables - restore the device table and all ITT
+ * from guest RAM to internal data structs
+ */
+static int vgic_its_restore_device_tables(struct kvm *kvm,
+ struct vgic_its *its)
+{
+   return -ENXIO;
+}
+
+/**
+ * vgic_its_flush_collection_table - flush the collection table into
+ * guest RAM
+ */
+static int vgic_its_flush_collection_table(struct kvm *kvm,
+  struct vgic_its *its)
+{
+   return -ENXIO;
+}
+
+/**
+ * vgic_its_restore_collection_table - reads the collection table
+ * in guest memory and restores the ITS internal state. Requires the
+ * BASER registers to be restored before.
+ */
+static int vgic_its_restore_collection_table(struct kvm *kvm,
+struct vgic_its *its)
+{
+   return -ENXIO;
+}
+
+/**
+ * vgic_its_table_flush - Flush all the tables into guest RAM
+ */
+static int vgic_its_table_flush(struct kvm *kvm,  struct vgic_its *its)
+{
+   int ret;
+
+   mutex_lock(>its_lock);
+
+   ret = vgic_its_flush_pending_tables(kvm, its);
+   if (ret)
+   goto out;
+   ret = vgic_its_flush_device_tables(kvm, its);
+   if (ret)
+   goto out;
+
+   ret = vgic_its_flush_collection_table(kvm, its);
+out:
+   mutex_unlock(>its_lock);
+   return ret;
+}
+
+/**
+ * vgic_its_table_restore - Restore all tables from guest RAM to internal
+ * data structs
+ */
+static int vgic_its_table_restore(struct kvm *kvm,  struct vgic_its *its)
+{
+   int ret;
+
+   mutex_lock(>its_lock);
+   ret = vgic_its_restore_collection_table(kvm, its);
+   if (ret)
+   goto out;
+
+   ret = vgic_its_restore_device_tables(kvm, its);
+   if (ret)
+ 

[RFC 02/13] arm/arm64: vgic: turn vgic_find_mmio_region into public

2017-01-12 Thread Eric Auger
We plan to use vgic_find_mmio_region in vgic-its.c so let's
turn it into a public function.

Signed-off-by: Eric Auger 
---
 virt/kvm/arm/vgic/vgic-mmio.c | 3 +--
 virt/kvm/arm/vgic/vgic-mmio.h | 5 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 28fef92..568bffa 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -431,8 +431,7 @@ static int match_region(const void *key, const void *elt)
return 0;
 }
 
-/* Find the proper register handler entry given a certain address offset. */
-static const struct vgic_register_region *
+const struct vgic_register_region *
 vgic_find_mmio_region(const struct vgic_register_region *region, int 
nr_regions,
  unsigned int offset)
 {
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
index b9423b4..ecf8bfb 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -196,4 +196,9 @@ u64 vgic_sanitise_shareability(u64 reg);
 u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift,
u64 (*sanitise_fn)(u64));
 
+/* Find the proper register handler entry given a certain address offset */
+const struct vgic_register_region *
+vgic_find_mmio_region(const struct vgic_register_region *region,
+ int nr_regions, unsigned int offset);
+
 #endif
-- 
2.5.5

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC 06/13] KVM: arm64: ITS: Expose ITT_Entry_Size in GITS_TYPER

2017-01-12 Thread Eric Auger
An ITT_Entry_Size of 2x8Bytes is reported to the guest
to provision for ITTE state storage.

Signed-off-by: Eric Auger 
---
 include/linux/irqchip/arm-gic-v3.h | 1 +
 virt/kvm/arm/vgic/vgic-its.c   | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/include/linux/irqchip/arm-gic-v3.h 
b/include/linux/irqchip/arm-gic-v3.h
index 170e00a..8cfd81bc 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -233,6 +233,7 @@
 #define GITS_CTLR_QUIESCENT(1U << 31)
 
 #define GITS_TYPER_PLPIS   (1UL << 0)
+#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT4
 #define GITS_TYPER_IDBITS_SHIFT8
 #define GITS_TYPER_DEVBITS_SHIFT   13
 #define GITS_TYPER_DEVBITS(r)  r) >> GITS_TYPER_DEVBITS_SHIFT) & 
0x1f) + 1)
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index e174220..96378b8 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -33,6 +33,8 @@
 #include "vgic.h"
 #include "vgic-mmio.h"
 
+#define ITTE_SIZE 16
+
 /*
  * Creates a new (reference to a) struct vgic_irq for a given LPI.
  * If this LPI is already mapped on another ITS, we increase its refcount
@@ -399,6 +401,7 @@ static unsigned long vgic_mmio_read_its_typer(struct kvm 
*kvm,
 */
reg |= 0x0f << GITS_TYPER_DEVBITS_SHIFT;
reg |= 0x0f << GITS_TYPER_IDBITS_SHIFT;
+   reg |= ITTE_SIZE << GITS_TYPER_ITT_ENTRY_SIZE_SHIFT;
 
return extract_bytes(reg, addr & 7, len);
 }
-- 
2.5.5

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC 08/13] KVM: arm64: ITS: On MAPD interpret and store itt_addr and size

2017-01-12 Thread Eric Auger
Up to now the ITT_addr and size fields of the MAPD command were
ignored. We will need them to handle save/restore. Let's record
the reconstructed number of eventid bits, the ITT base gpa and
the size in bytes in the its device struct.

Signed-off-by: Eric Auger 
---
 virt/kvm/arm/vgic/vgic-its.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 358ae38..32429fd 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -103,6 +103,9 @@ struct its_device {
 
/* the head for the list of ITTEs */
struct list_head itt_head;
+   gpa_t itt_addr;
+   size_t itt_size;
+   u32 nb_eventid_bits;
u32 device_id;
 };
 
@@ -562,6 +565,8 @@ static u64 its_cmd_mask_field(u64 *its_cmd, int word, int 
shift, int size)
 #define its_cmd_get_collection(cmd)its_cmd_mask_field(cmd, 2,  0, 16)
 #define its_cmd_get_target_addr(cmd)   its_cmd_mask_field(cmd, 2, 16, 32)
 #define its_cmd_get_validbit(cmd)  its_cmd_mask_field(cmd, 2, 63,  1)
+#define its_cmd_get_ittaddr(cmd)   its_cmd_mask_field(cmd, 2, 8,  44)
+#define its_cmd_get_size(cmd)  its_cmd_mask_field(cmd, 1, 0,  5)
 
 /*
  * The DISCARD command frees an Interrupt Translation Table Entry (ITTE).
@@ -827,6 +832,8 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct 
vgic_its *its,
 {
u32 device_id = its_cmd_get_deviceid(its_cmd);
bool valid = its_cmd_get_validbit(its_cmd);
+   size_t size = its_cmd_get_size(its_cmd);
+   gpa_t itt_addr = its_cmd_get_ittaddr(its_cmd);
struct its_device *device;
 
if (!vgic_its_check_id(its, its->baser_device_table, device_id))
@@ -854,6 +861,9 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct 
vgic_its *its,
return -ENOMEM;
 
device->device_id = device_id;
+   device->itt_addr = itt_addr << 8;
+   device->nb_eventid_bits = size + 1;
+   device->itt_size = ((2 << (size + 1)) - 1) * 16;
INIT_LIST_HEAD(>itt_head);
 
list_add_tail(>dev_list, >device_list);
-- 
2.5.5

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC 07/13] KVM: arm64: ITS: Change entry_size and indirect bit in BASER

2017-01-12 Thread Eric Auger
Change the device table entry_size to 16 bytes instead of 8.
We also Store the device and collection device in the its
struct.

The patch also clears the indirect bit for the device BASER.
The indirect bit is set as read-only.

Signed-off-by: Eric Auger 

---

TODO: investigate support of 2 level tables, ie. enabling
Indirect = 1. Support of 2 level tables is implementation
defined.
---
 include/kvm/arm_vgic.h   |  3 +++
 virt/kvm/arm/vgic/vgic-its.c | 25 -
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 730a18a..e6fc727 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -156,6 +156,9 @@ struct vgic_its {
u64 baser_device_table;
u64 baser_coll_table;
 
+   size_t  device_table_size;
+   size_t  collection_table_size;
+
/* Protects the command queue */
struct mutexcmd_lock;
u64 cbaser;
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 96378b8..358ae38 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -34,6 +34,8 @@
 #include "vgic-mmio.h"
 
 #define ITTE_SIZE 16
+#define DEV_ENTRY_SIZE 16
+#define COLL_ENTRY_SIZE 16
 
 /*
  * Creates a new (reference to a) struct vgic_irq for a given LPI.
@@ -1269,14 +1271,16 @@ static unsigned long vgic_mmio_read_its_baser(struct 
kvm *kvm,
return extract_bytes(reg, addr & 7, len);
 }
 
-#define GITS_BASER_RO_MASK (GENMASK_ULL(52, 48) | GENMASK_ULL(58, 56))
+#define GITS_BASER_RO_MASK (GENMASK_ULL(52, 48) | GENMASK_ULL(58, 56) | \
+GENMASK_ULL(62, 62))
 static void vgic_mmio_write_its_baser(struct kvm *kvm,
  struct vgic_its *its,
  gpa_t addr, unsigned int len,
  unsigned long val)
 {
u64 entry_size, device_type;
-   u64 reg, *regptr, clearbits = 0;
+   u64 reg, *regptr;
+   size_t *psize;
 
/* When GITS_CTLR.Enable is 1, we ignore write accesses. */
if (its->enabled)
@@ -1285,14 +1289,15 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
switch (BASER_INDEX(addr)) {
case 0:
regptr = >baser_device_table;
-   entry_size = 8;
+   entry_size = DEV_ENTRY_SIZE;
device_type = GITS_BASER_TYPE_DEVICE;
+   psize = >device_table_size;
break;
case 1:
regptr = >baser_coll_table;
-   entry_size = 8;
+   entry_size = COLL_ENTRY_SIZE;
device_type = GITS_BASER_TYPE_COLLECTION;
-   clearbits = GITS_BASER_INDIRECT;
+   psize = >collection_table_size;
break;
default:
return;
@@ -1300,12 +1305,13 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
 
reg = update_64bit_reg(*regptr, addr & 7, len, val);
reg &= ~GITS_BASER_RO_MASK;
-   reg &= ~clearbits;
 
reg |= (entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT;
reg |= device_type << GITS_BASER_TYPE_SHIFT;
reg = vgic_sanitise_its_baser(reg);
 
+   *psize = ((reg & 0xFF) + 1) << 16;
+
*regptr = reg;
 }
 
@@ -1390,7 +1396,6 @@ static int vgic_register_its_iodev(struct kvm *kvm, 
struct vgic_its *its)
(GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb)| \
 GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \
 GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) | \
-((8ULL - 1) << GITS_BASER_ENTRY_SIZE_SHIFT)| \
 GITS_BASER_PAGE_SIZE_64K)
 
 #define INITIAL_PROPBASER_VALUE
  \
@@ -1423,9 +1428,11 @@ static int vgic_its_create(struct kvm_device *dev, u32 
type)
its->dev = dev;
 
its->baser_device_table = INITIAL_BASER_VALUE   |
-   ((u64)GITS_BASER_TYPE_DEVICE << GITS_BASER_TYPE_SHIFT);
+   ((u64)GITS_BASER_TYPE_DEVICE << GITS_BASER_TYPE_SHIFT)  |
+   ((u64)(DEV_ENTRY_SIZE - 1) << GITS_BASER_ENTRY_SIZE_SHIFT);
its->baser_coll_table = INITIAL_BASER_VALUE |
-   ((u64)GITS_BASER_TYPE_COLLECTION << GITS_BASER_TYPE_SHIFT);
+   ((u64)GITS_BASER_TYPE_COLLECTION << GITS_BASER_TYPE_SHIFT) |
+   ((u64)(COLL_ENTRY_SIZE - 1) << GITS_BASER_ENTRY_SIZE_SHIFT);
dev->kvm->arch.vgic.propbaser = INITIAL_PROPBASER_VALUE;
 
dev->private = its;
-- 
2.5.5

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC 05/13] KVM: arm64: ITS: Implement vgic_mmio_uaccess_write_its_creadr

2017-01-12 Thread Eric Auger
GITS_CREADR needs to be restored so let's implement the associated
uaccess_write_its callback.

Signed-off-by: Eric Auger 

---
---
 virt/kvm/arm/vgic/vgic-its.c | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 55671585..e174220 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1225,6 +1225,25 @@ static unsigned long vgic_mmio_read_its_creadr(struct 
kvm *kvm,
return extract_bytes(its->creadr, addr & 0x7, len);
 }
 
+static void vgic_mmio_uaccess_write_its_creadr(struct kvm *kvm,
+  struct vgic_its *its,
+  gpa_t addr, unsigned int len,
+  unsigned long val)
+{
+   u32 reg;
+
+   mutex_lock(>cmd_lock);
+   reg = update_64bit_reg(its->creadr, addr & 7, len, val);
+   reg = ITS_CMD_OFFSET(reg);
+   if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) {
+   mutex_unlock(>cmd_lock);
+   return;
+   }
+
+   its->creadr = reg;
+   mutex_unlock(>cmd_lock);
+}
+
 #define BASER_INDEX(addr) (((addr) / sizeof(u64)) & 0x7)
 static unsigned long vgic_mmio_read_its_baser(struct kvm *kvm,
  struct vgic_its *its,
@@ -1320,7 +1339,8 @@ static struct vgic_register_region its_registers[] = {
vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, NULL,
8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
REGISTER_ITS_DESC(GITS_CREADR,
-   vgic_mmio_read_its_creadr, its_mmio_write_wi, NULL, 8,
+   vgic_mmio_read_its_creadr, its_mmio_write_wi,
+   vgic_mmio_uaccess_write_its_creadr, 8,
VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
REGISTER_ITS_DESC(GITS_BASER,
vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, NULL, 0x40,
-- 
2.5.5

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC 04/13] KVM: arm64: ITS: Implement vgic_its_has_attr_regs and attr_regs_access

2017-01-12 Thread Eric Auger
This patch implements vgic_its_has_attr_regs and vgic_its_attr_regs_access
upon the MMIO framework. VGIC ITS KVM device KVM_DEV_ARM_VGIC_GRP_ITS_REGS
group becomes functional.

At least GITS_CREADR requires to differentiate a guest write action from a
user access. As such let's introduce a new uaccess_its_write
vgic_register_region callback.

Signed-off-by: Eric Auger 
---
 virt/kvm/arm/vgic/vgic-its.c  | 74 ---
 virt/kvm/arm/vgic/vgic-mmio.h |  9 --
 2 files changed, 69 insertions(+), 14 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 058034d..55671585 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1287,13 +1287,14 @@ static void vgic_mmio_write_its_baser(struct kvm *kvm,
*regptr = reg;
 }
 
-#define REGISTER_ITS_DESC(off, rd, wr, length, acc)\
+#define REGISTER_ITS_DESC(off, rd, wr, uwr, length, acc)   \
 {  \
.reg_offset = off,  \
.len = length,  \
.access_flags = acc,\
.its_read = rd, \
.its_write = wr,\
+   .uaccess_its_write = uwr,   \
 }
 
 static void its_mmio_write_wi(struct kvm *kvm, struct vgic_its *its,
@@ -1304,28 +1305,28 @@ static void its_mmio_write_wi(struct kvm *kvm, struct 
vgic_its *its,
 
 static struct vgic_register_region its_registers[] = {
REGISTER_ITS_DESC(GITS_CTLR,
-   vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4,
+   vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, NULL, 4,
VGIC_ACCESS_32bit),
REGISTER_ITS_DESC(GITS_IIDR,
-   vgic_mmio_read_its_iidr, its_mmio_write_wi, 4,
+   vgic_mmio_read_its_iidr, its_mmio_write_wi, NULL, 4,
VGIC_ACCESS_32bit),
REGISTER_ITS_DESC(GITS_TYPER,
-   vgic_mmio_read_its_typer, its_mmio_write_wi, 8,
+   vgic_mmio_read_its_typer, its_mmio_write_wi, NULL, 8,
VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
REGISTER_ITS_DESC(GITS_CBASER,
-   vgic_mmio_read_its_cbaser, vgic_mmio_write_its_cbaser, 8,
+   vgic_mmio_read_its_cbaser, vgic_mmio_write_its_cbaser, NULL, 8,
VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
REGISTER_ITS_DESC(GITS_CWRITER,
-   vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8,
-   VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+   vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, NULL,
+   8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
REGISTER_ITS_DESC(GITS_CREADR,
-   vgic_mmio_read_its_creadr, its_mmio_write_wi, 8,
+   vgic_mmio_read_its_creadr, its_mmio_write_wi, NULL, 8,
VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
REGISTER_ITS_DESC(GITS_BASER,
-   vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, 0x40,
+   vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, NULL, 0x40,
VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
REGISTER_ITS_DESC(GITS_IDREGS_BASE,
-   vgic_mmio_read_its_idregs, its_mmio_write_wi, 0x30,
+   vgic_mmio_read_its_idregs, its_mmio_write_wi, NULL, 0x30,
VGIC_ACCESS_32bit),
 };
 
@@ -1448,14 +1449,63 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
 int vgic_its_has_attr_regs(struct kvm_device *dev,
   struct kvm_device_attr *attr)
 {
-   return -ENXIO;
+   const struct vgic_register_region *region;
+   struct vgic_io_device iodev = {
+   .regions = its_registers,
+   .nr_regions = ARRAY_SIZE(its_registers),
+   };
+   gpa_t offset;
+
+   offset = attr->attr;
+
+   region = vgic_find_mmio_region(iodev.regions,
+  iodev.nr_regions,
+  offset);
+   if (!region)
+   return -ENXIO;
+   return 0;
 }
 
 int vgic_its_attr_regs_access(struct kvm_device *dev,
  struct kvm_device_attr *attr,
  u64 *reg, bool is_write)
 {
-   return -ENXIO;
+   const struct vgic_register_region *region;
+   struct vgic_io_device iodev = {
+   .regions = its_registers,
+   .nr_regions = ARRAY_SIZE(its_registers),
+   };
+   struct vgic_its *its = dev->private;
+   gpa_t addr, offset;
+   unsigned int len;
+
+   if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base))
+   return -ENXIO;
+
+   offset = attr->attr;
+   if (offset & 0x7)
+   return -EINVAL;
+
+   

[RFC 00/13] vITS save/restore

2017-01-12 Thread Eric Auger
This series specifies and implements an API aimed at saving and restoring
the state of the in-kernel emulated ITS device.

The ITS is programmed through registers and tables. Those later tables
are allocated by the guest.  Their base address is programmed in
registers or table entries before the ITS is enabled.

The ITS is free to use some of them to flush its internal caches. This
is likely to be used when entering low power state.

Therefore, for save/restore use case, it looks natural to use this
guest RAM allocated space to save the table related data. However,
currently,The ITS in-kernel emulated device does not use all of those
tables and for those it uses, it does not always sync them with its
cached data. Additional sync must happen for:
- the collection table
- the device table
- the per-device translation tables
- the LPI pending tables.

The LPI configration table and the command queues do not need extra
syncs.

So the bulk of the work in this series consists in the table
save/restore rather than register save/restore.

An alternative to flushing the tables into guest RAM could have been
to flush them into a separate user-space buffer. However the drawback
of this alternative is that the virtualizer would allocate dedicated
buffers to store the data that should normally be laid out in guest
RAM. It would also be obliged to re-compute their size from
register/table content.

So saving the tables in guest RAM better fit the ITS programming
model and optimizes the memory usage. The drawback of this solution
is it brings additional challenges at user-space level to make sure
the guest RAM is frozen after table sync.

The code is functional while saving/restoring a guest using
virtio-net-pci.  However many points deserve additional tests.
I share the series at that stage to get the documentation reviewed
and main principles discussed.

The series applies on top of Vijaya's series:
- [PATCH v10 0/8] arm/arm64: vgic: Implement API for vGICv3 live
  migration
  http://www.spinics.net/lists/arm-kernel/msg546383.html

Best Regards

Eric

Git: complete series available at
https://github.com/eauger/linux/tree/v4.10-rc3-its-mig-rfc-v1

* Testing:
- on Cavium using a virtio-net-pci guest and virsh save/restore
  commands

Eric Auger (13):
  KVM: arm/arm64: Add vITS save/restore API documentation
  arm/arm64: vgic: turn vgic_find_mmio_region into public
  KVM: arm64: ITS: KVM_DEV_ARM_VGIC_GRP_ITS_REGS group
  KVM: arm64: ITS: Implement vgic_its_has_attr_regs and attr_regs_access
  KVM: arm64: ITS: Implement vgic_mmio_uaccess_write_its_creadr
  KVM: arm64: ITS: Expose ITT_Entry_Size in GITS_TYPER
  KVM: arm64: ITS: Change entry_size and indirect bit in BASER
  KVM: arm64: ITS: On MAPD interpret and store itt_addr and size
  KVM: arm64: ITS: KVM_DEV_ARM_VGIC_GRP_ITS_TABLES group
  KVM: arm64: ITS: vgic_its_alloc_itte/device
  KVM: arm64: ITS: Collection table save/restore
  KVM: arm64: ITS: Device and translation table flush
  KVM: arm64: ITS: Pending table save/restore

 Documentation/virtual/kvm/devices/arm-vgic-its.txt |  70 +++
 arch/arm/include/uapi/asm/kvm.h|   2 +
 arch/arm64/include/uapi/asm/kvm.h  |   2 +
 include/kvm/arm_vgic.h |   3 +
 include/linux/irqchip/arm-gic-v3.h |   1 +
 virt/kvm/arm/vgic/vgic-its.c   | 655 +++--
 virt/kvm/arm/vgic/vgic-mmio.c  |   3 +-
 virt/kvm/arm/vgic/vgic-mmio.h  |  14 +-
 8 files changed, 705 insertions(+), 45 deletions(-)

-- 
2.5.5

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC 01/13] KVM: arm/arm64: Add vITS save/restore API documentation

2017-01-12 Thread Eric Auger
Add description for how to access vITS registers and how to flush/restore
vITS tables into/from memory

Signed-off-by: Eric Auger 
---
 Documentation/virtual/kvm/devices/arm-vgic-its.txt | 70 ++
 1 file changed, 70 insertions(+)

diff --git a/Documentation/virtual/kvm/devices/arm-vgic-its.txt 
b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
index 6081a5b..bd74613 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic-its.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
@@ -36,3 +36,73 @@ Groups:
 -ENXIO:  ITS not properly configured as required prior to setting
  this attribute
 -ENOMEM: Memory shortage when allocating ITS internal data
+
+  KVM_DEV_ARM_VGIC_GRP_ITS_REGS
+  Attributes:
+  The attr field of kvm_device_attr encodes the offset of the
+  ITS register, relative to the ITS control frame base address
+  (ITS_base).
+
+  kvm_device_attr.addr points to a __u64 value whatever the width
+  of the addressed register (32/64 bits).
+
+  Writes to read-only registers are ignored by the kernel except
+  for a single register, GITS_READR. Normally this register is RO
+  but it needs to be restored otherwise commands in the queue will
+  be re-executed after CWRITER setting.
+
+  For other registers, Getting or setting a register has the same
+  effect as reading/writing the register on real hardware.
+  Errors:
+-ENXIO: Offset does not correspond to any supported register
+-EFAULT: Invalid user pointer for attr->addr
+-EINVAL: Offset is not 64-bit aligned
+
+  KVM_DEV_ARM_VGIC_GRP_ITS_TABLES
+  Attributes
+   The attr field of kvm_device_attr is not used.
+
+   request the flush-save/restore of the ITS tables, namely
+   the device table, the collection table, all the ITT tables,
+   the LPI pending tables. On save, the tables are flushed
+   into guest memory at the location provisionned by the guest
+   in GITS_BASER (device and collection tables), on MAPD command
+   (ITT_addr), GICR_PENDBASERs (pending tables).
+
+   This means the GIC should be restored before the ITS and all
+   ITS registers but the GITS_CTRL must be restored before
+   restoring the ITS tables.
+
+   Note the LPI configuration table is read-only for the
+   in-kernel ITS and its save/restore goes through the standard
+   RAM save/restore.
+
+   The layout of the tables in guest memory defines an ABI.
+   The entries are laid out in memory as follows;
+
+Device Table Entry (DTE) layout: entry size = 16 bytes
+
+bits: | 63   ...  32  | 31 ... 6 | 5 | 4   ...   0 |
+values:   |   DeviceID|   Resv   | V |Size |
+
+bits: | 63 ... 44 | 43  ...  0  |
+values:   |Resv   |  ITT_addr   |
+
+Collection Table Entry (CTE) layout: entry size = 8 bytes
+
+bits: | 63| 62 ..  52  | 51 ... 16 | 15  ...   0 |
+values:   | V |RES0|  RDBase   |ICID |
+
+Interrupt Translation Table Entry (ITTE) layout: entry size = 16 bytes
+
+bits: | 63   ...  32  | 31 ... 17 | 16 | 15  ...  0 |
+values:   |   DeviceID|RES0   | V  |ICID|
+
+bits: | 63 ...  32| 31  ...0 |
+values:   |   pINTID  |EventID   |
+
+LPI Pending Table layout:
+
+As specified in the ARM Generic Interrupt Controller Architecture
+Specification GIC Architecture version 3.0 and version 4. The first
+1kB contains only zeros.
-- 
2.5.5

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[RFC 03/13] KVM: arm64: ITS: KVM_DEV_ARM_VGIC_GRP_ITS_REGS group

2017-01-12 Thread Eric Auger
The ITS KVM device exposes a new KVM_DEV_ARM_VGIC_GRP_ITS_REGS
group which allows the userspace to save/restore ITS registers.

At this stage the get/set/has operations are not yet implemented.

Signed-off-by: Eric Auger 
---
 arch/arm/include/uapi/asm/kvm.h   |  1 +
 arch/arm64/include/uapi/asm/kvm.h |  1 +
 virt/kvm/arm/vgic/vgic-its.c  | 36 +++-
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index f347779..f6a167c 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -192,6 +192,7 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
 #define KVM_DEV_ARM_VGIC_CPU_SYSREGS6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
+#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS  8
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
(0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
diff --git a/arch/arm64/include/uapi/asm/kvm.h 
b/arch/arm64/include/uapi/asm/kvm.h
index 4100f8c..8850e9b 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -212,6 +212,7 @@ struct kvm_arch_memory_slot {
 #define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
 #define KVM_DEV_ARM_VGIC_CPU_SYSREGS6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
+#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
(0x3fULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 8c2b3cd..058034d 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1445,6 +1445,19 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
kfree(its);
 }
 
+int vgic_its_has_attr_regs(struct kvm_device *dev,
+  struct kvm_device_attr *attr)
+{
+   return -ENXIO;
+}
+
+int vgic_its_attr_regs_access(struct kvm_device *dev,
+ struct kvm_device_attr *attr,
+ u64 *reg, bool is_write)
+{
+   return -ENXIO;
+}
+
 static int vgic_its_has_attr(struct kvm_device *dev,
 struct kvm_device_attr *attr)
 {
@@ -1461,6 +1474,8 @@ static int vgic_its_has_attr(struct kvm_device *dev,
return 0;
}
break;
+   case KVM_DEV_ARM_VGIC_GRP_ITS_REGS:
+   return vgic_its_has_attr_regs(dev, attr);
}
return -ENXIO;
 }
@@ -1500,6 +1515,15 @@ static int vgic_its_set_attr(struct kvm_device *dev,
return 0;
}
break;
+   case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: {
+   u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+   u64 reg;
+
+   if (get_user(reg, uaddr))
+   return -EFAULT;
+
+   return vgic_its_attr_regs_access(dev, attr, , true);
+   }
}
return -ENXIO;
 }
@@ -1520,10 +1544,20 @@ static int vgic_its_get_attr(struct kvm_device *dev,
if (copy_to_user(uaddr, , sizeof(addr)))
return -EFAULT;
break;
+   }
+   case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: {
+   u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+   u64 reg;
+   int ret;
+
+   ret = vgic_its_attr_regs_access(dev, attr, , false);
+   if (ret)
+   return ret;
+   return put_user(reg, uaddr);
+   }
default:
return -ENXIO;
}
-   }
 
return 0;
 }
-- 
2.5.5

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 2/5] arm64: Work around Falkor erratum 1003

2017-01-12 Thread Catalin Marinas
On Wed, Jan 11, 2017 at 06:22:08PM +, Marc Zyngier wrote:
> On 11/01/17 18:06, Catalin Marinas wrote:
> > On Wed, Jan 11, 2017 at 09:41:15AM -0500, Christopher Covington wrote:
> >> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> >> index 32682be..9ee46df 100644
> >> --- a/arch/arm64/mm/proc.S
> >> +++ b/arch/arm64/mm/proc.S
> >> @@ -23,6 +23,7 @@
> >>  #include 
> >>  #include 
> >>  #include 
> >> +#include 
> >>  #include 
> >>  #include 
> >>  #include 
> >> @@ -140,6 +141,18 @@ ENDPROC(cpu_do_resume)
> >>  ENTRY(cpu_do_switch_mm)
> >>mmidx1, x1  // get mm->context.id
> >>bfi x0, x1, #48, #16// set the ASID
> >> +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
> >> +alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003
> >> +  mrs x2, ttbr0_el1
> >> +  mov x3, #FALKOR_RESERVED_ASID
> >> +  bfi x2, x3, #48, #16// reserved ASID + old BADDR
> >> +  msr ttbr0_el1, x2
> >> +  isb
> >> +  bfi x2, x0, #0, #48 // reserved ASID + new BADDR
> >> +  msr ttbr0_el1, x2
> >> +  isb
> >> +alternative_else_nop_endif
> >> +#endif
> >>msr ttbr0_el1, x0   // set TTBR0
> >>isb
> >>post_ttbr0_update_workaround
> > 
> > Please move the above hunk to a pre_ttbr0_update_workaround macro for
> > consistency with post_ttbr0_update_workaround.
> 
> In which case (and also for consistency), should we add that pre_ttbr0
> macro to entry.S, just before __uaccess_ttbr0_enable? It may not be
> needed in the SW pan case, but it is probably worth entertaining the
> idea that there may be something to do there...

It may actually be needed in entry.S as well. With SW PAN, we move the
context switching from cpu_do_switch_mm to the kernel_exit macro when
returning to user. In this case we are switching from the reserved ASID
0 and reserved TTBR0_EL1 (pointing to a zeroed page) to the user's
TTBR0_EL1 and ASID. If the ASID switch isn't taken into account, we may
end up with new TLB entries being tagged with the reserved ASID. Apart
from a potential loss of protection with TTBR0 PAN, is there anything
else that could go wrong? Maybe a TLB conflict if we mix TLBs from
multiple address spaces tagged with the same reserved ASID.

If the above is an issue, we would need to patch
__uaccess_ttbr0_enable() as well, though I'm more inclined to make this
erratum not selectable when TTBR0 PAN is enabled.

-- 
Catalin
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 2/5] arm64: Work around Falkor erratum 1003

2017-01-12 Thread Catalin Marinas
On Wed, Jan 11, 2017 at 06:40:52PM +, Mark Rutland wrote:
> On Wed, Jan 11, 2017 at 06:22:08PM +, Marc Zyngier wrote:
> > On 11/01/17 18:06, Catalin Marinas wrote:
> > > On Wed, Jan 11, 2017 at 09:41:15AM -0500, Christopher Covington wrote:
> > >> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> > >> index 32682be..9ee46df 100644
> > >> --- a/arch/arm64/mm/proc.S
> > >> +++ b/arch/arm64/mm/proc.S
> > >> @@ -23,6 +23,7 @@
> > >>  #include 
> > >>  #include 
> > >>  #include 
> > >> +#include 
> > >>  #include 
> > >>  #include 
> > >>  #include 
> > >> @@ -140,6 +141,18 @@ ENDPROC(cpu_do_resume)
> > >>  ENTRY(cpu_do_switch_mm)
> > >>  mmidx1, x1  // get mm->context.id
> > >>  bfi x0, x1, #48, #16// set the ASID
> > >> +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
> > >> +alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003
> > >> +mrs x2, ttbr0_el1
> > >> +mov x3, #FALKOR_RESERVED_ASID
> > >> +bfi x2, x3, #48, #16// reserved ASID + old 
> > >> BADDR
> > >> +msr ttbr0_el1, x2
> > >> +isb
> > >> +bfi x2, x0, #0, #48 // reserved ASID + new 
> > >> BADDR
> > >> +msr ttbr0_el1, x2
> > >> +isb
> > >> +alternative_else_nop_endif
> > >> +#endif
> > >>  msr ttbr0_el1, x0   // set TTBR0
> > >>  isb
> > >>  post_ttbr0_update_workaround
> > > 
> > > Please move the above hunk to a pre_ttbr0_update_workaround macro for
> > > consistency with post_ttbr0_update_workaround.
> > 
> > In which case (and also for consistency), should we add that pre_ttbr0
> > macro to entry.S, just before __uaccess_ttbr0_enable? It may not be
> > needed in the SW pan case, but it is probably worth entertaining the
> > idea that there may be something to do there...
> 
> Likewise, I beleive we may need to modify cpu_set_reserved_ttbr0().

This may be fine if my assumptions about this erratum are correct. In
the cpu_set_reserved_ttbr0() case we set TTBR0_EL1 to a table without
any entries, so no new entries could be tagged with the old ASID.

-- 
Catalin
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: kvm: deadlock in kvm_vgic_map_resources

2017-01-12 Thread Christoffer Dall
On Thu, Jan 12, 2017 at 10:50:04AM +, Marc Zyngier wrote:
> On 12/01/17 10:42, Christoffer Dall wrote:
> > On Thu, Jan 12, 2017 at 10:30:39AM +, Marc Zyngier wrote:
> >> On 12/01/17 09:55, Andre Przywara wrote:
> >>> Hi,
> >>>
> >>> On 12/01/17 09:32, Marc Zyngier wrote:
>  Hi Dmitry,
> 
>  On 11/01/17 19:01, Dmitry Vyukov wrote:
> > Hello,
> >
> > While running syzkaller fuzzer I've got the following deadlock.
> > On commit 9c763584b7c8911106bb77af7e648bef09af9d80.
> >
> >
> > =
> > [ INFO: possible recursive locking detected ]
> > 4.9.0-rc6-xc2-00056-g08372dd4b91d-dirty #50 Not tainted
> > -
> > syz-executor/20805 is trying to acquire lock:
> > (
> > >lock
> > ){+.+.+.}
> > , at:
> > [< inline >] kvm_vgic_dist_destroy
> > arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:271
> > [] kvm_vgic_destroy+0x34/0x250
> > arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:294
> > but task is already holding lock:
> > (>lock){+.+.+.}, at:
> > [] kvm_vgic_map_resources+0x2c/0x108
> > arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:343
> > other info that might help us debug this:
> > Possible unsafe locking scenario:
> > CPU0
> > 
> > lock(>lock);
> > lock(>lock);
> > *** DEADLOCK ***
> > May be due to missing lock nesting notation
> > 2 locks held by syz-executor/20805:
> > #0:(>mutex){+.+.+.}, at:
> > [] vcpu_load+0x28/0x1d0
> > arch/arm64/kvm/../../../virt/kvm/kvm_main.c:143
> > #1:(>lock){+.+.+.}, at:
> > [] kvm_vgic_map_resources+0x2c/0x108
> > arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:343
> > stack backtrace:
> > CPU: 2 PID: 20805 Comm: syz-executor Not tainted
> > 4.9.0-rc6-xc2-00056-g08372dd4b91d-dirty #50
> > Hardware name: Hardkernel ODROID-C2 (DT)
> > Call trace:
> > [] dump_backtrace+0x0/0x3c8 
> > arch/arm64/kernel/traps.c:69
> > [] show_stack+0x20/0x30 arch/arm64/kernel/traps.c:219
> > [< inline >] __dump_stack lib/dump_stack.c:15
> > [] dump_stack+0x100/0x150 lib/dump_stack.c:51
> > [< inline >] print_deadlock_bug kernel/locking/lockdep.c:1728
> > [< inline >] check_deadlock kernel/locking/lockdep.c:1772
> > [< inline >] validate_chain kernel/locking/lockdep.c:2250
> > [] __lock_acquire+0x1938/0x3440 
> > kernel/locking/lockdep.c:3335
> > [] lock_acquire+0xdc/0x1d8 
> > kernel/locking/lockdep.c:3746
> > [< inline >] __mutex_lock_common kernel/locking/mutex.c:521
> > [] mutex_lock_nested+0xdc/0x7b8 
> > kernel/locking/mutex.c:621
> > [< inline >] kvm_vgic_dist_destroy
> > arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:271
> > [] kvm_vgic_destroy+0x34/0x250
> > arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:294
> > [] vgic_v2_map_resources+0x218/0x430
> > arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-v2.c:295
> > [] kvm_vgic_map_resources+0xcc/0x108
> > arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:348
> > [< inline >] kvm_vcpu_first_run_init
> > arch/arm64/kvm/../../../arch/arm/kvm/arm.c:505
> > [] kvm_arch_vcpu_ioctl_run+0xab8/0xce0
> > arch/arm64/kvm/../../../arch/arm/kvm/arm.c:591
> > [] kvm_vcpu_ioctl+0x434/0xc08
> > arch/arm64/kvm/../../../virt/kvm/kvm_main.c:2557
> > [< inline >] vfs_ioctl fs/ioctl.c:43
> > [] do_vfs_ioctl+0x128/0xfc0 fs/ioctl.c:679
> > [< inline >] SYSC_ioctl fs/ioctl.c:694
> > [] SyS_ioctl+0xa8/0xb8 fs/ioctl.c:685
> > [] el0_svc_naked+0x24/0x28 
> > arch/arm64/kernel/entry.S:755
> 
>  Nice catch, and many thanks for reporting this.
> 
>  The bug is fairly obvious. Christoffer, what do you think? I don't think
>  we need to hold the kvm->lock all the way, but I'd like another pair of
>  eyes (the coffee machine is out of order again, and tea doesn't cut it).
> 
>  Thanks,
> 
>   M.
> 
>  From 93f80b20fb9351a49ee8b74eed3fc59c84651371 Mon Sep 17 00:00:00 2001
>  From: Marc Zyngier 
>  Date: Thu, 12 Jan 2017 09:21:56 +
>  Subject: [PATCH] KVM: arm/arm64: vgic: Fix deadlock on error handling
> 
>  Dmitry Vyukov reported that the syzkaller fuzzer triggered a
>  deadlock in the vgic setup code when an error was detected, as
>  the cleanup code tries to take a lock that is already held by
>  the setup code.
> 
>  The fix is pretty obvious: move the cleaup call after having
>  dropped the lock, since not much can happen at that point.
> >>>   
> >>> Is that really true? If for instance the calls to
> >>> vgic_register_dist_iodev() or kvm_phys_addr_ioremap() in
> >>> vgic_v2_map_resources() fail, we leave the function with a half
> >>> initialized VGIC (because vgic_init() 

Re: [kvm-unit-tests PATCH 1/6] libcflat: add PRI(dux)32 format types

2017-01-12 Thread Alex Bennée

Paolo Bonzini  writes:

> On 11/01/2017 17:28, Alex Bennée wrote:
>> So we can have portable formatting of uint32_t types. However there is
>> a catch. Different compilers can use legally subtly different types
>> though so we need to probe the compiler defined intdef.h first.
>
> Interesting, what platform has long uint32_t?  I thought the issue was
> whether 64-bit is long or "long long".

I haven't run into that one. This came up with the arm-none-eabi-gcc on
my overdrive01 box. According to the toolchain guys there is no particular
reason a 32bit compiler can't use long for its natural word length.

The native compiler on Debian armhf doesn't do this but the
arm-none-eabi-gcc compilers on both 64bit and 32bit Debian need this.

>
> Paolo
>
>> Signed-off-by: Alex Bennée 
>> ---
>>  Makefile   |  1 +
>>  configure  | 13 +
>>  lib/libcflat.h |  9 +
>>  3 files changed, 23 insertions(+)
>>
>> diff --git a/Makefile b/Makefile
>> index a32333b..9822d9a 100644
>> --- a/Makefile
>> +++ b/Makefile
>> @@ -55,6 +55,7 @@ CFLAGS += $(fomit_frame_pointer)
>>  CFLAGS += $(fno_stack_protector)
>>  CFLAGS += $(fno_stack_protector_all)
>>  CFLAGS += $(wno_frame_address)
>> +CFLAGS += $(if $(U32_LONG_FMT),-D__U32_LONG_FMT__,)
>>
>>  CXXFLAGS += $(CFLAGS)
>>
>> diff --git a/configure b/configure
>> index 995c8fa..127868c 100755
>> --- a/configure
>> +++ b/configure
>> @@ -109,6 +109,18 @@ if [ -f $testdir/run ]; then
>>  ln -fs $testdir/run $testdir-run
>>  fi
>>
>> +# check if uint32_t needs a long format modifier
>> +cat << EOF > lib_test.c
>> +#include 
>> +EOF
>> +
>> +$cross_prefix$cc lib_test.c -E | grep "typedef" | grep "long" | grep 
>> "uint32_t" &> /dev/null
>> +exit=$?
>> +if [ $exit -eq 0 ]; then
>> +u32_long=true
>> +fi
>> +rm -f lib_test.c
>> +
>>  # check for dependent 32 bit libraries
>>  if [ "$arch" != "arm" ]; then
>>  cat << EOF > lib_test.c
>> @@ -155,4 +167,5 @@ TEST_DIR=$testdir
>>  FIRMWARE=$firmware
>>  ENDIAN=$endian
>>  PRETTY_PRINT_STACKS=$pretty_print_stacks
>> +U32_LONG_FMT=$u32_long
>>  EOF
>> diff --git a/lib/libcflat.h b/lib/libcflat.h
>> index 380395f..e80fc50 100644
>> --- a/lib/libcflat.h
>> +++ b/lib/libcflat.h
>> @@ -58,12 +58,21 @@ typedef _Boolbool;
>>  #define true  1
>>
>>  #if __SIZEOF_LONG__ == 8
>> +#  define __PRI32_PREFIX
>>  #  define __PRI64_PREFIX"l"
>>  #  define __PRIPTR_PREFIX   "l"
>>  #else
>> +#if defined(__U32_LONG_FMT__)
>> +#  define __PRI32_PREFIX"l"
>> +#else
>> +#  define __PRI32_PREFIX
>> +#endif
>>  #  define __PRI64_PREFIX"ll"
>>  #  define __PRIPTR_PREFIX
>>  #endif
>> +#define PRId32  __PRI32_PREFIX  "d"
>> +#define PRIu32  __PRI32_PREFIX  "u"
>> +#define PRIx32  __PRI32_PREFIX  "x"
>>  #define PRId64  __PRI64_PREFIX  "d"
>>  #define PRIu64  __PRI64_PREFIX  "u"
>>  #define PRIx64  __PRI64_PREFIX  "x"
>>


--
Alex Bennée
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH 2/6] lib/pci: fix BAR format strings

2017-01-12 Thread Paolo Bonzini


On 11/01/2017 17:28, Alex Bennée wrote:
> Using %x as a format string is not portable across 32/64 bit builds.
> Use explicit PRIx32 format strings like the 64 bit version above.
> 
> Signed-off-by: Alex Bennée 
> ---
>  lib/pci.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/pci.c b/lib/pci.c
> index 6416191..597d8f2 100644
> --- a/lib/pci.c
> +++ b/lib/pci.c
> @@ -67,7 +67,7 @@ bool pci_setup_msi(struct pci_dev *dev, uint64_t msi_addr, 
> uint32_t msi_data)
>   pci_config_writel(addr, offset + PCI_MSI_DATA_32, msi_data);
>   printf("MSI: dev 0x%x init 32bit address: ", addr);
>   }
> - printf("addr=0x%lx, data=0x%x\n", msi_addr, msi_data);
> + printf("addr=0x%" PRIx64 ", data=0x%" PRIx32 "\n", msi_addr, msi_data);

Applying only the %lx -> %"PRIx64" change for now (though it's also in
Andrew's 32-bit compilation fixes patch).

Paolo
>  
>   msi_control |= PCI_MSI_FLAGS_ENABLE;
>   pci_config_writew(addr, offset + PCI_MSI_FLAGS, msi_control);
> @@ -237,7 +237,7 @@ void pci_bar_print(struct pci_dev *dev, int bar_num)
>   printf("BAR#%d,%d [%" PRIx64 "-%" PRIx64 " ",
>  bar_num, bar_num + 1, start, end);
>   } else {
> - printf("BAR#%d [%02x-%02x ",
> + printf("BAR#%d [%" PRIx32 "-%" PRIx32 " ",
>  bar_num, (uint32_t)start, (uint32_t)end);
>   }
>  
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH 4/6] docs: mention checkpatch in the README

2017-01-12 Thread Alex Bennée

Paolo Bonzini  writes:

> On 11/01/2017 17:28, Alex Bennée wrote:
>> Signed-off-by: Alex Bennée 
>> ---
>>  README.md | 1 +
>>  1 file changed, 1 insertion(+)
>>
>> diff --git a/README.md b/README.md
>> index 5027b62..9462824 100644
>> --- a/README.md
>> +++ b/README.md
>> @@ -79,3 +79,4 @@ You can add the following to .git/config to do this 
>> automatically for you:
>>  [format]
>>  subjectprefix = kvm-unit-tests PATCH
>>
>> +Please run the kernel's ./scripts/checkpatch.pl on new patches
>
> Does it really work well on kvm-unit-tests?

Well I keep getting pulled up on kernel coding style on my reviews so if
we mean it we should enforce it.

--
Alex Bennée
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH 6/6] run_tests: allow passing of options to QEMU

2017-01-12 Thread Paolo Bonzini


On 11/01/2017 17:28, Alex Bennée wrote:
> This allows additional options to be passed to QEMU. It follows the
> convention of passing parameters after a -- to the child process. In
> my case I'm using it to toggle MTTCG on an off:
> 
>   ./run_tests.sh -- --accel tcg,thread=multi
> 
> Signed-off-by: Alex Bennée 
> 
> ---
> v1
>   - changes from -o to --
>   - fixed whitespace damage
> ---
>  README.md  |  6 ++
>  run_tests.sh   | 13 +++--
>  scripts/functions.bash |  7 ---
>  3 files changed, 21 insertions(+), 5 deletions(-)
> 
> diff --git a/README.md b/README.md
> index fa3a445..1bd6dcb 100644
> --- a/README.md
> +++ b/README.md
> @@ -55,6 +55,12 @@ To extend or disable the timeouts:
>  
>  TIMEOUT=0 ./run_tests.sh
>  
> +Any arguments past the end-of-arguments marker (--) is passed on down
> +to the QEMU invocation. This can of course be combined with the other
> +modifiers:
> +
> +ACCEL=tcg ./run_tests.sh -v -- --accel tcg,thread=multi
> +
>  # Contributing
>  
>  ## Directory structure
> diff --git a/run_tests.sh b/run_tests.sh
> index 254129d..3270fba 100755
> --- a/run_tests.sh
> +++ b/run_tests.sh
> @@ -13,7 +13,7 @@ function usage()
>  {
>  cat <  
> -Usage: $0 [-g group] [-h] [-v]
> +Usage: $0 [-g group] [-h] [-v] [-- QEMU options]
>  
>  -g: Only execute tests in the given group
>  -h: Output this help text
> @@ -22,6 +22,8 @@ Usage: $0 [-g group] [-h] [-v]
>  Set the environment variable QEMU=/path/to/qemu-system-ARCH to
>  specify the appropriate qemu binary for ARCH-run.
>  
> +All options specified after -- are passed on to QEMU.
> +
>  EOF
>  }
>  
> @@ -29,6 +31,7 @@ RUNTIME_arch_run="./$TEST_DIR/run"
>  source scripts/runtime.bash
>  
>  while getopts "g:hv" opt; do
> +
>  case $opt in
>  g)
>  only_group=$OPTARG
> @@ -46,6 +49,12 @@ while getopts "g:hv" opt; do
>  esac
>  done
>  
> +# Any options left for QEMU?
> +shift $((OPTIND-1))
> +if [ "$#" -gt  0 ]; then
> +extra_opts="$@"
> +fi
> +
>  RUNTIME_log_stderr () { cat >> test.log; }
>  RUNTIME_log_stdout () {
>  if [ "$PRETTY_PRINT_STACKS" = "yes" ]; then
> @@ -59,4 +68,4 @@ RUNTIME_log_stdout () {
>  config=$TEST_DIR/unittests.cfg
>  rm -f test.log
>  printf "BUILD_HEAD=$(cat build-head)\n\n" > test.log
> -for_each_unittest $config run
> +for_each_unittest $config run "$extra_opts"
> diff --git a/scripts/functions.bash b/scripts/functions.bash
> index ee9143c..60fbc6a 100644
> --- a/scripts/functions.bash
> +++ b/scripts/functions.bash
> @@ -3,10 +3,11 @@ function for_each_unittest()
>  {
>   local unittests="$1"
>   local cmd="$2"
> + local extra_opts=$3
>   local testname
>   local smp
>   local kernel
> - local opts
> + local opts=$extra_opts
>   local groups
>   local arch
>   local check
> @@ -21,7 +22,7 @@ function for_each_unittest()
>   testname=${BASH_REMATCH[1]}
>   smp=1
>   kernel=""
> - opts=""
> + opts=$extra_opts
>   groups=""
>   arch=""
>   check=""
> @@ -32,7 +33,7 @@ function for_each_unittest()
>   elif [[ $line =~ ^smp\ *=\ *(.*)$ ]]; then
>   smp=${BASH_REMATCH[1]}
>   elif [[ $line =~ ^extra_params\ *=\ *(.*)$ ]]; then
> - opts=${BASH_REMATCH[1]}
> + opts="$opts ${BASH_REMATCH[1]}"
>   elif [[ $line =~ ^groups\ *=\ *(.*)$ ]]; then
>   groups=${BASH_REMATCH[1]}
>   elif [[ $line =~ ^arch\ *=\ *(.*)$ ]]; then
> 

Great idea!

Paolo
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH 4/6] docs: mention checkpatch in the README

2017-01-12 Thread Paolo Bonzini


On 11/01/2017 17:28, Alex Bennée wrote:
> Signed-off-by: Alex Bennée 
> ---
>  README.md | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/README.md b/README.md
> index 5027b62..9462824 100644
> --- a/README.md
> +++ b/README.md
> @@ -79,3 +79,4 @@ You can add the following to .git/config to do this 
> automatically for you:
>  [format]
>  subjectprefix = kvm-unit-tests PATCH
>  
> +Please run the kernel's ./scripts/checkpatch.pl on new patches

Does it really work well on kvm-unit-tests?

Paolo
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH 1/6] libcflat: add PRI(dux)32 format types

2017-01-12 Thread Paolo Bonzini


On 11/01/2017 17:28, Alex Bennée wrote:
> So we can have portable formatting of uint32_t types. However there is
> a catch. Different compilers can use legally subtly different types
> though so we need to probe the compiler defined intdef.h first.

Interesting, what platform has long uint32_t?  I thought the issue was
whether 64-bit is long or "long long".

Paolo

> Signed-off-by: Alex Bennée 
> ---
>  Makefile   |  1 +
>  configure  | 13 +
>  lib/libcflat.h |  9 +
>  3 files changed, 23 insertions(+)
> 
> diff --git a/Makefile b/Makefile
> index a32333b..9822d9a 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -55,6 +55,7 @@ CFLAGS += $(fomit_frame_pointer)
>  CFLAGS += $(fno_stack_protector)
>  CFLAGS += $(fno_stack_protector_all)
>  CFLAGS += $(wno_frame_address)
> +CFLAGS += $(if $(U32_LONG_FMT),-D__U32_LONG_FMT__,)
>  
>  CXXFLAGS += $(CFLAGS)
>  
> diff --git a/configure b/configure
> index 995c8fa..127868c 100755
> --- a/configure
> +++ b/configure
> @@ -109,6 +109,18 @@ if [ -f $testdir/run ]; then
>  ln -fs $testdir/run $testdir-run
>  fi
>  
> +# check if uint32_t needs a long format modifier
> +cat << EOF > lib_test.c
> +#include 
> +EOF
> +
> +$cross_prefix$cc lib_test.c -E | grep "typedef" | grep "long" | grep 
> "uint32_t" &> /dev/null
> +exit=$?
> +if [ $exit -eq 0 ]; then
> +u32_long=true
> +fi
> +rm -f lib_test.c
> +
>  # check for dependent 32 bit libraries
>  if [ "$arch" != "arm" ]; then
>  cat << EOF > lib_test.c
> @@ -155,4 +167,5 @@ TEST_DIR=$testdir
>  FIRMWARE=$firmware
>  ENDIAN=$endian
>  PRETTY_PRINT_STACKS=$pretty_print_stacks
> +U32_LONG_FMT=$u32_long
>  EOF
> diff --git a/lib/libcflat.h b/lib/libcflat.h
> index 380395f..e80fc50 100644
> --- a/lib/libcflat.h
> +++ b/lib/libcflat.h
> @@ -58,12 +58,21 @@ typedef _Bool bool;
>  #define true  1
>  
>  #if __SIZEOF_LONG__ == 8
> +#  define __PRI32_PREFIX
>  #  define __PRI64_PREFIX "l"
>  #  define __PRIPTR_PREFIX"l"
>  #else
> +#if defined(__U32_LONG_FMT__)
> +#  define __PRI32_PREFIX"l"
> +#else
> +#  define __PRI32_PREFIX
> +#endif
>  #  define __PRI64_PREFIX "ll"
>  #  define __PRIPTR_PREFIX
>  #endif
> +#define PRId32  __PRI32_PREFIX   "d"
> +#define PRIu32  __PRI32_PREFIX   "u"
> +#define PRIx32  __PRI32_PREFIX   "x"
>  #define PRId64  __PRI64_PREFIX   "d"
>  #define PRIu64  __PRI64_PREFIX   "u"
>  #define PRIx64  __PRI64_PREFIX   "x"
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: kvm: deadlock in kvm_vgic_map_resources

2017-01-12 Thread Andre Przywara
Hi,

On 12/01/17 10:42, Christoffer Dall wrote:
> On Thu, Jan 12, 2017 at 10:30:39AM +, Marc Zyngier wrote:
>> On 12/01/17 09:55, Andre Przywara wrote:
>>> Hi,
>>>
>>> On 12/01/17 09:32, Marc Zyngier wrote:
 Hi Dmitry,

 On 11/01/17 19:01, Dmitry Vyukov wrote:
> Hello,
>
> While running syzkaller fuzzer I've got the following deadlock.
> On commit 9c763584b7c8911106bb77af7e648bef09af9d80.
>
>
> =
> [ INFO: possible recursive locking detected ]
> 4.9.0-rc6-xc2-00056-g08372dd4b91d-dirty #50 Not tainted
> -
> syz-executor/20805 is trying to acquire lock:
> (
> >lock
> ){+.+.+.}
> , at:
> [< inline >] kvm_vgic_dist_destroy
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:271
> [] kvm_vgic_destroy+0x34/0x250
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:294
> but task is already holding lock:
> (>lock){+.+.+.}, at:
> [] kvm_vgic_map_resources+0x2c/0x108
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:343
> other info that might help us debug this:
> Possible unsafe locking scenario:
> CPU0
> 
> lock(>lock);
> lock(>lock);
> *** DEADLOCK ***
> May be due to missing lock nesting notation
> 2 locks held by syz-executor/20805:
> #0:(>mutex){+.+.+.}, at:
> [] vcpu_load+0x28/0x1d0
> arch/arm64/kvm/../../../virt/kvm/kvm_main.c:143
> #1:(>lock){+.+.+.}, at:
> [] kvm_vgic_map_resources+0x2c/0x108
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:343
> stack backtrace:
> CPU: 2 PID: 20805 Comm: syz-executor Not tainted
> 4.9.0-rc6-xc2-00056-g08372dd4b91d-dirty #50
> Hardware name: Hardkernel ODROID-C2 (DT)
> Call trace:
> [] dump_backtrace+0x0/0x3c8 arch/arm64/kernel/traps.c:69
> [] show_stack+0x20/0x30 arch/arm64/kernel/traps.c:219
> [< inline >] __dump_stack lib/dump_stack.c:15
> [] dump_stack+0x100/0x150 lib/dump_stack.c:51
> [< inline >] print_deadlock_bug kernel/locking/lockdep.c:1728
> [< inline >] check_deadlock kernel/locking/lockdep.c:1772
> [< inline >] validate_chain kernel/locking/lockdep.c:2250
> [] __lock_acquire+0x1938/0x3440 
> kernel/locking/lockdep.c:3335
> [] lock_acquire+0xdc/0x1d8 kernel/locking/lockdep.c:3746
> [< inline >] __mutex_lock_common kernel/locking/mutex.c:521
> [] mutex_lock_nested+0xdc/0x7b8 
> kernel/locking/mutex.c:621
> [< inline >] kvm_vgic_dist_destroy
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:271
> [] kvm_vgic_destroy+0x34/0x250
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:294
> [] vgic_v2_map_resources+0x218/0x430
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-v2.c:295
> [] kvm_vgic_map_resources+0xcc/0x108
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:348
> [< inline >] kvm_vcpu_first_run_init
> arch/arm64/kvm/../../../arch/arm/kvm/arm.c:505
> [] kvm_arch_vcpu_ioctl_run+0xab8/0xce0
> arch/arm64/kvm/../../../arch/arm/kvm/arm.c:591
> [] kvm_vcpu_ioctl+0x434/0xc08
> arch/arm64/kvm/../../../virt/kvm/kvm_main.c:2557
> [< inline >] vfs_ioctl fs/ioctl.c:43
> [] do_vfs_ioctl+0x128/0xfc0 fs/ioctl.c:679
> [< inline >] SYSC_ioctl fs/ioctl.c:694
> [] SyS_ioctl+0xa8/0xb8 fs/ioctl.c:685
> [] el0_svc_naked+0x24/0x28 arch/arm64/kernel/entry.S:755

 Nice catch, and many thanks for reporting this.

 The bug is fairly obvious. Christoffer, what do you think? I don't think
 we need to hold the kvm->lock all the way, but I'd like another pair of
 eyes (the coffee machine is out of order again, and tea doesn't cut it).

 Thanks,

M.

 From 93f80b20fb9351a49ee8b74eed3fc59c84651371 Mon Sep 17 00:00:00 2001
 From: Marc Zyngier 
 Date: Thu, 12 Jan 2017 09:21:56 +
 Subject: [PATCH] KVM: arm/arm64: vgic: Fix deadlock on error handling

 Dmitry Vyukov reported that the syzkaller fuzzer triggered a
 deadlock in the vgic setup code when an error was detected, as
 the cleanup code tries to take a lock that is already held by
 the setup code.

 The fix is pretty obvious: move the cleaup call after having
 dropped the lock, since not much can happen at that point.
>>>   
>>> Is that really true? If for instance the calls to
>>> vgic_register_dist_iodev() or kvm_phys_addr_ioremap() in
>>> vgic_v2_map_resources() fail, we leave the function with a half
>>> initialized VGIC (because vgic_init() succeeded). 
>>
>> But we only set dist->ready to true when everything went OK. How is 
>> that an issue?
>>
>>> Dropping the lock at
>>> this point without having the GIC cleaned up before sounds a bit
>>> suspicious (I may be wrong on this, though).
>>
>> Thinking of it, that may open a race with vgic 

Re: kvm: deadlock in kvm_vgic_map_resources

2017-01-12 Thread Marc Zyngier
On 12/01/17 10:42, Christoffer Dall wrote:
> On Thu, Jan 12, 2017 at 10:30:39AM +, Marc Zyngier wrote:
>> On 12/01/17 09:55, Andre Przywara wrote:
>>> Hi,
>>>
>>> On 12/01/17 09:32, Marc Zyngier wrote:
 Hi Dmitry,

 On 11/01/17 19:01, Dmitry Vyukov wrote:
> Hello,
>
> While running syzkaller fuzzer I've got the following deadlock.
> On commit 9c763584b7c8911106bb77af7e648bef09af9d80.
>
>
> =
> [ INFO: possible recursive locking detected ]
> 4.9.0-rc6-xc2-00056-g08372dd4b91d-dirty #50 Not tainted
> -
> syz-executor/20805 is trying to acquire lock:
> (
> >lock
> ){+.+.+.}
> , at:
> [< inline >] kvm_vgic_dist_destroy
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:271
> [] kvm_vgic_destroy+0x34/0x250
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:294
> but task is already holding lock:
> (>lock){+.+.+.}, at:
> [] kvm_vgic_map_resources+0x2c/0x108
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:343
> other info that might help us debug this:
> Possible unsafe locking scenario:
> CPU0
> 
> lock(>lock);
> lock(>lock);
> *** DEADLOCK ***
> May be due to missing lock nesting notation
> 2 locks held by syz-executor/20805:
> #0:(>mutex){+.+.+.}, at:
> [] vcpu_load+0x28/0x1d0
> arch/arm64/kvm/../../../virt/kvm/kvm_main.c:143
> #1:(>lock){+.+.+.}, at:
> [] kvm_vgic_map_resources+0x2c/0x108
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:343
> stack backtrace:
> CPU: 2 PID: 20805 Comm: syz-executor Not tainted
> 4.9.0-rc6-xc2-00056-g08372dd4b91d-dirty #50
> Hardware name: Hardkernel ODROID-C2 (DT)
> Call trace:
> [] dump_backtrace+0x0/0x3c8 arch/arm64/kernel/traps.c:69
> [] show_stack+0x20/0x30 arch/arm64/kernel/traps.c:219
> [< inline >] __dump_stack lib/dump_stack.c:15
> [] dump_stack+0x100/0x150 lib/dump_stack.c:51
> [< inline >] print_deadlock_bug kernel/locking/lockdep.c:1728
> [< inline >] check_deadlock kernel/locking/lockdep.c:1772
> [< inline >] validate_chain kernel/locking/lockdep.c:2250
> [] __lock_acquire+0x1938/0x3440 
> kernel/locking/lockdep.c:3335
> [] lock_acquire+0xdc/0x1d8 kernel/locking/lockdep.c:3746
> [< inline >] __mutex_lock_common kernel/locking/mutex.c:521
> [] mutex_lock_nested+0xdc/0x7b8 
> kernel/locking/mutex.c:621
> [< inline >] kvm_vgic_dist_destroy
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:271
> [] kvm_vgic_destroy+0x34/0x250
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:294
> [] vgic_v2_map_resources+0x218/0x430
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-v2.c:295
> [] kvm_vgic_map_resources+0xcc/0x108
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:348
> [< inline >] kvm_vcpu_first_run_init
> arch/arm64/kvm/../../../arch/arm/kvm/arm.c:505
> [] kvm_arch_vcpu_ioctl_run+0xab8/0xce0
> arch/arm64/kvm/../../../arch/arm/kvm/arm.c:591
> [] kvm_vcpu_ioctl+0x434/0xc08
> arch/arm64/kvm/../../../virt/kvm/kvm_main.c:2557
> [< inline >] vfs_ioctl fs/ioctl.c:43
> [] do_vfs_ioctl+0x128/0xfc0 fs/ioctl.c:679
> [< inline >] SYSC_ioctl fs/ioctl.c:694
> [] SyS_ioctl+0xa8/0xb8 fs/ioctl.c:685
> [] el0_svc_naked+0x24/0x28 arch/arm64/kernel/entry.S:755

 Nice catch, and many thanks for reporting this.

 The bug is fairly obvious. Christoffer, what do you think? I don't think
 we need to hold the kvm->lock all the way, but I'd like another pair of
 eyes (the coffee machine is out of order again, and tea doesn't cut it).

 Thanks,

M.

 From 93f80b20fb9351a49ee8b74eed3fc59c84651371 Mon Sep 17 00:00:00 2001
 From: Marc Zyngier 
 Date: Thu, 12 Jan 2017 09:21:56 +
 Subject: [PATCH] KVM: arm/arm64: vgic: Fix deadlock on error handling

 Dmitry Vyukov reported that the syzkaller fuzzer triggered a
 deadlock in the vgic setup code when an error was detected, as
 the cleanup code tries to take a lock that is already held by
 the setup code.

 The fix is pretty obvious: move the cleaup call after having
 dropped the lock, since not much can happen at that point.
>>>   
>>> Is that really true? If for instance the calls to
>>> vgic_register_dist_iodev() or kvm_phys_addr_ioremap() in
>>> vgic_v2_map_resources() fail, we leave the function with a half
>>> initialized VGIC (because vgic_init() succeeded). 
>>
>> But we only set dist->ready to true when everything went OK. How is 
>> that an issue?
>>
>>> Dropping the lock at
>>> this point without having the GIC cleaned up before sounds a bit
>>> suspicious (I may be wrong on this, though).
>>
>> Thinking of it, that may open a race with vgic init 

Re: kvm: deadlock in kvm_vgic_map_resources

2017-01-12 Thread Christoffer Dall
On Thu, Jan 12, 2017 at 10:30:39AM +, Marc Zyngier wrote:
> On 12/01/17 09:55, Andre Przywara wrote:
> > Hi,
> > 
> > On 12/01/17 09:32, Marc Zyngier wrote:
> >> Hi Dmitry,
> >>
> >> On 11/01/17 19:01, Dmitry Vyukov wrote:
> >>> Hello,
> >>>
> >>> While running syzkaller fuzzer I've got the following deadlock.
> >>> On commit 9c763584b7c8911106bb77af7e648bef09af9d80.
> >>>
> >>>
> >>> =
> >>> [ INFO: possible recursive locking detected ]
> >>> 4.9.0-rc6-xc2-00056-g08372dd4b91d-dirty #50 Not tainted
> >>> -
> >>> syz-executor/20805 is trying to acquire lock:
> >>> (
> >>> >lock
> >>> ){+.+.+.}
> >>> , at:
> >>> [< inline >] kvm_vgic_dist_destroy
> >>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:271
> >>> [] kvm_vgic_destroy+0x34/0x250
> >>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:294
> >>> but task is already holding lock:
> >>> (>lock){+.+.+.}, at:
> >>> [] kvm_vgic_map_resources+0x2c/0x108
> >>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:343
> >>> other info that might help us debug this:
> >>> Possible unsafe locking scenario:
> >>> CPU0
> >>> 
> >>> lock(>lock);
> >>> lock(>lock);
> >>> *** DEADLOCK ***
> >>> May be due to missing lock nesting notation
> >>> 2 locks held by syz-executor/20805:
> >>> #0:(>mutex){+.+.+.}, at:
> >>> [] vcpu_load+0x28/0x1d0
> >>> arch/arm64/kvm/../../../virt/kvm/kvm_main.c:143
> >>> #1:(>lock){+.+.+.}, at:
> >>> [] kvm_vgic_map_resources+0x2c/0x108
> >>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:343
> >>> stack backtrace:
> >>> CPU: 2 PID: 20805 Comm: syz-executor Not tainted
> >>> 4.9.0-rc6-xc2-00056-g08372dd4b91d-dirty #50
> >>> Hardware name: Hardkernel ODROID-C2 (DT)
> >>> Call trace:
> >>> [] dump_backtrace+0x0/0x3c8 arch/arm64/kernel/traps.c:69
> >>> [] show_stack+0x20/0x30 arch/arm64/kernel/traps.c:219
> >>> [< inline >] __dump_stack lib/dump_stack.c:15
> >>> [] dump_stack+0x100/0x150 lib/dump_stack.c:51
> >>> [< inline >] print_deadlock_bug kernel/locking/lockdep.c:1728
> >>> [< inline >] check_deadlock kernel/locking/lockdep.c:1772
> >>> [< inline >] validate_chain kernel/locking/lockdep.c:2250
> >>> [] __lock_acquire+0x1938/0x3440 
> >>> kernel/locking/lockdep.c:3335
> >>> [] lock_acquire+0xdc/0x1d8 kernel/locking/lockdep.c:3746
> >>> [< inline >] __mutex_lock_common kernel/locking/mutex.c:521
> >>> [] mutex_lock_nested+0xdc/0x7b8 
> >>> kernel/locking/mutex.c:621
> >>> [< inline >] kvm_vgic_dist_destroy
> >>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:271
> >>> [] kvm_vgic_destroy+0x34/0x250
> >>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:294
> >>> [] vgic_v2_map_resources+0x218/0x430
> >>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-v2.c:295
> >>> [] kvm_vgic_map_resources+0xcc/0x108
> >>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:348
> >>> [< inline >] kvm_vcpu_first_run_init
> >>> arch/arm64/kvm/../../../arch/arm/kvm/arm.c:505
> >>> [] kvm_arch_vcpu_ioctl_run+0xab8/0xce0
> >>> arch/arm64/kvm/../../../arch/arm/kvm/arm.c:591
> >>> [] kvm_vcpu_ioctl+0x434/0xc08
> >>> arch/arm64/kvm/../../../virt/kvm/kvm_main.c:2557
> >>> [< inline >] vfs_ioctl fs/ioctl.c:43
> >>> [] do_vfs_ioctl+0x128/0xfc0 fs/ioctl.c:679
> >>> [< inline >] SYSC_ioctl fs/ioctl.c:694
> >>> [] SyS_ioctl+0xa8/0xb8 fs/ioctl.c:685
> >>> [] el0_svc_naked+0x24/0x28 arch/arm64/kernel/entry.S:755
> >>
> >> Nice catch, and many thanks for reporting this.
> >>
> >> The bug is fairly obvious. Christoffer, what do you think? I don't think
> >> we need to hold the kvm->lock all the way, but I'd like another pair of
> >> eyes (the coffee machine is out of order again, and tea doesn't cut it).
> >>
> >> Thanks,
> >>
> >>M.
> >>
> >> From 93f80b20fb9351a49ee8b74eed3fc59c84651371 Mon Sep 17 00:00:00 2001
> >> From: Marc Zyngier 
> >> Date: Thu, 12 Jan 2017 09:21:56 +
> >> Subject: [PATCH] KVM: arm/arm64: vgic: Fix deadlock on error handling
> >>
> >> Dmitry Vyukov reported that the syzkaller fuzzer triggered a
> >> deadlock in the vgic setup code when an error was detected, as
> >> the cleanup code tries to take a lock that is already held by
> >> the setup code.
> >>
> >> The fix is pretty obvious: move the cleaup call after having
> >> dropped the lock, since not much can happen at that point.
> >   
> > Is that really true? If for instance the calls to
> > vgic_register_dist_iodev() or kvm_phys_addr_ioremap() in
> > vgic_v2_map_resources() fail, we leave the function with a half
> > initialized VGIC (because vgic_init() succeeded). 
> 
> But we only set dist->ready to true when everything went OK. How is 
> that an issue?
> 
> > Dropping the lock at
> > this point without having the GIC cleaned up before sounds a bit
> > suspicious (I may be wrong on this, though).
> 
> Thinking of it, that may open a race with vgic init call, leading to 
> leaking distributor memory.
> 

Re: kvm: deadlock in kvm_vgic_map_resources

2017-01-12 Thread Marc Zyngier
On 12/01/17 09:55, Andre Przywara wrote:
> Hi,
> 
> On 12/01/17 09:32, Marc Zyngier wrote:
>> Hi Dmitry,
>>
>> On 11/01/17 19:01, Dmitry Vyukov wrote:
>>> Hello,
>>>
>>> While running syzkaller fuzzer I've got the following deadlock.
>>> On commit 9c763584b7c8911106bb77af7e648bef09af9d80.
>>>
>>>
>>> =
>>> [ INFO: possible recursive locking detected ]
>>> 4.9.0-rc6-xc2-00056-g08372dd4b91d-dirty #50 Not tainted
>>> -
>>> syz-executor/20805 is trying to acquire lock:
>>> (
>>> >lock
>>> ){+.+.+.}
>>> , at:
>>> [< inline >] kvm_vgic_dist_destroy
>>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:271
>>> [] kvm_vgic_destroy+0x34/0x250
>>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:294
>>> but task is already holding lock:
>>> (>lock){+.+.+.}, at:
>>> [] kvm_vgic_map_resources+0x2c/0x108
>>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:343
>>> other info that might help us debug this:
>>> Possible unsafe locking scenario:
>>> CPU0
>>> 
>>> lock(>lock);
>>> lock(>lock);
>>> *** DEADLOCK ***
>>> May be due to missing lock nesting notation
>>> 2 locks held by syz-executor/20805:
>>> #0:(>mutex){+.+.+.}, at:
>>> [] vcpu_load+0x28/0x1d0
>>> arch/arm64/kvm/../../../virt/kvm/kvm_main.c:143
>>> #1:(>lock){+.+.+.}, at:
>>> [] kvm_vgic_map_resources+0x2c/0x108
>>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:343
>>> stack backtrace:
>>> CPU: 2 PID: 20805 Comm: syz-executor Not tainted
>>> 4.9.0-rc6-xc2-00056-g08372dd4b91d-dirty #50
>>> Hardware name: Hardkernel ODROID-C2 (DT)
>>> Call trace:
>>> [] dump_backtrace+0x0/0x3c8 arch/arm64/kernel/traps.c:69
>>> [] show_stack+0x20/0x30 arch/arm64/kernel/traps.c:219
>>> [< inline >] __dump_stack lib/dump_stack.c:15
>>> [] dump_stack+0x100/0x150 lib/dump_stack.c:51
>>> [< inline >] print_deadlock_bug kernel/locking/lockdep.c:1728
>>> [< inline >] check_deadlock kernel/locking/lockdep.c:1772
>>> [< inline >] validate_chain kernel/locking/lockdep.c:2250
>>> [] __lock_acquire+0x1938/0x3440 
>>> kernel/locking/lockdep.c:3335
>>> [] lock_acquire+0xdc/0x1d8 kernel/locking/lockdep.c:3746
>>> [< inline >] __mutex_lock_common kernel/locking/mutex.c:521
>>> [] mutex_lock_nested+0xdc/0x7b8 kernel/locking/mutex.c:621
>>> [< inline >] kvm_vgic_dist_destroy
>>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:271
>>> [] kvm_vgic_destroy+0x34/0x250
>>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:294
>>> [] vgic_v2_map_resources+0x218/0x430
>>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-v2.c:295
>>> [] kvm_vgic_map_resources+0xcc/0x108
>>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:348
>>> [< inline >] kvm_vcpu_first_run_init
>>> arch/arm64/kvm/../../../arch/arm/kvm/arm.c:505
>>> [] kvm_arch_vcpu_ioctl_run+0xab8/0xce0
>>> arch/arm64/kvm/../../../arch/arm/kvm/arm.c:591
>>> [] kvm_vcpu_ioctl+0x434/0xc08
>>> arch/arm64/kvm/../../../virt/kvm/kvm_main.c:2557
>>> [< inline >] vfs_ioctl fs/ioctl.c:43
>>> [] do_vfs_ioctl+0x128/0xfc0 fs/ioctl.c:679
>>> [< inline >] SYSC_ioctl fs/ioctl.c:694
>>> [] SyS_ioctl+0xa8/0xb8 fs/ioctl.c:685
>>> [] el0_svc_naked+0x24/0x28 arch/arm64/kernel/entry.S:755
>>
>> Nice catch, and many thanks for reporting this.
>>
>> The bug is fairly obvious. Christoffer, what do you think? I don't think
>> we need to hold the kvm->lock all the way, but I'd like another pair of
>> eyes (the coffee machine is out of order again, and tea doesn't cut it).
>>
>> Thanks,
>>
>>  M.
>>
>> From 93f80b20fb9351a49ee8b74eed3fc59c84651371 Mon Sep 17 00:00:00 2001
>> From: Marc Zyngier 
>> Date: Thu, 12 Jan 2017 09:21:56 +
>> Subject: [PATCH] KVM: arm/arm64: vgic: Fix deadlock on error handling
>>
>> Dmitry Vyukov reported that the syzkaller fuzzer triggered a
>> deadlock in the vgic setup code when an error was detected, as
>> the cleanup code tries to take a lock that is already held by
>> the setup code.
>>
>> The fix is pretty obvious: move the cleaup call after having
>> dropped the lock, since not much can happen at that point.
>   
> Is that really true? If for instance the calls to
> vgic_register_dist_iodev() or kvm_phys_addr_ioremap() in
> vgic_v2_map_resources() fail, we leave the function with a half
> initialized VGIC (because vgic_init() succeeded). 

But we only set dist->ready to true when everything went OK. How is 
that an issue?

> Dropping the lock at
> this point without having the GIC cleaned up before sounds a bit
> suspicious (I may be wrong on this, though).

Thinking of it, that may open a race with vgic init call, leading to 
leaking distributor memory.

> 
> Can't we just document that kvm_vgic_destroy() needs to be called with
> the kvm->lock held and take the lock around the only other caller
> (kvm_arch_destroy_vm() in arch/arm/kvm/arm.c)?
> We can then keep holding the lock in the map_resources calls.
> Though we might still move the calls to 

Re: kvm: deadlock in kvm_vgic_map_resources

2017-01-12 Thread Christoffer Dall
On Thu, Jan 12, 2017 at 09:55:21AM +, Andre Przywara wrote:
> Hi,
> 
> On 12/01/17 09:32, Marc Zyngier wrote:
> > Hi Dmitry,
> > 
> > On 11/01/17 19:01, Dmitry Vyukov wrote:
> >> Hello,
> >>
> >> While running syzkaller fuzzer I've got the following deadlock.
> >> On commit 9c763584b7c8911106bb77af7e648bef09af9d80.
> >>
> >>
> >> =
> >> [ INFO: possible recursive locking detected ]
> >> 4.9.0-rc6-xc2-00056-g08372dd4b91d-dirty #50 Not tainted
> >> -
> >> syz-executor/20805 is trying to acquire lock:
> >> (
> >> >lock
> >> ){+.+.+.}
> >> , at:
> >> [< inline >] kvm_vgic_dist_destroy
> >> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:271
> >> [] kvm_vgic_destroy+0x34/0x250
> >> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:294
> >> but task is already holding lock:
> >> (>lock){+.+.+.}, at:
> >> [] kvm_vgic_map_resources+0x2c/0x108
> >> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:343
> >> other info that might help us debug this:
> >> Possible unsafe locking scenario:
> >> CPU0
> >> 
> >> lock(>lock);
> >> lock(>lock);
> >> *** DEADLOCK ***
> >> May be due to missing lock nesting notation
> >> 2 locks held by syz-executor/20805:
> >> #0:(>mutex){+.+.+.}, at:
> >> [] vcpu_load+0x28/0x1d0
> >> arch/arm64/kvm/../../../virt/kvm/kvm_main.c:143
> >> #1:(>lock){+.+.+.}, at:
> >> [] kvm_vgic_map_resources+0x2c/0x108
> >> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:343
> >> stack backtrace:
> >> CPU: 2 PID: 20805 Comm: syz-executor Not tainted
> >> 4.9.0-rc6-xc2-00056-g08372dd4b91d-dirty #50
> >> Hardware name: Hardkernel ODROID-C2 (DT)
> >> Call trace:
> >> [] dump_backtrace+0x0/0x3c8 arch/arm64/kernel/traps.c:69
> >> [] show_stack+0x20/0x30 arch/arm64/kernel/traps.c:219
> >> [< inline >] __dump_stack lib/dump_stack.c:15
> >> [] dump_stack+0x100/0x150 lib/dump_stack.c:51
> >> [< inline >] print_deadlock_bug kernel/locking/lockdep.c:1728
> >> [< inline >] check_deadlock kernel/locking/lockdep.c:1772
> >> [< inline >] validate_chain kernel/locking/lockdep.c:2250
> >> [] __lock_acquire+0x1938/0x3440 
> >> kernel/locking/lockdep.c:3335
> >> [] lock_acquire+0xdc/0x1d8 kernel/locking/lockdep.c:3746
> >> [< inline >] __mutex_lock_common kernel/locking/mutex.c:521
> >> [] mutex_lock_nested+0xdc/0x7b8 
> >> kernel/locking/mutex.c:621
> >> [< inline >] kvm_vgic_dist_destroy
> >> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:271
> >> [] kvm_vgic_destroy+0x34/0x250
> >> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:294
> >> [] vgic_v2_map_resources+0x218/0x430
> >> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-v2.c:295
> >> [] kvm_vgic_map_resources+0xcc/0x108
> >> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:348
> >> [< inline >] kvm_vcpu_first_run_init
> >> arch/arm64/kvm/../../../arch/arm/kvm/arm.c:505
> >> [] kvm_arch_vcpu_ioctl_run+0xab8/0xce0
> >> arch/arm64/kvm/../../../arch/arm/kvm/arm.c:591
> >> [] kvm_vcpu_ioctl+0x434/0xc08
> >> arch/arm64/kvm/../../../virt/kvm/kvm_main.c:2557
> >> [< inline >] vfs_ioctl fs/ioctl.c:43
> >> [] do_vfs_ioctl+0x128/0xfc0 fs/ioctl.c:679
> >> [< inline >] SYSC_ioctl fs/ioctl.c:694
> >> [] SyS_ioctl+0xa8/0xb8 fs/ioctl.c:685
> >> [] el0_svc_naked+0x24/0x28 arch/arm64/kernel/entry.S:755
> > 
> > Nice catch, and many thanks for reporting this.
> > 
> > The bug is fairly obvious. Christoffer, what do you think? I don't think
> > we need to hold the kvm->lock all the way, but I'd like another pair of
> > eyes (the coffee machine is out of order again, and tea doesn't cut it).
> > 
> > Thanks,
> > 
> > M.
> > 
> > From 93f80b20fb9351a49ee8b74eed3fc59c84651371 Mon Sep 17 00:00:00 2001
> > From: Marc Zyngier 
> > Date: Thu, 12 Jan 2017 09:21:56 +
> > Subject: [PATCH] KVM: arm/arm64: vgic: Fix deadlock on error handling
> > 
> > Dmitry Vyukov reported that the syzkaller fuzzer triggered a
> > deadlock in the vgic setup code when an error was detected, as
> > the cleanup code tries to take a lock that is already held by
> > the setup code.
> > 
> > The fix is pretty obvious: move the cleaup call after having
> > dropped the lock, since not much can happen at that point.
>   
> Is that really true? If for instance the calls to
> vgic_register_dist_iodev() or kvm_phys_addr_ioremap() in
> vgic_v2_map_resources() fail, we leave the function with a half
> initialized VGIC (because vgic_init() succeeded). Dropping the lock at
> this point without having the GIC cleaned up before sounds a bit
> suspicious (I may be wrong on this, though).
> 
> Can't we just document that kvm_vgic_destroy() needs to be called with
> the kvm->lock held and take the lock around the only other caller
> (kvm_arch_destroy_vm() in arch/arm/kvm/arm.c)?
> We can then keep holding the lock in the map_resources calls.
> Though we might still move the calls to kvm_vgic_destroy() into the
> wrapper function as a cleanup 

Re: [PATCH v3 2/5] arm64: Work around Falkor erratum 1003

2017-01-12 Thread Catalin Marinas
On Wed, Jan 11, 2017 at 06:37:39PM +, Mark Rutland wrote:
> On Wed, Jan 11, 2017 at 12:35:55PM -0600, Timur Tabi wrote:
> > On 01/11/2017 12:33 PM, Mark Rutland wrote:
> > >It'll need to affect all lines since the kconfig column needs to expand
> > >by at least one character to fit QCOM_FALKOR_ERRATUM_1003.
> > 
> > Or we can make the macro shorter.
> 
> The name, as it is, is perfectly descriptive.
> 
> Let's not sacrifice legibility over a non-issue.

I agree, I didn't realise that the we expand the last column already.
It's a non-issue indeed.

-- 
Catalin
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: kvm: deadlock in kvm_vgic_map_resources

2017-01-12 Thread Andre Przywara
Hi,

On 12/01/17 09:32, Marc Zyngier wrote:
> Hi Dmitry,
> 
> On 11/01/17 19:01, Dmitry Vyukov wrote:
>> Hello,
>>
>> While running syzkaller fuzzer I've got the following deadlock.
>> On commit 9c763584b7c8911106bb77af7e648bef09af9d80.
>>
>>
>> =
>> [ INFO: possible recursive locking detected ]
>> 4.9.0-rc6-xc2-00056-g08372dd4b91d-dirty #50 Not tainted
>> -
>> syz-executor/20805 is trying to acquire lock:
>> (
>> >lock
>> ){+.+.+.}
>> , at:
>> [< inline >] kvm_vgic_dist_destroy
>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:271
>> [] kvm_vgic_destroy+0x34/0x250
>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:294
>> but task is already holding lock:
>> (>lock){+.+.+.}, at:
>> [] kvm_vgic_map_resources+0x2c/0x108
>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:343
>> other info that might help us debug this:
>> Possible unsafe locking scenario:
>> CPU0
>> 
>> lock(>lock);
>> lock(>lock);
>> *** DEADLOCK ***
>> May be due to missing lock nesting notation
>> 2 locks held by syz-executor/20805:
>> #0:(>mutex){+.+.+.}, at:
>> [] vcpu_load+0x28/0x1d0
>> arch/arm64/kvm/../../../virt/kvm/kvm_main.c:143
>> #1:(>lock){+.+.+.}, at:
>> [] kvm_vgic_map_resources+0x2c/0x108
>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:343
>> stack backtrace:
>> CPU: 2 PID: 20805 Comm: syz-executor Not tainted
>> 4.9.0-rc6-xc2-00056-g08372dd4b91d-dirty #50
>> Hardware name: Hardkernel ODROID-C2 (DT)
>> Call trace:
>> [] dump_backtrace+0x0/0x3c8 arch/arm64/kernel/traps.c:69
>> [] show_stack+0x20/0x30 arch/arm64/kernel/traps.c:219
>> [< inline >] __dump_stack lib/dump_stack.c:15
>> [] dump_stack+0x100/0x150 lib/dump_stack.c:51
>> [< inline >] print_deadlock_bug kernel/locking/lockdep.c:1728
>> [< inline >] check_deadlock kernel/locking/lockdep.c:1772
>> [< inline >] validate_chain kernel/locking/lockdep.c:2250
>> [] __lock_acquire+0x1938/0x3440 
>> kernel/locking/lockdep.c:3335
>> [] lock_acquire+0xdc/0x1d8 kernel/locking/lockdep.c:3746
>> [< inline >] __mutex_lock_common kernel/locking/mutex.c:521
>> [] mutex_lock_nested+0xdc/0x7b8 kernel/locking/mutex.c:621
>> [< inline >] kvm_vgic_dist_destroy
>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:271
>> [] kvm_vgic_destroy+0x34/0x250
>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:294
>> [] vgic_v2_map_resources+0x218/0x430
>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-v2.c:295
>> [] kvm_vgic_map_resources+0xcc/0x108
>> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:348
>> [< inline >] kvm_vcpu_first_run_init
>> arch/arm64/kvm/../../../arch/arm/kvm/arm.c:505
>> [] kvm_arch_vcpu_ioctl_run+0xab8/0xce0
>> arch/arm64/kvm/../../../arch/arm/kvm/arm.c:591
>> [] kvm_vcpu_ioctl+0x434/0xc08
>> arch/arm64/kvm/../../../virt/kvm/kvm_main.c:2557
>> [< inline >] vfs_ioctl fs/ioctl.c:43
>> [] do_vfs_ioctl+0x128/0xfc0 fs/ioctl.c:679
>> [< inline >] SYSC_ioctl fs/ioctl.c:694
>> [] SyS_ioctl+0xa8/0xb8 fs/ioctl.c:685
>> [] el0_svc_naked+0x24/0x28 arch/arm64/kernel/entry.S:755
> 
> Nice catch, and many thanks for reporting this.
> 
> The bug is fairly obvious. Christoffer, what do you think? I don't think
> we need to hold the kvm->lock all the way, but I'd like another pair of
> eyes (the coffee machine is out of order again, and tea doesn't cut it).
> 
> Thanks,
> 
>   M.
> 
> From 93f80b20fb9351a49ee8b74eed3fc59c84651371 Mon Sep 17 00:00:00 2001
> From: Marc Zyngier 
> Date: Thu, 12 Jan 2017 09:21:56 +
> Subject: [PATCH] KVM: arm/arm64: vgic: Fix deadlock on error handling
> 
> Dmitry Vyukov reported that the syzkaller fuzzer triggered a
> deadlock in the vgic setup code when an error was detected, as
> the cleanup code tries to take a lock that is already held by
> the setup code.
> 
> The fix is pretty obvious: move the cleaup call after having
> dropped the lock, since not much can happen at that point.
  
Is that really true? If for instance the calls to
vgic_register_dist_iodev() or kvm_phys_addr_ioremap() in
vgic_v2_map_resources() fail, we leave the function with a half
initialized VGIC (because vgic_init() succeeded). Dropping the lock at
this point without having the GIC cleaned up before sounds a bit
suspicious (I may be wrong on this, though).

Can't we just document that kvm_vgic_destroy() needs to be called with
the kvm->lock held and take the lock around the only other caller
(kvm_arch_destroy_vm() in arch/arm/kvm/arm.c)?
We can then keep holding the lock in the map_resources calls.
Though we might still move the calls to kvm_vgic_destroy() into the
wrapper function as a cleanup (as shown below), just before dropping the
lock.

Cheers,
Andre.

> Cc: sta...@vger.kernel.org
> Signed-off-by: Marc Zyngier 
> ---
>  virt/kvm/arm/vgic/vgic-init.c | 4 
>  virt/kvm/arm/vgic/vgic-v2.c   | 2 --
>  virt/kvm/arm/vgic/vgic-v3.c   | 2 --
>  3 files changed, 4 

Re: kvm: deadlock in kvm_vgic_map_resources

2017-01-12 Thread Marc Zyngier
Hi Dmitry,

On 11/01/17 19:01, Dmitry Vyukov wrote:
> Hello,
> 
> While running syzkaller fuzzer I've got the following deadlock.
> On commit 9c763584b7c8911106bb77af7e648bef09af9d80.
> 
> 
> =
> [ INFO: possible recursive locking detected ]
> 4.9.0-rc6-xc2-00056-g08372dd4b91d-dirty #50 Not tainted
> -
> syz-executor/20805 is trying to acquire lock:
> (
> >lock
> ){+.+.+.}
> , at:
> [< inline >] kvm_vgic_dist_destroy
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:271
> [] kvm_vgic_destroy+0x34/0x250
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:294
> but task is already holding lock:
> (>lock){+.+.+.}, at:
> [] kvm_vgic_map_resources+0x2c/0x108
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:343
> other info that might help us debug this:
> Possible unsafe locking scenario:
> CPU0
> 
> lock(>lock);
> lock(>lock);
> *** DEADLOCK ***
> May be due to missing lock nesting notation
> 2 locks held by syz-executor/20805:
> #0:(>mutex){+.+.+.}, at:
> [] vcpu_load+0x28/0x1d0
> arch/arm64/kvm/../../../virt/kvm/kvm_main.c:143
> #1:(>lock){+.+.+.}, at:
> [] kvm_vgic_map_resources+0x2c/0x108
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:343
> stack backtrace:
> CPU: 2 PID: 20805 Comm: syz-executor Not tainted
> 4.9.0-rc6-xc2-00056-g08372dd4b91d-dirty #50
> Hardware name: Hardkernel ODROID-C2 (DT)
> Call trace:
> [] dump_backtrace+0x0/0x3c8 arch/arm64/kernel/traps.c:69
> [] show_stack+0x20/0x30 arch/arm64/kernel/traps.c:219
> [< inline >] __dump_stack lib/dump_stack.c:15
> [] dump_stack+0x100/0x150 lib/dump_stack.c:51
> [< inline >] print_deadlock_bug kernel/locking/lockdep.c:1728
> [< inline >] check_deadlock kernel/locking/lockdep.c:1772
> [< inline >] validate_chain kernel/locking/lockdep.c:2250
> [] __lock_acquire+0x1938/0x3440 
> kernel/locking/lockdep.c:3335
> [] lock_acquire+0xdc/0x1d8 kernel/locking/lockdep.c:3746
> [< inline >] __mutex_lock_common kernel/locking/mutex.c:521
> [] mutex_lock_nested+0xdc/0x7b8 kernel/locking/mutex.c:621
> [< inline >] kvm_vgic_dist_destroy
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:271
> [] kvm_vgic_destroy+0x34/0x250
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:294
> [] vgic_v2_map_resources+0x218/0x430
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-v2.c:295
> [] kvm_vgic_map_resources+0xcc/0x108
> arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-init.c:348
> [< inline >] kvm_vcpu_first_run_init
> arch/arm64/kvm/../../../arch/arm/kvm/arm.c:505
> [] kvm_arch_vcpu_ioctl_run+0xab8/0xce0
> arch/arm64/kvm/../../../arch/arm/kvm/arm.c:591
> [] kvm_vcpu_ioctl+0x434/0xc08
> arch/arm64/kvm/../../../virt/kvm/kvm_main.c:2557
> [< inline >] vfs_ioctl fs/ioctl.c:43
> [] do_vfs_ioctl+0x128/0xfc0 fs/ioctl.c:679
> [< inline >] SYSC_ioctl fs/ioctl.c:694
> [] SyS_ioctl+0xa8/0xb8 fs/ioctl.c:685
> [] el0_svc_naked+0x24/0x28 arch/arm64/kernel/entry.S:755

Nice catch, and many thanks for reporting this.

The bug is fairly obvious. Christoffer, what do you think? I don't think
we need to hold the kvm->lock all the way, but I'd like another pair of
eyes (the coffee machine is out of order again, and tea doesn't cut it).

Thanks,

M.

>From 93f80b20fb9351a49ee8b74eed3fc59c84651371 Mon Sep 17 00:00:00 2001
From: Marc Zyngier 
Date: Thu, 12 Jan 2017 09:21:56 +
Subject: [PATCH] KVM: arm/arm64: vgic: Fix deadlock on error handling

Dmitry Vyukov reported that the syzkaller fuzzer triggered a
deadlock in the vgic setup code when an error was detected, as
the cleanup code tries to take a lock that is already held by
the setup code.

The fix is pretty obvious: move the cleaup call after having
dropped the lock, since not much can happen at that point.

Cc: sta...@vger.kernel.org
Signed-off-by: Marc Zyngier 
---
 virt/kvm/arm/vgic/vgic-init.c | 4 
 virt/kvm/arm/vgic/vgic-v2.c   | 2 --
 virt/kvm/arm/vgic/vgic-v3.c   | 2 --
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 5114391..0e0c295 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -350,6 +350,10 @@ int kvm_vgic_map_resources(struct kvm *kvm)
ret = vgic_v3_map_resources(kvm);
 out:
mutex_unlock(>lock);
+
+   if (ret)
+   kvm_vgic_destroy(kvm);
+
return ret;
 }
 
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 9bab867..834137e 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -293,8 +293,6 @@ int vgic_v2_map_resources(struct kvm *kvm)
dist->ready = true;
 
 out:
-   if (ret)
-   kvm_vgic_destroy(kvm);
return ret;
 }
 
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 7df1b90..a4c7fff 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -308,8 +308,6 @@