date:20201203

Currently the "memory-encryption" property is only looked at once we
get to kvm_init().  Although protection of guest memory from the
hypervisor isn't something that could really ever work with TCG, it's
not conceptually tied to the KVM accelerator.

In addition, the way the string property is resolved to an object is
almost identical to how a QOM link property is handled.

So, create a new "securable-guest-memory" link property which sets
this QOM interface link directly in the machine.  For compatibility we
keep the "memory-encryption" property, but now implemented in terms of
the new property.

Signed-off-by: David Gibson 
Reviewed-by: Richard Henderson 
---
 accel/kvm/kvm-all.c | 22 ++
 hw/core/machine.c   | 43 +--
 include/hw/boards.h |  2 +-
 3 files changed, 44 insertions(+), 23 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 9e7cea64d6..92a49b328a 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2207,24 +2207,14 @@ static int kvm_init(MachineState *ms)
  * if memory encryption object is specified then initialize the memory
  * encryption context.
  */
-if (ms->memory_encryption) {
-Object *obj = object_resolve_path_component(object_get_objects_root(),
-ms->memory_encryption);
-
-if (object_dynamic_cast(obj, TYPE_SECURABLE_GUEST_MEMORY)) {
-SecurableGuestMemory *sgm = SECURABLE_GUEST_MEMORY(obj);
-
-/* FIXME handle mechanisms other than SEV */
-ret = sev_kvm_init(sgm);
-if (ret < 0) {
-goto err;
-}
-
-kvm_state->sgm = sgm;
-} else {
-ret = -1;
+if (ms->sgm) {
+/* FIXME handle mechanisms other than SEV */
+ret = sev_kvm_init(ms->sgm);
+if (ret < 0) {
 goto err;
 }
+
+kvm_state->sgm = ms->sgm;
 }
 
 ret = kvm_arch_init(ms, s);
diff --git a/hw/core/machine.c b/hw/core/machine.c
index cb0711508d..816ea3ae3e 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -27,6 +27,7 @@
 #include "hw/pci/pci.h"
 #include "hw/mem/nvdimm.h"
 #include "migration/vmstate.h"
+#include "exec/securable-guest-memory.h"
 
 GlobalProperty hw_compat_5_1[] = {
 { "vhost-scsi", "num_queues", "1"},
@@ -417,16 +418,37 @@ static char *machine_get_memory_encryption(Object *obj, 
Error **errp)
 {
 MachineState *ms = MACHINE(obj);
 
-return g_strdup(ms->memory_encryption);
+if (ms->sgm) {
+return g_strdup(object_get_canonical_path_component(OBJECT(ms->sgm)));
+}
+
+return NULL;
 }
 
 static void machine_set_memory_encryption(Object *obj, const char *value,
 Error **errp)
 {
-MachineState *ms = MACHINE(obj);
+Object *sgm =
+object_resolve_path_component(object_get_objects_root(), value);
+
+if (!sgm) {
+error_setg(errp, "No such memory encryption object '%s'", value);
+return;
+}
 
-g_free(ms->memory_encryption);
-ms->memory_encryption = g_strdup(value);
+object_property_set_link(obj, "securable-guest-memory", sgm, errp);
+}
+
+static void machine_check_securable_guest_memory(const Object *obj,
+ const char *name,
+ Object *new_target,
+ Error **errp)
+{
+/*
+ * So far the only constraint is that the target has the
+ * TYPE_SECURABLE_GUEST_MEMORY interface, and that's checked by
+ * the QOM core
+ */
 }
 
 static bool machine_get_nvdimm(Object *obj, Error **errp)
@@ -833,6 +855,15 @@ static void machine_class_init(ObjectClass *oc, void *data)
 object_class_property_set_description(oc, "suppress-vmdesc",
 "Set on to disable self-describing migration");
 
+object_class_property_add_link(oc, "securable-guest-memory",
+   TYPE_SECURABLE_GUEST_MEMORY,
+   offsetof(MachineState, sgm),
+   machine_check_securable_guest_memory,
+   OBJ_PROP_LINK_STRONG);
+object_class_property_set_description(oc, "securable-guest-memory",
+"Set securable guest memory scheme to use");
+
+/* For compatibility */
 object_class_property_add_str(oc, "memory-encryption",
 machine_get_memory_encryption, machine_set_memory_encryption);
 object_class_property_set_description(oc, "memory-encryption",
@@ -1123,9 +1154,9 @@ void machine_run_board_init(MachineState *machine)
 cc->deprecation_note);
 }
 
-if (machine->memory_encryption) {
+if (machine->sgm) {
 /*
- * With memory encryption, the host can't see the real
+ * With securable guest memory, the host can't see the real
  * contents of RAM,

[for-6.0 v5 13/13] s390: Recognize securable-guest-memory option

At least some s390 cpu models support "Protected Virtualization" (PV),
a mechanism to protect guests from eavesdropping by a compromised
hypervisor.

This is similar in function to other mechanisms like AMD's SEV and
POWER's PEF, which are controlled by the "securable-guest-memory" machine
option.  s390 is a slightly special case, because we already supported
PV, simply by using a CPU model with the required feature
(S390_FEAT_UNPACK).

To integrate this with the option used by other platforms, we
implement the following compromise:

 - When the securable-guest-memory option is set, s390 will recognize it,
   verify that the CPU can support PV (failing if not) and set virtio
   default options necessary for encrypted or protected guests, as on
   other platforms.  i.e. if securable-guest-memory is set, we will
   either create a guest capable of entering PV mode, or fail outright

 - If securable-guest-memory is not set, guests might still be able to
   enter PV mode, if the CPU has the right model.  This may be a
   little surprising, but shouldn't actually be harmful.

To start a guest supporting Protected Virtualization using the new
option use the command line arguments:
-object s390-pv-guest,id=pv0 -machine securable-guest-memory=pv0

Signed-off-by: David Gibson 
---
 hw/s390x/pv.c | 58 +++
 include/hw/s390x/pv.h |  1 +
 target/s390x/kvm.c|  3 +++
 3 files changed, 62 insertions(+)

diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c
index ab3a2482aa..9fddc196a3 100644
--- a/hw/s390x/pv.c
+++ b/hw/s390x/pv.c
@@ -14,8 +14,11 @@
 #include 
 
 #include "cpu.h"
+#include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "sysemu/kvm.h"
+#include "qom/object_interfaces.h"
+#include "exec/securable-guest-memory.h"
 #include "hw/s390x/ipl.h"
 #include "hw/s390x/pv.h"
 
@@ -111,3 +114,58 @@ void s390_pv_inject_reset_error(CPUState *cs)
 /* Report that we are unable to enter protected mode */
 env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV;
 }
+
+#define TYPE_S390_PV_GUEST "s390-pv-guest"
+#define S390_PV_GUEST(obj)  \
+OBJECT_CHECK(S390PVGuestState, (obj), TYPE_S390_PV_GUEST)
+
+typedef struct S390PVGuestState S390PVGuestState;
+
+/**
+ * S390PVGuestState:
+ *
+ * The S390PVGuestState object is basically a dummy used to tell the
+ * securable guest memory system to use s390's PV mechanism.
+ *
+ * # $QEMU \
+ * -object s390-pv-guest,id=pv0 \
+ * -machine ...,securable-guest-memory=pv0
+ */
+struct S390PVGuestState {
+Object parent_obj;
+};
+
+int s390_pv_init(SecurableGuestMemory *sgm, Error **errp)
+{
+if (!object_dynamic_cast(OBJECT(sgm), TYPE_S390_PV_GUEST)) {
+return 0;
+}
+
+if (!s390_has_feat(S390_FEAT_UNPACK)) {
+error_setg(errp,
+   "CPU model does not support Protected Virtualization");
+return -1;
+}
+
+sgm->ready = true;
+
+return 0;
+}
+
+static const TypeInfo s390_pv_guest_info = {
+.parent = TYPE_SECURABLE_GUEST_MEMORY,
+.name = TYPE_S390_PV_GUEST,
+.instance_size = sizeof(S390PVGuestState),
+.interfaces = (InterfaceInfo[]) {
+{ TYPE_USER_CREATABLE },
+{ }
+}
+};
+
+static void
+s390_pv_register_types(void)
+{
+type_register_static(&s390_pv_guest_info);
+}
+
+type_init(s390_pv_register_types);
diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h
index aee758bc2d..4250af699b 100644
--- a/include/hw/s390x/pv.h
+++ b/include/hw/s390x/pv.h
@@ -43,6 +43,7 @@ void s390_pv_prep_reset(void);
 int s390_pv_verify(void);
 void s390_pv_unshare(void);
 void s390_pv_inject_reset_error(CPUState *cs);
+int s390_pv_init(SecurableGuestMemory *sgm, Error **errp);
 #else /* CONFIG_KVM */
 static inline bool s390_is_pv(void) { return false; }
 static inline int s390_pv_vm_enable(void) { return 0; }
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index b8385e6b95..3383487463 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -387,6 +387,9 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
 }
 
 kvm_set_max_memslot_size(KVM_SLOT_MAX_BYTES);
+
+s390_pv_init(ms->sgm, &error_fatal);
+
 return 0;
 }
 
-- 
2.28.0

[for-6.0 v5 07/13] sev: Add Error ** to sev_kvm_init()

This allows failures to be reported richly and idiomatically.

Signed-off-by: David Gibson 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Richard Henderson 
---
 accel/kvm/kvm-all.c  |  4 +++-
 accel/kvm/sev-stub.c |  5 +++--
 include/sysemu/sev.h |  2 +-
 target/i386/sev.c| 31 +++
 4 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index c6bd7b9d02..724e9294d0 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2183,9 +2183,11 @@ static int kvm_init(MachineState *ms)
  * encryption context.
  */
 if (ms->sgm) {
+Error *local_err = NULL;
 /* FIXME handle mechanisms other than SEV */
-ret = sev_kvm_init(ms->sgm);
+ret = sev_kvm_init(ms->sgm, &local_err);
 if (ret < 0) {
+error_report_err(local_err);
 goto err;
 }
 }
diff --git a/accel/kvm/sev-stub.c b/accel/kvm/sev-stub.c
index 3df3c88eeb..537c91d9f8 100644
--- a/accel/kvm/sev-stub.c
+++ b/accel/kvm/sev-stub.c
@@ -15,7 +15,8 @@
 #include "qemu-common.h"
 #include "sysemu/sev.h"
 
-int sev_kvm_init(SecurableGuestMemory *sgm)
+int sev_kvm_init(SecurableGuestMemory *sgm, Error **errp)
 {
-return -1;
+/* SEV can't be selected if it's not compiled */
+g_assert_not_reached();
 }
diff --git a/include/sysemu/sev.h b/include/sysemu/sev.h
index 36d038a36f..7aa35821f0 100644
--- a/include/sysemu/sev.h
+++ b/include/sysemu/sev.h
@@ -17,6 +17,6 @@
 #include "sysemu/kvm.h"
 #include "exec/securable-guest-memory.h"
 
-int sev_kvm_init(SecurableGuestMemory *sgm);
+int sev_kvm_init(SecurableGuestMemory *sgm, Error **errp);
 
 #endif
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 7b8ce590f7..7333a60dc0 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -626,7 +626,7 @@ sev_vm_state_change(void *opaque, int running, RunState 
state)
 }
 }
 
-int sev_kvm_init(SecurableGuestMemory *sgm)
+int sev_kvm_init(SecurableGuestMemory *sgm, Error **errp)
 {
 SevGuestState *sev = SEV_GUEST(sgm);
 char *devname;
@@ -648,14 +648,14 @@ int sev_kvm_init(SecurableGuestMemory *sgm)
 host_cbitpos = ebx & 0x3f;
 
 if (host_cbitpos != sev->cbitpos) {
-error_report("%s: cbitpos check failed, host '%d' requested '%d'",
- __func__, host_cbitpos, sev->cbitpos);
+error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'",
+   __func__, host_cbitpos, sev->cbitpos);
 goto err;
 }
 
 if (sev->reduced_phys_bits < 1) {
-error_report("%s: reduced_phys_bits check failed, it should be >=1,"
- " requested '%d'", __func__, sev->reduced_phys_bits);
+error_setg(errp, "%s: reduced_phys_bits check failed, it should be >=1,"
+   " requested '%d'", __func__, sev->reduced_phys_bits);
 goto err;
 }
 
@@ -664,20 +664,19 @@ int sev_kvm_init(SecurableGuestMemory *sgm)
 devname = object_property_get_str(OBJECT(sev), "sev-device", NULL);
 sev->sev_fd = open(devname, O_RDWR);
 if (sev->sev_fd < 0) {
-error_report("%s: Failed to open %s '%s'", __func__,
- devname, strerror(errno));
-}
-g_free(devname);
-if (sev->sev_fd < 0) {
+error_setg(errp, "%s: Failed to open %s '%s'", __func__,
+   devname, strerror(errno));
+g_free(devname);
 goto err;
 }
+g_free(devname);
 
 ret = sev_platform_ioctl(sev->sev_fd, SEV_PLATFORM_STATUS, &status,
  &fw_error);
 if (ret) {
-error_report("%s: failed to get platform status ret=%d "
- "fw_error='%d: %s'", __func__, ret, fw_error,
- fw_error_to_str(fw_error));
+error_setg(errp, "%s: failed to get platform status ret=%d "
+   "fw_error='%d: %s'", __func__, ret, fw_error,
+   fw_error_to_str(fw_error));
 goto err;
 }
 sev->build_id = status.build;
@@ -687,14 +686,14 @@ int sev_kvm_init(SecurableGuestMemory *sgm)
 trace_kvm_sev_init();
 ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT, NULL, &fw_error);
 if (ret) {
-error_report("%s: failed to initialize ret=%d fw_error=%d '%s'",
- __func__, ret, fw_error, fw_error_to_str(fw_error));
+error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'",
+   __func__, ret, fw_error, fw_error_to_str(fw_error));
 goto err;
 }
 
 ret = sev_launch_start(sev);
 if (ret) {
-error_report("%s: failed to create encryption context", __func__);
+error_setg(errp, "%s: failed to create encryption context", __func__);
 goto err;
 }
 
-- 
2.28.0

[for-6.0 v5 09/13] securable guest memory: Move SEV initialization into arch specific code

While we've abstracted some (potential) differences between mechanisms for
securing guest memory, the initialization is still specific to SEV.  Given
that, move it into x86's kvm_arch_init() code, rather than the generic
kvm_init() code.

Signed-off-by: David Gibson 
---
 accel/kvm/kvm-all.c | 14 --
 target/i386/kvm.c   | 12 
 target/i386/sev.c   |  7 ++-
 3 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 724e9294d0..1b676da6c2 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2178,20 +2178,6 @@ static int kvm_init(MachineState *ms)
 
 kvm_state = s;
 
-/*
- * if memory encryption object is specified then initialize the memory
- * encryption context.
- */
-if (ms->sgm) {
-Error *local_err = NULL;
-/* FIXME handle mechanisms other than SEV */
-ret = sev_kvm_init(ms->sgm, &local_err);
-if (ret < 0) {
-error_report_err(local_err);
-goto err;
-}
-}
-
 ret = kvm_arch_init(ms, s);
 if (ret < 0) {
 goto err;
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index a2934dda02..8e3617f3cd 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -42,6 +42,7 @@
 #include "hw/i386/intel_iommu.h"
 #include "hw/i386/x86-iommu.h"
 #include "hw/i386/e820_memory_layout.h"
+#include "sysemu/sev.h"
 
 #include "hw/pci/pci.h"
 #include "hw/pci/msi.h"
@@ -2110,6 +2111,17 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
 uint64_t shadow_mem;
 int ret;
 struct utsname utsname;
+Error *local_err = NULL;
+
+/*
+ * if memory encryption object is specified then initialize the
+ * memory encryption context (no-op otherwise)
+ */
+ret = sev_kvm_init(ms->sgm, &local_err);
+if (ret < 0) {
+error_report_err(local_err);
+return ret;
+}
 
 if (!kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) {
 error_report("kvm: KVM_CAP_IRQ_ROUTING not supported by KVM");
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 022ce5fc3a..8c19f4aea6 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -628,13 +628,18 @@ sev_vm_state_change(void *opaque, int running, RunState 
state)
 
 int sev_kvm_init(SecurableGuestMemory *sgm, Error **errp)
 {
-SevGuestState *sev = SEV_GUEST(sgm);
+SevGuestState *sev
+= (SevGuestState *)object_dynamic_cast(OBJECT(sgm), TYPE_SEV_GUEST);
 char *devname;
 int ret, fw_error;
 uint32_t ebx;
 uint32_t host_cbitpos;
 struct sev_user_data_status status = {};
 
+if (!sev) {
+return 0;
+}
+
 ret = ram_block_discard_disable(true);
 if (ret) {
 error_report("%s: cannot disable RAM discard", __func__);
-- 
2.28.0

[for-6.0 v5 10/13] spapr: Add PEF based securable guest memory

Some upcoming POWER machines have a system called PEF (Protected
Execution Facility) which uses a small ultravisor to allow guests to
run in a way that they can't be eavesdropped by the hypervisor.  The
effect is roughly similar to AMD SEV, although the mechanisms are
quite different.

Most of the work of this is done between the guest, KVM and the
ultravisor, with little need for involvement by qemu.  However qemu
does need to tell KVM to allow secure VMs.

Because the availability of secure mode is a guest visible difference
which depends on having the right hardware and firmware, we don't
enable this by default.  In order to run a secure guest you need to
create a "pef-guest" object and set the securable-guest-memory machine
property to point to it.

Note that this just *allows* secure guests, the architecture of PEF is
such that the guest still needs to talk to the ultravisor to enter
secure mode.  Qemu has no direct way of knowing if the guest is in
secure mode, and certainly can't know until well after machine
creation time.

To start a PEF-capable guest, use the command line options:
-object pef-guest,id=pef0 -machine securable-guest-memory=pef0

Signed-off-by: David Gibson 
Acked-by: Ram Pai 
---
 hw/ppc/meson.build   |   1 +
 hw/ppc/pef.c | 115 +++
 hw/ppc/spapr.c   |  10 
 include/hw/ppc/pef.h |  26 ++
 target/ppc/kvm.c |  18 ---
 target/ppc/kvm_ppc.h |   6 ---
 6 files changed, 152 insertions(+), 24 deletions(-)
 create mode 100644 hw/ppc/pef.c
 create mode 100644 include/hw/ppc/pef.h

diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build
index ffa2ec37fa..218631c883 100644
--- a/hw/ppc/meson.build
+++ b/hw/ppc/meson.build
@@ -27,6 +27,7 @@ ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files(
   'spapr_nvdimm.c',
   'spapr_rtas_ddw.c',
   'spapr_numa.c',
+  'pef.c',
 ))
 ppc_ss.add(when: 'CONFIG_SPAPR_RNG', if_true: files('spapr_rng.c'))
 ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_LINUX'], if_true: files(
diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c
new file mode 100644
index 00..3ae3059cfe
--- /dev/null
+++ b/hw/ppc/pef.c
@@ -0,0 +1,115 @@
+/*
+ * PEF (Protected Execution Facility) for POWER support
+ *
+ * Copyright David Gibson, Redhat Inc. 2020
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+#include "qom/object_interfaces.h"
+#include "sysemu/kvm.h"
+#include "migration/blocker.h"
+#include "exec/securable-guest-memory.h"
+#include "hw/ppc/pef.h"
+
+#define TYPE_PEF_GUEST "pef-guest"
+#define PEF_GUEST(obj)  \
+OBJECT_CHECK(PefGuestState, (obj), TYPE_PEF_GUEST)
+
+typedef struct PefGuestState PefGuestState;
+
+/**
+ * PefGuestState:
+ *
+ * The PefGuestState object is used for creating and managing a PEF
+ * guest.
+ *
+ * # $QEMU \
+ * -object pef-guest,id=pef0 \
+ * -machine ...,securable-guest-memory=pef0
+ */
+struct PefGuestState {
+Object parent_obj;
+};
+
+#ifdef CONFIG_KVM
+static int kvmppc_svm_init(Error **errp)
+{
+if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_SECURABLE_GUEST)) {
+error_setg(errp,
+   "KVM implementation does not support Secure VMs (is an ultravisor running?)");
+return -1;
+} else {
+int ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1);
+
+if (ret < 0) {
+error_setg(errp,
+   "Error enabling PEF with KVM");
+return -1;
+}
+}
+
+return 0;
+}
+
+/*
+ * Don't set error if KVM_PPC_SVM_OFF ioctl is invoked on kernels
+ * that don't support this ioctl.
+ */
+void kvmppc_svm_off(Error **errp)
+{
+int rc;
+
+if (!kvm_enabled()) {
+return;
+}
+
+rc = kvm_vm_ioctl(KVM_STATE(current_accel()), KVM_PPC_SVM_OFF);
+if (rc && rc != -ENOTTY) {
+error_setg_errno(errp, -rc, "KVM_PPC_SVM_OFF ioctl failed");
+}
+}
+#else
+static int kvmppc_svm_init(Error **errp)
+{
+g_assert_not_reached();
+}
+#endif
+
+int pef_kvm_init(SecurableGuestMemory *sgm, Error **errp)
+{
+if (!object_dynamic_cast(OBJECT(sgm), TYPE_PEF_GUEST)) {
+return 0;
+}
+
+if (!kvm_enabled()) {
+error_setg(errp, "PEF requires KVM");
+return -1;
+}
+
+return kvmppc_svm_init(errp);
+}
+
+static const TypeInfo pef_guest_info = {
+.parent = TYPE_OBJECT,
+.name = TYPE_PEF_GUEST,
+.instance_size = sizeof(PefGuestState),
+.interfaces = (InterfaceInfo[]) {
+{ TYPE_SECURABLE_GUEST_MEMORY },
+{ TYPE_USER_CREATABLE },
+{ }
+}
+};
+
+static void
+pef_register_types(void)
+{
+type_register_static(&pef_guest_info);
+}
+
+type_init(pef_register_types);
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 12a012d9dd..d95b60f712 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -82,6 +82,7

[for-6.0 v5 06/13] securable guest memory: Decouple kvm_memcrypt_*() helpers from KVM

The kvm_memcrypt_enabled() and kvm_memcrypt_encrypt_data() helper functions
don't conceptually have any connection to KVM (although it's not possible
in practice to use them without it).

They also rely on looking at the global KVMState.  But the same information
is available from the machine, and the only existing callers have natural
access to the machine state.

Therefore, move and rename them to helpers in securable-guest-memory.h,
taking an explicit machine parameter.

Signed-off-by: David Gibson 
Reviewed-by: Richard Henderson 
---
 accel/kvm/kvm-all.c   | 27 
 accel/stubs/kvm-stub.c| 10 
 hw/i386/pc_sysfw.c|  6 +++--
 include/exec/securable-guest-memory.h | 36 +++
 include/sysemu/kvm.h  | 17 -
 5 files changed, 40 insertions(+), 56 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 92a49b328a..c6bd7b9d02 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -121,9 +121,6 @@ struct KVMState
 KVMMemoryListener memory_listener;
 QLIST_HEAD(, KVMParkedVcpu) kvm_parked_vcpus;
 
-/* securable guest memory (e.g. by guest memory encryption) */
-SecurableGuestMemory *sgm;
-
 /* For "info mtree -f" to tell if an MR is registered in KVM */
 int nr_as;
 struct KVMAs {
@@ -222,28 +219,6 @@ int kvm_get_max_memslots(void)
 return s->nr_slots;
 }
 
-bool kvm_memcrypt_enabled(void)
-{
-if (kvm_state && kvm_state->sgm) {
-return true;
-}
-
-return false;
-}
-
-int kvm_memcrypt_encrypt_data(uint8_t *ptr, uint64_t len)
-{
-SecurableGuestMemory *sgm = kvm_state->sgm;
-
-if (sgm) {
-SecurableGuestMemoryClass *sgmc = 
SECURABLE_GUEST_MEMORY_GET_CLASS(sgm);
-
-return sgmc->encrypt_data(sgm, ptr, len);
-}
-
-return 1;
-}
-
 /* Called with KVMMemoryListener.slots_lock held */
 static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
 {
@@ -2213,8 +2188,6 @@ static int kvm_init(MachineState *ms)
 if (ret < 0) {
 goto err;
 }
-
-kvm_state->sgm = ms->sgm;
 }
 
 ret = kvm_arch_init(ms, s);
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
index 680e099463..0f17acfac0 100644
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -81,16 +81,6 @@ int kvm_on_sigbus(int code, void *addr)
 return 1;
 }
 
-bool kvm_memcrypt_enabled(void)
-{
-return false;
-}
-
-int kvm_memcrypt_encrypt_data(uint8_t *ptr, uint64_t len)
-{
-  return 1;
-}
-
 #ifndef CONFIG_USER_ONLY
 int kvm_irqchip_add_msi_route(KVMState *s, int vector, PCIDevice *dev)
 {
diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index b6c0822fe3..439ac78970 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -38,6 +38,7 @@
 #include "sysemu/sysemu.h"
 #include "hw/block/flash.h"
 #include "sysemu/kvm.h"
+#include "exec/securable-guest-memory.h"
 
 /*
  * We don't have a theoretically justifiable exact lower bound on the base
@@ -201,10 +202,11 @@ static void pc_system_flash_map(PCMachineState *pcms,
 pc_isa_bios_init(rom_memory, flash_mem, size);
 
 /* Encrypt the pflash boot ROM */
-if (kvm_memcrypt_enabled()) {
+if (securable_guest_memory_enabled(MACHINE(pcms))) {
 flash_ptr = memory_region_get_ram_ptr(flash_mem);
 flash_size = memory_region_size(flash_mem);
-ret = kvm_memcrypt_encrypt_data(flash_ptr, flash_size);
+ret = securable_guest_memory_encrypt(MACHINE(pcms),
+ flash_ptr, flash_size);
 if (ret) {
 error_report("failed to encrypt pflash rom");
 exit(1);
diff --git a/include/exec/securable-guest-memory.h 
b/include/exec/securable-guest-memory.h
index 4e2ae27040..7325b504ba 100644
--- a/include/exec/securable-guest-memory.h
+++ b/include/exec/securable-guest-memory.h
@@ -21,6 +21,7 @@
 #ifndef CONFIG_USER_ONLY
 
 #include "qom/object.h"
+#include "hw/boards.h"
 
 #define TYPE_SECURABLE_GUEST_MEMORY "securable-guest-memory"
 #define SECURABLE_GUEST_MEMORY(obj)\
@@ -43,6 +44,41 @@ typedef struct SecurableGuestMemoryClass {
 int (*encrypt_data)(SecurableGuestMemory *, uint8_t *, uint64_t);
 } SecurableGuestMemoryClass;
 
+/**
+ * securable_guest_memory_enabled - return whether guest memory is protected
+ *   from hypervisor access (with memory
+ *   encryption or otherwise)
+ * Returns: true guest memory is not directly accessible to qemu
+ *  false guest memory is directly accessible to qemu
+ */
+static inline bool securable_guest_memory_enabled(MachineState *machine)
+{
+return !!machine->sgm;
+}
+
+/**
+ * securable_guest_memory_encrypt: encrypt the memory range to make
+ *  it guest accessible
+ *

[for-6.0 v5 12/13] securable guest memory: Alter virtio default properties for protected guests

The default behaviour for virtio devices is not to use the platforms normal
DMA paths, but instead to use the fact that it's running in a hypervisor
to directly access guest memory.  That doesn't work if the guest's memory
is protected from hypervisor access, such as with AMD's SEV or POWER's PEF.

So, if a securable guest memory mechanism is enabled, then apply the
iommu_platform=on option so it will go through normal DMA mechanisms.
Those will presumably have some way of marking memory as shared with
the hypervisor or hardware so that DMA will work.

Signed-off-by: David Gibson 
Reviewed-by: Dr. David Alan Gilbert 
---
 hw/core/machine.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index a67a27d03c..d16273d75d 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -28,6 +28,8 @@
 #include "hw/mem/nvdimm.h"
 #include "migration/vmstate.h"
 #include "exec/securable-guest-memory.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-pci.h"
 
 GlobalProperty hw_compat_5_1[] = {
 { "vhost-scsi", "num_queues", "1"},
@@ -1169,6 +1171,17 @@ void machine_run_board_init(MachineState *machine)
  * areas.
  */
 machine_set_mem_merge(OBJECT(machine), false, &error_abort);
+
+/*
+ * Virtio devices can't count on directly accessing guest
+ * memory, so they need iommu_platform=on to use normal DMA
+ * mechanisms.  That requires also disabling legacy virtio
+ * support for those virtio pci devices which allow it.
+ */
+object_register_sugar_prop(TYPE_VIRTIO_PCI, "disable-legacy",
+   "on", true);
+object_register_sugar_prop(TYPE_VIRTIO_DEVICE, "iommu_platform",
+   "on", false);
 }
 
 machine_class->init(machine);
-- 
2.28.0

[for-6.0 v5 08/13] securable guest memory: Introduce sgm "ready" flag

The platform specific details of mechanisms for implementing securable
guest memory may require setup at various points during initialization.
Thus, it's not really feasible to have a single sgm initialization hook,
but instead each mechanism needs its own initialization calls in arch or
machine specific code.

However, to make it harder to have a bug where a mechanism isn't properly
initialized under some circumstances, we want to have a common place,
relatively late in boot, where we verify that sgm has been initialized if
it was requested.

This patch introduces a ready flag to the SecurableGuestMemory base type
to accomplish this, which we verify just before the machine specific
initialization function.

Signed-off-by: David Gibson 
---
 hw/core/machine.c | 8 
 include/exec/securable-guest-memory.h | 2 ++
 target/i386/sev.c | 2 ++
 3 files changed, 12 insertions(+)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index 816ea3ae3e..a67a27d03c 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -1155,6 +1155,14 @@ void machine_run_board_init(MachineState *machine)
 }
 
 if (machine->sgm) {
+/*
+ * Where securable guest memory is initialized depends on the
+ * specific mechanism in use.  But, we need to make sure it's
+ * ready by now.  If it isn't, that's a bug in the
+ * implementation of that sgm mechanism.
+ */
+assert(machine->sgm->ready);
+
 /*
  * With securable guest memory, the host can't see the real
  * contents of RAM, so there's no point in it trying to merge
diff --git a/include/exec/securable-guest-memory.h 
b/include/exec/securable-guest-memory.h
index 7325b504ba..20cf13777b 100644
--- a/include/exec/securable-guest-memory.h
+++ b/include/exec/securable-guest-memory.h
@@ -36,6 +36,8 @@
 
 struct SecurableGuestMemory {
 Object parent;
+
+bool ready;
 };
 
 typedef struct SecurableGuestMemoryClass {
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 7333a60dc0..022ce5fc3a 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -701,6 +701,8 @@ int sev_kvm_init(SecurableGuestMemory *sgm, Error **errp)
 qemu_add_machine_init_done_notifier(&sev_machine_done_notify);
 qemu_add_vm_change_state_handler(sev_vm_state_change, sev);
 
+sgm->ready = true;
+
 return 0;
 err:
 sev_guest = NULL;
-- 
2.28.0

[for-6.0 v5 01/13] qom: Allow optional sugar props

From: Greg Kurz 

Global properties have an @optional field, which allows applying a given
property to a given type even if one of its subclasses doesn't support
it. This is especially used in the compat code when dealing with the
"disable-modern" and "disable-legacy" properties and the "virtio-pci"
type.

Allow object_register_sugar_prop() to set this field as well.

Signed-off-by: Greg Kurz 
Message-Id: <159738953558.377274.16617742952571083440.st...@bahia.lan>
Signed-off-by: David Gibson 
---
 include/qom/object.h |  3 ++-
 qom/object.c |  4 +++-
 softmmu/vl.c | 16 ++--
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/include/qom/object.h b/include/qom/object.h
index d378f13a11..6721cd312e 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -638,7 +638,8 @@ bool object_apply_global_props(Object *obj, const GPtrArray 
*props,
Error **errp);
 void object_set_machine_compat_props(GPtrArray *compat_props);
 void object_set_accelerator_compat_props(GPtrArray *compat_props);
-void object_register_sugar_prop(const char *driver, const char *prop, const 
char *value);
+void object_register_sugar_prop(const char *driver, const char *prop,
+const char *value, bool optional);
 void object_apply_compat_props(Object *obj);
 
 /**
diff --git a/qom/object.c b/qom/object.c
index 1065355233..62218bb17d 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -442,7 +442,8 @@ static GPtrArray *object_compat_props[3];
  * other than "-global".  These are generally used for syntactic
  * sugar and legacy command line options.
  */
-void object_register_sugar_prop(const char *driver, const char *prop, const 
char *value)
+void object_register_sugar_prop(const char *driver, const char *prop,
+const char *value, bool optional)
 {
 GlobalProperty *g;
 if (!object_compat_props[2]) {
@@ -452,6 +453,7 @@ void object_register_sugar_prop(const char *driver, const 
char *prop, const char
 g->driver = g_strdup(driver);
 g->property = g_strdup(prop);
 g->value = g_strdup(value);
+g->optional = optional;
 g_ptr_array_add(object_compat_props[2], g);
 }
 
diff --git a/softmmu/vl.c b/softmmu/vl.c
index e6e0ad5a92..cf4a9dc198 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -884,7 +884,7 @@ static void configure_rtc(QemuOpts *opts)
 if (!strcmp(value, "slew")) {
 object_register_sugar_prop("mc146818rtc",
"lost_tick_policy",
-   "slew");
+   "slew", false);
 } else if (!strcmp(value, "none")) {
 /* discard is default */
 } else {
@@ -2498,12 +2498,14 @@ static int machine_set_property(void *opaque,
 return 0;
 }
 if (g_str_equal(qom_name, "igd-passthru")) {
-object_register_sugar_prop(ACCEL_CLASS_NAME("xen"), qom_name, value);
+object_register_sugar_prop(ACCEL_CLASS_NAME("xen"), qom_name, value,
+   false);
 return 0;
 }
 if (g_str_equal(qom_name, "kvm-shadow-mem") ||
 g_str_equal(qom_name, "kernel-irqchip")) {
-object_register_sugar_prop(ACCEL_CLASS_NAME("kvm"), qom_name, value);
+object_register_sugar_prop(ACCEL_CLASS_NAME("kvm"), qom_name, value,
+   false);
 return 0;
 }
 
@@ -3645,7 +3647,8 @@ void qemu_init(int argc, char **argv, char **envp)
 exit(1);
 #endif
 warn_report("The -tb-size option is deprecated, use -accel 
tcg,tb-size instead");
-object_register_sugar_prop(ACCEL_CLASS_NAME("tcg"), "tb-size", 
optarg);
+object_register_sugar_prop(ACCEL_CLASS_NAME("tcg"), "tb-size",
+   optarg, false);
 break;
 case QEMU_OPTION_icount:
 icount_opts = qemu_opts_parse_noisily(qemu_find_opts("icount"),
@@ -3996,9 +3999,10 @@ void qemu_init(int argc, char **argv, char **envp)
 char *val;
 
 val = g_strdup_printf("%d", current_machine->smp.cpus);
-object_register_sugar_prop("memory-backend", "prealloc-threads", val);
+object_register_sugar_prop("memory-backend", "prealloc-threads", val,
+   false);
 g_free(val);
-object_register_sugar_prop("memory-backend", "prealloc", "on");
+object_register_sugar_prop("memory-backend", "prealloc", "on", false);
 }
 
 /*
-- 
2.28.0

[for-6.0 v5 11/13] spapr: PEF: prevent migration

We haven't yet implemented the fairly involved handshaking that will be
needed to migrate PEF protected guests.  For now, just use a migration
blocker so we get a meaningful error if someone attempts this (this is the
same approach used by AMD SEV).

Signed-off-by: David Gibson 
Reviewed-by: Dr. David Alan Gilbert 
---
 hw/ppc/pef.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c
index 3ae3059cfe..edc3e744ba 100644
--- a/hw/ppc/pef.c
+++ b/hw/ppc/pef.c
@@ -38,7 +38,11 @@ struct PefGuestState {
 };
 
 #ifdef CONFIG_KVM
+static Error *pef_mig_blocker;
+
 static int kvmppc_svm_init(Error **errp)
+
+int kvmppc_svm_init(SecurableGuestMemory *sgm, Error **errp)
 {
 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_SECURABLE_GUEST)) {
 error_setg(errp,
@@ -54,6 +58,11 @@ static int kvmppc_svm_init(Error **errp)
 }
 }
 
+/* add migration blocker */
+error_setg(&pef_mig_blocker, "PEF: Migration is not implemented");
+/* NB: This can fail if --only-migratable is used */
+migrate_add_blocker(pef_mig_blocker, &error_fatal);
+
 return 0;
 }
 
-- 
2.28.0

[for-6.0 v5 03/13] securable guest memory: Handle memory encryption via interface

At the moment AMD SEV sets a special function pointer, plus an opaque
handle in KVMState to let things know how to encrypt guest memory.

Now that we have a QOM interface for handling things related to securable
guest memory, use a QOM method on that interface, rather than a bare
function pointer for this.

Signed-off-by: David Gibson 
Reviewed-by: Richard Henderson 
---
 accel/kvm/kvm-all.c   |  36 +---
 accel/kvm/sev-stub.c  |   9 +-
 include/exec/securable-guest-memory.h |   2 +
 include/sysemu/sev.h  |   5 +-
 target/i386/monitor.c |   1 -
 target/i386/sev.c | 116 ++
 6 files changed, 77 insertions(+), 92 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index baaa54249d..9e7cea64d6 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -47,6 +47,7 @@
 #include "qemu/guest-random.h"
 #include "sysemu/hw_accel.h"
 #include "kvm-cpus.h"
+#include "exec/securable-guest-memory.h"
 
 #include "hw/boards.h"
 
@@ -120,9 +121,8 @@ struct KVMState
 KVMMemoryListener memory_listener;
 QLIST_HEAD(, KVMParkedVcpu) kvm_parked_vcpus;
 
-/* memory encryption */
-void *memcrypt_handle;
-int (*memcrypt_encrypt_data)(void *handle, uint8_t *ptr, uint64_t len);
+/* securable guest memory (e.g. by guest memory encryption) */
+SecurableGuestMemory *sgm;
 
 /* For "info mtree -f" to tell if an MR is registered in KVM */
 int nr_as;
@@ -224,7 +224,7 @@ int kvm_get_max_memslots(void)
 
 bool kvm_memcrypt_enabled(void)
 {
-if (kvm_state && kvm_state->memcrypt_handle) {
+if (kvm_state && kvm_state->sgm) {
 return true;
 }
 
@@ -233,10 +233,12 @@ bool kvm_memcrypt_enabled(void)
 
 int kvm_memcrypt_encrypt_data(uint8_t *ptr, uint64_t len)
 {
-if (kvm_state->memcrypt_handle &&
-kvm_state->memcrypt_encrypt_data) {
-return kvm_state->memcrypt_encrypt_data(kvm_state->memcrypt_handle,
-  ptr, len);
+SecurableGuestMemory *sgm = kvm_state->sgm;
+
+if (sgm) {
+SecurableGuestMemoryClass *sgmc = 
SECURABLE_GUEST_MEMORY_GET_CLASS(sgm);
+
+return sgmc->encrypt_data(sgm, ptr, len);
 }
 
 return 1;
@@ -2206,13 +2208,23 @@ static int kvm_init(MachineState *ms)
  * encryption context.
  */
 if (ms->memory_encryption) {
-kvm_state->memcrypt_handle = sev_guest_init(ms->memory_encryption);
-if (!kvm_state->memcrypt_handle) {
+Object *obj = object_resolve_path_component(object_get_objects_root(),
+ms->memory_encryption);
+
+if (object_dynamic_cast(obj, TYPE_SECURABLE_GUEST_MEMORY)) {
+SecurableGuestMemory *sgm = SECURABLE_GUEST_MEMORY(obj);
+
+/* FIXME handle mechanisms other than SEV */
+ret = sev_kvm_init(sgm);
+if (ret < 0) {
+goto err;
+}
+
+kvm_state->sgm = sgm;
+} else {
 ret = -1;
 goto err;
 }
-
-kvm_state->memcrypt_encrypt_data = sev_encrypt_data;
 }
 
 ret = kvm_arch_init(ms, s);
diff --git a/accel/kvm/sev-stub.c b/accel/kvm/sev-stub.c
index 4f97452585..3df3c88eeb 100644
--- a/accel/kvm/sev-stub.c
+++ b/accel/kvm/sev-stub.c
@@ -15,12 +15,7 @@
 #include "qemu-common.h"
 #include "sysemu/sev.h"
 
-int sev_encrypt_data(void *handle, uint8_t *ptr, uint64_t len)
+int sev_kvm_init(SecurableGuestMemory *sgm)
 {
-abort();
-}
-
-void *sev_guest_init(const char *id)
-{
-return NULL;
+return -1;
 }
diff --git a/include/exec/securable-guest-memory.h 
b/include/exec/securable-guest-memory.h
index 0d5ecfb681..4e2ae27040 100644
--- a/include/exec/securable-guest-memory.h
+++ b/include/exec/securable-guest-memory.h
@@ -39,6 +39,8 @@ struct SecurableGuestMemory {
 
 typedef struct SecurableGuestMemoryClass {
 ObjectClass parent;
+
+int (*encrypt_data)(SecurableGuestMemory *, uint8_t *, uint64_t);
 } SecurableGuestMemoryClass;
 
 #endif /* !CONFIG_USER_ONLY */
diff --git a/include/sysemu/sev.h b/include/sysemu/sev.h
index 98c1ec8d38..36d038a36f 100644
--- a/include/sysemu/sev.h
+++ b/include/sysemu/sev.h
@@ -15,7 +15,8 @@
 #define QEMU_SEV_H
 
 #include "sysemu/kvm.h"
+#include "exec/securable-guest-memory.h"
+
+int sev_kvm_init(SecurableGuestMemory *sgm);
 
-void *sev_guest_init(const char *id);
-int sev_encrypt_data(void *handle, uint8_t *ptr, uint64_t len);
 #endif
diff --git a/target/i386/monitor.c b/target/i386/monitor.c
index 9f9e1c42f4..db6aeaf43a 100644
--- a/target/i386/monitor.c
+++ b/target/i386/monitor.c
@@ -29,7 +29,6 @@
 #include "monitor/hmp.h"
 #include "qapi/qmp/qdict.h"
 #include "sysemu/kvm.h"
-#include "sysemu/sev.h"
 #include "qapi/error.h"
 #include "sev_i386.h"
 #include "qapi/qapi-commands-misc-target.h"
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 53f00a24cf..7b8ce590f7 100644
---

[for-6.0 v5 04/13] securable guest memory: Move side effect out of machine_set_memory_encryption()

When the "memory-encryption" property is set, we also disable KSM
merging for the guest, since it won't accomplish anything.

We want that, but doing it in the property set function itself is
theoretically incorrect, in the unlikely event of some configuration
environment that set the property then cleared it again before
constructing the guest.

More importantly, it makes some other cleanups we want more difficult.
So, instead move this logic to machine_run_board_init() conditional on
the final value of the property.

Signed-off-by: David Gibson 
Reviewed-by: Richard Henderson 
---
 hw/core/machine.c | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/hw/core/machine.c b/hw/core/machine.c
index d0408049b5..cb0711508d 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -427,14 +427,6 @@ static void machine_set_memory_encryption(Object *obj, 
const char *value,
 
 g_free(ms->memory_encryption);
 ms->memory_encryption = g_strdup(value);
-
-/*
- * With memory encryption, the host can't see the real contents of RAM,
- * so there's no point in it trying to merge areas.
- */
-if (value) {
-machine_set_mem_merge(obj, false, errp);
-}
 }
 
 static bool machine_get_nvdimm(Object *obj, Error **errp)
@@ -1131,6 +1123,15 @@ void machine_run_board_init(MachineState *machine)
 cc->deprecation_note);
 }
 
+if (machine->memory_encryption) {
+/*
+ * With memory encryption, the host can't see the real
+ * contents of RAM, so there's no point in it trying to merge
+ * areas.
+ */
+machine_set_mem_merge(OBJECT(machine), false, &error_abort);
+}
+
 machine_class->init(machine);
 }
 
-- 
2.28.0

[for-6.0 v5 02/13] securable guest memory: Introduce new securable guest memory base class

Several architectures have mechanisms which are designed to protect guest
memory from interference or eavesdropping by a compromised hypervisor.  AMD
SEV does this with in-chip memory encryption and Intel's MKTME can do
similar things.  POWER's Protected Execution Framework (PEF) accomplishes a
similar goal using an ultravisor and new memory protection features,
instead of encryption.

To (partially) unify handling for these, this introduces a new
SecurableGuestMemory QOM base class.  "Securable" is kind of vague,
but "secure memory" or "secure guest" seems to be a common theme in the
lexicon around these schemes, so it's the best name I've managed to find
so far.  It's "securable" rather than "secure", because in at least some of
the cases it requires the guest to take specific actions in order to
protect itself from hypervisor eavesdropping.

Signed-off-by: David Gibson 
---
 backends/meson.build  |  1 +
 backends/securable-guest-memory.c | 30 +
 include/exec/securable-guest-memory.h | 46 +++
 include/qemu/typedefs.h   |  1 +
 target/i386/sev.c |  3 +-
 5 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 backends/securable-guest-memory.c
 create mode 100644 include/exec/securable-guest-memory.h

diff --git a/backends/meson.build b/backends/meson.build
index 484456ece7..781594af86 100644
--- a/backends/meson.build
+++ b/backends/meson.build
@@ -6,6 +6,7 @@ softmmu_ss.add([files(
   'rng-builtin.c',
   'rng-egd.c',
   'rng.c',
+  'securable-guest-memory.c',
 ), numa])
 
 softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('rng-random.c'))
diff --git a/backends/securable-guest-memory.c 
b/backends/securable-guest-memory.c
new file mode 100644
index 00..5bf380fd84
--- /dev/null
+++ b/backends/securable-guest-memory.c
@@ -0,0 +1,30 @@
+/*
+ * QEMU Securable Guest Memory interface
+ *
+ * Copyright: David Gibson, Red Hat Inc. 2020
+ *
+ * Authors:
+ *  David Gibson 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "exec/securable-guest-memory.h"
+
+static const TypeInfo securable_guest_memory_info = {
+.parent = TYPE_OBJECT,
+.name = TYPE_SECURABLE_GUEST_MEMORY,
+.class_size = sizeof(SecurableGuestMemoryClass),
+.instance_size = sizeof(SecurableGuestMemory),
+};
+
+static void securable_guest_memory_register_types(void)
+{
+type_register_static(&securable_guest_memory_info);
+}
+
+type_init(securable_guest_memory_register_types)
diff --git a/include/exec/securable-guest-memory.h 
b/include/exec/securable-guest-memory.h
new file mode 100644
index 00..0d5ecfb681
--- /dev/null
+++ b/include/exec/securable-guest-memory.h
@@ -0,0 +1,46 @@
+/*
+ * QEMU Securable Guest Memory interface
+ *   This interface describes the common pieces between various
+ *   schemes for protecting guest memory against a compromised
+ *   hypervisor.  This includes memory encryption (AMD's SEV and
+ *   Intel's MKTME) or special protection modes (PEF on POWER, or PV
+ *   on s390x).
+ *
+ * Copyright: David Gibson, Red Hat Inc. 2020
+ *
+ * Authors:
+ *  David Gibson 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ *
+ */
+#ifndef QEMU_SECURABLE_GUEST_MEMORY_H
+#define QEMU_SECURABLE_GUEST_MEMORY_H
+
+#ifndef CONFIG_USER_ONLY
+
+#include "qom/object.h"
+
+#define TYPE_SECURABLE_GUEST_MEMORY "securable-guest-memory"
+#define SECURABLE_GUEST_MEMORY(obj)\
+OBJECT_CHECK(SecurableGuestMemory, (obj),  \
+ TYPE_SECURABLE_GUEST_MEMORY)
+#define SECURABLE_GUEST_MEMORY_CLASS(klass)\
+OBJECT_CLASS_CHECK(SecurableGuestMemoryClass, (klass), \
+   TYPE_SECURABLE_GUEST_MEMORY)
+#define SECURABLE_GUEST_MEMORY_GET_CLASS(obj)  \
+OBJECT_GET_CLASS(SecurableGuestMemoryClass, (obj), \
+ TYPE_SECURABLE_GUEST_MEMORY)
+
+struct SecurableGuestMemory {
+Object parent;
+};
+
+typedef struct SecurableGuestMemoryClass {
+ObjectClass parent;
+} SecurableGuestMemoryClass;
+
+#endif /* !CONFIG_USER_ONLY */
+
+#endif /* QEMU_SECURABLE_GUEST_MEMORY_H */
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 6281eae3b5..79d53746f1 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -116,6 +116,7 @@ typedef struct QString QString;
 typedef struct RAMBlock RAMBlock;
 typedef struct Range Range;
 typedef struct SavedIOTLB SavedIOTLB;
+typedef struct SecurableGuestMemory SecurableGuestMemory;
 typedef struct SHPCDevice SHPCDevice;
 typedef struct SSIBus SSIBus;
 typedef struct VirtIODevice VirtIODevice;
diff --git a/target/i386/sev.c b/target/i386/sev.c
index

[for-6.0 v5 00/13] Generalize memory encryption models

A number of hardware platforms are implementing mechanisms whereby the
hypervisor does not have unfettered access to guest memory, in order
to mitigate the security impact of a compromised hypervisor.

AMD's SEV implements this with in-cpu memory encryption, and Intel has
its own memory encryption mechanism.  POWER has an upcoming mechanism
to accomplish this in a different way, using a new memory protection
level plus a small trusted ultravisor.  s390 also has a protected
execution environment.

The current code (committed or draft) for these features has each
platform's version configured entirely differently.  That doesn't seem
ideal for users, or particularly for management layers.

AMD SEV introduces a notionally generic machine option
"memory-encryption", but it doesn't actually cover any cases other
than SEV.

This series is a proposal to at least partially unify configuration
for these mechanisms, by renaming and generalizing AMD's
"memory-encryption" property.  It is replaced by a
"securable-guest-memory" property pointing to a platform specific
object which configures and manages the specific details.

Changes since v4:
 * Renamed from "host trust limitation" to "securable guest memory",
   which I think is marginally more descriptive
 * Re-organized initialization, because the previous model called at
   kvm_init didn't work for s390
* Assorted fixes to the s390 implementation; rudimentary testing
  (gitlab CI) only
Changes since v3:
 * Rebased
 * Added first cut at handling of s390 protected virtualization
Changes since RFCv2:
 * Rebased
 * Removed preliminary SEV cleanups (they've been merged)
 * Changed name to "host trust limitation"
 * Added migration blocker to the PEF code (based on SEV's version)
Changes since RFCv1:
 * Rebased
 * Fixed some errors pointed out by Dave Gilbert

David Gibson (12):
  securable guest memory: Introduce new securable guest memory base
class
  securable guest memory: Handle memory encryption via interface
  securable guest memory: Move side effect out of
machine_set_memory_encryption()
  securable guest memory: Rework the "memory-encryption" property
  securable guest memory: Decouple kvm_memcrypt_*() helpers from KVM
  sev: Add Error ** to sev_kvm_init()
  securable guest memory: Introduce sgm "ready" flag
  securable guest memory: Move SEV initialization into arch specific
code
  spapr: Add PEF based securable guest memory
  spapr: PEF: prevent migration
  securable guest memory: Alter virtio default properties for protected
guests
  s390: Recognize securable-guest-memory option

Greg Kurz (1):
  qom: Allow optional sugar props

 accel/kvm/kvm-all.c   |  39 +--
 accel/kvm/sev-stub.c  |  10 +-
 accel/stubs/kvm-stub.c|  10 --
 backends/meson.build  |   1 +
 backends/securable-guest-memory.c |  30 +
 hw/core/machine.c |  71 ++--
 hw/i386/pc_sysfw.c|   6 +-
 hw/ppc/meson.build|   1 +
 hw/ppc/pef.c  | 124 +
 hw/ppc/spapr.c|  10 ++
 hw/s390x/pv.c |  58 ++
 include/exec/securable-guest-memory.h |  86 +++
 include/hw/boards.h   |   2 +-
 include/hw/ppc/pef.h  |  26 +
 include/hw/s390x/pv.h |   1 +
 include/qemu/typedefs.h   |   1 +
 include/qom/object.h  |   3 +-
 include/sysemu/kvm.h  |  17 ---
 include/sysemu/sev.h  |   5 +-
 qom/object.c  |   4 +-
 softmmu/vl.c  |  16 ++-
 target/i386/kvm.c |  12 ++
 target/i386/monitor.c |   1 -
 target/i386/sev.c | 153 --
 target/ppc/kvm.c  |  18 ---
 target/ppc/kvm_ppc.h  |   6 -
 target/s390x/kvm.c|   3 +
 27 files changed, 510 insertions(+), 204 deletions(-)
 create mode 100644 backends/securable-guest-memory.c
 create mode 100644 hw/ppc/pef.c
 create mode 100644 include/exec/securable-guest-memory.h
 create mode 100644 include/hw/ppc/pef.h

-- 
2.28.0

Re: [PATCH v2 2/4] hw/scsi/megasas: Assert cdb_len is valid in megasas_handle_scsi()

2020-12-03 Thread Li Qiang

Philippe Mathieu-Daudé  于2020年12月3日周四 下午8:38写道：
>
> On 12/3/20 1:02 PM, Li Qiang wrote:
> > Philippe Mathieu-Daudé  于2020年12月3日周四 下午7:37写道：
> >>
> >> Hi Li,
> >>
> >> On 12/3/20 12:21 PM, Li Qiang wrote:
> >>> Philippe Mathieu-Daudé  于2020年12月2日周三 上午3:13写道：
> 
>  cdb_len can not be zero... (or less than 6) here, else we have a
>  out-of-bound read first in scsi_cdb_length():
> 
>   71 int scsi_cdb_length(uint8_t *buf)
>   72 {
>   73 int cdb_len;
>   74
>   75 switch (buf[0] >> 5) {
> >>>
> >>> Hi Philippe,
> >>>
> >>> Here I not read the spec.
> >>
> >> Neither did I...
> >>
> >>> Just guest from your patch, this 'buf[0]>>5'
> >>> indicates/related with the cdb length, right?
> >>
> >> This is my understanding too.
> >>
> >>> So here(this patch) you  just want to ensure the 'buf[0]>>5' and the
> >>> 'cdb_len' is consistent.
> >>>
> >>> But I don't  think here is a 'out-of-bound' read issue.
> >>>
> >>>
> >>> The 'buf' is from here 'cdb'.
> >>> static int megasas_handle_scsi(MegasasState *s, MegasasCmd *cmd,
> >>>int frame_cmd)
> >>> {
> >>>
> >>> cdb = cmd->frame->pass.cdb;
> >>>
> >>> 'cmd->frame->pass.cdb' is an array in heap and  its data is mmaped
> >>> from the guest.
> >>>
> >>> The guest can put any data in 'pass.cdb' which 'buf[0]>>5' can be 0 or
> >>> less than 6.
> >>>
> >>> So every read of this 'pass.cdb'[0~15] is valid. Not an oob.
> >>
> >> OK maybe not OOB but infoleak?
> >
> > No. We refer 'infoleak' in qemu situation if the guest can get some
> > memory(not the guest itself, but the qemu's process memory) from the
> > qemu.
> >
> > However here the 'cdb' is actually mmaped from guest. It can be anything.
> > I think here it is just no use data.
>
> 'pass.cdb'[0~15] is allocated. If it gets filled with only
> 1 byte: 0x04, then scsi_cdb_length() returns buflen = 16
> while only 1 byte is filled, so callers will read 1 byte
> set and 15 random bytes.

Yes but no harm.

>
> You are saying this is not an "INFOleak" because the
> leaked memory is allocated on the heap, so nothing critical /
> useful gets stored there?

Yes, 'cmd->frame' is totally mapped from guest in here:

'cmd->frame = pci_dma_map(pcid, frame, &frame_size_p, 0);'

What's the data in 'cdb' is not important from security perspective.




>
> While this might not be a security problem, this still produces
> unpredictable code behavior, so deserve to be fixed.

Yes I agree this. If we follow the exact hardware spec we need to
check how hardware handle this issue.
However as there is no harmful occurs, I think it's enough to focus
the origin issue--"g_malloc overflow because scsi_cdb_length return
-1"


Thanks,
Li Qiang

>
> >>
>   76 case 0:
>   77 cdb_len = 6;
>   78 break;
> 
>  Then another out-of-bound read when the size returned by
>  scsi_cdb_length() is used.
> >>>
> >>> Where is this?
> >>
> >> IIRC scsi_req_parse_cdb().
> >>
> >>>
> >>> So I think your intention is to ensure  'cdb_len' is consistent with
> >>> 'cdb[0]>>5'.
> >>>
> >>> Please correct me if I'm wrong.
> >>
> >> I'll recheck and go back to you during January.
> >>
> >> Regards,
> >>
> >> Phil.
> >>
> 
>  Figured out after reviewing:
>  https://www.mail-archive.com/qemu-devel@nongnu.org/msg757937.html
> 
>  And reproduced fuzzing:
> 
>    qemu-fuzz-i386: hw/scsi/megasas.c:1679: int 
>  megasas_handle_scsi(MegasasState *, MegasasCmd *, int):
>    Assertion `len > 0 && cdb_len >= len' failed.
>    ==1689590== ERROR: libFuzzer: deadly signal
>    #8 0x7f7a5d918e75 in __assert_fail (/lib64/libc.so.6+0x34e75)
>    #9 0x55a1b95cf6d4 in megasas_handle_scsi hw/scsi/megasas.c:1679:5
>    #10 0x55a1b95cf6d4 in megasas_handle_frame 
>  hw/scsi/megasas.c:1975:24
>    #11 0x55a1b95cf6d4 in megasas_mmio_write hw/scsi/megasas.c:2132:9
>    #12 0x55a1b981972e in memory_region_write_accessor 
>  softmmu/memory.c:491:5
>    #13 0x55a1b981972e in access_with_adjusted_size 
>  softmmu/memory.c:552:18
>    #14 0x55a1b981972e in memory_region_dispatch_write 
>  softmmu/memory.c:1501:16
>    #15 0x55a1b97f0ab0 in flatview_write_continue 
>  softmmu/physmem.c:2759:23
>    #16 0x55a1b97ec3f2 in flatview_write softmmu/physmem.c:2799:14
>    #17 0x55a1b97ec3f2 in address_space_write softmmu/physmem.c:2891:18
>    #18 0x55a1b985c7cd in cpu_outw softmmu/ioport.c:70:5
>    #19 0x55a1b99577ac in qtest_process_command softmmu/qtest.c:481:13
> 
>  Inspired-by: Daniele Buono 
>  Inspired-by: Alexander Bulekov 
>  Signed-off-by: Philippe Mathieu-Daudé 
>  ---
>   hw/scsi/megasas.c | 5 +
>   1 file changed, 5 insertions(+)
> 
>  diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
>  index 1a5fc5857db..f5ad4425b5b 100644
>  --- a/hw/scsi/megasas.c
>  +++

[PATCH] pcie_aer: Fix help message of pcie_aer_inject_error command

2020-12-03 Thread Zenghui Yu

There is an interesting typo in the help message of pcie_aer_inject_error
command. Use 'tlp' instead of 'tlb' to match the PCIe AER term.

Signed-off-by: Zenghui Yu 
---
 hmp-commands.hx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index ff2d7aa8f3..dd460eb908 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1302,8 +1302,8 @@ ERST
  " -c for correctable error\n\t\t\t"
   "<id> = qdev device id\n\t\t\t"
   "<error_status> = error string or 32bit\n\t\t\t"
-  "<tlb header> = 32bit x 4\n\t\t\t"
-  "<tlb header prefix> = 32bit x 4",
+  "<tlp header> = 32bit x 4\n\t\t\t"
+  "<tlp header prefix> = 32bit x 4",
 .cmd= hmp_pcie_aer_inject_error,
 },
 
-- 
2.19.1

[PATCH] vfio: Fix vfio_listener_log_sync function name typo

2020-12-03 Thread Zenghui Yu

There is an obvious typo in the function name of the .log_sync() callback.
Spell it correctly.

Signed-off-by: Zenghui Yu 
---
 hw/vfio/common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 6ff1daa763..d360d6f2da 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1118,7 +1118,7 @@ static int vfio_sync_dirty_bitmap(VFIOContainer 
*container,
int128_get64(section->size), ram_addr);
 }
 
-static void vfio_listerner_log_sync(MemoryListener *listener,
+static void vfio_listener_log_sync(MemoryListener *listener,
 MemoryRegionSection *section)
 {
 VFIOContainer *container = container_of(listener, VFIOContainer, listener);
@@ -1136,7 +1136,7 @@ static void vfio_listerner_log_sync(MemoryListener 
*listener,
 static const MemoryListener vfio_memory_listener = {
 .region_add = vfio_listener_region_add,
 .region_del = vfio_listener_region_del,
-.log_sync = vfio_listerner_log_sync,
+.log_sync = vfio_listener_log_sync,
 };
 
 static void vfio_listener_release(VFIOContainer *container)
-- 
2.19.1

ANNOUNCEMENT: New co-maintainer and tree move for qemu ppc target

Hi folks,

Here are a couple of procedural announcements about the qemu ppc
target.

First, I'm pleased to introduce Greg Kurz as co-maintainer, he's been
a contributor and common reviewer for the ppc target code in qemu for
some time.  With him as co-maintainer we should have a bit more
redundancy to handle things like urgent bugs while I'm away.  A patch
adding him to the MAINTAINERS file is in my ppc-for-6.0 tree, so will
be merged into mainline soon after qemu-5.2 releases.

Second, I'm moving the main location of my ppc-for-XX branch from
github[0] to gitlab[1].  This is mostly for my convenience, making it
easier to access the gitlab CI facilities which are already used in
mainline.  I might be updating the existing github tree from time to
time, but don't count on it.

[0] https://github.com/dgibson/qemu.git
[1] https://gitlab.com/dgibson/qemu.git

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

[for-6.0] MAINTAINERS: Add Greg Kurz as co-maintainer for ppc

Greg has agreed to be co-maintainer of the ppc target and machines.
This should avoid repeats of the problem we had in qemu-5.2 where a
last minute fix was needed while I was on holiday.

Signed-off-by: David Gibson 
---
 MAINTAINERS | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

I've also applied this change to my ppc-for-6.0 branch.

diff --git a/MAINTAINERS b/MAINTAINERS
index 68bc160f41..bb7e3b3203 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -272,6 +272,7 @@ F: tests/tcg/openrisc/
 
 PowerPC TCG CPUs
 M: David Gibson 
+M: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Maintained
 F: target/ppc/
@@ -394,6 +395,7 @@ F: target/mips/kvm.c
 
 PPC KVM CPUs
 M: David Gibson 
+M: Greg Kurz 
 S: Maintained
 F: target/ppc/kvm.c
 
@@ -1183,18 +1185,21 @@ PowerPC Machines
 
 405
 M: David Gibson 
+M: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Odd Fixes
 F: hw/ppc/ppc405_boards.c
 
 Bamboo
 M: David Gibson 
+M: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Odd Fixes
 F: hw/ppc/ppc440_bamboo.c
 
 e500
 M: David Gibson 
+M: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Odd Fixes
 F: hw/ppc/e500*
@@ -1208,6 +1213,7 @@ F: pc-bios/u-boot.e500
 
 mpc8544ds
 M: David Gibson 
+M: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Odd Fixes
 F: hw/ppc/mpc8544ds.c
@@ -1216,6 +1222,7 @@ F: hw/ppc/mpc8544_guts.c
 New World (mac99)
 M: Mark Cave-Ayland 
 R: David Gibson 
+R: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Odd Fixes
 F: hw/ppc/mac_newworld.c
@@ -1235,6 +1242,7 @@ F: pc-bios/qemu_vga.ndrv
 Old World (g3beige)
 M: Mark Cave-Ayland 
 R: David Gibson 
+R: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Odd Fixes
 F: hw/ppc/mac_oldworld.c
@@ -1248,6 +1256,8 @@ F: pc-bios/qemu_vga.ndrv
 
 PReP
 M: Hervé Poussineau 
+R: David Gibson 
+R: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Maintained
 F: hw/ppc/prep.c
@@ -1264,6 +1274,7 @@ F: tests/acceptance/ppc_prep_40p.py
 
 sPAPR
 M: David Gibson 
+M: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Supported
 F: hw/*/spapr*
@@ -1281,6 +1292,7 @@ F: tests/qtest/libqos/rtas*
 PowerNV (Non-Virtualized)
 M: Cédric Le Goater 
 M: David Gibson 
+M: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Maintained
 F: hw/ppc/pnv*
@@ -1300,6 +1312,8 @@ F: hw/ppc/virtex_ml507.c
 
 sam460ex
 M: BALATON Zoltan 
+R: David Gibson 
+R: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Maintained
 F: hw/ppc/sam460ex.c
@@ -2078,8 +2092,9 @@ F: tests/qtest/fw_cfg-test.c
 T: git https://github.com/philmd/qemu.git fw_cfg-next
 
 XIVE
-M: David Gibson 
 M: Cédric Le Goater 
+R: David Gibson 
+R: Greg Kurz 
 L: qemu-...@nongnu.org
 S: Supported
 F: hw/*/*xive*
-- 
2.28.0

Re: [PATCH v4 00/11] hvf: Implement Apple Silicon Support

2020-12-03 Thread no-reply

Patchew URL: https://patchew.org/QEMU/20201203234857.21051-1-ag...@csgraf.de/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Type: series
Message-id: 20201203234857.21051-1-ag...@csgraf.de
Subject: [PATCH v4 00/11] hvf: Implement Apple Silicon Support

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag] patchew/20201203234857.21051-1-ag...@csgraf.de -> 
patchew/20201203234857.21051-1-ag...@csgraf.de
Switched to a new branch 'test'
6caf21a hvf: arm: Implement -cpu host
fa2298a hvf: arm: Add support for GICv3
305f942 arm/hvf: Add a WFI handler
56f6fc7 arm: Add Hypervisor.framework build target
fb16e85 hvf: Add Apple Silicon support
223e02c hvf: Simplify post reset/init/loadvm hooks
013e250 arm: Set PSCI to 0.2 for HVF
d13aee0 hvf: Introduce hvf vcpu struct
eb82971 hvf: Move common code out
3805a3a hvf: x86: Remove unused definitions
4437c7f hvf: Add hypervisor entitlement to output binaries

=== OUTPUT BEGIN ===
1/11 Checking commit 4437c7f94205 (hvf: Add hypervisor entitlement to output 
binaries)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#16: 
new file mode 100644

total: 0 errors, 1 warnings, 62 lines checked

Patch 1/11 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
2/11 Checking commit 3805a3a9e516 (hvf: x86: Remove unused definitions)
3/11 Checking commit eb8297184ca0 (hvf: Move common code out)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#39: 
new file mode 100644

total: 0 errors, 1 warnings, 1054 lines checked

Patch 3/11 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
4/11 Checking commit d13aee07fc54 (hvf: Introduce hvf vcpu struct)
WARNING: line over 80 characters
#140: FILE: target/i386/hvf/hvf.c:213:
+wvmcs(cpu->hvf->fd, VMCS_ENTRY_CTLS, 
cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry,

ERROR: "(foo*)" should be "(foo *)"
#748: FILE: target/i386/hvf/x86hvf.c:85:
+if (hv_vcpu_write_fpstate(cpu_state->hvf->fd, (void*)xsave, 4096)) {

ERROR: "(foo*)" should be "(foo *)"
#829: FILE: target/i386/hvf/x86hvf.c:167:
+if (hv_vcpu_read_fpstate(cpu_state->hvf->fd, (void*)xsave, 4096)) {

total: 2 errors, 1 warnings, 996 lines checked

Patch 4/11 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

5/11 Checking commit 013e2506166a (arm: Set PSCI to 0.2 for HVF)
6/11 Checking commit 223e02cbfa0f (hvf: Simplify post reset/init/loadvm hooks)
7/11 Checking commit fb16e85b6ca3 (hvf: Add Apple Silicon support)
WARNING: architecture specific defines should be avoided
#47: FILE: accel/hvf/hvf-cpus.c:61:
+#ifdef __aarch64__

WARNING: architecture specific defines should be avoided
#58: FILE: accel/hvf/hvf-cpus.c:335:
+#ifdef __aarch64__

WARNING: architecture specific defines should be avoided
#90: FILE: include/sysemu/hvf_int.h:15:
+#ifdef __aarch64__

WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#116: 
new file mode 100644

WARNING: line over 80 characters
#578: FILE: target/arm/hvf/hvf.c:458:
+hv_vcpu_set_pending_interrupt(cpu->hvf->fd, HV_INTERRUPT_TYPE_FIQ, 
true);

WARNING: line over 80 characters
#583: FILE: target/arm/hvf/hvf.c:463:
+hv_vcpu_set_pending_interrupt(cpu->hvf->fd, HV_INTERRUPT_TYPE_IRQ, 
true);

total: 0 errors, 6 warnings, 691 lines checked

Patch 7/11 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
8/11 Checking commit 56f6fc7a992e (arm: Add Hypervisor.framework build target)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#47: 
new file mode 100644

total: 0 errors, 1 warnings, 36 lines checked

Patch 8/11 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
9/11 Checking commit 305f94272b01 (arm/hvf: Add a WFI handler)
10/11 Checking commit fa2298a6f780 (hvf: arm: Add support for GICv3)
11/11 Checking commit 6caf21aa9877 (hvf: arm: Implement -cpu host)
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20201203234857.21051-1-ag...@csgraf.de/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[PATCH v4 11/11] hvf: arm: Implement -cpu host

Now that we have working system register sync, we push more target CPU
properties into the virtual machine. That might be useful in some
situations, but is not the typical case that users want.

So let's add a -cpu host option that allows them to explicitly pass all
CPU capabilities of their host CPU into the guest.

Signed-off-by: Alexander Graf 
---
 include/sysemu/hvf.h |  2 ++
 target/arm/cpu.c |  9 ++---
 target/arm/cpu.h |  2 ++
 target/arm/hvf/hvf.c | 41 +
 target/arm/kvm_arm.h |  2 --
 5 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/include/sysemu/hvf.h b/include/sysemu/hvf.h
index f893768df9..7eb61cf094 100644
--- a/include/sysemu/hvf.h
+++ b/include/sysemu/hvf.h
@@ -19,6 +19,8 @@
 #ifdef CONFIG_HVF
 uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx,
  int reg);
+struct ARMCPU;
+void hvf_arm_set_cpu_features_from_host(struct ARMCPU *cpu);
 extern bool hvf_allowed;
 #define hvf_enabled() (hvf_allowed)
 #else /* !CONFIG_HVF */
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 6728426551..bee804b7a8 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2273,12 +2273,16 @@ static void arm_cpu_class_init(ObjectClass *oc, void 
*data)
 #endif
 }
 
-#ifdef CONFIG_KVM
+#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
 static void arm_host_initfn(Object *obj)
 {
 ARMCPU *cpu = ARM_CPU(obj);
 
+#ifdef CONFIG_KVM
 kvm_arm_set_cpu_features_from_host(cpu);
+#else
+hvf_arm_set_cpu_features_from_host(cpu);
+#endif
 if (arm_feature(>env, ARM_FEATURE_AARCH64)) {
 aarch64_add_sve_properties(obj);
 }
@@ -2290,7 +2294,6 @@ static const TypeInfo host_arm_cpu_type_info = {
 .parent = TYPE_AARCH64_CPU,
 .instance_init = arm_host_initfn,
 };
-
 #endif
 
 static void arm_cpu_instance_init(Object *obj)
@@ -2349,7 +2352,7 @@ static void arm_cpu_register_types(void)
 
 type_register_static(_cpu_type_info);
 
-#ifdef CONFIG_KVM
+#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
 type_register_static(_arm_cpu_type_info);
 #endif
 
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index e5514c8286..e54963aa8b 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -2823,6 +2823,8 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);
 #define ARM_CPU_TYPE_NAME(name) (name ARM_CPU_TYPE_SUFFIX)
 #define CPU_RESOLVING_TYPE TYPE_ARM_CPU
 
+#define TYPE_ARM_HOST_CPU "host-" TYPE_ARM_CPU
+
 #define cpu_signal_handler cpu_arm_signal_handler
 #define cpu_list arm_cpu_list
 
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index bc955c097f..87b32dc8c9 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -372,6 +372,47 @@ static uint64_t hvf_get_reg(CPUState *cpu, int rt)
 return val;
 }
 
+void hvf_arm_set_cpu_features_from_host(ARMCPU *cpu)
+{
+ARMISARegisters host_isar;
+const struct isar_regs {
+int reg;
+uint64_t *val;
+} regs[] = {
+{ HV_SYS_REG_ID_AA64PFR0_EL1, _isar.id_aa64pfr0 },
+{ HV_SYS_REG_ID_AA64PFR1_EL1, _isar.id_aa64pfr1 },
+{ HV_SYS_REG_ID_AA64DFR0_EL1, _isar.id_aa64dfr0 },
+{ HV_SYS_REG_ID_AA64DFR1_EL1, _isar.id_aa64dfr1 },
+{ HV_SYS_REG_ID_AA64ISAR0_EL1, _isar.id_aa64isar0 },
+{ HV_SYS_REG_ID_AA64ISAR1_EL1, _isar.id_aa64isar1 },
+{ HV_SYS_REG_ID_AA64MMFR0_EL1, _isar.id_aa64mmfr0 },
+{ HV_SYS_REG_ID_AA64MMFR1_EL1, _isar.id_aa64mmfr1 },
+{ HV_SYS_REG_ID_AA64MMFR2_EL1, _isar.id_aa64mmfr2 },
+};
+hv_vcpu_t fd;
+hv_vcpu_exit_t *exit;
+int i;
+
+cpu->dtb_compatible = "arm,arm-v8";
+cpu->env.features = (1ULL << ARM_FEATURE_V8) |
+(1ULL << ARM_FEATURE_NEON) |
+(1ULL << ARM_FEATURE_AARCH64) |
+(1ULL << ARM_FEATURE_PMU) |
+(1ULL << ARM_FEATURE_GENERIC_TIMER);
+
+/* We set up a small vcpu to extract host registers */
+
+assert_hvf_ok(hv_vcpu_create(, , NULL));
+for (i = 0; i < ARRAY_SIZE(regs); i++) {
+assert_hvf_ok(hv_vcpu_get_sys_reg(fd, regs[i].reg, regs[i].val));
+}
+assert_hvf_ok(hv_vcpu_get_sys_reg(fd, HV_SYS_REG_MIDR_EL1, >midr));
+assert_hvf_ok(hv_vcpu_destroy(fd));
+
+cpu->isar = host_isar;
+cpu->reset_sctlr = 0x00c50078;
+}
+
 void hvf_arch_vcpu_destroy(CPUState *cpu)
 {
 }
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index eb81b7059e..081727a37e 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -214,8 +214,6 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t 
*cpus_to_try,
  */
 void kvm_arm_destroy_scratch_host_vcpu(int *fdarray);
 
-#define TYPE_ARM_HOST_CPU "host-" TYPE_ARM_CPU
-
 /**
  * ARMHostCPUFeatures: information about the host CPU (identified
  * by asking the host kernel)
-- 
2.24.3 (Apple Git-128)

[PATCH v4 09/11] arm/hvf: Add a WFI handler

From: Peter Collingbourne 

Sleep on WFI until the VTIMER is due but allow ourselves to be woken
up on IPI.

In this implementation IPI is blocked on the CPU thread at startup and
pselect() is used to atomically unblock the signal and begin sleeping.
The signal is sent unconditionally so there's no need to worry about
races between actually sleeping and the "we think we're sleeping"
state. It may lead to an extra wakeup but that's better than missing
it entirely.

Signed-off-by: Peter Collingbourne 
[agraf: Remove unused 'set' variable, always advance PC on WFX trap]
Signed-off-by: Alexander Graf 
---
 accel/hvf/hvf-cpus.c |  5 ++--
 include/sysemu/hvf_int.h |  1 +
 target/arm/hvf/hvf.c | 56 
 3 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
index ef18f01a7d..3414c190c3 100644
--- a/accel/hvf/hvf-cpus.c
+++ b/accel/hvf/hvf-cpus.c
@@ -322,15 +322,14 @@ static int hvf_init_vcpu(CPUState *cpu)
 cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
 
 /* init cpu signals */
-sigset_t set;
 struct sigaction sigact;
 
 memset(, 0, sizeof(sigact));
 sigact.sa_handler = dummy_signal;
 sigaction(SIG_IPI, , NULL);
 
-pthread_sigmask(SIG_BLOCK, NULL, );
-sigdelset(, SIG_IPI);
+pthread_sigmask(SIG_BLOCK, NULL, >hvf->unblock_ipi_mask);
+sigdelset(>hvf->unblock_ipi_mask, SIG_IPI);
 
 #ifdef __aarch64__
 r = hv_vcpu_create(>hvf->fd, (hv_vcpu_exit_t **)>hvf->exit, 
NULL);
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index c2ac6c8f97..7a397fe85a 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -51,6 +51,7 @@ extern HVFState *hvf_state;
 struct hvf_vcpu_state {
 uint64_t fd;
 void *exit;
+sigset_t unblock_ipi_mask;
 };
 
 void assert_hvf_ok(hv_return_t ret);
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 8f5e2b0bd0..979008e188 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -2,6 +2,7 @@
  * QEMU Hypervisor.framework support for Apple Silicon
 
  * Copyright 2020 Alexander Graf 
+ * Copyright 2020 Google LLC
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -17,6 +18,8 @@
 #include "sysemu/hvf_int.h"
 #include "sysemu/hw_accel.h"
 
+#include 
+
 #include "exec/address-spaces.h"
 #include "hw/irq.h"
 #include "qemu/main-loop.h"
@@ -411,6 +414,7 @@ int hvf_arch_init_vcpu(CPUState *cpu)
 
 void hvf_kick_vcpu_thread(CPUState *cpu)
 {
+cpus_kick_thread(cpu);
 hv_vcpus_exit(>hvf->fd, 1);
 }
 
@@ -466,6 +470,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
 return 0;
 }
 
+static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
+{
+/*
+ * Use pselect to sleep so that other threads can IPI us while we're
+ * sleeping.
+ */
+qatomic_mb_set(>thread_kicked, false);
+qemu_mutex_unlock_iothread();
+pselect(0, 0, 0, 0, ts, >hvf->unblock_ipi_mask);
+qemu_mutex_lock_iothread();
+}
+
 int hvf_vcpu_exec(CPUState *cpu)
 {
 ARMCPU *arm_cpu = ARM_CPU(cpu);
@@ -577,6 +593,46 @@ int hvf_vcpu_exec(CPUState *cpu)
 }
 case EC_WFX_TRAP:
 advance_pc = true;
+if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
+(CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
+
+uint64_t ctl;
+r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CTL_EL0,
+);
+assert_hvf_ok(r);
+
+if (!(ctl & 1) || (ctl & 2)) {
+/* Timer disabled or masked, just wait for an IPI. */
+hvf_wait_for_ipi(cpu, NULL);
+break;
+}
+
+uint64_t cval;
+r = hv_vcpu_get_sys_reg(cpu->hvf->fd, HV_SYS_REG_CNTV_CVAL_EL0,
+);
+assert_hvf_ok(r);
+
+int64_t ticks_to_sleep = cval - mach_absolute_time();
+if (ticks_to_sleep < 0) {
+break;
+}
+
+uint64_t seconds = ticks_to_sleep / arm_cpu->gt_cntfrq_hz;
+uint64_t nanos =
+(ticks_to_sleep - arm_cpu->gt_cntfrq_hz * seconds) *
+10 / arm_cpu->gt_cntfrq_hz;
+
+/*
+ * Don't sleep for less than 2ms. This is believed to improve
+ * latency of message passing workloads.
+ */
+if (!seconds && nanos < 200) {
+break;
+}
+
+struct timespec ts = { seconds, nanos };
+hvf_wait_for_ipi(cpu, );
+}
 break;
 case EC_AA64_HVC:
 cpu_synchronize_state(cpu);
-- 
2.24.3 (Apple Git-128)

[PATCH v4 07/11] hvf: Add Apple Silicon support

With Apple Silicon available to the masses, it's a good time to add support
for driving its virtualization extensions from QEMU.

This patch adds all necessary architecture specific code to get basic VMs
working. It's still pretty raw, but definitely functional.

Known limitations:

  - Vtimer acknowledgement is hacky
  - Should implement more sysregs and then fault on invalid ones
  - WFI handling is missing, need to marry it with vtimer

Signed-off-by: Alexander Graf 

---

v1 -> v2:

  - Merge vcpu kick function patch
  - Implement WFI handling (allows vCPUs to sleep)
  - Synchronize system registers (fixes OVMF crashes and reboot)
  - Don't always call cpu_synchronize_state()
  - Use more fine grained iothread locking
  - Populate aa64mmfr0 from hardware

v2 -> v3:

  - Advance PC on SMC
  - Use cp list interface for sysreg syncs
  - Do not set current_cpu
  - Fix sysreg isread mask
  - Move sysreg handling to functions
  - Remove WFI logic again
  - Revert to global iothread locking
  - Use Hypervisor.h on arm, hv.h does not contain aarch64 definitions

v3 -> v4:

  - No longer include Hypervisor.h
---
 MAINTAINERS  |   5 +
 accel/hvf/hvf-cpus.c |  14 +
 include/sysemu/hvf_int.h |   9 +-
 target/arm/hvf/hvf.c | 618 +++
 4 files changed, 645 insertions(+), 1 deletion(-)
 create mode 100644 target/arm/hvf/hvf.c

diff --git a/MAINTAINERS b/MAINTAINERS
index ca4b6d9279..9cd1d9d448 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -439,6 +439,11 @@ F: accel/accel.c
 F: accel/Makefile.objs
 F: accel/stubs/Makefile.objs
 
+Apple Silicon HVF CPUs
+M: Alexander Graf 
+S: Maintained
+F: target/arm/hvf/
+
 X86 HVF CPUs
 M: Cameron Esfahani 
 M: Roman Bolshakov 
diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
index 71721e17de..ef18f01a7d 100644
--- a/accel/hvf/hvf-cpus.c
+++ b/accel/hvf/hvf-cpus.c
@@ -58,6 +58,10 @@
 #include "sysemu/runstate.h"
 #include "qemu/guest-random.h"
 
+#ifdef __aarch64__
+#define HV_VM_DEFAULT NULL
+#endif
+
 /* Memory slots */
 
 struct mac_slot {
@@ -328,7 +332,11 @@ static int hvf_init_vcpu(CPUState *cpu)
 pthread_sigmask(SIG_BLOCK, NULL, );
 sigdelset(, SIG_IPI);
 
+#ifdef __aarch64__
+r = hv_vcpu_create(>hvf->fd, (hv_vcpu_exit_t **)>hvf->exit, 
NULL);
+#else
 r = hv_vcpu_create((hv_vcpuid_t *)>hvf->fd, HV_VCPU_DEFAULT);
+#endif
 cpu->vcpu_dirty = 1;
 assert_hvf_ok(r);
 
@@ -399,8 +407,14 @@ static void hvf_start_vcpu_thread(CPUState *cpu)
cpu, QEMU_THREAD_JOINABLE);
 }
 
+__attribute__((weak)) void hvf_kick_vcpu_thread(CPUState *cpu)
+{
+cpus_kick_thread(cpu);
+}
+
 static const CpusAccel hvf_cpus = {
 .create_vcpu_thread = hvf_start_vcpu_thread,
+.kick_vcpu_thread = hvf_kick_vcpu_thread,
 
 .synchronize_post_reset = hvf_cpu_synchronize_post_reset,
 .synchronize_post_init = hvf_cpu_synchronize_post_init,
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index 9d3cb53e47..c2ac6c8f97 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -11,7 +11,12 @@
 #ifndef HVF_INT_H
 #define HVF_INT_H
 
+#include "qemu/osdep.h"
+#ifdef __aarch64__
+#include 
+#else
 #include 
+#endif
 
 /* hvf_slot flags */
 #define HVF_SLOT_LOG (1 << 0)
@@ -44,7 +49,8 @@ struct HVFState {
 extern HVFState *hvf_state;
 
 struct hvf_vcpu_state {
-int fd;
+uint64_t fd;
+void *exit;
 };
 
 void assert_hvf_ok(hv_return_t ret);
@@ -54,5 +60,6 @@ int hvf_arch_init_vcpu(CPUState *cpu);
 void hvf_arch_vcpu_destroy(CPUState *cpu);
 int hvf_vcpu_exec(CPUState *cpu);
 hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t);
+void hvf_kick_vcpu_thread(CPUState *cpu);
 
 #endif
diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
new file mode 100644
index 00..8f5e2b0bd0
--- /dev/null
+++ b/target/arm/hvf/hvf.c
@@ -0,0 +1,618 @@
+/*
+ * QEMU Hypervisor.framework support for Apple Silicon
+
+ * Copyright 2020 Alexander Graf 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+
+#include "sysemu/runstate.h"
+#include "sysemu/hvf.h"
+#include "sysemu/hvf_int.h"
+#include "sysemu/hw_accel.h"
+
+#include "exec/address-spaces.h"
+#include "hw/irq.h"
+#include "qemu/main-loop.h"
+#include "sysemu/accel.h"
+#include "sysemu/cpus.h"
+#include "target/arm/cpu.h"
+#include "target/arm/internals.h"
+
+#define HVF_DEBUG 0
+#define DPRINTF(...)\
+if (HVF_DEBUG) {\
+fprintf(stderr, "HVF %s:%d ", __func__, __LINE__);  \
+fprintf(stderr, __VA_ARGS__);   \
+fprintf(stderr, "\n");  \
+}
+
+#define HVF_SYSREG(crn, crm, op0, op1, op2) \
+ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2)
+#define

[PATCH v4 04/11] hvf: Introduce hvf vcpu struct

We will need more than a single field for hvf going forward. To keep
the global vcpu struct uncluttered, let's allocate a special hvf vcpu
struct, similar to how hax does it.

Signed-off-by: Alexander Graf 
Reviewed-by: Roman Bolshakov 
Tested-by: Roman Bolshakov 
---
 accel/hvf/hvf-cpus.c|   8 +-
 include/hw/core/cpu.h   |   3 +-
 include/sysemu/hvf_int.h|   4 +
 target/i386/hvf/hvf.c   | 102 +-
 target/i386/hvf/vmx.h   |  24 +++--
 target/i386/hvf/x86.c   |  28 ++---
 target/i386/hvf/x86_descr.c |  26 ++---
 target/i386/hvf/x86_emu.c   |  62 +--
 target/i386/hvf/x86_mmu.c   |   4 +-
 target/i386/hvf/x86_task.c  |  12 +--
 target/i386/hvf/x86hvf.c| 210 ++--
 11 files changed, 247 insertions(+), 236 deletions(-)

diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
index 60f6d76bf3..1b0c868944 100644
--- a/accel/hvf/hvf-cpus.c
+++ b/accel/hvf/hvf-cpus.c
@@ -312,10 +312,12 @@ static void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu)
 
 static void hvf_vcpu_destroy(CPUState *cpu)
 {
-hv_return_t ret = hv_vcpu_destroy(cpu->hvf_fd);
+hv_return_t ret = hv_vcpu_destroy(cpu->hvf->fd);
 assert_hvf_ok(ret);
 
 hvf_arch_vcpu_destroy(cpu);
+free(cpu->hvf);
+cpu->hvf = NULL;
 }
 
 static void dummy_signal(int sig)
@@ -326,6 +328,8 @@ static int hvf_init_vcpu(CPUState *cpu)
 {
 int r;
 
+cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
+
 /* init cpu signals */
 sigset_t set;
 struct sigaction sigact;
@@ -337,7 +341,7 @@ static int hvf_init_vcpu(CPUState *cpu)
 pthread_sigmask(SIG_BLOCK, NULL, );
 sigdelset(, SIG_IPI);
 
-r = hv_vcpu_create((hv_vcpuid_t *)>hvf_fd, HV_VCPU_DEFAULT);
+r = hv_vcpu_create((hv_vcpuid_t *)>hvf->fd, HV_VCPU_DEFAULT);
 cpu->vcpu_dirty = 1;
 assert_hvf_ok(r);
 
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 3d92c967ff..6032d8a52c 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -280,6 +280,7 @@ struct KVMState;
 struct kvm_run;
 
 struct hax_vcpu_state;
+struct hvf_vcpu_state;
 
 #define TB_JMP_CACHE_BITS 12
 #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
@@ -463,7 +464,7 @@ struct CPUState {
 
 struct hax_vcpu_state *hax_vcpu;
 
-int hvf_fd;
+struct hvf_vcpu_state *hvf;
 
 /* track IOMMUs whose translations we've cached in the TCG TLB */
 GArray *iommu_notifiers;
diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
index 69de46db7d..9d3cb53e47 100644
--- a/include/sysemu/hvf_int.h
+++ b/include/sysemu/hvf_int.h
@@ -43,6 +43,10 @@ struct HVFState {
 };
 extern HVFState *hvf_state;
 
+struct hvf_vcpu_state {
+int fd;
+};
+
 void assert_hvf_ok(hv_return_t ret);
 int hvf_get_registers(CPUState *cpu);
 int hvf_put_registers(CPUState *cpu);
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 8b96ecd619..08b4adecd9 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -80,11 +80,11 @@ void vmx_update_tpr(CPUState *cpu)
 int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4;
 int irr = apic_get_highest_priority_irr(x86_cpu->apic_state);
 
-wreg(cpu->hvf_fd, HV_X86_TPR, tpr);
+wreg(cpu->hvf->fd, HV_X86_TPR, tpr);
 if (irr == -1) {
-wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0);
+wvmcs(cpu->hvf->fd, VMCS_TPR_THRESHOLD, 0);
 } else {
-wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
+wvmcs(cpu->hvf->fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 :
   irr >> 4);
 }
 }
@@ -92,7 +92,7 @@ void vmx_update_tpr(CPUState *cpu)
 static void update_apic_tpr(CPUState *cpu)
 {
 X86CPU *x86_cpu = X86_CPU(cpu);
-int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4;
+int tpr = rreg(cpu->hvf->fd, HV_X86_TPR) >> 4;
 cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
 }
 
@@ -194,43 +194,43 @@ int hvf_arch_init_vcpu(CPUState *cpu)
 }
 
 /* set VMCS control fields */
-wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS,
+wvmcs(cpu->hvf->fd, VMCS_PIN_BASED_CTLS,
   cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased,
   VMCS_PIN_BASED_CTLS_EXTINT |
   VMCS_PIN_BASED_CTLS_NMI |
   VMCS_PIN_BASED_CTLS_VNMI));
-wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS,
+wvmcs(cpu->hvf->fd, VMCS_PRI_PROC_BASED_CTLS,
   cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased,
   VMCS_PRI_PROC_BASED_CTLS_HLT |
   VMCS_PRI_PROC_BASED_CTLS_MWAIT |
   VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET |
   VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) |
   VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL);
-wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS,
+wvmcs(cpu->hvf->fd, VMCS_SEC_PROC_BASED_CTLS,
   cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2,
VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES));
 
-wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 
cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry,
+wvmcs(cpu->hvf->fd, VMCS_ENTRY_CTLS,

[PATCH v4 10/11] hvf: arm: Add support for GICv3

We currently only support GICv2 emulation. To also support GICv3, we will
need to pass a few system registers into their respective handler functions.

This patch adds handling for all of the required system registers, so that
we can run with more than 8 vCPUs.

Signed-off-by: Alexander Graf 
---
 target/arm/hvf/hvf.c | 141 +++
 1 file changed, 141 insertions(+)

diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index 979008e188..bc955c097f 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -22,6 +22,7 @@
 
 #include "exec/address-spaces.h"
 #include "hw/irq.h"
+#include "hw/intc/gicv3_internal.h"
 #include "qemu/main-loop.h"
 #include "sysemu/accel.h"
 #include "sysemu/cpus.h"
@@ -46,6 +47,33 @@
 #define SYSREG_CNTPCT_EL0 SYSREG(3, 3, 1, 14, 0)
 #define SYSREG_PMCCNTR_EL0SYSREG(3, 3, 0, 9, 13)
 
+#define SYSREG_ICC_AP0R0_EL1 SYSREG(3, 0, 4, 12, 8)
+#define SYSREG_ICC_AP0R1_EL1 SYSREG(3, 0, 5, 12, 8)
+#define SYSREG_ICC_AP0R2_EL1 SYSREG(3, 0, 6, 12, 8)
+#define SYSREG_ICC_AP0R3_EL1 SYSREG(3, 0, 7, 12, 8)
+#define SYSREG_ICC_AP1R0_EL1 SYSREG(3, 0, 0, 12, 9)
+#define SYSREG_ICC_AP1R1_EL1 SYSREG(3, 0, 1, 12, 9)
+#define SYSREG_ICC_AP1R2_EL1 SYSREG(3, 0, 2, 12, 9)
+#define SYSREG_ICC_AP1R3_EL1 SYSREG(3, 0, 3, 12, 9)
+#define SYSREG_ICC_ASGI1R_EL1SYSREG(3, 0, 6, 12, 11)
+#define SYSREG_ICC_BPR0_EL1  SYSREG(3, 0, 3, 12, 8)
+#define SYSREG_ICC_BPR1_EL1  SYSREG(3, 0, 3, 12, 12)
+#define SYSREG_ICC_CTLR_EL1  SYSREG(3, 0, 4, 12, 12)
+#define SYSREG_ICC_DIR_EL1   SYSREG(3, 0, 1, 12, 11)
+#define SYSREG_ICC_EOIR0_EL1 SYSREG(3, 0, 1, 12, 8)
+#define SYSREG_ICC_EOIR1_EL1 SYSREG(3, 0, 1, 12, 12)
+#define SYSREG_ICC_HPPIR0_EL1SYSREG(3, 0, 2, 12, 8)
+#define SYSREG_ICC_HPPIR1_EL1SYSREG(3, 0, 2, 12, 12)
+#define SYSREG_ICC_IAR0_EL1  SYSREG(3, 0, 0, 12, 8)
+#define SYSREG_ICC_IAR1_EL1  SYSREG(3, 0, 0, 12, 12)
+#define SYSREG_ICC_IGRPEN0_EL1   SYSREG(3, 0, 6, 12, 12)
+#define SYSREG_ICC_IGRPEN1_EL1   SYSREG(3, 0, 7, 12, 12)
+#define SYSREG_ICC_PMR_EL1   SYSREG(3, 0, 0, 4, 6)
+#define SYSREG_ICC_RPR_EL1   SYSREG(3, 0, 3, 12, 11)
+#define SYSREG_ICC_SGI0R_EL1 SYSREG(3, 0, 7, 12, 11)
+#define SYSREG_ICC_SGI1R_EL1 SYSREG(3, 0, 5, 12, 11)
+#define SYSREG_ICC_SRE_EL1   SYSREG(3, 0, 5, 12, 12)
+
 #define WFX_IS_WFE (1 << 0)
 
 struct hvf_reg_match {
@@ -418,6 +446,38 @@ void hvf_kick_vcpu_thread(CPUState *cpu)
 hv_vcpus_exit(>hvf->fd, 1);
 }
 
+static uint32_t hvf_reg2cp_reg(uint32_t reg)
+{
+return ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
+  (reg >> 10) & 0xf,
+  (reg >> 1) & 0xf,
+  (reg >> 20) & 0x3,
+  (reg >> 14) & 0x7,
+  (reg >> 17) & 0x7);
+}
+
+static uint64_t hvf_sysreg_read_cp(CPUState *cpu, uint32_t reg)
+{
+ARMCPU *arm_cpu = ARM_CPU(cpu);
+CPUARMState *env = _cpu->env;
+const ARMCPRegInfo *ri;
+uint64_t val = 0;
+
+ri = get_arm_cp_reginfo(arm_cpu->cp_regs, hvf_reg2cp_reg(reg));
+if (ri) {
+if (ri->type & ARM_CP_CONST) {
+val = ri->resetvalue;
+} else if (ri->readfn) {
+val = ri->readfn(env, ri);
+} else {
+val = CPREG_FIELD64(env, ri);
+}
+DPRINTF("vgic read from %s [val=%016llx]", ri->name, val);
+}
+
+return val;
+}
+
 static uint64_t hvf_sysreg_read(CPUState *cpu, uint32_t reg)
 {
 ARMCPU *arm_cpu = ARM_CPU(cpu);
@@ -431,6 +491,39 @@ static uint64_t hvf_sysreg_read(CPUState *cpu, uint32_t 
reg)
 case SYSREG_PMCCNTR_EL0:
 val = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
 break;
+case SYSREG_ICC_AP0R0_EL1:
+case SYSREG_ICC_AP0R1_EL1:
+case SYSREG_ICC_AP0R2_EL1:
+case SYSREG_ICC_AP0R3_EL1:
+case SYSREG_ICC_AP1R0_EL1:
+case SYSREG_ICC_AP1R1_EL1:
+case SYSREG_ICC_AP1R2_EL1:
+case SYSREG_ICC_AP1R3_EL1:
+case SYSREG_ICC_ASGI1R_EL1:
+case SYSREG_ICC_BPR0_EL1:
+case SYSREG_ICC_BPR1_EL1:
+case SYSREG_ICC_DIR_EL1:
+case SYSREG_ICC_EOIR0_EL1:
+case SYSREG_ICC_EOIR1_EL1:
+case SYSREG_ICC_HPPIR0_EL1:
+case SYSREG_ICC_HPPIR1_EL1:
+case SYSREG_ICC_IAR0_EL1:
+case SYSREG_ICC_IAR1_EL1:
+case SYSREG_ICC_IGRPEN0_EL1:
+case SYSREG_ICC_IGRPEN1_EL1:
+case SYSREG_ICC_PMR_EL1:
+case SYSREG_ICC_SGI0R_EL1:
+case SYSREG_ICC_SGI1R_EL1:
+case SYSREG_ICC_SRE_EL1:
+val = hvf_sysreg_read_cp(cpu, reg);
+break;
+case SYSREG_ICC_CTLR_EL1:
+val = hvf_sysreg_read_cp(cpu, reg);
+
+/* AP0R registers above 0 don't trap, expose less PRIs to fit */
+val &= ~ICC_CTLR_EL1_PRIBITS_MASK;
+val |= 4 << ICC_CTLR_EL1_PRIBITS_SHIFT;
+break;
 default:
 DPRINTF("unhandled sysreg read %08x (op0=%d op1=%d op2=%d "
 "crn=%d crm=%d)", reg, (reg >> 20) & 0x3,

[PATCH v4 08/11] arm: Add Hypervisor.framework build target

Now that we have all logic in place that we need to handle Hypervisor.framework
on Apple Silicon systems, let's add CONFIG_HVF for aarch64 as well so that we
can build it.

Signed-off-by: Alexander Graf 

---

v1 -> v2:

  - Fix build on 32bit arm

v3 -> v4:

  - Remove i386-softmmu target
---
 meson.build| 11 ++-
 target/arm/hvf/meson.build |  3 +++
 target/arm/meson.build |  2 ++
 3 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 target/arm/hvf/meson.build

diff --git a/meson.build b/meson.build
index 86d433c8a4..a2323e8d23 100644
--- a/meson.build
+++ b/meson.build
@@ -74,16 +74,25 @@ else
 endif
 
 accelerator_targets = { 'CONFIG_KVM': kvm_targets }
+
+if cpu in ['x86', 'x86_64']
+  hvf_targets = ['x86_64-softmmu']
+elif cpu in ['aarch64']
+  hvf_targets = ['aarch64-softmmu']
+else
+  hvf_targets = []
+endif
+
 if cpu in ['x86', 'x86_64', 'arm', 'aarch64']
   # i368 emulator provides xenpv machine type for multiple architectures
   accelerator_targets += {
 'CONFIG_XEN': ['i386-softmmu', 'x86_64-softmmu'],
+'CONFIG_HVF': hvf_targets,
   }
 endif
 if cpu in ['x86', 'x86_64']
   accelerator_targets += {
 'CONFIG_HAX': ['i386-softmmu', 'x86_64-softmmu'],
-'CONFIG_HVF': ['x86_64-softmmu'],
 'CONFIG_WHPX': ['i386-softmmu', 'x86_64-softmmu'],
   }
 endif
diff --git a/target/arm/hvf/meson.build b/target/arm/hvf/meson.build
new file mode 100644
index 00..855e6cce5a
--- /dev/null
+++ b/target/arm/hvf/meson.build
@@ -0,0 +1,3 @@
+arm_softmmu_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: files(
+  'hvf.c',
+))
diff --git a/target/arm/meson.build b/target/arm/meson.build
index f5de2a77b8..95bebae216 100644
--- a/target/arm/meson.build
+++ b/target/arm/meson.build
@@ -56,5 +56,7 @@ arm_softmmu_ss.add(files(
   'psci.c',
 ))
 
+subdir('hvf')
+
 target_arch += {'arm': arm_ss}
 target_softmmu_arch += {'arm': arm_softmmu_ss}
-- 
2.24.3 (Apple Git-128)

[PATCH v4 03/11] hvf: Move common code out

Until now, Hypervisor.framework has only been available on x86_64 systems.
With Apple Silicon shipping now, it extends its reach to aarch64. To
prepare for support for multiple architectures, let's move common code out
into its own accel directory.

Signed-off-by: Alexander Graf 

---

v3 -> v4:

  - Use hv.h instead of Hypervisor.h for 10.15 compat
  - Remove manual inclusion of Hypervisor.h in common .c files
---
 MAINTAINERS |   9 +-
 accel/hvf/hvf-all.c |  54 +
 accel/hvf/hvf-cpus.c| 462 
 accel/hvf/meson.build   |   7 +
 accel/meson.build   |   1 +
 include/sysemu/hvf_int.h|  54 +
 target/i386/hvf/hvf-cpus.c  | 131 --
 target/i386/hvf/hvf-cpus.h  |  25 --
 target/i386/hvf/hvf-i386.h  |  33 +--
 target/i386/hvf/hvf.c   | 360 +---
 target/i386/hvf/meson.build |   1 -
 target/i386/hvf/x86hvf.c|  11 +-
 target/i386/hvf/x86hvf.h|   2 -
 13 files changed, 596 insertions(+), 554 deletions(-)
 create mode 100644 accel/hvf/hvf-all.c
 create mode 100644 accel/hvf/hvf-cpus.c
 create mode 100644 accel/hvf/meson.build
 create mode 100644 include/sysemu/hvf_int.h
 delete mode 100644 target/i386/hvf/hvf-cpus.c
 delete mode 100644 target/i386/hvf/hvf-cpus.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 68bc160f41..ca4b6d9279 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -444,9 +444,16 @@ M: Cameron Esfahani 
 M: Roman Bolshakov 
 W: https://wiki.qemu.org/Features/HVF
 S: Maintained
-F: accel/stubs/hvf-stub.c
 F: target/i386/hvf/
+
+HVF
+M: Cameron Esfahani 
+M: Roman Bolshakov 
+W: https://wiki.qemu.org/Features/HVF
+S: Maintained
+F: accel/hvf/
 F: include/sysemu/hvf.h
+F: include/sysemu/hvf_int.h
 
 WHPX CPUs
 M: Sunil Muthuswamy 
diff --git a/accel/hvf/hvf-all.c b/accel/hvf/hvf-all.c
new file mode 100644
index 00..5b415eb0ed
--- /dev/null
+++ b/accel/hvf/hvf-all.c
@@ -0,0 +1,54 @@
+/*
+ * QEMU Hypervisor.framework support
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "sysemu/hvf.h"
+#include "sysemu/hvf_int.h"
+#include "sysemu/runstate.h"
+
+#include "qemu/main-loop.h"
+#include "sysemu/accel.h"
+
+bool hvf_allowed;
+HVFState *hvf_state;
+
+void assert_hvf_ok(hv_return_t ret)
+{
+if (ret == HV_SUCCESS) {
+return;
+}
+
+switch (ret) {
+case HV_ERROR:
+error_report("Error: HV_ERROR");
+break;
+case HV_BUSY:
+error_report("Error: HV_BUSY");
+break;
+case HV_BAD_ARGUMENT:
+error_report("Error: HV_BAD_ARGUMENT");
+break;
+case HV_NO_RESOURCES:
+error_report("Error: HV_NO_RESOURCES");
+break;
+case HV_NO_DEVICE:
+error_report("Error: HV_NO_DEVICE");
+break;
+case HV_UNSUPPORTED:
+error_report("Error: HV_UNSUPPORTED");
+break;
+default:
+error_report("Unknown Error");
+}
+
+abort();
+}
diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
new file mode 100644
index 00..60f6d76bf3
--- /dev/null
+++ b/accel/hvf/hvf-cpus.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright 2008 IBM Corporation
+ *   2008 Red Hat, Inc.
+ * Copyright 2011 Intel Corporation
+ * Copyright 2016 Veertu, Inc.
+ * Copyright 2017 The Android Open Source Project
+ *
+ * QEMU Hypervisor.framework support
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see .
+ *
+ * This file contain code under public domain from the hvdos project:
+ * https://github.com/mist64/hvdos
+ *
+ * Parts Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS

[PATCH v4 06/11] hvf: Simplify post reset/init/loadvm hooks

The hooks we have that call us after reset, init and loadvm really all
just want to say "The reference of all register state is in the QEMU
vcpu struct, please push it".

However, we already have a working push mechanism in cpu->vcpu_dirty,
so we can just reuse that for all of the above, syncing state properly the
next time we actually execute a vCPU.

This fixes PSCI resets on ARM, as they modify CPU state even after the
post init call has completed, but before we execute the vCPU again.

To also make the scheme work for x86, we have to make sure we don't
move stale eflags into our env when the vcpu state is dirty.

Signed-off-by: Alexander Graf 
---
 accel/hvf/hvf-cpus.c | 27 +++
 target/i386/hvf/x86hvf.c |  5 -
 2 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
index 1b0c868944..71721e17de 100644
--- a/accel/hvf/hvf-cpus.c
+++ b/accel/hvf/hvf-cpus.c
@@ -275,39 +275,26 @@ static void hvf_cpu_synchronize_state(CPUState *cpu)
 }
 }
 
-static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu,
-  run_on_cpu_data arg)
+static void do_hvf_cpu_synchronize_set_dirty(CPUState *cpu,
+ run_on_cpu_data arg)
 {
-hvf_put_registers(cpu);
-cpu->vcpu_dirty = false;
+/* QEMU state is the reference, push it to HVF now and on next entry */
+cpu->vcpu_dirty = true;
 }
 
 static void hvf_cpu_synchronize_post_reset(CPUState *cpu)
 {
-run_on_cpu(cpu, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
-}
-
-static void do_hvf_cpu_synchronize_post_init(CPUState *cpu,
- run_on_cpu_data arg)
-{
-hvf_put_registers(cpu);
-cpu->vcpu_dirty = false;
+run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL);
 }
 
 static void hvf_cpu_synchronize_post_init(CPUState *cpu)
 {
-run_on_cpu(cpu, do_hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
-}
-
-static void do_hvf_cpu_synchronize_pre_loadvm(CPUState *cpu,
-  run_on_cpu_data arg)
-{
-cpu->vcpu_dirty = true;
+run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL);
 }
 
 static void hvf_cpu_synchronize_pre_loadvm(CPUState *cpu)
 {
-run_on_cpu(cpu, do_hvf_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
+run_on_cpu(cpu, do_hvf_cpu_synchronize_set_dirty, RUN_ON_CPU_NULL);
 }
 
 static void hvf_vcpu_destroy(CPUState *cpu)
diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c
index 0f2aeb1cf8..3111c0be4c 100644
--- a/target/i386/hvf/x86hvf.c
+++ b/target/i386/hvf/x86hvf.c
@@ -435,7 +435,10 @@ int hvf_process_events(CPUState *cpu_state)
 X86CPU *cpu = X86_CPU(cpu_state);
 CPUX86State *env = &cpu->env;
 
-env->eflags = rreg(cpu_state->hvf->fd, HV_X86_RFLAGS);
+if (!cpu_state->vcpu_dirty) {
+/* light weight sync for CPU_INTERRUPT_HARD and IF_MASK */
+env->eflags = rreg(cpu_state->hvf->fd, HV_X86_RFLAGS);
+}
 
 if (cpu_state->interrupt_request & CPU_INTERRUPT_INIT) {
 cpu_synchronize_state(cpu_state);
-- 
2.24.3 (Apple Git-128)

[PATCH v4 00/11] hvf: Implement Apple Silicon Support

Now that Apple Silicon is widely available, people are obviously excited
to try and run virtualized workloads on them, such as Linux and Windows.

This patch set implements a fully functional version to get the ball
going on that. With this applied, I can successfully run both Linux and
Windows as guests. I am not aware of any limitations specific to
Hypervisor.framework apart from:

  - Live migration / savevm
  - gdbstub debugging (SP register)


Enjoy!

Alex

v1 -> v2:

  - New patch: hvf: Actually set SIG_IPI mask
  - New patch: hvf: Introduce hvf vcpu struct
  - New patch: hvf: arm: Mark CPU as dirty on reset
  - Removed patch: hw/arm/virt: Disable highmem when on hypervisor.framework
  - Removed patch: arm: Synchronize CPU on PSCI on
  - Fix build on 32bit arm
  - Merge vcpu kick function patch into ARM enablement
  - Implement WFI handling (allows vCPUs to sleep)
  - Synchronize system registers (fixes OVMF crashes and reboot)
  - Don't always call cpu_synchronize_state()
  - Use more fine grained iothread locking
  - Populate aa64mmfr0 from hardware
  - Make safe to ctrl-C entitlement application

v2 -> v3:

  - Removed patch: hvf: Actually set SIG_IPI mask
  - New patch: hvf: arm: Add support for GICv3
  - New patch: hvf: arm: Implement -cpu host
  - Advance PC on SMC
  - Use cp list interface for sysreg syncs
  - Do not set current_cpu
  - Fix sysreg isread mask
  - Move sysreg handling to functions
  - Remove WFI logic again
  - Revert to global iothread locking

v3 -> v4:

  - Removed patch: hvf: arm: Mark CPU as dirty on reset
  - New patch: hvf: Simplify post reset/init/loadvm hooks
  - Remove i386-softmmu target (meson.build for hvf target)
  - Combine both if statements (PSCI)
  - Use hv.h instead of Hypervisor.h for 10.15 compat
  - Remove manual inclusion of Hypervisor.h in common .c files
  - No longer include Hypervisor.h in arm hvf .c files
  - Remove unused exe_full variable
  - Reuse exe_name variable

Alexander Graf (10):
  hvf: Add hypervisor entitlement to output binaries
  hvf: x86: Remove unused definitions
  hvf: Move common code out
  hvf: Introduce hvf vcpu struct
  arm: Set PSCI to 0.2 for HVF
  hvf: Simplify post reset/init/loadvm hooks
  hvf: Add Apple Silicon support
  arm: Add Hypervisor.framework build target
  hvf: arm: Add support for GICv3
  hvf: arm: Implement -cpu host

Peter Collingbourne (1):
  arm/hvf: Add a WFI handler

 MAINTAINERS  |  14 +-
 accel/hvf/entitlements.plist |   8 +
 accel/hvf/hvf-all.c  |  54 +++
 accel/hvf/hvf-cpus.c | 466 +++
 accel/hvf/meson.build|   7 +
 accel/meson.build|   1 +
 include/hw/core/cpu.h|   3 +-
 include/sysemu/hvf.h |   2 +
 include/sysemu/hvf_int.h |  66 +++
 meson.build  |  40 +-
 scripts/entitlement.sh   |  13 +
 target/arm/cpu.c |  13 +-
 target/arm/cpu.h |   2 +
 target/arm/hvf/hvf.c | 856 +++
 target/arm/hvf/meson.build   |   3 +
 target/arm/kvm_arm.h |   2 -
 target/arm/meson.build   |   2 +
 target/i386/hvf/hvf-cpus.c   | 131 --
 target/i386/hvf/hvf-cpus.h   |  25 -
 target/i386/hvf/hvf-i386.h   |  49 +-
 target/i386/hvf/hvf.c| 462 +++
 target/i386/hvf/meson.build  |   1 -
 target/i386/hvf/vmx.h|  24 +-
 target/i386/hvf/x86.c|  28 +-
 target/i386/hvf/x86_descr.c  |  26 +-
 target/i386/hvf/x86_emu.c|  62 +--
 target/i386/hvf/x86_mmu.c|   4 +-
 target/i386/hvf/x86_task.c   |  12 +-
 target/i386/hvf/x86hvf.c | 224 -
 target/i386/hvf/x86hvf.h |   2 -
 30 files changed, 1786 insertions(+), 816 deletions(-)
 create mode 100644 accel/hvf/entitlements.plist
 create mode 100644 accel/hvf/hvf-all.c
 create mode 100644 accel/hvf/hvf-cpus.c
 create mode 100644 accel/hvf/meson.build
 create mode 100644 include/sysemu/hvf_int.h
 create mode 100755 scripts/entitlement.sh
 create mode 100644 target/arm/hvf/hvf.c
 create mode 100644 target/arm/hvf/meson.build
 delete mode 100644 target/i386/hvf/hvf-cpus.c
 delete mode 100644 target/i386/hvf/hvf-cpus.h

-- 
2.24.3 (Apple Git-128)

[PATCH v4 05/11] arm: Set PSCI to 0.2 for HVF

In Hypervisor.framework, we just pass PSCI calls straight on to the QEMU
emulation of it. That means, if TCG is compatible with PSCI 0.2, so are we.
Let's reflect that fact in code too.

Signed-off-by: Alexander Graf 

---

v3 -> v4:

  - Combine both if statements
---
 target/arm/cpu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 07492e9f9a..6728426551 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -1059,8 +1059,8 @@ static void arm_cpu_initfn(Object *obj)
 cpu->psci_version = 1; /* By default assume PSCI v0.1 */
 cpu->kvm_target = QEMU_KVM_ARM_TARGET_NONE;
 
-if (tcg_enabled()) {
-cpu->psci_version = 2; /* TCG implements PSCI 0.2 */
+if (tcg_enabled() || hvf_enabled()) {
+cpu->psci_version = 2; /* TCG and HVF implement PSCI 0.2 */
 }
 }
 
-- 
2.24.3 (Apple Git-128)

[PATCH v4 01/11] hvf: Add hypervisor entitlement to output binaries

In macOS 11, QEMU only gets access to Hypervisor.framework if it has the
respective entitlement. Add an entitlement template and automatically self
sign and apply the entitlement in the build.

Signed-off-by: Alexander Graf 

---

v1 -> v2:

  - Make safe to ctrl-C

v3 -> v4:

  - Remove unused exe_full variable
  - Reuse exe_name variable
---
 accel/hvf/entitlements.plist |  8 
 meson.build  | 29 +
 scripts/entitlement.sh   | 13 +
 3 files changed, 46 insertions(+), 4 deletions(-)
 create mode 100644 accel/hvf/entitlements.plist
 create mode 100755 scripts/entitlement.sh

diff --git a/accel/hvf/entitlements.plist b/accel/hvf/entitlements.plist
new file mode 100644
index 00..154f3308ef
--- /dev/null
+++ b/accel/hvf/entitlements.plist
@@ -0,0 +1,8 @@
+
+http://www.apple.com/DTDs/PropertyList-1.0.dtd;>
+
+
+com.apple.security.hypervisor
+
+
+
diff --git a/meson.build b/meson.build
index e3386196ba..86d433c8a4 100644
--- a/meson.build
+++ b/meson.build
@@ -1843,9 +1843,14 @@ foreach target : target_dirs
 }]
   endif
   foreach exe: execs
-emulators += {exe['name']:
- executable(exe['name'], exe['sources'],
-   install: true,
+exe_name = exe['name']
+exe_sign = 'CONFIG_HVF' in config_target
+if exe_sign
+  exe_name += '-unsigned'
+endif
+
+emulator = executable(exe_name, exe['sources'],
+   install: not exe_sign,
c_args: c_args,
dependencies: arch_deps + deps + exe['dependencies'],
objects: lib.extract_all_objects(recursive: true),
@@ -1853,7 +1858,23 @@ foreach target : target_dirs
link_depends: [block_syms, qemu_syms] + exe.get('link_depends', 
[]),
link_args: link_args,
gui_app: exe['gui'])
-}
+
+if exe_sign
+  emulators += {exe['name'] : custom_target(exe['name'],
+   install: true,
+   install_dir: get_option('bindir'),
+   depends: emulator,
+   output: exe['name'],
+   command: [
+ meson.current_source_dir() / 'scripts/entitlement.sh',
+ meson.current_build_dir() / exe_name,
+ meson.current_build_dir() / exe['name'],
+ meson.current_source_dir() / 
'accel/hvf/entitlements.plist'
+   ])
+  }
+else
+  emulators += {exe['name']: emulator}
+endif
 
 if 'CONFIG_TRACE_SYSTEMTAP' in config_host
   foreach stp: [
diff --git a/scripts/entitlement.sh b/scripts/entitlement.sh
new file mode 100755
index 00..c540fa6435
--- /dev/null
+++ b/scripts/entitlement.sh
@@ -0,0 +1,13 @@
+#!/bin/sh -e
+#
+# Helper script for the build process to apply entitlements
+
+SRC="$1"
+DST="$2"
+ENTITLEMENT="$3"
+
+trap 'rm "$DST.tmp"' exit
+cp -af "$SRC" "$DST.tmp"
+codesign --entitlements "$ENTITLEMENT" --force -s - "$DST.tmp"
+mv "$DST.tmp" "$DST"
+trap '' exit
-- 
2.24.3 (Apple Git-128)

[PATCH v4 02/11] hvf: x86: Remove unused definitions

The hvf i386 has a few struct and cpp definitions that are never
used. Remove them.

Suggested-by: Roman Bolshakov 
Signed-off-by: Alexander Graf 
---
 target/i386/hvf/hvf-i386.h | 16 
 1 file changed, 16 deletions(-)

diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h
index e0edffd077..e31938e5ff 100644
--- a/target/i386/hvf/hvf-i386.h
+++ b/target/i386/hvf/hvf-i386.h
@@ -21,21 +21,6 @@
 #include "cpu.h"
 #include "x86.h"
 
-#define HVF_MAX_VCPU 0x10
-
-extern struct hvf_state hvf_global;
-
-struct hvf_vm {
-int id;
-struct hvf_vcpu_state *vcpus[HVF_MAX_VCPU];
-};
-
-struct hvf_state {
-uint32_t version;
-struct hvf_vm *vm;
-uint64_t mem_quota;
-};
-
 /* hvf_slot flags */
 #define HVF_SLOT_LOG (1 << 0)
 
@@ -75,7 +60,6 @@ hvf_slot *hvf_find_overlap_slot(uint64_t, uint64_t);
 
 /* Host specific functions */
 int hvf_inject_interrupt(CPUArchState *env, int vector);
-int hvf_vcpu_run(struct hvf_vcpu_state *vcpu);
 #endif
 
 #endif
-- 
2.24.3 (Apple Git-128)

Re: [PATCH 2/8] hvf: Move common code out

2020-12-03 Thread Roman Bolshakov

On Thu, Dec 03, 2020 at 11:13:35PM +0100, Alexander Graf wrote:
> 
> On 03.12.20 19:42, Peter Collingbourne wrote:
> > On Thu, Dec 3, 2020 at 1:41 AM Roman Bolshakov  
> > wrote:
> > > On Mon, Nov 30, 2020 at 04:00:11PM -0800, Peter Collingbourne wrote:
> > > > What I observe is that when returning from a pending signal pselect
> > > > consumes the signal (which is also consistent with my understanding of
> > > > what pselect does). That means that it doesn't matter if we take a
> > > > second WFx exit because once we reach the pselect in the second WFx
> > > > exit the signal will have been consumed by the pselect in the first
> > > > exit and we will just wait for the next one.
> > > > 
> > > Aha! Thanks for the explanation. So, the first WFI in the series of
> > > guest WFIs will likely wake up immediately? After a period without WFIs
> > > there must be a pending SIG_IPI...
> > > 
> > > It shouldn't be a critical issue though because (as defined in D1.16.2)
> > > "the architecture permits a PE to leave the low-power state for any
> > > reason, it is permissible for a PE to treat WFI as a NOP, but this is
> > > not recommended for lowest power operation."
> > > 
> > > BTW. I think a bit from the thread should go into the description of
> > > patch 8, because it's not trivial and it would really be helpful to keep
> > > in repo history. At least something like this (taken from an earlier
> > > reply in the thread):
> > > 
> > >In this implementation IPI is blocked on the CPU thread at startup and
> > >pselect() is used to atomically unblock the signal and begin sleeping.
> > >The signal is sent unconditionally so there's no need to worry about
> > >races between actually sleeping and the "we think we're sleeping"
> > >state. It may lead to an extra wakeup but that's better than missing
> > >it entirely.
> > Okay, I'll add something like that to the next version of the patch I send 
> > out.
> 
> 
> If this is the only change, I've already added it for v4. If you want me to
> change it further, just let me know what to replace the patch description
> with.
> 
> 

Thanks, Alex.

I'm fine with the description and all set.

-Roman

[PATCH 3/4] block/io: bdrv_check_byte_request(): drop bdrv_is_inserted()

Move bdrv_is_inserted() calls into callers.

We are going to make bdrv_check_byte_request() a clean thing.
bdrv_is_inserted() is not about checking the request, it's about
checking the bs. So, it should be separate.

With this patch we probably change the error path for some failure
scenarios. But relying on the fact that submitting a too-big request to an
empty cdrom (or a corrupted qcow2 node with no drv) results in EIO
and not ENOMEDIUM would be very strange. Moreover, we are going to
move to 64bit requests, so larger requests will be allowed anyway.

Moreover, keeping in mind that cdrom is the only driver that has a
.bdrv_is_inserted() handler, it's strange that we should care so much
about it in the generic block layer. Intuitively, we should just do the read
or write, and the cdrom driver should return correct errors if the medium is
not inserted. But that's work for another series.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/io.c | 25 -
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/block/io.c b/block/io.c
index 3e91074c9f..ef75a5abb4 100644
--- a/block/io.c
+++ b/block/io.c
@@ -884,17 +884,12 @@ static bool coroutine_fn 
bdrv_wait_serialising_requests(BdrvTrackedRequest *self
 return waited;
 }
 
-static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
-   size_t size)
+static int bdrv_check_byte_request(int64_t offset, size_t size)
 {
 if (size > BDRV_REQUEST_MAX_BYTES) {
 return -EIO;
 }
 
-if (!bdrv_is_inserted(bs)) {
-return -ENOMEDIUM;
-}
-
 if (offset < 0) {
 return -EIO;
 }
@@ -1642,7 +1637,11 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
 
 trace_bdrv_co_preadv(bs, offset, bytes, flags);
 
-ret = bdrv_check_byte_request(bs, offset, bytes);
+if (!bdrv_is_inserted(bs)) {
+return -ENOMEDIUM;
+}
+
+ret = bdrv_check_byte_request(offset, bytes);
 if (ret < 0) {
 return ret;
 }
@@ -2054,11 +2053,11 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
 
 trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
 
-if (!bs->drv) {
+if (!bdrv_is_inserted(bs)) {
 return -ENOMEDIUM;
 }
 
-ret = bdrv_check_byte_request(bs, offset, bytes);
+ret = bdrv_check_byte_request(offset, bytes);
 if (ret < 0) {
 return ret;
 }
@@ -3045,10 +3044,10 @@ static int coroutine_fn bdrv_co_copy_range_internal(
 assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
 assert(!(write_flags & BDRV_REQ_NO_FALLBACK));
 
-if (!dst || !dst->bs) {
+if (!dst || !dst->bs || !bdrv_is_inserted(dst->bs)) {
 return -ENOMEDIUM;
 }
-ret = bdrv_check_byte_request(dst->bs, dst_offset, bytes);
+ret = bdrv_check_byte_request(dst_offset, bytes);
 if (ret) {
 return ret;
 }
@@ -3056,10 +3055,10 @@ static int coroutine_fn bdrv_co_copy_range_internal(
 return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags);
 }
 
-if (!src || !src->bs) {
+if (!src || !src->bs || !bdrv_is_inserted(src->bs)) {
 return -ENOMEDIUM;
 }
-ret = bdrv_check_byte_request(src->bs, src_offset, bytes);
+ret = bdrv_check_byte_request(src_offset, bytes);
 if (ret) {
 return ret;
 }
-- 
2.21.3

[PATCH 1/4] block/file-posix: fix workaround in raw_do_pwrite_zeroes()

We should not set overlap_bytes:

1. Don't worry: it is calculated by bdrv_mark_request_serialising() and
   will be equal to or greater than bytes anyway.

2. If the request was already aligned up to some greater alignment,
   then we may break things: we reduce overlap_bytes, and a further
   bdrv_mark_request_serialising() may not help, as it will not restore
   the old bigger alignment.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/file-posix.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index d5fd1dbcd2..1b35bd6cfa 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2952,7 +2952,6 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t 
offset, int bytes,
 
 end = INT64_MAX & -(uint64_t)bs->bl.request_alignment;
 req->bytes = end - req->offset;
-req->overlap_bytes = req->bytes;
 
 bdrv_mark_request_serialising(req, bs->bl.request_alignment);
 }
-- 
2.21.3

[PATCH 4/4] block: introduce BDRV_MAX_LENGTH

We are going to modify block layer to work with 64bit requests. And
first step is moving to int64_t type for both offset and bytes
arguments in all block request related functions.

It's mostly safe (when widening signed or unsigned int to int64_t), but
switching from uint64_t is questionable.

So, let's first establish the set of requests we want to work with.
First signed int64_t should be enough, as off_t is signed anyway. Then,
obviously offset + bytes should not overflow.

And most interesting: (offset + bytes) being aligned up should not
overflow as well. Aligned to what alignment? The first thing that comes to
mind is bs->bl.request_alignment, as we align up requests to this
alignment. But there is another thing: look at
bdrv_mark_request_serialising(). It aligns request up to some given
alignment. And this parameter may be bdrv_get_cluster_size(), which is
often a lot greater than bs->bl.request_alignment.
Note also, that bdrv_mark_request_serialising() uses signed int64_t for
calculations. So, actually, we already depend on some restrictions.

Happily, bdrv_get_cluster_size() returns int and
bs->bl.request_alignment has 32bit unsigned type, but defined to be a
power of 2 less than INT_MAX. So, we may establish, that INT_MAX is
absolute maximum for any kind of alignment that may occur with the
request.

Note that bdrv_get_cluster_size() is not documented to return a power
of 2; still, bdrv_mark_request_serialising() behaves as if it does.
Also, backup uses bdi.cluster_size and is not prepared for it not being
a power of 2.
So, let's establish that Qemu supports only power-of-2 clusters and
alignments.

So, alignment can't be greater than 2^30.

Finally to be safe with calculations, to not calculate different
maximums for different nodes (depending on cluster size and
request_alignment), let's simply set QEMU_ALIGN_DOWN(INT64_MAX, 2^30)
as absolute maximum bytes length for Qemu. Actually, it's not much less
than INT64_MAX.

OK, then, let's apply it to block/io.

Let's consider all block/io entry points of offset/bytes:

4 bytes/offset interface functions: bdrv_co_preadv_part(),
bdrv_co_pwritev_part(), bdrv_co_copy_range_internal() and
bdrv_co_pdiscard() and we check them all with bdrv_check_request().

We also have one entry point with only offset: bdrv_co_truncate().
Check the offset.

And one public structure: BdrvTrackedRequest. Happily, it has only
three external users:

 file-posix.c: adopted by this patch
 write-threshold.c: only read fields
 test-write-threshold.c: sets obviously small constant values

It would be better to make the structure private and add corresponding
interfaces. Still, it's not obvious what kind of interface is needed
for file-posix.c. Let's keep it public but add corresponding
assertions.

After this patch we'll convert functions in block/io.c to int64_t bytes
and offset parameters. We can assume that an offset/bytes pair always
satisfies the new restrictions, and make
corresponding assertions where needed. If we reach some offset/bytes
point in block/io.c missing bdrv_check_request(), it is considered a
bug. Likewise, if block/io.c modifies an offset/bytes request, expanding
it more than aligning up to request_alignment, it's a bug too.

For all io requests except for discard we keep for now old restriction
of 32bit request length.

iotest 206 output error message changed, as now test disk size is
larger than new limit. Add one more test case with new maximum disk
size to cover too-big-L1 case.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/block.h| 10 +++
 include/block/block_int.h|  8 ++
 block.c  | 17 +++-
 block/file-posix.c   |  6 ++---
 block/io.c   | 51 +---
 tests/test-write-threshold.c |  4 +++
 tests/qemu-iotests/206   |  2 +-
 tests/qemu-iotests/206.out   |  6 +
 8 files changed, 90 insertions(+), 14 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index c9d7c58765..5b81e33e94 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -142,6 +142,16 @@ typedef struct HDGeometry {
INT_MAX >> BDRV_SECTOR_BITS)
 #define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)
 
+/*
+ * We want allow aligning requests and disk length up to any 32bit alignment
+ * and don't afraid of overflow.
+ * To achieve it, and in the same time use some pretty number as maximum disk
+ * size, let's define maximum "length" (a limit for any offset/bytes request 
and
+ * for disk size) to be the greatest power of 2 less than INT64_MAX.
+ */
+#define BDRV_MAX_ALIGNMENT (1L << 30)
+#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT))
+
 /*
  * Allocation status flags for bdrv_block_status() and friends.
  *
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 95d9333be1..1eeafc118c 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@

[PATCH 2/4] block/io: bdrv_refresh_limits(): use ERRP_GUARD

This simplifies following commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/io.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/block/io.c b/block/io.c
index ec5e152bb7..3e91074c9f 100644
--- a/block/io.c
+++ b/block/io.c
@@ -135,10 +135,10 @@ static void bdrv_merge_limits(BlockLimits *dst, const 
BlockLimits *src)
 
 void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
 {
+ERRP_GUARD();
 BlockDriver *drv = bs->drv;
 BdrvChild *c;
 bool have_limits;
-Error *local_err = NULL;
 
 memset(&bs->bl, 0, sizeof(bs->bl));
 
@@ -156,9 +156,8 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
 QLIST_FOREACH(c, &bs->children, next) {
 if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
 {
-bdrv_refresh_limits(c->bs, &local_err);
-if (local_err) {
-error_propagate(errp, local_err);
+bdrv_refresh_limits(c->bs, errp);
+if (*errp) {
 return;
 }
 bdrv_merge_limits(&bs->bl, &c->bs->bl);
-- 
2.21.3

[PATCH 0/4] block: prepare for 64bit

Hi all!

This is a preparation series for v4 of "[PATCH v3 00/17] 64bit
block-layer".

The whole thing is in 04, and 01-03 are small preparations.

Vladimir Sementsov-Ogievskiy (4):
  block/file-posix: fix workaround in raw_do_pwrite_zeroes()
  block/io: bdrv_refresh_limits(): use ERRP_GUARD
  block/io: bdrv_check_byte_request(): drop bdrv_is_inserted()
  block: introduce BDRV_MAX_LENGTH

 include/block/block.h| 10 ++
 include/block/block_int.h|  8 +
 block.c  | 17 -
 block/file-posix.c   |  7 ++--
 block/io.c   | 69 ++--
 tests/test-write-threshold.c |  4 +++
 tests/qemu-iotests/206   |  2 +-
 tests/qemu-iotests/206.out   |  6 
 8 files changed, 98 insertions(+), 25 deletions(-)

-- 
2.21.3

Re: [PATCH 2/8] hvf: Move common code out

On 03.12.20 19:42, Peter Collingbourne wrote:

On Thu, Dec 3, 2020 at 1:41 AM Roman Bolshakov wrote:

On Mon, Nov 30, 2020 at 04:00:11PM -0800, Peter Collingbourne wrote:

On Mon, Nov 30, 2020 at 3:18 PM Alexander Graf wrote:

On 01.12.20 00:01, Peter Collingbourne wrote:

On Mon, Nov 30, 2020 at 1:40 PM Alexander Graf wrote:

Hi Peter,

On 30.11.20 22:08, Peter Collingbourne wrote:

On Mon, Nov 30, 2020 at 12:56 PM Frank Yang wrote:

On Mon, Nov 30, 2020 at 12:34 PM Alexander Graf wrote:

Hi Frank,

Thanks for the update :). Your previous email nudged me into the right
direction. I previously had implemented WFI through the internal timer
framework which performed way worse.

Cool, glad it's helping. Also, Peter found out that the main thing keeping us from
just using cntpct_el0 on the host directly and compare with cval is that if we
sleep, cval is going to be much < cntpct_el0 by the sleep time. If we can get
either the architecture or macos to read out the sleep time then we might be able
to not have to use a poll interval either!

Along the way, I stumbled over a few issues though. For starters, the signal
mask for SIG_IPI was not set correctly, so while pselect() would exit, the
signal would never get delivered to the thread! For a fix, check out

https://patchew.org/QEMU/20201130030723.78326-1-ag...@csgraf.de/20201130030723.78326-4-ag...@csgraf.de/

Thanks, we'll take a look :)

Please also have a look at my latest stab at WFI emulation. It doesn't handle
WFE (that's only relevant in overcommitted scenarios). But it does handle WFI
and even does something similar to hlt polling, albeit not with an adaptive
threshold.

Sorry I'm not subscribed to qemu-devel (I'll subscribe in a bit) so
I'll reply to your patch here. You have:

+/* Set cpu->hvf->sleeping so that we get a
SIG_IPI signal. */
+cpu->hvf->sleeping = true;
+smp_mb();
+
+/* Bail out if we received an IRQ meanwhile */
+if (cpu->thread_kicked || (cpu->interrupt_request &
+(CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
+cpu->hvf->sleeping = false;
+break;
+}
+
+/* nanosleep returns on signal, so we wake up on kick. */
+nanosleep(ts, NULL);

and then send the signal conditional on whether sleeping is true, but
I think this is racy. If the signal is sent after sleeping is set to
true but before entering nanosleep then I think it will be ignored and
we will miss the wakeup. That's why in my implementation I block IPI
on the CPU thread at startup and then use pselect to atomically
unblock and begin sleeping. The signal is sent unconditionally so
there's no need to worry about races between actually sleeping and the
"we think we're sleeping" state. It may lead to an extra wakeup but
that's better than missing it entirely.

Thanks a bunch for the comment! So the trick I was using here is to
modify the timespec from the kick function before sending the IPI
signal. That way, we know that either we are inside the sleep (where the
signal wakes it up) or we are outside the sleep (where timespec={} will
make it return immediately).

The only race I can think of is if nanosleep does calculations based on
the timespec and we happen to send the signal right there and then.

Yes that's the race I was thinking of. Admittedly it's a small window
but it's theoretically possible and part of the reason why pselect was
created.

The problem with blocking IPIs is basically what Frank was describing
earlier: How do you unset the IPI signal pending status? If the signal
is never delivered, how can pselect differentiate "signal from last time
is still pending" from "new signal because I got an IPI"?

In this case we would take the additional wakeup which should be
harmless since we will take the WFx exit again and put us in the
correct state. But that's a lot better than busy looping.

I'm not sure I follow. I'm thinking of the following scenario:

- trap into WFI handler
- go to sleep with blocked SIG_IPI
- SIG_IPI arrives, pselect() exits
- signal is still pending because it's blocked
- enter guest
- trap into WFI handler
- run pselect(), but it immediate exits because SIG_IPI is still pending

This was the loop I was seeing when running with SIG_IPI blocked. That's
part of the reason why I switched to a different model.

What I observe is that when returning from a pending signal pselect
consumes the signal (which is also consistent with my understanding of
what pselect does). That means that it doesn't matter if we take a
second WFx exit because once we reach the pselect in the second WFx
exit the signal will have been consumed by the pselect in the first
exit and we will just wait for the next one.

Aha! Thanks for the explanation. So, the first WFI in the series of
guest

Re: [PATCH v4 2/2] hw/virtio-pci Added AER capability.

2020-12-03 Thread Michael S. Tsirkin

On Thu, Dec 03, 2020 at 03:25:17PM +0200, Andrew Melnychenko wrote:
> From: Andrew 
> 
> Added AER capability for virtio-pci devices.
> Also added property for devices, by default AER is disabled.
> 
> Signed-off-by: Andrew Melnychenko 
> ---
>  hw/virtio/virtio-pci.c | 16 
>  hw/virtio/virtio-pci.h |  4 
>  2 files changed, 20 insertions(+)
> 
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index ceaa233129..f863f69ede 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -1817,6 +1817,12 @@ static void virtio_pci_realize(PCIDevice *pci_dev, 
> Error **errp)
>   */
>  pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3);
>  
> +if (proxy->flags & VIRTIO_PCI_FLAG_AER) {
> +pcie_aer_init(pci_dev, PCI_ERR_VER, last_pcie_cap_offset,
> +  PCI_ERR_SIZEOF, NULL);
> +last_pcie_cap_offset += PCI_ERR_SIZEOF;
> +}
> +


What I dislike here is that the property can be added to
pci devices (not express) and will apparently succeed.
Pls add code to validate and fail init.


>  if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) {
>  /* Init error enabling flags */
>  pcie_cap_deverr_init(pci_dev);
> @@ -1858,7 +1864,15 @@ static void virtio_pci_realize(PCIDevice *pci_dev, 
> Error **errp)
>  
>  static void virtio_pci_exit(PCIDevice *pci_dev)
>  {
> +VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev);
> +bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) &&
> + !pci_bus_is_root(pci_get_bus(pci_dev));
> +
>  msix_uninit_exclusive_bar(pci_dev);
> +if (proxy->flags & VIRTIO_PCI_FLAG_AER && pcie_port &&
> +pci_is_express(pci_dev)) {
> +pcie_aer_exit(pci_dev);
> +}
>  }
>  
>  static void virtio_pci_reset(DeviceState *qdev)
> @@ -1911,6 +1925,8 @@ static Property virtio_pci_properties[] = {
>  VIRTIO_PCI_FLAG_INIT_PM_BIT, true),
>  DEFINE_PROP_BIT("x-pcie-flr-init", VirtIOPCIProxy, flags,
>  VIRTIO_PCI_FLAG_INIT_FLR_BIT, true),
> +DEFINE_PROP_BIT("aer", VirtIOPCIProxy, flags,
> +VIRTIO_PCI_FLAG_AER_BIT, false),
>  DEFINE_PROP_END_OF_LIST(),
>  };
>
> diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
> index 06e2af12de..d7d5d403a9 100644
> --- a/hw/virtio/virtio-pci.h
> +++ b/hw/virtio/virtio-pci.h
> @@ -41,6 +41,7 @@ enum {
>  VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT,
>  VIRTIO_PCI_FLAG_INIT_PM_BIT,
>  VIRTIO_PCI_FLAG_INIT_FLR_BIT,
> +VIRTIO_PCI_FLAG_AER_BIT,
>  };
>  
>  /* Need to activate work-arounds for buggy guests at vmstate load. */
> @@ -80,6 +81,9 @@ enum {
>  /* Init Function Level Reset capability */
>  #define VIRTIO_PCI_FLAG_INIT_FLR (1 << VIRTIO_PCI_FLAG_INIT_FLR_BIT)
>  
> +/* Advanced Error Reporting capability */
> +#define VIRTIO_PCI_FLAG_AER (1 << VIRTIO_PCI_FLAG_AER_BIT)
> +
>  typedef struct {
>  MSIMessage msg;
>  int virq;
> -- 
> 2.29.2

Re: [PATCH v12 00/19] Initial support for multi-process Qemu

2020-12-03 Thread Jagannathan Raman



> On Dec 3, 2020, at 4:53 AM, Stefan Hajnoczi  wrote:
> 
> On Tue, Dec 01, 2020 at 03:22:35PM -0500, Jagannathan Raman wrote:
>> This is the v12 of the patchset. Thank you very much for the
>> review of the v11 of the series.
> 
> I'm in favor of merging this for QEMU 6.0. The command-line interface
> has the x- prefix so QEMU is not committing to a stable interface.
> Changes needed to support additional device types or to switch to the
> vfio-user protocol can be made later.

Hi Stefan,

Thank you very much for reviewing the patches! :)

—
Jag

> 
> Jag, Elena, JJ: I suggest getting your GPG key to Peter Maydell so you
> can send multi-process QEMU pull requests.
> 
> Reviewed-by: Stefan Hajnoczi

Re: [PATCH v12 00/19] Initial support for multi-process Qemu

2020-12-03 Thread Peter Maydell

On Thu, 3 Dec 2020 at 09:51, Stefan Hajnoczi  wrote:
>
> On Tue, Dec 01, 2020 at 03:22:35PM -0500, Jagannathan Raman wrote:
> > This is the v12 of the patchset. Thank you very much for the
> > review of the v11 of the series.
>
> I'm in favor of merging this for QEMU 6.0. The command-line interface
> has the x- prefix so QEMU is not committing to a stable interface.
> Changes needed to support additional device types or to switch to the
> vfio-user protocol can be made later.
>
> Jag, Elena, JJ: I suggest getting your GPG key to Peter Maydell so you
> can send multi-process QEMU pull requests.

I would prefer to see this going through the tree of an
established QEMU developer who's already sending pullrequests,
at least initially.

thanks
-- PMM

Re: [PATCH] tests/acceptance: test hot(un)plug of ccw devices

2020-12-03 Thread Philippe Mathieu-Daudé

On 12/3/20 7:14 PM, Thomas Huth wrote:
> On 03/12/2020 19.11, Philippe Mathieu-Daudé wrote:
>> On 12/3/20 6:22 PM, Thomas Huth wrote:
>>> On 03/12/2020 16.39, Cornelia Huck wrote:
>>>> Hotplug a virtio-net-ccw device, and then hotunplug it again.
>>>
>>> Good idea! ... is it also possible with a pci device?
>>>
>>>> Signed-off-by: Cornelia Huck 
>>>> ---
>> ...
>>>> +exec_command_and_wait_for_pattern(self, 'dmesg | tail -n 1', 'CRW')
>>>
>>> That looks like it could be a little bit racy ... what if the kernel outputs
>>> another log message by chance, so that tail -n 1 reports that instead.
>>>
>>> I think it would be better to clear the dmesg log ("dmesg -c") before
>>> plugging, and then look at all the new output of "dmesg" without using
>>> "tail" afterwards.
>>
>> "dmesg --follow"?
> 
> Then you'd need to send control-c afterwards to stop it? ... not sure
> whether that's such a better solution...

Oh indeed, I missed there is further interaction.

Thanks,

Phil.

[PATCH v4] Fix build with 64 bits time_t

2020-12-03 Thread Fabrice Fontaine

time element is deprecated on new input_event structure in kernel's
input.h [1]

This will avoid the following build failure:

hw/input/virtio-input-host.c: In function 'virtio_input_host_handle_status':
hw/input/virtio-input-host.c:198:28: error: 'struct input_event' has no member 
named 'time'
  198 | if (gettimeofday(&evdev.time, NULL)) {
  |^

Fixes:
 - 
http://autobuild.buildroot.org/results/a538167e288c14208d557cd45446df86d3d599d5
 - 
http://autobuild.buildroot.org/results/efd4474fb4b6c0ce0ab3838ce130429c51e43bbb

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit?id=152194fe9c3f

Signed-off-by: Fabrice Fontaine 
---
Changes v3 -> v4 (after review of Gerd Hoffmann):
 - Include <sys/ioctl.h>

Changes v2 -> v3 (after review of Gerd Hoffmann):
 - Replace include on <linux/input.h> by
   "standard-headers/linux/input.h" to try to fix build on rhel-7

Changes v1 -> v2 (after review of Michael S. Tsirkin):
 - Drop define of input_event_{sec,usec} as it is already done in
   include/standard-headers/linux/input.h

 contrib/vhost-user-input/main.c | 8 ++--
 hw/input/virtio-input-host.c| 5 -
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/contrib/vhost-user-input/main.c b/contrib/vhost-user-input/main.c
index 6020c6f33a..b27b12733b 100644
--- a/contrib/vhost-user-input/main.c
+++ b/contrib/vhost-user-input/main.c
@@ -7,13 +7,14 @@
 #include "qemu/osdep.h"
 
 #include 
-#include 
+#include 
 
 #include "qemu/iov.h"
 #include "qemu/bswap.h"
 #include "qemu/sockets.h"
 #include "contrib/libvhost-user/libvhost-user.h"
 #include "contrib/libvhost-user/libvhost-user-glib.h"
+#include "standard-headers/linux/input.h"
 #include "standard-headers/linux/virtio_input.h"
 #include "qapi/error.h"
 
@@ -115,13 +116,16 @@ vi_evdev_watch(VuDev *dev, int condition, void *data)
 static void vi_handle_status(VuInput *vi, virtio_input_event *event)
 {
 struct input_event evdev;
+struct timeval tval;
 int rc;
 
-if (gettimeofday(&evdev.time, NULL)) {
+if (gettimeofday(&tval, NULL)) {
 perror("vi_handle_status: gettimeofday");
 return;
 }
 
+evdev.input_event_sec = tval.tv_sec;
+evdev.input_event_usec = tval.tv_usec;
 evdev.type = le16toh(event->type);
 evdev.code = le16toh(event->code);
 evdev.value = le32toh(event->value);
diff --git a/hw/input/virtio-input-host.c b/hw/input/virtio-input-host.c
index 85daf73f1a..137efba57b 100644
--- a/hw/input/virtio-input-host.c
+++ b/hw/input/virtio-input-host.c
@@ -193,13 +193,16 @@ static void virtio_input_host_handle_status(VirtIOInput 
*vinput,
 {
 VirtIOInputHost *vih = VIRTIO_INPUT_HOST(vinput);
 struct input_event evdev;
+struct timeval tval;
 int rc;
 
-if (gettimeofday(&evdev.time, NULL)) {
+if (gettimeofday(&tval, NULL)) {
 perror("virtio_input_host_handle_status: gettimeofday");
 return;
 }
 
+evdev.input_event_sec = tval.tv_sec;
+evdev.input_event_usec = tval.tv_usec;
 evdev.type = le16_to_cpu(event->type);
 evdev.code = le16_to_cpu(event->code);
 evdev.value = le32_to_cpu(event->value);
-- 
2.29.2

Re: [PATCH 06/15] arc: TCG instruction definitions

2020-12-03 Thread Cupertino Miranda

Seems perfectly doable, no objections.
It will probably take me longer to integrate it in the build system
than to get the scripts ready.
I will start by placing the ruby tool and documentation there, and
later on, integrate it in the build system.

Hope that you get re-motivated to review our patches. No pressure though ;-)
Very valuable comments, lots of improvements happening here.

On Thu, Dec 3, 2020 at 7:34 PM Richard Henderson
 wrote:
>
> On 12/3/20 10:54 AM, Cupertino Miranda wrote:
> > Our generation tool has different levels of verbosity, expressing
> > instruction semantics from a pattern level up to what it is shown in
> >  as comments, which is later converted to TCG format.
> > For QEMU purposes I would say that input format should be what is
> > shown as comments in  file.
>
> That seems reasonable.
>
> > Also, as is, the generator is done in Ruby, and to be honest, would
> > not be very easy to redo in some other language. Would this be
> > considered a problem if we would include it as Ruby code ?
> > IMO execution of these scripts should not be a step of build process
> > but rather a development one, such that Ruby does not become a
> > requirement to build QEmu.
>
> It's not ideal -- I would have preferred python or C -- but I won't object.
>
> At minimum, I would expect build system changes that detects ruby support in
> the system, and a manual build rule that rebuilds the generated files.  This
> build + check-in process would want documenting in target/arc/README or
> something.  If there are any ruby packages required apart from the base
> language, this should be documented as well (I know nothing about ruby myself,
> just guessing based on what happens with python and perl).
>
> Even better would be build system changes that, if ruby is installed runs the
> generator, and only fall-back to the checked-in files if ruby is missing.
>
> In this way, anyone who wants to modify the code generator would merely have 
> to
> install the ruby packages on their system, but they would not be required for 
> a
> non-ARC developer to build.
>
>
> r~

Re: [PATCH 06/15] arc: TCG instruction definitions

On 12/3/20 10:54 AM, Cupertino Miranda wrote:
> Our generation tool has different levels of verbosity, expressing
> instruction semantics from a pattern level up to what it is shown in
>  as comments, which is later converted to TCG format.
> For QEMU purposes I would say that input format should be what is
> shown as comments in  file.

That seems reasonable.

> Also, as is, the generator is done in Ruby, and to be honest, would
> not be very easy to redo in some other language. Would this be
> considered a problem if we would include it as Ruby code ?
> IMO execution of these scripts should not be a step of build process
> but rather a development one, such that Ruby does not become a
> requirement to build QEmu.

It's not ideal -- I would have preferred python or C -- but I won't object.

At minimum, I would expect build system changes that detects ruby support in
the system, and a manual build rule that rebuilds the generated files.  This
build + check-in process would want documenting in target/arc/README or
something.  If there are any ruby packages required apart from the base
language, this should be documented as well (I know nothing about ruby myself,
just guessing based on what happens with python and perl).

Even better would be build system changes that, if ruby is installed runs the
generator, and only fall-back to the checked-in files if ruby is missing.

In this way, anyone who wants to modify the code generator would merely have to
install the ruby packages on their system, but they would not be required for a
non-ARC developer to build.

r~

Re: [PATCH v12 00/19] Initial support for multi-process Qemu

2020-12-03 Thread Elena Ufimtseva

On Thu, Dec 03, 2020 at 09:14:04AM +, Stefan Hajnoczi wrote:
> On Tue, Dec 01, 2020 at 03:22:35PM -0500, Jagannathan Raman wrote:
> > This is the v12 of the patchset. Thank you very much for the
> > review of the v11 of the series.
> 
> I'm in favor of merging this for QEMU 6.0. The command-line interface
> has the x- prefix so QEMU is not committing to a stable interface.
> Changes needed to support additional device types or to switch to the
> vfio-user protocol can be made later.
> 

Woot! Thank you Stefan!

> Jag, Elena, JJ: I suggest getting your GPG key to Peter Maydell so you
> can send multi-process QEMU pull requests.
> 
> Reviewed-by: Stefan Hajnoczi 

In progress.
Do we need to add some tagging for the PULL patches?
Should we include the git repo and have the proper tag as well?

Elena

Re: [PATCH v12 14/14] block: apply COR-filter to block-stream jobs


02.12.2020 21:18, Andrey Shinkevich wrote:


On 27.10.2020 21:24, Andrey Shinkevich wrote:


On 27.10.2020 20:57, Vladimir Sementsov-Ogievskiy wrote:

27.10.2020 20:48, Andrey Shinkevich wrote:


On 27.10.2020 19:13, Vladimir Sementsov-Ogievskiy wrote:

22.10.2020 21:13, Andrey Shinkevich wrote:

This patch completes the series with the COR-filter insertion for
block-stream operations. Adding the filter makes it possible for copied
regions to be discarded in backing files during the block-stream job,
what will reduce the disk overuse.
The COR-filter insertion incurs changes in the iotests case
245:test_block_stream_4 that reopens the backing chain during a
block-stream job. There are changes in the iotests #030 as well.
The iotests case 030:test_stream_parallel was deleted due to multiple
conflicts between the concurrent job operations over the same backing
chain. The base backing node for one job is the top node for another
job. It may change due to the filter node inserted into the backing
chain while both jobs are running. Another issue is that the parts of
the backing chain are being frozen by the running job and may not be
changed by the concurrent job when needed. The concept of the parallel
jobs with common nodes is considered vital no more.

Signed-off-by: Andrey Shinkevich 
---
  block/stream.c | 98 ++
  tests/qemu-iotests/030 | 51 +++-
  tests/qemu-iotests/030.out |  4 +-
  tests/qemu-iotests/141.out |  2 +-
  tests/qemu-iotests/245 | 22 +++
  5 files changed, 87 insertions(+), 90 deletions(-)

diff --git a/block/stream.c b/block/stream.c



[...]


+    s = block_job_create(job_id, _job_driver, NULL, cor_filter_bs,
+ BLK_PERM_CONSISTENT_READ,
+ basic_flags | BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD,


I think that BLK_PERM_GRAPH_MOD is something outdated. We have chain-freeze, 
what BLK_PERM_GRAPH_MOD adds to it? I don't know, and doubt that somebody knows.



That is true for the commit/mirror jobs also. If we agree to remove the flag 
BLK_PERM_GRAPH_MOD from all these jobs, it will be made in a separate series, 
won't it?


Hmm. At least, let's not implement new logic based on BLK_PERM_GRAPH_MOD. In 
original code it's only block_job_create's perm, not in shared_perm, not 
somewhere else.. So, if we keep it, let's keep it as is: only in perm in 
block_job_create, not implementing additional perm/shared_perm logic.



With @perm=0 in the block_job_add_bdrv(>common, "active node"...), it won't.




   speed, creation_flags, NULL, NULL, errp);
  if (!s) {
  goto fail;
  }
+    /*
+ * Prevent concurrent jobs trying to modify the graph structure here, we
+ * already have our own plans. Also don't allow resize as the image size is
+ * queried only at the job start and then cached.
+ */
+    if (block_job_add_bdrv(>common, "active node", bs,
+   basic_flags | BLK_PERM_GRAPH_MOD,


why not 0, like for other nodes? We don't use this BdrvChild at all, why to 
require permissions?



Yes, '0' is right.


+   basic_flags | BLK_PERM_WRITE, _abort)) {
+    goto fail;
+    }
+
  /* Block all intermediate nodes between bs and base, because 



[...]


diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index dcb4b5d..0064590 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -227,61 +227,20 @@ class TestParallelOps(iotests.QMPTestCase):
  for img in self.imgs:
  os.remove(img)
-    # Test that it's possible to run several block-stream operations
-    # in parallel in the same snapshot chain
-    @unittest.skipIf(os.environ.get('QEMU_CHECK_BLOCK_AUTO'), 'disabled in CI')
-    def test_stream_parallel(self):


Didn't we agree to add "bottom" parameter to qmp? Then this test-case can be 
rewritten using
node-names and new "bottom" stream argument.



The QMP new "bottom" option is passed to the COR-driver. It is done within the 
stream-job code. So, it works.


Yes. But we also want "bottom" option for stream-job, and deprecate "base" option. Then 
we can rewrite the test using "bottom" option, all should work





I guess it will not help for the whole test. Particularly, there is an issue 
with freezing the child link to COR-filter of the concurrent job, then it fails 
to finish first.


We should not have such frozen link, as our bottom node should be above 
COR-filter of concurrent job.




The bdrv_freeze_backing_chain(bs, above_base, errp) does that job. Max insisted 
on keeping it.

Andrey


I have kept the test_stream_parallel() deleted in the coming v13 because it was 
agreed to make the above_base node frozen. With this, the test case can not 
pass. It is also true because the operations over the COR-filter node are 
blocked for the parallel jobs.

Andrey



--
Best regards,
Vladimir

[PATCH v15 1/4] usb: Add versal-usb2-ctrl-regs module

This module emulates control registers of versal usb2 controller, this is added
just to make guest happy. In general this module would control the phy-reset
signal from usb controller, data coherency of the transactions, signals
the host system errors received from controller.

Signed-off-by: Sai Pavan Boddu 
Signed-off-by: Vikram Garhwal 
Reviewed-by: Edgar E. Iglesias 
Reviewed-by: Peter Maydell 
---
 hw/usb/meson.build  |   1 +
 hw/usb/xlnx-versal-usb2-ctrl-regs.c | 229 
 include/hw/usb/xlnx-versal-usb2-ctrl-regs.h |  45 ++
 3 files changed, 275 insertions(+)
 create mode 100644 hw/usb/xlnx-versal-usb2-ctrl-regs.c
 create mode 100644 include/hw/usb/xlnx-versal-usb2-ctrl-regs.h

diff --git a/hw/usb/meson.build b/hw/usb/meson.build
index 934e4fa..ecfec0a 100644
--- a/hw/usb/meson.build
+++ b/hw/usb/meson.build
@@ -30,6 +30,7 @@ softmmu_ss.add(when: 'CONFIG_USB_DWC2', if_true: 
files('hcd-dwc2.c'))
 softmmu_ss.add(when: 'CONFIG_TUSB6010', if_true: files('tusb6010.c'))
 softmmu_ss.add(when: 'CONFIG_IMX', if_true: files('chipidea.c'))
 softmmu_ss.add(when: 'CONFIG_IMX_USBPHY', if_true: files('imx-usb-phy.c'))
+specific_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: 
files('xlnx-versal-usb2-ctrl-regs.c'))
 
 # emulated usb devices
 softmmu_ss.add(when: 'CONFIG_USB', if_true: files('dev-hub.c'))
diff --git a/hw/usb/xlnx-versal-usb2-ctrl-regs.c 
b/hw/usb/xlnx-versal-usb2-ctrl-regs.c
new file mode 100644
index 000..9eaa59e
--- /dev/null
+++ b/hw/usb/xlnx-versal-usb2-ctrl-regs.c
@@ -0,0 +1,229 @@
+/*
+ * QEMU model of the VersalUsb2CtrlRegs Register control/Status block for
+ * USB2.0 controller
+ *
+ * This module should control phy_reset, permanent device plugs, frame length
+ * time adjust & setting of coherency paths. None of which are emulated in
+ * present model.
+ *
+ * Copyright (c) 2020 Xilinx Inc. Vikram Garhwal 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "hw/irq.h"
+#include "hw/register.h"
+#include "qemu/bitops.h"
+#include "qemu/log.h"
+#include "qom/object.h"
+#include "migration/vmstate.h"
+#include "hw/usb/xlnx-versal-usb2-ctrl-regs.h"
+
+#ifndef XILINX_VERSAL_USB2_CTRL_REGS_ERR_DEBUG
+#define XILINX_VERSAL_USB2_CTRL_REGS_ERR_DEBUG 0
+#endif
+
+REG32(BUS_FILTER, 0x30)
+FIELD(BUS_FILTER, BYPASS, 0, 4)
+REG32(PORT, 0x34)
+FIELD(PORT, HOST_SMI_BAR_WR, 4, 1)
+FIELD(PORT, HOST_SMI_PCI_CMD_REG_WR, 3, 1)
+FIELD(PORT, HOST_MSI_ENABLE, 2, 1)
+FIELD(PORT, PWR_CTRL_PRSNT, 1, 1)
+FIELD(PORT, HUB_PERM_ATTACH, 0, 1)
+REG32(JITTER_ADJUST, 0x38)
+FIELD(JITTER_ADJUST, FLADJ, 0, 6)
+REG32(BIGENDIAN, 0x40)
+FIELD(BIGENDIAN, ENDIAN_GS, 0, 1)
+REG32(COHERENCY, 0x44)
+FIELD(COHERENCY, USB_COHERENCY, 0, 1)
+REG32(XHC_BME, 0x48)
+FIELD(XHC_BME, XHC_BME, 0, 1)
+REG32(REG_CTRL, 0x60)
+FIELD(REG_CTRL, SLVERR_ENABLE, 0, 1)
+REG32(IR_STATUS, 0x64)
+FIELD(IR_STATUS, HOST_SYS_ERR, 1, 1)
+FIELD(IR_STATUS, ADDR_DEC_ERR, 0, 1)
+REG32(IR_MASK, 0x68)
+FIELD(IR_MASK, HOST_SYS_ERR, 1, 1)
+FIELD(IR_MASK, ADDR_DEC_ERR, 0, 1)
+REG32(IR_ENABLE, 0x6c)
+FIELD(IR_ENABLE, HOST_SYS_ERR, 1, 1)
+FIELD(IR_ENABLE, ADDR_DEC_ERR, 0, 1)
+REG32(IR_DISABLE, 0x70)
+FIELD(IR_DISABLE, HOST_SYS_ERR, 1, 1)
+FIELD(IR_DISABLE, ADDR_DEC_ERR, 0, 1)
+REG32(USB3, 0x78)
+
+static void ir_update_irq(VersalUsb2CtrlRegs *s)
+{
+bool pending = s->regs[R_IR_STATUS] & ~s->regs[R_IR_MASK];
+qemu_set_irq(s->irq_ir, pending);
+}
+
+static void ir_status_postw(RegisterInfo *reg, uint64_t val64)
+{
+VersalUsb2CtrlRegs *s = XILINX_VERSAL_USB2_CTRL_REGS(reg->opaque);
+/*
+ * TODO: This should also clear USBSTS.HSE field in USB XHCI register.
+ * May be combine both the modules.
+ */
+ir_update_irq(s);
+}
+
+static uint64_t ir_enable_prew(RegisterInfo *reg, uint64_t val64)
+{
+VersalUsb2CtrlRegs *s =

[PATCH v15 4/4] arm: xlnx-versal: Connect usb to virt-versal

From: Vikram Garhwal 

Connect VersalUsb2 subsystem to xlnx-versal SOC, it is placed
in iou of lpd domain and configure it as dual port host controller.
Add the respective guest dts nodes for "xlnx-versal-virt" machine.

Signed-off-by: Vikram Garhwal 
Signed-off-by: Sai Pavan Boddu 
Reviewed-by: Edgar E. Iglesias 
---
 hw/arm/xlnx-versal-virt.c| 55 
 hw/arm/xlnx-versal.c | 26 +
 include/hw/arm/xlnx-versal.h |  9 
 3 files changed, 90 insertions(+)

diff --git a/hw/arm/xlnx-versal-virt.c b/hw/arm/xlnx-versal-virt.c
index ee12822..8482cd6 100644
--- a/hw/arm/xlnx-versal-virt.c
+++ b/hw/arm/xlnx-versal-virt.c
@@ -39,6 +39,8 @@ struct VersalVirt {
 uint32_t ethernet_phy[2];
 uint32_t clk_125Mhz;
 uint32_t clk_25Mhz;
+uint32_t usb;
+uint32_t dwc;
 } phandle;
 struct arm_boot_info binfo;
 
@@ -66,6 +68,8 @@ static void fdt_create(VersalVirt *s)
 s->phandle.clk_25Mhz = qemu_fdt_alloc_phandle(s->fdt);
 s->phandle.clk_125Mhz = qemu_fdt_alloc_phandle(s->fdt);
 
+s->phandle.usb = qemu_fdt_alloc_phandle(s->fdt);
+s->phandle.dwc = qemu_fdt_alloc_phandle(s->fdt);
 /* Create /chosen node for load_dtb.  */
 qemu_fdt_add_subnode(s->fdt, "/chosen");
 
@@ -148,6 +152,56 @@ static void fdt_add_timer_nodes(VersalVirt *s)
  compat, sizeof(compat));
 }
 
+static void fdt_add_usb_xhci_nodes(VersalVirt *s)
+{
+const char clocknames[] = "bus_clk\0ref_clk";
+const char irq_name[] = "dwc_usb3";
+const char compatVersalDWC3[] = "xlnx,versal-dwc3";
+const char compatDWC3[] = "snps,dwc3";
+char *name = g_strdup_printf("/usb@%" PRIx32, MM_USB2_CTRL_REGS);
+
+qemu_fdt_add_subnode(s->fdt, name);
+qemu_fdt_setprop(s->fdt, name, "compatible",
+ compatVersalDWC3, sizeof(compatVersalDWC3));
+qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
+ 2, MM_USB2_CTRL_REGS,
+ 2, MM_USB2_CTRL_REGS_SIZE);
+qemu_fdt_setprop(s->fdt, name, "clock-names",
+ clocknames, sizeof(clocknames));
+qemu_fdt_setprop_cells(s->fdt, name, "clocks",
+   s->phandle.clk_25Mhz, s->phandle.clk_125Mhz);
+qemu_fdt_setprop(s->fdt, name, "ranges", NULL, 0);
+qemu_fdt_setprop_cell(s->fdt, name, "#address-cells", 2);
+qemu_fdt_setprop_cell(s->fdt, name, "#size-cells", 2);
+qemu_fdt_setprop_cell(s->fdt, name, "phandle", s->phandle.usb);
+g_free(name);
+
+name = g_strdup_printf("/usb@%" PRIx32 "/dwc3@%" PRIx32,
+   MM_USB2_CTRL_REGS, MM_USB_0);
+qemu_fdt_add_subnode(s->fdt, name);
+qemu_fdt_setprop(s->fdt, name, "compatible",
+ compatDWC3, sizeof(compatDWC3));
+qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
+ 2, MM_USB_0, 2, MM_USB_0_SIZE);
+qemu_fdt_setprop(s->fdt, name, "interrupt-names",
+ irq_name, sizeof(irq_name));
+qemu_fdt_setprop_cells(s->fdt, name, "interrupts",
+   GIC_FDT_IRQ_TYPE_SPI, VERSAL_USB0_IRQ_0,
+   GIC_FDT_IRQ_FLAGS_LEVEL_HI);
+qemu_fdt_setprop_cell(s->fdt, name,
+  "snps,quirk-frame-length-adjustment", 0x20);
+qemu_fdt_setprop_cells(s->fdt, name, "#stream-id-cells", 1);
+qemu_fdt_setprop_string(s->fdt, name, "dr_mode", "host");
+qemu_fdt_setprop_string(s->fdt, name, "phy-names", "usb3-phy");
+qemu_fdt_setprop(s->fdt, name, "snps,dis_u2_susphy_quirk", NULL, 0);
+qemu_fdt_setprop(s->fdt, name, "snps,dis_u3_susphy_quirk", NULL, 0);
+qemu_fdt_setprop(s->fdt, name, "snps,refclk_fladj", NULL, 0);
+qemu_fdt_setprop(s->fdt, name, "snps,mask_phy_reset", NULL, 0);
+qemu_fdt_setprop_cell(s->fdt, name, "phandle", s->phandle.dwc);
+qemu_fdt_setprop_string(s->fdt, name, "maximum-speed", "high-speed");
+g_free(name);
+}
+
 static void fdt_add_uart_nodes(VersalVirt *s)
 {
 uint64_t addrs[] = { MM_UART1, MM_UART0 };
@@ -515,6 +569,7 @@ static void versal_virt_init(MachineState *machine)
 fdt_add_gic_nodes(s);
 fdt_add_timer_nodes(s);
 fdt_add_zdma_nodes(s);
+fdt_add_usb_xhci_nodes(s);
 fdt_add_sd_nodes(s);
 fdt_add_rtc_node(s);
 fdt_add_cpu_nodes(s, psci_conduit);
diff --git a/hw/arm/xlnx-versal.c b/hw/arm/xlnx-versal.c
index 12ba6c4..b077716 100644
--- a/hw/arm/xlnx-versal.c
+++ b/hw/arm/xlnx-versal.c
@@ -145,6 +145,31 @@ static void versal_create_uarts(Versal *s, qemu_irq *pic)
 }
 }
 
+static void versal_create_usbs(Versal *s, qemu_irq *pic)
+{
+DeviceState *dev;
+MemoryRegion *mr;
+
+object_initialize_child(OBJECT(s), "usb2", >lpd.iou.usb,
+TYPE_XILINX_VERSAL_USB2);
+dev = DEVICE(>lpd.iou.usb);
+
+object_property_set_link(OBJECT(dev), "dma", OBJECT(>mr_ps),
+

[PATCH v15 0/4] Add Versal usb model

This patch series adds dwc3 usb controller to versal SOC.

Changes for V2:
Make XHCIState non-qom
Use container_of functions for retrieving pci device instance
Initialize the AddressSpace pointer in PATCH 1/3 itself
Changes for V3:
Convert XHCIState to TYPE_DEVICE and register as a child of XHCIPciState.
Changes for V4:
Add DWC3 usb controller
Add versal, usb2-reg module
Connect sysbus xhci to versal virt board
Changes for V5:
Add extra info about dwc3 and usb2_regs devices in commit messages
Use only one irq for versal usb controller
Mark the unimplemented registers in dwc3 controller
Rebase the patches over master.
Move few misplaced contents from patch 2/7 to 3/7.
Fix the author names in the header.
Move the inclusion of "sysemu/dma.h" from patch 1/7 to 3/7
Changes for V6:
Fixed style issue in patch 7/7
Renamed usb2_reg model to VersalUsb2CtrlReg
Fixed author in headers
Changes for V7:
Create a usb structure to keep things clean
Remove the repeated patch in the series i.e 5/7
Changes for V8:
Fix vmstate structs to support cross version migration.
Changes for V9:
Added recommended changes to fix vmstate migration.
Fixed commit message on 3/7.
Changes for V10:
use vmstate_post_load available in VMStateDescription
tested vmstate cross migration.
Changes for V11:
Removed the patches which got accepted
Changed object name "USB2Reg" -> "ctrl"
Updated Subject line on cover letter.
Changes for V12:
Use reset class for usb2-ctrl-regs module
Move the few register update to realize
Marked registers which are unimplemented in regapi model
Changes for V13:
Add usb subsystem for xilinx devices,
Memory Map xhci internally to dwc3 device,
Add respective changes to connect VersalUsb2 subsystem to xilinx-versal.
Changes for V14:
Fix "make check" failures, by specifying the right dependencies in Kconfig for
usb_dwc3 device.
Changes for V15:
Fixed commit message in Patch 4/4, and addressed code style issues.
In VersalDWC3 model added vmstate struct.

Sai Pavan Boddu (2):
  usb: Add versal-usb2-ctrl-regs module
  usb: xlnx-usb-subsystem: Add xilinx usb subsystem

Vikram Garhwal (2):
  usb: Add DWC3 model
  arm: xlnx-versal: Connect usb to virt-versal

 hw/arm/xlnx-versal-virt.c   |  55 +++
 hw/arm/xlnx-versal.c|  26 ++
 hw/usb/Kconfig  |  10 +
 hw/usb/hcd-dwc3.c   | 689 
 hw/usb/meson.build  |   3 +
 hw/usb/xlnx-usb-subsystem.c |  94 
 hw/usb/xlnx-versal-usb2-ctrl-regs.c | 229 +
 include/hw/arm/xlnx-versal.h|   9 +
 include/hw/usb/hcd-dwc3.h   |  55 +++
 include/hw/usb/xlnx-usb-subsystem.h |  45 ++
 include/hw/usb/xlnx-versal-usb2-ctrl-regs.h |  45 ++
 11 files changed, 1260 insertions(+)
 create mode 100644 hw/usb/hcd-dwc3.c
 create mode 100644 hw/usb/xlnx-usb-subsystem.c
 create mode 100644 hw/usb/xlnx-versal-usb2-ctrl-regs.c
 create mode 100644 include/hw/usb/hcd-dwc3.h
 create mode 100644 include/hw/usb/xlnx-usb-subsystem.h
 create mode 100644 include/hw/usb/xlnx-versal-usb2-ctrl-regs.h

-- 
2.7.4

[PATCH v15 2/4] usb: Add DWC3 model

From: Vikram Garhwal 

This patch adds skeleton model of dwc3 usb controller attached to
xhci-sysbus device. It defines global register space of DWC3 controller,
global registers control the AXI/AHB interfaces properties, external FIFO
support and event count support. All of which are unimplemented at
present; we are only supporting core reset and read of ID register.

Signed-off-by: Vikram Garhwal 
Signed-off-by: Sai Pavan Boddu 
Reviewed-by: Edgar E. Iglesias 
---
 hw/usb/Kconfig|   5 +
 hw/usb/hcd-dwc3.c | 689 ++
 hw/usb/meson.build|   1 +
 include/hw/usb/hcd-dwc3.h |  55 
 4 files changed, 750 insertions(+)
 create mode 100644 hw/usb/hcd-dwc3.c
 create mode 100644 include/hw/usb/hcd-dwc3.h

diff --git a/hw/usb/Kconfig b/hw/usb/Kconfig
index 7fbae18..56da78a 100644
--- a/hw/usb/Kconfig
+++ b/hw/usb/Kconfig
@@ -112,3 +112,8 @@ config IMX_USBPHY
 bool
 default y
 depends on USB
+
+config USB_DWC3
+bool
+select USB_XHCI_SYSBUS
+select REGISTER
diff --git a/hw/usb/hcd-dwc3.c b/hw/usb/hcd-dwc3.c
new file mode 100644
index 000..d547d05
--- /dev/null
+++ b/hw/usb/hcd-dwc3.c
@@ -0,0 +1,689 @@
+/*
+ * QEMU model of the USB DWC3 host controller emulation.
+ *
+ * This model defines global register space of DWC3 controller. Global
+ * registers control the AXI/AHB interfaces properties, external FIFO support
+ * and event count support. All of which are unimplemented at present. We are
+ * only supporting core reset and read of ID register.
+ *
+ * Copyright (c) 2020 Xilinx Inc. Vikram Garhwal
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "hw/register.h"
+#include "qemu/bitops.h"
+#include "qemu/log.h"
+#include "qom/object.h"
+#include "migration/vmstate.h"
+#include "hw/qdev-properties.h"
+#include "hw/usb/hcd-dwc3.h"
+#include "qapi/error.h"
+
+#ifndef USB_DWC3_ERR_DEBUG
+#define USB_DWC3_ERR_DEBUG 0
+#endif
+
+#define HOST_MODE   1
+#define FIFO_LEN 0x1000
+
+REG32(GSBUSCFG0, 0x00)
+FIELD(GSBUSCFG0, DATRDREQINFO, 28, 4)
+FIELD(GSBUSCFG0, DESRDREQINFO, 24, 4)
+FIELD(GSBUSCFG0, DATWRREQINFO, 20, 4)
+FIELD(GSBUSCFG0, DESWRREQINFO, 16, 4)
+FIELD(GSBUSCFG0, RESERVED_15_12, 12, 4)
+FIELD(GSBUSCFG0, DATBIGEND, 11, 1)
+FIELD(GSBUSCFG0, DESBIGEND, 10, 1)
+FIELD(GSBUSCFG0, RESERVED_9_8, 8, 2)
+FIELD(GSBUSCFG0, INCR256BRSTENA, 7, 1)
+FIELD(GSBUSCFG0, INCR128BRSTENA, 6, 1)
+FIELD(GSBUSCFG0, INCR64BRSTENA, 5, 1)
+FIELD(GSBUSCFG0, INCR32BRSTENA, 4, 1)
+FIELD(GSBUSCFG0, INCR16BRSTENA, 3, 1)
+FIELD(GSBUSCFG0, INCR8BRSTENA, 2, 1)
+FIELD(GSBUSCFG0, INCR4BRSTENA, 1, 1)
+FIELD(GSBUSCFG0, INCRBRSTENA, 0, 1)
+REG32(GSBUSCFG1, 0x04)
+FIELD(GSBUSCFG1, RESERVED_31_13, 13, 19)
+FIELD(GSBUSCFG1, EN1KPAGE, 12, 1)
+FIELD(GSBUSCFG1, PIPETRANSLIMIT, 8, 4)
+FIELD(GSBUSCFG1, RESERVED_7_0, 0, 8)
+REG32(GTXTHRCFG, 0x08)
+FIELD(GTXTHRCFG, RESERVED_31, 31, 1)
+FIELD(GTXTHRCFG, RESERVED_30, 30, 1)
+FIELD(GTXTHRCFG, USBTXPKTCNTSEL, 29, 1)
+FIELD(GTXTHRCFG, RESERVED_28, 28, 1)
+FIELD(GTXTHRCFG, USBTXPKTCNT, 24, 4)
+FIELD(GTXTHRCFG, USBMAXTXBURSTSIZE, 16, 8)
+FIELD(GTXTHRCFG, RESERVED_15, 15, 1)
+FIELD(GTXTHRCFG, RESERVED_14, 14, 1)
+FIELD(GTXTHRCFG, RESERVED_13_11, 11, 3)
+FIELD(GTXTHRCFG, RESERVED_10_0, 0, 11)
+REG32(GRXTHRCFG, 0x0c)
+FIELD(GRXTHRCFG, RESERVED_31_30, 30, 2)
+FIELD(GRXTHRCFG, USBRXPKTCNTSEL, 29, 1)
+FIELD(GRXTHRCFG, RESERVED_28, 28, 1)
+FIELD(GRXTHRCFG, USBRXPKTCNT, 24, 4)
+FIELD(GRXTHRCFG, USBMAXRXBURSTSIZE, 19, 5)
+FIELD(GRXTHRCFG, RESERVED_18_16, 16, 3)
+FIELD(GRXTHRCFG, RESERVED_15, 15, 1)
+FIELD(GRXTHRCFG, RESERVED_14_13, 13, 2)
+FIELD(GRXTHRCFG, RESVISOCOUTSPC, 0, 13)
+REG32(GCTL, 0x10)
+FIELD(GCTL, PWRDNSCALE, 19, 13)
+FIELD(GCTL, MASTERFILTBYPASS, 18,

[PATCH v15 3/4] usb: xlnx-usb-subsystem: Add xilinx usb subsystem

This model is a top level integration wrapper for hcd-dwc3 and
versal-usb2-ctrl-regs modules, this is used by xilinx versal soc's and
future xilinx usb subsystems would also be part of it.

Signed-off-by: Sai Pavan Boddu 
Reviewed-by: Edgar E. Iglesias 
Reviewed-by: Peter Maydell 
---
 hw/usb/Kconfig  |  5 ++
 hw/usb/meson.build  |  1 +
 hw/usb/xlnx-usb-subsystem.c | 94 +
 include/hw/usb/xlnx-usb-subsystem.h | 45 ++
 4 files changed, 145 insertions(+)
 create mode 100644 hw/usb/xlnx-usb-subsystem.c
 create mode 100644 include/hw/usb/xlnx-usb-subsystem.h

diff --git a/hw/usb/Kconfig b/hw/usb/Kconfig
index 56da78a..40093d7 100644
--- a/hw/usb/Kconfig
+++ b/hw/usb/Kconfig
@@ -117,3 +117,8 @@ config USB_DWC3
 bool
 select USB_XHCI_SYSBUS
 select REGISTER
+
+config XLNX_USB_SUBSYS
+bool
+default y if XLNX_VERSAL
+select USB_DWC3
diff --git a/hw/usb/meson.build b/hw/usb/meson.build
index 433c27e..f46c6b6 100644
--- a/hw/usb/meson.build
+++ b/hw/usb/meson.build
@@ -32,6 +32,7 @@ softmmu_ss.add(when: 'CONFIG_TUSB6010', if_true: 
files('tusb6010.c'))
 softmmu_ss.add(when: 'CONFIG_IMX', if_true: files('chipidea.c'))
 softmmu_ss.add(when: 'CONFIG_IMX_USBPHY', if_true: files('imx-usb-phy.c'))
 specific_ss.add(when: 'CONFIG_XLNX_VERSAL', if_true: 
files('xlnx-versal-usb2-ctrl-regs.c'))
+specific_ss.add(when: 'CONFIG_XLNX_USB_SUBSYS', if_true: 
files('xlnx-usb-subsystem.c'))
 
 # emulated usb devices
 softmmu_ss.add(when: 'CONFIG_USB', if_true: files('dev-hub.c'))
diff --git a/hw/usb/xlnx-usb-subsystem.c b/hw/usb/xlnx-usb-subsystem.c
new file mode 100644
index 000..5682573
--- /dev/null
+++ b/hw/usb/xlnx-usb-subsystem.c
@@ -0,0 +1,94 @@
+/*
+ * QEMU model of the Xilinx usb subsystem
+ *
+ * Copyright (c) 2020 Xilinx Inc. Sai Pavan Boddu 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/sysbus.h"
+#include "hw/irq.h"
+#include "hw/register.h"
+#include "qemu/bitops.h"
+#include "qemu/log.h"
+#include "qom/object.h"
+#include "qapi/error.h"
+#include "hw/qdev-properties.h"
+#include "hw/usb/xlnx-usb-subsystem.h"
+
+static void versal_usb2_realize(DeviceState *dev, Error **errp)
+{
+VersalUsb2 *s = VERSAL_USB2(dev);
+SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+Error *err = NULL;
+
+sysbus_realize(SYS_BUS_DEVICE(>dwc3), );
+if (err) {
+error_propagate(errp, err);
+return;
+}
+sysbus_realize(SYS_BUS_DEVICE(>usb2Ctrl), );
+if (err) {
+error_propagate(errp, err);
+return;
+}
+sysbus_init_mmio(sbd, >dwc3_mr);
+sysbus_init_mmio(sbd, >usb2Ctrl_mr);
+qdev_pass_gpios(DEVICE(>dwc3.sysbus_xhci), dev, SYSBUS_DEVICE_GPIO_IRQ);
+}
+
+static void versal_usb2_init(Object *obj)
+{
+VersalUsb2 *s = VERSAL_USB2(obj);
+
+object_initialize_child(obj, "versal.dwc3", >dwc3,
+TYPE_USB_DWC3);
+object_initialize_child(obj, "versal.usb2-ctrl", >usb2Ctrl,
+TYPE_XILINX_VERSAL_USB2_CTRL_REGS);
+memory_region_init_alias(>dwc3_mr, obj, "versal.dwc3_alias",
+ >dwc3.iomem, 0, DWC3_SIZE);
+memory_region_init_alias(>usb2Ctrl_mr, obj, "versal.usb2Ctrl_alias",
+ >usb2Ctrl.iomem, 0, USB2_REGS_R_MAX * 4);
+qdev_alias_all_properties(DEVICE(>dwc3), obj);
+qdev_alias_all_properties(DEVICE(>dwc3.sysbus_xhci), obj);
+object_property_add_alias(obj, "dma", OBJECT(>dwc3.sysbus_xhci), "dma");
+}
+
+static void versal_usb2_class_init(ObjectClass *klass, void *data)
+{
+DeviceClass *dc = DEVICE_CLASS(klass);
+
+dc->realize = versal_usb2_realize;
+}
+
+static const TypeInfo versal_usb2_info = {
+.name  = TYPE_XILINX_VERSAL_USB2,
+.parent= TYPE_SYS_BUS_DEVICE,
+.instance_size = sizeof(VersalUsb2),
+.class_init=

RE: [PATCH v14 4/4] arm: xlnx-versal: Connect usb to virt-versal

Hi Peter/Edgar,

> -Original Message-
> From: Edgar E. Iglesias 
> Sent: Thursday, December 3, 2020 11:35 PM
> To: Peter Maydell 
> Cc: Sai Pavan Boddu ; Markus Armbruster
> ; Marc-André Lureau ;
> Paolo Bonzini ; Gerd Hoffmann ;
> Edgar Iglesias ; Francisco Eduardo Iglesias
> ; Alistair Francis ; Eduardo
> Habkost ; Ying Fang ;
> Philippe Mathieu-Daudé ; Vikram Garhwal
> ; Paul Zimmerman ; Sai Pavan Boddu
> ; QEMU Developers 
> Subject: Re: [PATCH v14 4/4] arm: xlnx-versal: Connect usb to virt-versal
> 
> On Tue, Dec 01, 2020 at 11:34:25AM +, Peter Maydell wrote:
> > On Tue, 1 Dec 2020 at 08:34, Sai Pavan Boddu 
> wrote:
> > >
> > > From: Vikram Garhwal 
> > >
> > > Connect VersalUbs2 subsystem to xlnx-versal SOC, its placed
> >
> > Typo : "VersalUSB2".
> >
> >
> > > in iou of lpd domain and configure it as dual port host controller.
> > > Add the respective guest dts nodes for "xlnx-versal-virt" machine.
> > >
> > > Signed-off-by: Vikram Garhwal 
> > > Signed-off-by: Sai Pavan Boddu 
> >
> > Code looks OK but I'll let somebody else from Xilinx review the detail.
> >
> > > +static void fdt_add_usb_xhci_nodes(VersalVirt *s) {
> > > +const char clocknames[] = "bus_clk\0ref_clk";
> > > +char *name = g_strdup_printf("/usb@%" PRIx32,
> MM_USB2_CTRL_REGS);
> > > +const char compat[] = "xlnx,versal-dwc3";
> > > +
> > > +qemu_fdt_add_subnode(s->fdt, name);
> > > +qemu_fdt_setprop(s->fdt, name, "compatible",
> > > + compat, sizeof(compat));
> > > +qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
> > > + 2, MM_USB2_CTRL_REGS,
> > > + 2, MM_USB2_CTRL_REGS_SIZE);
> > > +qemu_fdt_setprop(s->fdt, name, "clock-names",
> > > + clocknames, sizeof(clocknames));
> > > +qemu_fdt_setprop_cells(s->fdt, name, "clocks",
> > > +   s->phandle.clk_25Mhz, 
> > > s->phandle.clk_125Mhz);
> > > +qemu_fdt_setprop(s->fdt, name, "ranges", NULL, 0);
> > > +qemu_fdt_setprop_cell(s->fdt, name, "#address-cells", 2);
> > > +qemu_fdt_setprop_cell(s->fdt, name, "#size-cells", 2);
> > > +qemu_fdt_setprop_cell(s->fdt, name, "phandle", s->phandle.usb);
> > > +g_free(name);
> > > +
> > > +{
> > > +const char irq_name[] = "dwc_usb3";
> > > +const char compat[] = "snps,dwc3";
> >
> > Minor coding style side note, but I'm not hugely fond of code blocks
> > in the middle of functions just for declaring variables. You could
> > either put these variable declarations at the top of the function, or
> > if you think the code in the block is self contained and worth putting
> > in its own function you could do that.
[Sai Pavan Boddu] Yeah. I could fix this in V15, Thanks.

> >
> 
> Hi Sai, I believe I had already reviewed a previous version of this patch so
> after you fix the stuff that Peter pointed out feel free to add my
> Rb:
> 
> Reviewed-by: Edgar E. Iglesias 
[Sai Pavan Boddu] Thanks Edgar.

Regards,
Sai Pavan
> 
> Cheers,
> Edgar

Re: [PATCH v13 09/10] stream: skip filters when writing backing file name to QCOW2 header


02.12.2020 21:31, Andrey Shinkevich wrote:

Avoid writing a filter JSON file name and a filter format name to QCOW2
image when the backing file is being changed after the block stream
job. It can occur due to a concurrent commit job on the same backing
chain.
A user is still able to assign the 'backing-file' parameter for a
block-stream job keeping in mind the possible issue mentioned above.
If the user does not specify the 'backing-file' parameter, QEMU will
assign it automatically.

Signed-off-by: Andrey Shinkevich 



Reviewed-by: Vladimir Sementsov-Ogievskiy 



--
Best regards,
Vladimir

Re: [PATCH 2/8] hvf: Move common code out

2020-12-03 Thread Peter Collingbourne

On Thu, Dec 3, 2020 at 1:41 AM Roman Bolshakov  wrote:
>
> On Mon, Nov 30, 2020 at 04:00:11PM -0800, Peter Collingbourne wrote:
> > On Mon, Nov 30, 2020 at 3:18 PM Alexander Graf  wrote:
> > >
> > >
> > > On 01.12.20 00:01, Peter Collingbourne wrote:
> > > > On Mon, Nov 30, 2020 at 1:40 PM Alexander Graf  wrote:
> > > >> Hi Peter,
> > > >>
> > > >> On 30.11.20 22:08, Peter Collingbourne wrote:
> > > >>> On Mon, Nov 30, 2020 at 12:56 PM Frank Yang  wrote:
> > > 
> > >  On Mon, Nov 30, 2020 at 12:34 PM Alexander Graf  
> > >  wrote:
> > > > Hi Frank,
> > > >
> > > > Thanks for the update :). Your previous email nudged me into the 
> > > > right direction. I previously had implemented WFI through the 
> > > > internal timer framework which performed way worse.
> > >  Cool, glad it's helping. Also, Peter found out that the main thing 
> > >  keeping us from just using cntpct_el0 on the host directly and 
> > >  compare with cval is that if we sleep, cval is going to be much < 
> > >  cntpct_el0 by the sleep time. If we can get either the architecture 
> > >  or macos to read out the sleep time then we might be able to not 
> > >  have to use a poll interval either!
> > > > Along the way, I stumbled over a few issues though. For starters, 
> > > > the signal mask for SIG_IPI was not set correctly, so while 
> > > > pselect() would exit, the signal would never get delivered to the 
> > > > thread! For a fix, check out
> > > >
> > > > 
> > > > https://patchew.org/QEMU/20201130030723.78326-1-ag...@csgraf.de/20201130030723.78326-4-ag...@csgraf.de/
> > > >
> > >  Thanks, we'll take a look :)
> > > 
> > > > Please also have a look at my latest stab at WFI emulation. It 
> > > > doesn't handle WFE (that's only relevant in overcommitted 
> > > > scenarios). But it does handle WFI and even does something similar 
> > > > to hlt polling, albeit not with an adaptive threshold.
> > > >>> Sorry I'm not subscribed to qemu-devel (I'll subscribe in a bit) so
> > > >>> I'll reply to your patch here. You have:
> > > >>>
> > > >>> +/* Set cpu->hvf->sleeping so that we get a
> > > >>> SIG_IPI signal. */
> > > >>> +cpu->hvf->sleeping = true;
> > > >>> +smp_mb();
> > > >>> +
> > > >>> +/* Bail out if we received an IRQ meanwhile */
> > > >>> +if (cpu->thread_kicked || 
> > > >>> (cpu->interrupt_request &
> > > >>> +(CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> > > >>> +cpu->hvf->sleeping = false;
> > > >>> +break;
> > > >>> +}
> > > >>> +
> > > >>> +/* nanosleep returns on signal, so we wake up on 
> > > >>> kick. */
> > > >>> +nanosleep(ts, NULL);
> > > >>>
> > > >>> and then send the signal conditional on whether sleeping is true, but
> > > >>> I think this is racy. If the signal is sent after sleeping is set to
> > > >>> true but before entering nanosleep then I think it will be ignored and
> > > >>> we will miss the wakeup. That's why in my implementation I block IPI
> > > >>> on the CPU thread at startup and then use pselect to atomically
> > > >>> unblock and begin sleeping. The signal is sent unconditionally so
> > > >>> there's no need to worry about races between actually sleeping and the
> > > >>> "we think we're sleeping" state. It may lead to an extra wakeup but
> > > >>> that's better than missing it entirely.
> > > >>
> > > >> Thanks a bunch for the comment! So the trick I was using here is to
> > > >> modify the timespec from the kick function before sending the IPI
> > > >> signal. That way, we know that either we are inside the sleep (where 
> > > >> the
> > > >> signal wakes it up) or we are outside the sleep (where timespec={} will
> > > >> make it return immediately).
> > > >>
> > > >> The only race I can think of is if nanosleep does calculations based on
> > > >> the timespec and we happen to send the signal right there and then.
> > > > Yes that's the race I was thinking of. Admittedly it's a small window
> > > > but it's theoretically possible and part of the reason why pselect was
> > > > created.
> > > >
> > > >> The problem with blocking IPIs is basically what Frank was describing
> > > >> earlier: How do you unset the IPI signal pending status? If the signal
> > > >> is never delivered, how can pselect differentiate "signal from last 
> > > >> time
> > > >> is still pending" from "new signal because I got an IPI"?
> > > > In this case we would take the additional wakeup which should be
> > > > harmless since we will take the WFx exit again and put us in the
> > > > correct state. But that's a lot better than busy looping.
> > >
> > >
> > > I'm not sure I follow. I'm thinking of the following scenario:
> > >
> > >- trap into WFI handler
> > >- go

Re: [PATCH] arm/hvf: Optimize and simplify WFI handling

2020-12-03 Thread Peter Collingbourne

On Thu, Dec 3, 2020 at 2:12 AM Roman Bolshakov  wrote:
>
> On Tue, Dec 01, 2020 at 10:59:50AM -0800, Peter Collingbourne wrote:
> > On Tue, Dec 1, 2020 at 3:16 AM Alexander Graf  wrote:
> > >
> > > Hi Peter,
> > >
> > > On 01.12.20 09:21, Peter Collingbourne wrote:
> > > > Sleep on WFx until the VTIMER is due but allow ourselves to be woken
> > > > up on IPI.
> > > >
> > > > Signed-off-by: Peter Collingbourne 
> > >
> > >
> > > Thanks a bunch!
> > >
> > >
> > > > ---
> > > > Alexander Graf wrote:
> > > >> I would love to take a patch from you here :). I'll still be stuck for 
> > > >> a
> > > >> while with the sysreg sync rework that Peter asked for before I can 
> > > >> look
> > > >> at WFI again.
> > > > Okay, here's a patch :) It's a relatively straightforward adaptation
> > > > of what we have in our fork, which can now boot Android to GUI while
> > > > remaining at around 4% CPU when idle.
> > > >
> > > > I'm not set up to boot a full Linux distribution at the moment so I
> > > > tested it on upstream QEMU by running a recent mainline Linux kernel
> > > > with a rootfs containing an init program that just does sleep(5)
> > > > and verified that the qemu process remains at low CPU usage during
> > > > the sleep. This was on top of your v2 plus the last patch of your v1
> > > > since it doesn't look like you have a replacement for that logic yet.
> > > >
> > > >   accel/hvf/hvf-cpus.c |  5 +--
> > > >   include/sysemu/hvf_int.h |  3 +-
> > > >   target/arm/hvf/hvf.c | 94 +++-
> > > >   3 files changed, 28 insertions(+), 74 deletions(-)
> > > >
> > > > diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> > > > index 4360f64671..b2c8fb57f6 100644
> > > > --- a/accel/hvf/hvf-cpus.c
> > > > +++ b/accel/hvf/hvf-cpus.c
> > > > @@ -344,9 +344,8 @@ static int hvf_init_vcpu(CPUState *cpu)
> > > >   sigact.sa_handler = dummy_signal;
> > > >   sigaction(SIG_IPI, , NULL);
> > > >
> > > > -pthread_sigmask(SIG_BLOCK, NULL, );
> > > > -sigdelset(, SIG_IPI);
> > > > -pthread_sigmask(SIG_SETMASK, , NULL);
> > > > +pthread_sigmask(SIG_BLOCK, NULL, >hvf->unblock_ipi_mask);
> > > > +sigdelset(>hvf->unblock_ipi_mask, SIG_IPI);
> > >
> > >
> > > What will this do to the x86 hvf implementation? We're now not
> > > unblocking SIG_IPI again for that, right?
> >
> > Yes and that was the case before your patch series.
> >
> > > >
> > > >   #ifdef __aarch64__
> > > >   r = hv_vcpu_create(>hvf->fd, (hv_vcpu_exit_t 
> > > > **)>hvf->exit, NULL);
> > > > diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> > > > index c56baa3ae8..13adf6ea77 100644
> > > > --- a/include/sysemu/hvf_int.h
> > > > +++ b/include/sysemu/hvf_int.h
> > > > @@ -62,8 +62,7 @@ extern HVFState *hvf_state;
> > > >   struct hvf_vcpu_state {
> > > >   uint64_t fd;
> > > >   void *exit;
> > > > -struct timespec ts;
> > > > -bool sleeping;
> > > > +sigset_t unblock_ipi_mask;
> > > >   };
> > > >
> > > >   void assert_hvf_ok(hv_return_t ret);
> > > > diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> > > > index 8fe10966d2..60a361ff38 100644
> > > > --- a/target/arm/hvf/hvf.c
> > > > +++ b/target/arm/hvf/hvf.c
> > > > @@ -2,6 +2,7 @@
> > > >* QEMU Hypervisor.framework support for Apple Silicon
> > > >
> > > >* Copyright 2020 Alexander Graf 
> > > > + * Copyright 2020 Google LLC
> > > >*
> > > >* This work is licensed under the terms of the GNU GPL, version 2 or 
> > > > later.
> > > >* See the COPYING file in the top-level directory.
> > > > @@ -18,6 +19,7 @@
> > > >   #include "sysemu/hw_accel.h"
> > > >
> > > >   #include 
> > > > +#include 
> > > >
> > > >   #include "exec/address-spaces.h"
> > > >   #include "hw/irq.h"
> > > > @@ -320,18 +322,8 @@ int hvf_arch_init_vcpu(CPUState *cpu)
> > > >
> > > >   void hvf_kick_vcpu_thread(CPUState *cpu)
> > > >   {
> > > > -if (cpu->hvf->sleeping) {
> > > > -/*
> > > > - * When sleeping, make sure we always send signals. Also, 
> > > > clear the
> > > > - * timespec, so that an IPI that arrives between setting 
> > > > hvf->sleeping
> > > > - * and the nanosleep syscall still aborts the sleep.
> > > > - */
> > > > -cpu->thread_kicked = false;
> > > > -cpu->hvf->ts = (struct timespec){ };
> > > > -cpus_kick_thread(cpu);
> > > > -} else {
> > > > -hv_vcpus_exit(>hvf->fd, 1);
> > > > -}
> > > > +cpus_kick_thread(cpu);
> > > > +hv_vcpus_exit(>hvf->fd, 1);
> > >
> > >
> > > This means your first WFI will almost always return immediately due to a
> > > pending signal, because there probably was an IRQ pending before on the
> > > same CPU, no?
> >
> > That's right. Any approach involving the "sleeping" field would need
> > to be implemented carefully to avoid races that may result in missed
> > wakeups so for simplicity I just decided to send both kinds of
> > wakeups. In particular the approach in

Re: [PATCH v14 4/4] arm: xlnx-versal: Connect usb to virt-versal

2020-12-03 Thread Edgar E. Iglesias

On Tue, Dec 01, 2020 at 11:34:25AM +, Peter Maydell wrote:
> On Tue, 1 Dec 2020 at 08:34, Sai Pavan Boddu  
> wrote:
> >
> > From: Vikram Garhwal 
> >
> > Connect VersalUbs2 subsystem to xlnx-versal SOC, its placed
> 
> Typo : "VersalUSB2".
> 
> 
> > in iou of lpd domain and configure it as dual port host controller.
> > Add the respective guest dts nodes for "xlnx-versal-virt" machine.
> >
> > Signed-off-by: Vikram Garhwal 
> > Signed-off-by: Sai Pavan Boddu 
> 
> Code looks OK but I'll let somebody else from Xilinx review the detail.
> 
> > +static void fdt_add_usb_xhci_nodes(VersalVirt *s)
> > +{
> > +const char clocknames[] = "bus_clk\0ref_clk";
> > +char *name = g_strdup_printf("/usb@%" PRIx32, MM_USB2_CTRL_REGS);
> > +const char compat[] = "xlnx,versal-dwc3";
> > +
> > +qemu_fdt_add_subnode(s->fdt, name);
> > +qemu_fdt_setprop(s->fdt, name, "compatible",
> > + compat, sizeof(compat));
> > +qemu_fdt_setprop_sized_cells(s->fdt, name, "reg",
> > + 2, MM_USB2_CTRL_REGS,
> > + 2, MM_USB2_CTRL_REGS_SIZE);
> > +qemu_fdt_setprop(s->fdt, name, "clock-names",
> > + clocknames, sizeof(clocknames));
> > +qemu_fdt_setprop_cells(s->fdt, name, "clocks",
> > +   s->phandle.clk_25Mhz, 
> > s->phandle.clk_125Mhz);
> > +qemu_fdt_setprop(s->fdt, name, "ranges", NULL, 0);
> > +qemu_fdt_setprop_cell(s->fdt, name, "#address-cells", 2);
> > +qemu_fdt_setprop_cell(s->fdt, name, "#size-cells", 2);
> > +qemu_fdt_setprop_cell(s->fdt, name, "phandle", s->phandle.usb);
> > +g_free(name);
> > +
> > +{
> > +const char irq_name[] = "dwc_usb3";
> > +const char compat[] = "snps,dwc3";
> 
> Minor coding style side note, but I'm not hugely fond of
> code blocks in the middle of functions just for declaring
> variables. You could either put these variable declarations
> at the top of the function, or if you think the code in the
> block is self contained and worth putting in its own function
> you could do that.
>

Hi Sai, I believe I had already reviewed a previous version of this
patch so after you fix the stuff that Peter pointed out feel free to add my
Rb:

Reviewed-by: Edgar E. Iglesias 

Cheers,
Edgar

Re: [PATCH v3 08/10] arm/hvf: Add a WFI handler

2020-12-03 Thread Peter Collingbourne

On Thu, Dec 3, 2020 at 2:39 AM Roman Bolshakov  wrote:
>
> On Wed, Dec 02, 2020 at 08:04:06PM +0100, Alexander Graf wrote:
> > From: Peter Collingbourne 
> >
> > Sleep on WFI until the VTIMER is due but allow ourselves to be woken
> > up on IPI.
> >
> > Signed-off-by: Peter Collingbourne 
> > [agraf: Remove unused 'set' variable, always advance PC on WFX trap]
> > Signed-off-by: Alexander Graf 
> > ---
> >  accel/hvf/hvf-cpus.c |  5 ++--
> >  include/sysemu/hvf_int.h |  1 +
> >  target/arm/hvf/hvf.c | 55 
> >  3 files changed, 58 insertions(+), 3 deletions(-)
> >
> > diff --git a/accel/hvf/hvf-cpus.c b/accel/hvf/hvf-cpus.c
> > index e613c22ad0..a981ccde70 100644
> > --- a/accel/hvf/hvf-cpus.c
> > +++ b/accel/hvf/hvf-cpus.c
> > @@ -337,15 +337,14 @@ static int hvf_init_vcpu(CPUState *cpu)
> >  cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
> >
> >  /* init cpu signals */
> > -sigset_t set;
> >  struct sigaction sigact;
> >
> >  memset(, 0, sizeof(sigact));
> >  sigact.sa_handler = dummy_signal;
> >  sigaction(SIG_IPI, , NULL);
> >
> > -pthread_sigmask(SIG_BLOCK, NULL, );
> > -sigdelset(, SIG_IPI);
> > +pthread_sigmask(SIG_BLOCK, NULL, >hvf->unblock_ipi_mask);
> > +sigdelset(>hvf->unblock_ipi_mask, SIG_IPI);
> >
> >  #ifdef __aarch64__
> >  r = hv_vcpu_create(>hvf->fd, (hv_vcpu_exit_t **)>hvf->exit, 
> > NULL);
> > diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h
> > index 5f15119184..13adf6ea77 100644
> > --- a/include/sysemu/hvf_int.h
> > +++ b/include/sysemu/hvf_int.h
> > @@ -62,6 +62,7 @@ extern HVFState *hvf_state;
> >  struct hvf_vcpu_state {
> >  uint64_t fd;
> >  void *exit;
> > +sigset_t unblock_ipi_mask;
> >  };
> >
> >  void assert_hvf_ok(hv_return_t ret);
> > diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> > index 5ecce36d4a..79aeeb237b 100644
> > --- a/target/arm/hvf/hvf.c
> > +++ b/target/arm/hvf/hvf.c
> > @@ -2,6 +2,7 @@
> >   * QEMU Hypervisor.framework support for Apple Silicon
> >
> >   * Copyright 2020 Alexander Graf 
> > + * Copyright 2020 Google LLC
> >   *
> >   * This work is licensed under the terms of the GNU GPL, version 2 or 
> > later.
> >   * See the COPYING file in the top-level directory.
> > @@ -18,6 +19,7 @@
> >  #include "sysemu/hw_accel.h"
> >
> >  #include 
> > +#include 
> >
> >  #include "exec/address-spaces.h"
> >  #include "hw/irq.h"
> > @@ -413,6 +415,7 @@ int hvf_arch_init_vcpu(CPUState *cpu)
> >
> >  void hvf_kick_vcpu_thread(CPUState *cpu)
> >  {
> > +cpus_kick_thread(cpu);
> >  hv_vcpus_exit(>hvf->fd, 1);
> >  }
> >
> > @@ -468,6 +471,18 @@ static int hvf_inject_interrupts(CPUState *cpu)
> >  return 0;
> >  }
> >
> > +static void hvf_wait_for_ipi(CPUState *cpu, struct timespec *ts)
> > +{
> > +/*
> > + * Use pselect to sleep so that other threads can IPI us while we're
> > + * sleeping.
> > + */
> > +qatomic_mb_set(>thread_kicked, false);
> > +qemu_mutex_unlock_iothread();
>
> I raised a concern earlier, but I don't know for sure if a kick could be lost
> right here. On x86 it could be lost.

If the signal is sent right before the pselect() it will be blocked
i.e. left pending. With the pselect() we get an atomic unblock of
SIG_IPI at the same time as we begin sleeping, which means that we
will receive the signal and leave the pselect() immediately.

I think at some point macOS had an incorrect implementation of
pselect() where the signal mask was non-atomically set in userspace
which could lead to the signal being missed but I checked the latest
XNU sources and it looks like the pselect() implementation has been
moved to the kernel.

> > +pselect(0, 0, 0, 0, ts, >hvf->unblock_ipi_mask);
> > +qemu_mutex_lock_iothread();
> > +}
> > +
> >  int hvf_vcpu_exec(CPUState *cpu)
> >  {
> >  ARMCPU *arm_cpu = ARM_CPU(cpu);
> > @@ -579,6 +594,46 @@ int hvf_vcpu_exec(CPUState *cpu)
> >  }
> >  case EC_WFX_TRAP:
> >  advance_pc = true;
> > +if (!(syndrome & WFX_IS_WFE) && !(cpu->interrupt_request &
> > +(CPU_INTERRUPT_HARD | CPU_INTERRUPT_FIQ))) {
> > +
> > +uint64_t ctl;
> > +r = hv_vcpu_get_sys_reg(cpu->hvf->fd, 
> > HV_SYS_REG_CNTV_CTL_EL0,
> > +);
> > +assert_hvf_ok(r);
> > +
> > +if (!(ctl & 1) || (ctl & 2)) {
> > +/* Timer disabled or masked, just wait for an IPI. */
> > +hvf_wait_for_ipi(cpu, NULL);
> > +break;
> > +}
> > +
> > +uint64_t cval;
> > +r = hv_vcpu_get_sys_reg(cpu->hvf->fd, 
> > HV_SYS_REG_CNTV_CVAL_EL0,
> > +);
> > +assert_hvf_ok(r);
> > +
> > +int64_t ticks_to_sleep = cval - mach_absolute_time();
>
>
> Apple reference recommends to use [1]:
>
>

Re: [PATCH 00/18] qapi/qom: QAPIfy object-add

2020-12-03 Thread Eduardo Habkost

On Thu, Dec 03, 2020 at 07:10:37PM +0100, Paolo Bonzini wrote:
> On 03/12/20 18:52, Eduardo Habkost wrote:
> > On Thu, Dec 03, 2020 at 05:50:46PM +0100, Paolo Bonzini wrote:
> > > On 03/12/20 16:15, Kevin Wolf wrote:
> > > > I don't think this is an intermediate state like Eduardo wants to have.
> > > > Creating the object, then setting properties, then realize [1] will fail
> > > > after your change. But keeping it working was the whole point of the
> > > > exercise.
> > > 
> > > With the sample code, you must remove object_class_property_set calls at 
> > > the
> > 
> > Do you mean object_property_set()?
> 
> Yes.
> 
> > > same time as you remove the setters.  Usually that'd be when you convert 
> > > to
> > > QAPI and oc->configure, but it doesn't have to be that way if there are 
> > > good
> > > reasons not to do so.
> > 
> > Having two (or more) similar but incompatible APIs to do exactly
> > the same thing is a mistake we did before, and I wouldn't like us
> > to repeat it.
> > 
> > If we can keep qdev_new() + object_property_set() + realize
> > working after the device is converted, we should.  I believe we
> > can.
> 
> You can.  If you want to do that, you have to give up on removing the
> setters; but that's not so beneficial for devices because they already use
> static properties anyway.  They have much less boilerplate than -object
> objects.

Understood.

We can also get rid of most setters in -object backends using
field properties.  Maybe not a necessary step, but a useful
intermediate step in case the new API takes time to be ready.

> 
> > If we can make object_new_configure() work with all (or most)
> > device types before we manually convert them to the new system,
> > we should.  I believe we can.
> 
> Yup, object_new_configure() is the low-level visitor-based API and therefore
> it supports both properties and oc->configure.

Perfect.  That part was not clear yet to me (I just skimmed to
the example code you posted on the wiki).

> 
> > We may be able to avoid these questions with -object because
> > converting all backends at the same time is doable.  With
> > devices, API usability and maintainability during the transition
> > period (which could be very long) needs to be taken into account.
> 
> I think we're in violent agreement. :)
> 
> Paolo
> 

-- 
Eduardo

Re: [PATCH] tests/acceptance: test hot(un)plug of ccw devices

2020-12-03 Thread Thomas Huth

On 03/12/2020 19.11, Philippe Mathieu-Daudé wrote:
> On 12/3/20 6:22 PM, Thomas Huth wrote:
>> On 03/12/2020 16.39, Cornelia Huck wrote:
>>> Hotplug a virtio-net-ccw device, and then hotunplug it again.
>>
>> Good idea! ... is it also possible with a pci device?
>>
>>> Signed-off-by: Cornelia Huck 
>>> ---
> ...
>>> +exec_command_and_wait_for_pattern(self, 'dmesg | tail -n 1', 'CRW')
>>
>> That looks like it could be a little bit racy ... what if the kernel outputs
>> another log message by chance, so that tail -n 1 reports that instead.
>>
>> I think it would be better to clear the dmesg log ("dmesg -c") before
>> plugging, and then look at all the new output of "dmesg" without using
>> "tail" afterwards.
> 
> "dmesg --follow"?

Then you'd need to send control-c afterwards to stop it? ... not sure
whether that's such a better solution...

 Thomas

Re: [PATCH] tests/acceptance: test hot(un)plug of ccw devices

2020-12-03 Thread Philippe Mathieu-Daudé

On 12/3/20 6:22 PM, Thomas Huth wrote:
> On 03/12/2020 16.39, Cornelia Huck wrote:
>> Hotplug a virtio-net-ccw device, and then hotunplug it again.
> 
> Good idea! ... is it also possible with a pci device?
> 
>> Signed-off-by: Cornelia Huck 
>> ---
...
>> +exec_command_and_wait_for_pattern(self, 'dmesg | tail -n 1', 'CRW')
> 
> That looks like it could be a little bit racy ... what if the kernel outputs
> another log message by chance, so that tail -n 1 reports that instead.
> 
> I think it would be better to clear the dmesg log ("dmesg -c") before
> plugging, and then look at all the new output of "dmesg" without using
> "tail" afterwards.

"dmesg --follow"?

Re: [PATCH-for-5.2? 1/1] Acceptance tests: bump Fedora to 32

2020-12-03 Thread Willian Rampazzo

On Thu, Dec 3, 2020 at 2:30 PM Cleber Rosa  wrote:
>
> On Thu, Dec 03, 2020 at 05:02:33PM +, Daniel P. Berrangé wrote:
> > I think the problem with the Fedora acceptance is that we'll be constantly
> > chasing a moving target. Every URL we pick will go away 6-12 months later.
> > IOW, while the acceptance test pass today, in 6 months time they'll be
> > failing.  IOW,  switching to F32 doesn't solve the root cause, it just
> > pushes the problem down the road for 6 months until F32 is EOL and hits
> > the same URL change problem.
> >
>
> Just FYI, the tests will not FAIL when the images are removed from the
> official locations.  This is what happens Today:
>
>JOB ID : e85527a9d75023070f15b833eac0f91f803afc83
>JOB LOG: 
> /home/cleber/avocado/job-results/job-2020-12-03T12.21-e85527a/job.log
> (1/1) tests/acceptance/boot_linux.py:BootLinuxX8664.test_pc_q35_kvm: 
> CANCEL: Failed to download/prepare boot image (0.33 s)
>RESULTS: PASS 0 | ERROR 0 | FAIL 0 | SKIP 0 | WARN 0 | INTERRUPT 0 | 
> CANCEL 1
>JOB HTML   : 
> /home/cleber/avocado/job-results/job-2020-12-03T12.21-e85527a/results.html
>JOB TIME   : 0.76 s
>

In the CI, enabling the GitLab cache will help, but will also hide the
URL change problem when it happens.

It is also true if the person ran the test and it successfully
downloaded the image once. For people running the test for the first
time, it will look like your output.

Re: [PATCH 00/18] qapi/qom: QAPIfy object-add

2020-12-03 Thread Paolo Bonzini


On 03/12/20 18:52, Eduardo Habkost wrote:

On Thu, Dec 03, 2020 at 05:50:46PM +0100, Paolo Bonzini wrote:

On 03/12/20 16:15, Kevin Wolf wrote:

I don't think this is an intermediate state like Eduardo wants to have.
Creating the object, then setting properties, then realize [1] will fail
after your change. But keeping it working was the whole point of the
exercise.


With the sample code, you must remove object_class_property_set calls at the


Do you mean object_property_set()?


Yes.


same time as you remove the setters.  Usually that'd be when you convert to
QAPI and oc->configure, but it doesn't have to be that way if there are good
reasons not to do so.


Having two (or more) similar but incompatible APIs to do exactly
the same thing is a mistake we did before, and I wouldn't like us
to repeat it.

If we can keep qdev_new() + object_property_set() + realize
working after the device is converted, we should.  I believe we
can.


You can.  If you want to do that, you have to give up on removing the 
setters; but that's not so beneficial for devices because they already 
use static properties anyway.  They have much less boilerplate than 
-object objects.



If we can make object_new_configure() work with all (or most)
device types before we manually convert them to the new system,
we should.  I believe we can.


Yup, object_new_configure() is the low-level visitor-based API and 
therefore it supports both properties and oc->configure.



We may be able to avoid these questions with -object because
converting all backends at the same time is doable.  With
devices, API usability and maintainability during the transition
period (which could be very long) needs to be taken into account.


I think we're in violent agreement. :)

Paolo

Re: [PATCH 00/18] qapi/qom: QAPIfy object-add

2020-12-03 Thread Paolo Bonzini


On 03/12/20 18:43, Kevin Wolf wrote:

I think I'd want to do step 2 and 3 combined, because converting
user-creatable objects to oc->configure means manually writing the
configure function that will be generated from QAPI in step 3. Writing
code just to throw it away isn't my favourite pastime.


It would only be a couple lines of extra code, but yeah you don't have 
to do it.  It still is clearer to show the steps one by one as they are 
logically separate and it shows what (modulo inlining) the generated 
code ends up doing.


That said having no setter might simplify the introduction of field 
properties too (no allow_set to worry about); perhaps that's a good 
reason to do the oc->configure conversion earlier rather than later, 
especially if QAPI code generation ends up taking a bit longer.


Another good reason is to make sure the API is stable before moving to 
generated code, especially with respect to inheritance.


Paolo

RE: [PATCH v14 3/4] usb: xlnx-usb-subsystem: Add xilinx usb subsystem

Hi Peter,
> -Original Message-
> From: Peter Maydell 
> Sent: Tuesday, December 1, 2020 4:57 PM
> To: Sai Pavan Boddu 
> Cc: Markus Armbruster ; Marc-André Lureau
> ; Paolo Bonzini ;
> Gerd Hoffmann ; Edgar Iglesias ;
> Francisco Eduardo Iglesias ; Alistair Francis
> ; Eduardo Habkost ; Ying
> Fang ; Philippe Mathieu-Daudé
> ; Vikram Garhwal ; Paul Zimmerman
> ; Sai Pavan Boddu ; QEMU
> Developers 
> Subject: Re: [PATCH v14 3/4] usb: xlnx-usb-subsystem: Add xilinx usb
> subsystem
> 
> On Tue, 1 Dec 2020 at 08:34, Sai Pavan Boddu 
> wrote:
> >
> > This model is a top level integration wrapper for hcd-dwc3 and
> > versal-usb2-ctrl-regs modules, this is used by xilinx versal soc's and
> > future xilinx usb subsystems would also be part of it.
> >
> > Signed-off-by: Sai Pavan Boddu 
> 
> Reviewed-by: Peter Maydell 
[Sai Pavan Boddu] Thanks for the review.

Regards,
Sai Pavan
> 
> thanks
> -- PMM

RE: [PATCH v14 2/4] usb: Add DWC3 model

Hi Peter,

> -Original Message-
> From: Peter Maydell 
> Sent: Tuesday, December 1, 2020 4:56 PM
> To: Sai Pavan Boddu 
> Cc: Markus Armbruster ; Marc-André Lureau
> ; Paolo Bonzini ;
> Gerd Hoffmann ; Edgar Iglesias ;
> Francisco Eduardo Iglesias ; Alistair Francis
> ; Eduardo Habkost ; Ying
> Fang ; Philippe Mathieu-Daudé
> ; Vikram Garhwal ; Paul Zimmerman
> ; Sai Pavan Boddu ; QEMU
> Developers 
> Subject: Re: [PATCH v14 2/4] usb: Add DWC3 model
> 
> On Tue, 1 Dec 2020 at 08:34, Sai Pavan Boddu 
> wrote:
> >
> > From: Vikram Garhwal 
> >
> > This patch adds skeleton model of dwc3 usb controller attached to
> > xhci-sysbus device. It defines global register space of DWC3
> > controller, global registers control the AXI/AHB interfaces
> > properties, external FIFO support and event count support. All of
> > which are unimplemented at present,we are only supporting core reset and
> read of ID register.
> >
> > Signed-off-by: Vikram Garhwal 
> > Signed-off-by: Sai Pavan Boddu 
> > Reviewed-by: Edgar E. Iglesias 
> > ---
> 
> 
> > +typedef struct USBDWC3 {
> > +SysBusDevice parent_obj;
> > +MemoryRegion iomem;
> > +MemoryRegion fifos;
> > +XHCISysbusState sysbus_xhci;
> > +
> > +uint32_t regs[USB_DWC3_R_MAX];
> > +RegisterInfo regs_info[USB_DWC3_R_MAX];
> 
> This device has state but is missing the VMState struct support for migration.
> Otherwise it looks OK.
[Sai Pavan Boddu] Ok, I will fix this in V15

Thanks,
Sai Pavan
> 
> thanks
> -- PMM

Re: [PATCH v2] e1000e: Added ICR clearing by corresponding IMS bit.

2020-12-03 Thread Alexander Duyck

On Thu, Dec 3, 2020 at 5:00 AM Andrew Melnychenko  wrote:
>
> Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1707441

So the bugzilla seems to be reporting that the NIC operstate is being
misreported when qemu has configured the link down. Based on the
description it isn't clear to me how this patch addresses that. Some
documentation on how you reproduced the issue, and what was seen
before and after this patch would be useful.

> Added ICR clearing if there is IMS bit - according to the note by

Should probably be "Add" instead of "Added". Same for the title of the patch.

> section 13.3.27 of the 8257X developers manual.
>
> Signed-off-by: Andrew Melnychenko 
> ---
>  hw/net/e1000e_core.c | 10 ++
>  hw/net/trace-events  |  1 +
>  2 files changed, 11 insertions(+)
>
> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
> index 095c01ebc6..9705f5c52e 100644
> --- a/hw/net/e1000e_core.c
> +++ b/hw/net/e1000e_core.c
> @@ -2624,6 +2624,16 @@ e1000e_mac_icr_read(E1000ECore *core, int index)
>  e1000e_clear_ims_bits(core, core->mac[IAM]);
>  }
>
> +/*
> + * PCIe* GbE Controllers Open Source Software Developer's Manual
> + * 13.3.27 Interrupt Cause Read Register
> + */
> +if ((core->mac[ICR] & E1000_ICR_ASSERTED) &&
> +(core->mac[ICR] & core->mac[IMS])) {
> +trace_e1000e_irq_icr_clear_icr_bit_ims(core->mac[ICR], 
> core->mac[IMS]);
> +core->mac[ICR] = 0;
> +}
> +
>  trace_e1000e_irq_icr_read_exit(core->mac[ICR]);
>  e1000e_update_interrupt_state(core);
>  return ret;

Changes like this have historically been problematic. I am curious
what testing had been done on this and with what drivers? Keep in mind
that we have to support several flavors of BSD, Windows, and Linux
with this.

> diff --git a/hw/net/trace-events b/hw/net/trace-events
> index 5db45456d9..2c3521a19c 100644
> --- a/hw/net/trace-events
> +++ b/hw/net/trace-events
> @@ -237,6 +237,7 @@ e1000e_irq_icr_read_entry(uint32_t icr) "Starting ICR 
> read. Current ICR: 0x%x"
>  e1000e_irq_icr_read_exit(uint32_t icr) "Ending ICR read. Current ICR: 0x%x"
>  e1000e_irq_icr_clear_zero_ims(void) "Clearing ICR on read due to zero IMS"
>  e1000e_irq_icr_clear_iame(void) "Clearing ICR on read due to IAME"
> +e1000e_irq_icr_clear_icr_bit_ims(uint32_t icr, uint32_t ims) "Clearing ICR 
> on read due corresponding IMS bit: 0x%x & 0x%x"
>  e1000e_irq_iam_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due 
> to EIAME, IAM: 0x%X, cause: 0x%X"
>  e1000e_irq_icr_clear_eiac(uint32_t icr, uint32_t eiac) "Clearing ICR bits 
> due to EIAC, ICR: 0x%X, EIAC: 0x%X"
>  e1000e_irq_ims_clear_set_imc(uint32_t val) "Clearing IMS bits due to IMC 
> write 0x%x"
> --
> 2.29.2
>

Re: [PATCH 00/18] qapi/qom: QAPIfy object-add

2020-12-03 Thread Eduardo Habkost

On Thu, Dec 03, 2020 at 05:50:46PM +0100, Paolo Bonzini wrote:
> On 03/12/20 16:15, Kevin Wolf wrote:
> > I don't think this is an intermediate state like Eduardo wants to have.
> > Creating the object, then setting properties, then realize [1] will fail
> > after your change. But keeping it working was the whole point of the
> > exercise.
> 
> With the sample code, you must remove object_class_property_set calls at the

Do you mean object_property_set()?

> same time as you remove the setters.  Usually that'd be when you convert to
> QAPI and oc->configure, but it doesn't have to be that way if there are good
> reasons not to do so.

Having two (or more) similar but incompatible APIs to do exactly
the same thing is a mistake we did before, and I wouldn't like us
to repeat it.

If we can keep qdev_new() + object_property_set() + realize
working after the device is converted, we should.  I believe we
can.

If we can make object_new_configure() work with all (or most)
device types before we manually convert them to the new system,
we should.  I believe we can.

We may be able to avoid these questions with -object because
converting all backends at the same time is doable.  With
devices, API usability and maintainability during the transition
period (which could be very long) needs to be taken into account.

-- 
Eduardo

Re: [PATCH 00/18] qapi/qom: QAPIfy object-add

Am 03.12.2020 um 17:50 hat Paolo Bonzini geschrieben:
> On 03/12/20 16:15, Kevin Wolf wrote:
> > I don't think this is an intermediate state like Eduardo wants to have.
> > Creating the object, then setting properties, then realize [1] will fail
> > after your change. But keeping it working was the whole point of the
> > exercise.
> 
> With the sample code, you must remove object_class_property_set calls at the
> same time as you remove the setters.  Usually that'd be when you convert to
> QAPI and oc->configure, but it doesn't have to be that way if there are good
> reasons not to do so.

Okay, thanks, I think I understand now.

So I assume that in the common case, we'll never have the state that you
describe, but we'll want to directly skip to QAPI generated code. But
it's good to know that we can make smaller steps if we need to in more
complicated cases.

> Also, it still allows you to do so one class at a time, and I *think* the
> presence of subclasses or superclasses doesn't matter (only whether
> properties are still writable).  We can use chardevs (see ChardevCommon in
> qapi/char.json) to validate that before tackling devices.

Yes, it looks like it should be working.

> (In fact, this means that your series---plus -object and object_add
> conversion---would be good, pretty much unchanged, as a first step.  The
> second would be adding oc->configure and object_configure, and converting
> all user-creatable objects to oc->configure.  The third would involve QAPI
> code generation).

I think I'd want to do step 2 and 3 combined, because converting
user-creatable objects to oc->configure means manually writing the
configure function that will be generated from QAPI in step 3. Writing
code just to throw it away isn't my favourite pastime.

> > I'm also not really sure why you go from RngEgdOptions to QObject to a
> > visitor, only to reconstruct RngEgdOptions at the end.
> 
> The two visits are just because you cannot create an input visitor directly
> on C data. I stole that from your patch 18/18 actually, just with
> object_new+object_configure instead of user_creatable_add_type.
> 
> But I wouldn't read too much in the automatically-generated *_new functions
> since they are already in QAPI code generator territory. Instead the basic
> object_configure idea can be applied even without having automatic code
> generation.

Yes, I was just wondering why we're going through visitors at all. But
this is what provides the compatibility with the old property system, so
it makes sense if you need an intermediate step.

> > I think the class
> > implementations should have a normal C interface without visitors and we
> > should be able to just pass the existing RngEgdOptions object (or the
> > individual values for its fields for 'boxed': false).
> 
> Sure, however that requires changes to the QAPI code generator which was
> > only item (3) in your list.  Until then you can already work with a
> visitor interface:
> 
>   void rng_egd_configure(Object *obj, Visitor *v, Error **errp)
>   {
>   RngEgd *s = RNG_EGD(obj);
>   s->config = g_new0(MemoryBackendOptions, 1);
>   visit_type_MemoryBackendOptions(v, NULL, &s->config, errp);
> 
>   s->config->share = (s->config->has_share
>   ? s->config->share : false);
>   ...
>   }
> 
> but if you had a QAPI description
> 
>   { 'object': 'RngEgd',
> 'qom-type': 'rng-egd',
> 'configuration': 'RngEgdOptions',
> 'boxed': true
>   }
> 
> the QAPI generator could produce the oc->configure implementation. Similar
> to commands, that implementation would be an unmarshaling wrapper that calls
> out to the natural C interface:
> 
>   void qapi_RngEgd_configure(Object *obj, Visitor *v, Error **errp);
>   {
>   Error *local_err = NULL;
>   g_autoptr(MemoryBackendOptions) *config =
>   g_new0(MemoryBackendOptions, 1);
>   visit_type_MemoryBackendOptions(v, NULL, &s->config, &local_err);
>   if (local_err) {
>   error_propagate(errp, local_err);
>   return;
>   }
>   qom_rng_egd_configure(RNG_EGD(obj), config, errp);
>   }
> 
>   void qom_rng_egd_configure(RngEng *s,
>  RngEgdOptions *config,
>  Error **errp)
>   {
>   config->share = (config->has_share
>? config->share : false);
>   ...
>   s->config = QAPI_CLONE(RngEgdOptions, config);
>   }

Yes, exactly.

Kevin

Re: [PATCH-for-5.2? 1/1] Acceptance tests: bump Fedora to 32

2020-12-03 Thread Daniel P . Berrangé

On Thu, Dec 03, 2020 at 12:29:59PM -0500, Cleber Rosa wrote:
> On Thu, Dec 03, 2020 at 05:02:33PM +, Daniel P. Berrangé wrote:
> > I think the problem with the Fedora acceptance is that we'll be constantly
> > chasing a moving target. Every URL we pick will go away 6-12 months later.
> > IOW, while the acceptance test pass today, in 6 months time they'll be
> > failing.  IOW,  switching to F32 doesn't solve the root cause, it just
> > pushes the problem down the road for 6 months until F32 is EOL and hits
> > the same URL change problem.
> >
> 
> Just FYI, the tests will not FAIL when the images are removed from the
> official locations.  This is what happens today:
> 
>JOB ID : e85527a9d75023070f15b833eac0f91f803afc83
>JOB LOG: 
> /home/cleber/avocado/job-results/job-2020-12-03T12.21-e85527a/job.log
> (1/1) tests/acceptance/boot_linux.py:BootLinuxX8664.test_pc_q35_kvm: 
> CANCEL: Failed to download/prepare boot image (0.33 s)
>RESULTS: PASS 0 | ERROR 0 | FAIL 0 | SKIP 0 | WARN 0 | INTERRUPT 0 | 
> CANCEL 1
>JOB HTML   : 
> /home/cleber/avocado/job-results/job-2020-12-03T12.21-e85527a/results.html
>JOB TIME   : 0.76 s
> 
> And *normally*, we'd have 12+ months between updates, that is from
> Fedora 31 -> 33, 33 -> 35, etc.
> 
> > One way to avoid this is to *not* actually  test a current Fedora.
> > Instead intentionally point at an EOL Fedora release whose URL has
> > already moved to the archive site which is long term stable.
> >
> 
> So the tradeoff is, a patch every 6 or 12 months, versus using a more
> modern guest.  With other tests, such as virtiofs_submounts.py,
> already depending on the same decision (to avoid multiple guest images
> downloaded), I think this tradeoff decision needs more visibility.
> 
> IMO, the cost of such a simple patch every 6 or 12 months is very low
> provided we'll benefit from the newer guests.

I don't think changing the OS version typically changes the level of
coverage in aggregate.  The new OS may exercise new code paths, but
it will stop exercising old code paths, so most of the time it'll
be net-zero.  The ideal would be to test a representative selection
of both old and new versions, but capacity limits that.

The only time there's probably a notable difference is if we need to
access to a new type of device that the old OS doesn't have, but
that's relatively rare.



Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [PATCH] tests/acceptance: test hot(un)plug of ccw devices

2020-12-03 Thread Cornelia Huck

On Thu, 3 Dec 2020 18:22:35 +0100
Thomas Huth  wrote:

> On 03/12/2020 16.39, Cornelia Huck wrote:
> > Hotplug a virtio-net-ccw device, and then hotunplug it again.  
> 
> Good idea! ... is it also possible with a pci device?

It should be, I guess; it's not part of my normal test workflow,
however.

> 
> > Signed-off-by: Cornelia Huck 
> > ---
> > 
> > This is on top of "tests/acceptance: enhance s390x devices test"
> > 
> > ---
> >  tests/acceptance/machine_s390_ccw_virtio.py | 14 ++
> >  1 file changed, 14 insertions(+)
> > 
> > diff --git a/tests/acceptance/machine_s390_ccw_virtio.py 
> > b/tests/acceptance/machine_s390_ccw_virtio.py
> > index 53b8484f8f9c..487c25c31d3c 100644
> > --- a/tests/acceptance/machine_s390_ccw_virtio.py
> > +++ b/tests/acceptance/machine_s390_ccw_virtio.py
> > @@ -97,3 +97,17 @@ class S390CCWVirtioMachine(Test):
> >  exec_command_and_wait_for_pattern(self,
> >'cat 
> > /sys/bus/pci/devices/000a\:00\:00.0/function_id',
> >'0x000c')
> > +# add another device
> > +self.vm.command('device_add', driver='virtio-net-ccw',
> > +devno='fe.0.4711', id='xxx')  
> 
> Could we use a different id, please? xxx sounds so ... well, use your
> imagination.

It is taken straight from my usual testing workflow :) But yeah, I can
call this net_4711 or so.

> 
> > +exec_command_and_wait_for_pattern(self, 'dmesg | tail -n 1', 
> > 'CRW')  
> 
> That looks like it could be a little bit racy ... what if the kernel outputs
> another log message by chance, so that tail -n 1 reports that instead.
> 
> I think it would be better to clear the dmesg log ("dmesg -c") before
> plugging, and then look at all the new output of "dmesg" without using
> "tail" afterwards.

Yes, good idea. Would need to do the same dance below as well (we get a
new crw).

> 
> > +exec_command_and_wait_for_pattern(self, 'ls /sys/bus/ccw/devices/',
> > +  '0.0.4711')
> > +# and detach it again
> > +self.vm.command('device_del', id='xxx')
> > +self.vm.event_wait(name='DEVICE_DELETED',
> > +   match={'data': {'device': 'xxx'}})
> > +exec_command_and_wait_for_pattern(self, 'dmesg | tail -n 1', 
> > 'CRW')  
> 
> ditto
> 
> > +exec_command_and_wait_for_pattern(self,
> > +  'ls 
> > /sys/bus/ccw/devices/0.0.4711',
> > +  'No such file or directory')
> >   
> 
>  Thomas
> 
> 
> PS: Another idea for a test: Looks like that initrd also has a
> virtio-balloon driver ... we could maybe start with "-device
> virtio-balloon", then change the size of the balloon and check whether the
> MemTotal in /proc/meminfo changed...

Added to my to-test list.

Thanks!

Re: [PATCH v2 0/4] Use lock guard macros in block

Am 03.12.2020 um 08:50 hat Gan Qixin geschrieben:
> v1->v2:
> 
> -Patch1:
> Add Paolo Bonzini reviewed tag and delete the .c suffix in the commit
> message.
> 
> -Patch2:
> Add Paolo Bonzini reviewed tag and delete the .c suffix in the commit
> message.
> 
> -Patch3:
> Delete the .c suffix in the commit.
> Changes suggested by Kevin Wolf: Fix wrong indentation format.
> 
> -Patch4:
> Delete the .c suffix in the commit.
> Changes suggested by Kevin Wolf: Replace QEMU_LOCK_GUARD with
> WITH_QEMU_LOCK_GUARD, and delete the redundant qemu_mutex_unlock().

Thanks, applied to the block branch.

Kevin

Re: [PATCH-for-5.2? 1/1] Acceptance tests: bump Fedora to 32

2020-12-03 Thread Cleber Rosa

On Thu, Dec 03, 2020 at 05:02:33PM +, Daniel P. Berrangé wrote:
> I think the problem with the Fedora acceptance is that we'll be constantly
> chasing a moving target. Every URL we pick will go away 6-12 months later.
> IOW, while the acceptance test pass today, in 6 months time they'll be
> failing.  IOW,  switching to F32 doesn't solve the root cause, it just
> pushes the problem down the road for 6 months until F32 is EOL and hits
> the same URL change problem.
>

Just FYI, the tests will not FAIL when the images are removed from the
official locations.  This is what happens today:

   JOB ID : e85527a9d75023070f15b833eac0f91f803afc83
   JOB LOG: 
/home/cleber/avocado/job-results/job-2020-12-03T12.21-e85527a/job.log
(1/1) tests/acceptance/boot_linux.py:BootLinuxX8664.test_pc_q35_kvm: 
CANCEL: Failed to download/prepare boot image (0.33 s)
   RESULTS: PASS 0 | ERROR 0 | FAIL 0 | SKIP 0 | WARN 0 | INTERRUPT 0 | 
CANCEL 1
   JOB HTML   : 
/home/cleber/avocado/job-results/job-2020-12-03T12.21-e85527a/results.html
   JOB TIME   : 0.76 s

And *normally*, we'd have 12+ months between updates, that is from
Fedora 31 -> 33, 33 -> 35, etc.

> One way to avoid this is to *not* actually  test a current Fedora.
> Instead intentionally point at an EOL Fedora release whose URL has
> already moved to the archive site which is long term stable.
>

So the tradeoff is, a patch every 6 or 12 months, versus using a more
modern guest.  With other tests, such as virtiofs_submounts.py,
already depending on the same decision (to avoid multiple guest images
downloaded), I think this tradeoff decision needs more visibility.

IMO, the cost of such a simple patch every 6 or 12 months is very low
provided we'll benefit from the newer guests.

Cheers,
- Cleber.

> Regards,
> Daniel
> -- 
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|
> 
> 


signature.asc
Description: PGP signature

[PATCH 3/3] block: Fix deadlock in bdrv_co_yield_to_drain()

If bdrv_co_yield_to_drain() is called for draining a block node that
runs in a different AioContext, it keeps that AioContext locked while it
yields and schedules a BH in the AioContext to do the actual drain.

As long as executing the BH is the very next thing the event loop of the
node's AioContext does, this actually happens to work, but when it tries to
execute something else that wants to take the AioContext lock, it will
deadlock. (In the bug report, this other thing is a virtio-scsi device
running virtio_scsi_data_plane_handle_cmd().)

Instead, always drop the AioContext lock across the yield and reacquire
it only when the coroutine is reentered. The BH needs to unconditionally
take the lock for itself now.

This fixes the 'block_resize' QMP command on a block node that runs in
an iothread.

Cc: qemu-sta...@nongnu.org
Fixes: eb94b81a94bce112e6b206df846c1551aaf6cab6
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1903511
Signed-off-by: Kevin Wolf 
---
 block/io.c | 41 -
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/block/io.c b/block/io.c
index ec5e152bb7..a9f56a9ab1 100644
--- a/block/io.c
+++ b/block/io.c
@@ -306,17 +306,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
 
 if (bs) {
 AioContext *ctx = bdrv_get_aio_context(bs);
-AioContext *co_ctx = qemu_coroutine_get_aio_context(co);
-
-/*
- * When the coroutine yielded, the lock for its home context was
- * released, so we need to re-acquire it here. If it explicitly
- * acquired a different context, the lock is still held and we don't
- * want to lock it a second time (or AIO_WAIT_WHILE() would hang).
- */
-if (ctx == co_ctx) {
-aio_context_acquire(ctx);
-}
+aio_context_acquire(ctx);
 bdrv_dec_in_flight(bs);
 if (data->begin) {
 assert(!data->drained_end_counter);
@@ -328,9 +318,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
 data->ignore_bds_parents,
 data->drained_end_counter);
 }
-if (ctx == co_ctx) {
-aio_context_release(ctx);
-}
+aio_context_release(ctx);
 } else {
 assert(data->begin);
 bdrv_drain_all_begin();
@@ -348,13 +336,16 @@ static void coroutine_fn 
bdrv_co_yield_to_drain(BlockDriverState *bs,
 int *drained_end_counter)
 {
 BdrvCoDrainData data;
+Coroutine *self = qemu_coroutine_self();
+AioContext *ctx = bdrv_get_aio_context(bs);
+AioContext *co_ctx = qemu_coroutine_get_aio_context(self);
 
 /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
  * other coroutines run if they were queued by aio_co_enter(). */
 
 assert(qemu_in_coroutine());
 data = (BdrvCoDrainData) {
-.co = qemu_coroutine_self(),
+.co = self,
 .bs = bs,
 .done = false,
 .begin = begin,
@@ -368,13 +359,29 @@ static void coroutine_fn 
bdrv_co_yield_to_drain(BlockDriverState *bs,
 if (bs) {
 bdrv_inc_in_flight(bs);
 }
-replay_bh_schedule_oneshot_event(bdrv_get_aio_context(bs),
- bdrv_co_drain_bh_cb, );
+
+/*
+ * Temporarily drop the lock across yield or we would get deadlocks.
+ * bdrv_co_drain_bh_cb() reaquires the lock as needed.
+ *
+ * When we yield below, the lock for the current context will be
+ * released, so if this is actually the lock that protects bs, don't drop
+ * it a second time.
+ */
+if (ctx != co_ctx) {
+aio_context_release(ctx);
+}
+replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);
 
 qemu_coroutine_yield();
 /* If we are resumed from some other event (such as an aio completion or a
  * timer callback), it is a bug in the caller that should be fixed. */
 assert(data.done);
+
+/* Reaquire the AioContext of bs if we dropped it */
+if (ctx != co_ctx) {
+aio_context_acquire(ctx);
+}
 }
 
 void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
-- 
2.28.0

Re: [PATCH] tests/acceptance: test hot(un)plug of ccw devices

2020-12-03 Thread Thomas Huth

On 03/12/2020 16.39, Cornelia Huck wrote:
> Hotplug a virtio-net-ccw device, and then hotunplug it again.

Good idea! ... is it also possible with a pci device?

> Signed-off-by: Cornelia Huck 
> ---
> 
> This is on top of "tests/acceptance: enhance s390x devices test"
> 
> ---
>  tests/acceptance/machine_s390_ccw_virtio.py | 14 ++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/tests/acceptance/machine_s390_ccw_virtio.py 
> b/tests/acceptance/machine_s390_ccw_virtio.py
> index 53b8484f8f9c..487c25c31d3c 100644
> --- a/tests/acceptance/machine_s390_ccw_virtio.py
> +++ b/tests/acceptance/machine_s390_ccw_virtio.py
> @@ -97,3 +97,17 @@ class S390CCWVirtioMachine(Test):
>  exec_command_and_wait_for_pattern(self,
>'cat 
> /sys/bus/pci/devices/000a\:00\:00.0/function_id',
>'0x000c')
> +# add another device
> +self.vm.command('device_add', driver='virtio-net-ccw',
> +devno='fe.0.4711', id='xxx')

Could we use a different id, please? xxx sounds so ... well, use your
imagination.

> +exec_command_and_wait_for_pattern(self, 'dmesg | tail -n 1', 'CRW')

That looks like it could be a little bit racy ... what if the kernel outputs
another log message by chance, so that tail -n 1 reports that instead.

I think it would be better to clear the dmesg log ("dmesg -c") before
plugging, and then look at all the new output of "dmesg" without using
"tail" afterwards.

> +exec_command_and_wait_for_pattern(self, 'ls /sys/bus/ccw/devices/',
> +  '0.0.4711')
> +# and detach it again
> +self.vm.command('device_del', id='xxx')
> +self.vm.event_wait(name='DEVICE_DELETED',
> +   match={'data': {'device': 'xxx'}})
> +exec_command_and_wait_for_pattern(self, 'dmesg | tail -n 1', 'CRW')

ditto

> +exec_command_and_wait_for_pattern(self,
> +  'ls /sys/bus/ccw/devices/0.0.4711',
> +  'No such file or directory')
> 

 Thomas

PS: Another idea for a test: Looks like that initrd also has a
virtio-balloon driver ... we could maybe start with "-device
virtio-balloon", then change the size of the balloon and check whether the
MemTotal in /proc/meminfo changed...

[PATCH 2/3] block: Fix locking in qmp_block_resize()

The drain functions assume that we hold the AioContext lock of the
drained block node. Make sure to actually take the lock.

Cc: qemu-sta...@nongnu.org
Fixes: eb94b81a94bce112e6b206df846c1551aaf6cab6
Signed-off-by: Kevin Wolf 
---
 blockdev.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/blockdev.c b/blockdev.c
index 229d2cce1b..0535a8dc9e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2481,13 +2481,16 @@ void coroutine_fn qmp_block_resize(bool has_device, 
const char *device,
 return;
 }
 
+bdrv_co_lock(bs);
 bdrv_drained_begin(bs);
+bdrv_co_unlock(bs);
+
 old_ctx = bdrv_co_enter(bs);
 blk_truncate(blk, size, false, PREALLOC_MODE_OFF, 0, errp);
 bdrv_co_leave(bs, old_ctx);
-bdrv_drained_end(bs);
 
 bdrv_co_lock(bs);
+bdrv_drained_end(bs);
 blk_unref(blk);
 bdrv_co_unlock(bs);
 }
-- 
2.28.0

[PATCH 0/3] block: Fix block_resize deadlock with iothreads

The conversion of qmp_block_resize() to coroutines exposed a preexisting
locking bug in the drain implementation that can cause deadlocks.

As it happens, fixing this bug reveals in turn that the locking in
qmp_block_resize() itself is incomplete, too.

Kevin Wolf (3):
  block: Simplify qmp_block_resize() error paths
  block: Fix locking in qmp_block_resize()
  block: Fix deadlock in bdrv_co_yield_to_drain()

 block/io.c | 41 -
 blockdev.c | 12 +++-
 2 files changed, 31 insertions(+), 22 deletions(-)

-- 
2.28.0

[PATCH v2 2/2] Implement support for precise TSC migration

2020-12-03 Thread Maxim Levitsky

To enable it, you need to set -accel kvm,x-precise-tsc=on,
and have a kernel that supports this feature.

Signed-off-by: Maxim Levitsky 
---
 accel/kvm/kvm-all.c   |  28 +
 include/sysemu/kvm.h  |   1 +
 target/i386/cpu.h |   1 +
 target/i386/kvm.c | 140 +-
 target/i386/machine.c |  19 ++
 5 files changed, 161 insertions(+), 28 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index baaa54249d..3829f2e7a3 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -104,6 +104,8 @@ struct KVMState
 OnOffAuto kernel_irqchip_split;
 bool sync_mmu;
 uint64_t manual_dirty_log_protect;
+/* Use KVM_GET_TSC_PRECISE/KVM_SET_TSC_PRECISE to access IA32_TSC */
+bool precise_tsc;
 /* The man page (and posix) say ioctl numbers are signed int, but
  * they're not.  Linux, glibc and *BSD all treat ioctl numbers as
  * unsigned, and treating them as signed here can break things */
@@ -3194,6 +3196,24 @@ bool kvm_kernel_irqchip_split(void)
 return kvm_state->kernel_irqchip_split == ON_OFF_AUTO_ON;
 }
 
+bool kvm_has_precise_tsc(void)
+{
+return kvm_state && kvm_state->precise_tsc;
+}
+
+static void kvm_set_precise_tsc(Object *obj,
+bool value, Error **errp G_GNUC_UNUSED)
+{
+KVMState *s = KVM_STATE(obj);
+s->precise_tsc = value;
+}
+
+static bool kvm_get_precise_tsc(Object *obj, Error **errp G_GNUC_UNUSED)
+{
+KVMState *s = KVM_STATE(obj);
+return s->precise_tsc;
+}
+
 static void kvm_accel_instance_init(Object *obj)
 {
 KVMState *s = KVM_STATE(obj);
@@ -3222,6 +3242,14 @@ static void kvm_accel_class_init(ObjectClass *oc, void 
*data)
 NULL, NULL);
 object_class_property_set_description(oc, "kvm-shadow-mem",
 "KVM shadow MMU size");
+
+object_class_property_add_bool(oc, "x-precise-tsc",
+   kvm_get_precise_tsc,
+   kvm_set_precise_tsc);
+
+object_class_property_set_description(oc, "x-precise-tsc",
+  "Use precise tsc kvm API");
+
 }
 
 static const TypeInfo kvm_accel_type = {
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index bb5d5cf497..14eff2b1c9 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -519,6 +519,7 @@ void kvm_init_irq_routing(KVMState *s);
 bool kvm_kernel_irqchip_allowed(void);
 bool kvm_kernel_irqchip_required(void);
 bool kvm_kernel_irqchip_split(void);
+bool kvm_has_precise_tsc(void);
 
 /**
  * kvm_arch_irqchip_create:
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 88e8586f8f..d2230d9735 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1460,6 +1460,7 @@ typedef struct CPUX86State {
 uint64_t tsc_adjust;
 uint64_t tsc_deadline;
 uint64_t tsc_aux;
+uint64_t tsc_ns_timestamp;
 
 uint64_t xcr0;
 
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index a2934dda02..4adb7d6246 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -121,7 +121,6 @@ static int has_xsave;
 static int has_xcrs;
 static int has_pit_state2;
 static int has_exception_payload;
-
 static bool has_msr_mcg_ext_ctl;
 
 static struct kvm_cpuid2 *cpuid_cache;
@@ -196,31 +195,112 @@ static int kvm_get_tsc(CPUState *cs)
 {
 X86CPU *cpu = X86_CPU(cs);
 CPUX86State *env = &cpu->env;
-struct {
-struct kvm_msrs info;
-struct kvm_msr_entry entries[1];
-} msr_data = {};
 int ret;
 
 if (env->tsc_valid) {
 return 0;
 }
 
-memset(&msr_data, 0, sizeof(msr_data));
-msr_data.info.nmsrs = 1;
-msr_data.entries[0].index = MSR_IA32_TSC;
-env->tsc_valid = !runstate_is_running();
+if (kvm_has_precise_tsc()) {
+struct kvm_tsc_state tsc_state;
 
-ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
-if (ret < 0) {
-return ret;
+ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_TSC_STATE, &tsc_state);
+if (ret < 0) {
+return ret;
+}
+
+env->tsc = tsc_state.tsc;
+
+if (tsc_state.flags & KVM_TSC_STATE_TIMESTAMP_VALID) {
+env->tsc_ns_timestamp = tsc_state.nsec;
+}
+
+if (tsc_state.flags & KVM_TSC_STATE_TSC_ADJUST_VALID) {
+env->tsc_adjust = tsc_state.tsc_adjust;
+}
+
+} else {
+struct {
+struct kvm_msrs info;
+struct kvm_msr_entry entries[2];
+} msr_data = {
+.info.nmsrs = 1,
+.entries[0].index = MSR_IA32_TSC,
+};
+
+if (has_msr_tsc_adjust) {
+msr_data.info.nmsrs++;
+msr_data.entries[1].index = MSR_TSC_ADJUST;
+}
+
+ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
+if (ret < 0) {
+return ret;
+}
+
+assert(ret == msr_data.info.nmsrs);
+env->tsc = msr_data.entries[0].data;
+if (has_msr_tsc_adjust) {
+env->tsc_adjust = msr_data.entries[1].data;

[PATCH 1/3] block: Simplify qmp_block_resize() error paths

The only thing that happens after the 'out:' label is blk_unref(blk).
However, blk = NULL in all of the error cases, so instead of jumping to
'out:', we can just return directly.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Kevin Wolf 
---
 blockdev.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index fe6fb5dc1d..229d2cce1b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2468,17 +2468,17 @@ void coroutine_fn qmp_block_resize(bool has_device, 
const char *device,
 
 if (size < 0) {
 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "size", "a >0 size");
-goto out;
+return;
 }
 
 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) {
 error_setg(errp, QERR_DEVICE_IN_USE, device);
-goto out;
+return;
 }
 
 blk = blk_new_with_bs(bs, BLK_PERM_RESIZE, BLK_PERM_ALL, errp);
 if (!blk) {
-goto out;
+return;
 }
 
 bdrv_drained_begin(bs);
@@ -2487,7 +2487,6 @@ void coroutine_fn qmp_block_resize(bool has_device, const 
char *device,
 bdrv_co_leave(bs, old_ctx);
 bdrv_drained_end(bs);
 
-out:
 bdrv_co_lock(bs);
 blk_unref(blk);
 bdrv_co_unlock(bs);
-- 
2.28.0

[PATCH v2 1/2] Update the kernel headers for 5.10-rc5 + TSC

2020-12-03 Thread Maxim Levitsky

Signed-off-by: Maxim Levitsky 
---
 include/standard-headers/asm-x86/kvm_para.h |  1 +
 linux-headers/asm-x86/kvm.h |  2 +
 linux-headers/linux/kvm.h   | 71 -
 3 files changed, 73 insertions(+), 1 deletion(-)

diff --git a/include/standard-headers/asm-x86/kvm_para.h 
b/include/standard-headers/asm-x86/kvm_para.h
index 07877d3295..215d01b4ec 100644
--- a/include/standard-headers/asm-x86/kvm_para.h
+++ b/include/standard-headers/asm-x86/kvm_para.h
@@ -32,6 +32,7 @@
 #define KVM_FEATURE_POLL_CONTROL   12
 #define KVM_FEATURE_PV_SCHED_YIELD 13
 #define KVM_FEATURE_ASYNC_PF_INT   14
+#define KVM_FEATURE_MSI_EXT_DEST_ID15
 
 #define KVM_HINTS_REALTIME  0
 
diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
index 89e5f3d1bb..2a60fc6674 100644
--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
@@ -12,6 +12,7 @@
 
 #define KVM_PIO_PAGE_OFFSET 1
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64
 
 #define DE_VECTOR 0
 #define DB_VECTOR 1
@@ -403,6 +404,7 @@ struct kvm_sync_regs {
 #define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
 #define KVM_X86_QUIRK_OUT_7E_INC_RIP  (1 << 3)
 #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
+#define KVM_X86_QUIRK_TSC_HOST_ACCESS  (1 << 5)
 
 #define KVM_STATE_NESTED_FORMAT_VMX0
 #define KVM_STATE_NESTED_FORMAT_SVM1
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index 56ce14ad20..9eedc6e835 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -250,6 +250,7 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_ARM_NISV 28
 #define KVM_EXIT_X86_RDMSR29
 #define KVM_EXIT_X86_WRMSR30
+#define KVM_EXIT_DIRTY_RING_FULL  31
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -1053,6 +1054,9 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_X86_USER_SPACE_MSR 188
 #define KVM_CAP_X86_MSR_FILTER 189
 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
+#define KVM_CAP_SYS_HYPERV_CPUID 191
+#define KVM_CAP_DIRTY_LOG_RING 192
+#define KVM_CAP_PRECISE_TSC 193
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1166,6 +1170,16 @@ struct kvm_clock_data {
__u32 pad[9];
 };
 
+
+#define KVM_TSC_STATE_TIMESTAMP_VALID 1
+#define KVM_TSC_STATE_TSC_ADJUST_VALID 2
+struct kvm_tsc_state {
+   __u32 flags;
+   __u64 nsec;
+   __u64 tsc;
+   __u64 tsc_adjust;
+};
+
 /* For KVM_CAP_SW_TLB */
 
 #define KVM_MMU_FSL_BOOKE_NOHV 0
@@ -1511,7 +1525,7 @@ struct kvm_enc_region {
 /* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */
 #define KVM_CLEAR_DIRTY_LOG  _IOWR(KVMIO, 0xc0, struct 
kvm_clear_dirty_log)
 
-/* Available with KVM_CAP_HYPERV_CPUID */
+/* Available with KVM_CAP_HYPERV_CPUID (vcpu) / KVM_CAP_SYS_HYPERV_CPUID 
(system) */
 #define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2)
 
 /* Available with KVM_CAP_ARM_SVE */
@@ -1557,6 +1571,13 @@ struct kvm_pv_cmd {
 /* Available with KVM_CAP_X86_MSR_FILTER */
 #define KVM_X86_SET_MSR_FILTER _IOW(KVMIO,  0xc6, struct kvm_msr_filter)
 
+/* Available with KVM_CAP_DIRTY_LOG_RING */
+#define KVM_RESET_DIRTY_RINGS  _IO(KVMIO, 0xc7)
+
+/* Available with KVM_CAP_PRECISE_TSC*/
+#define KVM_SET_TSC_STATE  _IOW(KVMIO,  0xc8, struct kvm_tsc_state)
+#define KVM_GET_TSC_STATE  _IOR(KVMIO,  0xc9, struct kvm_tsc_state)
+
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
/* Guest initialization commands */
@@ -1710,4 +1731,52 @@ struct kvm_hyperv_eventfd {
 #define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE(1 << 0)
 #define KVM_DIRTY_LOG_INITIALLY_SET(1 << 1)
 
+/*
+ * Arch needs to define the macro after implementing the dirty ring
+ * feature.  KVM_DIRTY_LOG_PAGE_OFFSET should be defined as the
+ * starting page offset of the dirty ring structures.
+ */
+#ifndef KVM_DIRTY_LOG_PAGE_OFFSET
+#define KVM_DIRTY_LOG_PAGE_OFFSET 0
+#endif
+
+/*
+ * KVM dirty GFN flags, defined as:
+ *
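+ * |---------------+---------------+--------------|
+ * | bit 1 (reset) | bit 0 (dirty) | Status       |
+ * |---------------+---------------+--------------|
+ * |             0 |             0 | Invalid GFN  |
+ * |             0 |             1 | Dirty GFN    |
+ * |             1 |             X | GFN to reset |
+ * |---------------+---------------+--------------|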
+ *
+ * Lifecycle of a dirty GFN goes like:
+ *
+ *      dirtied         harvested        reset
+ * 00 -----------> 01 -------------> 1X -------+
+ *  ^                                          |
+ *  |                                          |
+ *  +------------------------------------------+
+ *
+ * The userspace program is only responsible for the 01->1X state
+ * conversion after harvesting an entry.  Also, it must not skip any
+ * dirty bits, so that dirty bits are always harvested in sequence.
+ */
+#define KVM_DIRTY_GFN_F_DIRTY   BIT(0)
+#define KVM_DIRTY_GFN_F_RESET   BIT(1)

[PATCH v2 0/2] RFC: Precise TSC migration

2020-12-03 Thread Maxim Levitsky

Note that to use this feature you need the kernel patches which are
posted to LKML and k...@vger.kernel.org

The feature is disabled by default, and can be enabled with
-accel kvm,x-precise-tsc=on.

I changed the TSC and TSC adjust read/write code to go through a special
function kvm_get_tsc/kvm_set_tsc regardless of enablement of this feature.

The side effect of this is that now we upload to the kernel the TSC_ADJUST
msr only on KVM_PUT_RESET_STATE reset level.
This shouldn't matter as I don't think that qemu changes this msr on its own.

For migration I added a new state field 'cpu/tsc_ns_timestamp',
where I save the TSC nanosecond timestamp, which is the only
new thing that was added to the migration state.

First patch in this series is temporary and it just updates the kernel
headers to make qemu compile.

When the feature is merged to the kernel, a kernel header sync will bring
the same changes to the qemu, making this patch unnecessary.

V2:

- switched to -accel for enablement
- sync with updated kernel patches
- minor cleanups, renames, etc

Best regards,
Maxim Levitsky

Maxim Levitsky (2):
  Update the kernel headers for 5.10-rc5 + TSC
  Implement support for precise TSC migration

 accel/kvm/kvm-all.c |  28 
 include/standard-headers/asm-x86/kvm_para.h |   1 +
 include/sysemu/kvm.h|   1 +
 linux-headers/asm-x86/kvm.h |   2 +
 linux-headers/linux/kvm.h   |  71 +-
 target/i386/cpu.h   |   1 +
 target/i386/kvm.c   | 140 
 target/i386/machine.c   |  19 +++
 8 files changed, 234 insertions(+), 29 deletions(-)

-- 
2.26.2

Re: [PATCH 4/9] target/mips: Simplify MSA TCG logic

On 12/2/20 12:44 PM, Philippe Mathieu-Daudé wrote:
> Only decode MSA opcodes if MSA is present (implemented).
> 
> Now that check_msa_access() will only be called if MSA is
> present, the only way to have MIPS_HFLAG_MSA unset is if
> MSA is disabled (bit CP0C5_MSAEn cleared, see previous
> commit). Therefore we can remove the 'reserved instruction'
> exception.
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  target/mips/translate.c | 22 ++
>  1 file changed, 10 insertions(+), 12 deletions(-)

Reviewed-by: Richard Henderson 

r~

Re: [PATCH 5/9] target/mips: Remove now unused ASE_MSA definition

On 12/2/20 12:44 PM, Philippe Mathieu-Daudé wrote:
> We don't use ASE_MSA anymore (replaced by ase_msa_available()
> checking MSAP bit from CP0_Config3). Remove it.
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  target/mips/mips-defs.h  | 1 -
>  target/mips/translate_init.c.inc | 8 
>  2 files changed, 4 insertions(+), 5 deletions(-)

Reviewed-by: Richard Henderson 

r~

Re: [PATCH 1/9] target/mips: Introduce ase_msa_available() helper

On 12/2/20 12:44 PM, Philippe Mathieu-Daudé wrote:
> Instead of accessing CP0_Config3 directly and checking
> the 'MSA Present' bit, introduce an explicit helper,
> making the code easier to read.
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  target/mips/internal.h  |  6 ++
>  target/mips/kvm.c   | 12 ++--
>  target/mips/translate.c |  8 +++-
>  3 files changed, 15 insertions(+), 11 deletions(-)

Reviewed-by: Richard Henderson 

r~

Re: [PATCH 3/9] target/mips: Use CP0_Config3 to set MIPS_HFLAG_MSA

On 12/2/20 12:44 PM, Philippe Mathieu-Daudé wrote:
> MSA presence is expressed by the MSAP bit of CP0_Config3.
> We don't need to check anything else.
> 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  target/mips/internal.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Reviewed-by: Richard Henderson 

r~

[Bug 1906694] Re: Assertion Failure in bdrv_co_write_req_prepare through megasas

2020-12-03 Thread Alexander Bulekov

*** This bug is a duplicate of bug 1906693 ***
https://bugs.launchpad.net/bugs/1906693

** This bug has been marked a duplicate of bug 1906693
   Assertion Failure in bdrv_co_write_req_prepare through megasas

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1906694

Title:
  Assertion Failure in bdrv_co_write_req_prepare through megasas

Status in QEMU:
  New

Bug description:
   affects qemu
   subscribe phi...@redhat.com
   subscribe kw...@redhat.com

  === Stack Trace ===
  qemu-fuzz-i386: block/io.c:1835: int bdrv_co_write_req_prepare(BdrvChild *, 
int64_t, uint64_t, BdrvTrackedRequest *, int): Assertion `child->perm & 
BLK_PERM_WRITE' failed.
  ==1505128== ERROR: libFuzzer: deadly signal
  #0 0x55a083b92cee in __sanitizer_print_stack_trace 
(qemu-fuzz-i386+0x793cee)
  #1 0x55a083b6c1d1 in fuzzer::PrintStackTrace() (qemu-fuzz-i386+0x76d1d1)
  #2 0x55a083b4f0d6 in fuzzer::Fuzzer::CrashCallback() (.part.0) 
(qemu-fuzz-i386+0x7500d6)
  #3 0x55a083b4f19b in fuzzer::Fuzzer::StaticCrashSignalCallback() 
(qemu-fuzz-i386+0x75019b)
  #4 0x7f8d24ed6a8f  (/lib64/libpthread.so.0+0x14a8f)
  #5 0x7f8d24d079e4 in raise (/lib64/libc.so.6+0x3c9e4)
  #6 0x7f8d24cf0894 in abort (/lib64/libc.so.6+0x25894)
  #7 0x7f8d24cf0768 in __assert_fail_base.cold (/lib64/libc.so.6+0x25768)
  #8 0x7f8d24cffe75 in __assert_fail (/lib64/libc.so.6+0x34e75)
  #9 0x55a08423763f in bdrv_co_write_req_prepare block/io.c:1835:13
  #10 0x55a0842343a8 in bdrv_aligned_pwritev block/io.c:1915:11
  #11 0x55a084233765 in bdrv_co_pwritev_part block/io.c:2104:11
  #12 0x55a084260d1a in blk_do_pwritev_part block/block-backend.c:1260:11
  #13 0x55a08426163e in blk_aio_write_entry block/block-backend.c:1476:17
  #14 0x55a0843b0d23 in coroutine_trampoline util/coroutine-ucontext.c:173:9
  #15 0x7f8d24d1d22f  (/lib64/libc.so.6+0x5222f)

  === Reproducer===
  cat << EOF | ./qemu-system-i386 -M q35 \
  -device megasas-gen2 -device scsi-cd,drive=null0 \
  -blockdev driver=null-co,read-zeroes=on,node-name=null0 \
  -monitor none -serial none -display none \
  -machine accel=qtest -m 64 -qtest stdio
  outl 0xcf8 0x80001804
  outl 0xcfc 0xff
  outl 0xcf8 0x8000181b
  outl 0xcfc 0x7052005
  write 0x5cc0 0x1 0x03
  write 0x5cc7 0x1 0x40
  write 0x5ce0 0x1 0x0a
  write 0x5cf3 0x1 0x01
  write 0x5cf7 0x1 0x40
  write 0x5cf8 0x1 0x0a
  write 0x5cff 0x1 0x05
  write 0x5d03 0x1 0x5b
  write 0x5d06 0x1 0x4f
  write 0x5d0b 0x1 0x01
  write 0x5d0f 0x1 0x40
  write 0x5d10 0x1 0x0a
  write 0x5d17 0x1 0x05
  write 0x5d1b 0x1 0x5b
  write 0x5d1e 0x1 0x4f
  write 0x5d23 0x1 0x01
  write 0x5d27 0x1 0x40
  write 0x5d28 0x1 0x0a
  write 0x5d2f 0x1 0x05
  write 0x5d33 0x1 0x5b
  write 0x5d36 0x1 0x4f
  write 0x5d3b 0x1 0x01
  write 0x5d3f 0x1 0x40
  write 0x5d40 0x1 0x0a
  write 0x5d47 0x1 0x05
  write 0x5d4b 0x1 0x5b
  write 0x5d4e 0x1 0x4f
  write 0x5d53 0x1 0x01
  write 0x5d57 0x1 0x40
  write 0x5d58 0x1 0x0a
  write 0x5d5f 0x1 0x05
  write 0x5d63 0x1 0x5b
  write 0x5d66 0x1 0x4f
  write 0x5d6b 0x1 0x01
  write 0x5d6f 0x1 0x40
  write 0x5d70 0x1 0x0a
  write 0x5d77 0x1 0x05
  write 0x5d7b 0x1 0x5b
  write 0x5d7e 0x1 0x4f
  write 0x5d83 0x1 0x01
  write 0x5d87 0x1 0x40
  write 0x5d88 0x1 0x0a
  write 0x5d8f 0x1 0x05
  write 0x5d93 0x1 0x5b
  write 0x5d96 0x1 0x4f
  write 0x5d9b 0x1 0x01
  write 0x5d9f 0x1 0x40
  write 0x5da0 0x1 0x0a
  write 0x5da7 0x1 0x05
  write 0x5dab 0x1 0x5b
  write 0x5dae 0x1 0x4f
  write 0x5db3 0x1 0x01
  write 0x5db7 0x1 0x40
  write 0x5db8 0x1 0x0a
  write 0x5dbf 0x1 0x05
  write 0x5dc3 0x1 0x5b
  write 0x5dc6 0x1 0x4f
  write 0x5dcb 0x1 0x01
  write 0x5dcf 0x1 0x40
  write 0x5dd0 0x1 0x0a
  write 0x5dd7 0x1 0x05
  write 0x5ddb 0x1 0x5b
  write 0x5dde 0x1 0x4f
  write 0x5de3 0x1 0x01
  write 0x5de7 0x1 0x40
  write 0x5de8 0x1 0x0a
  write 0x5def 0x1 0x05
  write 0x5df3 0x1 0x5b
  write 0x5df6 0x1 0x4f
  write 0x5dfb 0x1 0x01
  write 0x5dff 0x1 0x40
  write 0x5e00 0x1 0x0a
  write 0x5e07 0x1 0x05
  write 0x5e0b 0x1 0x5b
  write 0x5e0e 0x1 0x4f
  write 0x5e13 0x1 0x01
  write 0x5e17 0x1 0x40
  write 0x5e18 0x1 0x0a
  write 0x5e1f 0x1 0x05
  write 0x5e23 0x1 0x5b
  write 0x5e26 0x1 0x4f
  write 0x5e2b 0x1 0x01
  write 0x5e2f 0x1 0x40
  write 0x5e30 0x1 0x0a
  write 0x5e37 0x1 0x05
  write 0x5e3b 0x1 0x5b
  write 0x5e3e 0x1 0x4f
  write 0x5e43 0x1 0x01
  write 0x5e47 0x1 0x40
  write 0x5e48 0x1 0x0a
  write 0x5e4f 0x1 0x05
  write 0x5e53 0x1 0x5b
  write 0x5e56 0x1 0x4f
  write 0x5e5b 0x1 0x01
  write 0x5e5f 0x1 0x40
  write 0x5e60 0x1 0x0a
  write 0x5e67 0x1 0x05
  write 0x5e6b 0x1 0x5b
  write 0x5e6e 0x1 0x4f
  write 0x5e73 0x1 0x01
  write 0x5e77 0x1 0x40
  write 0x5e78 0x1 0x0a
  write 0x5e7f 0x1 0x05
  write 0x5e83 0x1 0x5b
  write 0x5e86 0x1 0x4f
  write 0x5e8b 0x1 0x01
  write 0x5e8f 0x1 0x40
  write 0x5e90 0x1 0x0a
  write 0x5e97 0x1 0x05
  write 0x5e9b 0x1 0x5b
  write 0x5e9e 0x1

Re: [PATCH v6 1/2] arm64: kvm: Save/restore MTE registers

2020-12-03 Thread Marc Zyngier




diff --git a/arch/arm64/include/asm/sysreg.h 
b/arch/arm64/include/asm/sysreg.h

index e2ef4c2edf06..b6668ffa04d9 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -569,7 +569,8 @@
 #define SCTLR_ELx_M(BIT(0))

 #define SCTLR_ELx_FLAGS(SCTLR_ELx_M  | SCTLR_ELx_A | SCTLR_ELx_C | \
-SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_IESB)
+SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_IESB | \
+SCTLR_ELx_ITFSB)

 /* SCTLR_EL2 specific flags. */
 #define SCTLR_EL2_RES1	((BIT(4))  | (BIT(5))  | (BIT(11)) | (BIT(16)) 
| \

diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index cce43bfe158f..45255ba60152 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -18,6 +18,11 @@
 static inline void __sysreg_save_common_state(struct kvm_cpu_context 
*ctxt)

 {
ctxt_sys_reg(ctxt, MDSCR_EL1)   = read_sysreg(mdscr_el1);
+   if (system_supports_mte()) {


Please move the per-VM predicate to this patch so that it can be used
not to save/restore the MTE registers if we don't need to.


+   ctxt_sys_reg(ctxt, RGSR_EL1)= read_sysreg_s(SYS_RGSR_EL1);
+   ctxt_sys_reg(ctxt, GCR_EL1) = read_sysreg_s(SYS_GCR_EL1);
+   ctxt_sys_reg(ctxt, TFSRE0_EL1)  = read_sysreg_s(SYS_TFSRE0_EL1);
+   }


Overall, I still don't understand how this is going to work once
we have MTE in the kernel. You mentioned having the ability to
turn off the tag checks at times, but I don't see that
in this patch (and I'm not sure we want that either).

Thanks,

M.
--
Jazz is not dead. It just smells funny...

Re: [PATCH 2/9] target/mips: Simplify msa_reset()