From: David Woodhouse <[email protected]>

Commit 3617c0ee7decb ("KVM: x86/xen: Only write Xen hypercall page for
guest writes to MSR") blocked host-initiated writes from triggering the
Xen hypercall page setup, to fix an SRCU usage violation when the
hypercall MSR index collides with a real MSR written during vCPU reset.
However, some VMMs legitimately need to trigger hypercall page setup
from host context. For example, a VMM may intercept the guest's MSR
write to track an epoch (for kexec/crash recovery), and then replay the
write as a host-initiated KVM_SET_MSRS to populate the hypercall page.
The host_initiated check breaks this use case.
Add KVM_XEN_VCPU_ATTR_TYPE_WRITE_HYPERCALL_PAGE as a new vcpu attribute
that explicitly invokes kvm_xen_write_hypercall_page() under proper
locking. This gives userspace a safe interface to trigger hypercall page
setup without going through the MSR write path, preserving the
host_initiated defence in depth while restoring the lost functionality.
Fixes: 3617c0ee7dec ("KVM: x86/xen: Only write Xen hypercall page for guest writes to MSR")
Signed-off-by: David Woodhouse <[email protected]>
---
Documentation/virt/kvm/api.rst | 11 +++
arch/x86/include/uapi/asm/kvm.h | 3 +
arch/x86/kvm/x86.c | 3 +-
arch/x86/kvm/xen.c | 7 ++
.../selftests/kvm/x86/xen_vmcall_test.c | 96 +++++++++++++++++++
5 files changed, 119 insertions(+), 1 deletion(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 52bbbb553ce1..63423c375a78 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5800,6 +5800,17 @@ KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR
vector configured with HVM_PARAM_CALLBACK_IRQ. It is disabled by
setting the vector to zero.
+KVM_XEN_VCPU_ATTR_TYPE_WRITE_HYPERCALL_PAGE
+ This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
+ support for KVM_XEN_HVM_CONFIG_WRITE_HYPERCALL_PAGE. It triggers
+ population of the Xen hypercall page at the guest physical address
+ specified in ``gpa``, just as if the guest had written to the
+ hypercall MSR. This is intended for VMMs that intercept the guest's
+ MSR write (e.g. to track an epoch for kexec/crash recovery) and need
+ to replay the write from host context. Direct host-initiated writes
+ via KVM_SET_MSRS are blocked for safety; this attribute provides the
+ correct alternative.
+
4.129 KVM_XEN_VCPU_GET_ATTR
---------------------------
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5f2b30d0405c..977f3aa66c18 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -596,6 +596,7 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8)
+#define KVM_XEN_HVM_CONFIG_WRITE_HYPERCALL_PAGE (1 << 9)
#define KVM_XEN_MSR_MIN_INDEX 0x40000000u
#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu
@@ -704,6 +705,8 @@ struct kvm_xen_vcpu_attr {
#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8
/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */
#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA 0x9
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_WRITE_HYPERCALL_PAGE */
+#define KVM_XEN_VCPU_ATTR_TYPE_WRITE_HYPERCALL_PAGE 0xa
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0a1b63c63d1a..3facf0429c0a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4891,7 +4891,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL |
KVM_XEN_HVM_CONFIG_EVTCHN_SEND |
KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE |
- KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA;
+ KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA |
+ KVM_XEN_HVM_CONFIG_WRITE_HYPERCALL_PAGE;
if (sched_info_on())
r |= KVM_XEN_HVM_CONFIG_RUNSTATE |
KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG;
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 91fd3673c09a..c16b4560c9e7 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -907,6 +907,13 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
int idx, r = -ENOENT;
+ /*
+ * kvm_xen_write_hypercall_page() manages its own locking.
+ * Handle it before taking xen_lock to avoid a deadlock.
+ */
+ if (data->type == KVM_XEN_VCPU_ATTR_TYPE_WRITE_HYPERCALL_PAGE)
+ return kvm_xen_write_hypercall_page(vcpu, data->u.gpa) ? -EIO : 0;
+
mutex_lock(&vcpu->kvm->arch.xen.xen_lock);
idx = srcu_read_lock(&vcpu->kvm->srcu);
diff --git a/tools/testing/selftests/kvm/x86/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c
index 2585087cdf5c..1536d510ab30 100644
--- a/tools/testing/selftests/kvm/x86/xen_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c
@@ -12,6 +12,8 @@
#include "processor.h"
#include "hyperv.h"
+#include <string.h>
+
#define HCALL_REGION_GPA 0xc0000000ULL
#define HCALL_REGION_SLOT 10
@@ -26,6 +28,10 @@
#define HVCALL_SIGNAL_EVENT 0x005d
#define HV_STATUS_INVALID_ALIGNMENT 4
+enum {
+ TEST_WRITE_HYPERCALL_PAGE = 1,
+};
+
static void guest_code(void)
{
unsigned long rax = INPUTVALUE;
@@ -76,17 +82,65 @@ static void guest_code(void)
"r"(r8));
GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT);
+ /*
+ * Test KVM_XEN_VCPU_ATTR_TYPE_WRITE_HYPERCALL_PAGE: ask userspace
+ * to set up MSR filtering, then write the MSR. The WRMSR will exit
+ * to userspace (not populate the page). Userspace verifies the page
+ * is empty, uses the attr to populate it, then resumes us.
+ */
+ GUEST_SYNC(TEST_WRITE_HYPERCALL_PAGE);
+
+ __asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR),
+ "a" (HCALL_REGION_GPA & 0xffffffff),
+ "d" (HCALL_REGION_GPA >> 32));
+
+ /* Userspace populated the page via the attr — verify it works */
+ rax = INPUTVALUE;
+ rdi = ARGVALUE(1);
+ rsi = ARGVALUE(2);
+ rdx = ARGVALUE(3);
+ r10 = ARGVALUE(4);
+ r8 = ARGVALUE(5);
+ r9 = ARGVALUE(6);
+ __asm__ __volatile__("call *%1" : "=a"(rax) :
+ "r"(HCALL_REGION_GPA + INPUTVALUE * 32),
+ "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+ "r"(r10), "r"(r8), "r"(r9));
+ GUEST_ASSERT(rax == RETVALUE);
+
GUEST_DONE();
}
+static void setup_msr_filter(struct kvm_vm *vm)
+{
+ uint64_t deny_bits = 0;
+ struct kvm_msr_filter filter = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_WRITE,
+ .nmsrs = 1,
+ .base = XEN_HYPERCALL_MSR,
+ .bitmap = (uint8_t *)&deny_bits,
+ },
+ },
+ };
+
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter);
+}
+
int main(int argc, char *argv[])
{
unsigned int xen_caps;
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
+ bool msr_filter_ready = false;
xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL);
+ TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_WRITE_HYPERCALL_PAGE);
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR));
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_X86_MSR_FILTER));
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
vcpu_set_hv_cpuid(vcpu);
@@ -123,6 +177,36 @@ int main(int argc, char *argv[])
continue;
}
+ if (run->exit_reason == KVM_EXIT_X86_WRMSR) {
+ /* MSR filter caught the Xen hypercall MSR write */
+ TEST_ASSERT(msr_filter_ready,
+ "Unexpected WRMSR exit before filter setup");
+ TEST_ASSERT_EQ(run->msr.index, XEN_HYPERCALL_MSR);
+
+ /*
+ * The MSR filter diverted the guest's WRMSR to userspace,
+ * so KVM should not have populated the page. Verify it's
+ * still empty.
+ */
+ uint8_t *hcall_page = addr_gpa2hva(vm, HCALL_REGION_GPA);
+ TEST_ASSERT_EQ(hcall_page[0], 0);
+
+ /*
+ * Now use the attr to populate the page, as a
+ * VMM would after intercepting the MSR write.
+ */
+ struct kvm_xen_vcpu_attr attr = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_WRITE_HYPERCALL_PAGE,
+ .u.gpa = HCALL_REGION_GPA,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &attr);
+
+ /* Verify the page is now populated */
+ TEST_ASSERT_EQ(hcall_page[0], 0xb8);
+
+ run->msr.error = 0;
+ continue;
+ }
+
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
switch (get_ucall(vcpu, &uc)) {
@@ -130,6 +214,18 @@ int main(int argc, char *argv[])
REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
+ TEST_ASSERT_EQ(uc.args[1], TEST_WRITE_HYPERCALL_PAGE);
+
+ /*
+ * Guest is about to write the Xen MSR. Clear the
+ * hypercall page, install MSR filter to intercept
+ * the write, and enable userspace MSR exits.
+ */
+ memset(addr_gpa2hva(vm, HCALL_REGION_GPA), 0, PAGE_SIZE);
+ vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR,
+ KVM_MSR_EXIT_REASON_FILTER);
+ setup_msr_filter(vm);
+ msr_filter_ready = true;
break;
case UCALL_DONE:
goto done;
--
2.43.0
smime.p7s
Description: S/MIME cryptographic signature

