From: David Woodhouse <[email protected]>

Commit 3617c0ee7decb ("KVM: x86/xen: Only write Xen hypercall page for
guest writes to MSR") blocked host-initiated writes from triggering the
Xen hypercall page setup, to fix an SRCU usage violation when the
hypercall MSR index collides with a real MSR written during vCPU reset.

However, some VMMs legitimately need to trigger hypercall page setup
from host context. For example, a VMM may intercept the guest's MSR
write to track an epoch (for kexec/crash recovery), and then replay the
write as a host-initiated KVM_SET_MSRS to populate the hypercall page.
The host_initiated check breaks this use case.

Add KVM_XEN_VCPU_ATTR_TYPE_WRITE_HYPERCALL_PAGE as a new vcpu attribute
that explicitly invokes kvm_xen_write_hypercall_page() under proper
locking. This gives userspace a safe interface to trigger hypercall page
setup without going through the MSR write path, preserving the
host_initiated defence in depth while restoring the lost functionality.

Fixes: 3617c0ee7dec ("KVM: x86/xen: Only write Xen hypercall page for guest writes to MSR")
Signed-off-by: David Woodhouse <[email protected]>
---
 Documentation/virt/kvm/api.rst                | 11 +++
 arch/x86/include/uapi/asm/kvm.h               |  3 +
 arch/x86/kvm/x86.c                            |  3 +-
 arch/x86/kvm/xen.c                            |  7 ++
 .../selftests/kvm/x86/xen_vmcall_test.c       | 96 +++++++++++++++++++
 5 files changed, 119 insertions(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 52bbbb553ce1..63423c375a78 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5800,6 +5800,17 @@ KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR
   vector configured with HVM_PARAM_CALLBACK_IRQ. It is disabled by
   setting the vector to zero.
 
+KVM_XEN_VCPU_ATTR_TYPE_WRITE_HYPERCALL_PAGE
+  This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
+  support for KVM_XEN_HVM_CONFIG_WRITE_HYPERCALL_PAGE. It triggers
+  population of the Xen hypercall page at the guest physical address
+  specified in ``gpa``, just as if the guest had written to the
+  hypercall MSR. This is intended for VMMs that intercept the guest's
+  MSR write (e.g. to track an epoch for kexec/crash recovery) and need
+  to replay the write from host context. Host-initiated writes via
+  KVM_SET_MSRS do not populate the hypercall page, for safety; this
+  attribute provides the correct alternative.
+
 
 4.129 KVM_XEN_VCPU_GET_ATTR
 ---------------------------
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 5f2b30d0405c..977f3aa66c18 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -596,6 +596,7 @@ struct kvm_x86_mce {
 #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG        (1 << 6)
 #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE        (1 << 7)
 #define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA     (1 << 8)
+#define KVM_XEN_HVM_CONFIG_WRITE_HYPERCALL_PAGE        (1 << 9)
 
 #define KVM_XEN_MSR_MIN_INDEX                  0x40000000u
 #define KVM_XEN_MSR_MAX_INDEX                  0x4fffffffu
@@ -704,6 +705,8 @@ struct kvm_xen_vcpu_attr {
 #define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR   0x8
 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA */
 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA   0x9
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_WRITE_HYPERCALL_PAGE */
+#define KVM_XEN_VCPU_ATTR_TYPE_WRITE_HYPERCALL_PAGE 0xa
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0a1b63c63d1a..3facf0429c0a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4891,7 +4891,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                    KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL |
                    KVM_XEN_HVM_CONFIG_EVTCHN_SEND |
                    KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE |
-                   KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA;
+                   KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA |
+                   KVM_XEN_HVM_CONFIG_WRITE_HYPERCALL_PAGE;
                if (sched_info_on())
                        r |= KVM_XEN_HVM_CONFIG_RUNSTATE |
                             KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG;
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 91fd3673c09a..c16b4560c9e7 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -907,6 +907,13 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 {
        int idx, r = -ENOENT;
 
+       /*
+        * kvm_xen_write_hypercall_page() manages its own locking.
+        * Handle it before taking xen_lock to avoid a deadlock.
+        */
+       if (data->type == KVM_XEN_VCPU_ATTR_TYPE_WRITE_HYPERCALL_PAGE)
+               return kvm_xen_write_hypercall_page(vcpu, data->u.gpa) ? -EIO : 0;
+
        mutex_lock(&vcpu->kvm->arch.xen.xen_lock);
        idx = srcu_read_lock(&vcpu->kvm->srcu);
 
diff --git a/tools/testing/selftests/kvm/x86/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c
index 2585087cdf5c..1536d510ab30 100644
--- a/tools/testing/selftests/kvm/x86/xen_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86/xen_vmcall_test.c
@@ -12,6 +12,8 @@
 #include "processor.h"
 #include "hyperv.h"
 
+#include <string.h>
+
 #define HCALL_REGION_GPA       0xc0000000ULL
 #define HCALL_REGION_SLOT      10
 
@@ -26,6 +28,10 @@
 #define HVCALL_SIGNAL_EVENT            0x005d
 #define HV_STATUS_INVALID_ALIGNMENT    4
 
+enum {
+       TEST_WRITE_HYPERCALL_PAGE = 1,
+};
+
 static void guest_code(void)
 {
        unsigned long rax = INPUTVALUE;
@@ -76,17 +82,65 @@ static void guest_code(void)
                             "r"(r8));
        GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT);
 
+       /*
+        * Test KVM_XEN_VCPU_ATTR_TYPE_WRITE_HYPERCALL_PAGE: ask userspace
+        * to set up MSR filtering, then write the MSR. The WRMSR will exit
+        * to userspace (not populate the page). Userspace verifies the page
+        * is empty, uses the attr to populate it, then resumes us.
+        */
+       GUEST_SYNC(TEST_WRITE_HYPERCALL_PAGE);
+
+       __asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR),
+                            "a" (HCALL_REGION_GPA & 0xffffffff),
+                            "d" (HCALL_REGION_GPA >> 32));
+
+       /* Userspace populated the page via the attr — verify it works */
+       rax = INPUTVALUE;
+       rdi = ARGVALUE(1);
+       rsi = ARGVALUE(2);
+       rdx = ARGVALUE(3);
+       r10 = ARGVALUE(4);
+       r8 = ARGVALUE(5);
+       r9 = ARGVALUE(6);
+       __asm__ __volatile__("call *%1" : "=a"(rax) :
+                            "r"(HCALL_REGION_GPA + INPUTVALUE * 32),
+                            "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+                            "r"(r10), "r"(r8), "r"(r9));
+       GUEST_ASSERT(rax == RETVALUE);
+
        GUEST_DONE();
 }
 
+static void setup_msr_filter(struct kvm_vm *vm)
+{
+       uint64_t deny_bits = 0;
+       struct kvm_msr_filter filter = {
+               .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+               .ranges = {
+                       {
+                               .flags = KVM_MSR_FILTER_WRITE,
+                               .nmsrs = 1,
+                               .base = XEN_HYPERCALL_MSR,
+                               .bitmap = (uint8_t *)&deny_bits,
+                       },
+               },
+       };
+
+       vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter);
+}
+
 int main(int argc, char *argv[])
 {
        unsigned int xen_caps;
        struct kvm_vcpu *vcpu;
        struct kvm_vm *vm;
+       bool msr_filter_ready = false;
 
        xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
        TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL);
+       TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_WRITE_HYPERCALL_PAGE);
+       TEST_REQUIRE(kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR));
+       TEST_REQUIRE(kvm_check_cap(KVM_CAP_X86_MSR_FILTER));
 
        vm = vm_create_with_one_vcpu(&vcpu, guest_code);
        vcpu_set_hv_cpuid(vcpu);
@@ -123,6 +177,36 @@ int main(int argc, char *argv[])
                        continue;
                }
 
+               if (run->exit_reason == KVM_EXIT_X86_WRMSR) {
+                       /* MSR filter caught the Xen hypercall MSR write */
+                       TEST_ASSERT(msr_filter_ready,
+                                   "Unexpected WRMSR exit before filter setup");
+                       TEST_ASSERT_EQ(run->msr.index, XEN_HYPERCALL_MSR);
+
+                       /*
+                        * The MSR filter bounced the WRMSR to userspace before
+                        * KVM could populate the page. Verify it's still empty.
+                        */
+                       uint8_t *hcall_page = addr_gpa2hva(vm, HCALL_REGION_GPA);
+                       TEST_ASSERT_EQ(hcall_page[0], 0);
+
+                       /*
+                        * Now use the attr to populate the page, as a
+                        * VMM would after intercepting the MSR write.
+                        */
+                       struct kvm_xen_vcpu_attr attr = {
+                               .type = KVM_XEN_VCPU_ATTR_TYPE_WRITE_HYPERCALL_PAGE,
+                               .u.gpa = HCALL_REGION_GPA,
+                       };
+                       vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &attr);
+
+                       /* Verify the page is now populated */
+                       TEST_ASSERT_EQ(hcall_page[0], 0xb8);
+
+                       run->msr.error = 0;
+                       continue;
+               }
+
                TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
                switch (get_ucall(vcpu, &uc)) {
@@ -130,6 +214,18 @@ int main(int argc, char *argv[])
                        REPORT_GUEST_ASSERT(uc);
                        /* NOT REACHED */
                case UCALL_SYNC:
+                       TEST_ASSERT_EQ(uc.args[1], TEST_WRITE_HYPERCALL_PAGE);
+
+                       /*
+                        * Guest is about to write the Xen MSR. Clear the
+                        * hypercall page, install MSR filter to intercept
+                        * the write, and enable userspace MSR exits.
+                        */
+                       memset(addr_gpa2hva(vm, HCALL_REGION_GPA), 0, PAGE_SIZE);
+                       vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR,
+                                     KVM_MSR_EXIT_REASON_FILTER);
+                       setup_msr_filter(vm);
+                       msr_filter_ready = true;
                        break;
                case UCALL_DONE:
                        goto done;
-- 
2.43.0


Attachment: smime.p7s
Description: S/MIME cryptographic signature

Reply via email to