[PATCH v12 31/77] KVM: introspection: add permission access ioctls

2021-10-06 Thread Adalbert Lazăr
KVM_INTROSPECTION_COMMAND and KVM_INTROSPECTION_EVENTS ioctls are used
by the device manager to allow/disallow access to specific (or all)
introspection commands and events. The introspection tool will get the
KVM_EPERM error code on any attempt to use a disallowed command.

By default, all events and almost all commands are disallowed.
Some commands are always allowed (those querying the introspection
capabilities).

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/api.rst|  68 ++
 include/linux/kvmi_host.h |   7 +
 include/uapi/linux/kvm.h  |   8 ++
 include/uapi/linux/kvmi.h |  22 
 .../testing/selftests/kvm/x86_64/kvmi_test.c  |  49 +++
 virt/kvm/introspection/kvmi.c | 122 ++
 virt/kvm/kvm_main.c   |  18 +++
 7 files changed, 294 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 682380425ef6..85f02eda4895 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5526,6 +5526,74 @@ Errors:
 This ioctl is used to free all introspection structures
 related to this VM.
 
+4.136 KVM_INTROSPECTION_COMMAND
+---
+
+:Capability: KVM_CAP_INTROSPECTION
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_introspection_feature (in)
+:Returns: 0 on success, a negative value on error
+
+Errors:
+
+  == ===
+  EFAULT the VM is not introspected yet (use KVM_INTROSPECTION_HOOK)
+  EINVAL the command is unknown
+  EPERM  the command can't be disallowed (e.g. KVMI_GET_VERSION)
+  EPERM  the introspection is disabled (kvm.introspection=0)
+  == ===
+
+This ioctl is used to allow or disallow introspection commands
+for the current VM. By default, almost all commands are disallowed
+except for those used to query the API features.
+
+::
+
+  struct kvm_introspection_feature {
+   __u32 allow;
+   __s32 id;
+  };
+
+If allow is 1, the command specified by id is allowed. If allow is 0,
+the command is disallowed.
+
+Unless set to -1 (meaning all commands), id must be a command ID
+(e.g. KVMI_GET_VERSION)
+
+4.137 KVM_INTROSPECTION_EVENT
+-
+
+:Capability: KVM_CAP_INTROSPECTION
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_introspection_feature (in)
+:Returns: 0 on success, a negative value on error
+
+Errors:
+
+  == ===
+  EFAULT the VM is not introspected yet (use KVM_INTROSPECTION_HOOK)
+  EINVAL the event is unknown
+  EPERM  the introspection is disabled (kvm.introspection=0)
+  == ===
+
+This ioctl is used to allow or disallow introspection events
+for the current VM. By default, all events are disallowed.
+
+::
+
+  struct kvm_introspection_feature {
+   __u32 allow;
+   __s32 id;
+  };
+
+If allow is 1, the event specified by id is allowed. If allow is 0,
+the event is disallowed.
+
+Unless set to -1 (meaning all events), id must be an event ID
+(e.g. KVMI_VM_EVENT_UNHOOK, KVMI_VCPU_EVENT_CR, etc.)
+
 5. The kvm_run structure
 
 
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index 8574b9688736..a5ede07686b9 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -14,6 +14,9 @@ struct kvm_introspection {
 
struct socket *sock;
struct task_struct *recv;
+
+   unsigned long *cmd_allow_mask;
+   unsigned long *event_allow_mask;
 };
 
 int kvmi_version(void);
@@ -25,6 +28,10 @@ void kvmi_destroy_vm(struct kvm *kvm);
 int kvmi_ioctl_hook(struct kvm *kvm,
const struct kvm_introspection_hook *hook);
 int kvmi_ioctl_unhook(struct kvm *kvm);
+int kvmi_ioctl_command(struct kvm *kvm,
+  const struct kvm_introspection_feature *feat);
+int kvmi_ioctl_event(struct kvm *kvm,
+const struct kvm_introspection_feature *feat);
 
 #else
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index e33213f123b0..c56f40c47890 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1826,6 +1826,14 @@ struct kvm_introspection_hook {
 #define KVM_INTROSPECTION_HOOK_IOW(KVMIO, 0xc8, struct 
kvm_introspection_hook)
 #define KVM_INTROSPECTION_UNHOOK  _IO(KVMIO, 0xc9)
 
+struct kvm_introspection_feature {
+   __u32 allow;
+   __s32 id;
+};
+
+#define KVM_INTROSPECTION_COMMAND _IOW(KVMIO, 0xca, struct 
kvm_introspection_feature)
+#define KVM_INTROSPECTION_EVENT   _IOW(KVMIO, 0xcb, struct 
kvm_introspection_feature)
+
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
 #define KVM_DEV_ASSIGN_MASK_INTX  

[PATCH v12 46/77] KVM: introspection: add support for vCPU events

2021-10-06 Thread Adalbert Lazăr
This is the common code used by vCPU threads to send events and wait for
replies (received and dispatched by the receiving thread). While waiting
for an event reply, the vCPU thread will handle any introspection command
already queued or received during this period.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  56 ++-
 arch/x86/include/uapi/asm/kvmi.h  |  20 
 arch/x86/kvm/kvmi.c   |  85 
 include/linux/kvmi_host.h |  11 ++
 include/uapi/linux/kvmi.h |  23 +
 virt/kvm/introspection/kvmi.c |   1 +
 virt/kvm/introspection/kvmi_int.h |   6 ++
 virt/kvm/introspection/kvmi_msg.c | 161 +-
 8 files changed, 359 insertions(+), 4 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 9f6905456923..c46e870cc728 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -521,7 +521,61 @@ The message data begins with a common structure having the 
event id::
__u16 padding[3];
};
 
-Specific event data can follow this common structure.
+The vCPU introspection events are sent using the KVMI_VCPU_EVENT message id.
+No event is sent unless it is explicitly enabled or requested
+(e.g. *KVMI_VCPU_EVENT_PAUSE*).
+A vCPU event begins with a common structure having the size of the
+structure and the vCPU index::
+
+   struct kvmi_vcpu_event {
+   __u16 size;
+   __u16 vcpu;
+   __u32 padding;
+   struct kvmi_vcpu_event_arch arch;
+   };
+
+On x86::
+
+   struct kvmi_vcpu_event_arch {
+   __u8 mode;
+   __u8 padding[7];
+   struct kvm_regs regs;
+   struct kvm_sregs sregs;
+   struct {
+   __u64 sysenter_cs;
+   __u64 sysenter_esp;
+   __u64 sysenter_eip;
+   __u64 efer;
+   __u64 star;
+   __u64 lstar;
+   __u64 cstar;
+   __u64 pat;
+   __u64 shadow_gs;
+   } msrs;
+   };
+
+It contains information about the vCPU state at the time of the event.
+
+A vCPU event reply begins with two common structures::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_event_reply {
+   __u8 action;
+   __u8 event;
+   __u16 padding1;
+   __u32 padding2;
+   };
+
+All events accept the KVMI_EVENT_ACTION_CRASH action, which stops the
+guest ungracefully, but as soon as possible.
+
+Most events accept the KVMI_EVENT_ACTION_CONTINUE action, which
+means that KVM will continue handling the event.
+
+Some events accept the KVMI_EVENT_ACTION_RETRY action, which means that
+KVM will stop handling the event and re-enter in guest.
+
+Specific event data can follow these common structures.
 
 1. KVMI_VM_EVENT_UNHOOK
 ---
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 2b6192e1a9a4..9d9df09d381a 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -6,8 +6,28 @@
  * KVM introspection - x86 specific structures and definitions
  */
 
+#include 
+
 struct kvmi_vcpu_get_info_reply {
__u64 tsc_speed;
 };
 
+struct kvmi_vcpu_event_arch {
+   __u8 mode;  /* 2, 4 or 8 */
+   __u8 padding[7];
+   struct kvm_regs regs;
+   struct kvm_sregs sregs;
+   struct {
+   __u64 sysenter_cs;
+   __u64 sysenter_esp;
+   __u64 sysenter_eip;
+   __u64 efer;
+   __u64 star;
+   __u64 lstar;
+   __u64 cstar;
+   __u64 pat;
+   __u64 shadow_gs;
+   } msrs;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 4388f2d9ff2c..90c5be3c462f 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -5,6 +5,91 @@
  * Copyright (C) 2019-2021 Bitdefender S.R.L.
  */
 
+#include "linux/kvm_host.h"
+#include "x86.h"
+#include "../../../virt/kvm/introspection/kvmi_int.h"
+
 void kvmi_arch_init_vcpu_events_mask(unsigned long *supported)
 {
 }
+
+static unsigned int kvmi_vcpu_mode(const struct kvm_vcpu *vcpu,
+  const struct kvm_sregs *sregs)
+{
+   unsigned int mode = 0;
+
+   if (is_long_mode((struct kvm_vcpu *) vcpu)) {
+   if (sregs->cs.l)
+   mode = 8;
+   else if (!sregs->cs.db)
+   mode = 2;
+   else
+   mode = 4;
+   } else if (sregs->cr0 & X86_CR0_PE) {
+   if (!sregs->cs.db)
+   mode = 2;
+   else
+   mode = 4;
+   } else if (!sregs->cs.db) {
+   mo

[PATCH v12 55/77] KVM: introspection: add cleanup support for vCPUs

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

On unhook the introspection channel is closed. This will signal the
receiving thread to call kvmi_put() and exit. There might be vCPU threads
handling introspection commands or waiting for event replies. These
will also call kvmi_put() and re-enter in guest. Once the reference
counter reaches zero, the structures keeping the introspection data
(kvm_introspection and kvm_vcpu_introspection) will be freed.

In order to restore the interception of CRs, MSRs, BP, descriptor-table
registers, from all vCPUs (some of which might run from userspace),
we keep the needed information in another structure (kvmi_interception)
which will be used and freed by each of them before re-entering in guest.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h   |  3 ++
 arch/x86/include/asm/kvmi_host.h  |  4 +++
 arch/x86/kvm/kvmi.c   | 49 +++
 virt/kvm/introspection/kvmi.c | 32 ++--
 virt/kvm/introspection/kvmi_int.h |  5 
 5 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1970c21c2270..f1e9adc24025 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -919,6 +919,9 @@ struct kvm_vcpu_arch {
 
/* #PF translated error code from EPT/NPT exit reason */
u64 error_code;
+
+   /* Control the interception of MSRs/CRs/BP... */
+   struct kvmi_interception *kvmi;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index cc945151cb36..b776be4bb49f 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -4,6 +4,10 @@
 
 #include 
 
+struct kvmi_interception {
+   bool restore_interception;
+};
+
 struct kvm_vcpu_arch_introspection {
struct kvm_regs delayed_regs;
bool have_delayed_regs;
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index e4358bc3f09a..6a7fc8059f23 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -210,3 +210,52 @@ void kvmi_arch_breakpoint_event(struct kvm_vcpu *vcpu, u64 
gva, u8 insn_len)
kvmi_handle_common_event_actions(vcpu, action);
}
 }
+
+static void kvmi_arch_restore_interception(struct kvm_vcpu *vcpu)
+{
+}
+
+bool kvmi_arch_clean_up_interception(struct kvm_vcpu *vcpu)
+{
+   struct kvmi_interception *arch_vcpui = vcpu->arch.kvmi;
+
+   if (!arch_vcpui)
+   return false;
+
+   if (!arch_vcpui->restore_interception)
+   return false;
+
+   kvmi_arch_restore_interception(vcpu);
+
+   return true;
+}
+
+bool kvmi_arch_vcpu_alloc_interception(struct kvm_vcpu *vcpu)
+{
+   struct kvmi_interception *arch_vcpui;
+
+   arch_vcpui = kzalloc(sizeof(*arch_vcpui), GFP_KERNEL);
+   if (!arch_vcpui)
+   return false;
+
+   return true;
+}
+
+void kvmi_arch_vcpu_free_interception(struct kvm_vcpu *vcpu)
+{
+   kfree(vcpu->arch.kvmi);
+   WRITE_ONCE(vcpu->arch.kvmi, NULL);
+}
+
+bool kvmi_arch_vcpu_introspected(struct kvm_vcpu *vcpu)
+{
+   return !!READ_ONCE(vcpu->arch.kvmi);
+}
+
+void kvmi_arch_request_interception_cleanup(struct kvm_vcpu *vcpu)
+{
+   struct kvmi_interception *arch_vcpui = READ_ONCE(vcpu->arch.kvmi);
+
+   if (arch_vcpui)
+   arch_vcpui->restore_interception = true;
+}
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index 304bae43cb78..f73f49fc381c 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -206,7 +206,7 @@ static bool kvmi_alloc_vcpui(struct kvm_vcpu *vcpu)
 
vcpu->kvmi = vcpui;
 
-   return true;
+   return kvmi_arch_vcpu_alloc_interception(vcpu);
 }
 
 static int kvmi_create_vcpui(struct kvm_vcpu *vcpu)
@@ -240,6 +240,9 @@ static void kvmi_free_vcpui(struct kvm_vcpu *vcpu)
 
kfree(vcpui);
vcpu->kvmi = NULL;
+
+   kvmi_arch_request_interception_cleanup(vcpu);
+   kvmi_make_request(vcpu, false);
 }
 
 static void kvmi_free(struct kvm *kvm)
@@ -262,6 +265,7 @@ void kvmi_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
mutex_lock(>kvm->kvmi_lock);
kvmi_free_vcpui(vcpu);
+   kvmi_arch_vcpu_free_interception(vcpu);
mutex_unlock(>kvm->kvmi_lock);
 }
 
@@ -410,6 +414,21 @@ static int kvmi_recv_thread(void *arg)
return 0;
 }
 
+static bool ready_to_hook(struct kvm *kvm)
+{
+   struct kvm_vcpu *vcpu;
+   int i;
+
+   if (kvm->kvmi)
+   return false;
+
+   kvm_for_each_vcpu(i, vcpu, kvm)
+   if (kvmi_arch_vcpu_introspected(vcpu))
+   return false;
+
+   return true;
+}
+
 static int kvmi_hook(struct kvm *kvm,
 const struct kvm_introspection_hook *hook)
 {
@@ -418,7 +437,7 @@ static int kvmi_hook(struct kvm *kvm,
 
mutex

[PATCH v12 00/77] VM introspection

2021-10-06 Thread Adalbert Lazăr
The KVM introspection subsystem provides a facility for applications
running on the host or in a separate VM, to control the execution of
other VMs (pause, resume, shutdown), query the state of the vCPUs (GPRs,
MSRs etc.), alter the page access bits in the shadow page tables (only
for the hardware backed ones, eg. Intel's EPT) and receive notifications
when events of interest have taken place (shadow page table level faults,
key MSR writes, hypercalls etc.). Some notifications can be responded
to with an action (like preventing an MSR from being written), others
are merely informative (like breakpoint events which can be used for
execution tracing).  With few exceptions, all events are optional. An
application using this subsystem will explicitly register for them.

The use case that gave way for the creation of this subsystem is to
monitor the guest OS and as such the ABI/API is highly influenced by how
the guest software (kernel, applications) sees the world. For example,
some events provide information specific for the host CPU architecture
(eg. MSR_IA32_SYSENTER_EIP) merely because it's leveraged by guest software
to implement a critical feature (fast system calls).

At the moment, the target audience for KVMI are security software authors
that wish to perform forensics on newly discovered threats (exploits)
or to implement another layer of security like preventing a large set
of kernel rootkits simply by "locking" the kernel image in the shadow
page tables (ie. enforce .text r-x, .rodata rw- etc.). It's the latter
case that made KVMI a separate subsystem, even though many of these
features are available in the device manager. The ability to build a
security application that does not interfere (in terms of performance)
with the guest software asks for a specialized interface that is designed
for minimum overhead.

Patches 1-28: make preparatory changes

Patches 29-75: add basic introspection capabilities

Patch 76: support introspection tools that write-protect guest page tables

Patch 77: notify the introspection tool even on emulation failures
  (when the read/write callbacks used by the emulator,
   kvm_page_preread/kvm_page_prewrite, are not called)

Changes since 
[v11](https://lore.kernel.org/kvm/20201207204622.15258-1-ala...@bitdefender.com/):
  - rebase to 5.15 (from 5.10)
  - remove patches no longer needed
  - remove kvm_get_max_gfn()/KVMI_VM_GET_MAX_GFN (a couple of tests are needed 
to see
if it is better to send the memory size from QEMU, during handshake)

Adalbert Lazăr (23):
  KVM: UAPI: add error codes used by the VM introspection code
  KVM: add kvm_vcpu_kick_and_wait()
  KVM: x86: add kvm_x86_ops.control_cr3_intercept()
  KVM: x86: add kvm_x86_ops.desc_ctrl_supported()
  KVM: x86: add kvm_x86_ops.control_desc_intercept()
  KVM: x86: export kvm_vcpu_ioctl_x86_set_xsave()
  KVM: introspection: add hook/unhook ioctls
  KVM: introspection: add permission access ioctls
  KVM: introspection: add the read/dispatch message function
  KVM: introspection: add KVMI_GET_VERSION
  KVM: introspection: add KVMI_VM_CHECK_COMMAND and KVMI_VM_CHECK_EVENT
  KVM: introspection: add KVM_INTROSPECTION_PREUNHOOK
  KVM: introspection: add KVMI_VM_EVENT_UNHOOK
  KVM: introspection: add KVMI_VM_CONTROL_EVENTS
  KVM: introspection: add a jobs list to every introspected vCPU
  KVM: introspection: add KVMI_VM_PAUSE_VCPU
  KVM: introspection: add support for vCPU events
  KVM: introspection: add KVMI_VCPU_EVENT_PAUSE
  KVM: introspection: add KVMI_VM_CONTROL_CLEANUP
  KVM: introspection: add KVMI_VCPU_GET_XCR
  KVM: introspection: add KVMI_VCPU_SET_XSAVE
  KVM: introspection: extend KVMI_GET_VERSION with struct kvmi_features
  KVM: introspection: add KVMI_VCPU_TRANSLATE_GVA

Marian Rotariu (1):
  KVM: introspection: add KVMI_VCPU_GET_CPUID

Mihai Donțu (32):
  KVM: x86: add kvm_arch_vcpu_get_regs() and kvm_arch_vcpu_get_sregs()
  KVM: x86: avoid injecting #PF when emulate the VMCALL instruction
  KVM: x86: add kvm_x86_ops.control_msr_intercept()
  KVM: x86: save the error code during EPT/NPF exits handling
  KVM: x86: add kvm_x86_ops.fault_gla()
  KVM: x86: extend kvm_mmu_gva_to_gpa_system() with the 'access'
parameter
  KVM: x86: page track: provide all callbacks with the guest virtual
address
  KVM: x86: page track: add track_create_slot() callback
  KVM: x86: page_track: add support for preread, prewrite and preexec
  KVM: x86: wire in the preread/prewrite/preexec page trackers
  KVM: introduce VM introspection
  KVM: introspection: add KVMI_VM_GET_INFO
  KVM: introspection: add KVMI_VM_READ_PHYSICAL/KVMI_VM_WRITE_PHYSICAL
  KVM: introspection: handle vCPU introspection requests
  KVM: introspection: handle vCPU commands
  KVM: introspection: add KVMI_VCPU_GET_INFO
  KVM: introspection: add the crash action handling on the event reply
  KVM: introspection: add KVMI_VCPU_CONTROL_EVENTS
  KVM: introspection: add KVMI_VCPU_GET_REGISTERS
  KVM: introspection: add KVMI_VCPU_SET

[PATCH v12 40/77] KVM: introspection: add vCPU related data

2021-10-06 Thread Adalbert Lazăr
From: Mircea Cîrjaliu 

Add an introspection structure to all vCPUs when the VM is hooked.

Signed-off-by: Mircea Cîrjaliu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvmi_host.h |  3 ++
 include/linux/kvm_host.h |  2 ++
 include/linux/kvmi_host.h|  6 
 virt/kvm/introspection/kvmi.c| 51 
 virt/kvm/kvm_main.c  |  2 ++
 5 files changed, 64 insertions(+)

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 38c398262913..360a57dd9019 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -2,6 +2,9 @@
 #ifndef _ASM_X86_KVMI_HOST_H
 #define _ASM_X86_KVMI_HOST_H
 
+struct kvm_vcpu_arch_introspection {
+};
+
 struct kvm_arch_introspection {
 };
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f9f5c661c056..30bf1227c4a7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -360,6 +360,8 @@ struct kvm_vcpu {
 * it is a valid slot.
 */
int last_used_slot;
+
+   struct kvm_vcpu_introspection *kvmi;
 };
 
 /* must be called with irqs disabled */
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index a59307dac6bf..9b0008c66321 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -6,6 +6,10 @@
 
 #include 
 
+struct kvm_vcpu_introspection {
+   struct kvm_vcpu_arch_introspection arch;
+};
+
 struct kvm_introspection {
struct kvm_arch_introspection arch;
struct kvm *kvm;
@@ -28,6 +32,7 @@ int kvmi_init(void);
 void kvmi_uninit(void);
 void kvmi_create_vm(struct kvm *kvm);
 void kvmi_destroy_vm(struct kvm *kvm);
+void kvmi_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 int kvmi_ioctl_hook(struct kvm *kvm,
const struct kvm_introspection_hook *hook);
@@ -45,6 +50,7 @@ static inline int kvmi_init(void) { return 0; }
 static inline void kvmi_uninit(void) { }
 static inline void kvmi_create_vm(struct kvm *kvm) { }
 static inline void kvmi_destroy_vm(struct kvm *kvm) { }
+static inline void kvmi_vcpu_uninit(struct kvm_vcpu *vcpu) { }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index 207dabb8b040..492a29987965 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -118,8 +118,41 @@ void kvmi_uninit(void)
kvmi_cache_destroy();
 }
 
+static bool kvmi_alloc_vcpui(struct kvm_vcpu *vcpu)
+{
+   struct kvm_vcpu_introspection *vcpui;
+
+   vcpui = kzalloc(sizeof(*vcpui), GFP_KERNEL);
+   if (!vcpui)
+   return false;
+
+   vcpu->kvmi = vcpui;
+
+   return true;
+}
+
+static int kvmi_create_vcpui(struct kvm_vcpu *vcpu)
+{
+   if (!kvmi_alloc_vcpui(vcpu))
+   return -ENOMEM;
+
+   return 0;
+}
+
+static void kvmi_free_vcpui(struct kvm_vcpu *vcpu)
+{
+   kfree(vcpu->kvmi);
+   vcpu->kvmi = NULL;
+}
+
 static void kvmi_free(struct kvm *kvm)
 {
+   struct kvm_vcpu *vcpu;
+   int i;
+
+   kvm_for_each_vcpu(i, vcpu, kvm)
+   kvmi_free_vcpui(vcpu);
+
bitmap_free(kvm->kvmi->cmd_allow_mask);
bitmap_free(kvm->kvmi->event_allow_mask);
bitmap_free(kvm->kvmi->vm_event_enable_mask);
@@ -128,10 +161,19 @@ static void kvmi_free(struct kvm *kvm)
kvm->kvmi = NULL;
 }
 
+void kvmi_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+   mutex_lock(>kvm->kvmi_lock);
+   kvmi_free_vcpui(vcpu);
+   mutex_unlock(>kvm->kvmi_lock);
+}
+
 static struct kvm_introspection *
 kvmi_alloc(struct kvm *kvm, const struct kvm_introspection_hook *hook)
 {
struct kvm_introspection *kvmi;
+   struct kvm_vcpu *vcpu;
+   int i;
 
kvmi = kzalloc(sizeof(*kvmi), GFP_KERNEL);
if (!kvmi)
@@ -157,6 +199,15 @@ kvmi_alloc(struct kvm *kvm, const struct 
kvm_introspection_hook *hook)
 
atomic_set(>ev_seq, 0);
 
+   kvm_for_each_vcpu(i, vcpu, kvm) {
+   int err = kvmi_create_vcpui(vcpu);
+
+   if (err) {
+   kvmi_free(kvm);
+   return NULL;
+   }
+   }
+
kvmi->kvm = kvm;
 
return kvmi;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b0a23fcb935c..d70ec110696f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -441,6 +441,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm 
*kvm, unsigned id)
 
 void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+   kvmi_vcpu_uninit(vcpu);
kvm_dirty_ring_free(>dirty_ring);
kvm_arch_vcpu_destroy(vcpu);
 
@@ -3694,6 +3695,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 
id)
 
 unlock_vcpu_destroy:
mutex_unlock(>lock);
+   kvmi_vcpu_uninit(vcpu);
kvm_dirty_ring_free(>dirty_ring);
 arch_vcpu_destroy:
kvm_arch_vcpu_destroy(vcpu);
_

[PATCH v12 49/77] KVM: introspection: add KVMI_VCPU_CONTROL_EVENTS

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

By default, all introspection events are disabled. The introspection tool
must explicitly enable the events it wants to receive. With this command
(KVMI_VCPU_CONTROL_EVENTS) it can enable/disable any vCPU event allowed
by the device manager.

Some vCPU events don't have to be explicitly enabled (and can't
be disabled) with this command because they are implicitly enabled
or requested by the use of certain commands. For example, if the
introspection tool uses the KVMI_VM_PAUSE_VCPU command, it wants to
receive a KVMI_VCPU_EVENT_PAUSE event.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 48 +++
 include/linux/kvmi_host.h |  2 +
 include/uapi/linux/kvmi.h | 10 +++-
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 46 ++
 virt/kvm/introspection/kvmi.c | 26 ++
 virt/kvm/introspection/kvmi_int.h |  3 ++
 virt/kvm/introspection/kvmi_msg.c | 24 +-
 7 files changed, 157 insertions(+), 2 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 7f3daeb6..892b960d978d 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -367,6 +367,9 @@ the following events::
 
KVMI_VM_EVENT_UNHOOK
 
+The vCPU events (e.g. *KVMI_VCPU_EVENT_PAUSE*) are controlled with
+the *KVMI_VCPU_CONTROL_EVENTS* command.
+
 :Errors:
 
 * -KVM_EINVAL - the padding is not zero
@@ -509,6 +512,51 @@ command) before returning to guest.
 *KVMI_VCPU_EVENT_PAUSE* events
 * -KVM_EPERM  - the *KVMI_VCPU_EVENT_PAUSE* event is disallowed
 
+10. KVMI_VCPU_CONTROL_EVENTS
+
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_control_events {
+   __u16 event_id;
+   __u8 enable;
+   __u8 padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Enables/disables vCPU introspection events.
+
+When an event is enabled, the introspection tool is notified and
+must reply with: continue, retry, crash, etc. (see **Events** below).
+
+The following vCPU events don't have to be enabled and can't be disabled,
+because these are sent as a result of certain commands (but they can be
+disallowed by the device manager) ::
+
+   KVMI_VCPU_EVENT_PAUSE
+
+The VM events (e.g. *KVMI_VM_EVENT_UNHOOK*) are controlled with
+the *KVMI_VM_CONTROL_EVENTS* command.
+
+:Errors:
+
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the event ID is unknown (use *KVMI_VM_CHECK_EVENT* first)
+* -KVM_EPERM - the access is disallowed (use *KVMI_VM_CHECK_EVENT* first)
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index 4a43e51a44c9..5e5d255e5a2c 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -31,6 +31,8 @@ struct kvm_vcpu_introspection {
 
struct kvmi_vcpu_reply reply;
bool waiting_for_reply;
+
+   unsigned long *ev_enable_mask;
 };
 
 struct kvm_introspection {
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 757d4b84f473..acd00e883dc9 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -35,7 +35,8 @@ enum {
 enum {
KVMI_VCPU_EVENT = KVMI_VCPU_MESSAGE_ID(0),
 
-   KVMI_VCPU_GET_INFO = KVMI_VCPU_MESSAGE_ID(1),
+   KVMI_VCPU_GET_INFO   = KVMI_VCPU_MESSAGE_ID(1),
+   KVMI_VCPU_CONTROL_EVENTS = KVMI_VCPU_MESSAGE_ID(2),
 
KVMI_NEXT_VCPU_MESSAGE
 };
@@ -148,4 +149,11 @@ struct kvmi_vcpu_event_reply {
__u32 padding2;
 };
 
+struct kvmi_vcpu_control_events {
+   __u16 event_id;
+   __u8 enable;
+   __u8 padding1;
+   __u32 padding2;
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 9eb17d820a8b..e2d09e0cbce8 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -777,6 +777,51 @@ static void test_pause(struct kvm_vm *vm)
allow_event(vm, KVMI_VCPU_EVENT_PAUSE);
 }
 
+static void cmd_vcpu_control_event(struct kvm_vm *vm, __u16 event_id,
+  __u8 enable, int expected_err)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   struct kvmi_vcpu_control_events cmd;
+   } req = {};
+
+   req.cmd.event_id = event_id;
+   req.cmd.enable = enable;
+
+   test_vcpu0_command(vm, KVMI_VCPU_CONTROL_EVENTS,
+  , sizeof(req), NULL, 0,
+  expected_

[PATCH v12 06/77] KVM: x86: add kvm_x86_ops.bp_intercepted()

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

Both, the introspection tool and the device manager can request #BP
interception. This function will be used to check if this interception
is already enabled by either side.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm-x86-ops.h | 1 +
 arch/x86/include/asm/kvm_host.h| 1 +
 arch/x86/kvm/svm/svm.c | 8 
 arch/x86/kvm/svm/svm.h | 8 
 arch/x86/kvm/vmx/vmx.c | 6 ++
 5 files changed, 24 insertions(+)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h 
b/arch/x86/include/asm/kvm-x86-ops.h
index cefe1d81e2e8..31af251c5622 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -121,6 +121,7 @@ KVM_X86_OP_NULL(enable_direct_tlbflush)
 KVM_X86_OP_NULL(migrate_timers)
 KVM_X86_OP(msr_filter_changed)
 KVM_X86_OP_NULL(complete_emulated_msr)
+KVM_X86_OP(bp_intercepted)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_NULL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5271fce6cd65..26a52520b8bd 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1323,6 +1323,7 @@ struct kvm_x86_ops {
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
 
+   bool (*bp_intercepted)(struct kvm_vcpu *vcpu);
void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 89077160d463..abecc1234161 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1881,6 +1881,13 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
vmcb_mark_dirty(svm->vmcb, VMCB_SEG);
 }
 
+static bool svm_bp_intercepted(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   return get_exception_intercept(svm, BP_VECTOR);
+}
+
 static void svm_update_exception_bitmap(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4600,6 +4607,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.vcpu_blocking = svm_vcpu_blocking,
.vcpu_unblocking = svm_vcpu_unblocking,
 
+   .bp_intercepted = svm_bp_intercepted,
.update_exception_bitmap = svm_update_exception_bitmap,
.get_msr_feature = svm_get_msr_feature,
.get_msr = svm_get_msr,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 0d7bbe548ac3..32c2d6d3424b 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -359,6 +359,14 @@ static inline void clr_exception_intercept(struct vcpu_svm 
*svm, u32 bit)
recalc_intercepts(svm);
 }
 
+static inline bool get_exception_intercept(struct vcpu_svm *svm, int bit)
+{
+   struct vmcb *vmcb = svm->vmcb01.ptr;
+
+   WARN_ON_ONCE(bit >= 32);
+   return vmcb_is_intercept(>control, INTERCEPT_EXCEPTION_OFFSET + 
bit);
+}
+
 static inline void svm_set_intercept(struct vcpu_svm *svm, int bit)
 {
struct vmcb *vmcb = svm->vmcb01.ptr;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1c8b2b6e7ed9..6fdc3d10b2b4 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -715,6 +715,11 @@ static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, 
unsigned seg)
return *p;
 }
 
+static bool vmx_bp_intercepted(struct kvm_vcpu *vcpu)
+{
+   return (vmcs_read32(EXCEPTION_BITMAP) & (1u << BP_VECTOR));
+}
+
 void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
 {
u32 eb;
@@ -7586,6 +7591,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.vcpu_load = vmx_vcpu_load,
.vcpu_put = vmx_vcpu_put,
 
+   .bp_intercepted = vmx_bp_intercepted,
.update_exception_bitmap = vmx_update_exception_bitmap,
.get_msr_feature = vmx_get_msr_feature,
.get_msr = vmx_get_msr,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 14/77] KVM: x86: svm: use the vmx convention to control the MSR interception

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This is a preparatory patch in order to use a common interface to
enable/disable the MSR interception.

Also, it will allow to independently control the read and write
interceptions.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |   4 ++
 arch/x86/kvm/svm/sev.c  |  18 --
 arch/x86/kvm/svm/svm.c  | 103 
 arch/x86/kvm/svm/svm.h  |   2 +-
 arch/x86/kvm/vmx/vmx.h  |   4 --
 5 files changed, 83 insertions(+), 48 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1e77cb825ec4..79b2d8abff36 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -150,6 +150,10 @@
 #define CR_TYPE_W  2
 #define CR_TYPE_RW 3
 
+#define MSR_TYPE_R 1
+#define MSR_TYPE_W 2
+#define MSR_TYPE_RW3
+
 #define ASYNC_PF_PER_VCPU 64
 
 enum kvm_reg {
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 1e8b26b93b4f..29bf93c97b65 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2623,12 +2623,18 @@ void sev_es_init_vmcb(struct vcpu_svm *svm)
svm_clr_intercept(svm, INTERCEPT_XSETBV);
 
/* Clear intercepts on selected MSRs */
-   set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
-   set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
-   set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
-   set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
-   set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
-   set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+   set_msr_interception(vcpu, svm->msrpm, MSR_EFER, MSR_TYPE_RW,
+1);
+   set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, MSR_TYPE_RW,
+1);
+   set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP,
+MSR_TYPE_RW, 1);
+   set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP,
+MSR_TYPE_RW, 1);
+   set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP,
+MSR_TYPE_RW, 1);
+   set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP,
+MSR_TYPE_RW, 1);
 }
 
 void sev_es_vcpu_reset(struct vcpu_svm *svm)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 31109961183e..97f7406cf7d6 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -616,8 +616,8 @@ static int direct_access_msr_slot(u32 msr)
return -ENOENT;
 }
 
-static void set_shadow_msr_intercept(struct kvm_vcpu *vcpu, u32 msr, int read,
-int write)
+static void set_shadow_msr_intercept(struct kvm_vcpu *vcpu, u32 msr,
+int type, bool value)
 {
struct vcpu_svm *svm = to_svm(vcpu);
int slot = direct_access_msr_slot(msr);
@@ -626,15 +626,19 @@ static void set_shadow_msr_intercept(struct kvm_vcpu 
*vcpu, u32 msr, int read,
return;
 
/* Set the shadow bitmaps to the desired intercept states */
-   if (read)
-   set_bit(slot, svm->shadow_msr_intercept.read);
-   else
-   clear_bit(slot, svm->shadow_msr_intercept.read);
+   if (type & MSR_TYPE_R) {
+   if (value)
+   set_bit(slot, svm->shadow_msr_intercept.read);
+   else
+   clear_bit(slot, svm->shadow_msr_intercept.read);
+   }
 
-   if (write)
-   set_bit(slot, svm->shadow_msr_intercept.write);
-   else
-   clear_bit(slot, svm->shadow_msr_intercept.write);
+   if (type & MSR_TYPE_W) {
+   if (value)
+   set_bit(slot, svm->shadow_msr_intercept.write);
+   else
+   clear_bit(slot, svm->shadow_msr_intercept.write);
+   }
 }
 
 static bool valid_msr_intercept(u32 index)
@@ -662,7 +666,7 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, 
u32 msr)
 }
 
 static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm,
-   u32 msr, int read, int write)
+   u32 msr, int type, bool value)
 {
u8 bit_read, bit_write;
unsigned long tmp;
@@ -675,11 +679,13 @@ static void set_msr_interception_bitmap(struct kvm_vcpu 
*vcpu, u32 *msrpm,
WARN_ON(!valid_msr_intercept(msr));
 
/* Enforce non allowed MSRs to trap */
-   if (read && !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ))
-   read = 0;
+   if (value && (type & MSR_TYPE_R) &&
+   !kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ))
+   

[PATCH v12 22/77] KVM: x86: export kvm_vcpu_ioctl_x86_get_xsave()

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This function is needed for the KVMI_VCPU_GET_XSAVE command.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 3 +++
 arch/x86/kvm/x86.c  | 4 ++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 681e27c2065d..1d3a62536a93 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1740,6 +1740,9 @@ unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
 void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
 int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu);
 
+void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *guest_xsave);
+
 bool kvm_inject_pending_exception(struct kvm_vcpu *vcpu);
 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0cd329622e1e..0b88e05e94f7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4791,8 +4791,8 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
}
 }
 
-static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
-struct kvm_xsave *guest_xsave)
+void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *guest_xsave)
 {
if (!vcpu->arch.guest_fpu)
return;
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 26/77] KVM: x86: page_track: add support for preread, prewrite and preexec

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

The access to a tracked memory page leads to two types of actions from the
introspection tool: either the access is allowed (maybe with different
data for the source operand) or the vCPU should re-enter in guest
(the page is not tracked anymore, the instruction was skipped/emulated by
the introspection tool, etc.). These new callbacks must return 'true'
for the first case and 'false' for the second.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_page_track.h |  48 +-
 arch/x86/kvm/mmu/mmu.c|  95 
 arch/x86/kvm/mmu/mmu_internal.h   |   6 ++
 arch/x86/kvm/mmu/page_track.c | 123 --
 arch/x86/kvm/mmu/tdp_mmu.c| 106 ++
 arch/x86/kvm/mmu/tdp_mmu.h|   6 ++
 6 files changed, 374 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_page_track.h 
b/arch/x86/include/asm/kvm_page_track.h
index df6e5674ea5c..56ba4d2d0a31 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -3,7 +3,10 @@
 #define _ASM_X86_KVM_PAGE_TRACK_H
 
 enum kvm_page_track_mode {
+   KVM_PAGE_TRACK_PREREAD,
+   KVM_PAGE_TRACK_PREWRITE,
KVM_PAGE_TRACK_WRITE,
+   KVM_PAGE_TRACK_PREEXEC,
KVM_PAGE_TRACK_MAX,
 };
 
@@ -22,6 +25,33 @@ struct kvm_page_track_notifier_head {
 struct kvm_page_track_notifier_node {
struct hlist_node node;
 
+   /*
+* It is called when guest is reading the read-tracked page
+* and the read emulation is about to happen.
+*
+* @vcpu: the vcpu where the read access happened.
+* @gpa: the physical address read by guest.
+* @gva: the virtual address read by guest.
+* @bytes: the read length.
+* @node: this node.
+*/
+   bool (*track_preread)(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ int bytes,
+ struct kvm_page_track_notifier_node *node);
+   /*
+* It is called when guest is writing the write-tracked page
+* and the write emulation didn't happen yet.
+*
+* @vcpu: the vcpu where the write access happened.
+* @gpa: the physical address written by guest.
+* @gva: the virtual address written by guest.
+* @new: the data to be written to the address.
+* @bytes: the written length.
+* @node: this node
+*/
+   bool (*track_prewrite)(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+  const u8 *new, int bytes,
+  struct kvm_page_track_notifier_node *node);
/*
 * It is called when guest is writing the write-tracked page
 * and write emulation is finished at that time.
@@ -36,6 +66,17 @@ struct kvm_page_track_notifier_node {
void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
const u8 *new, int bytes,
struct kvm_page_track_notifier_node *node);
+   /*
+* It is called when guest is fetching from an exec-tracked page
+* and the fetch emulation is about to happen.
+*
+* @vcpu: the vcpu where the fetch access happened.
+* @gpa: the physical address fetched by guest.
+* @gva: the virtual address fetched by guest.
+* @node: this node.
+*/
+   bool (*track_preexec)(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ struct kvm_page_track_notifier_node *node);
/*
 * It is called when memory slot is being created
 *
@@ -49,7 +90,7 @@ struct kvm_page_track_notifier_node {
  struct kvm_page_track_notifier_node *node);
/*
 * It is called when memory slot is being moved or removed
-* users can drop write-protection for the pages in that memory slot
+* users can drop active protection for the pages in that memory slot
 *
 * @kvm: the kvm where memory slot being moved or removed
 * @slot: the memory slot being moved or removed
@@ -85,7 +126,12 @@ kvm_page_track_register_notifier(struct kvm *kvm,
 void
 kvm_page_track_unregister_notifier(struct kvm *kvm,
   struct kvm_page_track_notifier_node *n);
+bool kvm_page_track_preread(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+   int bytes);
+bool kvm_page_track_prewrite(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+const u8 *new, int bytes);
 void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
  const u8 *new, int bytes);
+bool kvm_page_track_preexec(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva);
 void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
 #endif
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm

[PATCH v12 32/77] KVM: introspection: add the read/dispatch message function

2021-10-06 Thread Adalbert Lazăr
Based on the common header (struct kvmi_msg_hdr), the receiving thread
will read/validate all messages, execute the VM introspection commands
(eg. KVMI_VM_GET_INFO) and dispatch the vCPU introspection commands
(eg. KVMI_VCPU_GET_REGISTERS) to the vCPU threads.

The vCPU threads will reply to vCPU introspection commands without
the help of the receiving thread. Same for sending vCPU events, but
the vCPU thread will wait for the receiving thread to get the event
reply. Meanwhile, it will execute any queued vCPU introspection command.

The receiving thread will end when the socket is closed or on the first
API error (eg. wrong message size).

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  75 
 include/uapi/linux/kvmi.h |  11 ++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 100 +++
 virt/kvm/introspection/kvmi.c |  43 -
 virt/kvm/introspection/kvmi_int.h |  10 ++
 virt/kvm/introspection/kvmi_msg.c | 161 +-
 6 files changed, 398 insertions(+), 2 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 59cc33a39f9f..ae6bbf37aef3 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -65,6 +65,74 @@ been used on that guest (if requested). Obviously, whether 
the guest can
 really continue normal execution depends on whether the introspection
 tool has made any modifications that require an active KVMI channel.
 
+All messages (commands or events) have a common header::
+
+   struct kvmi_msg_hdr {
+   __u16 id;
+   __u16 size;
+   __u32 seq;
+   };
+
+The replies have the same header, with the sequence number (``seq``)
+and message id (``id``) matching the command/event.
+
+After ``kvmi_msg_hdr``, ``id`` specific data of ``size`` bytes will
+follow.
+
+The message header and its data must be sent with one ``sendmsg()`` call
+to the socket. This simplifies the receiver loop and avoids
+the reconstruction of messages on the other side.
+
+The wire protocol uses the host native byte-order. The introspection tool
+must check this during the handshake and do the necessary conversion.
+
+A command reply begins with::
+
+   struct kvmi_error_code {
+   __s32 err;
+   __u32 padding;
+   }
+
+followed by the command specific data if the error code ``err`` is zero.
+
+The error code -KVM_ENOSYS is returned for unsupported commands.
+
+The error code -KVM_EPERM is returned for disallowed commands (see 
**Hooking**).
+
+Other error codes can be returned during message handling, but for
+some errors (incomplete messages, wrong sequence numbers, socket errors
+etc.) the socket will be closed. The device manager should reconnect.
+
+When a vCPU thread sends an introspection event, it will wait (and handle
+any related introspection command) until it gets the event reply::
+
+   Host kernel   Introspection tool
+   ---   --
+   event 1 ->
+ <- command 1
+   command 1 reply ->
+ <- command 2
+   command 2 reply ->
+ <- event 1 reply
+
+As it can be seen below, the wire protocol specifies occasional padding. This
+is to permit working with the data by directly using C structures or to round
+the structure size to a multiple of 8 bytes (64bit) to improve the copy
+operations that happen during ``recvmsg()`` or ``sendmsg()``. The members
+should have the native alignment of the host. All padding must be
+initialized with zero otherwise the respective command will fail with
+-KVM_EINVAL.
+
+To describe the commands/events, we reuse some conventions from api.rst:
+
+  - Architectures: which instruction set architectures provide this 
command/event
+
+  - Versions: which versions provide this command/event
+
+  - Parameters: incoming message data
+
+  - Returns: outgoing/reply message data
+
 Handshake
 -
 
@@ -99,6 +167,13 @@ In the end, the device manager will pass the file 
descriptor (plus
 the allowed commands/events) to KVM. It will detect when the socket is
 shutdown and it will reinitiate the handshake.
 
+Once the file descriptor reaches KVM, the introspection tool should
+use the *KVMI_GET_VERSION* command to get the API version and/or the
+*KVMI_VM_CHECK_COMMAND* and *KVMI_VM_CHECK_EVENT* commands to see which
+commands/events are allowed for this guest. The error code -KVM_EPERM
+will be returned if the introspection tool uses a command or tries to
+enable an event which is disallowed.
+
 Unhooking
 -
 
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 85f8622ddf95..2b37eee82c52 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -32,4 +32,15 @@ enum {
KVMI_NEXT_VCPU_EVENT
 };
 
+struct kvmi_msg_hdr {
+   __u16 id;

[PATCH v12 39/77] KVM: introspection: add KVMI_VM_READ_PHYSICAL/KVMI_VM_WRITE_PHYSICAL

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

These commands allow the introspection tool to read/write from/to
the guest memory.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  68 ++
 include/uapi/linux/kvmi.h |  17 +++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 124 ++
 virt/kvm/introspection/kvmi.c |  98 ++
 virt/kvm/introspection/kvmi_int.h |   7 +
 virt/kvm/introspection/kvmi_msg.c |  44 +++
 6 files changed, 358 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index ecf809f40771..c424acf01784 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -365,6 +365,74 @@ the following events::
 * -KVM_EINVAL - the event ID is unknown (use *KVMI_VM_CHECK_EVENT* first)
 * -KVM_EPERM - the access is disallowed (use *KVMI_VM_CHECK_EVENT* first)
 
+6. KVMI_VM_READ_PHYSICAL
+
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_read_physical {
+   __u64 gpa;
+   __u16 size;
+   __u16 padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   __u8 data[0];
+
+Reads from the guest memory.
+
+Currently, the size must be non-zero and the read must be restricted to
+one page (offset + size <= PAGE_SIZE).
+
+:Errors:
+
+* -KVM_ENOENT - the guest page doesn't exist
+* -KVM_EINVAL - the specified gpa/size pair is invalid
+* -KVM_EINVAL - the padding is not zero
+
+7. KVMI_VM_WRITE_PHYSICAL
+-
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_write_physical {
+   __u64 gpa;
+   __u16 size;
+   __u16 padding1;
+   __u32 padding2;
+   __u8  data[0];
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Writes into the guest memory.
+
+Currently, the size must be non-zero and the write must be restricted to
+one page (offset + size <= PAGE_SIZE).
+
+:Errors:
+
+* -KVM_ENOENT - the guest page doesn't exist
+* -KVM_EINVAL - the specified gpa/size pair is invalid
+* -KVM_EINVAL - the padding is not zero
+
 Events
 ==
 
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 9a10ef2cd890..048afad01be6 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -24,6 +24,8 @@ enum {
KVMI_VM_CHECK_EVENT= KVMI_VM_MESSAGE_ID(3),
KVMI_VM_GET_INFO   = KVMI_VM_MESSAGE_ID(4),
KVMI_VM_CONTROL_EVENTS = KVMI_VM_MESSAGE_ID(5),
+   KVMI_VM_READ_PHYSICAL  = KVMI_VM_MESSAGE_ID(6),
+   KVMI_VM_WRITE_PHYSICAL = KVMI_VM_MESSAGE_ID(7),
 
KVMI_NEXT_VM_MESSAGE
 };
@@ -90,4 +92,19 @@ struct kvmi_vm_control_events {
__u32 padding2;
 };
 
+struct kvmi_vm_read_physical {
+   __u64 gpa;
+   __u16 size;
+   __u16 padding1;
+   __u32 padding2;
+};
+
+struct kvmi_vm_write_physical {
+   __u64 gpa;
+   __u16 size;
+   __u16 padding1;
+   __u32 padding2;
+   __u8  data[0];
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index f5fdb09b9be6..28dd6414a7e8 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -8,6 +8,7 @@
 #define _GNU_SOURCE /* for program_invocation_short_name */
 #include 
 #include 
+#include 
 
 #include "test_util.h"
 
@@ -24,6 +25,12 @@ static int socket_pair[2];
 #define Kvm_socket   socket_pair[0]
 #define Userspace_socket socket_pair[1]
 
+static vm_vaddr_t test_gva;
+static void *test_hva;
+static vm_paddr_t test_gpa;
+
+static int page_size;
+
 void setup_socket(void)
 {
int r;
@@ -427,8 +434,112 @@ static void test_cmd_vm_control_events(struct kvm_vm *vm)
allow_event(vm, id);
 }
 
+static void cmd_vm_write_page(__u64 gpa, __u64 size, void *p,
+ int expected_err)
+{
+   struct kvmi_vm_write_physical *cmd;
+   struct kvmi_msg_hdr *req;
+   size_t req_size;
+
+   req_size = sizeof(*req) + sizeof(*cmd) + size;
+   req = calloc(1, req_size);
+
+   cmd = (struct kvmi_vm_write_physical *)(req + 1);
+   cmd->gpa = gpa;
+   cmd->size = size;
+
+   memcpy(cmd + 1, p, size);
+
+   test_vm_command(KVMI_VM_WRITE_PHYSICAL, req, req_size, NULL, 0,
+   expected_err);
+
+   free(req);
+}
+
+static void write_guest_page(__u64 gpa, void *p)
+{
+   cmd_vm_write_page(gpa, page_size, p, 0);
+}
+
+static void write_with_invalid_arguments(__u64 gpa, __u64 size, void *p)
+{
+   cmd_vm_write_page(gpa, size, p, -KVM_EINVAL);
+}
+
+static void write_invalid_guest_page(struct kvm_vm *vm, void *p)
+{
+  

[PATCH v12 13/77] KVM: x86: add kvm_x86_ops.msr_write_intercepted()

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This function will be used to check if the write access for a specific
MSR is already intercepted. The information will be used to restore the
interception status when the introspection tool is no longer interested
in that MSR.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm-x86-ops.h | 1 +
 arch/x86/include/asm/kvm_host.h| 1 +
 arch/x86/kvm/svm/svm.c | 1 +
 arch/x86/kvm/vmx/vmx.c | 1 +
 4 files changed, 4 insertions(+)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h 
b/arch/x86/include/asm/kvm-x86-ops.h
index 30d01c9ed31b..90e913408c6e 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -127,6 +127,7 @@ KVM_X86_OP(cr3_write_intercepted)
 KVM_X86_OP(desc_ctrl_supported)
 KVM_X86_OP(control_desc_intercept)
 KVM_X86_OP(desc_intercepted)
+KVM_X86_OP(msr_write_intercepted)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_NULL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1182b0fbd245..1e77cb825ec4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1331,6 +1331,7 @@ struct kvm_x86_ops {
void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
+   bool (*msr_write_intercepted)(struct kvm_vcpu *vcpu, u32 msr);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
void (*get_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c1b1e5cdd508..31109961183e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4689,6 +4689,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.get_msr_feature = svm_get_msr_feature,
.get_msr = svm_get_msr,
.set_msr = svm_set_msr,
+   .msr_write_intercepted = msr_write_intercepted,
.get_segment_base = svm_get_segment_base,
.get_segment = svm_get_segment,
.set_segment = svm_set_segment,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 8f34b19827a3..b036aed96912 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7637,6 +7637,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.get_msr_feature = vmx_get_msr_feature,
.get_msr = vmx_get_msr,
.set_msr = vmx_set_msr,
+   .msr_write_intercepted = msr_write_intercepted,
.get_segment_base = vmx_get_segment_base,
.get_segment = vmx_get_segment,
.set_segment = vmx_set_segment,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 05/77] KVM: x86: avoid injecting #PF when emulate the VMCALL instruction

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

It can happen that we end up emulating the VMCALL instruction as a result
of the handling of an EPT write fault. In this situation,
the emulator will try to unconditionally patch the correct hypercall
opcode bytes using emulator_write_emulated(). However, this last call
uses the fault GPA (if available) or walks the guest page tables at RIP,
otherwise. The trouble begins when using VM introspection,
when we forbid the use of the fault GPA and fallback to the guest pt walk:
in Windows (8.1 and newer) the page that we try to write into
is marked read-execute and as such emulator_write_emulated() fails
and we inject a write #PF, leading to a guest crash.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/x86.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bbcd256dc2f4..2b5c3c22c48e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8748,11 +8748,15 @@ static int emulator_fix_hypercall(struct 
x86_emulate_ctxt *ctxt)
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
char instruction[3];
unsigned long rip = kvm_rip_read(vcpu);
+   int err;
 
static_call(kvm_x86_patch_hypercall)(vcpu, instruction);
 
-   return emulator_write_emulated(ctxt, rip, instruction, 3,
+   err = emulator_write_emulated(ctxt, rip, instruction, 3,
>exception);
+   if (err == X86EMUL_PROPAGATE_FAULT)
+   err = X86EMUL_CONTINUE;
+   return err;
 }
 
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 23/77] KVM: x86: export kvm_vcpu_ioctl_x86_set_xsave()

2021-10-06 Thread Adalbert Lazăr
This function is needed for the KVMI_VCPU_SET_XSAVE command.

Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 2 ++
 arch/x86/kvm/x86.c  | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1d3a62536a93..43569a6fc776 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1742,6 +1742,8 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu);
 
 void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
  struct kvm_xsave *guest_xsave);
+int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
+struct kvm_xsave *guest_xsave);
 
 bool kvm_inject_pending_exception(struct kvm_vcpu *vcpu);
 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0b88e05e94f7..b01d865f6047 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4811,8 +4811,8 @@ void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 
 #define XSAVE_MXCSR_OFFSET 24
 
-static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
-   struct kvm_xsave *guest_xsave)
+int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
+struct kvm_xsave *guest_xsave)
 {
u64 xstate_bv;
u32 mxcsr;
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 21/77] KVM: x86: export kvm_inject_pending_exception()

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This function is needed for the KVMI_VCPU_INJECT_EXCEPTION command.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c  | 52 +++--
 2 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 49734fea7c4f..681e27c2065d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1740,6 +1740,7 @@ unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
 void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
 int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu);
 
+bool kvm_inject_pending_exception(struct kvm_vcpu *vcpu);
 void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
 void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long 
payload);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index de0fc15ab7cb..0cd329622e1e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8831,6 +8831,35 @@ static void kvm_inject_exception(struct kvm_vcpu *vcpu)
static_call(kvm_x86_queue_exception)(vcpu);
 }
 
+bool kvm_inject_pending_exception(struct kvm_vcpu *vcpu)
+{
+   if (vcpu->arch.exception.pending) {
+   trace_kvm_inj_exception(vcpu->arch.exception.nr,
+   vcpu->arch.exception.has_error_code,
+   vcpu->arch.exception.error_code);
+
+   vcpu->arch.exception.pending = false;
+   vcpu->arch.exception.injected = true;
+
+   if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
+   __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
+X86_EFLAGS_RF);
+
+   if (vcpu->arch.exception.nr == DB_VECTOR) {
+   kvm_deliver_exception_payload(vcpu);
+   if (vcpu->arch.dr7 & DR7_GD) {
+   vcpu->arch.dr7 &= ~DR7_GD;
+   kvm_update_dr7(vcpu);
+   }
+   }
+
+   kvm_inject_exception(vcpu);
+   return true;
+   }
+
+   return false;
+}
+
 static int inject_pending_event(struct kvm_vcpu *vcpu, bool 
*req_immediate_exit)
 {
int r;
@@ -8882,29 +8911,8 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, 
bool *req_immediate_exit)
}
 
/* try to inject new event if pending */
-   if (vcpu->arch.exception.pending) {
-   trace_kvm_inj_exception(vcpu->arch.exception.nr,
-   vcpu->arch.exception.has_error_code,
-   vcpu->arch.exception.error_code);
-
-   vcpu->arch.exception.pending = false;
-   vcpu->arch.exception.injected = true;
-
-   if (exception_type(vcpu->arch.exception.nr) == EXCPT_FAULT)
-   __kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
-X86_EFLAGS_RF);
-
-   if (vcpu->arch.exception.nr == DB_VECTOR) {
-   kvm_deliver_exception_payload(vcpu);
-   if (vcpu->arch.dr7 & DR7_GD) {
-   vcpu->arch.dr7 &= ~DR7_GD;
-   kvm_update_dr7(vcpu);
-   }
-   }
-
-   kvm_inject_exception(vcpu);
+   if (kvm_inject_pending_exception(vcpu))
can_inject = false;
-   }
 
/* Don't inject interrupts if the user asked to avoid doing so */
if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 25/77] KVM: x86: page track: add track_create_slot() callback

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This is used to add page access notifications as soon as a slot appears
or when a slot is moved.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_page_track.h | 11 +++
 arch/x86/kvm/mmu/page_track.c | 15 +++
 2 files changed, 26 insertions(+)

diff --git a/arch/x86/include/asm/kvm_page_track.h 
b/arch/x86/include/asm/kvm_page_track.h
index f981b6360de5..df6e5674ea5c 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -36,6 +36,17 @@ struct kvm_page_track_notifier_node {
void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
const u8 *new, int bytes,
struct kvm_page_track_notifier_node *node);
+   /*
+* It is called when memory slot is being created
+*
+* @kvm: the kvm where the memory slot is being created
+* @slot: the memory slot being created
+* @npages: the number of pages
+* @node: this node
+*/
+   void (*track_create_slot)(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages,
+ struct kvm_page_track_notifier_node *node);
/*
 * It is called when memory slot is being moved or removed
 * users can drop write-protection for the pages in that memory slot
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index e0b1cdd3013e..f18be17b56a3 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -43,6 +43,9 @@ int kvm_page_track_create_memslot(struct kvm *kvm,
  struct kvm_memory_slot *slot,
  unsigned long npages)
 {
+   struct kvm_page_track_notifier_head *head;
+   struct kvm_page_track_notifier_node *n;
+   int idx;
int i;
 
for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
@@ -56,6 +59,18 @@ int kvm_page_track_create_memslot(struct kvm *kvm,
goto track_free;
}
 
+   head = >arch.track_notifier_head;
+
+   if (hlist_empty(>track_notifier_list))
+   return 0;
+
+   idx = srcu_read_lock(>track_srcu);
+   hlist_for_each_entry_srcu(n, >track_notifier_list, node,
+   srcu_read_lock_held(>track_srcu))
+   if (n->track_create_slot)
+   n->track_create_slot(kvm, slot, npages, n);
+   srcu_read_unlock(>track_srcu, idx);
+
return 0;
 
 track_free:
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 34/77] KVM: introspection: add KVMI_VM_CHECK_COMMAND and KVMI_VM_CHECK_EVENT

2021-10-06 Thread Adalbert Lazăr
These commands are used to check what introspection commands and events
are supported (kernel) and allowed (device manager).

These are alternative methods to KVMI_GET_VERSION in checking if the
introspection supports a specific command/event.

As with the KVMI_GET_VERSION command, these two commands can never be
disallowed by the device manager.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 62 +++
 include/uapi/linux/kvmi.h | 16 -
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 45 ++
 virt/kvm/introspection/kvmi.c | 19 ++
 virt/kvm/introspection/kvmi_int.h |  2 +
 virt/kvm/introspection/kvmi_msg.c | 40 +++-
 6 files changed, 182 insertions(+), 2 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index d3d672a07872..13169575f75f 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -250,3 +250,65 @@ larger messages.
 The introspection tool should use this command to identify the features
 supported by the kernel side and what messages must be used for event
 replies.
+
+2. KVMI_VM_CHECK_COMMAND
+
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_check_command {
+   __u16 id;
+   __u16 padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+
+Checks if the command specified by ``id`` is supported and allowed.
+
+This command is always allowed.
+
+:Errors:
+
+* -KVM_ENOENT - the command specified by ``id`` is unsupported
+* -KVM_EPERM - the command specified by ``id`` is disallowed
+* -KVM_EINVAL - the padding is not zero
+
+3. KVMI_VM_CHECK_EVENT
+--
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_check_event {
+   __u16 id;
+   __u16 padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+
+Checks if the event specified by ``id`` is supported and allowed.
+
+This command is always allowed.
+
+:Errors:
+
+* -KVM_ENOENT - the event specified by ``id`` is unsupported
+* -KVM_EPERM - the event specified by ``id`` is disallowed
+* -KVM_EINVAL - the padding is not zero
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 77dd727dfe18..0c2d0cedde6f 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -17,7 +17,9 @@ enum {
 #define KVMI_VCPU_MESSAGE_ID(id) (((id) << 1) | 1)
 
 enum {
-   KVMI_GET_VERSION = KVMI_VM_MESSAGE_ID(1),
+   KVMI_GET_VERSION  = KVMI_VM_MESSAGE_ID(1),
+   KVMI_VM_CHECK_COMMAND = KVMI_VM_MESSAGE_ID(2),
+   KVMI_VM_CHECK_EVENT   = KVMI_VM_MESSAGE_ID(3),
 
KVMI_NEXT_VM_MESSAGE
 };
@@ -53,4 +55,16 @@ struct kvmi_get_version_reply {
__u32 max_msg_size;
 };
 
+struct kvmi_vm_check_command {
+   __u16 id;
+   __u16 padding1;
+   __u32 padding2;
+};
+
+struct kvmi_vm_check_event {
+   __u16 id;
+   __u16 padding1;
+   __u32 padding2;
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 69993e54334a..57f68ff60eb9 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -100,6 +100,8 @@ static void hook_introspection(struct kvm_vm *vm)
do_hook_ioctl(vm, Kvm_socket, EEXIST);
 
set_command_perm(vm, KVMI_GET_VERSION, disallow, EPERM);
+   set_command_perm(vm, KVMI_VM_CHECK_COMMAND, disallow, EPERM);
+   set_command_perm(vm, KVMI_VM_CHECK_EVENT, disallow, EPERM);
set_command_perm(vm, all_IDs, allow_inval, EINVAL);
set_command_perm(vm, all_IDs, disallow, 0);
set_command_perm(vm, all_IDs, allow, 0);
@@ -248,6 +250,47 @@ static void test_cmd_get_version(void)
pr_debug("Max message size: %u\n", rpl.max_msg_size);
 }
 
+static void cmd_vm_check_command(__u16 id, int expected_err)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vm_check_command cmd;
+   } req = {};
+
+   req.cmd.id = id;
+
+   test_vm_command(KVMI_VM_CHECK_COMMAND, &req, sizeof(req), NULL, 0,
+   expected_err);
+}
+
+static void test_cmd_vm_check_command(void)
+{
+   __u16 valid_id = KVMI_GET_VERSION, invalid_id = 0x;
+
+   cmd_vm_check_command(valid_id, 0);
+   cmd_vm_check_command(invalid_id, -KVM_ENOENT);
+}
+
+static void cmd_vm_check_event(__u16 id, int expected_err)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vm_check_event cmd;
+   } req = {};
+
+   req.cmd.id = id;
+
+   test_vm_command(KVMI_VM_CHECK_EVENT, &req, sizeof(req), NULL, 0,
+   expected_err);

[PATCH v12 08/77] KVM: x86: add kvm_x86_ops.cr3_write_intercepted()

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This function will be used to allow the introspection tool to disable the
CR3-write interception when it is no longer interested in these events,
but only if nothing else depends on these VM-exits.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm-x86-ops.h | 1 +
 arch/x86/include/asm/kvm_host.h| 1 +
 arch/x86/kvm/svm/svm.c | 8 
 arch/x86/kvm/vmx/vmx.c | 8 
 4 files changed, 18 insertions(+)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h 
b/arch/x86/include/asm/kvm-x86-ops.h
index e1f63d36efb7..04a77a0858ef 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -123,6 +123,7 @@ KVM_X86_OP(msr_filter_changed)
 KVM_X86_OP_NULL(complete_emulated_msr)
 KVM_X86_OP(bp_intercepted)
 KVM_X86_OP(control_cr3_intercept)
+KVM_X86_OP(cr3_write_intercepted)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_NULL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 89d53e55e1f9..9c3133380028 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1343,6 +1343,7 @@ struct kvm_x86_ops {
void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
void (*control_cr3_intercept)(struct kvm_vcpu *vcpu, int type,
  bool enable);
+   bool (*cr3_write_intercepted)(struct kvm_vcpu *vcpu);
int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 5a051fa19c7e..9fac69c8e135 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1863,6 +1863,13 @@ static void svm_control_cr3_intercept(struct kvm_vcpu 
*vcpu, int type,
 svm_clr_intercept(svm, INTERCEPT_CR3_WRITE);
 }
 
+static bool svm_cr3_write_intercepted(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   return svm_is_intercept(svm, INTERCEPT_CR3_WRITE);
+}
+
 static void svm_set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
 {
@@ -4634,6 +4641,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.is_valid_cr4 = svm_is_valid_cr4,
.set_cr4 = svm_set_cr4,
.control_cr3_intercept = svm_control_cr3_intercept,
+   .cr3_write_intercepted = svm_cr3_write_intercepted,
.set_efer = svm_set_efer,
.get_idt = svm_get_idt,
.set_idt = svm_set_idt,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c8f5bc371f38..3f5731213acf 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3021,6 +3021,13 @@ static void vmx_control_cr3_intercept(struct kvm_vcpu 
*vcpu, int type,
exec_controls_clearbit(vmx, cr3_exec_control);
 }
 
+static bool vmx_cr3_write_intercepted(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   return !!(exec_controls_get(vmx) & CPU_BASED_CR3_LOAD_EXITING);
+}
+
 void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7622,6 +7629,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.is_valid_cr4 = vmx_is_valid_cr4,
.set_cr4 = vmx_set_cr4,
.control_cr3_intercept = vmx_control_cr3_intercept,
+   .cr3_write_intercepted = vmx_cr3_write_intercepted,
.set_efer = vmx_set_efer,
.get_idt = vmx_get_idt,
.set_idt = vmx_set_idt,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 17/77] KVM: x86: add kvm_x86_ops.fault_gla()

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This function is needed for kvmi_update_ad_flags()
and kvm_page_track_emulation_failure().

kvmi_update_ad_flags() uses the existing guest page table walk code to
update the A/D bits and return to guest (when the introspection tool
write-protects the guest page tables).

kvm_page_track_emulation_failure() calls the page tracking code, that
can trigger an event for the introspection tool (which might need the
GVA in addition to the GPA).

Signed-off-by: Mihai Donțu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm-x86-ops.h | 1 +
 arch/x86/include/asm/kvm_host.h| 2 ++
 arch/x86/include/asm/vmx.h | 2 ++
 arch/x86/kvm/svm/svm.c | 9 +
 arch/x86/kvm/vmx/vmx.c | 9 +
 5 files changed, 23 insertions(+)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h 
b/arch/x86/include/asm/kvm-x86-ops.h
index 4228b775a48e..ad6c19d9bef5 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -129,6 +129,7 @@ KVM_X86_OP(control_desc_intercept)
 KVM_X86_OP(desc_intercepted)
 KVM_X86_OP(msr_write_intercepted)
 KVM_X86_OP(control_msr_intercept)
+KVM_X86_OP(fault_gla)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_NULL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index db88d38e485d..96058a8a1e5e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1509,6 +1509,8 @@ struct kvm_x86_ops {
int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
 
void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
+
+   u64 (*fault_gla)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 0ffaa3156a4e..5cef3b96e29a 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -546,6 +546,7 @@ enum vm_entry_failure_code {
 #define EPT_VIOLATION_READABLE_BIT 3
 #define EPT_VIOLATION_WRITABLE_BIT 4
 #define EPT_VIOLATION_EXECUTABLE_BIT   5
+#define EPT_VIOLATION_GLA_VALID_BIT    7
 #define EPT_VIOLATION_GVA_TRANSLATED_BIT 8
 #define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT)
 #define EPT_VIOLATION_ACC_WRITE    (1 << EPT_VIOLATION_ACC_WRITE_BIT)
@@ -553,6 +554,7 @@ enum vm_entry_failure_code {
 #define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT)
 #define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT)
 #define EPT_VIOLATION_EXECUTABLE   (1 << EPT_VIOLATION_EXECUTABLE_BIT)
+#define EPT_VIOLATION_GLA_VALID    (1 << EPT_VIOLATION_GLA_VALID_BIT)
 #define EPT_VIOLATION_GVA_TRANSLATED   (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT)
 
 /*
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index de6cb59a332d..5409438766ee 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4704,6 +4704,13 @@ static int svm_vm_init(struct kvm *kvm)
return 0;
 }
 
+static u64 svm_fault_gla(struct kvm_vcpu *vcpu)
+{
+   const struct vcpu_svm *svm = to_svm(vcpu);
+
+   return svm->vcpu.arch.cr2 ? svm->vcpu.arch.cr2 : ~0ull;
+}
+
 static struct kvm_x86_ops svm_x86_ops __initdata = {
.hardware_unsetup = svm_hardware_teardown,
.hardware_enable = svm_hardware_enable,
@@ -4839,6 +4846,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.complete_emulated_msr = svm_complete_emulated_msr,
 
.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
+
+   .fault_gla = svm_fault_gla,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ceba2e112e26..f3e880ef22c8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7620,6 +7620,13 @@ static bool vmx_check_apicv_inhibit_reasons(ulong bit)
return supported & BIT(bit);
 }
 
+static u64 vmx_fault_gla(struct kvm_vcpu *vcpu)
+{
+   if (vcpu->arch.exit_qualification & EPT_VIOLATION_GLA_VALID)
+   return vmcs_readl(GUEST_LINEAR_ADDRESS);
+   return ~0ull;
+}
+
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
.hardware_unsetup = hardware_unsetup,
 
@@ -7762,6 +7769,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.complete_emulated_msr = kvm_complete_insn_gp,
 
.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
+
+   .fault_gla = vmx_fault_gla,
 };
 
 static __init void vmx_setup_user_return_msrs(void)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 66/77] KVM: introspection: add KVMI_VCPU_EVENT_DESCRIPTOR

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This event is sent when IDTR, GDTR, LDTR or TR are accessed.

These could be used to implement a tiny agent which runs in the context
of an introspected guest and uses virtualized exceptions (#VE) and
alternate EPT views (VMFUNC #0) to filter converted VMEXITS. The events
of interest will be suppressed (after some appropriate guest-side
handling) while the rest will be sent to the introspector via a VMCALL.

Signed-off-by: Nicușor Cîțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 43 +++
 arch/x86/include/asm/kvmi_host.h  |  3 +
 arch/x86/include/uapi/asm/kvmi.h  | 13 
 arch/x86/kvm/kvmi.c   | 58 ++
 arch/x86/kvm/kvmi.h   |  1 +
 arch/x86/kvm/kvmi_msg.c   | 19 +
 arch/x86/kvm/svm/svm.c| 35 +
 arch/x86/kvm/vmx/vmx.c| 21 ++
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 75 +++
 10 files changed, 269 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index d60a69e00e0f..e24a93e93fe8 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -540,6 +540,7 @@ the following events::
 
KVMI_VCPU_EVENT_BREAKPOINT
KVMI_VCPU_EVENT_CR
+   KVMI_VCPU_EVENT_DESCRIPTOR
KVMI_VCPU_EVENT_HYPERCALL
KVMI_VCPU_EVENT_XSETBV
 
@@ -563,6 +564,8 @@ the *KVMI_VM_CONTROL_EVENTS* command.
 * -KVM_EINVAL - the event ID is unknown (use *KVMI_VM_CHECK_EVENT* first)
 * -KVM_EPERM - the access is disallowed (use *KVMI_VM_CHECK_EVENT* first)
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EOPNOTSUPP - the event can't be intercepted in the current setup
+(e.g. KVMI_VCPU_EVENT_DESCRIPTOR with AMD)
 * -KVM_EBUSY - the event can't be intercepted right now
(e.g. KVMI_VCPU_EVENT_BREAKPOINT if the #BP event
 is already intercepted by userspace)
@@ -1198,3 +1201,43 @@ to be changed and the introspection has been enabled for 
this event
 ``kvmi_vcpu_event`` (with the vCPU state), the extended control register
 number (``xcr``), the old value (``old_value``) and the new value
 (``new_value``) are sent to the introspection tool.
+
+8. KVMI_VCPU_EVENT_DESCRIPTOR
+-
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, RETRY, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_event;
+   struct kvmi_vcpu_event_descriptor {
+   __u8 descriptor;
+   __u8 write;
+   __u8 padding[6];
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_event_reply;
+
+This event is sent when a descriptor table register is accessed and the
+introspection has been enabled for this event (see 
**KVMI_VCPU_CONTROL_EVENTS**).
+
+``kvmi_vcpu_event`` (with the vCPU state), the descriptor-table register
+(``descriptor``) and the access type (``write``) are sent to the
+introspection tool.
+
+``descriptor`` can be one of::
+
+   KVMI_DESC_IDTR
+   KVMI_DESC_GDTR
+   KVMI_DESC_LDTR
+   KVMI_DESC_TR
+
+``write`` is 1 if the descriptor was written, 0 otherwise.
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index d66349208a6b..a24ba87036f7 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -48,6 +48,7 @@ bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu, bool 
enable);
 void kvmi_enter_guest(struct kvm_vcpu *vcpu);
 void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
   u64 old_value, u64 new_value);
+bool kvmi_descriptor_event(struct kvm_vcpu *vcpu, u8 descriptor, bool write);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
@@ -63,6 +64,8 @@ static inline bool kvmi_monitor_cr3w_intercept(struct 
kvm_vcpu *vcpu,
 static inline void kvmi_enter_guest(struct kvm_vcpu *vcpu) { }
 static inline void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
u64 old_value, u64 new_value) { }
+static inline bool kvmi_descriptor_event(struct kvm_vcpu *vcpu, u8 descriptor,
+bool write) { return true; }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 7b93450d0d62..9c608ef5daa3 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -128,4 +128,17 @@ struct kvmi_vcpu_get_mtrr_type_reply {
__u8 padding[7];
 };
 
+enum {
+   KVMI_DESC_IDTR = 1,
+   KVMI_DESC_GDTR = 2,
+   KVMI_DESC_LDTR = 3,
+   KVMI_DESC_TR   = 4,
+};
+
+struct kvmi_vcpu_event_descriptor {
+   __u8 descriptor;
+   __u8 write;
+   __u8 padding[6];
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */

[PATCH v12 65/77] KVM: introspection: add KVMI_VCPU_GET_MTRR_TYPE

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This command returns the memory type for a guest physical address.

Signed-off-by: Mihai Donțu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 32 +++
 arch/x86/include/uapi/asm/kvmi.h  |  9 ++
 arch/x86/kvm/kvmi_msg.c   | 17 ++
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 18 +++
 5 files changed, 77 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index eedcae3900c5..d60a69e00e0f 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -868,6 +868,38 @@ Modifies the XSAVE area.
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+20. KVMI_VCPU_GET_MTRR_TYPE
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_get_mtrr_type {
+   __u64 gpa;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vcpu_get_mtrr_type_reply {
+   __u8 type;
+   __u8 padding[7];
+   };
+
+Returns the guest memory type for a specific guest physical address (``gpa``).
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 89f3dc9269c1..7b93450d0d62 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -119,4 +119,13 @@ struct kvmi_vcpu_set_xsave {
struct kvm_xsave xsave;
 };
 
+struct kvmi_vcpu_get_mtrr_type {
+   __u64 gpa;
+};
+
+struct kvmi_vcpu_get_mtrr_type_reply {
+   __u8 type;
+   __u8 padding[7];
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index ecad1882cdd8..c890c2396fbc 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg.c
@@ -232,10 +232,27 @@ static int handle_vcpu_set_xsave(const struct 
kvmi_vcpu_msg_job *job,
return kvmi_msg_vcpu_reply(job, msg, ec, NULL, 0);
 }
 
+static int handle_vcpu_get_mtrr_type(const struct kvmi_vcpu_msg_job *job,
+const struct kvmi_msg_hdr *msg,
+const void *_req)
+{
+   const struct kvmi_vcpu_get_mtrr_type *req = _req;
+   struct kvmi_vcpu_get_mtrr_type_reply rpl;
+   gfn_t gfn;
+
+   gfn = gpa_to_gfn(req->gpa);
+
+   memset(&rpl, 0, sizeof(rpl));
+   rpl.type = kvm_mtrr_get_guest_memory_type(job->vcpu, gfn);
+
+   return kvmi_msg_vcpu_reply(job, msg, 0, &rpl, sizeof(rpl));
+}
+
 static const kvmi_vcpu_msg_job_fct msg_vcpu[] = {
[KVMI_VCPU_CONTROL_CR]   = handle_vcpu_control_cr,
[KVMI_VCPU_GET_CPUID]= handle_vcpu_get_cpuid,
[KVMI_VCPU_GET_INFO] = handle_vcpu_get_info,
+   [KVMI_VCPU_GET_MTRR_TYPE]= handle_vcpu_get_mtrr_type,
[KVMI_VCPU_GET_REGISTERS]= handle_vcpu_get_registers,
[KVMI_VCPU_GET_XCR]  = handle_vcpu_get_xcr,
[KVMI_VCPU_GET_XSAVE]= handle_vcpu_get_xsave,
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 4671e0e3cb45..a48cf2c1f9a7 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -46,6 +46,7 @@ enum {
KVMI_VCPU_GET_XCR  = KVMI_VCPU_MESSAGE_ID(8),
KVMI_VCPU_GET_XSAVE= KVMI_VCPU_MESSAGE_ID(9),
KVMI_VCPU_SET_XSAVE= KVMI_VCPU_MESSAGE_ID(10),
+   KVMI_VCPU_GET_MTRR_TYPE= KVMI_VCPU_MESSAGE_ID(11),
 
KVMI_NEXT_VCPU_MESSAGE
 };
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index adac0edddc50..231d574ed592 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -1477,6 +1477,23 @@ static void test_cmd_vcpu_xsave(struct kvm_vm *vm)
cmd_vcpu_set_xsave(vm, );
 }
 
+static void test_cmd_vcpu_get_mtrr_type(struct kvm_vm *vm)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   struct kvmi_vcpu_get_mtrr_type cmd;
+   } req = {};
+   struct kvmi_vcpu_get_mtrr_type_reply rpl;
+
+   req.cmd.gpa = test_gpa;
+
+   test_vcpu0_command(vm, KVMI_VCPU_GET_MTRR_TYPE,
+  &req, sizeof(req), &rpl, sizeof(rpl), 0);
+
+   pr_debug("mtrr_type: gpa 0x%lx type 0x%x\n", test_gpa, rpl.type);
+}
+
 static void test_introspection(struct kvm_vm *vm)
 {
srandom(time(0));
@@ -1505,6 +1522,7 @@ static void test_introspection(struct kvm_vm *vm)
test_event_xsetbv(vm);
test_

[PATCH v12 57/77] KVM: introspection: add KVMI_VM_CONTROL_CLEANUP

2021-10-06 Thread Adalbert Lazăr
This command will allow more control over the guest state on
unhook.  However, the memory restrictions (e.g. those set with
KVMI_VM_SET_PAGE_ACCESS) will be removed on unhook.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 28 +++
 arch/x86/include/asm/kvmi_host.h  |  1 +
 arch/x86/kvm/kvmi.c   | 17 +-
 include/linux/kvmi_host.h |  2 ++
 include/uapi/linux/kvmi.h | 22 +++-
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 24 +
 virt/kvm/introspection/kvmi.c | 18 +++---
 virt/kvm/introspection/kvmi_int.h | 12 ++-
 virt/kvm/introspection/kvmi_msg.c | 34 ++-
 9 files changed, 129 insertions(+), 29 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 56cf64f32740..89c3320f4bd5 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -673,6 +673,34 @@ Returns a CPUID leaf (as seen by the guest OS).
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 * -KVM_ENOENT - the selected leaf is not present or is invalid
 
+14. KVMI_VM_CONTROL_CLEANUP
+---
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_control_cleanup {
+   __u8 enable;
+   __u8 padding[7];
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Enables/disables the automatic cleanup of the changes made by
+the introspection tool at the hypervisor level (e.g. CR/MSR/BP
+interceptions). By default it is enabled.
+
+:Errors:
+
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - ``enable`` is not 1 or 0
+
 Events
 ==
 
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index e008662f91a5..161d1ae5a7cf 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -11,6 +11,7 @@ struct kvmi_monitor_interception {
 };
 
 struct kvmi_interception {
+   bool cleanup;
bool restore_interception;
struct kvmi_monitor_interception breakpoint;
 };
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 2bbeadb9daba..3fd7d691eeda 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -273,13 +273,11 @@ bool kvmi_arch_clean_up_interception(struct kvm_vcpu 
*vcpu)
 {
struct kvmi_interception *arch_vcpui = vcpu->arch.kvmi;
 
-   if (!arch_vcpui)
-   return false;
-
-   if (!arch_vcpui->restore_interception)
+   if (!arch_vcpui || !arch_vcpui->cleanup)
return false;
 
-   kvmi_arch_restore_interception(vcpu);
+   if (arch_vcpui->restore_interception)
+   kvmi_arch_restore_interception(vcpu);
 
return true;
 }
@@ -312,10 +310,13 @@ bool kvmi_arch_vcpu_introspected(struct kvm_vcpu *vcpu)
return !!READ_ONCE(vcpu->arch.kvmi);
 }
 
-void kvmi_arch_request_interception_cleanup(struct kvm_vcpu *vcpu)
+void kvmi_arch_request_interception_cleanup(struct kvm_vcpu *vcpu,
+   bool restore_interception)
 {
struct kvmi_interception *arch_vcpui = READ_ONCE(vcpu->arch.kvmi);
 
-   if (arch_vcpui)
-   arch_vcpui->restore_interception = true;
+   if (arch_vcpui) {
+   arch_vcpui->restore_interception = restore_interception;
+   arch_vcpui->cleanup = true;
+   }
 }
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index 30b7269468dd..7a7360306812 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -50,6 +50,8 @@ struct kvm_introspection {
unsigned long *vm_event_enable_mask;
 
atomic_t ev_seq;
+
+   bool restore_on_unhook;
 };
 
 int kvmi_version(void);
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index ea66f3f803e7..9e28961a8387 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -20,14 +20,15 @@ enum {
 enum {
KVMI_VM_EVENT = KVMI_VM_MESSAGE_ID(0),
 
-   KVMI_GET_VERSION   = KVMI_VM_MESSAGE_ID(1),
-   KVMI_VM_CHECK_COMMAND  = KVMI_VM_MESSAGE_ID(2),
-   KVMI_VM_CHECK_EVENT= KVMI_VM_MESSAGE_ID(3),
-   KVMI_VM_GET_INFO   = KVMI_VM_MESSAGE_ID(4),
-   KVMI_VM_CONTROL_EVENTS = KVMI_VM_MESSAGE_ID(5),
-   KVMI_VM_READ_PHYSICAL  = KVMI_VM_MESSAGE_ID(6),
-   KVMI_VM_WRITE_PHYSICAL = KVMI_VM_MESSAGE_ID(7),
-   KVMI_VM_PAUSE_VCPU = KVMI_VM_MESSAGE_ID(8),
+   KVMI_GET_VERSION= KVMI_VM_MESSAGE_ID(1),
+   KVMI_VM_CHECK_COMMAND   = KVMI_VM_MESSAGE_ID(2),
+   KVMI_VM_CHECK_EVENT = KVMI_VM_MESSAGE_ID(3),
+   KVMI_VM_GET_INFO= KVMI_VM_MESSAGE_ID(4),
+   KVMI_VM_CONTROL_EVENTS  = KVMI_VM_MESSAGE_ID(5),
+   KVMI_VM_READ_PHYSICAL   = KVMI_VM_MESSAGE_ID(6),
+   KVMI_VM_WRITE_PHYSICAL  = KVMI_VM_MESSAGE_ID(

[PATCH v12 69/77] KVM: introspection: restore the state of MSR interception on unhook

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This commit also ensures that the introspection tool and the userspace
do not disable the MSR access VM-exit for each other.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvmi_host.h |  12 +++
 arch/x86/kvm/kvmi.c  | 124 +++
 arch/x86/kvm/svm/svm.c   |  10 +++
 arch/x86/kvm/vmx/vmx.c   |  11 +++
 4 files changed, 142 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 5a4fc5b80907..8822f0310156 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -26,6 +26,12 @@ struct kvmi_interception {
DECLARE_BITMAP(low, KVMI_NUM_MSR);
DECLARE_BITMAP(high, KVMI_NUM_MSR);
} kvmi_mask;
+   struct {
+   DECLARE_BITMAP(low, KVMI_NUM_MSR);
+   DECLARE_BITMAP(high, KVMI_NUM_MSR);
+   } kvm_mask;
+   bool (*monitor_fct)(struct kvm_vcpu *vcpu, u32 msr,
+   bool enable);
} msrw;
 };
 
@@ -61,6 +67,8 @@ void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
 bool kvmi_monitor_desc_intercept(struct kvm_vcpu *vcpu, bool enable);
 bool kvmi_descriptor_event(struct kvm_vcpu *vcpu, u8 descriptor, bool write);
 bool kvmi_msr_event(struct kvm_vcpu *vcpu, struct msr_data *msr);
+bool kvmi_monitor_msrw_intercept(struct kvm_vcpu *vcpu, u32 msr, bool enable);
+bool kvmi_msrw_intercept_originator(struct kvm_vcpu *vcpu);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
@@ -82,6 +90,10 @@ static inline bool kvmi_descriptor_event(struct kvm_vcpu 
*vcpu, u8 descriptor,
 bool write) { return true; }
 static inline bool kvmi_msr_event(struct kvm_vcpu *vcpu, struct msr_data *msr)
{ return true; }
+static inline bool kvmi_monitor_msrw_intercept(struct kvm_vcpu *vcpu, u32 msr,
+  bool enable) { return false; }
+static inline bool kvmi_msrw_intercept_originator(struct kvm_vcpu *vcpu)
+   { return false; }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index c84a140db451..4e25ffc3d131 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -345,22 +345,25 @@ static void kvmi_arch_disable_desc_intercept(struct 
kvm_vcpu *vcpu)
vcpu->arch.kvmi->descriptor.kvm_intercepted = false;
 }
 
-static unsigned long *msr_mask(struct kvm_vcpu *vcpu, unsigned int *msr)
+static unsigned long *msr_mask(struct kvm_vcpu *vcpu, unsigned int *msr,
+  bool kvmi)
 {
switch (*msr) {
case 0 ... 0x1fff:
-   return vcpu->arch.kvmi->msrw.kvmi_mask.low;
+   return kvmi ? vcpu->arch.kvmi->msrw.kvmi_mask.low :
+ vcpu->arch.kvmi->msrw.kvm_mask.low;
case 0xc000 ... 0xc0001fff:
*msr &= 0x1fff;
-   return vcpu->arch.kvmi->msrw.kvmi_mask.high;
+   return kvmi ? vcpu->arch.kvmi->msrw.kvmi_mask.high :
+ vcpu->arch.kvmi->msrw.kvm_mask.high;
}
 
return NULL;
 }
 
-static bool test_msr_mask(struct kvm_vcpu *vcpu, unsigned int msr)
+static bool test_msr_mask(struct kvm_vcpu *vcpu, unsigned int msr, bool kvmi)
 {
-   unsigned long *mask = msr_mask(vcpu, &msr);
+   unsigned long *mask = msr_mask(vcpu, &msr, kvmi);
 
if (!mask)
return false;
@@ -368,9 +371,27 @@ static bool test_msr_mask(struct kvm_vcpu *vcpu, unsigned 
int msr)
return !!test_bit(msr, mask);
 }
 
-static bool msr_control(struct kvm_vcpu *vcpu, unsigned int msr, bool enable)
+/*
+ * Returns true if one side (kvm or kvmi) tries to disable the MSR write
+ * interception while the other side is still tracking it.
+ */
+bool kvmi_monitor_msrw_intercept(struct kvm_vcpu *vcpu, u32 msr, bool enable)
+{
+   struct kvmi_interception *arch_vcpui;
+
+   if (!vcpu)
+   return false;
+
+   arch_vcpui = READ_ONCE(vcpu->arch.kvmi);
+
+   return (arch_vcpui && arch_vcpui->msrw.monitor_fct(vcpu, msr, enable));
+}
+EXPORT_SYMBOL(kvmi_monitor_msrw_intercept);
+
+static bool msr_control(struct kvm_vcpu *vcpu, unsigned int msr, bool enable,
+   bool kvmi)
 {
-   unsigned long *mask = msr_mask(vcpu, &msr);
+   unsigned long *mask = msr_mask(vcpu, &msr, kvmi);
 
if (!mask)
return false;
@@ -383,6 +404,63 @@ static bool msr_control(struct kvm_vcpu *vcpu, unsigned 
int msr, bool enable)
return true;
 }
 
+static bool msr_intercepted_by_kvmi(struct kvm_vcpu *vcpu, u32 msr)
+{
+   return test_msr_mask(vcpu, msr, true);
+}
+
+static bool msr_intercepted_by_kvm(struct kvm_vcpu *vcpu, u32 msr)
+{
+

[PATCH v12 59/77] KVM: introspection: restore the state of CR3 interception on unhook

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This commit also ensures that the introspection tool and the userspace
do not disable the CR3-write VM-exit for each other.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvmi_host.h |  4 ++
 arch/x86/kvm/kvmi.c  | 68 +---
 arch/x86/kvm/kvmi.h  |  4 +-
 arch/x86/kvm/kvmi_msg.c  |  4 +-
 arch/x86/kvm/svm/svm.c   |  5 +++
 arch/x86/kvm/vmx/vmx.c   |  5 +++
 6 files changed, 81 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 7613088d0ae2..edbedf031467 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -16,6 +16,7 @@ struct kvmi_interception {
bool cleanup;
bool restore_interception;
struct kvmi_monitor_interception breakpoint;
+   struct kvmi_monitor_interception cr3w;
 };
 
 struct kvm_vcpu_arch_introspection {
@@ -34,6 +35,7 @@ bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 
dbg);
 bool kvmi_cr_event(struct kvm_vcpu *vcpu, unsigned int cr,
   unsigned long old_value, unsigned long *new_value);
 bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu);
+bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu, bool enable);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
@@ -44,6 +46,8 @@ static inline bool kvmi_cr_event(struct kvm_vcpu *vcpu, 
unsigned int cr,
 unsigned long *new_value)
{ return true; }
 static inline bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu) { return false; 
}
+static inline bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu,
+   bool enable) { return false; }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 31a11c7120c5..acd655ab770d 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -230,6 +230,59 @@ static void kvmi_arch_disable_bp_intercept(struct kvm_vcpu 
*vcpu)
vcpu->arch.kvmi->breakpoint.kvm_intercepted = false;
 }
 
+static bool monitor_cr3w_fct_kvmi(struct kvm_vcpu *vcpu, bool enable)
+{
+   vcpu->arch.kvmi->cr3w.kvmi_intercepted = enable;
+
+   if (enable)
+   vcpu->arch.kvmi->cr3w.kvm_intercepted =
+   static_call(kvm_x86_cr3_write_intercepted)(vcpu);
+   else if (vcpu->arch.kvmi->cr3w.kvm_intercepted)
+   return true;
+
+   return false;
+}
+
+static bool monitor_cr3w_fct_kvm(struct kvm_vcpu *vcpu, bool enable)
+{
+   if (!vcpu->arch.kvmi->cr3w.kvmi_intercepted)
+   return false;
+
+   vcpu->arch.kvmi->cr3w.kvm_intercepted = enable;
+
+   if (!enable)
+   return true;
+
+   return false;
+}
+
+/*
+ * Returns true if one side (kvm or kvmi) tries to disable the CR3 write
+ * interception while the other side is still tracking it.
+ */
+bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu, bool enable)
+{
+   struct kvmi_interception *arch_vcpui = READ_ONCE(vcpu->arch.kvmi);
+
+   return (arch_vcpui && arch_vcpui->cr3w.monitor_fct(vcpu, enable));
+}
+EXPORT_SYMBOL(kvmi_monitor_cr3w_intercept);
+
+static void kvmi_control_cr3w_intercept(struct kvm_vcpu *vcpu, bool enable)
+{
+   vcpu->arch.kvmi->cr3w.monitor_fct = monitor_cr3w_fct_kvmi;
+   static_call(kvm_x86_control_cr3_intercept)(vcpu, CR_TYPE_W, enable);
+   vcpu->arch.kvmi->cr3w.monitor_fct = monitor_cr3w_fct_kvm;
+}
+
+static void kvmi_arch_disable_cr3w_intercept(struct kvm_vcpu *vcpu)
+{
+   kvmi_control_cr3w_intercept(vcpu, false);
+
+   vcpu->arch.kvmi->cr3w.kvmi_intercepted = false;
+   vcpu->arch.kvmi->cr3w.kvm_intercepted = false;
+}
+
 int kvmi_arch_cmd_control_intercept(struct kvm_vcpu *vcpu,
unsigned int event_id, bool enable)
 {
@@ -269,6 +322,7 @@ void kvmi_arch_breakpoint_event(struct kvm_vcpu *vcpu, u64 
gva, u8 insn_len)
 static void kvmi_arch_restore_interception(struct kvm_vcpu *vcpu)
 {
kvmi_arch_disable_bp_intercept(vcpu);
+   kvmi_arch_disable_cr3w_intercept(vcpu);
 }
 
 bool kvmi_arch_clean_up_interception(struct kvm_vcpu *vcpu)
@@ -293,8 +347,13 @@ bool kvmi_arch_vcpu_alloc_interception(struct kvm_vcpu 
*vcpu)
return false;
 
arch_vcpui->breakpoint.monitor_fct = monitor_bp_fct_kvm;
+   arch_vcpui->cr3w.monitor_fct = monitor_cr3w_fct_kvm;
 
-   /* pair with kvmi_monitor_bp_intercept() */
+   /*
+* paired with:
+*  - kvmi_monitor_bp_intercept()
+*  - kvmi_monitor_cr3w_intercept()
+*/
smp_wmb();
WRITE_ONCE(vcpu->arch.kvmi, arch_vcpui);
 
@@ -326,8 +385,7 @@ void kvmi_arch_request_interception_cleanup(struct kvm_vcpu 
*vcpu,
 int kvmi_arch_cmd_vcpu_control_cr(struct kvm_vcpu *vcpu, int cr, bool enable)
 {
 

[PATCH v12 41/77] KVM: introspection: add a jobs list to every introspected vCPU

2021-10-06 Thread Adalbert Lazăr
Every vCPU has a lock-protected list in which the receiving thread
places the jobs that have to be done by the vCPU thread
once it is kicked out of the guest (KVM_REQ_INTROSPECTION).

Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 include/linux/kvmi_host.h | 10 +
 virt/kvm/introspection/kvmi.c | 72 ++-
 virt/kvm/introspection/kvmi_int.h |  1 +
 3 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index 9b0008c66321..b3874419511d 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -6,8 +6,18 @@
 
 #include 
 
+struct kvmi_job {
+   struct list_head link;
+   void *ctx;
+   void (*fct)(struct kvm_vcpu *vcpu, void *ctx);
+   void (*free_fct)(void *ctx);
+};
+
 struct kvm_vcpu_introspection {
struct kvm_vcpu_arch_introspection arch;
+
+   struct list_head job_list;
+   spinlock_t job_lock;
 };
 
 struct kvm_introspection {
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index 492a29987965..e8d2d280fb43 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -23,6 +23,7 @@ static DECLARE_BITMAP(Kvmi_known_vm_events, KVMI_NUM_EVENTS);
 static DECLARE_BITMAP(Kvmi_known_vcpu_events, KVMI_NUM_EVENTS);
 
 static struct kmem_cache *msg_cache;
+static struct kmem_cache *job_cache;
 
 void *kvmi_msg_alloc(void)
 {
@@ -39,14 +40,19 @@ static void kvmi_cache_destroy(void)
 {
kmem_cache_destroy(msg_cache);
msg_cache = NULL;
+   kmem_cache_destroy(job_cache);
+   job_cache = NULL;
 }
 
 static int kvmi_cache_create(void)
 {
msg_cache = kmem_cache_create("kvmi_msg", KVMI_MSG_SIZE_ALLOC,
  4096, SLAB_ACCOUNT, NULL);
+   job_cache = kmem_cache_create("kvmi_job",
+ sizeof(struct kvmi_job),
+ 0, SLAB_ACCOUNT, NULL);
 
-   if (!msg_cache) {
+   if (!msg_cache || !job_cache) {
kvmi_cache_destroy();
 
return -1;
@@ -118,6 +124,48 @@ void kvmi_uninit(void)
kvmi_cache_destroy();
 }
 
+static int __kvmi_add_job(struct kvm_vcpu *vcpu,
+ void (*fct)(struct kvm_vcpu *vcpu, void *ctx),
+ void *ctx, void (*free_fct)(void *ctx))
+{
+   struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu);
+   struct kvmi_job *job;
+
+   job = kmem_cache_zalloc(job_cache, GFP_KERNEL);
+   if (unlikely(!job))
+   return -ENOMEM;
+
+   INIT_LIST_HEAD(>link);
+   job->fct = fct;
+   job->ctx = ctx;
+   job->free_fct = free_fct;
+
+   spin_lock(>job_lock);
+   list_add_tail(>link, >job_list);
+   spin_unlock(>job_lock);
+
+   return 0;
+}
+
+int kvmi_add_job(struct kvm_vcpu *vcpu,
+void (*fct)(struct kvm_vcpu *vcpu, void *ctx),
+void *ctx, void (*free_fct)(void *ctx))
+{
+   int err;
+
+   err = __kvmi_add_job(vcpu, fct, ctx, free_fct);
+
+   return err;
+}
+
+static void kvmi_free_job(struct kvmi_job *job)
+{
+   if (job->free_fct)
+   job->free_fct(job->ctx);
+
+   kmem_cache_free(job_cache, job);
+}
+
 static bool kvmi_alloc_vcpui(struct kvm_vcpu *vcpu)
 {
struct kvm_vcpu_introspection *vcpui;
@@ -126,6 +174,9 @@ static bool kvmi_alloc_vcpui(struct kvm_vcpu *vcpu)
if (!vcpui)
return false;
 
+   INIT_LIST_HEAD(>job_list);
+   spin_lock_init(>job_lock);
+
vcpu->kvmi = vcpui;
 
return true;
@@ -139,9 +190,26 @@ static int kvmi_create_vcpui(struct kvm_vcpu *vcpu)
return 0;
 }
 
+static void kvmi_free_vcpu_jobs(struct kvm_vcpu_introspection *vcpui)
+{
+   struct kvmi_job *cur, *next;
+
+   list_for_each_entry_safe(cur, next, >job_list, link) {
+   list_del(>link);
+   kvmi_free_job(cur);
+   }
+}
+
 static void kvmi_free_vcpui(struct kvm_vcpu *vcpu)
 {
-   kfree(vcpu->kvmi);
+   struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu);
+
+   if (!vcpui)
+   return;
+
+   kvmi_free_vcpu_jobs(vcpui);
+
+   kfree(vcpui);
vcpu->kvmi = NULL;
 }
 
diff --git a/virt/kvm/introspection/kvmi_int.h 
b/virt/kvm/introspection/kvmi_int.h
index b7c8730e7e6d..c3aa12554c2b 100644
--- a/virt/kvm/introspection/kvmi_int.h
+++ b/virt/kvm/introspection/kvmi_int.h
@@ -7,6 +7,7 @@
 #include 
 
 #define KVMI(kvm) ((kvm)->kvmi)
+#define VCPUI(vcpu) ((vcpu)->kvmi)
 /*
  * This limit is used to accommodate the largest known fixed-length
  * message.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 48/77] KVM: introspection: add the crash action handling on the event reply

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This action is used in extreme cases such as blocking the spread of
malware as fast as possible.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 virt/kvm/introspection/kvmi.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index 39c43b8a127d..93fb20c43b9c 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -752,6 +752,10 @@ void kvmi_handle_common_event_actions(struct kvm_vcpu 
*vcpu, u32 action)
struct kvm *kvm = vcpu->kvm;
 
switch (action) {
+   case KVMI_EVENT_ACTION_CRASH:
+   vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+   break;
+
default:
kvmi_handle_unsupported_event_action(kvm);
}
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 71/77] KVM: introspection: add KVMI_VCPU_EVENT_PF

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This event is sent when a #PF occurs due to a failed permission check
in the shadow page tables, for a page in which the introspection tool
has shown interest.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  66 ++
 arch/x86/include/asm/kvmi_host.h  |   1 +
 arch/x86/kvm/kvmi.c   | 122 ++
 include/uapi/linux/kvmi.h |  10 ++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  |  76 +++
 virt/kvm/introspection/kvmi.c | 116 +
 virt/kvm/introspection/kvmi_int.h |   7 +
 virt/kvm/introspection/kvmi_msg.c |  19 +++
 8 files changed, 417 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 02b9f0a240c0..b12affb0d64f 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -543,6 +543,7 @@ the following events::
KVMI_VCPU_EVENT_DESCRIPTOR
KVMI_VCPU_EVENT_HYPERCALL
KVMI_VCPU_EVENT_MSR
+   KVMI_VCPU_EVENT_PF
KVMI_VCPU_EVENT_XSETBV
 
 When an event is enabled, the introspection tool is notified and
@@ -1379,3 +1380,68 @@ register (see **KVMI_VCPU_CONTROL_EVENTS**).
 ``kvmi_vcpu_event`` (with the vCPU state), the MSR number (``msr``),
 the old value (``old_value``) and the new value (``new_value``) are sent
 to the introspection tool. The *CONTINUE* action will set the ``new_val``.
+
+10. KVMI_VCPU_EVENT_PF
+--
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, CRASH, RETRY
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_event;
+   struct kvmi_vcpu_event_pf {
+   __u64 gva;
+   __u64 gpa;
+   __u8 access;
+   __u8 padding1;
+   __u16 padding2;
+   __u32 padding3;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_event_reply;
+
+This event is sent when a hypervisor page fault occurs due to a failed
+permission check, the introspection has been enabled for this event
+(see *KVMI_VCPU_CONTROL_EVENTS*) and the event was generated for a
+page in which the introspection tool has shown interest (i.e. has
+previously touched it by adjusting the spte permissions; see
+*KVMI_VM_SET_PAGE_ACCESS*).
+
+These permissions can be used by the introspection tool to guarantee
+the purpose of code areas inside the guest (code, rodata, stack, heap
+etc.). Each attempt at an operation unfitting for a certain memory
+range (e.g. executing code in the heap) triggers a page fault and gives the
+introspection tool the chance to audit the code attempting the operation.
+
+``kvmi_vcpu_event`` (with the vCPU state), guest virtual address (``gva``)
+if available or ~0 (UNMAPPED_GVA), guest physical address (``gpa``)
+and the ``access`` flags (e.g. KVMI_PAGE_ACCESS_R) are sent to the
+introspection tool.
+
+In case of a restricted read access, the guest address is the location
+of the memory being read. On write access, the guest address is the
+location of the memory being written. On execute access, the guest
+address is the location of the instruction being executed
+(``gva == kvmi_vcpu_event.arch.regs.rip``).
+
+In the current implementation, most of these events are sent during
+emulation. If the page fault has set more than one access bit
+(e.g. r-x/-rw), the introspection tool may receive more than one
+KVMI_VCPU_EVENT_PF and the order depends on the KVM emulator. Another
+cause of multiple events is when the page fault is triggered on access
+crossing the page boundary.
+
+The *CONTINUE* action will continue the page fault handling (e.g. via
+emulation).
+
+The *RETRY* action is used by the introspection tool to retry the
+execution of the current instruction, usually because it changed the
+instruction pointer or the page restrictions.
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 3e749208b8a1..045bc885f007 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -53,6 +53,7 @@ struct kvm_vcpu_arch_introspection {
 };
 
 struct kvm_arch_introspection {
+   struct kvm_page_track_notifier_node kptn_node;
 };
 
 #define KVMI_MEM_SLOTS_NUM SHRT_MAX
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 73eae96d2167..6432c40817d2 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -19,10 +19,26 @@ void kvmi_arch_init_vcpu_events_mask(unsigned long 
*supported)
set_bit(KVMI_VCPU_EVENT_HYPERCALL, supported);
set_bit(KVMI_VCPU_EVENT_DESCRIPTOR, supported);
set_bit(KVMI_VCPU_EVENT_MSR, supported);
+   set_bit(KVMI_VCPU_EVENT_PF, supported);
set_bit(KVMI_VCPU_EVENT_TRAP, supported);
set_bit(KVMI_VCPU_EVENT_XSETBV, supported);
 }
 
+static bool kvmi_track_preread(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t 

[PATCH v12 52/77] KVM: introspection: add KVMI_VCPU_GET_CPUID

2021-10-06 Thread Adalbert Lazăr
From: Marian Rotariu 

This command returns a CPUID leaf (as seen by the guest OS).

Signed-off-by: Marian Rotariu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 36 +++
 arch/x86/include/uapi/asm/kvmi.h  | 12 +++
 arch/x86/kvm/kvmi_msg.c   | 26 ++
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 30 
 5 files changed, 105 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index beea20a306ef..8b9938032650 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -630,6 +630,42 @@ currently being handled is replied to.
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 * -KVM_EOPNOTSUPP - the command hasn't been received during an introspection 
event
 
+13. KVMI_VCPU_GET_CPUID
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_get_cpuid {
+   __u32 function;
+   __u32 index;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vcpu_get_cpuid_reply {
+   __u32 eax;
+   __u32 ebx;
+   __u32 ecx;
+   __u32 edx;
+   };
+
+Returns a CPUID leaf (as seen by the guest OS).
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_ENOENT - the selected leaf is not present or is invalid
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 11835bf9bdc6..3631da9eef8c 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -45,4 +45,16 @@ struct kvmi_vcpu_get_registers_reply {
struct kvm_msrs msrs;
 };
 
+struct kvmi_vcpu_get_cpuid {
+   __u32 function;
+   __u32 index;
+};
+
+struct kvmi_vcpu_get_cpuid_reply {
+   __u32 eax;
+   __u32 ebx;
+   __u32 ecx;
+   __u32 edx;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index b4113749bcd0..d48abfe5133b 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg.c
@@ -6,6 +6,7 @@
  *
  */
 
+#include "cpuid.h"
 #include "../../../virt/kvm/introspection/kvmi_int.h"
 #include "kvmi.h"
 
@@ -110,7 +111,32 @@ static int handle_vcpu_set_registers(const struct 
kvmi_vcpu_msg_job *job,
return kvmi_msg_vcpu_reply(job, msg, ec, NULL, 0);
 }
 
+static int handle_vcpu_get_cpuid(const struct kvmi_vcpu_msg_job *job,
+const struct kvmi_msg_hdr *msg,
+const void *_req)
+{
+   const struct kvmi_vcpu_get_cpuid *req = _req;
+   struct kvmi_vcpu_get_cpuid_reply rpl;
+   struct kvm_cpuid_entry2 *entry;
+   int ec = 0;
+
+   entry = kvm_find_cpuid_entry(job->vcpu, req->function, req->index);
+   if (!entry) {
+   ec = -KVM_ENOENT;
+   } else {
+   memset(, 0, sizeof(rpl));
+
+   rpl.eax = entry->eax;
+   rpl.ebx = entry->ebx;
+   rpl.ecx = entry->ecx;
+   rpl.edx = entry->edx;
+   }
+
+   return kvmi_msg_vcpu_reply(job, msg, ec, , sizeof(rpl));
+}
+
 static const kvmi_vcpu_msg_job_fct msg_vcpu[] = {
+   [KVMI_VCPU_GET_CPUID] = handle_vcpu_get_cpuid,
[KVMI_VCPU_GET_INFO]  = handle_vcpu_get_info,
[KVMI_VCPU_GET_REGISTERS] = handle_vcpu_get_registers,
[KVMI_VCPU_SET_REGISTERS] = handle_vcpu_set_registers,
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 4b756d388ad3..2c93a36bfa43 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -39,6 +39,7 @@ enum {
KVMI_VCPU_CONTROL_EVENTS = KVMI_VCPU_MESSAGE_ID(2),
KVMI_VCPU_GET_REGISTERS  = KVMI_VCPU_MESSAGE_ID(3),
KVMI_VCPU_SET_REGISTERS  = KVMI_VCPU_MESSAGE_ID(4),
+   KVMI_VCPU_GET_CPUID  = KVMI_VCPU_MESSAGE_ID(5),
 
KVMI_NEXT_VCPU_MESSAGE
 };
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 4e93b70dd7d3..837d14dae448 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -962,6 +962,35 @@ static void test_cmd_vcpu_set_registers(struct kvm_vm *vm)
wait_vcpu_worker(vcpu_thread);
 }
 
+static void cmd_vcpu_get_cpuid(struct kvm_vm *vm,
+  __u32 function, __u32 index,
+  struct kvmi_vcpu_get_cpuid_reply *rpl)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   struct kvmi_vcpu_get_cpu

[PATCH v12 61/77] KVM: introspection: add KVMI_VCPU_EVENT_XSETBV

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This event is sent when an extended control register XCR is going to
be changed.

Signed-off-by: Mihai Donțu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 34 
 arch/x86/include/asm/kvmi_host.h  |  4 +
 arch/x86/include/uapi/asm/kvmi.h  |  7 ++
 arch/x86/kvm/kvmi.c   | 30 +++
 arch/x86/kvm/kvmi.h   |  2 +
 arch/x86/kvm/kvmi_msg.c   | 20 +
 arch/x86/kvm/x86.c|  6 ++
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 84 +++
 9 files changed, 188 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 1fbc2a03f5bd..d3a0bea64e02 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -541,6 +541,7 @@ the following events::
KVMI_VCPU_EVENT_BREAKPOINT
KVMI_VCPU_EVENT_CR
KVMI_VCPU_EVENT_HYPERCALL
+   KVMI_VCPU_EVENT_XSETBV
 
 When an event is enabled, the introspection tool is notified and
 must reply with: continue, retry, crash, etc. (see **Events** below).
@@ -1042,3 +1043,36 @@ other vCPU introspection event.
 (``nr``), exception code (``error_code``) and ``address`` are sent to
 the introspection tool, which should check if its exception has been
 injected or overridden.
+
+7. KVMI_VCPU_EVENT_XSETBV
+-
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_event;
+   struct kvmi_vcpu_event_xsetbv {
+   __u8 xcr;
+   __u8 padding[7];
+   __u64 old_value;
+   __u64 new_value;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_event_reply;
+
+This event is sent when an extended control register XCR is going
+to be changed and the introspection has been enabled for this event
+(see *KVMI_VCPU_CONTROL_EVENTS*).
+
+``kvmi_vcpu_event`` (with the vCPU state), the extended control register
+number (``xcr``), the old value (``old_value``) and the new value
+(``new_value``) are sent to the introspection tool.
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 97f5b1a01c9e..d66349208a6b 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -46,6 +46,8 @@ bool kvmi_cr_event(struct kvm_vcpu *vcpu, unsigned int cr,
 bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu);
 bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu, bool enable);
 void kvmi_enter_guest(struct kvm_vcpu *vcpu);
+void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
+  u64 old_value, u64 new_value);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
@@ -59,6 +61,8 @@ static inline bool kvmi_cr3_intercepted(struct kvm_vcpu 
*vcpu) { return false; }
 static inline bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu,
bool enable) { return false; }
 static inline void kvmi_enter_guest(struct kvm_vcpu *vcpu) { }
+static inline void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
+   u64 old_value, u64 new_value) { }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index aa991fbab473..604a8b3d4ac2 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -95,4 +95,11 @@ struct kvmi_vcpu_inject_exception {
__u64 address;
 };
 
+struct kvmi_vcpu_event_xsetbv {
+   __u8 xcr;
+   __u8 padding[7];
+   __u64 old_value;
+   __u64 new_value;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 93123d47752c..d34f5f03a56f 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -16,6 +16,7 @@ void kvmi_arch_init_vcpu_events_mask(unsigned long *supported)
set_bit(KVMI_VCPU_EVENT_CR, supported);
set_bit(KVMI_VCPU_EVENT_HYPERCALL, supported);
set_bit(KVMI_VCPU_EVENT_TRAP, supported);
+   set_bit(KVMI_VCPU_EVENT_XSETBV, supported);
 }
 
 static unsigned int kvmi_vcpu_mode(const struct kvm_vcpu *vcpu,
@@ -567,3 +568,32 @@ void kvmi_arch_send_pending_event(struct kvm_vcpu *vcpu)
kvmi_send_trap_event(vcpu);
}
 }
+
+static void __kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
+   u64 old_value, u64 new_value)
+{
+   u32 action;
+
+   action = kvmi_msg_send_vcpu_xsetbv(vcpu, xcr, old_value, new_value);
+   switch (action) {
+   case KVMI_EVENT_ACTION_CONTINUE:
+   break;
+   default:
+   kvmi_handle_common_event_actions(vcpu, action);
+   }
+}
+
+void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 

[PATCH v12 62/77] KVM: introspection: add KVMI_VCPU_GET_XCR

2021-10-06 Thread Adalbert Lazăr
This can be used by the introspection tool to emulate SSE instructions.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 33 +++
 arch/x86/include/uapi/asm/kvmi.h  |  9 +
 arch/x86/kvm/kvmi_msg.c   | 21 
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 33 +++
 5 files changed, 97 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index d3a0bea64e02..389d69e3fd7e 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -778,6 +778,39 @@ exception.
 * -KVM_EBUSY - another *KVMI_VCPU_INJECT_EXCEPTION*-*KVMI_VCPU_EVENT_TRAP*
pair is in progress
 
+17. KVMI_VCPU_GET_XCR
+-
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_get_xcr {
+   __u8 xcr;
+   __u8 padding[7];
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vcpu_get_xcr_reply {
+   __u64 value;
+   };
+
+Returns the value of an extended control register XCR.
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the specified control register is not XCR0
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 604a8b3d4ac2..c0a73051d667 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -102,4 +102,13 @@ struct kvmi_vcpu_event_xsetbv {
__u64 new_value;
 };
 
+struct kvmi_vcpu_get_xcr {
+   __u8 xcr;
+   __u8 padding[7];
+};
+
+struct kvmi_vcpu_get_xcr_reply {
+   __u64 value;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index c767b969df53..21624568e329 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg.c
@@ -174,11 +174,32 @@ static int handle_vcpu_inject_exception(const struct 
kvmi_vcpu_msg_job *job,
return kvmi_msg_vcpu_reply(job, msg, ec, NULL, 0);
 }
 
+static int handle_vcpu_get_xcr(const struct kvmi_vcpu_msg_job *job,
+  const struct kvmi_msg_hdr *msg,
+  const void *_req)
+{
+   const struct kvmi_vcpu_get_xcr *req = _req;
+   struct kvmi_vcpu_get_xcr_reply rpl;
+   int ec = 0;
+
+   memset(, 0, sizeof(rpl));
+
+   if (non_zero_padding(req->padding, ARRAY_SIZE(req->padding)))
+   ec = -KVM_EINVAL;
+   else if (req->xcr != 0)
+   ec = -KVM_EINVAL;
+   else
+   rpl.value = job->vcpu->arch.xcr0;
+
+   return kvmi_msg_vcpu_reply(job, msg, ec, , sizeof(rpl));
+}
+
 static const kvmi_vcpu_msg_job_fct msg_vcpu[] = {
[KVMI_VCPU_CONTROL_CR]   = handle_vcpu_control_cr,
[KVMI_VCPU_GET_CPUID]= handle_vcpu_get_cpuid,
[KVMI_VCPU_GET_INFO] = handle_vcpu_get_info,
[KVMI_VCPU_GET_REGISTERS]= handle_vcpu_get_registers,
+   [KVMI_VCPU_GET_XCR]  = handle_vcpu_get_xcr,
[KVMI_VCPU_INJECT_EXCEPTION] = handle_vcpu_inject_exception,
[KVMI_VCPU_SET_REGISTERS]= handle_vcpu_set_registers,
 };
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 4b71c6b0b16c..ac23754627ff 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -43,6 +43,7 @@ enum {
KVMI_VCPU_GET_CPUID= KVMI_VCPU_MESSAGE_ID(5),
KVMI_VCPU_CONTROL_CR   = KVMI_VCPU_MESSAGE_ID(6),
KVMI_VCPU_INJECT_EXCEPTION = KVMI_VCPU_MESSAGE_ID(7),
+   KVMI_VCPU_GET_XCR  = KVMI_VCPU_MESSAGE_ID(8),
 
KVMI_NEXT_VCPU_MESSAGE
 };
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 380aa3d2d8f3..d9497727e859 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -1405,6 +1405,38 @@ static void test_event_xsetbv(struct kvm_vm *vm)
disable_vcpu_event(vm, event_id);
 }
 
+static void cmd_vcpu_get_xcr(struct kvm_vm *vm, u8 xcr, u64 *value,
+int expected_err)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   struct kvmi_vcpu_get_xcr cmd;
+   } req = { 0 };
+   struct kvmi_vcpu_get_xcr_reply rpl = { 0 };
+   int r;
+
+   req.cmd.xcr = xcr;
+
+   r = do_vcpu0_command(vm, KVMI_VCPU_GET_XCR, , sizeof(req),
+, sizeof(rpl));
+   TEST_ASSERT(r == expected_err,
+   "KVMI_VCPU_GET_XCR failed, error %d (%s), expected %d\n",
+   -r, kvm_strerror(-r), expected_err);
+
+   *value = r == 0 ? rpl.

[PATCH v12 64/77] KVM: introspection: add KVMI_VCPU_SET_XSAVE

2021-10-06 Thread Adalbert Lazăr
This can be used by the introspection tool to emulate SSE instructions.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 28 +++
 arch/x86/include/uapi/asm/kvmi.h  |  4 +++
 arch/x86/kvm/kvmi_msg.c   | 20 +
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 27 ++
 5 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 8f2f105a527f..eedcae3900c5 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -840,6 +840,34 @@ Returns a buffer containing the XSAVE area.
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 * -KVM_ENOMEM - there is not enough memory to allocate the reply
 
+19. KVMI_VCPU_SET_XSAVE
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_set_xsave {
+   struct kvm_xsave xsave;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+
+Modifies the XSAVE area.
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index c6a46252a684..89f3dc9269c1 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -115,4 +115,8 @@ struct kvmi_vcpu_get_xsave_reply {
struct kvm_xsave xsave;
 };
 
+struct kvmi_vcpu_set_xsave {
+   struct kvm_xsave xsave;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index 90e19244044a..ecad1882cdd8 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg.c
@@ -213,6 +213,25 @@ static int handle_vcpu_get_xsave(const struct 
kvmi_vcpu_msg_job *job,
return err;
 }
 
+static int handle_vcpu_set_xsave(const struct kvmi_vcpu_msg_job *job,
+const struct kvmi_msg_hdr *msg,
+const void *req)
+{
+   const struct kvm_xsave *area = req;
+   size_t cmd_size;
+   int ec = 0;
+
+   cmd_size = sizeof(struct kvmi_vcpu_hdr) + sizeof(*area);
+
+   if (cmd_size > msg->size)
+   ec = -KVM_EINVAL;
+   else if (kvm_vcpu_ioctl_x86_set_xsave(job->vcpu,
+ (struct kvm_xsave *) area))
+   ec = -KVM_EINVAL;
+
+   return kvmi_msg_vcpu_reply(job, msg, ec, NULL, 0);
+}
+
 static const kvmi_vcpu_msg_job_fct msg_vcpu[] = {
[KVMI_VCPU_CONTROL_CR]   = handle_vcpu_control_cr,
[KVMI_VCPU_GET_CPUID]= handle_vcpu_get_cpuid,
@@ -222,6 +241,7 @@ static const kvmi_vcpu_msg_job_fct msg_vcpu[] = {
[KVMI_VCPU_GET_XSAVE]= handle_vcpu_get_xsave,
[KVMI_VCPU_INJECT_EXCEPTION] = handle_vcpu_inject_exception,
[KVMI_VCPU_SET_REGISTERS]= handle_vcpu_set_registers,
+   [KVMI_VCPU_SET_XSAVE]= handle_vcpu_set_xsave,
 };
 
 kvmi_vcpu_msg_job_fct kvmi_arch_vcpu_msg_handler(u16 id)
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index b99763e580e4..4671e0e3cb45 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -45,6 +45,7 @@ enum {
KVMI_VCPU_INJECT_EXCEPTION = KVMI_VCPU_MESSAGE_ID(7),
KVMI_VCPU_GET_XCR  = KVMI_VCPU_MESSAGE_ID(8),
KVMI_VCPU_GET_XSAVE= KVMI_VCPU_MESSAGE_ID(9),
+   KVMI_VCPU_SET_XSAVE= KVMI_VCPU_MESSAGE_ID(10),
 
KVMI_NEXT_VCPU_MESSAGE
 };
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index a9ab0e973340..adac0edddc50 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -1437,21 +1437,35 @@ static void test_cmd_vcpu_get_xcr(struct kvm_vm *vm)
cmd_vcpu_get_xcr(vm, xcr1, , -KVM_EINVAL);
 }
 
-static void cmd_vcpu_get_xsave(struct kvm_vm *vm)
+static void cmd_vcpu_get_xsave(struct kvm_vm *vm, struct kvm_xsave *rpl)
 {
struct {
struct kvmi_msg_hdr hdr;
struct kvmi_vcpu_hdr vcpu_hdr;
} req = {};
-   struct kvm_xsave rpl;
 
test_vcpu0_command(vm, KVMI_VCPU_GET_XSAVE, , sizeof(req),
-  , sizeof(rpl), 0);
+  rpl, sizeof(*rpl), 0);
+}
+
+static void cmd_vcpu_set_xsave(struct kvm_vm *vm, struct kvm_xsave *rpl)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   struct kvm_xsave xsave;
+   } req = {};
+
+   memcpy(, rpl, sizeof(*rpl));
+
+   test_vcpu0_command(vm, KVMI_VCPU_SET_XSAVE, , sizeof(req),
+  NULL, 0, 0);
 }
 

[PATCH v12 53/77] KVM: introspection: add KVMI_VCPU_EVENT_HYPERCALL

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This event is sent on a specific hypercall.

It is used by the code residing inside the introspected guest to call the
introspection tool and to report certain details about its operation.
For example, a classic antimalware remediation tool can report
what it has found during a scan.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/hypercalls.rst | 35 
 Documentation/virt/kvm/kvmi.rst   | 40 +-
 arch/x86/include/uapi/asm/kvmi.h  |  4 ++
 arch/x86/kvm/kvmi.c   | 20 +
 arch/x86/kvm/x86.c| 18 ++--
 include/linux/kvmi_host.h |  2 +
 include/uapi/linux/kvm_para.h |  1 +
 include/uapi/linux/kvmi.h |  3 +-
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 42 +++
 virt/kvm/introspection/kvmi.c | 38 +
 virt/kvm/introspection/kvmi_int.h |  8 
 virt/kvm/introspection/kvmi_msg.c | 13 ++
 12 files changed, 218 insertions(+), 6 deletions(-)

diff --git a/Documentation/virt/kvm/hypercalls.rst 
b/Documentation/virt/kvm/hypercalls.rst
index e56fa8b9cfca..df6b907fd108 100644
--- a/Documentation/virt/kvm/hypercalls.rst
+++ b/Documentation/virt/kvm/hypercalls.rst
@@ -190,3 +190,38 @@ the KVM_CAP_EXIT_HYPERCALL capability. Userspace must 
enable that capability
 before advertising KVM_FEATURE_HC_MAP_GPA_RANGE in the guest CPUID.  In
 addition, if the guest supports KVM_FEATURE_MIGRATION_CONTROL, userspace
 must also set up an MSR filter to process writes to MSR_KVM_MIGRATION_CONTROL.
+
+9. KVM_HC_XEN_HVM_OP
+
+
+:Architecture: x86
+:Status: active
+:Purpose: To enable communication between a guest agent and a VMI application
+
+Usage:
+
+An event will be sent to the VMI application (see kvmi.rst) if the following
+registers, which differ between 32bit and 64bit, have the following values:
+
+   = =
+   32bit   64bit value
+   = =
+   ebx (a0)rdi   KVM_HC_XEN_HVM_OP_GUEST_REQUEST_VM_EVENT
+   ecx (a1)rsi   0
+   = =
+
+This specification copies Xen's { __HYPERVISOR_hvm_op,
+HVMOP_guest_request_vm_event } hypercall and can originate from kernel or
+userspace.
+
+It returns 0 if successful, or a negative POSIX.1 error code if it fails. The
+absence of an active VMI application is not signaled in any way.
+
+The following registers are clobbered:
+
+  * 32bit: edx, esi, edi, ebp
+  * 64bit: rdx, r10, r8, r9
+
+In particular, for KVM_HC_XEN_HVM_OP_GUEST_REQUEST_VM_EVENT, the last two
+registers can be poisoned deliberately and cannot be used for passing
+information.
diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 8b9938032650..0facdc4595ed 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -535,7 +535,10 @@ command) before returning to guest.
 
struct kvmi_error_code
 
-Enables/disables vCPU introspection events.
+Enables/disables vCPU introspection events. This command can be used with
+the following events::
+
+   KVMI_VCPU_EVENT_HYPERCALL
 
 When an event is enabled, the introspection tool is notified and
 must reply with: continue, retry, crash, etc. (see **Events** below).
@@ -779,3 +782,38 @@ cannot be controlled with *KVMI_VCPU_CONTROL_EVENTS*.
 Because it has a low priority, it will be sent after any other vCPU
 introspection event and when no other vCPU introspection command is
 queued.
+
+3. KVMI_VCPU_EVENT_HYPERCALL
+
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_event_hdr;
+   struct kvmi_vcpu_event;
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_event_reply;
+
+This event is sent on a specific user hypercall when the introspection has
+been enabled for this event (see *KVMI_VCPU_CONTROL_EVENTS*).
+
+The hypercall number must be ``KVM_HC_XEN_HVM_OP`` with the
+``KVM_HC_XEN_HVM_OP_GUEST_REQUEST_VM_EVENT`` sub-function
+(see hypercalls.rst).
+
+It is used by the code residing inside the introspected guest to call the
+introspection tool and to report certain details about its operation. For
+example, a classic antimalware remediation tool can report what it has
+found during a scan.
+
+The most useful registers describing the vCPU state can be read from
+``kvmi_vcpu_event.arch.regs``.
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 3631da9eef8c..a442ba4d2190 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -8,6 +8,10 @@
 
 #include 
 
+enum {
+   KVM_HC_XEN_HVM_OP_GUEST_REQUEST_VM_EVENT = 24,
+};
+
 struct kvmi_vcpu_get_info_reply {
__

[PATCH v12 45/77] KVM: introspection: add KVMI_VM_PAUSE_VCPU

2021-10-06 Thread Adalbert Lazăr
This command increments a pause requests counter for a vCPU and kicks
it out of guest.

The introspection tool can pause a VM by sending this command for all
vCPUs. If it sets 'wait=1', it can consider that the VM is paused when
it receives the reply for the last KVMI_VM_PAUSE_VCPU command.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 39 +++
 include/linux/kvmi_host.h |  2 +
 include/uapi/linux/kvmi.h |  8 
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 30 
 virt/kvm/introspection/kvmi.c | 47 +--
 virt/kvm/introspection/kvmi_int.h |  1 +
 virt/kvm/introspection/kvmi_msg.c | 24 ++
 7 files changed, 147 insertions(+), 4 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 2f41fce79d95..9f6905456923 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -470,6 +470,45 @@ Returns the TSC frequency (in HZ) for the specified vCPU 
if available
 * -KVM_EINVAL - the selected vCPU is invalid
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+9. KVMI_VM_PAUSE_VCPU
+-
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_pause_vcpu {
+   __u16 vcpu;
+   __u8 wait;
+   __u8 padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+
+Kicks the vCPU out of guest.
+
+If `wait` is 1, the command will wait for vCPU to acknowledge the IPI.
+
+The vCPU will handle the pending commands/events and send the
+*KVMI_VCPU_EVENT_PAUSE* event (one for every successful *KVMI_VM_PAUSE_VCPU*
+command) before returning to guest.
+
+:Errors:
+
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EBUSY  - the selected vCPU has too many queued
+*KVMI_VCPU_EVENT_PAUSE* events
+* -KVM_EPERM  - the *KVMI_VCPU_EVENT_PAUSE* event is disallowed
+
 Events
 ==
 
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index 736edb400c05..59e645d9ea34 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -18,6 +18,8 @@ struct kvm_vcpu_introspection {
 
struct list_head job_list;
spinlock_t job_lock;
+
+   atomic_t pause_requests;
 };
 
 struct kvm_introspection {
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index da766427231e..bb90d03f059b 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -26,6 +26,7 @@ enum {
KVMI_VM_CONTROL_EVENTS = KVMI_VM_MESSAGE_ID(5),
KVMI_VM_READ_PHYSICAL  = KVMI_VM_MESSAGE_ID(6),
KVMI_VM_WRITE_PHYSICAL = KVMI_VM_MESSAGE_ID(7),
+   KVMI_VM_PAUSE_VCPU = KVMI_VM_MESSAGE_ID(8),
 
KVMI_NEXT_VM_MESSAGE
 };
@@ -115,4 +116,11 @@ struct kvmi_vcpu_hdr {
__u32 padding2;
 };
 
+struct kvmi_vm_pause_vcpu {
+   __u16 vcpu;
+   __u8 wait;
+   __u8 padding1;
+   __u32 padding2;
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 337f295d69ff..f8d355aff5fa 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -685,6 +685,35 @@ static void test_cmd_vcpu_get_info(struct kvm_vm *vm)
, sizeof(rpl), -KVM_EINVAL);
 }
 
+static void cmd_vcpu_pause(__u8 wait, int expected_err)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vm_pause_vcpu cmd;
+   } req = {};
+   __u16 vcpu_idx = 0;
+
+   req.cmd.wait = wait;
+   req.cmd.vcpu = vcpu_idx;
+
+   test_vm_command(KVMI_VM_PAUSE_VCPU, , sizeof(req), NULL, 0, 
expected_err);
+}
+
+static void pause_vcpu(void)
+{
+   cmd_vcpu_pause(1, 0);
+}
+
+static void test_pause(struct kvm_vm *vm)
+{
+   __u8 wait = 1, wait_inval = 2;
+
+   pause_vcpu();
+
+   cmd_vcpu_pause(wait, 0);
+   cmd_vcpu_pause(wait_inval, -KVM_EINVAL);
+}
+
 static void test_introspection(struct kvm_vm *vm)
 {
srandom(time(0));
@@ -700,6 +729,7 @@ static void test_introspection(struct kvm_vm *vm)
test_cmd_vm_control_events(vm);
test_memory_access(vm);
test_cmd_vcpu_get_info(vm);
+   test_pause(vm);
 
unhook_introspection(vm);
 }
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index 93b1bec23e48..faf443d6ce82 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -17,6 +17,8 @@
 
 #define KVMI_MSG_SIZE_ALLOC (sizeof(struct kvmi_msg_hdr) + KVMI_MAX_MSG_SIZE)
 
+#define MAX_PAUSE_REQUESTS 1001
+
 static DECLARE_BITMAP(Kvmi_always_allowed_commands, KVMI_NUM_COMMANDS);
 static DECLARE_BITMAP(Kvmi_known_eve

[PATCH v12 43/77] KVM: introspection: handle vCPU commands

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

Based on the common structure (kvmi_vcpu_hdr) used for all vCPU commands,
the receiving thread validates and dispatches the message to the proper
vCPU (adding the handling function to its jobs list).

Signed-off-by: Mihai Donțu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |   8 ++
 arch/x86/kvm/Makefile |   2 +-
 arch/x86/kvm/kvmi_msg.c   |  17 
 include/uapi/linux/kvmi.h |   6 ++
 virt/kvm/introspection/kvmi_int.h |  16 
 virt/kvm/introspection/kvmi_msg.c | 150 +-
 6 files changed, 196 insertions(+), 3 deletions(-)
 create mode 100644 arch/x86/kvm/kvmi_msg.c

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index c424acf01784..79152f47b30f 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -221,6 +221,14 @@ The following C structures are meant to be used directly 
when communicating
 over the wire. The peer that detects any size mismatch should simply close
 the connection and report the error.
 
+The vCPU commands start with::
+
+   struct kvmi_vcpu_hdr {
+   __u16 vcpu;
+   __u16 padding1;
+   __u32 padding2;
+   }
+
 1. KVMI_GET_VERSION
 ---
 
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index af58e1daf7cf..6b2e5f0524e5 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -14,7 +14,7 @@ kvm-y += $(KVM)/kvm_main.o 
$(KVM)/coalesced_mmio.o \
$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o 
\
$(KVM)/dirty_ring.o $(KVM)/binary_stats.o
 kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
-kvm-$(CONFIG_KVM_INTROSPECTION) += $(KVMI)/kvmi.o $(KVMI)/kvmi_msg.o kvmi.o
+kvm-$(CONFIG_KVM_INTROSPECTION) += $(KVMI)/kvmi.o $(KVMI)/kvmi_msg.o kvmi.o 
kvmi_msg.o
 
 kvm-y  += x86.o emulate.o i8259.o irq.o lapic.o \
   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
new file mode 100644
index ..0a6edfe16f17
--- /dev/null
+++ b/arch/x86/kvm/kvmi_msg.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM introspection (message handling) - x86
+ *
+ * Copyright (C) 2020-2021 Bitdefender S.R.L.
+ *
+ */
+
+#include "../../../virt/kvm/introspection/kvmi_int.h"
+
+static const kvmi_vcpu_msg_job_fct msg_vcpu[] = {
+};
+
+kvmi_vcpu_msg_job_fct kvmi_arch_vcpu_msg_handler(u16 id)
+{
+   return id < ARRAY_SIZE(msg_vcpu) ? msg_vcpu[id] : NULL;
+}
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 048afad01be6..7ba1c8758aba 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -107,4 +107,10 @@ struct kvmi_vm_write_physical {
__u8  data[0];
 };
 
+struct kvmi_vcpu_hdr {
+   __u16 vcpu;
+   __u16 padding1;
+   __u32 padding2;
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/virt/kvm/introspection/kvmi_int.h 
b/virt/kvm/introspection/kvmi_int.h
index c3aa12554c2b..c3e4da7e7f20 100644
--- a/virt/kvm/introspection/kvmi_int.h
+++ b/virt/kvm/introspection/kvmi_int.h
@@ -14,6 +14,18 @@
  */
 #define KVMI_MAX_MSG_SIZE (4096 * 2 - sizeof(struct kvmi_msg_hdr))
 
+struct kvmi_vcpu_msg_job {
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   } *msg;
+   struct kvm_vcpu *vcpu;
+};
+
+typedef int (*kvmi_vcpu_msg_job_fct)(const struct kvmi_vcpu_msg_job *job,
+const struct kvmi_msg_hdr *msg,
+const void *req);
+
 /* kvmi_msg.c */
 bool kvmi_sock_get(struct kvm_introspection *kvmi, int fd);
 void kvmi_sock_shutdown(struct kvm_introspection *kvmi);
@@ -28,6 +40,9 @@ bool kvmi_is_command_allowed(struct kvm_introspection *kvmi, 
u16 id);
 bool kvmi_is_event_allowed(struct kvm_introspection *kvmi, u16 id);
 bool kvmi_is_known_event(u16 id);
 bool kvmi_is_known_vm_event(u16 id);
+int kvmi_add_job(struct kvm_vcpu *vcpu,
+void (*fct)(struct kvm_vcpu *vcpu, void *ctx),
+void *ctx, void (*free_fct)(void *ctx));
 int kvmi_cmd_vm_control_events(struct kvm_introspection *kvmi,
   u16 event_id, bool enable);
 int kvmi_cmd_read_physical(struct kvm *kvm, u64 gpa, size_t size,
@@ -40,5 +55,6 @@ int kvmi_cmd_write_physical(struct kvm *kvm, u64 gpa, size_t 
size,
 
 /* arch */
 void kvmi_arch_init_vcpu_events_mask(unsigned long *supported);
+kvmi_vcpu_msg_job_fct kvmi_arch_vcpu_msg_handler(u16 id);
 
 #endif
diff --git a/virt/kvm/introspection/kvmi_msg.c 
b/virt/kvm/introspection/kvmi_msg.c
index 697c307ea745..82e2b49a960e 100644
--- a/virt/kvm/introspection/kvmi_msg.c
+++ b/virt/kvm/introspection/kvmi_msg.c
@@ -13,6 +13,7 @

[PATCH v12 42/77] KVM: introspection: handle vCPU introspection requests

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

The receiving thread dispatches the vCPU introspection commands by
adding them to the vCPU's jobs list and kicking the vCPU. Before
entering the guest, the vCPU thread checks the introspection request
(KVM_REQ_INTROSPECTION) and runs its queued jobs.

Signed-off-by: Mihai Donțu 
Co-developed-by: Mircea Cîrjaliu 
Signed-off-by: Mircea Cîrjaliu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/x86.c|  3 ++
 include/linux/kvm_host.h  |  1 +
 include/linux/kvmi_host.h |  4 ++
 virt/kvm/introspection/kvmi.c | 73 +++
 virt/kvm/kvm_main.c   |  2 +
 5 files changed, 83 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ee08fb330f0f..0315c5a94af3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9827,6 +9827,9 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.l1tf_flush_l1d = true;
 
for (;;) {
+   if (kvm_check_request(KVM_REQ_INTROSPECTION, vcpu))
+   kvmi_handle_requests(vcpu);
+
if (kvm_vcpu_running(vcpu)) {
r = vcpu_enter_guest(vcpu);
} else {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 30bf1227c4a7..7d429d3afbb6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -151,6 +151,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_UNBLOCK   2
 #define KVM_REQ_UNHALT3
 #define KVM_REQ_VM_BUGGED (4 | KVM_REQUEST_WAIT | 
KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_INTROSPECTION 5
 #define KVM_REQUEST_ARCH_BASE 8
 
 #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index b3874419511d..736edb400c05 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -53,6 +53,8 @@ int kvmi_ioctl_event(struct kvm *kvm,
 const struct kvm_introspection_feature *feat);
 int kvmi_ioctl_preunhook(struct kvm *kvm);
 
+void kvmi_handle_requests(struct kvm_vcpu *vcpu);
+
 #else
 
 static inline int kvmi_version(void) { return 0; }
@@ -62,6 +64,8 @@ static inline void kvmi_create_vm(struct kvm *kvm) { }
 static inline void kvmi_destroy_vm(struct kvm *kvm) { }
 static inline void kvmi_vcpu_uninit(struct kvm_vcpu *vcpu) { }
 
+static inline void kvmi_handle_requests(struct kvm_vcpu *vcpu) { }
+
 #endif /* CONFIG_KVM_INTROSPECTION */
 
 #endif
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index e8d2d280fb43..93b1bec23e48 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -124,6 +124,12 @@ void kvmi_uninit(void)
kvmi_cache_destroy();
 }
 
+static void kvmi_make_request(struct kvm_vcpu *vcpu)
+{
+   kvm_make_request(KVM_REQ_INTROSPECTION, vcpu);
+   kvm_vcpu_kick(vcpu);
+}
+
 static int __kvmi_add_job(struct kvm_vcpu *vcpu,
  void (*fct)(struct kvm_vcpu *vcpu, void *ctx),
  void *ctx, void (*free_fct)(void *ctx))
@@ -155,6 +161,9 @@ int kvmi_add_job(struct kvm_vcpu *vcpu,
 
err = __kvmi_add_job(vcpu, fct, ctx, free_fct);
 
+   if (!err)
+   kvmi_make_request(vcpu);
+
return err;
 }
 
@@ -323,6 +332,14 @@ int kvmi_ioctl_unhook(struct kvm *kvm)
return 0;
 }
 
+struct kvm_introspection * __must_check kvmi_get(struct kvm *kvm)
+{
+   if (refcount_inc_not_zero(>kvmi_ref))
+   return kvm->kvmi;
+
+   return NULL;
+}
+
 static void kvmi_put(struct kvm *kvm)
 {
if (refcount_dec_and_test(>kvmi_ref))
@@ -340,6 +357,19 @@ static int __kvmi_hook(struct kvm *kvm,
return 0;
 }
 
+static void kvmi_job_release_vcpu(struct kvm_vcpu *vcpu, void *ctx)
+{
+}
+
+static void kvmi_release_vcpus(struct kvm *kvm)
+{
+   struct kvm_vcpu *vcpu;
+   int i;
+
+   kvm_for_each_vcpu(i, vcpu, kvm)
+   kvmi_add_job(vcpu, kvmi_job_release_vcpu, NULL, NULL);
+}
+
 static int kvmi_recv_thread(void *arg)
 {
struct kvm_introspection *kvmi = arg;
@@ -350,6 +380,8 @@ static int kvmi_recv_thread(void *arg)
/* Signal userspace and prevent the vCPUs from sending events. */
kvmi_sock_shutdown(kvmi);
 
+   kvmi_release_vcpus(kvmi->kvm);
+
kvmi_put(kvmi->kvm);
return 0;
 }
@@ -382,6 +414,10 @@ static int kvmi_hook(struct kvm *kvm,
init_completion(>kvmi_complete);
 
refcount_set(>kvmi_ref, 1);
+   /*
+* Paired with refcount_inc_not_zero() from kvmi_get().
+*/
+   smp_wmb();
 
kvmi->recv = kthread_run(kvmi_recv_thread, kvmi, "kvmi-recv");
if (IS_ERR(kvmi->recv)) {
@@ -670,3 +706,40 @@ int kvmi_cmd_write_physical(struct kvm *kvm, u64 gpa, 
size_t size,
 
return ec;
 }
+
+static struct kvmi_job *kvmi_pull_job(struct kvm_vcpu_introspection *vcpui)
+{
+   struct kvmi_job *j

[PATCH v12 35/77] KVM: introspection: add KVMI_VM_GET_INFO

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This command returns the number of online vCPUs.

The introspection tool uses the vCPU index to specify to which vCPU
the introspection command applies.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 18 ++
 include/uapi/linux/kvmi.h |  6 
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 35 +--
 virt/kvm/introspection/kvmi_msg.c | 13 +++
 4 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 13169575f75f..6f8583d4aeb2 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -312,3 +312,21 @@ This command is always allowed.
 * -KVM_ENOENT - the event specified by ``id`` is unsupported
 * -KVM_EPERM - the event specified by ``id`` is disallowed
 * -KVM_EINVAL - the padding is not zero
+
+4. KVMI_VM_GET_INFO
+---
+
+:Architectures: all
+:Versions: >= 1
+:Parameters: none
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vm_get_info_reply {
+   __u32 vcpu_count;
+   __u32 padding[3];
+   };
+
+Returns the number of online vCPUs.
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 0c2d0cedde6f..e06a7b80d4d9 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -20,6 +20,7 @@ enum {
KVMI_GET_VERSION  = KVMI_VM_MESSAGE_ID(1),
KVMI_VM_CHECK_COMMAND = KVMI_VM_MESSAGE_ID(2),
KVMI_VM_CHECK_EVENT   = KVMI_VM_MESSAGE_ID(3),
+   KVMI_VM_GET_INFO  = KVMI_VM_MESSAGE_ID(4),
 
KVMI_NEXT_VM_MESSAGE
 };
@@ -67,4 +68,9 @@ struct kvmi_vm_check_event {
__u32 padding2;
 };
 
+struct kvmi_vm_get_info_reply {
+   __u32 vcpu_count;
+   __u32 padding[3];
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 57f68ff60eb9..2ada3d9bc230 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -80,6 +80,16 @@ static void set_command_perm(struct kvm_vm *vm, __s32 id, 
__u32 allow,
 "KVM_INTROSPECTION_COMMAND");
 }
 
+static void disallow_command(struct kvm_vm *vm, __s32 id)
+{
+   set_command_perm(vm, id, 0, 0);
+}
+
+static void allow_command(struct kvm_vm *vm, __s32 id)
+{
+   set_command_perm(vm, id, 1, 0);
+}
+
 static void hook_introspection(struct kvm_vm *vm)
 {
__u32 allow = 1, disallow = 0, allow_inval = 2;
@@ -263,12 +273,16 @@ static void cmd_vm_check_command(__u16 id, int 
expected_err)
expected_err);
 }
 
-static void test_cmd_vm_check_command(void)
+static void test_cmd_vm_check_command(struct kvm_vm *vm)
 {
-   __u16 valid_id = KVMI_GET_VERSION, invalid_id = 0x;
+   __u16 valid_id = KVMI_VM_GET_INFO, invalid_id = 0x;
 
cmd_vm_check_command(valid_id, 0);
cmd_vm_check_command(invalid_id, -KVM_ENOENT);
+
+   disallow_command(vm, valid_id);
+   cmd_vm_check_command(valid_id, -KVM_EPERM);
+   allow_command(vm, valid_id);
 }
 
 static void cmd_vm_check_event(__u16 id, int expected_err)
@@ -291,6 +305,20 @@ static void test_cmd_vm_check_event(void)
cmd_vm_check_event(invalid_id, -KVM_ENOENT);
 }
 
+static void test_cmd_vm_get_info(void)
+{
+   struct kvmi_vm_get_info_reply rpl;
+   struct kvmi_msg_hdr req;
+
+   test_vm_command(KVMI_VM_GET_INFO, , sizeof(req), ,
+   sizeof(rpl), 0);
+   TEST_ASSERT(rpl.vcpu_count == 1,
+   "Unexpected number of vCPU count %u\n",
+   rpl.vcpu_count);
+
+   pr_debug("vcpu count: %u\n", rpl.vcpu_count);
+}
+
 static void test_introspection(struct kvm_vm *vm)
 {
setup_socket();
@@ -298,8 +326,9 @@ static void test_introspection(struct kvm_vm *vm)
 
test_cmd_invalid();
test_cmd_get_version();
-   test_cmd_vm_check_command();
+   test_cmd_vm_check_command(vm);
test_cmd_vm_check_event();
+   test_cmd_vm_get_info();
 
unhook_introspection(vm);
 }
diff --git a/virt/kvm/introspection/kvmi_msg.c 
b/virt/kvm/introspection/kvmi_msg.c
index 16dda34d1acd..795495340dc0 100644
--- a/virt/kvm/introspection/kvmi_msg.c
+++ b/virt/kvm/introspection/kvmi_msg.c
@@ -150,6 +150,18 @@ static int handle_vm_check_event(struct kvm_introspection 
*kvmi,
return kvmi_msg_vm_reply(kvmi, msg, ec, NULL, 0);
 }
 
+static int handle_vm_get_info(struct kvm_introspection *kvmi,
+ const struct kvmi_msg_hdr *msg,
+ const void *req)
+{
+   struct kvmi_vm_get_info_reply rpl;
+
+   memset(, 0, sizeof(rpl));
+   rpl.vcpu_count = atomic_read(>kvm->online_vcpus);
+
+   return kvmi_msg_vm_reply(kvmi, msg, 0, , si

[PATCH v12 63/77] KVM: introspection: add KVMI_VCPU_GET_XSAVE

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This vCPU command is used to get the XSAVE area.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 29 +++
 arch/x86/include/uapi/asm/kvmi.h  |  4 +++
 arch/x86/kvm/kvmi_msg.c   | 20 +
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 26 +
 5 files changed, 80 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 389d69e3fd7e..8f2f105a527f 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -811,6 +811,35 @@ Returns the value of an extended control register XCR.
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+18. KVMI_VCPU_GET_XSAVE
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vcpu_get_xsave_reply {
+   struct kvm_xsave xsave;
+   };
+
+Returns a buffer containing the XSAVE area.
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_ENOMEM - there is not enough memory to allocate the reply
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index c0a73051d667..c6a46252a684 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -111,4 +111,8 @@ struct kvmi_vcpu_get_xcr_reply {
__u64 value;
 };
 
+struct kvmi_vcpu_get_xsave_reply {
+   struct kvm_xsave xsave;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index 21624568e329..90e19244044a 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg.c
@@ -194,12 +194,32 @@ static int handle_vcpu_get_xcr(const struct 
kvmi_vcpu_msg_job *job,
return kvmi_msg_vcpu_reply(job, msg, ec, , sizeof(rpl));
 }
 
+static int handle_vcpu_get_xsave(const struct kvmi_vcpu_msg_job *job,
+const struct kvmi_msg_hdr *msg,
+const void *req)
+{
+   struct kvmi_vcpu_get_xsave_reply *rpl;
+   int err, ec = 0;
+
+   rpl = kvmi_msg_alloc();
+   if (!rpl)
+   ec = -KVM_ENOMEM;
+   else
+   kvm_vcpu_ioctl_x86_get_xsave(job->vcpu, >xsave);
+
+   err = kvmi_msg_vcpu_reply(job, msg, ec, rpl, sizeof(*rpl));
+
+   kvmi_msg_free(rpl);
+   return err;
+}
+
 static const kvmi_vcpu_msg_job_fct msg_vcpu[] = {
[KVMI_VCPU_CONTROL_CR]   = handle_vcpu_control_cr,
[KVMI_VCPU_GET_CPUID]= handle_vcpu_get_cpuid,
[KVMI_VCPU_GET_INFO] = handle_vcpu_get_info,
[KVMI_VCPU_GET_REGISTERS]= handle_vcpu_get_registers,
[KVMI_VCPU_GET_XCR]  = handle_vcpu_get_xcr,
+   [KVMI_VCPU_GET_XSAVE]= handle_vcpu_get_xsave,
[KVMI_VCPU_INJECT_EXCEPTION] = handle_vcpu_inject_exception,
[KVMI_VCPU_SET_REGISTERS]= handle_vcpu_set_registers,
 };
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index ac23754627ff..b99763e580e4 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -44,6 +44,7 @@ enum {
KVMI_VCPU_CONTROL_CR   = KVMI_VCPU_MESSAGE_ID(6),
KVMI_VCPU_INJECT_EXCEPTION = KVMI_VCPU_MESSAGE_ID(7),
KVMI_VCPU_GET_XCR  = KVMI_VCPU_MESSAGE_ID(8),
+   KVMI_VCPU_GET_XSAVE= KVMI_VCPU_MESSAGE_ID(9),
 
KVMI_NEXT_VCPU_MESSAGE
 };
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index d9497727e859..a9ab0e973340 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -1437,6 +1437,31 @@ static void test_cmd_vcpu_get_xcr(struct kvm_vm *vm)
cmd_vcpu_get_xcr(vm, xcr1, , -KVM_EINVAL);
 }
 
+static void cmd_vcpu_get_xsave(struct kvm_vm *vm)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   } req = {};
+   struct kvm_xsave rpl;
+
+   test_vcpu0_command(vm, KVMI_VCPU_GET_XSAVE, , sizeof(req),
+  , sizeof(rpl), 0);
+}
+
+static void test_cmd_vcpu_get_xsave(struct kvm_vm *vm)
+{
+   struct kvm_cpuid_entry2 *entry;
+
+   entry = kvm_get_supported_cpuid_entry(1);
+   if (!(entry->ecx & X86_FEATURE_XSAVE)) {
+   print_skip("XSAVE not supported, ecx 0x%x", entry->ecx);
+   return;
+   }
+
+   cmd_vcpu_get_xsave(vm);
+}
+
 static void test_introspection(struct kvm_vm *vm)
 {
srandom(time(0));
@@ -1464,6 +1489,7 @@ static void tes

[PATCH v12 07/77] KVM: x86: add kvm_x86_ops.control_cr3_intercept()

2021-10-06 Thread Adalbert Lazăr
This function is needed for the KVMI_VCPU_CONTROL_CR command, when the
introspection tool has to intercept the read/write access to CR3.

Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h|  6 ++
 arch/x86/kvm/svm/svm.c | 14 ++
 arch/x86/kvm/vmx/vmx.c | 18 ++
 4 files changed, 39 insertions(+)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h 
b/arch/x86/include/asm/kvm-x86-ops.h
index 31af251c5622..e1f63d36efb7 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -122,6 +122,7 @@ KVM_X86_OP_NULL(migrate_timers)
 KVM_X86_OP(msr_filter_changed)
 KVM_X86_OP_NULL(complete_emulated_msr)
 KVM_X86_OP(bp_intercepted)
+KVM_X86_OP(control_cr3_intercept)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_NULL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 26a52520b8bd..89d53e55e1f9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -146,6 +146,10 @@
 #define KVM_NR_FIXED_MTRR_REGION 88
 #define KVM_NR_VAR_MTRR 8
 
+#define CR_TYPE_R  1
+#define CR_TYPE_W  2
+#define CR_TYPE_RW 3
+
 #define ASYNC_PF_PER_VCPU 64
 
 enum kvm_reg {
@@ -1337,6 +1341,8 @@ struct kvm_x86_ops {
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
+   void (*control_cr3_intercept)(struct kvm_vcpu *vcpu, int type,
+ bool enable);
int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index abecc1234161..5a051fa19c7e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1850,6 +1850,19 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long 
cr4)
kvm_update_cpuid_runtime(vcpu);
 }
 
+static void svm_control_cr3_intercept(struct kvm_vcpu *vcpu, int type,
+ bool enable)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   if (type & CR_TYPE_R)
+   enable ? svm_set_intercept(svm, INTERCEPT_CR3_READ) :
+svm_clr_intercept(svm, INTERCEPT_CR3_READ);
+   if (type & CR_TYPE_W)
+   enable ? svm_set_intercept(svm, INTERCEPT_CR3_WRITE) :
+svm_clr_intercept(svm, INTERCEPT_CR3_WRITE);
+}
+
 static void svm_set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
 {
@@ -4620,6 +4633,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_cr0 = svm_set_cr0,
.is_valid_cr4 = svm_is_valid_cr4,
.set_cr4 = svm_set_cr4,
+   .control_cr3_intercept = svm_control_cr3_intercept,
.set_efer = svm_set_efer,
.get_idt = svm_get_idt,
.set_idt = svm_set_idt,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6fdc3d10b2b4..c8f5bc371f38 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3004,6 +3004,23 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
 #define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
  CPU_BASED_CR3_STORE_EXITING)
 
+static void vmx_control_cr3_intercept(struct kvm_vcpu *vcpu, int type,
+ bool enable)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+   u32 cr3_exec_control = 0;
+
+   if (type & CR_TYPE_R)
+   cr3_exec_control |= CPU_BASED_CR3_STORE_EXITING;
+   if (type & CR_TYPE_W)
+   cr3_exec_control |= CPU_BASED_CR3_LOAD_EXITING;
+
+   if (enable)
+   exec_controls_setbit(vmx, cr3_exec_control);
+   else
+   exec_controls_clearbit(vmx, cr3_exec_control);
+}
+
 void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -7604,6 +7621,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_cr0 = vmx_set_cr0,
.is_valid_cr4 = vmx_is_valid_cr4,
.set_cr4 = vmx_set_cr4,
+   .control_cr3_intercept = vmx_control_cr3_intercept,
.set_efer = vmx_set_efer,
.get_idt = vmx_get_idt,
.set_idt = vmx_set_idt,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 33/77] KVM: introspection: add KVMI_GET_VERSION

2021-10-06 Thread Adalbert Lazăr
When handling introspection commands from tools built with older or
newer versions of the introspection API, the receiving thread silently
accepts smaller/larger messages, but it replies with messages related to
current/kernel version. Smaller introspection event replies are accepted
too. However, larger messages for event replies are not allowed.

Even if an introspection tool can use the API version returned by the
KVMI_GET_VERSION command to check the supported features, the most
important usage of this command is to avoid sending newer versions of
event replies that the kernel side doesn't know about. On larger
messages, the introspection socket will be closed.

Any attempt from the device manager to explicitly disallow this command
through the KVM_INTROSPECTION_COMMAND ioctl will get -EPERM, unless all
commands are disallowed (using id=-1), in which case KVMI_GET_VERSION
is silently allowed, without error.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 38 +++
 include/uapi/linux/kvmi.h | 10 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 35 +
 virt/kvm/introspection/kvmi.c | 27 +++--
 virt/kvm/introspection/kvmi_msg.c | 13 +++
 5 files changed, 119 insertions(+), 4 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index ae6bbf37aef3..d3d672a07872 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -212,3 +212,41 @@ device-specific memory (DMA, emulated MMIO, reserved by a 
passthrough
 device etc.). It is up to the user to determine, using the guest operating
 system data structures, the areas that are safe to access (code, stack, heap
 etc.).
+
+Commands
+
+
+The following C structures are meant to be used directly when communicating
+over the wire. The peer that detects any size mismatch should simply close
+the connection and report the error.
+
+1. KVMI_GET_VERSION
+---
+
+:Architectures: all
+:Versions: >= 1
+:Parameters: none
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_get_version_reply {
+   __u32 version;
+   __u32 max_msg_size;
+   };
+
+Returns the introspection API version and the largest accepted message
+size (useful for variable length messages).
+
+This command is always allowed and successful.
+
+The messages used for introspection commands/events might be extended
+in future versions and while the kernel will accept commands with
+shorter messages (older versions) or larger messages (newer versions,
+ignoring the extra information), it will not accept event replies with
+larger messages.
+
+The introspection tool should use this command to identify the features
+supported by the kernel side and what messages must be used for event
+replies.
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 2b37eee82c52..77dd727dfe18 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -6,6 +6,9 @@
  * KVMI structures and definitions
  */
 
+#include 
+#include 
+
 enum {
KVMI_VERSION = 0x0001
 };
@@ -14,6 +17,8 @@ enum {
 #define KVMI_VCPU_MESSAGE_ID(id) (((id) << 1) | 1)
 
 enum {
+   KVMI_GET_VERSION = KVMI_VM_MESSAGE_ID(1),
+
KVMI_NEXT_VM_MESSAGE
 };
 
@@ -43,4 +48,9 @@ struct kvmi_error_code {
__u32 padding;
 };
 
+struct kvmi_get_version_reply {
+   __u32 version;
+   __u32 max_msg_size;
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 6d7802403f16..69993e54334a 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -99,6 +99,7 @@ static void hook_introspection(struct kvm_vm *vm)
do_hook_ioctl(vm, Kvm_socket, 0);
do_hook_ioctl(vm, Kvm_socket, EEXIST);
 
+   set_command_perm(vm, KVMI_GET_VERSION, disallow, EPERM);
set_command_perm(vm, all_IDs, allow_inval, EINVAL);
set_command_perm(vm, all_IDs, disallow, 0);
set_command_perm(vm, all_IDs, allow, 0);
@@ -214,12 +215,46 @@ static void test_cmd_invalid(void)
-r, kvm_strerror(-r));
 }
 
+static void test_vm_command(int cmd_id, struct kvmi_msg_hdr *req,
+   size_t req_size, void *rpl, size_t rpl_size,
+   int expected_err)
+{
+   int r;
+
+   r = do_command(cmd_id, req, req_size, rpl, rpl_size);
+   TEST_ASSERT(r == expected_err,
+   "Command %d failed, error %d (%s) instead of %d (%s)\n",
+   cmd_id, -r, kvm_strerror(-r),
+   expected_err, kvm_strerror(expected_err));
+}
+
+static void cmd_vm_get_version(struct kvmi_get_version_reply *ver)
+{
+   struct kvmi_msg_hdr req;
+
+   test_vm_command(KVMI_GET_VERSION, , sizeof(req), ve

[PATCH v12 36/77] KVM: introspection: add KVM_INTROSPECTION_PREUNHOOK

2021-10-06 Thread Adalbert Lazăr
In certain situations (when the guest has to be paused, suspended,
migrated, etc.), the device manager will use this new ioctl in order to
trigger the KVMI_VM_EVENT_UNHOOK event. If the event is sent successfully
(the VM has an active introspection channel), the device manager should
delay the action (pause/suspend/...) to give the introspection tool the
chance to remove its hooks (eg. breakpoints) while the guest is still
running. Once a timeout is reached or the introspection tool has closed
the socket, the device manager should resume the action.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/api.rst| 28 
 Documentation/virt/kvm/kvmi.rst   |  7 ---
 include/linux/kvmi_host.h |  1 +
 include/uapi/linux/kvm.h  |  2 ++
 virt/kvm/introspection/kvmi.c | 30 ++
 virt/kvm/introspection/kvmi_int.h |  1 +
 virt/kvm/introspection/kvmi_msg.c |  5 +
 virt/kvm/kvm_main.c   |  5 +
 8 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 85f02eda4895..abb289883542 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5594,6 +5594,34 @@ the event is disallowed.
 Unless set to -1 (meaning all events), id must be a event ID
 (e.g. KVMI_VM_EVENT_UNHOOK, KVMI_VCPU_EVENT_CR, etc.)
 
+4.131 KVM_INTROSPECTION_PREUNHOOK
+-
+
+:Capability: KVM_CAP_INTROSPECTION
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: none
+:Returns: 0 on success, a negative value on error
+
+Errors:
+
+  == 
+  EFAULT the VM is not introspected yet (use KVM_INTROSPECTION_HOOK)
+  ENOENT the socket (passed with KVM_INTROSPECTION_HOOK) had an error
+  ENOENT the introspection tool hasn't subscribed
+ to this type of introspection event (unhook)
+  == 
+
+This ioctl is used to inform that the current VM is
+paused/suspended/migrated/etc.
+
+KVM should send an 'unhook' introspection event to the introspection tool.
+
+If this ioctl is successful, the userspace should give the
+introspection tool a chance to unhook the VM and then it should use
+KVM_INTROSPECTION_UNHOOK to make sure all the introspection structures
+are freed.
+
 5. The kvm_run structure
 
 
diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 6f8583d4aeb2..33490bc9d1c1 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -183,9 +183,10 @@ becomes necessary to remove them before the guest is 
suspended, moved
 (migrated) or a snapshot with memory is created.
 
 The actions are normally performed by the device manager. In the case
-of QEMU, it will use another ioctl to notify the introspection tool and
-wait for a limited amount of time (a few seconds) for a confirmation that
-is OK to proceed (the introspection tool will close the connection).
+of QEMU, it will use the *KVM_INTROSPECTION_PREUNHOOK* ioctl to trigger
+the *KVMI_VM_EVENT_UNHOOK* event and wait for a limited amount of time (a
+few seconds) for a confirmation that it is OK to proceed. The introspection
+tool will close the connection to signal this.
 
 Live migrations
 ---
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index a5ede07686b9..81eac9f53a3f 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -32,6 +32,7 @@ int kvmi_ioctl_command(struct kvm *kvm,
   const struct kvm_introspection_feature *feat);
 int kvmi_ioctl_event(struct kvm *kvm,
 const struct kvm_introspection_feature *feat);
+int kvmi_ioctl_preunhook(struct kvm *kvm);
 
 #else
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index c56f40c47890..160c7ba1c666 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1834,6 +1834,8 @@ struct kvm_introspection_feature {
 #define KVM_INTROSPECTION_COMMAND _IOW(KVMIO, 0xca, struct 
kvm_introspection_feature)
 #define KVM_INTROSPECTION_EVENT   _IOW(KVMIO, 0xcb, struct 
kvm_introspection_feature)
 
+#define KVM_INTROSPECTION_PREUNHOOK  _IO(KVMIO, 0xcc)
+
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
 #define KVM_DEV_ASSIGN_MASK_INTX   (1 << 2)
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index e39fe4721243..a527795f01a4 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -384,3 +384,33 @@ int kvmi_ioctl_command(struct kvm *kvm,
mutex_unlock(>kvmi_lock);
return err;
 }
+
+static bool kvmi_unhook_event(struct kvm_introspection *kvmi)
+{
+   int err;
+
+   err = kvmi_msg_send_unhook(kvmi);
+
+   return !err;
+}
+
+int kvmi_ioctl_preu

[PATCH v12 38/77] KVM: introspection: add KVMI_VM_CONTROL_EVENTS

2021-10-06 Thread Adalbert Lazăr
By default, all introspection VM events are disabled. The introspection
tool must explicitly enable the VM events it wants to receive. With this
command it can enable/disable any VM event (e.g. KVMI_VM_EVENT_UNHOOK)
if allowed by the device manager.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 42 ++--
 include/linux/kvmi_host.h |  2 +
 include/uapi/linux/kvmi.h | 16 +--
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 48 +++
 virt/kvm/introspection/kvmi.c | 30 +++-
 virt/kvm/introspection/kvmi_int.h |  3 ++
 virt/kvm/introspection/kvmi_msg.c | 29 +--
 7 files changed, 158 insertions(+), 12 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 2eecd7e8bfd0..ecf809f40771 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -332,10 +332,44 @@ This command is always allowed.
 
 Returns the number of online vCPUs.
 
+5. KVMI_VM_CONTROL_EVENTS
+-
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_control_events {
+   __u16 event_id;
+   __u8 enable;
+   __u8 padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Enables/disables VM introspection events. This command can be used with
+the following events::
+
+   KVMI_VM_EVENT_UNHOOK
+
+:Errors:
+
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - the event ID is unknown (use *KVMI_VM_CHECK_EVENT* first)
+* -KVM_EPERM - the access is disallowed (use *KVMI_VM_CHECK_EVENT* first)
+
 Events
 ==
 
 The VM introspection events are sent with the KVMI_VM_EVENT message id.
+No event is sent unless it is explicitly enabled.
 The message data begins with a common structure having the event id::
 
struct kvmi_event_hdr {
@@ -359,6 +393,8 @@ Specific event data can follow this common structure.
 
 :Returns: none
 
-This event is sent when the device manager has to pause/stop/migrate the
-guest (see **Unhooking**). The introspection tool has a chance to unhook
-and close the KVMI channel (signaling that the operation can proceed).
+This event is sent when the device manager has to pause/stop/migrate
+the guest (see **Unhooking**) and the introspection has been enabled for
+this event (see **KVMI_VM_CONTROL_EVENTS**). The introspection tool has
+a chance to unhook and close the introspection socket (signaling that
+the operation can proceed).
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index 6476c7d6a4d3..a59307dac6bf 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -18,6 +18,8 @@ struct kvm_introspection {
unsigned long *cmd_allow_mask;
unsigned long *event_allow_mask;
 
+   unsigned long *vm_event_enable_mask;
+
atomic_t ev_seq;
 };
 
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 18fb51078d48..9a10ef2cd890 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -19,10 +19,11 @@ enum {
 enum {
KVMI_VM_EVENT = KVMI_VM_MESSAGE_ID(0),
 
-   KVMI_GET_VERSION  = KVMI_VM_MESSAGE_ID(1),
-   KVMI_VM_CHECK_COMMAND = KVMI_VM_MESSAGE_ID(2),
-   KVMI_VM_CHECK_EVENT   = KVMI_VM_MESSAGE_ID(3),
-   KVMI_VM_GET_INFO  = KVMI_VM_MESSAGE_ID(4),
+   KVMI_GET_VERSION   = KVMI_VM_MESSAGE_ID(1),
+   KVMI_VM_CHECK_COMMAND  = KVMI_VM_MESSAGE_ID(2),
+   KVMI_VM_CHECK_EVENT= KVMI_VM_MESSAGE_ID(3),
+   KVMI_VM_GET_INFO   = KVMI_VM_MESSAGE_ID(4),
+   KVMI_VM_CONTROL_EVENTS = KVMI_VM_MESSAGE_ID(5),
 
KVMI_NEXT_VM_MESSAGE
 };
@@ -82,4 +83,11 @@ struct kvmi_event_hdr {
__u16 padding[3];
 };
 
+struct kvmi_vm_control_events {
+   __u16 event_id;
+   __u8 enable;
+   __u8 padding1;
+   __u32 padding2;
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 722737e01d9b..f5fdb09b9be6 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -339,6 +339,31 @@ static void trigger_event_unhook_notification(struct 
kvm_vm *vm)
errno, strerror(errno));
 }
 
+static void cmd_vm_control_events(__u16 event_id, __u8 enable,
+ int expected_err)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vm_control_events cmd;
+   } req = {};
+
+   req.cmd.event_id = event_id;
+   req.cmd.enable = enable;
+
+   test_vm_command(KVMI_VM_CONTROL_EVENTS, , sizeof(req),
+   NULL, 0, expected_err);
+}
+
+static void enable_vm_event(__u16 event_id)
+{
+   cmd_vm_control_events(event_id, 1, 0);
+}
+
+static void disable_vm_event(__

[PATCH v12 20/77] KVM: x86: extend kvm_mmu_gva_to_gpa_system() with the 'access' parameter

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This is needed for kvmi_update_ad_flags() to emulate a guest page
table walk on SPT violations due to A/D bit updates.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 arch/x86/kvm/x86.c  | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dfc52e451f9b..49734fea7c4f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1788,7 +1788,7 @@ gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, 
gva_t gva,
 gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
   struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
-   struct x86_exception *exception);
+   u32 access, struct x86_exception *exception);
 
 bool kvm_apicv_activated(struct kvm *kvm);
 void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9fe3b53fd1e3..de0fc15ab7cb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6367,9 +6367,9 @@ EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write);
 
 /* uses this to access any guest's mapped memory without checking CPL */
 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
-   struct x86_exception *exception)
+   u32 access, struct x86_exception *exception)
 {
-   return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
+   return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
 }
 
 static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int 
bytes,
@@ -10544,7 +10544,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
vcpu_load(vcpu);
 
idx = srcu_read_lock(>kvm->srcu);
-   gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
+   gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, 0, NULL);
srcu_read_unlock(>kvm->srcu, idx);
tr->physical_address = gpa;
tr->valid = gpa != UNMAPPED_GVA;
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 73/77] KVM: introspection: add KVMI_VCPU_CONTROL_SINGLESTEP

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

The next commit that adds the KVMI_VCPU_EVENT_SINGLESTEP event will make
this command more useful.

Signed-off-by: Nicușor Cîțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 33 +++
 arch/x86/kvm/kvmi.c   | 14 -
 arch/x86/kvm/kvmi_msg.c   | 56 +++
 arch/x86/kvm/x86.c| 18 --
 include/linux/kvmi_host.h |  7 +++
 include/uapi/linux/kvmi.h | 30 ++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 39 +
 virt/kvm/introspection/kvmi.c | 22 
 virt/kvm/introspection/kvmi_int.h |  2 +
 9 files changed, 190 insertions(+), 31 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 54cb3fbe184e..7f70345ebaac 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -791,6 +791,7 @@ exception.
 * -KVM_EINVAL - the selected vCPU is invalid
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EBUSY - the vCPU is switched in singlestep mode 
(*KVMI_VCPU_CONTROL_SINGLESTEP*)
 * -KVM_EBUSY - another *KVMI_VCPU_INJECT_EXCEPTION*-*KVMI_VCPU_EVENT_TRAP*
pair is in progress
 
@@ -1017,6 +1018,38 @@ In order to 'forget' an address, all three bits ('rwx') 
must be set.
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 * -KVM_ENOMEM - there is not enough memory to add the page tracking structures
 
+23. KVMI_VCPU_CONTROL_SINGLESTEP
+
+
+:Architectures: x86 (vmx)
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_control_singlestep {
+   __u8 enable;
+   __u8 padding[7];
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+
+Enables/disables singlestep for the selected vCPU.
+
+The introspection tool should use *KVMI_GET_VERSION* to check
+if the hardware supports singlestep (see **KVMI_GET_VERSION**).
+
+:Errors:
+
+* -KVM_EOPNOTSUPP - the hardware doesn't support singlestep
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index eee874890e29..e26a0eee1592 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -779,7 +779,9 @@ void kvmi_enter_guest(struct kvm_vcpu *vcpu)
if (kvmi) {
vcpui = VCPUI(vcpu);
 
-   if (vcpui->arch.exception.pending)
+   if (vcpui->singlestep.loop)
+   kvmi_arch_start_singlestep(vcpu);
+   else if (vcpui->arch.exception.pending)
kvmi_inject_pending_exception(vcpu);
 
kvmi_put(vcpu->kvm);
@@ -1089,3 +1091,13 @@ void kvmi_arch_features(struct kvmi_features *feat)
 {
feat->singlestep = !!kvm_x86_ops.control_singlestep;
 }
+
+void kvmi_arch_start_singlestep(struct kvm_vcpu *vcpu)
+{
+   static_call(kvm_x86_control_singlestep)(vcpu, true);
+}
+
+void kvmi_arch_stop_singlestep(struct kvm_vcpu *vcpu)
+{
+   static_call(kvm_x86_control_singlestep)(vcpu, false);
+}
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index 98e29e1d3961..6d3980e18281 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg.c
@@ -166,7 +166,8 @@ static int handle_vcpu_inject_exception(const struct 
kvmi_vcpu_msg_job *job,
else if (req->padding1 || req->padding2)
ec = -KVM_EINVAL;
else if (VCPUI(vcpu)->arch.exception.pending ||
-   VCPUI(vcpu)->arch.exception.send_event)
+   VCPUI(vcpu)->arch.exception.send_event ||
+   VCPUI(vcpu)->singlestep.loop)
ec = -KVM_EBUSY;
else
ec = kvmi_arch_cmd_vcpu_inject_exception(vcpu, req);
@@ -275,18 +276,49 @@ static int handle_vcpu_control_msr(const struct 
kvmi_vcpu_msg_job *job,
return kvmi_msg_vcpu_reply(job, msg, ec, NULL, 0);
 }
 
+static int handle_vcpu_control_singlestep(const struct kvmi_vcpu_msg_job *job,
+ const struct kvmi_msg_hdr *msg,
+ const void *_req)
+{
+   const struct kvmi_vcpu_control_singlestep *req = _req;
+   struct kvm_vcpu *vcpu = job->vcpu;
+   int ec = 0;
+
+   if (non_zero_padding(req->padding, ARRAY_SIZE(req->padding)) ||
+   req->enable > 1) {
+   ec = -KVM_EINVAL;
+   goto reply;
+   }
+
+   if (!kvm_x86_ops.control_singlestep) {
+   ec = -KVM_EOPNOTSUPP;
+   goto reply;
+   }
+
+   if (req->enable)
+   kvmi_arch_start_singlestep(vcpu);
+   else
+  

[PATCH v12 27/77] KVM: x86: wire in the preread/prewrite/preexec page trackers

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

These are needed in order to notify the introspection tool when
read/write/execute access happens on one of the tracked memory pages.

Also, this patch adds the case when the introspection tool requests
that the vCPU re-enter in guest (and abort the emulation of the current
instruction).

Signed-off-by: Mihai Donțu 
Co-developed-by: Marian Rotariu 
Signed-off-by: Marian Rotariu 
Co-developed-by: Ștefan Șicleru 
Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/emulate.c |  4 
 arch/x86/kvm/kvm_emulate.h |  1 +
 arch/x86/kvm/mmu/mmu.c | 42 +--
 arch/x86/kvm/mmu/spte.c| 23 +++
 arch/x86/kvm/x86.c | 45 ++
 5 files changed, 94 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c589ac832265..5e3e8cb0375e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5307,6 +5307,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void 
*insn, int insn_len, int
ctxt->memopp->addr.mem.ea + ctxt->_eip);
 
 done:
+   if (rc == X86EMUL_RETRY_INSTR)
+   return EMULATION_RETRY_INSTR;
if (rc == X86EMUL_PROPAGATE_FAULT)
ctxt->have_exception = true;
return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
@@ -5678,6 +5680,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
if (rc == X86EMUL_INTERCEPTED)
return EMULATION_INTERCEPTED;
 
+   if (rc == X86EMUL_RETRY_INSTR)
+   return EMULATION_RETRY_INSTR;
if (rc == X86EMUL_CONTINUE)
writeback_registers(ctxt);
 
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 68b420289d7e..1752679f8cd3 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -494,6 +494,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt 
*ctxt);
 #define EMULATION_OK 0
 #define EMULATION_RESTART 1
 #define EMULATION_INTERCEPTED 2
+#define EMULATION_RETRY_INSTR 3
 void init_decode_cache(struct x86_emulate_ctxt *ctxt);
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8124fdd78aad..b5685e342945 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -837,9 +837,13 @@ static void account_shadowed(struct kvm *kvm, struct 
kvm_mmu_page *sp)
slot = __gfn_to_memslot(slots, gfn);
 
/* the non-leaf shadow pages are keeping readonly. */
-   if (sp->role.level > PG_LEVEL_4K)
-   return kvm_slot_page_track_add_page(kvm, slot, gfn,
-   KVM_PAGE_TRACK_WRITE);
+   if (sp->role.level > PG_LEVEL_4K) {
+   kvm_slot_page_track_add_page(kvm, slot, gfn,
+KVM_PAGE_TRACK_PREWRITE);
+   kvm_slot_page_track_add_page(kvm, slot, gfn,
+KVM_PAGE_TRACK_WRITE);
+   return;
+   }
 
kvm_mmu_gfn_disallow_lpage(slot, gfn);
 }
@@ -865,9 +869,13 @@ static void unaccount_shadowed(struct kvm *kvm, struct 
kvm_mmu_page *sp)
gfn = sp->gfn;
slots = kvm_memslots_for_spte_role(kvm, sp->role);
slot = __gfn_to_memslot(slots, gfn);
-   if (sp->role.level > PG_LEVEL_4K)
-   return kvm_slot_page_track_remove_page(kvm, slot, gfn,
-  KVM_PAGE_TRACK_WRITE);
+   if (sp->role.level > PG_LEVEL_4K) {
+   kvm_slot_page_track_remove_page(kvm, slot, gfn,
+   KVM_PAGE_TRACK_PREWRITE);
+   kvm_slot_page_track_remove_page(kvm, slot, gfn,
+   KVM_PAGE_TRACK_WRITE);
+   return;
+   }
 
kvm_mmu_gfn_allow_lpage(slot, gfn);
 }
@@ -2678,7 +2686,10 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, 
struct kvm_memory_slot *slot,
 * track machinery is used to write-protect upper-level shadow pages,
 * i.e. this guards the role.level == 4K assertion below!
 */
-   if (kvm_slot_page_track_is_active(vcpu, slot, gfn, 
KVM_PAGE_TRACK_WRITE))
+   if (kvm_slot_page_track_is_active(vcpu, slot, gfn,
+   KVM_PAGE_TRACK_WRITE) ||
+   kvm_slot_page_track_is_active(vcpu, slot, gfn,
+   KVM_PAGE_TRACK_PREWRITE))
return -EPERM;
 
/*
@@ -3882,14 +3893,21 @@ static bool page_fault_handle_page_track(struct 
kvm_vcpu *vcpu,
if (unlikely(fault->rsvd))
return false;
 
-   if (!fault->present || !fault->write)
-   return false;
-
/*

[PATCH v12 16/77] KVM: x86: save the error code during EPT/NPF exits handling

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This is needed for kvm_page_track_emulation_failure().

When the introspection tool {read,write,exec}-protect a guest memory
page, it is notified from the read/write/fetch callbacks used by
the KVM emulator. If the emulation fails it is possible that the
read/write callbacks were not used. In such cases, the emulator will
call kvm_page_track_emulation_failure() to ensure that the introspection
tool is notified of the read/write #PF (based on this saved error code),
which in turn can emulate the instruction or unprotect the memory page
(and let the guest execute the instruction).

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 3 +++
 arch/x86/kvm/svm/svm.c  | 2 ++
 arch/x86/kvm/vmx/vmx.c  | 1 +
 3 files changed, 6 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 29f4e8b619e1..db88d38e485d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -916,6 +916,9 @@ struct kvm_vcpu_arch {
 #if IS_ENABLED(CONFIG_HYPERV)
hpa_t hv_root_tdp;
 #endif
+
+   /* #PF translated error code from EPT/NPT exit reason */
+   u64 error_code;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b7ef0671863e..de6cb59a332d 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2077,6 +2077,8 @@ static int npf_interception(struct kvm_vcpu *vcpu)
u64 fault_address = svm->vmcb->control.exit_info_2;
u64 error_code = svm->vmcb->control.exit_info_1;
 
+   svm->vcpu.arch.error_code = error_code;
+
trace_kvm_page_fault(fault_address, error_code);
return kvm_mmu_page_fault(vcpu, fault_address, error_code,
static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a140d69b1bd3..ceba2e112e26 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5389,6 +5389,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
   PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
 
vcpu->arch.exit_qualification = exit_qualification;
+   vcpu->arch.error_code = error_code;
 
/*
 * Check that the GPA doesn't exceed physical memory limits, as that is
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 02/77] KVM: add kvm_vcpu_kick_and_wait()

2021-10-06 Thread Adalbert Lazăr
This function is needed for the KVMI_VM_PAUSE_VCPU command, which sets
the introspection request flag, kicks the vCPU out of guest and returns
a success code (0). The vCPU will send the KVMI_VCPU_EVENT_PAUSE
event as soon as possible. Once the introspection tool receives the event,
it knows that the vCPU doesn't run guest code and can handle introspection
commands (until the reply for the pause event is sent).

To implement the "pause VM" command, the introspection tool will send
a KVMI_VM_PAUSE_VCPU command for every vCPU. To know when the VM is
paused, userspace has to receive and "parse" all events. For example,
with a 4 vCPU VM, if "pause VM" was sent by userspace while handling
an event from vCPU0 and at the same time a new vCPU was hot-plugged
(which could send another event for vCPU4), the "pause VM" command has
to receive and check all events until it gets the pause events for vCPU1,
vCPU2 and vCPU3 before returning to the upper layer.

In order to make it easier for userspace to implement the "pause VM"
command, KVMI_VM_PAUSE_VCPU has an optional 'wait' parameter. If this is
set, kvm_vcpu_kick_and_wait() will be used instead of kvm_vcpu_kick().
Once a sequence of KVMI_VM_PAUSE_VCPU commands with the 'wait' flag set
is handled, the introspection tool can consider the VM paused, without
the need to wait and check events.

Signed-off-by: Adalbert Lazăr 
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c  | 10 ++
 2 files changed, 11 insertions(+)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 60a35d9fe259..6795ea7e357d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -970,6 +970,7 @@ void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
+void kvm_vcpu_kick_and_wait(struct kvm_vcpu *vcpu);
 int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3f6d450355f0..85f2dd8a79d1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3343,6 +3343,16 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
 #endif /* !CONFIG_S390 */
 
+void kvm_vcpu_kick_and_wait(struct kvm_vcpu *vcpu)
+{
+   if (kvm_vcpu_wake_up(vcpu))
+   return;
+
+   if (kvm_request_needs_ipi(vcpu, KVM_REQUEST_WAIT))
+   smp_call_function_single(vcpu->cpu, ack_flush, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_kick_and_wait);
+
 int kvm_vcpu_yield_to(struct kvm_vcpu *target)
 {
struct pid *pid;
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 29/77] KVM: introduce VM introspection

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

The KVM introspection subsystem provides a facility for applications
to control the execution of any running VMs (pause, resume, shutdown),
query the state of the vCPUs (GPRs, MSRs etc.), alter the page access bits
in the shadow page tables and receive notifications when events of interest
have taken place (shadow page table level faults, key MSR writes,
hypercalls etc.). Some notifications can be responded to with an action
(like preventing an MSR from being written), others are merely informative
(like breakpoint events which can be used for execution tracing).

Signed-off-by: Mihai Donțu 
Co-developed-by: Marian Rotariu 
Signed-off-by: Marian Rotariu 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 139 ++
 arch/x86/include/asm/kvm_host.h   |   2 +
 arch/x86/kvm/Kconfig  |  10 +++
 arch/x86/kvm/Makefile |   2 +
 include/linux/kvmi_host.h |  21 +
 virt/kvm/introspection/kvmi.c |  25 ++
 virt/kvm/introspection/kvmi_int.h |   7 ++
 virt/kvm/kvm_main.c   |  15 
 8 files changed, 221 insertions(+)
 create mode 100644 Documentation/virt/kvm/kvmi.rst
 create mode 100644 include/linux/kvmi_host.h
 create mode 100644 virt/kvm/introspection/kvmi.c
 create mode 100644 virt/kvm/introspection/kvmi_int.h

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
new file mode 100644
index ..59cc33a39f9f
--- /dev/null
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -0,0 +1,139 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=
+KVMI - The kernel virtual machine introspection subsystem
+=
+
+The KVM introspection subsystem provides a facility for applications running
+on the host or in a separate VM, to control the execution of any running VMs
+(pause, resume, shutdown), query the state of the vCPUs (GPRs, MSRs etc.),
+alter the page access bits in the shadow page tables (only for the hardware
+backed ones, eg. Intel's EPT) and receive notifications when events of
+interest have taken place (shadow page table level faults, key MSR writes,
+hypercalls etc.). Some notifications can be responded to with an action
+(like preventing an MSR from being written), others are mere informative
+(like breakpoint events which can be used for execution tracing).
+With few exceptions, all events are optional. An application using this
+subsystem will explicitly register for them.
+
+The use case that gave way for the creation of this subsystem is to monitor
+the guest OS and as such the ABI/API is highly influenced by how the guest
+software (kernel, applications) sees the world. For example, some events
+provide information specific for the host CPU architecture
+(eg. MSR_IA32_SYSENTER_EIP) merely because it is leveraged by guest software
+to implement a critical feature (fast system calls).
+
+At the moment, the target audience for KVMI are security software authors
+that wish to perform forensics on newly discovered threats (exploits) or
+to implement another layer of security like preventing a large set of
+kernel rootkits simply by "locking" the kernel image in the shadow page
+tables (ie. enforce .text r-x, .rodata rw- etc.). It's the latter case that
+made KVMI a separate subsystem, even though many of these features are
+available in the device manager (eg. QEMU). The ability to build a security
+application that does not interfere (in terms of performance) with the
+guest software asks for a specialized interface that is designed for minimum
+overhead.
+
+API/ABI
+===
+
+This chapter describes the VMI interface used to monitor and control local
+guests from a user application.
+
+Overview
+
+
+The interface is socket based, one connection for every VM. One end is in the
+host kernel while the other is held by the user application (introspection
+tool).
+
+The initial connection is established by an application running on the
+host (eg. QEMU) that connects to the introspection tool and after a
+handshake the file descriptor is passed to the host kernel making all
+further communication take place between it and the introspection tool.
+
+The socket protocol allows for commands and events to be multiplexed over
+the same connection. As such, it is possible for the introspection tool to
+receive an event while waiting for the result of a command. Also, it can
+send a command while the host kernel is waiting for a reply to an event.
+
+The kernel side of the socket communication is blocking and will wait
+for an answer from its peer indefinitely or until the guest is powered
+off (killed), restarted or the peer goes away, at which point it will
+wake up and properly cleanup as if the introspection subsystem has never
+been used on that guest (if requested). Obviously, whether the guest can
+really continue normal execution depends on whether the int

[PATCH v12 60/77] KVM: introspection: add KVMI_VCPU_INJECT_EXCEPTION + KVMI_VCPU_EVENT_TRAP

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

The KVMI_VCPU_INJECT_EXCEPTION command is used by the introspection tool
to inject exceptions, for example, to get a page from swap.

The exception is injected right before entering in guest unless there is
already an exception pending. The introspection tool is notified with
an KVMI_VCPU_EVENT_TRAP event about the success of the injection. In
case of failure, the introspection tool is expected to try again later.

Signed-off-by: Mihai Donțu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  76 +++
 arch/x86/include/asm/kvmi_host.h  |  11 ++
 arch/x86/include/uapi/asm/kvmi.h  |  16 +++
 arch/x86/kvm/kvmi.c   | 110 
 arch/x86/kvm/kvmi.h   |   3 +
 arch/x86/kvm/kvmi_msg.c   |  52 +++-
 arch/x86/kvm/x86.c|   2 +
 include/uapi/linux/kvmi.h |  14 +-
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 124 ++
 virt/kvm/introspection/kvmi.c |   2 +
 virt/kvm/introspection/kvmi_int.h |   4 +
 virt/kvm/introspection/kvmi_msg.c |  16 ++-
 12 files changed, 416 insertions(+), 14 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index a4705acddeb2..1fbc2a03f5bd 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -550,6 +550,7 @@ because these are sent as a result of certain commands (but 
they can be
 disallowed by the device manager) ::
 
KVMI_VCPU_EVENT_PAUSE
+   KVMI_VCPU_EVENT_TRAP
 
 The VM events (e.g. *KVMI_VM_EVENT_UNHOOK*) are controlled with
 the *KVMI_VM_CONTROL_EVENTS* command.
@@ -736,6 +737,46 @@ ID set.
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+16. KVMI_VCPU_INJECT_EXCEPTION
+--
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_inject_exception {
+   __u8 nr;
+   __u8 padding1;
+   __u16 padding2;
+   __u32 error_code;
+   __u64 address;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Injects a vCPU exception (``nr``) with or without an error code 
(``error_code``).
+For page fault exceptions, the guest virtual address (``address``)
+has to be specified too.
+
+The *KVMI_VCPU_EVENT_TRAP* event will be sent with the effective injected
+exception.
+
+:Errors:
+
+* -KVM_EPERM  - the *KVMI_VCPU_EVENT_TRAP* event is disallowed
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EBUSY - another *KVMI_VCPU_INJECT_EXCEPTION*-*KVMI_VCPU_EVENT_TRAP*
+   pair is in progress
+
 Events
 ==
 
@@ -966,3 +1007,38 @@ register (see **KVMI_VCPU_CONTROL_EVENTS**).
 (``cr``), the old value (``old_value``) and the new value (``new_value``)
 are sent to the introspection tool. The *CONTINUE* action will set the
 ``new_val``.
+
+6. KVMI_VCPU_EVENT_TRAP
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_event;
+   struct kvmi_vcpu_event_trap {
+   __u8 nr;
+   __u8 padding1;
+   __u16 padding2;
+   __u32 error_code;
+   __u64 address;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_event_reply;
+
+This event is sent if a previous *KVMI_VCPU_INJECT_EXCEPTION* command
+took place. Because it has a high priority, it will be sent before any
+other vCPU introspection event.
+
+``kvmi_vcpu_event`` (with the vCPU state), exception/interrupt number
+(``nr``), exception code (``error_code``) and ``address`` are sent to
+the introspection tool, which should check if its exception has been
+injected or overridden.
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index edbedf031467..97f5b1a01c9e 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -24,6 +24,15 @@ struct kvm_vcpu_arch_introspection {
bool have_delayed_regs;
 
DECLARE_BITMAP(cr_mask, KVMI_NUM_CR);
+
+   struct {
+   u8 nr;
+   u32 error_code;
+   bool error_code_valid;
+   u64 address;
+   bool pending;
+   bool send_event;
+   } exception;
 };
 
 struct kvm_arch_introspection {
@@ -36,6 +45,7 @@ bool kvmi_cr_event(struct kvm_vcpu *vcpu, unsigned int cr,
   unsigned long old_value, unsigned long *new_value);
 bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu);
 bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu

[PATCH v12 47/77] KVM: introspection: add KVMI_VCPU_EVENT_PAUSE

2021-10-06 Thread Adalbert Lazăr
This event is sent by the vCPU thread as a response to the
KVMI_VM_PAUSE_VCPU command, but it has a lower priority, being sent
after any other introspection event and when no other introspection
command is queued.

The number of KVMI_VCPU_EVENT_PAUSE will match the number of successful
KVMI_VM_PAUSE_VCPU commands.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 26 
 include/uapi/linux/kvmi.h |  2 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 65 ++-
 virt/kvm/introspection/kvmi.c | 26 +++-
 virt/kvm/introspection/kvmi_int.h |  1 +
 virt/kvm/introspection/kvmi_msg.c | 18 +
 6 files changed, 136 insertions(+), 2 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index c46e870cc728..7f3daeb6 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -596,3 +596,29 @@ the guest (see **Unhooking**) and the introspection has 
been enabled for
 this event (see **KVMI_VM_CONTROL_EVENTS**). The introspection tool has
 a chance to unhook and close the introspection socket (signaling that
 the operation can proceed).
+
+2. KVMI_VCPU_EVENT_PAUSE
+
+
+:Architectures: all
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_event_hdr;
+   struct kvmi_vcpu_event;
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_event_reply;
+
+This event is sent in response to a *KVMI_VCPU_PAUSE* command and
+cannot be controlled with *KVMI_VCPU_CONTROL_EVENTS*.
+Because it has a low priority, it will be sent after any other vCPU
+introspection event and when no other vCPU introspection command is
+queued.
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 6a57efb5664d..757d4b84f473 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -50,6 +50,8 @@ enum {
 };
 
 enum {
+   KVMI_VCPU_EVENT_PAUSE = KVMI_VCPU_EVENT_ID(0),
+
KVMI_NEXT_VCPU_EVENT
 };
 
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index f8d355aff5fa..9eb17d820a8b 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -34,6 +34,17 @@ static vm_paddr_t test_gpa;
 
 static int page_size;
 
+struct vcpu_event {
+   struct kvmi_event_hdr hdr;
+   struct kvmi_vcpu_event common;
+};
+
+struct vcpu_reply {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   struct kvmi_vcpu_event_reply reply;
+};
+
 struct vcpu_worker_data {
struct kvm_vm *vm;
int vcpu_id;
@@ -704,14 +715,66 @@ static void pause_vcpu(void)
cmd_vcpu_pause(1, 0);
 }
 
+static void reply_to_event(struct kvmi_msg_hdr *ev_hdr, struct vcpu_event *ev,
+  __u8 action, struct vcpu_reply *rpl, size_t rpl_size)
+{
+   ssize_t r;
+
+   rpl->hdr.id = ev_hdr->id;
+   rpl->hdr.seq = ev_hdr->seq;
+   rpl->hdr.size = rpl_size - sizeof(rpl->hdr);
+
+   rpl->vcpu_hdr.vcpu = ev->common.vcpu;
+
+   rpl->reply.action = action;
+   rpl->reply.event = ev->hdr.event;
+
+   r = send(Userspace_socket, rpl, rpl_size, 0);
+   TEST_ASSERT(r == rpl_size,
+   "send() failed, sending %zd, result %zd, errno %d (%s)\n",
+   rpl_size, r, errno, strerror(errno));
+}
+
+static void receive_vcpu_event(struct kvmi_msg_hdr *msg_hdr,
+  struct vcpu_event *ev,
+  size_t ev_size, u16 ev_id)
+{
+   receive_event(msg_hdr, KVMI_VCPU_EVENT,
+ &ev->hdr, ev_id, ev_size);
+}
+
+static void discard_pause_event(struct kvm_vm *vm)
+{
+   struct vcpu_worker_data data = {.vm = vm, .vcpu_id = VCPU_ID};
+   struct vcpu_reply rpl = {};
+   struct kvmi_msg_hdr hdr;
+   pthread_t vcpu_thread;
+   struct vcpu_event ev;
+
+   vcpu_thread = start_vcpu_worker();
+
+   receive_vcpu_event(&hdr, &ev, sizeof(ev), KVMI_VCPU_EVENT_PAUSE);
+
+   reply_to_event(&hdr, &ev, KVMI_EVENT_ACTION_CONTINUE,
+   &rpl, sizeof(rpl));
+
+   wait_vcpu_worker(vcpu_thread);
+}
+
 static void test_pause(struct kvm_vm *vm)
 {
-   __u8 wait = 1, wait_inval = 2;
+   __u8 no_wait = 0, wait = 1, wait_inval = 2;
 
pause_vcpu();
+   discard_pause_event(vm);
 
cmd_vcpu_pause(wait, 0);
+   discard_pause_event(vm);
cmd_vcpu_pause(wait_inval, -KVM_EINVAL);
+
+   disallow_event(vm, KVMI_VCPU_EVENT_PAUSE);
+   cmd_vcpu_pause(no_wait, -KVM_EPERM);
+   allow_event(vm, KVMI_VCPU_EVENT_PAUSE);
 }
 
 static void test_introspection(struct kvm_vm *vm)
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index 26ab5b8526d1..39c43b8a127d 100644
--- a/virt/kvm/introspection/kvmi.c
+++

[PATCH v12 15/77] KVM: x86: add kvm_x86_ops.control_msr_intercept()

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This is needed for the KVMI_VCPU_EVENT_MSR event, which is used notify
the introspection tool about any change made to a MSR of interest.

Signed-off-by: Mihai Donțu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h|  2 ++
 arch/x86/kvm/svm/svm.c | 11 +++
 arch/x86/kvm/vmx/vmx.c |  7 +++
 4 files changed, 21 insertions(+)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h 
b/arch/x86/include/asm/kvm-x86-ops.h
index 90e913408c6e..4228b775a48e 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -128,6 +128,7 @@ KVM_X86_OP(desc_ctrl_supported)
 KVM_X86_OP(control_desc_intercept)
 KVM_X86_OP(desc_intercepted)
 KVM_X86_OP(msr_write_intercepted)
+KVM_X86_OP(control_msr_intercept)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_NULL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 79b2d8abff36..29f4e8b619e1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1335,6 +1335,8 @@ struct kvm_x86_ops {
void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
+   void (*control_msr_intercept)(struct kvm_vcpu *vcpu, unsigned int msr,
+ int type, bool enable);
bool (*msr_write_intercepted)(struct kvm_vcpu *vcpu, u32 msr);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
void (*get_segment)(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 97f7406cf7d6..b7ef0671863e 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -712,6 +712,16 @@ void set_msr_interception(struct kvm_vcpu *vcpu, u32 
*msrpm, u32 msr,
set_msr_interception_bitmap(vcpu, msrpm, msr, type, value);
 }
 
+static void svm_control_msr_intercept(struct kvm_vcpu *vcpu, unsigned int msr,
+ int type, bool enable)
+{
+   const struct vcpu_svm *svm = to_svm(vcpu);
+   u32 *msrpm = is_guest_mode(vcpu) ? svm->nested.msrpm :
+  svm->msrpm;
+
+   set_msr_interception(vcpu, msrpm, msr, type, enable);
+}
+
 u32 *svm_vcpu_alloc_msrpm(void)
 {
unsigned int order = get_order(MSRPM_SIZE);
@@ -4718,6 +4728,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.get_msr_feature = svm_get_msr_feature,
.get_msr = svm_get_msr,
.set_msr = svm_set_msr,
+   .control_msr_intercept = svm_control_msr_intercept,
.msr_write_intercepted = msr_write_intercepted,
.get_segment_base = svm_get_segment_base,
.get_segment = svm_get_segment,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b036aed96912..a140d69b1bd3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3859,6 +3859,12 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, 
u32 msr, int type)
vmx_set_msr_bitmap_write(msr_bitmap, msr);
 }
 
+static void vmx_control_msr_intercept(struct kvm_vcpu *vcpu, unsigned int msr,
+ int type, bool enable)
+{
+   vmx_set_intercept_for_msr(vcpu, msr, type, enable);
+}
+
 static void vmx_reset_x2apic_msrs(struct kvm_vcpu *vcpu, u8 mode)
 {
unsigned long *msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
@@ -7637,6 +7643,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.get_msr_feature = vmx_get_msr_feature,
.get_msr = vmx_get_msr,
.set_msr = vmx_set_msr,
+   .control_msr_intercept = vmx_control_msr_intercept,
.msr_write_intercepted = msr_write_intercepted,
.get_segment_base = vmx_get_segment_base,
.get_segment = vmx_get_segment,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 18/77] KVM: x86: add kvm_x86_ops.control_singlestep()

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This function is needed for KVMI_VCPU_CONTROL_SINGLESTEP.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h|  1 +
 arch/x86/kvm/vmx/vmx.c | 11 +++
 3 files changed, 13 insertions(+)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h 
b/arch/x86/include/asm/kvm-x86-ops.h
index ad6c19d9bef5..80cd010ab3fd 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -130,6 +130,7 @@ KVM_X86_OP(desc_intercepted)
 KVM_X86_OP(msr_write_intercepted)
 KVM_X86_OP(control_msr_intercept)
 KVM_X86_OP(fault_gla)
+KVM_X86_OP_NULL(control_singlestep)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_NULL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 96058a8a1e5e..dfc52e451f9b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1511,6 +1511,7 @@ struct kvm_x86_ops {
void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
 
u64 (*fault_gla)(struct kvm_vcpu *vcpu);
+   void (*control_singlestep)(struct kvm_vcpu *vcpu, bool enable);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f3e880ef22c8..86fa84205d23 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7627,6 +7627,16 @@ static u64 vmx_fault_gla(struct kvm_vcpu *vcpu)
return ~0ull;
 }
 
+static void vmx_control_singlestep(struct kvm_vcpu *vcpu, bool enable)
+{
+   if (enable)
+   exec_controls_setbit(to_vmx(vcpu),
+ CPU_BASED_MONITOR_TRAP_FLAG);
+   else
+   exec_controls_clearbit(to_vmx(vcpu),
+   CPU_BASED_MONITOR_TRAP_FLAG);
+}
+
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
.hardware_unsetup = hardware_unsetup,
 
@@ -7771,6 +7781,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
 
.fault_gla = vmx_fault_gla,
+   .control_singlestep = vmx_control_singlestep,
 };
 
 static __init void vmx_setup_user_return_msrs(void)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 75/77] KVM: introspection: add KVMI_VCPU_TRANSLATE_GVA

2021-10-06 Thread Adalbert Lazăr
This helps the introspection tool with the GVA to GPA translations
without the need to read or monitor the guest page tables.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 32 +++
 arch/x86/kvm/kvmi_msg.c   | 15 +
 include/uapi/linux/kvmi.h |  9 ++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 30 +
 4 files changed, 86 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 84922d327255..17735d099be2 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -1055,6 +1055,38 @@ to the introspection tool.
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+24. KVMI_VCPU_TRANSLATE_GVA
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_translate_gva {
+   __u64 gva;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vcpu_translate_gva_reply {
+   __u64 gpa;
+   };
+
+Translates a guest virtual address (``gva``) to a guest physical address
+(``gpa``) or ~0 if the address cannot be translated.
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index ea38eb7ccb7c..cdd497a342a0 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg.c
@@ -312,6 +312,20 @@ static int handle_vcpu_control_singlestep(const struct 
kvmi_vcpu_msg_job *job,
return kvmi_msg_vcpu_reply(job, msg, ec, NULL, 0);
 }
 
+static int handle_vcpu_translate_gva(const struct kvmi_vcpu_msg_job *job,
+const struct kvmi_msg_hdr *msg,
+const void *_req)
+{
+   const struct kvmi_vcpu_translate_gva *req = _req;
+   struct kvmi_vcpu_translate_gva_reply rpl;
+
+   memset(&rpl, 0, sizeof(rpl));
+
+   rpl.gpa = kvm_mmu_gva_to_gpa_system(job->vcpu, req->gva, 0, NULL);
+
+   return kvmi_msg_vcpu_reply(job, msg, 0, , sizeof(rpl));
+}
+
 static const kvmi_vcpu_msg_job_fct msg_vcpu[] = {
[KVMI_VCPU_CONTROL_CR] = handle_vcpu_control_cr,
[KVMI_VCPU_CONTROL_MSR]= handle_vcpu_control_msr,
@@ -325,6 +339,7 @@ static const kvmi_vcpu_msg_job_fct msg_vcpu[] = {
[KVMI_VCPU_INJECT_EXCEPTION]   = handle_vcpu_inject_exception,
[KVMI_VCPU_SET_REGISTERS]  = handle_vcpu_set_registers,
[KVMI_VCPU_SET_XSAVE]  = handle_vcpu_set_xsave,
+   [KVMI_VCPU_TRANSLATE_GVA]  = handle_vcpu_translate_gva,
 };
 
 kvmi_vcpu_msg_job_fct kvmi_arch_vcpu_msg_handler(u16 id)
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 148d145ddea0..a1bfa845fed8 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -50,6 +50,7 @@ enum {
KVMI_VCPU_GET_MTRR_TYPE  = KVMI_VCPU_MESSAGE_ID(11),
KVMI_VCPU_CONTROL_MSR= KVMI_VCPU_MESSAGE_ID(12),
KVMI_VCPU_CONTROL_SINGLESTEP = KVMI_VCPU_MESSAGE_ID(13),
+   KVMI_VCPU_TRANSLATE_GVA  = KVMI_VCPU_MESSAGE_ID(14),
 
KVMI_NEXT_VCPU_MESSAGE
 };
@@ -228,4 +229,12 @@ struct kvmi_vcpu_event_singlestep {
__u8 padding[7];
 };
 
+struct kvmi_vcpu_translate_gva {
+   __u64 gva;
+};
+
+struct kvmi_vcpu_translate_gva_reply {
+   __u64 gpa;
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index faef908eeedd..fc59a8b9f99a 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -1894,6 +1894,35 @@ static void test_cmd_vcpu_control_singlestep(struct 
kvm_vm *vm)
test_unsupported_singlestep(vm);
 }
 
+static void cmd_translate_gva(struct kvm_vm *vm, vm_vaddr_t gva,
+ vm_paddr_t expected_gpa)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   struct kvmi_vcpu_translate_gva cmd;
+   } req = { 0 };
+   struct kvmi_vcpu_translate_gva_reply rpl;
+
+   req.cmd.gva = gva;
+
+   test_vcpu0_command(vm, KVMI_VCPU_TRANSLATE_GVA, &req.hdr, sizeof(req),
+ &rpl, sizeof(rpl), 0);
+   TEST_ASSERT(rpl.gpa == expected_gpa,
+   "Translation failed for gva 0x%lx -> gpa 0x%llx instead of 
0x%lx\n",
+   gva, rpl.gpa, expected_gpa);
+}
+
+static void test_cmd_translate_gva(struct kvm_vm *vm)
+{
+   cmd_translate_gva(vm, test_gva, test_gpa);
+   pr_debug("Tested gva 0x%lx to gpa 0x%lx\n", test_gva, test_gpa);
+
+   cmd_translate_gva(vm, -1, ~0);
+   pr_debug(

[PATCH v12 54/77] KVM: introspection: add KVMI_VCPU_EVENT_BREAKPOINT

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This event is sent when a breakpoint was reached.

The introspection tool can place breakpoints and use them as notification
for when the OS or an application has reached a certain state or is
trying to perform a certain operation (eg. create a process).

Signed-off-by: Mihai Donțu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 48 ++
 arch/x86/kvm/kvmi.c   | 50 +++
 arch/x86/kvm/svm/svm.c| 34 +
 arch/x86/kvm/vmx/vmx.c| 13 -
 include/linux/kvmi_host.h |  3 ++
 include/uapi/linux/kvmi.h | 11 +++-
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 46 +
 virt/kvm/introspection/kvmi.c | 25 ++
 virt/kvm/introspection/kvmi_int.h |  4 ++
 virt/kvm/introspection/kvmi_msg.c | 18 +++
 10 files changed, 248 insertions(+), 4 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 0facdc4595ed..56cf64f32740 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -538,6 +538,7 @@ command) before returning to guest.
 Enables/disables vCPU introspection events. This command can be used with
 the following events::
 
+   KVMI_VCPU_EVENT_BREAKPOINT
KVMI_VCPU_EVENT_HYPERCALL
 
 When an event is enabled, the introspection tool is notified and
@@ -559,6 +560,9 @@ the *KVMI_VM_CONTROL_EVENTS* command.
 * -KVM_EINVAL - the event ID is unknown (use *KVMI_VM_CHECK_EVENT* first)
 * -KVM_EPERM - the access is disallowed (use *KVMI_VM_CHECK_EVENT* first)
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EBUSY - the event can't be intercepted right now
+   (e.g. KVMI_VCPU_EVENT_BREAKPOINT if the #BP event
+is already intercepted by userspace)
 
 11. KVMI_VCPU_GET_REGISTERS
 ---
@@ -817,3 +821,47 @@ found during a scan.
 
 The most useful registers describing the vCPU state can be read from
 ``kvmi_vcpu_event.arch.regs``.
+
+4. KVMI_VCPU_EVENT_BREAKPOINT
+-
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, CRASH, RETRY
+:Parameters:
+
+::
+
+   struct kvmi_event_hdr;
+   struct kvmi_vcpu_event;
+   struct kvmi_vcpu_event_breakpoint {
+   __u64 gpa;
+   __u8 insn_len;
+   __u8 padding[7];
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_event_reply;
+
+This event is sent when a breakpoint was reached and the introspection has
+been enabled for this event (see *KVMI_VCPU_CONTROL_EVENTS*).
+
+Some of these breakpoints could have been injected by the introspection tool,
+placed in the slack space of various functions and used as notification
+for when the OS or an application has reached a certain state or is
+trying to perform a certain operation (like creating a process).
+
+``kvmi_vcpu_event`` (with the vCPU state), the guest physical address
+(``gpa``) where the breakpoint instruction is placed and the breakpoint
+instruction length (``insn_len``) are sent to the introspection tool.
+
+The *RETRY* action is used by the introspection tool for its own
+breakpoints. In most cases, the tool will change the instruction pointer
+before returning this action.
+
+The *CONTINUE* action will cause the breakpoint exception to be reinjected
+(the OS will handle it).
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 5d9891299a56..e4358bc3f09a 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -11,6 +11,7 @@
 
 void kvmi_arch_init_vcpu_events_mask(unsigned long *supported)
 {
+   set_bit(KVMI_VCPU_EVENT_BREAKPOINT, supported);
set_bit(KVMI_VCPU_EVENT_HYPERCALL, supported);
 }
 
@@ -160,3 +161,52 @@ bool kvmi_arch_is_agent_hypercall(struct kvm_vcpu *vcpu)
return (subfunc1 == KVM_HC_XEN_HVM_OP_GUEST_REQUEST_VM_EVENT
&& subfunc2 == 0);
 }
+
+static int kvmi_control_bp_intercept(struct kvm_vcpu *vcpu, bool enable)
+{
+   struct kvm_guest_debug dbg = {};
+   int err = 0;
+
+   if (enable)
+   dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
+
+   err = kvm_arch_vcpu_set_guest_debug(vcpu, &dbg);
+
+   return err;
+}
+
+int kvmi_arch_cmd_control_intercept(struct kvm_vcpu *vcpu,
+   unsigned int event_id, bool enable)
+{
+   int err = 0;
+
+   switch (event_id) {
+   case KVMI_VCPU_EVENT_BREAKPOINT:
+   err = kvmi_control_bp_intercept(vcpu, enable);
+   break;
+   default:
+   break;
+   }
+
+   return err;
+}
+
+void kvmi_arch_breakpoint_event(struct kvm_vcpu *vcpu, u64 gva, u8 insn_len)
+{
+   u32 action;
+   

[PATCH v12 56/77] KVM: introspection: restore the state of #BP interception on unhook

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This commit also ensures that only the userspace or the introspection
tool can control the #BP interception exclusively at one time.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvmi_host.h | 18 ++
 arch/x86/kvm/kvmi.c  | 60 
 arch/x86/kvm/x86.c   |  5 +++
 3 files changed, 83 insertions(+)

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index b776be4bb49f..e008662f91a5 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -4,8 +4,15 @@
 
 #include 
 
+struct kvmi_monitor_interception {
+   bool kvmi_intercepted;
+   bool kvm_intercepted;
+   bool (*monitor_fct)(struct kvm_vcpu *vcpu, bool enable);
+};
+
 struct kvmi_interception {
bool restore_interception;
+   struct kvmi_monitor_interception breakpoint;
 };
 
 struct kvm_vcpu_arch_introspection {
@@ -16,4 +23,15 @@ struct kvm_vcpu_arch_introspection {
 struct kvm_arch_introspection {
 };
 
+#ifdef CONFIG_KVM_INTROSPECTION
+
+bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 dbg);
+
+#else /* CONFIG_KVM_INTROSPECTION */
+
+static inline bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 dbg)
+   { return false; }
+
+#endif /* CONFIG_KVM_INTROSPECTION */
+
 #endif /* _ASM_X86_KVMI_HOST_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 6a7fc8059f23..2bbeadb9daba 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -162,19 +162,72 @@ bool kvmi_arch_is_agent_hypercall(struct kvm_vcpu *vcpu)
&& subfunc2 == 0);
 }
 
+/*
+ * Returns true if one side (kvm or kvmi) tries to enable/disable the 
breakpoint
+ * interception while the other side is still tracking it.
+ */
+bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 dbg)
+{
+   struct kvmi_interception *arch_vcpui = READ_ONCE(vcpu->arch.kvmi);
+   u32 bp_mask = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
+   bool enable = false;
+
+   if ((dbg & bp_mask) == bp_mask)
+   enable = true;
+
+   return (arch_vcpui && arch_vcpui->breakpoint.monitor_fct(vcpu, enable));
+}
+EXPORT_SYMBOL(kvmi_monitor_bp_intercept);
+
+static bool monitor_bp_fct_kvmi(struct kvm_vcpu *vcpu, bool enable)
+{
+   if (enable) {
+   if (static_call(kvm_x86_bp_intercepted)(vcpu))
+   return true;
+   } else if (!vcpu->arch.kvmi->breakpoint.kvmi_intercepted)
+   return true;
+
+   vcpu->arch.kvmi->breakpoint.kvmi_intercepted = enable;
+
+   return false;
+}
+
+static bool monitor_bp_fct_kvm(struct kvm_vcpu *vcpu, bool enable)
+{
+   if (enable) {
+   if (static_call(kvm_x86_bp_intercepted)(vcpu))
+   return true;
+   } else if (!vcpu->arch.kvmi->breakpoint.kvm_intercepted)
+   return true;
+
+   vcpu->arch.kvmi->breakpoint.kvm_intercepted = enable;
+
+   return false;
+}
+
 static int kvmi_control_bp_intercept(struct kvm_vcpu *vcpu, bool enable)
 {
struct kvm_guest_debug dbg = {};
int err = 0;
 
+   vcpu->arch.kvmi->breakpoint.monitor_fct = monitor_bp_fct_kvmi;
if (enable)
dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
 
err = kvm_arch_vcpu_set_guest_debug(vcpu, &dbg);
+   vcpu->arch.kvmi->breakpoint.monitor_fct = monitor_bp_fct_kvm;
 
return err;
 }
 
+static void kvmi_arch_disable_bp_intercept(struct kvm_vcpu *vcpu)
+{
+   kvmi_control_bp_intercept(vcpu, false);
+
+   vcpu->arch.kvmi->breakpoint.kvmi_intercepted = false;
+   vcpu->arch.kvmi->breakpoint.kvm_intercepted = false;
+}
+
 int kvmi_arch_cmd_control_intercept(struct kvm_vcpu *vcpu,
unsigned int event_id, bool enable)
 {
@@ -213,6 +266,7 @@ void kvmi_arch_breakpoint_event(struct kvm_vcpu *vcpu, u64 
gva, u8 insn_len)
 
 static void kvmi_arch_restore_interception(struct kvm_vcpu *vcpu)
 {
+   kvmi_arch_disable_bp_intercept(vcpu);
 }
 
 bool kvmi_arch_clean_up_interception(struct kvm_vcpu *vcpu)
@@ -238,6 +292,12 @@ bool kvmi_arch_vcpu_alloc_interception(struct kvm_vcpu 
*vcpu)
if (!arch_vcpui)
return false;
 
+   arch_vcpui->breakpoint.monitor_fct = monitor_bp_fct_kvm;
+
+   /* pair with kvmi_monitor_bp_intercept() */
+   smp_wmb();
+   WRITE_ONCE(vcpu->arch.kvmi, arch_vcpui);
+
return true;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 415934624afb..f192c713b740 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10533,6 +10533,11 @@ int kvm_arch_vcpu_set_guest_debug(struct kvm_vcpu 
*vcpu,
kvm_queue_exception(vcpu, BP_VECTOR);
}
 
+   if (kvmi_monitor_bp_intercept(vcpu, dbg->control)) {
+   r = -EBUSY;
+   goto

[PATCH v12 51/77] KVM: introspection: add KVMI_VCPU_SET_REGISTERS

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

During an introspection event, the introspection tool might need to
change the vCPU state, for example, to skip the current instruction.

This command is allowed only during vCPU events and the registers will
be set when the reply has been received.

Signed-off-by: Mihai Donțu 
Co-developed-by: Mircea Cîrjaliu 
Signed-off-by: Mircea Cîrjaliu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 29 +++
 arch/x86/include/asm/kvmi_host.h  |  2 +
 arch/x86/kvm/kvmi.c   | 22 +
 arch/x86/kvm/kvmi.h   |  2 +
 arch/x86/kvm/kvmi_msg.c   | 21 +
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 83 +++
 virt/kvm/introspection/kvmi_int.h |  1 +
 virt/kvm/introspection/kvmi_msg.c |  6 +-
 9 files changed, 165 insertions(+), 2 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 3a77c7bef18a..beea20a306ef 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -601,6 +601,35 @@ registers, the special registers and the requested set of 
MSRs.
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 * -KVM_ENOMEM - there is not enough memory to allocate the reply
 
+12. KVMI_VCPU_SET_REGISTERS
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvm_regs;
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Sets the general purpose registers for the given vCPU. The changes become
+visible to other threads accessing the KVM vCPU structure after the event
+currently being handled is replied to.
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EOPNOTSUPP - the command hasn't been received during an introspection 
event
+
 Events
 ==
 
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 05ade3a16b24..cc945151cb36 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -5,6 +5,8 @@
 #include 
 
 struct kvm_vcpu_arch_introspection {
+   struct kvm_regs delayed_regs;
+   bool have_delayed_regs;
 };
 
 struct kvm_arch_introspection {
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index bc40d7dcc456..808b7176e7d8 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -118,3 +118,25 @@ int kvmi_arch_cmd_vcpu_get_registers(struct kvm_vcpu *vcpu,
 
return err ? -KVM_EINVAL : 0;
 }
+
+void kvmi_arch_cmd_vcpu_set_registers(struct kvm_vcpu *vcpu,
+ const struct kvm_regs *regs)
+{
+   struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu);
+   struct kvm_regs *dest = >arch.delayed_regs;
+
+   memcpy(dest, regs, sizeof(*dest));
+
+   vcpui->arch.have_delayed_regs = true;
+}
+
+void kvmi_arch_post_reply(struct kvm_vcpu *vcpu)
+{
+   struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu);
+
+   if (!vcpui->arch.have_delayed_regs)
+   return;
+
+   kvm_arch_vcpu_set_regs(vcpu, >arch.delayed_regs, false);
+   vcpui->arch.have_delayed_regs = false;
+}
diff --git a/arch/x86/kvm/kvmi.h b/arch/x86/kvm/kvmi.h
index 7aab4aaabcda..4eeb0c900083 100644
--- a/arch/x86/kvm/kvmi.h
+++ b/arch/x86/kvm/kvmi.h
@@ -5,5 +5,7 @@
 int kvmi_arch_cmd_vcpu_get_registers(struct kvm_vcpu *vcpu,
const struct kvmi_vcpu_get_registers *req,
struct kvmi_vcpu_get_registers_reply *rpl);
+void kvmi_arch_cmd_vcpu_set_registers(struct kvm_vcpu *vcpu,
+ const struct kvm_regs *regs);
 
 #endif
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index ab4b99b66acc..b4113749bcd0 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg.c
@@ -90,9 +90,30 @@ static int handle_vcpu_get_registers(const struct 
kvmi_vcpu_msg_job *job,
return err;
 }
 
+static int handle_vcpu_set_registers(const struct kvmi_vcpu_msg_job *job,
+const struct kvmi_msg_hdr *msg,
+const void *req)
+{
+   const struct kvm_regs *regs = req;
+   size_t cmd_size;
+   int ec = 0;
+
+   cmd_size = sizeof(struct kvmi_vcpu_hdr) + sizeof(*regs);
+
+   if (cmd_size > msg->size)
+   ec = -KVM_EINVAL;
+   else if (!VCPUI(job->vcpu)->waiting_for_reply)
+   ec = -KVM_EOPNOTSUPP;
+   else
+   kvmi_arch_cmd_vcpu_set_registers(job->vcpu, regs);
+
+   return kvmi_msg_vcpu_reply(job, msg, ec, NULL, 0);
+}
+
 static const kvmi_vcpu_msg_job_fct msg_vcpu[] = {
[KVMI_VCPU_GET_INFO]  = handle_vcpu_get_info,

[PATCH v12 76/77] KVM: introspection: emulate a guest page table walk on SPT violations due to A/D bit updates

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

On SPT page faults caused by guest page table walks, use the existing
guest page table walk code to make the necessary adjustments to the A/D
bits and return to guest. This effectively bypasses the x86 emulator
which was making the wrong modifications, leading one OS (Windows 8.1 x64)
to triple-fault very early in the boot process with the introspection
enabled.

With introspection disabled, these faults are handled by simply removing
the protection from the affected guest page and returning to guest.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvmi_host.h |  2 ++
 arch/x86/kvm/kvmi.c  | 30 ++
 arch/x86/kvm/mmu/mmu.c   | 12 ++--
 include/linux/kvmi_host.h|  3 +++
 virt/kvm/introspection/kvmi.c| 26 ++
 5 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 045bc885f007..5a1700d36b67 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -78,6 +78,7 @@ bool kvmi_descriptor_event(struct kvm_vcpu *vcpu, u8 
descriptor, bool write);
 bool kvmi_msr_event(struct kvm_vcpu *vcpu, struct msr_data *msr);
 bool kvmi_monitor_msrw_intercept(struct kvm_vcpu *vcpu, u32 msr, bool enable);
 bool kvmi_msrw_intercept_originator(struct kvm_vcpu *vcpu);
+bool kvmi_update_ad_flags(struct kvm_vcpu *vcpu);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
@@ -103,6 +104,7 @@ static inline bool kvmi_monitor_msrw_intercept(struct 
kvm_vcpu *vcpu, u32 msr,
   bool enable) { return false; }
 static inline bool kvmi_msrw_intercept_originator(struct kvm_vcpu *vcpu)
{ return false; }
+static inline bool kvmi_update_ad_flags(struct kvm_vcpu *vcpu) { return false; 
}
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 58c2debd8815..ce387c8a1367 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1102,3 +1102,33 @@ void kvmi_arch_stop_singlestep(struct kvm_vcpu *vcpu)
 {
static_call(kvm_x86_control_singlestep)(vcpu, false);
 }
+
+bool kvmi_update_ad_flags(struct kvm_vcpu *vcpu)
+{
+   struct kvm_introspection *kvmi;
+   bool ret = false;
+   gva_t gva;
+   gpa_t gpa;
+
+   kvmi = kvmi_get(vcpu->kvm);
+   if (!kvmi)
+   return false;
+
+   gva = static_call(kvm_x86_fault_gla)(vcpu);
+   if (gva == ~0ull)
+   goto out;
+
+   gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, PFERR_WRITE_MASK, NULL);
+   if (gpa == UNMAPPED_GVA) {
+   struct x86_exception exception = { };
+
+   gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, 0, );
+   }
+
+   ret = (gpa != UNMAPPED_GVA);
+
+out:
+   kvmi_put(vcpu->kvm);
+
+   return ret;
+}
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c90683284098..d0a8ac891bc2 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -43,6 +43,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -5333,8 +5334,15 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t 
cr2_or_gpa, u64 error_code,
 */
if (vcpu->arch.mmu->direct_map &&
(error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
-   kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa));
-   return 1;
+   gfn_t gfn = gpa_to_gfn(cr2_or_gpa);
+
+   if (kvmi_tracked_gfn(vcpu, gfn)) {
+   if (kvmi_update_ad_flags(vcpu))
+   return 1;
+   } else {
+   kvm_mmu_unprotect_page(vcpu->kvm, gfn);
+   return 1;
+   }
}
 
/*
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index ec38e434c8e9..90647bb2a570 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -83,6 +83,7 @@ bool kvmi_breakpoint_event(struct kvm_vcpu *vcpu, u64 gva, u8 
insn_len);
 bool kvmi_vcpu_running_singlestep(struct kvm_vcpu *vcpu);
 void kvmi_singlestep_done(struct kvm_vcpu *vcpu);
 void kvmi_singlestep_failed(struct kvm_vcpu *vcpu);
+bool kvmi_tracked_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 #else
 
@@ -101,6 +102,8 @@ static inline bool kvmi_vcpu_running_singlestep(struct 
kvm_vcpu *vcpu)
{ return false; }
 static inline void kvmi_singlestep_done(struct kvm_vcpu *vcpu) { }
 static inline void kvmi_singlestep_failed(struct kvm_vcpu *vcpu) { }
+static inline bool kvmi_tracked_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+   { return false; }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index 4ed145421d69..8ef7b3a4ef51 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b

[PATCH v12 04/77] KVM: x86: add kvm_arch_vcpu_set_regs()

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This is needed for the KVMI_VCPU_SET_REGISTERS command, which allows
an introspection tool to override the kvm_regs structure for a specific
vCPU without clearing the pending exception.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/x86.c   | 13 ++---
 include/linux/kvm_host.h |  2 ++
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f7d09757b85f..bbcd256dc2f4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10104,8 +10104,15 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct 
kvm_regs *regs)
 
kvm_rip_write(vcpu, regs->rip);
kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
+}
+
+void kvm_arch_vcpu_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs,
+   bool clear_exception)
+{
+   __set_regs(vcpu, regs);
 
-   vcpu->arch.exception.pending = false;
+   if (clear_exception)
+   vcpu->arch.exception.pending = false;
 
kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
@@ -10113,7 +10120,7 @@ static void __set_regs(struct kvm_vcpu *vcpu, struct 
kvm_regs *regs)
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
vcpu_load(vcpu);
-   __set_regs(vcpu, regs);
+   kvm_arch_vcpu_set_regs(vcpu, regs, true);
vcpu_put(vcpu);
return 0;
 }
@@ -10601,7 +10608,7 @@ static void store_regs(struct kvm_vcpu *vcpu)
 static int sync_regs(struct kvm_vcpu *vcpu)
 {
if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_REGS) {
-   __set_regs(vcpu, >run->s.regs.regs);
+   kvm_arch_vcpu_set_regs(vcpu, >run->s.regs.regs, true);
vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
}
if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7bc45e1879db..712642be3307 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1028,6 +1028,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
 void kvm_arch_vcpu_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
+void kvm_arch_vcpu_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs,
+   bool clear_exception);
 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
  struct kvm_sregs *sregs);
 void kvm_arch_vcpu_get_sregs(struct kvm_vcpu *vcpu,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 68/77] KVM: introspection: add KVMI_VCPU_CONTROL_MSR and KVMI_VCPU_EVENT_MSR

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This command is used to enable/disable introspection for a specific
MSR. The KVMI_VCPU_EVENT_MSR event is sent when the tracked MSR is going
to be changed. The introspection tool can respond by allowing the guest
to continue with normal execution or by discarding the change.

This is meant to prevent malicious changes to MSRs
such as MSR_IA32_SYSENTER_EIP.

Signed-off-by: Mihai Donțu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  79 +++
 arch/x86/include/asm/kvmi_host.h  |  12 ++
 arch/x86/include/uapi/asm/kvmi.h  |  18 +++
 arch/x86/kvm/kvmi.c   | 126 ++
 arch/x86/kvm/kvmi.h   |   3 +
 arch/x86/kvm/kvmi_msg.c   |  52 
 arch/x86/kvm/x86.c|   3 +
 include/uapi/linux/kvmi.h |   2 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 111 +++
 9 files changed, 406 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index e24a93e93fe8..b39d6ac47c9a 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -542,6 +542,7 @@ the following events::
KVMI_VCPU_EVENT_CR
KVMI_VCPU_EVENT_DESCRIPTOR
KVMI_VCPU_EVENT_HYPERCALL
+   KVMI_VCPU_EVENT_MSR
KVMI_VCPU_EVENT_XSETBV
 
 When an event is enabled, the introspection tool is notified and
@@ -903,6 +904,48 @@ Returns the guest memory type for a specific guest 
physical address (``gpa``).
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+21. KVMI_VCPU_CONTROL_MSR
+-
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_control_msr {
+   __u8 enable;
+   __u8 padding1;
+   __u16 padding2;
+   __u32 msr;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Enables/disables introspection for a specific MSR and must be used
+in addition to *KVMI_VCPU_CONTROL_EVENTS* with the *KVMI_VCPU_EVENT_MSR*
+ID set.
+
+Currently, only MSRs within the following two ranges are supported. Trying
+to control events for any other register will fail with -KVM_EINVAL::
+
+   0  ... 0x1fff
+   0xc000 ... 0xc0001fff
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the specified MSR is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EPERM  - the interception of the selected MSR is disallowed
+from userspace (KVM_X86_SET_MSR_FILTER)
+
 Events
 ==
 
@@ -1241,3 +1284,39 @@ introspection tool.
KVMI_DESC_TR
 
 ``write`` is 1 if the descriptor was written, 0 otherwise.
+
+9. KVMI_VCPU_EVENT_MSR
+--
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_event;
+   struct kvmi_vcpu_event_msr {
+   __u32 msr;
+   __u32 padding;
+   __u64 old_value;
+   __u64 new_value;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_event_reply;
+   struct kvmi_vcpu_event_msr_reply {
+   __u64 new_val;
+   };
+
+This event is sent when a model specific register is going to be changed
+and the introspection has been enabled for this event and for this specific
+register (see **KVMI_VCPU_CONTROL_EVENTS**).
+
+``kvmi_vcpu_event`` (with the vCPU state), the MSR number (``msr``),
+the old value (``old_value``) and the new value (``new_value``) are sent
+to the introspection tool. The *CONTINUE* action will set the ``new_val``.
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index a872277eba67..5a4fc5b80907 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -4,7 +4,10 @@
 
 #include 
 
+struct msr_data;
+
 #define KVMI_NUM_CR 5
+#define KVMI_NUM_MSR 0x2000
 
 struct kvmi_monitor_interception {
bool kvmi_intercepted;
@@ -18,6 +21,12 @@ struct kvmi_interception {
struct kvmi_monitor_interception breakpoint;
struct kvmi_monitor_interception cr3w;
struct kvmi_monitor_interception descriptor;
+   struct {
+   struct {
+   DECLARE_BITMAP(low, KVMI_NUM_MSR);
+   DECLARE_BITMAP(high, KVMI_NUM_MSR);
+   } kvmi_mask;
+   } msrw;
 };
 
 struct kvm_vcpu_arch_introspection {
@@ -51,6 +60,7 @@ void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
   u64 old_value, u64 new_value);
 bool kvmi_monitor_desc_intercept(struct kvm_vcpu *vcpu, bool enable);
 bool kvmi_descriptor_event(

[PATCH v12 11/77] KVM: x86: add kvm_x86_ops.control_desc_intercept()

2021-10-06 Thread Adalbert Lazăr
This function is needed to intercept descriptor-table registers access.

Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h|  1 +
 arch/x86/kvm/svm/svm.c | 26 ++
 arch/x86/kvm/vmx/vmx.c | 15 +--
 4 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h 
b/arch/x86/include/asm/kvm-x86-ops.h
index 9a962bd098d0..dd08f3120f8f 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -125,6 +125,7 @@ KVM_X86_OP(bp_intercepted)
 KVM_X86_OP(control_cr3_intercept)
 KVM_X86_OP(cr3_write_intercepted)
 KVM_X86_OP(desc_ctrl_supported)
+KVM_X86_OP(control_desc_intercept)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_NULL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1acaa27ffd8f..2e5ddb18804b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1350,6 +1350,7 @@ struct kvm_x86_ops {
void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
bool (*desc_ctrl_supported)(void);
+   void (*control_desc_intercept)(struct kvm_vcpu *vcpu, bool enable);
void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e5cd8813cca6..0d46f5aa20c3 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1780,6 +1780,31 @@ static bool svm_desc_ctrl_supported(void)
return true;
 }
 
+static void svm_control_desc_intercept(struct kvm_vcpu *vcpu, bool enable)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   if (enable) {
+   svm_set_intercept(svm, INTERCEPT_STORE_IDTR);
+   svm_set_intercept(svm, INTERCEPT_STORE_GDTR);
+   svm_set_intercept(svm, INTERCEPT_STORE_LDTR);
+   svm_set_intercept(svm, INTERCEPT_STORE_TR);
+   svm_set_intercept(svm, INTERCEPT_LOAD_IDTR);
+   svm_set_intercept(svm, INTERCEPT_LOAD_GDTR);
+   svm_set_intercept(svm, INTERCEPT_LOAD_LDTR);
+   svm_set_intercept(svm, INTERCEPT_LOAD_TR);
+   } else {
+   svm_clr_intercept(svm, INTERCEPT_STORE_IDTR);
+   svm_clr_intercept(svm, INTERCEPT_STORE_GDTR);
+   svm_clr_intercept(svm, INTERCEPT_STORE_LDTR);
+   svm_clr_intercept(svm, INTERCEPT_STORE_TR);
+   svm_clr_intercept(svm, INTERCEPT_LOAD_IDTR);
+   svm_clr_intercept(svm, INTERCEPT_LOAD_GDTR);
+   svm_clr_intercept(svm, INTERCEPT_LOAD_LDTR);
+   svm_clr_intercept(svm, INTERCEPT_LOAD_TR);
+   }
+}
+
 void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4666,6 +4691,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.get_gdt = svm_get_gdt,
.set_gdt = svm_set_gdt,
.desc_ctrl_supported = svm_desc_ctrl_supported,
+   .control_desc_intercept = svm_control_desc_intercept,
.set_dr7 = svm_set_dr7,
.sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
.cache_reg = svm_cache_reg,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 026d678b82b9..d0f02d52b401 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3160,6 +3160,16 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, 
hpa_t root_hpa,
vmcs_writel(GUEST_CR3, guest_cr3);
 }
 
+static void vmx_control_desc_intercept(struct kvm_vcpu *vcpu, bool enable)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   if (enable)
+   secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
+   else
+   secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC);
+}
+
 static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
/*
@@ -3197,11 +3207,11 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long 
cr4)
 
if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
if (cr4 & X86_CR4_UMIP) {
-   secondary_exec_controls_setbit(vmx, 
SECONDARY_EXEC_DESC);
+   vmx_control_desc_intercept(vcpu, true);
hw_cr4 &= ~X86_CR4_UMIP;
} else if (!is_guest_mode(vcpu) ||
!nested_cpu_has2(get_vmcs12(vcpu), 
SECONDARY_EXEC_DESC)) {
-   secondary_exec_controls_clearbit(vmx, 
SECONDARY_EXEC_DESC);
+   vmx_control_desc_intercept(vcpu, false);
}
}
 
@@ -7636,6 +7646,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.get_gdt = vmx_get_gdt,
.set_gdt = vmx_set_gdt,
.des

[PATCH v12 30/77] KVM: introspection: add hook/unhook ioctls

2021-10-06 Thread Adalbert Lazăr
On hook, a new thread is created to handle the messages coming from the
introspection tool (commands or event replies). The VM related commands
are handled by this thread, while the vCPU commands and events replies
are dispatched to the vCPU threads.

On unhook, the socket is shut down, which signals both the receiving
thread to quit (because it might be blocked in recvmsg()) and the
introspection tool to clean up.

The mutex is used to protect the 'kvm->kvmi' pointer when accessed
through ioctls.

The reference counter is incremented by the receiving thread (for
its entire lifetime) and by the vCPU threads while sending events or
handling commands.

The completion object is set when the reference counter reaches zero,
allowing the unhook process to continue and free the introspection
structures.

Co-developed-by: Mircea Cîrjaliu 
Signed-off-by: Mircea Cîrjaliu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/api.rst|  64 +++
 arch/x86/include/asm/kvmi_host.h  |   8 +
 arch/x86/kvm/Makefile |   2 +-
 arch/x86/kvm/x86.c|   5 +
 include/linux/kvm_host.h  |   5 +
 include/linux/kvmi_host.h |  18 ++
 include/uapi/linux/kvm.h  |  10 ++
 include/uapi/linux/kvmi.h |  13 ++
 tools/testing/selftests/kvm/Makefile  |   1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  |  94 ++
 virt/kvm/introspection/kvmi.c | 160 ++
 virt/kvm/introspection/kvmi_int.h |  10 ++
 virt/kvm/introspection/kvmi_msg.c |  39 +
 virt/kvm/kvm_main.c   |  21 +++
 14 files changed, 449 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/include/asm/kvmi_host.h
 create mode 100644 include/uapi/linux/kvmi.h
 create mode 100644 tools/testing/selftests/kvm/x86_64/kvmi_test.c
 create mode 100644 virt/kvm/introspection/kvmi_msg.c

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 0c0bf26426b3..682380425ef6 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5473,6 +5473,59 @@ the trailing ``'\0'``, is indicated by ``name_size`` in 
the header.
 The Stats Data block contains an array of 64-bit values in the same order
 as the descriptors in Descriptors block.
 
+4.134 KVM_INTROSPECTION_HOOK
+
+
+:Capability: KVM_CAP_INTROSPECTION
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_introspection (in)
+:Returns: 0 on success, a negative value on error
+
+Errors:
+
+  == ==
+  ENOMEM the memory allocation failed
+  EEXIST the VM is already introspected
+  EINVAL the file descriptor doesn't correspond to an active socket
+  EINVAL the padding is not zero
+  EPERM  the introspection is disabled (kvm.introspection=0)
+  == ==
+
+This ioctl is used to enable the introspection of the current VM.
+
+::
+
+  struct kvm_introspection {
+   __s32 fd;
+   __u32 padding;
+   __u8 uuid[16];
+  };
+
+fd is the file descriptor of a socket connected to the introspection tool,
+
+padding must be zero (it might be used in the future),
+
+uuid is used for debug and error messages.
+
+4.135 KVM_INTROSPECTION_UNHOOK
+--
+
+:Capability: KVM_CAP_INTROSPECTION
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: none
+:Returns: 0 on success, a negative value on error
+
+Errors:
+
+  == ==
+  EPERM  the introspection is disabled (kvm.introspection=0)
+  == ==
+
+This ioctl is used to free all introspection structures
+related to this VM.
+
 5. The kvm_run structure
 
 
@@ -7440,3 +7493,14 @@ The argument to KVM_ENABLE_CAP is also a bitmask, and 
must be a subset
 of the result of KVM_CHECK_EXTENSION.  KVM will forward to userspace
 the hypercalls whose corresponding bit is in the argument, and return
 ENOSYS for the others.
+
+8.35 KVM_CAP_INTROSPECTION
+--
+
+:Architectures: x86
+
+This capability indicates that KVM supports VM introspection
+and it is enabled.
+
+The KVM_CHECK_EXTENSION ioctl returns the introspection API version
+(a number larger than 0).
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
new file mode 100644
index ..38c398262913
--- /dev/null
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_KVMI_HOST_H
+#define _ASM_X86_KVMI_HOST_H
+
+struct kvm_arch_introspection {
+};
+
+#endif /* _ASM_X86_KVMI_HOST_H */
diff --git a/arch/x86/kvm/Makefile b/arch/

[PATCH v12 24/77] KVM: x86: page track: provide all callbacks with the guest virtual address

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This is needed because the emulator calls the page tracking code
irrespective of the current VM-exit reason or available information.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h   |  2 +-
 arch/x86/include/asm/kvm_page_track.h | 10 ++
 arch/x86/kvm/mmu/mmu.c|  2 +-
 arch/x86/kvm/mmu/page_track.c |  6 +++---
 arch/x86/kvm/x86.c| 16 
 drivers/gpu/drm/i915/gvt/kvmgt.c  |  2 +-
 6 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 43569a6fc776..692e55a5c312 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1610,7 +1610,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned 
long kvm_nr_mmu_pages);
 
 int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
 
-int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
+int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
  const void *val, int bytes);
 
 struct kvm_irq_mask_notifier {
diff --git a/arch/x86/include/asm/kvm_page_track.h 
b/arch/x86/include/asm/kvm_page_track.h
index 79d84a94f8eb..f981b6360de5 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -28,12 +28,14 @@ struct kvm_page_track_notifier_node {
 *
 * @vcpu: the vcpu where the write access happened.
 * @gpa: the physical address written by guest.
+* @gva: the virtual address written by guest.
 * @new: the data was written to the address.
 * @bytes: the written length.
 * @node: this node
 */
-   void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
-   int bytes, struct kvm_page_track_notifier_node 
*node);
+   void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+   const u8 *new, int bytes,
+   struct kvm_page_track_notifier_node *node);
/*
 * It is called when memory slot is being moved or removed
 * users can drop write-protection for the pages in that memory slot
@@ -72,7 +74,7 @@ kvm_page_track_register_notifier(struct kvm *kvm,
 void
 kvm_page_track_unregister_notifier(struct kvm *kvm,
   struct kvm_page_track_notifier_node *n);
-void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
- int bytes);
+void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ const u8 *new, int bytes);
 void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
 #endif
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 24a9f4c3f5e7..a802c46d0e16 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5115,7 +5115,7 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, 
gpa_t gpa, int *nspte)
return spte;
 }
 
-static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
  const u8 *new, int bytes,
  struct kvm_page_track_notifier_node *node)
 {
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index bb5d60bd4dbf..e0b1cdd3013e 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -281,8 +281,8 @@ EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
  * The node should figure out if the written page is the one that node is
  * interested in by itself.
  */
-void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
- int bytes)
+void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ const u8 *new, int bytes)
 {
struct kvm_page_track_notifier_head *head;
struct kvm_page_track_notifier_node *n;
@@ -297,7 +297,7 @@ void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, 
const u8 *new,
hlist_for_each_entry_srcu(n, >track_notifier_list, node,
srcu_read_lock_held(>track_srcu))
if (n->track_write)
-   n->track_write(vcpu, gpa, new, bytes, n);
+   n->track_write(vcpu, gpa, gva, new, bytes, n);
srcu_read_unlock(>track_srcu, idx);
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b01d865f6047..723ef3b7f95f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6592,7 +6592,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, 
unsigned long gva,
return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
 }
 
-int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
+int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
   

[PATCH v12 74/77] KVM: introspection: add KVMI_VCPU_EVENT_SINGLESTEP

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This event is sent after each instruction when the singlestep has been
enabled for a vCPU.

Signed-off-by: Nicușor Cîțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 31 +++
 arch/x86/kvm/kvmi.c   |  1 +
 arch/x86/kvm/kvmi_msg.c   |  6 +++
 arch/x86/kvm/vmx/vmx.c|  6 +++
 include/linux/kvmi_host.h |  4 ++
 include/uapi/linux/kvmi.h |  6 +++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 54 +--
 virt/kvm/introspection/kvmi.c | 43 +++
 virt/kvm/introspection/kvmi_int.h |  1 +
 virt/kvm/introspection/kvmi_msg.c | 17 ++
 10 files changed, 166 insertions(+), 3 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 7f70345ebaac..84922d327255 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -565,6 +565,7 @@ because these are sent as a result of certain commands (but 
they can be
 disallowed by the device manager) ::
 
KVMI_VCPU_EVENT_PAUSE
+   KVMI_VCPU_EVENT_SINGLESTEP
KVMI_VCPU_EVENT_TRAP
 
 The VM events (e.g. *KVMI_VM_EVENT_UNHOOK*) are controlled with
@@ -1044,8 +1045,12 @@ Enables/disables singlestep for the selected vCPU.
 The introspection tool should use *KVMI_GET_VERSION*, to check
 if the hardware supports singlestep (see **KVMI_GET_VERSION**).
 
+After every instruction, a *KVMI_VCPU_EVENT_SINGLESTEP* event is sent
+to the introspection tool.
+
 :Errors:
 
+* -KVM_EPERM  - the *KVMI_VCPU_EVENT_SINGLESTEP* event is disallowed
 * -KVM_EOPNOTSUPP - the hardware doesn't support singlestep
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
@@ -1489,3 +1494,29 @@ emulation).
 The *RETRY* action is used by the introspection tool to retry the
 execution of the current instruction, usually because it changed the
 instruction pointer or the page restrictions.
+
+11. KVMI_VCPU_EVENT_SINGLESTEP
+--
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_event;
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_event_reply;
+   struct kvmi_vcpu_event_singlestep {
+   __u8 failed;
+   __u8 padding[7];
+   };
+
+This event is sent after each instruction, as long as the singlestep is
+enabled for the current vCPU (see **KVMI_VCPU_CONTROL_SINGLESTEP**).
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index e26a0eee1592..58c2debd8815 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -20,6 +20,7 @@ void kvmi_arch_init_vcpu_events_mask(unsigned long *supported)
set_bit(KVMI_VCPU_EVENT_DESCRIPTOR, supported);
set_bit(KVMI_VCPU_EVENT_MSR, supported);
set_bit(KVMI_VCPU_EVENT_PF, supported);
+   set_bit(KVMI_VCPU_EVENT_SINGLESTEP, supported);
set_bit(KVMI_VCPU_EVENT_TRAP, supported);
set_bit(KVMI_VCPU_EVENT_XSETBV, supported);
 }
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index 6d3980e18281..ea38eb7ccb7c 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg.c
@@ -284,6 +284,12 @@ static int handle_vcpu_control_singlestep(const struct 
kvmi_vcpu_msg_job *job,
struct kvm_vcpu *vcpu = job->vcpu;
int ec = 0;
 
+   if (!kvmi_is_event_allowed(KVMI(vcpu->kvm),
+  KVMI_VCPU_EVENT_SINGLESTEP)) {
+   ec = -KVM_EPERM;
+   goto reply;
+   }
+
if (non_zero_padding(req->padding, ARRAY_SIZE(req->padding)) ||
req->enable > 1) {
ec = -KVM_EINVAL;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1df0a29d1d8d..4f5b04fb1ede 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5587,6 +5587,7 @@ static int handle_pause(struct kvm_vcpu *vcpu)
 
 static int handle_monitor_trap(struct kvm_vcpu *vcpu)
 {
+   kvmi_singlestep_done(vcpu);
return 1;
 }
 
@@ -6137,6 +6138,11 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, 
fastpath_t exit_fastpath)
}
}
 
+   if (kvmi_vcpu_running_singlestep(vcpu) &&
+   exit_reason.basic != EXIT_REASON_EPT_VIOLATION &&
+   exit_reason.basic != EXIT_REASON_MONITOR_TRAP_FLAG)
+   kvmi_singlestep_failed(vcpu);
+
if (exit_fastpath != EXIT_FASTPATH_NONE)
return 1;
 
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index e2103ab9d0d5..ec38e434c8e9 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -81,6 +81,8 @@ void kvmi_handle_requests(struct kvm_vcpu *vcpu);
 bool kvmi_hypercall_event(struct kvm_vcpu *vcpu);
 bool kvmi_breakpoint_event(struct

[PATCH v12 50/77] KVM: introspection: add KVMI_VCPU_GET_REGISTERS

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This command is used to get kvm_regs and kvm_sregs structures,
plus a list of struct kvm_msrs from a specific vCPU.

While the kvm_regs and kvm_sregs structures are included with every
event, this command allows reading any MSR.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 44 
 arch/x86/include/uapi/asm/kvmi.h  | 15 
 arch/x86/kvm/kvmi.c   | 25 +++
 arch/x86/kvm/kvmi.h   |  9 +++
 arch/x86/kvm/kvmi_msg.c   | 72 ++-
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 59 +++
 7 files changed, 224 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kvm/kvmi.h

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 892b960d978d..3a77c7bef18a 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -557,6 +557,50 @@ the *KVMI_VM_CONTROL_EVENTS* command.
 * -KVM_EPERM - the access is disallowed (use *KVMI_VM_CHECK_EVENT* first)
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+11. KVMI_VCPU_GET_REGISTERS
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_get_registers {
+   __u16 nmsrs;
+   __u16 padding1;
+   __u32 padding2;
+   __u32 msrs_idx[0];
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vcpu_get_registers_reply {
+   __u32 mode;
+   __u32 padding;
+   struct kvm_regs regs;
+   struct kvm_sregs sregs;
+   struct kvm_msrs msrs;
+   };
+
+For the given vCPU and the ``nmsrs`` sized array of MSRs registers,
+returns the current vCPU mode (in bytes: 2, 4 or 8), the general purpose
+registers, the special registers and the requested set of MSRs.
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - one of the indicated MSRs is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - the reply size is larger than
+kvmi_get_version_reply.max_msg_size (too many MSRs)
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_ENOMEM - there is not enough memory to allocate the reply
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 9d9df09d381a..11835bf9bdc6 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -30,4 +30,19 @@ struct kvmi_vcpu_event_arch {
} msrs;
 };
 
+struct kvmi_vcpu_get_registers {
+   __u16 nmsrs;
+   __u16 padding1;
+   __u32 padding2;
+   __u32 msrs_idx[0];
+};
+
+struct kvmi_vcpu_get_registers_reply {
+   __u32 mode;
+   __u32 padding;
+   struct kvm_regs regs;
+   struct kvm_sregs sregs;
+   struct kvm_msrs msrs;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 90c5be3c462f..bc40d7dcc456 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -93,3 +93,28 @@ void kvmi_arch_setup_vcpu_event(struct kvm_vcpu *vcpu,
ev->arch.mode = kvmi_vcpu_mode(vcpu, >sregs);
kvmi_get_msrs(vcpu, event);
 }
+
+int kvmi_arch_cmd_vcpu_get_registers(struct kvm_vcpu *vcpu,
+   const struct kvmi_vcpu_get_registers *req,
+   struct kvmi_vcpu_get_registers_reply *rpl)
+{
+   struct msr_data m = {.host_initiated = true};
+   int k, err = 0;
+
+   kvm_arch_vcpu_get_regs(vcpu, >regs);
+   kvm_arch_vcpu_get_sregs(vcpu, >sregs);
+   rpl->mode = kvmi_vcpu_mode(vcpu, >sregs);
+   rpl->msrs.nmsrs = req->nmsrs;
+
+   for (k = 0; k < req->nmsrs && !err; k++) {
+   m.index = req->msrs_idx[k];
+
+   err = static_call(kvm_x86_get_msr)(vcpu, );
+   if (!err) {
+   rpl->msrs.entries[k].index = m.index;
+   rpl->msrs.entries[k].data = m.data;
+   }
+   }
+
+   return err ? -KVM_EINVAL : 0;
+}
diff --git a/arch/x86/kvm/kvmi.h b/arch/x86/kvm/kvmi.h
new file mode 100644
index ..7aab4aaabcda
--- /dev/null
+++ b/arch/x86/kvm/kvmi.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_X86_KVM_KVMI_H
+#define ARCH_X86_KVM_KVMI_H
+
+int kvmi_arch_cmd_vcpu_get_registers(struct kvm_vcpu *vcpu,
+   const struct kvmi_vcpu_get_registers *req,
+   struct kvmi_vcpu_get_registers_reply *rpl);
+
+#endif
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index 0d83c47f3758..ab4b99b66acc 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg

[PATCH v12 70/77] KVM: introspection: add KVMI_VM_SET_PAGE_ACCESS

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This command sets the spte access bits (rwx) for an array of guest
physical addresses (through the page tracking subsystem).

These GPAs, with the requested access bits, are also kept in a radix
tree in order to filter out the #PF events which are of no interest to
the introspection tool and to reapply the settings when a memory slot
is moved.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  59 +
 arch/x86/include/asm/kvm_host.h   |   2 +
 arch/x86/include/asm/kvmi_host.h  |   8 ++
 arch/x86/kvm/kvmi.c   |  42 ++
 include/linux/kvmi_host.h |   3 +
 include/uapi/linux/kvmi.h |  20 +++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  |  50 
 virt/kvm/introspection/kvmi.c | 120 +-
 virt/kvm/introspection/kvmi_int.h |  10 ++
 virt/kvm/introspection/kvmi_msg.c |  59 +
 10 files changed, 372 insertions(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index b39d6ac47c9a..02b9f0a240c0 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -946,6 +946,65 @@ to control events for any other register will fail with 
-KVM_EINVAL::
 * -KVM_EPERM  - the interception of the selected MSR is disallowed
 from userspace (KVM_X86_SET_MSR_FILTER)
 
+22. KVMI_VM_SET_PAGE_ACCESS
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_set_page_access {
+   __u16 count;
+   __u16 padding1;
+   __u32 padding2;
+   struct kvmi_page_access_entry entries[0];
+   };
+
+where::
+
+   struct kvmi_page_access_entry {
+   __u64 gpa;
+   __u8 access;
+   __u8 padding[7];
+   };
+
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Sets the access bits (rwx) for an array of ``count`` guest physical
+addresses (``gpa``).
+
+The valid access bits are::
+
+   KVMI_PAGE_ACCESS_R
+   KVMI_PAGE_ACCESS_W
+   KVMI_PAGE_ACCESS_X
+
+
+The command will fail with -KVM_EINVAL if any of the specified combination
+of access bits is not supported or the address (``gpa``) is not valid
+(visible).
+
+The command will try to apply all changes and return the first error if
+some failed. The introspection tool should handle the rollback.
+
+In order to 'forget' an address, all three bits ('rwx') must be set.
+
+:Errors:
+
+* -KVM_EINVAL - the specified access bits combination is invalid
+* -KVM_EINVAL - the address is not valid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - the message size is invalid
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_ENOMEM - there is not enough memory to add the page tracking structures
+
 Events
 ==
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f1e9adc24025..d0ce63217502 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -55,6 +55,8 @@
 /* memory slots that are not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 3
 
+#include 
+
 #define KVM_HALT_POLL_NS_DEFAULT 20
 
 #define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 8822f0310156..3e749208b8a1 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -2,6 +2,7 @@
 #ifndef _ASM_X86_KVMI_HOST_H
 #define _ASM_X86_KVMI_HOST_H
 
+#include 
 #include 
 
 struct msr_data;
@@ -54,6 +55,13 @@ struct kvm_vcpu_arch_introspection {
 struct kvm_arch_introspection {
 };
 
+#define KVMI_MEM_SLOTS_NUM SHRT_MAX
+#define SLOTS_SIZE BITS_TO_LONGS(KVMI_MEM_SLOTS_NUM)
+
+struct kvmi_arch_mem_access {
+   unsigned long active[KVM_PAGE_TRACK_MAX][SLOTS_SIZE];
+};
+
 #ifdef CONFIG_KVM_INTROSPECTION
 
 bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 dbg);
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 4e25ffc3d131..73eae96d2167 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -12,6 +12,8 @@
 
 void kvmi_arch_init_vcpu_events_mask(unsigned long *supported)
 {
+   BUILD_BUG_ON(KVM_MEM_SLOTS_NUM != KVMI_MEM_SLOTS_NUM);
+
set_bit(KVMI_VCPU_EVENT_BREAKPOINT, supported);
set_bit(KVMI_VCPU_EVENT_CR, supported);
set_bit(KVMI_VCPU_EVENT_HYPERCALL, supported);
@@ -920,3 +922,43 @@ bool kvmi_msr_event(struct kvm_vcpu *vcpu, struct msr_data 
*msr)
 
return ret;
 }
+
+static const struct {
+   unsigned int allow_bit;
+   enum kvm_page_track_mode track_mode;
+} track_modes[] = {
+   { KVMI_PAGE_ACCESS_R, KVM_PAGE_TRACK_PREREAD },
+   { KVMI_PAGE_ACCESS_W, KVM_PAGE_TRACK_PREWRITE },
+   { KVMI_PAGE_ACCESS_X, KVM_PAGE_TRACK_PREEXEC },
+};
+
+v

[PATCH v12 01/77] KVM: UAPI: add error codes used by the VM introspection code

2021-10-06 Thread Adalbert Lazăr
These new error codes help the introspection tool identify the cause
of an introspection command failure, recover from some error cases,
and give more information to the user.

Signed-off-by: Adalbert Lazăr 
---
 include/uapi/linux/kvm_para.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index 960c7e93d1a9..16a867910459 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -17,6 +17,10 @@
 #define KVM_E2BIG  E2BIG
 #define KVM_EPERM  EPERM
 #define KVM_EOPNOTSUPP 95
+#define KVM_EAGAIN 11
+#define KVM_ENOENT ENOENT
+#define KVM_ENOMEM ENOMEM
+#define KVM_EBUSY  EBUSY
 
 #define KVM_HC_VAPIC_POLL_IRQ  1
 #define KVM_HC_MMU_OP  2
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 58/77] KVM: introspection: add KVMI_VCPU_CONTROL_CR and KVMI_VCPU_EVENT_CR

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

Using the KVMI_VCPU_CONTROL_CR command, the introspection tool subscribes
to KVMI_VCPU_EVENT_CR events that will be sent when a control register
(CR0, CR3 or CR4) is going to be changed.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  73 +
 arch/x86/include/asm/kvmi_host.h  |  12 +++
 arch/x86/include/uapi/asm/kvmi.h  |  18 
 arch/x86/kvm/kvmi.c   |  79 ++
 arch/x86/kvm/kvmi.h   |   4 +
 arch/x86/kvm/kvmi_msg.c   |  44 
 arch/x86/kvm/vmx/vmx.c|   6 +-
 arch/x86/kvm/x86.c|  14 ++-
 include/uapi/linux/kvmi.h |   2 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 100 ++
 virt/kvm/introspection/kvmi.c |   2 +-
 virt/kvm/introspection/kvmi_int.h |   2 +
 12 files changed, 351 insertions(+), 5 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 89c3320f4bd5..a4705acddeb2 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -539,6 +539,7 @@ Enables/disables vCPU introspection events. This command 
can be used with
 the following events::
 
KVMI_VCPU_EVENT_BREAKPOINT
+   KVMI_VCPU_EVENT_CR
KVMI_VCPU_EVENT_HYPERCALL
 
 When an event is enabled, the introspection tool is notified and
@@ -701,6 +702,40 @@ interceptions). By default it is enabled.
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EINVAL - ``enable`` is not 1 or 0
 
+15. KVMI_VCPU_CONTROL_CR
+
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_control_cr {
+   __u8 cr;
+   __u8 enable;
+   __u16 padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Enables/disables introspection for a specific control register and must
+be used in addition to *KVMI_VCPU_CONTROL_EVENTS* with the *KVMI_VCPU_EVENT_CR*
+ID set.
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the specified control register is not CR0, CR3 or CR4
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
@@ -893,3 +928,41 @@ before returning this action.
 
 The *CONTINUE* action will cause the breakpoint exception to be reinjected
 (the OS will handle it).
+
+5. KVMI_VCPU_EVENT_CR
+-
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_event_hdr;
+   struct kvmi_vcpu_event;
+   struct kvmi_vcpu_event_cr {
+   __u8 cr;
+   __u8 padding[7];
+   __u64 old_value;
+   __u64 new_value;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_event_reply;
+   struct kvmi_vcpu_event_cr_reply {
+   __u64 new_val;
+   };
+
+This event is sent when a control register is going to be changed and the
+introspection has been enabled for this event and for this specific
+register (see **KVMI_VCPU_CONTROL_EVENTS**).
+
+``kvmi_vcpu_event`` (with the vCPU state), the control register number
+(``cr``), the old value (``old_value``) and the new value (``new_value``)
+are sent to the introspection tool. The *CONTINUE* action will set the
+``new_val``.
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 161d1ae5a7cf..7613088d0ae2 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -4,6 +4,8 @@
 
 #include 
 
+#define KVMI_NUM_CR 5
+
 struct kvmi_monitor_interception {
bool kvmi_intercepted;
bool kvm_intercepted;
@@ -19,6 +21,8 @@ struct kvmi_interception {
 struct kvm_vcpu_arch_introspection {
struct kvm_regs delayed_regs;
bool have_delayed_regs;
+
+   DECLARE_BITMAP(cr_mask, KVMI_NUM_CR);
 };
 
 struct kvm_arch_introspection {
@@ -27,11 +31,19 @@ struct kvm_arch_introspection {
 #ifdef CONFIG_KVM_INTROSPECTION
 
 bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 dbg);
+bool kvmi_cr_event(struct kvm_vcpu *vcpu, unsigned int cr,
+  unsigned long old_value, unsigned long *new_value);
+bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
 static inline bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 dbg)
{ return false; }
+static inline bool kvmi_cr_event(struct kvm_vcpu *vcpu, unsigned int cr,
+unsigned long old_value,
+unsigned long *new_value)
+   { return true; }
+static inline bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu) { return

[PATCH v12 44/77] KVM: introspection: add KVMI_VCPU_GET_INFO

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

This command returns the TSC frequency (in HZ) for the specified
vCPU if available (otherwise it returns zero).

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  29 
 arch/x86/include/asm/kvmi_host.h  |   2 +
 arch/x86/include/uapi/asm/kvmi.h  |  13 ++
 arch/x86/kvm/kvmi_msg.c   |  14 ++
 include/uapi/linux/kvmi.h |   2 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 153 +-
 virt/kvm/introspection/kvmi_int.h |   3 +
 virt/kvm/introspection/kvmi_msg.c |   9 ++
 8 files changed, 223 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/include/uapi/asm/kvmi.h

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 79152f47b30f..2f41fce79d95 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -441,6 +441,35 @@ one page (offset + size <= PAGE_SIZE).
 * -KVM_EINVAL - the specified gpa/size pair is invalid
 * -KVM_EINVAL - the padding is not zero
 
+8. KVMI_VCPU_GET_INFO
+-
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vcpu_get_info_reply {
+   __u64 tsc_speed;
+   };
+
+Returns the TSC frequency (in HZ) for the specified vCPU if available
+(otherwise it returns zero).
+
+:Errors:
+
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 360a57dd9019..05ade3a16b24 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_KVMI_HOST_H
 #define _ASM_X86_KVMI_HOST_H
 
+#include 
+
 struct kvm_vcpu_arch_introspection {
 };
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
new file mode 100644
index ..2b6192e1a9a4
--- /dev/null
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_X86_KVMI_H
+#define _UAPI_ASM_X86_KVMI_H
+
+/*
+ * KVM introspection - x86 specific structures and definitions
+ */
+
+struct kvmi_vcpu_get_info_reply {
+   __u64 tsc_speed;
+};
+
+#endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index 0a6edfe16f17..0d83c47f3758 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg.c
@@ -8,7 +8,21 @@
 
 #include "../../../virt/kvm/introspection/kvmi_int.h"
 
+static int handle_vcpu_get_info(const struct kvmi_vcpu_msg_job *job,
+   const struct kvmi_msg_hdr *msg,
+   const void *req)
+{
+   struct kvmi_vcpu_get_info_reply rpl;
+
+   memset(, 0, sizeof(rpl));
+   if (kvm_has_tsc_control)
+   rpl.tsc_speed = 1000ul * job->vcpu->arch.virtual_tsc_khz;
+
+   return kvmi_msg_vcpu_reply(job, msg, 0, , sizeof(rpl));
+}
+
 static const kvmi_vcpu_msg_job_fct msg_vcpu[] = {
+   [KVMI_VCPU_GET_INFO] = handle_vcpu_get_info,
 };
 
 kvmi_vcpu_msg_job_fct kvmi_arch_vcpu_msg_handler(u16 id)
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 7ba1c8758aba..da766427231e 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -31,6 +31,8 @@ enum {
 };
 
 enum {
+   KVMI_VCPU_GET_INFO = KVMI_VCPU_MESSAGE_ID(1),
+
KVMI_NEXT_VCPU_MESSAGE
 };
 
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 28dd6414a7e8..337f295d69ff 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "test_util.h"
 
@@ -18,6 +19,7 @@
 
 #include "linux/kvm_para.h"
 #include "linux/kvmi.h"
+#include "asm/kvmi.h"
 
 #define VCPU_ID 1
 
@@ -25,12 +27,49 @@ static int socket_pair[2];
 #define Kvm_socket   socket_pair[0]
 #define Userspace_socket socket_pair[1]
 
+static int test_id;
 static vm_vaddr_t test_gva;
 static void *test_hva;
 static vm_paddr_t test_gpa;
 
 static int page_size;
 
+struct vcpu_worker_data {
+   struct kvm_vm *vm;
+   int vcpu_id;
+   int test_id;
+};
+
+enum {
+   GUEST_TEST_NOOP = 0,
+};
+
+#define GUEST_REQUEST_TEST() GUEST_SYNC(0)
+#define GUEST_SIGNAL_TEST_DONE() GUEST_SYNC(1)
+
+#define HOST_SEND_TEST(uc)   (uc.cmd == UCALL_SYNC && uc.args[1] == 0)
+#define HOST_TEST_DONE(uc)   (uc.cmd == UCALL_SYNC && uc.args[1] == 1)
+
+static pthread_t start_vcpu_worker(struct vcpu_worker_data *data);
+static 

[PATCH v12 09/77] KVM: x86: add kvm_x86_ops.desc_ctrl_supported()

2021-10-06 Thread Adalbert Lazăr
When the introspection tool tries to enable the KVMI_VCPU_EVENT_DESCRIPTOR
event, this function is used to check if the control of VM-exits caused
by descriptor-table register accesses is supported.

Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm-x86-ops.h | 1 +
 arch/x86/include/asm/kvm_host.h| 1 +
 arch/x86/kvm/svm/svm.c | 6 ++
 arch/x86/kvm/vmx/capabilities.h| 7 ++-
 arch/x86/kvm/vmx/vmx.c | 1 +
 5 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h 
b/arch/x86/include/asm/kvm-x86-ops.h
index 04a77a0858ef..9a962bd098d0 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -124,6 +124,7 @@ KVM_X86_OP_NULL(complete_emulated_msr)
 KVM_X86_OP(bp_intercepted)
 KVM_X86_OP(control_cr3_intercept)
 KVM_X86_OP(cr3_write_intercepted)
+KVM_X86_OP(desc_ctrl_supported)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_NULL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9c3133380028..1acaa27ffd8f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1349,6 +1349,7 @@ struct kvm_x86_ops {
void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
+   bool (*desc_ctrl_supported)(void);
void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9fac69c8e135..ce45fe0d35bc 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1775,6 +1775,11 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct 
desc_ptr *dt)
vmcb_mark_dirty(svm->vmcb, VMCB_DT);
 }
 
+static bool svm_desc_ctrl_supported(void)
+{
+   return true;
+}
+
 void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4647,6 +4652,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_idt = svm_set_idt,
.get_gdt = svm_get_gdt,
.set_gdt = svm_set_gdt,
+   .desc_ctrl_supported = svm_desc_ctrl_supported,
.set_dr7 = svm_set_dr7,
.sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
.cache_reg = svm_cache_reg,
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 4705ad55abb5..9a25aa0dd9c8 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -143,12 +143,17 @@ static inline bool cpu_has_vmx_ept(void)
SECONDARY_EXEC_ENABLE_EPT;
 }
 
-static inline bool vmx_umip_emulated(void)
+static inline bool vmx_desc_ctrl_supported(void)
 {
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_DESC;
 }
 
+static inline bool vmx_umip_emulated(void)
+{
+   return vmx_desc_ctrl_supported();
+}
+
 static inline bool cpu_has_vmx_rdtscp(void)
 {
return vmcs_config.cpu_based_2nd_exec_ctrl &
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3f5731213acf..026d678b82b9 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7635,6 +7635,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_idt = vmx_set_idt,
.get_gdt = vmx_get_gdt,
.set_gdt = vmx_set_gdt,
+   .desc_ctrl_supported = vmx_desc_ctrl_supported,
.set_dr7 = vmx_set_dr7,
.sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
.cache_reg = vmx_cache_reg,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 03/77] KVM: x86: add kvm_arch_vcpu_get_regs() and kvm_arch_vcpu_get_sregs()

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

These functions are used by the VM introspection code
(for the KVMI_VCPU_GET_REGISTERS command and all events sending the vCPU
registers to the introspection tool).

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/x86.c   | 10 ++
 include/linux/kvm_host.h |  3 +++
 2 files changed, 13 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index db7fa1398f0d..f7d09757b85f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10073,6 +10073,11 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu 
*vcpu, struct kvm_regs *regs)
return 0;
 }
 
+void kvm_arch_vcpu_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+   __get_regs(vcpu, regs);
+}
+
 static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
@@ -10195,6 +10200,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu 
*vcpu,
return 0;
 }
 
+void kvm_arch_vcpu_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+   __get_sregs(vcpu, sregs);
+}
+
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
 {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6795ea7e357d..7bc45e1879db 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1026,9 +1026,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr);
 
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
+void kvm_arch_vcpu_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
  struct kvm_sregs *sregs);
+void kvm_arch_vcpu_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs);
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
  struct kvm_sregs *sregs);
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 28/77] KVM: x86: disable gpa_available optimization for fetch and page-walk SPT violations

2021-10-06 Thread Adalbert Lazăr
From: Mircea Cîrjaliu 

This change is needed because the introspection tool can write-protect
guest page tables or exec-protect heap/stack pages.

Signed-off-by: Mircea Cîrjaliu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 5 +
 arch/x86/kvm/mmu/mmu.c  | 7 +++
 arch/x86/kvm/x86.c  | 2 +-
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 692e55a5c312..dc3c83edc4bc 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1664,6 +1664,10 @@ extern u64 kvm_mce_cap_supported;
  *  retry native execution under certain conditions,
  *  Can only be set in conjunction with EMULTYPE_PF.
  *
+ * EMULTYPE_GPA_AVAILABLE_PF - Set when the emulator can avoid a page walk
+ *   to get the GPA.
+ *   Can only be set in conjunction with EMULTYPE_PF.
+ *
  * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was
  *  triggered by KVM's magic "force emulation" prefix,
  *  which is opt in via module param (off by default).
@@ -1686,6 +1690,7 @@ extern u64 kvm_mce_cap_supported;
 #define EMULTYPE_TRAP_UD_FORCED(1 << 4)
 #define EMULTYPE_VMWARE_GP (1 << 5)
 #define EMULTYPE_PF(1 << 6)
+#define EMULTYPE_GPA_AVAILABLE_PF   (1 << 7)
 
 int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
 int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index b5685e342945..c90683284098 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5297,6 +5297,13 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t 
cr2_or_gpa, u64 error_code,
 
if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa)))
return RET_PF_RETRY;
+   /*
+* With shadow page tables, fault_address contains a GVA or nGPA.
+* On a fetch fault, fault_address contains the instruction pointer.
+*/
+   if (direct && likely(!(error_code & PFERR_FETCH_MASK)) &&
+   (error_code & PFERR_GUEST_FINAL_MASK))
+   emulation_type |= EMULTYPE_GPA_AVAILABLE_PF;
 
r = RET_PF_INVALID;
if (unlikely(error_code & PFERR_RSVD_MASK)) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c52ac5e9a020..ab97e0175c04 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7933,7 +7933,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t 
cr2_or_gpa,
ctxt->exception.address = cr2_or_gpa;
 
/* With shadow page tables, cr2 contains a GVA or nGPA. */
-   if (vcpu->arch.mmu->direct_map) {
+   if (emulation_type & EMULTYPE_GPA_AVAILABLE_PF) {
ctxt->gpa_available = true;
ctxt->gpa_val = cr2_or_gpa;
}
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 67/77] KVM: introspection: restore the state of descriptor-table register interception on unhook

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This commit also ensures that the introspection tool and userspace
do not disable the descriptor-table access VM-exit for each other.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvmi_host.h |  4 +++
 arch/x86/kvm/kvmi.c  | 45 
 arch/x86/kvm/svm/svm.c   |  3 +++
 arch/x86/kvm/vmx/vmx.c   |  3 +++
 4 files changed, 55 insertions(+)

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index a24ba87036f7..a872277eba67 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -17,6 +17,7 @@ struct kvmi_interception {
bool restore_interception;
struct kvmi_monitor_interception breakpoint;
struct kvmi_monitor_interception cr3w;
+   struct kvmi_monitor_interception descriptor;
 };
 
 struct kvm_vcpu_arch_introspection {
@@ -48,6 +49,7 @@ bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu, bool 
enable);
 void kvmi_enter_guest(struct kvm_vcpu *vcpu);
 void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
   u64 old_value, u64 new_value);
+bool kvmi_monitor_desc_intercept(struct kvm_vcpu *vcpu, bool enable);
 bool kvmi_descriptor_event(struct kvm_vcpu *vcpu, u8 descriptor, bool write);
 
 #else /* CONFIG_KVM_INTROSPECTION */
@@ -64,6 +66,8 @@ static inline bool kvmi_monitor_cr3w_intercept(struct 
kvm_vcpu *vcpu,
 static inline void kvmi_enter_guest(struct kvm_vcpu *vcpu) { }
 static inline void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
u64 old_value, u64 new_value) { }
+static inline bool kvmi_monitor_desc_intercept(struct kvm_vcpu *vcpu,
+  bool enable) { return false; }
 static inline bool kvmi_descriptor_event(struct kvm_vcpu *vcpu, u8 descriptor,
 bool write) { return true; }
 
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index cf7167366214..c0c38e6478cb 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -286,12 +286,52 @@ static void kvmi_arch_disable_cr3w_intercept(struct 
kvm_vcpu *vcpu)
vcpu->arch.kvmi->cr3w.kvm_intercepted = false;
 }
 
+/*
+ * Returns true if one side (kvm or kvmi) tries to disable the descriptor
+ * interception while the other side is still tracking it.
+ */
+bool kvmi_monitor_desc_intercept(struct kvm_vcpu *vcpu, bool enable)
+{
+   struct kvmi_interception *arch_vcpui = READ_ONCE(vcpu->arch.kvmi);
+
+   return (arch_vcpui && arch_vcpui->descriptor.monitor_fct(vcpu, enable));
+}
+EXPORT_SYMBOL(kvmi_monitor_desc_intercept);
+
+static bool monitor_desc_fct_kvmi(struct kvm_vcpu *vcpu, bool enable)
+{
+   vcpu->arch.kvmi->descriptor.kvmi_intercepted = enable;
+
+   if (enable)
+   vcpu->arch.kvmi->descriptor.kvm_intercepted =
+   static_call(kvm_x86_desc_intercepted)(vcpu);
+   else if (vcpu->arch.kvmi->descriptor.kvm_intercepted)
+   return true;
+
+   return false;
+}
+
+static bool monitor_desc_fct_kvm(struct kvm_vcpu *vcpu, bool enable)
+{
+   if (!vcpu->arch.kvmi->descriptor.kvmi_intercepted)
+   return false;
+
+   vcpu->arch.kvmi->descriptor.kvm_intercepted = enable;
+
+   if (!enable)
+   return true;
+
+   return false;
+}
+
 static int kvmi_control_desc_intercept(struct kvm_vcpu *vcpu, bool enable)
 {
if (!static_call(kvm_x86_desc_ctrl_supported)())
return -KVM_EOPNOTSUPP;
 
+   vcpu->arch.kvmi->descriptor.monitor_fct = monitor_desc_fct_kvmi;
static_call(kvm_x86_control_desc_intercept)(vcpu, enable);
+   vcpu->arch.kvmi->descriptor.monitor_fct = monitor_desc_fct_kvm;
 
return 0;
 }
@@ -299,6 +339,9 @@ static int kvmi_control_desc_intercept(struct kvm_vcpu 
*vcpu, bool enable)
 static void kvmi_arch_disable_desc_intercept(struct kvm_vcpu *vcpu)
 {
kvmi_control_desc_intercept(vcpu, false);
+
+   vcpu->arch.kvmi->descriptor.kvmi_intercepted = false;
+   vcpu->arch.kvmi->descriptor.kvm_intercepted = false;
 }
 
 int kvmi_arch_cmd_control_intercept(struct kvm_vcpu *vcpu,
@@ -370,11 +413,13 @@ bool kvmi_arch_vcpu_alloc_interception(struct kvm_vcpu 
*vcpu)
 
arch_vcpui->breakpoint.monitor_fct = monitor_bp_fct_kvm;
arch_vcpui->cr3w.monitor_fct = monitor_cr3w_fct_kvm;
+   arch_vcpui->descriptor.monitor_fct = monitor_desc_fct_kvm;
 
/*
 * paired with:
 *  - kvmi_monitor_bp_intercept()
 *  - kvmi_monitor_cr3w_intercept()
+*  - kvmi_monitor_desc_intercept()
 */
smp_wmb();
WRITE_ONCE(vcpu->arch.kvmi, arch_vcpui);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 98b4909254a0..d3d061615536 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm

[PATCH v12 72/77] KVM: introspection: extend KVMI_GET_VERSION with struct kvmi_features

2021-10-06 Thread Adalbert Lazăr
This is used by the introspection tool to check the hardware support
for the single step feature.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst| 15 +--
 arch/x86/include/uapi/asm/kvmi.h   |  5 +
 arch/x86/kvm/kvmi.c|  5 +
 include/uapi/linux/kvmi.h  |  1 +
 tools/testing/selftests/kvm/x86_64/kvmi_test.c |  6 ++
 virt/kvm/introspection/kvmi_int.h  |  1 +
 virt/kvm/introspection/kvmi_msg.c  |  2 ++
 7 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index b12affb0d64f..54cb3fbe184e 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -243,10 +243,21 @@ The vCPU commands start with::
struct kvmi_get_version_reply {
__u32 version;
__u32 max_msg_size;
+   struct kvmi_features features;
};
 
-Returns the introspection API version and the largest accepted message
-size (useful for variable length messages).
+For x86
+
+::
+
+   struct kvmi_features {
+   __u8 singlestep;
+   __u8 padding[7];
+   };
+
+Returns the introspection API version, the largest accepted message size
+(useful for variable length messages) and some of the hardware supported
+features.
 
 This command is always allowed and successful.
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 6ef144ddb4bb..c5a2cb1b54f1 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -159,4 +159,9 @@ struct kvmi_vcpu_event_msr_reply {
__u64 new_val;
 };
 
+struct kvmi_features {
+   __u8 singlestep;
+   __u8 padding[7];
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 6432c40817d2..eee874890e29 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1084,3 +1084,8 @@ static void kvmi_track_flush_slot(struct kvm *kvm, struct 
kvm_memory_slot *slot,
 
kvmi_put(kvm);
 }
+
+void kvmi_arch_features(struct kvmi_features *feat)
+{
+   feat->singlestep = !!kvm_x86_ops.control_singlestep;
+}
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index bb6265e4539a..b594463795c6 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -102,6 +102,7 @@ struct kvmi_error_code {
 struct kvmi_get_version_reply {
__u32 version;
__u32 max_msg_size;
+   struct kvmi_features features;
 };
 
 struct kvmi_vm_check_command {
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 9cf099b38bdf..f7735e3ea9e8 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -59,6 +59,8 @@ struct vcpu_worker_data {
bool restart_on_shutdown;
 };
 
+static struct kvmi_features features;
+
 typedef void (*fct_pf_event)(struct kvm_vm *vm, struct kvmi_msg_hdr *hdr,
struct pf_ev *ev,
struct vcpu_reply *rpl);
@@ -443,6 +445,10 @@ static void test_cmd_get_version(void)
 
pr_debug("KVMI version: %u\n", rpl.version);
pr_debug("Max message size: %u\n", rpl.max_msg_size);
+
+   features = rpl.features;
+
+   pr_debug("singlestep support: %u\n", features.singlestep);
 }
 
 static void cmd_vm_check_command(__u16 id, int expected_err)
diff --git a/virt/kvm/introspection/kvmi_int.h 
b/virt/kvm/introspection/kvmi_int.h
index bf6545e66425..a51e7e4ed511 100644
--- a/virt/kvm/introspection/kvmi_int.h
+++ b/virt/kvm/introspection/kvmi_int.h
@@ -121,5 +121,6 @@ void kvmi_arch_update_page_tracking(struct kvm *kvm,
struct kvmi_mem_access *m);
 void kvmi_arch_hook(struct kvm *kvm);
 void kvmi_arch_unhook(struct kvm *kvm);
+void kvmi_arch_features(struct kvmi_features *feat);
 
 #endif
diff --git a/virt/kvm/introspection/kvmi_msg.c 
b/virt/kvm/introspection/kvmi_msg.c
index 745d10981b6f..e2aef76bfd16 100644
--- a/virt/kvm/introspection/kvmi_msg.c
+++ b/virt/kvm/introspection/kvmi_msg.c
@@ -134,6 +134,8 @@ static int handle_get_version(struct kvm_introspection 
*kvmi,
rpl.version = kvmi_version();
rpl.max_msg_size = KVMI_MAX_MSG_SIZE;
 
+   kvmi_arch_features(&rpl.features);
+
return kvmi_msg_vm_reply(kvmi, msg, 0, &rpl, sizeof(rpl));
 }
 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 12/77] KVM: x86: add kvm_x86_ops.desc_intercepted()

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This function will be used to test if the descriptor-table registers
access is already tracked by userspace.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h|  1 +
 arch/x86/kvm/svm/svm.c | 15 +++
 arch/x86/kvm/vmx/vmx.c |  8 
 4 files changed, 25 insertions(+)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h 
b/arch/x86/include/asm/kvm-x86-ops.h
index dd08f3120f8f..30d01c9ed31b 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -126,6 +126,7 @@ KVM_X86_OP(control_cr3_intercept)
 KVM_X86_OP(cr3_write_intercepted)
 KVM_X86_OP(desc_ctrl_supported)
 KVM_X86_OP(control_desc_intercept)
+KVM_X86_OP(desc_intercepted)
 
 #undef KVM_X86_OP
 #undef KVM_X86_OP_NULL
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2e5ddb18804b..1182b0fbd245 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1351,6 +1351,7 @@ struct kvm_x86_ops {
void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
bool (*desc_ctrl_supported)(void);
void (*control_desc_intercept)(struct kvm_vcpu *vcpu, bool enable);
+   bool (*desc_intercepted)(struct kvm_vcpu *vcpu);
void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 0d46f5aa20c3..c1b1e5cdd508 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1805,6 +1805,20 @@ static void svm_control_desc_intercept(struct kvm_vcpu 
*vcpu, bool enable)
}
 }
 
+static bool svm_desc_intercepted(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   return (svm_is_intercept(svm, INTERCEPT_STORE_IDTR) ||
+   svm_is_intercept(svm, INTERCEPT_STORE_GDTR) ||
+   svm_is_intercept(svm, INTERCEPT_STORE_LDTR) ||
+   svm_is_intercept(svm, INTERCEPT_STORE_TR) ||
+   svm_is_intercept(svm, INTERCEPT_LOAD_IDTR) ||
+   svm_is_intercept(svm, INTERCEPT_LOAD_GDTR) ||
+   svm_is_intercept(svm, INTERCEPT_LOAD_LDTR) ||
+   svm_is_intercept(svm, INTERCEPT_LOAD_TR));
+}
+
 void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
struct vcpu_svm *svm = to_svm(vcpu);
@@ -4692,6 +4706,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_gdt = svm_set_gdt,
.desc_ctrl_supported = svm_desc_ctrl_supported,
.control_desc_intercept = svm_control_desc_intercept,
+   .desc_intercepted = svm_desc_intercepted,
.set_dr7 = svm_set_dr7,
.sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
.cache_reg = svm_cache_reg,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d0f02d52b401..8f34b19827a3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3405,6 +3405,13 @@ static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct 
desc_ptr *dt)
vmcs_writel(GUEST_GDTR_BASE, dt->address);
 }
 
+static bool vmx_desc_intercepted(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   return !!(secondary_exec_controls_get(vmx) & SECONDARY_EXEC_DESC);
+}
+
 static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
 {
struct kvm_segment var;
@@ -7647,6 +7654,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_gdt = vmx_set_gdt,
.desc_ctrl_supported = vmx_desc_ctrl_supported,
.control_desc_intercept = vmx_control_desc_intercept,
+   .desc_intercepted = vmx_desc_intercepted,
.set_dr7 = vmx_set_dr7,
.sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
.cache_reg = vmx_cache_reg,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 10/77] KVM: svm: add support for descriptor-table VM-exits

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This function is needed for the KVMI_VCPU_EVENT_DESCRIPTOR event.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/svm/svm.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index ce45fe0d35bc..e5cd8813cca6 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2525,6 +2525,11 @@ static int emulate_on_interception(struct kvm_vcpu *vcpu)
return kvm_emulate_instruction(vcpu, 0);
 }
 
+static int descriptor_access_interception(struct kvm_vcpu *vcpu)
+{
+   return kvm_emulate_instruction(vcpu, 0);
+}
+
 static int rsm_interception(struct kvm_vcpu *vcpu)
 {
return kvm_emulate_instruction_from_buffer(vcpu, rsm_ins_bytes, 2);
@@ -3231,6 +3236,14 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu 
*vcpu) = {
[SVM_EXIT_AVIC_INCOMPLETE_IPI]  = 
avic_incomplete_ipi_interception,
[SVM_EXIT_AVIC_UNACCELERATED_ACCESS]= 
avic_unaccelerated_access_interception,
[SVM_EXIT_VMGEXIT]  = sev_handle_vmgexit,
+   [SVM_EXIT_IDTR_READ]= 
descriptor_access_interception,
+   [SVM_EXIT_GDTR_READ]= 
descriptor_access_interception,
+   [SVM_EXIT_LDTR_READ]= 
descriptor_access_interception,
+   [SVM_EXIT_TR_READ]  = 
descriptor_access_interception,
+   [SVM_EXIT_IDTR_WRITE]   = 
descriptor_access_interception,
+   [SVM_EXIT_GDTR_WRITE]   = 
descriptor_access_interception,
+   [SVM_EXIT_LDTR_WRITE]   = 
descriptor_access_interception,
+   [SVM_EXIT_TR_WRITE] = 
descriptor_access_interception,
 };
 
 static void dump_vmcb(struct kvm_vcpu *vcpu)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 37/77] KVM: introspection: add KVMI_VM_EVENT_UNHOOK

2021-10-06 Thread Adalbert Lazăr
This event is sent when the guest is about to be
paused/suspended/migrated. The introspection tool has the chance to
remove its hooks (e.g. breakpoints) while the guest is still running.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 31 +
 arch/x86/kvm/Makefile |  2 +-
 arch/x86/kvm/kvmi.c   | 10 +++
 include/linux/kvmi_host.h |  2 +
 include/uapi/linux/kvmi.h |  9 +++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 68 ++-
 virt/kvm/introspection/kvmi.c | 13 +++-
 virt/kvm/introspection/kvmi_int.h |  3 +
 virt/kvm/introspection/kvmi_msg.c | 42 +++-
 9 files changed, 173 insertions(+), 7 deletions(-)
 create mode 100644 arch/x86/kvm/kvmi.c

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 33490bc9d1c1..2eecd7e8bfd0 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -331,3 +331,34 @@ This command is always allowed.
};
 
 Returns the number of online vCPUs.
+
+Events
+======
+
+The VM introspection events are sent with the KVMI_VM_EVENT message id.
+The message data begins with a common structure having the event id::
+
+   struct kvmi_event_hdr {
+   __u16 event;
+   __u16 padding[3];
+   };
+
+Specific event data can follow this common structure.
+
+1. KVMI_VM_EVENT_UNHOOK
+---
+
+:Architectures: all
+:Versions: >= 1
+:Actions: none
+:Parameters:
+
+::
+
+   struct kvmi_event_hdr;
+
+:Returns: none
+
+This event is sent when the device manager has to pause/stop/migrate the
+guest (see **Unhooking**). The introspection tool has a chance to unhook
+and close the KVMI channel (signaling that the operation can proceed).
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 3a0cc42f36c9..af58e1daf7cf 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -14,7 +14,7 @@ kvm-y += $(KVM)/kvm_main.o 
$(KVM)/coalesced_mmio.o \
$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o 
\
$(KVM)/dirty_ring.o $(KVM)/binary_stats.o
 kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
-kvm-$(CONFIG_KVM_INTROSPECTION) += $(KVMI)/kvmi.o $(KVMI)/kvmi_msg.o
+kvm-$(CONFIG_KVM_INTROSPECTION) += $(KVMI)/kvmi.o $(KVMI)/kvmi_msg.o kvmi.o
 
 kvm-y  += x86.o emulate.o i8259.o irq.o lapic.o \
   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
new file mode 100644
index ..4388f2d9ff2c
--- /dev/null
+++ b/arch/x86/kvm/kvmi.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM introspection - x86
+ *
+ * Copyright (C) 2019-2021 Bitdefender S.R.L.
+ */
+
+void kvmi_arch_init_vcpu_events_mask(unsigned long *supported)
+{
+}
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index 81eac9f53a3f..6476c7d6a4d3 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -17,6 +17,8 @@ struct kvm_introspection {
 
unsigned long *cmd_allow_mask;
unsigned long *event_allow_mask;
+
+   atomic_t ev_seq;
 };
 
 int kvmi_version(void);
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index e06a7b80d4d9..18fb51078d48 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -17,6 +17,8 @@ enum {
 #define KVMI_VCPU_MESSAGE_ID(id) (((id) << 1) | 1)
 
 enum {
+   KVMI_VM_EVENT = KVMI_VM_MESSAGE_ID(0),
+
KVMI_GET_VERSION  = KVMI_VM_MESSAGE_ID(1),
KVMI_VM_CHECK_COMMAND = KVMI_VM_MESSAGE_ID(2),
KVMI_VM_CHECK_EVENT   = KVMI_VM_MESSAGE_ID(3),
@@ -33,6 +35,8 @@ enum {
 #define KVMI_VCPU_EVENT_ID(id) (((id) << 1) | 1)
 
 enum {
+   KVMI_VM_EVENT_UNHOOK = KVMI_VM_EVENT_ID(0),
+
KVMI_NEXT_VM_EVENT
 };
 
@@ -73,4 +77,9 @@ struct kvmi_vm_get_info_reply {
__u32 padding[3];
 };
 
+struct kvmi_event_hdr {
+   __u16 event;
+   __u16 padding[3];
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 2ada3d9bc230..722737e01d9b 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -68,6 +68,11 @@ static void set_event_perm(struct kvm_vm *vm, __s32 id, 
__u32 allow,
 "KVM_INTROSPECTION_EVENT");
 }
 
+static void disallow_event(struct kvm_vm *vm, __s32 event_id)
+{
+   set_event_perm(vm, event_id, 0, 0);
+}
+
 static void allow_event(struct kvm_vm *vm, __s32 event_id)
 {
set_event_perm(vm, event_id, 1, 0);
@@ -298,11 +303,16 @@ static void cmd_vm_check_event(__u16 id, int expected_err)
expected_err);
 }
 
-static void test_cmd_vm_check

[PATCH v12 19/77] KVM: x86: export kvm_arch_vcpu_set_guest_debug()

2021-10-06 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This function is needed in order to notify the introspection tool
through KVMI_VCPU_EVENT_BP events on guest breakpoints.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/x86.c   | 24 
 include/linux/kvm_host.h |  2 ++
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2b5c3c22c48e..9fe3b53fd1e3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10464,17 +10464,12 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu 
*vcpu,
return ret;
 }
 
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-   struct kvm_guest_debug *dbg)
+int kvm_arch_vcpu_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
 {
unsigned long rflags;
int i, r;
 
-   if (vcpu->arch.guest_state_protected)
-   return -EINVAL;
-
-   vcpu_load(vcpu);
-
if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
r = -EBUSY;
if (vcpu->arch.exception.pending)
@@ -10519,10 +10514,23 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct 
kvm_vcpu *vcpu,
r = 0;
 
 out:
-   vcpu_put(vcpu);
return r;
 }
 
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+   struct kvm_guest_debug *dbg)
+{
+   int ret;
+
+   if (vcpu->arch.guest_state_protected)
+   return -EINVAL;
+
+   vcpu_load(vcpu);
+   ret = kvm_arch_vcpu_set_guest_debug(vcpu, dbg);
+   vcpu_put(vcpu);
+   return ret;
+}
+
 /*
  * Translate a guest virtual address to a guest physical address.
  */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 712642be3307..2e4300bf936f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1043,6 +1043,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg);
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu);
+int kvm_arch_vcpu_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg);
 
 int kvm_arch_init(void *opaque);
 void kvm_arch_exit(void);
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v12 77/77] KVM: x86: call the page tracking code on emulation failure

2021-10-06 Thread Adalbert Lazăr
From: Mihai Donțu 

The information we can provide this way is incomplete, but current users
of the page tracking code can work with it.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/x86.c | 49 ++
 1 file changed, 49 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9a3fac9b30ba..a9caacea0207 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7872,6 +7872,51 @@ int x86_decode_emulated_instruction(struct kvm_vcpu 
*vcpu, int emulation_type,
 }
 EXPORT_SYMBOL_GPL(x86_decode_emulated_instruction);
 
+/*
+ * With introspection enabled, emulation failures translate in events being
+ * missed because the read/write callbacks are not invoked. All we have is
+ * the fetch event (kvm_page_track_preexec). Below we use the EPT/NPT VMEXIT
+ * information to generate the events, but without providing accurate
+ * data and size (the emulator would have computed those). If an instruction
+ * would happen to read and write in the same page, the second event will
+ * initially be missed and we rely on the page tracking mechanism to bring
+ * us back here to send it.
+ */
+static bool kvm_page_track_emulation_failure(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+   u64 error_code = vcpu->arch.error_code;
+   u8 data = 0;
+   gva_t gva;
+   bool ret;
+
+   /* MMIO emulation failures should be treated the normal way */
+   if (unlikely(error_code & PFERR_RSVD_MASK))
+   return true;
+
+   /* EPT/NPT must be enabled */
+   if (unlikely(!vcpu->arch.mmu->direct_map))
+   return true;
+
+   /*
+* The A/D bit emulation should make this test unneeded, but just
+* in case
+*/
+   if (unlikely((error_code & PFERR_NESTED_GUEST_PAGE) ==
+PFERR_NESTED_GUEST_PAGE))
+   return true;
+
+   gva = static_call(kvm_x86_fault_gla)(vcpu);
+
+   if (error_code & PFERR_WRITE_MASK)
+   ret = kvm_page_track_prewrite(vcpu, gpa, gva, &data, 0);
+   else if (error_code & PFERR_USER_MASK)
+   ret = kvm_page_track_preread(vcpu, gpa, gva, 0);
+   else
+   ret = true;
+
+   return ret;
+}
+
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len)
 {
@@ -7905,6 +7950,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t 
cr2_or_gpa,
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
+   if (!kvm_page_track_emulation_failure(vcpu, cr2_or_gpa))
+   return 1;
if (reexecute_instruction(vcpu, cr2_or_gpa,
  write_fault_to_spt,
  emulation_type))
@@ -7974,6 +8021,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t 
cr2_or_gpa,
return 1;
 
if (r == EMULATION_FAILED) {
+   if (!kvm_page_track_emulation_failure(vcpu, cr2_or_gpa))
+   return 1;
if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt,
emulation_type))
return 1;
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v11 58/81] KVM: introspection: add cleanup support for vCPUs

2020-12-07 Thread Adalbert Lazăr
From: Nicușor Cîțu 

On unhook the introspection channel is closed. This will signal the
receiving thread to call kvmi_put() and exit. There might be vCPU threads
handling introspection commands or waiting for event replies. These
will also call kvmi_put() and re-enter in guest. Once the reference
counter reaches zero, the structures keeping the introspection data
(kvm_introspection and kvm_vcpu_introspection) will be freed.

In order to restore the interception of CRs, MSRs, BP, descriptor-table
registers, from all vCPUs (some of which might run from userspace),
we keep the needed information in another structure (kvmi_interception)
which will be used and freed by each of them before re-entering in guest.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h   |  3 ++
 arch/x86/include/asm/kvmi_host.h  |  4 +++
 arch/x86/kvm/kvmi.c   | 49 +++
 virt/kvm/introspection/kvmi.c | 32 ++--
 virt/kvm/introspection/kvmi_int.h |  5 
 5 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 197dc160a61e..3639e50b6488 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -816,6 +816,9 @@ struct kvm_vcpu_arch {
 
/* #PF translated error code from EPT/NPT exit reason */
u64 error_code;
+
+   /* Control the interception of MSRs/CRs/BP... */
+   struct kvmi_interception *kvmi;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index cc945151cb36..b776be4bb49f 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -4,6 +4,10 @@
 
 #include 
 
+struct kvmi_interception {
+   bool restore_interception;
+};
+
 struct kvm_vcpu_arch_introspection {
struct kvm_regs delayed_regs;
bool have_delayed_regs;
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 0bb6f38f1213..b4a7d581f68c 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -210,3 +210,52 @@ void kvmi_arch_breakpoint_event(struct kvm_vcpu *vcpu, u64 
gva, u8 insn_len)
kvmi_handle_common_event_actions(vcpu, action);
}
 }
+
+static void kvmi_arch_restore_interception(struct kvm_vcpu *vcpu)
+{
+}
+
+bool kvmi_arch_clean_up_interception(struct kvm_vcpu *vcpu)
+{
+   struct kvmi_interception *arch_vcpui = vcpu->arch.kvmi;
+
+   if (!arch_vcpui)
+   return false;
+
+   if (!arch_vcpui->restore_interception)
+   return false;
+
+   kvmi_arch_restore_interception(vcpu);
+
+   return true;
+}
+
+bool kvmi_arch_vcpu_alloc_interception(struct kvm_vcpu *vcpu)
+{
+   struct kvmi_interception *arch_vcpui;
+
+   arch_vcpui = kzalloc(sizeof(*arch_vcpui), GFP_KERNEL);
+   if (!arch_vcpui)
+   return false;
+
+   return true;
+}
+
+void kvmi_arch_vcpu_free_interception(struct kvm_vcpu *vcpu)
+{
+   kfree(vcpu->arch.kvmi);
+   WRITE_ONCE(vcpu->arch.kvmi, NULL);
+}
+
+bool kvmi_arch_vcpu_introspected(struct kvm_vcpu *vcpu)
+{
+   return !!READ_ONCE(vcpu->arch.kvmi);
+}
+
+void kvmi_arch_request_interception_cleanup(struct kvm_vcpu *vcpu)
+{
+   struct kvmi_interception *arch_vcpui = READ_ONCE(vcpu->arch.kvmi);
+
+   if (arch_vcpui)
+   arch_vcpui->restore_interception = true;
+}
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index bd1e6afaed9f..25af27aaf9ec 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -206,7 +206,7 @@ static bool kvmi_alloc_vcpui(struct kvm_vcpu *vcpu)
 
vcpu->kvmi = vcpui;
 
-   return true;
+   return kvmi_arch_vcpu_alloc_interception(vcpu);
 }
 
 static int kvmi_create_vcpui(struct kvm_vcpu *vcpu)
@@ -240,6 +240,9 @@ static void kvmi_free_vcpui(struct kvm_vcpu *vcpu)
 
kfree(vcpui);
vcpu->kvmi = NULL;
+
+   kvmi_arch_request_interception_cleanup(vcpu);
+   kvmi_make_request(vcpu, false);
 }
 
 static void kvmi_free(struct kvm *kvm)
@@ -262,6 +265,7 @@ void kvmi_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
mutex_lock(&vcpu->kvm->kvmi_lock);
kvmi_free_vcpui(vcpu);
+   kvmi_arch_vcpu_free_interception(vcpu);
mutex_unlock(&vcpu->kvm->kvmi_lock);
 }
 
@@ -410,6 +414,21 @@ static int kvmi_recv_thread(void *arg)
return 0;
 }
 
+static bool ready_to_hook(struct kvm *kvm)
+{
+   struct kvm_vcpu *vcpu;
+   int i;
+
+   if (kvm->kvmi)
+   return false;
+
+   kvm_for_each_vcpu(i, vcpu, kvm)
+   if (kvmi_arch_vcpu_introspected(vcpu))
+   return false;
+
+   return true;
+}
+
 static int kvmi_hook(struct kvm *kvm,
 const struct kvm_introspection_hook *hook)
 {
@@ -418,7 +437,7 @@ static int kvmi_hook(struct kvm *kvm,
 
mutex

[PATCH v11 67/81] KVM: introspection: add KVMI_VCPU_GET_XSAVE

2020-12-07 Thread Adalbert Lazăr
From: Mihai Donțu 

This vCPU command is used to get the XSAVE area.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 29 +++
 arch/x86/include/uapi/asm/kvmi.h  |  4 +++
 arch/x86/kvm/kvmi_msg.c   | 20 +
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 26 +
 5 files changed, 80 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 3846fec72f14..3b7a68cc8faf 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -830,6 +830,35 @@ Returns the value of an extended control register XCR.
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+19. KVMI_VCPU_GET_XSAVE
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vcpu_get_xsave_reply {
+   struct kvm_xsave xsave;
+   };
+
+Returns a buffer containing the XSAVE area.
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_ENOMEM - there is not enough memory to allocate the reply
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index c0a73051d667..c6a46252a684 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -111,4 +111,8 @@ struct kvmi_vcpu_get_xcr_reply {
__u64 value;
 };
 
+struct kvmi_vcpu_get_xsave_reply {
+   struct kvm_xsave xsave;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index 8badef7003fd..befb1c288045 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg.c
@@ -194,12 +194,32 @@ static int handle_vcpu_get_xcr(const struct 
kvmi_vcpu_msg_job *job,
return kvmi_msg_vcpu_reply(job, msg, ec, &rpl, sizeof(rpl));
 }
 
+static int handle_vcpu_get_xsave(const struct kvmi_vcpu_msg_job *job,
+const struct kvmi_msg_hdr *msg,
+const void *req)
+{
+   struct kvmi_vcpu_get_xsave_reply *rpl;
+   int err, ec = 0;
+
+   rpl = kvmi_msg_alloc();
+   if (!rpl)
+   ec = -KVM_ENOMEM;
+   else
+   kvm_vcpu_ioctl_x86_get_xsave(job->vcpu, &rpl->xsave);
+
+   err = kvmi_msg_vcpu_reply(job, msg, ec, rpl, sizeof(*rpl));
+
+   kvmi_msg_free(rpl);
+   return err;
+}
+
 static kvmi_vcpu_msg_job_fct const msg_vcpu[] = {
[KVMI_VCPU_CONTROL_CR]   = handle_vcpu_control_cr,
[KVMI_VCPU_GET_CPUID]= handle_vcpu_get_cpuid,
[KVMI_VCPU_GET_INFO] = handle_vcpu_get_info,
[KVMI_VCPU_GET_REGISTERS]= handle_vcpu_get_registers,
[KVMI_VCPU_GET_XCR]  = handle_vcpu_get_xcr,
+   [KVMI_VCPU_GET_XSAVE]= handle_vcpu_get_xsave,
[KVMI_VCPU_INJECT_EXCEPTION] = handle_vcpu_inject_exception,
[KVMI_VCPU_SET_REGISTERS]= handle_vcpu_set_registers,
 };
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 07b6d383641a..e47c4ce0f8ed 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -45,6 +45,7 @@ enum {
KVMI_VCPU_CONTROL_CR   = KVMI_VCPU_MESSAGE_ID(6),
KVMI_VCPU_INJECT_EXCEPTION = KVMI_VCPU_MESSAGE_ID(7),
KVMI_VCPU_GET_XCR  = KVMI_VCPU_MESSAGE_ID(8),
+   KVMI_VCPU_GET_XSAVE= KVMI_VCPU_MESSAGE_ID(9),
 
KVMI_NEXT_VCPU_MESSAGE
 };
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index da90c6a8d535..277b1061410b 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -1448,6 +1448,31 @@ static void test_cmd_vcpu_get_xcr(struct kvm_vm *vm)
cmd_vcpu_get_xcr(vm, xcr1, &value, -KVM_EINVAL);
 }
 
+static void cmd_vcpu_get_xsave(struct kvm_vm *vm)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   } req = {};
+   struct kvm_xsave rpl;
+
+   test_vcpu0_command(vm, KVMI_VCPU_GET_XSAVE, &req, sizeof(req),
+  &rpl, sizeof(rpl), 0);
+}
+
+static void test_cmd_vcpu_get_xsave(struct kvm_vm *vm)
+{
+   struct kvm_cpuid_entry2 *entry;
+
+   entry = kvm_get_supported_cpuid_entry(1);
+   if (!(entry->ecx & X86_FEATURE_XSAVE)) {
+   print_skip("XSAVE not supported, ecx 0x%x", entry->ecx);
+   return;
+   }
+
+   cmd_vcpu_get_xsave(vm);
+}
+
 static void test_introspection(struct kvm_vm *vm)
 {
srandom(time(0));
@@ -1476,6 +1501,7 @@ static void tes

[PATCH v11 74/81] KVM: introspection: add KVMI_VM_SET_PAGE_ACCESS

2020-12-07 Thread Adalbert Lazăr
From: Mihai Donțu 

This command sets the spte access bits (rwx) for an array of guest
physical addresses (through the page tracking subsystem).

These GPAs, with the requested access bits, are also kept in a radix
tree in order to filter out the #PF events which are of no interest to
the introspection tool and to reapply the settings when a memory slot
is moved.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  59 +
 arch/x86/include/asm/kvm_host.h   |   2 +
 arch/x86/include/asm/kvmi_host.h  |   7 ++
 arch/x86/kvm/kvmi.c   |  40 ++
 include/linux/kvmi_host.h |   3 +
 include/uapi/linux/kvmi.h |  20 +++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  |  50 
 virt/kvm/introspection/kvmi.c | 119 +-
 virt/kvm/introspection/kvmi_int.h |  10 ++
 virt/kvm/introspection/kvmi_msg.c |  59 +
 10 files changed, 368 insertions(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 7220f27ea5c3..dc96f935320a 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -965,6 +965,65 @@ to control events for any other register will fail with 
-KVM_EINVAL::
 * -KVM_EPERM  - the interception of the selected MSR is disallowed
 from userspace (KVM_X86_SET_MSR_FILTER)
 
+23. KVMI_VM_SET_PAGE_ACCESS
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_set_page_access {
+   __u16 count;
+   __u16 padding1;
+   __u32 padding2;
+   struct kvmi_page_access_entry entries[0];
+   };
+
+where::
+
+   struct kvmi_page_access_entry {
+   __u64 gpa;
+   __u8 access;
+   __u8 padding[7];
+   };
+
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Sets the access bits (rwx) for an array of ``count`` guest physical
+addresses (``gpa``).
+
+The valid access bits are::
+
+   KVMI_PAGE_ACCESS_R
+   KVMI_PAGE_ACCESS_W
+   KVMI_PAGE_ACCESS_X
+
+
+The command will fail with -KVM_EINVAL if any of the specified combinations
+of access bits is not supported or the address (``gpa``) is not valid
+(visible).
+
+The command will try to apply all changes and return the first error if
+any of them fails. The introspection tool should handle the rollback.
+
+In order to 'forget' an address, all three bits ('rwx') must be set.
+
+:Errors:
+
+* -KVM_EINVAL - the specified access bits combination is invalid
+* -KVM_EINVAL - the address is not valid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - the message size is invalid
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_ENOMEM - there is not enough memory to add the page tracking structures
+
 Events
 ==
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3639e50b6488..3503457f1de1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -45,6 +45,8 @@
 #define KVM_PRIVATE_MEM_SLOTS 3
 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 
+#include 
+
 #define KVM_HALT_POLL_NS_DEFAULT 20
 
 #define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 8822f0310156..420358c4a9ae 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -2,6 +2,7 @@
 #ifndef _ASM_X86_KVMI_HOST_H
 #define _ASM_X86_KVMI_HOST_H
 
+#include 
 #include 
 
 struct msr_data;
@@ -54,6 +55,12 @@ struct kvm_vcpu_arch_introspection {
 struct kvm_arch_introspection {
 };
 
+#define SLOTS_SIZE BITS_TO_LONGS(KVM_MEM_SLOTS_NUM)
+
+struct kvmi_arch_mem_access {
+   unsigned long active[KVM_PAGE_TRACK_MAX][SLOTS_SIZE];
+};
+
 #ifdef CONFIG_KVM_INTROSPECTION
 
 bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 dbg);
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index e325dad88dbb..acd4756e0d78 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -919,3 +919,43 @@ bool kvmi_msr_event(struct kvm_vcpu *vcpu, struct msr_data 
*msr)
 
return ret;
 }
+
+static const struct {
+   unsigned int allow_bit;
+   enum kvm_page_track_mode track_mode;
+} track_modes[] = {
+   { KVMI_PAGE_ACCESS_R, KVM_PAGE_TRACK_PREREAD },
+   { KVMI_PAGE_ACCESS_W, KVM_PAGE_TRACK_PREWRITE },
+   { KVMI_PAGE_ACCESS_X, KVM_PAGE_TRACK_PREEXEC },
+};
+
+void kvmi_arch_update_page_tracking(struct kvm *kvm,
+   struct kvm_memory_slot *slot,
+   struct kvmi_mem_access *m)
+{
+   struct kvmi_arch_mem_access *arch = >arch;
+   int i;
+
+   if (!slot) {
+   slot = gfn_to_memslot(kvm, 

[PATCH v11 69/81] KVM: introspection: add KVMI_VCPU_GET_MTRR_TYPE

2020-12-07 Thread Adalbert Lazăr
From: Mihai Donțu 

This command returns the memory type for a guest physical address.

Signed-off-by: Mihai Donțu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 32 +++
 arch/x86/include/uapi/asm/kvmi.h  |  9 ++
 arch/x86/kvm/kvmi_msg.c   | 17 ++
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 18 +++
 5 files changed, 77 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 70d1f96bc4f0..b3527d10a44e 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -887,6 +887,38 @@ Modifies the XSAVE area.
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+21. KVMI_VCPU_GET_MTRR_TYPE
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_get_mtrr_type {
+   __u64 gpa;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vcpu_get_mtrr_type_reply {
+   __u8 type;
+   __u8 padding[7];
+   };
+
+Returns the guest memory type for a specific guest physical address (``gpa``).
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 89f3dc9269c1..7b93450d0d62 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -119,4 +119,13 @@ struct kvmi_vcpu_set_xsave {
struct kvm_xsave xsave;
 };
 
+struct kvmi_vcpu_get_mtrr_type {
+   __u64 gpa;
+};
+
+struct kvmi_vcpu_get_mtrr_type_reply {
+   __u8 type;
+   __u8 padding[7];
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index 7df71b5cd50a..2617ada3a692 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg.c
@@ -232,10 +232,27 @@ static int handle_vcpu_set_xsave(const struct 
kvmi_vcpu_msg_job *job,
return kvmi_msg_vcpu_reply(job, msg, ec, NULL, 0);
 }
 
+static int handle_vcpu_get_mtrr_type(const struct kvmi_vcpu_msg_job *job,
+const struct kvmi_msg_hdr *msg,
+const void *_req)
+{
+   const struct kvmi_vcpu_get_mtrr_type *req = _req;
+   struct kvmi_vcpu_get_mtrr_type_reply rpl;
+   gfn_t gfn;
+
+   gfn = gpa_to_gfn(req->gpa);
+
+   memset(, 0, sizeof(rpl));
+   rpl.type = kvm_mtrr_get_guest_memory_type(job->vcpu, gfn);
+
+   return kvmi_msg_vcpu_reply(job, msg, 0, , sizeof(rpl));
+}
+
 static kvmi_vcpu_msg_job_fct const msg_vcpu[] = {
[KVMI_VCPU_CONTROL_CR]   = handle_vcpu_control_cr,
[KVMI_VCPU_GET_CPUID]= handle_vcpu_get_cpuid,
[KVMI_VCPU_GET_INFO] = handle_vcpu_get_info,
+   [KVMI_VCPU_GET_MTRR_TYPE]= handle_vcpu_get_mtrr_type,
[KVMI_VCPU_GET_REGISTERS]= handle_vcpu_get_registers,
[KVMI_VCPU_GET_XCR]  = handle_vcpu_get_xcr,
[KVMI_VCPU_GET_XSAVE]= handle_vcpu_get_xsave,
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 3baf5c7842bb..8d7c6027f12c 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -47,6 +47,7 @@ enum {
KVMI_VCPU_GET_XCR  = KVMI_VCPU_MESSAGE_ID(8),
KVMI_VCPU_GET_XSAVE= KVMI_VCPU_MESSAGE_ID(9),
KVMI_VCPU_SET_XSAVE= KVMI_VCPU_MESSAGE_ID(10),
+   KVMI_VCPU_GET_MTRR_TYPE= KVMI_VCPU_MESSAGE_ID(11),
 
KVMI_NEXT_VCPU_MESSAGE
 };
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 45c1f3132a3c..b0906c7fb954 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -1488,6 +1488,23 @@ static void test_cmd_vcpu_xsave(struct kvm_vm *vm)
cmd_vcpu_set_xsave(vm, );
 }
 
+static void test_cmd_vcpu_get_mtrr_type(struct kvm_vm *vm)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   struct kvmi_vcpu_get_mtrr_type cmd;
+   } req = {};
+   struct kvmi_vcpu_get_mtrr_type_reply rpl;
+
+   req.cmd.gpa = test_gpa;
+
+   test_vcpu0_command(vm, KVMI_VCPU_GET_MTRR_TYPE,
+  , sizeof(req), , sizeof(rpl), 0);
+
+   pr_debug("mtrr_type: gpa 0x%lx type 0x%x\n", test_gpa, rpl.type);
+}
+
 static void test_introspection(struct kvm_vm *vm)
 {
srandom(time(0));
@@ -1517,6 +1534,7 @@ static void test_introspection(struct kvm_vm *vm)
test_event_xsetbv(vm);
test_

[PATCH v11 48/81] KVM: introspection: add KVMI_VM_PAUSE_VCPU

2020-12-07 Thread Adalbert Lazăr
This command increments a pause requests counter for a vCPU and kicks
it out of guest.

The introspection tool can pause a VM by sending this command for all
vCPUs. If it sets 'wait=1', it can consider that the VM is paused when
it receives the reply for the last KVMI_VM_PAUSE_VCPU command.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 39 +++
 include/linux/kvmi_host.h |  2 +
 include/uapi/linux/kvmi.h |  8 
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 30 
 virt/kvm/introspection/kvmi.c | 47 +--
 virt/kvm/introspection/kvmi_int.h |  1 +
 virt/kvm/introspection/kvmi_msg.c | 24 ++
 7 files changed, 147 insertions(+), 4 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 902ced4dd0c4..a71fb78d546e 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -470,6 +470,45 @@ Returns the TSC frequency (in HZ) for the specified vCPU 
if available
 * -KVM_EINVAL - the selected vCPU is invalid
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+9. KVMI_VM_PAUSE_VCPU
+-
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_pause_vcpu {
+   __u16 vcpu;
+   __u8 wait;
+   __u8 padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+
+Kicks the vCPU out of guest.
+
+If `wait` is 1, the command will wait for the vCPU to acknowledge the IPI.
+
+The vCPU will handle the pending commands/events and send the
+*KVMI_VCPU_EVENT_PAUSE* event (one for every successful *KVMI_VM_PAUSE_VCPU*
+command) before returning to guest.
+
+:Errors:
+
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EBUSY  - the selected vCPU has too many queued
+*KVMI_VCPU_EVENT_PAUSE* events
+* -KVM_EPERM  - the *KVMI_VCPU_EVENT_PAUSE* event is disallowed
+
 Events
 ==
 
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index 736edb400c05..59e645d9ea34 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -18,6 +18,8 @@ struct kvm_vcpu_introspection {
 
struct list_head job_list;
spinlock_t job_lock;
+
+   atomic_t pause_requests;
 };
 
 struct kvm_introspection {
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index da766427231e..bb90d03f059b 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -26,6 +26,7 @@ enum {
KVMI_VM_CONTROL_EVENTS = KVMI_VM_MESSAGE_ID(5),
KVMI_VM_READ_PHYSICAL  = KVMI_VM_MESSAGE_ID(6),
KVMI_VM_WRITE_PHYSICAL = KVMI_VM_MESSAGE_ID(7),
+   KVMI_VM_PAUSE_VCPU = KVMI_VM_MESSAGE_ID(8),
 
KVMI_NEXT_VM_MESSAGE
 };
@@ -115,4 +116,11 @@ struct kvmi_vcpu_hdr {
__u32 padding2;
 };
 
+struct kvmi_vm_pause_vcpu {
+   __u16 vcpu;
+   __u8 wait;
+   __u8 padding1;
+   __u32 padding2;
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 9350ba8b7f9b..52765ca3f9c8 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -671,6 +671,35 @@ static void test_cmd_vcpu_get_info(struct kvm_vm *vm)
, sizeof(rpl), -KVM_EINVAL);
 }
 
+static void cmd_vcpu_pause(__u8 wait, int expected_err)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vm_pause_vcpu cmd;
+   } req = {};
+   __u16 vcpu_idx = 0;
+
+   req.cmd.wait = wait;
+   req.cmd.vcpu = vcpu_idx;
+
+   test_vm_command(KVMI_VM_PAUSE_VCPU, , sizeof(req), NULL, 0, 
expected_err);
+}
+
+static void pause_vcpu(void)
+{
+   cmd_vcpu_pause(1, 0);
+}
+
+static void test_pause(struct kvm_vm *vm)
+{
+   __u8 wait = 1, wait_inval = 2;
+
+   pause_vcpu();
+
+   cmd_vcpu_pause(wait, 0);
+   cmd_vcpu_pause(wait_inval, -KVM_EINVAL);
+}
+
 static void test_introspection(struct kvm_vm *vm)
 {
srandom(time(0));
@@ -686,6 +715,7 @@ static void test_introspection(struct kvm_vm *vm)
test_cmd_vm_control_events(vm);
test_memory_access(vm);
test_cmd_vcpu_get_info(vm);
+   test_pause(vm);
 
unhook_introspection(vm);
 }
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index 95677cb9a657..904362d00e62 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -17,6 +17,8 @@
 
 #define KVMI_MSG_SIZE_ALLOC (sizeof(struct kvmi_msg_hdr) + KVMI_MAX_MSG_SIZE)
 
+#define MAX_PAUSE_REQUESTS 1001
+
 static DECLARE_BITMAP(Kvmi_always_allowed_commands, KVMI_NUM_COMMANDS);
 static DECLARE_BITMAP(Kvmi_known_eve

[PATCH v11 71/81] KVM: introspection: restore the state of descriptor-table register interception on unhook

2020-12-07 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This commit also ensures that the introspection tool and the userspace
do not disable the descriptor-table access VM-exit for each other.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvmi_host.h |  4 +++
 arch/x86/kvm/kvmi.c  | 45 
 arch/x86/kvm/svm/svm.c   |  3 +++
 arch/x86/kvm/vmx/vmx.c   |  3 +++
 4 files changed, 55 insertions(+)

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index a24ba87036f7..a872277eba67 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -17,6 +17,7 @@ struct kvmi_interception {
bool restore_interception;
struct kvmi_monitor_interception breakpoint;
struct kvmi_monitor_interception cr3w;
+   struct kvmi_monitor_interception descriptor;
 };
 
 struct kvm_vcpu_arch_introspection {
@@ -48,6 +49,7 @@ bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu, bool 
enable);
 void kvmi_enter_guest(struct kvm_vcpu *vcpu);
 void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
   u64 old_value, u64 new_value);
+bool kvmi_monitor_desc_intercept(struct kvm_vcpu *vcpu, bool enable);
 bool kvmi_descriptor_event(struct kvm_vcpu *vcpu, u8 descriptor, bool write);
 
 #else /* CONFIG_KVM_INTROSPECTION */
@@ -64,6 +66,8 @@ static inline bool kvmi_monitor_cr3w_intercept(struct 
kvm_vcpu *vcpu,
 static inline void kvmi_enter_guest(struct kvm_vcpu *vcpu) { }
 static inline void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
u64 old_value, u64 new_value) { }
+static inline bool kvmi_monitor_desc_intercept(struct kvm_vcpu *vcpu,
+  bool enable) { return false; }
 static inline bool kvmi_descriptor_event(struct kvm_vcpu *vcpu, u8 descriptor,
 bool write) { return true; }
 
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 3d5b041de634..4106ae63a115 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -286,12 +286,52 @@ static void kvmi_arch_disable_cr3w_intercept(struct 
kvm_vcpu *vcpu)
vcpu->arch.kvmi->cr3w.kvm_intercepted = false;
 }
 
+/*
+ * Returns true if one side (kvm or kvmi) tries to disable the descriptor
+ * interception while the other side is still tracking it.
+ */
+bool kvmi_monitor_desc_intercept(struct kvm_vcpu *vcpu, bool enable)
+{
+   struct kvmi_interception *arch_vcpui = READ_ONCE(vcpu->arch.kvmi);
+
+   return (arch_vcpui && arch_vcpui->descriptor.monitor_fct(vcpu, enable));
+}
+EXPORT_SYMBOL(kvmi_monitor_desc_intercept);
+
+static bool monitor_desc_fct_kvmi(struct kvm_vcpu *vcpu, bool enable)
+{
+   vcpu->arch.kvmi->descriptor.kvmi_intercepted = enable;
+
+   if (enable)
+   vcpu->arch.kvmi->descriptor.kvm_intercepted =
+   kvm_x86_ops.desc_intercepted(vcpu);
+   else if (vcpu->arch.kvmi->descriptor.kvm_intercepted)
+   return true;
+
+   return false;
+}
+
+static bool monitor_desc_fct_kvm(struct kvm_vcpu *vcpu, bool enable)
+{
+   if (!vcpu->arch.kvmi->descriptor.kvmi_intercepted)
+   return false;
+
+   vcpu->arch.kvmi->descriptor.kvm_intercepted = enable;
+
+   if (!enable)
+   return true;
+
+   return false;
+}
+
 static int kvmi_control_desc_intercept(struct kvm_vcpu *vcpu, bool enable)
 {
if (!kvm_x86_ops.desc_ctrl_supported())
return -KVM_EOPNOTSUPP;
 
+   vcpu->arch.kvmi->descriptor.monitor_fct = monitor_desc_fct_kvmi;
kvm_x86_ops.control_desc_intercept(vcpu, enable);
+   vcpu->arch.kvmi->descriptor.monitor_fct = monitor_desc_fct_kvm;
 
return 0;
 }
@@ -299,6 +339,9 @@ static int kvmi_control_desc_intercept(struct kvm_vcpu 
*vcpu, bool enable)
 static void kvmi_arch_disable_desc_intercept(struct kvm_vcpu *vcpu)
 {
kvmi_control_desc_intercept(vcpu, false);
+
+   vcpu->arch.kvmi->descriptor.kvmi_intercepted = false;
+   vcpu->arch.kvmi->descriptor.kvm_intercepted = false;
 }
 
 int kvmi_arch_cmd_control_intercept(struct kvm_vcpu *vcpu,
@@ -370,11 +413,13 @@ bool kvmi_arch_vcpu_alloc_interception(struct kvm_vcpu 
*vcpu)
 
arch_vcpui->breakpoint.monitor_fct = monitor_bp_fct_kvm;
arch_vcpui->cr3w.monitor_fct = monitor_cr3w_fct_kvm;
+   arch_vcpui->descriptor.monitor_fct = monitor_desc_fct_kvm;
 
/*
 * paired with:
 *  - kvmi_monitor_bp_intercept()
 *  - kvmi_monitor_cr3w_intercept()
+*  - kvmi_monitor_desc_intercept()
 */
smp_wmb();
WRITE_ONCE(vcpu->arch.kvmi, arch_vcpui);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 5b689d3fe3e4..834e4b6c4112 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1670,6

[PATCH v11 75/81] KVM: introspection: add KVMI_VCPU_EVENT_PF

2020-12-07 Thread Adalbert Lazăr
From: Mihai Donțu 

This event is sent when a #PF occurs due to a failed permission check
in the shadow page tables, for a page in which the introspection tool
has shown interest.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  66 ++
 arch/x86/include/asm/kvmi_host.h  |   1 +
 arch/x86/kvm/kvmi.c   | 122 ++
 include/uapi/linux/kvmi.h |  10 ++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  |  76 +++
 virt/kvm/introspection/kvmi.c | 116 +
 virt/kvm/introspection/kvmi_int.h |   7 +
 virt/kvm/introspection/kvmi_msg.c |  19 +++
 8 files changed, 417 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index dc96f935320a..c5afb4c91ca2 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -543,6 +543,7 @@ the following events::
KVMI_VCPU_EVENT_DESCRIPTOR
KVMI_VCPU_EVENT_HYPERCALL
KVMI_VCPU_EVENT_MSR
+   KVMI_VCPU_EVENT_PF
KVMI_VCPU_EVENT_XSETBV
 
 When an event is enabled, the introspection tool is notified and
@@ -1398,3 +1399,68 @@ register (see **KVMI_VCPU_CONTROL_EVENTS**).
 ``kvmi_vcpu_event`` (with the vCPU state), the MSR number (``msr``),
 the old value (``old_value``) and the new value (``new_value``) are sent
 to the introspection tool. The *CONTINUE* action will set the ``new_val``.
+
+10. KVMI_VCPU_EVENT_PF
+--
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, CRASH, RETRY
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_event;
+   struct kvmi_vcpu_event_pf {
+   __u64 gva;
+   __u64 gpa;
+   __u8 access;
+   __u8 padding1;
+   __u16 padding2;
+   __u32 padding3;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_event_reply;
+
+This event is sent when a hypervisor page fault occurs due to a failed
+permission check, the introspection has been enabled for this event
+(see *KVMI_VCPU_CONTROL_EVENTS*) and the event was generated for a
+page in which the introspection tool has shown interest (ie. has
+previously touched it by adjusting the spte permissions; see
+*KVMI_VM_SET_PAGE_ACCESS*).
+
+These permissions can be used by the introspection tool to guarantee
+the purpose of code areas inside the guest (code, rodata, stack, heap
+etc.) Each attempt at an operation unfitting for a certain memory
+range (eg. execute code in heap) triggers a page fault and gives the
+introspection tool the chance to audit the code attempting the operation.
+
+``kvmi_vcpu_event`` (with the vCPU state), guest virtual address (``gva``)
+if available or ~0 (UNMAPPED_GVA), guest physical address (``gpa``)
+and the ``access`` flags (e.g. KVMI_PAGE_ACCESS_R) are sent to the
+introspection tool.
+
+In case of a restricted read access, the guest address is the location
+of the memory being read. On write access, the guest address is the
+location of the memory being written. On execute access, the guest
+address is the location of the instruction being executed
+(``gva == kvmi_vcpu_event.arch.regs.rip``).
+
+In the current implementation, most of these events are sent during
+emulation. If the page fault has set more than one access bit
+(e.g. r-x/-rw), the introspection tool may receive more than one
+KVMI_VCPU_EVENT_PF and the order depends on the KVM emulator. Another
+cause of multiple events is when the page fault is triggered on access
+crossing the page boundary.
+
+The *CONTINUE* action will continue the page fault handling (e.g. via
+emulation).
+
+The *RETRY* action is used by the introspection tool to retry the
+execution of the current instruction, usually because it changed the
+instruction pointer or the page restrictions.
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 420358c4a9ae..31500d3ff69d 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -53,6 +53,7 @@ struct kvm_vcpu_arch_introspection {
 };
 
 struct kvm_arch_introspection {
+   struct kvm_page_track_notifier_node kptn_node;
 };
 
 #define SLOTS_SIZE BITS_TO_LONGS(KVM_MEM_SLOTS_NUM)
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index acd4756e0d78..cd64762643d6 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -17,10 +17,26 @@ void kvmi_arch_init_vcpu_events_mask(unsigned long 
*supported)
set_bit(KVMI_VCPU_EVENT_HYPERCALL, supported);
set_bit(KVMI_VCPU_EVENT_DESCRIPTOR, supported);
set_bit(KVMI_VCPU_EVENT_MSR, supported);
+   set_bit(KVMI_VCPU_EVENT_PF, supported);
set_bit(KVMI_VCPU_EVENT_TRAP, supported);
set_bit(KVMI_VCPU_EVENT_XSETBV, supported);
 }
 
+static bool kvmi_track_preread(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t 

[PATCH v11 70/81] KVM: introspection: add KVMI_VCPU_EVENT_DESCRIPTOR

2020-12-07 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This event is sent when IDTR, GDTR, LDTR or TR are accessed.

These could be used to implement a tiny agent which runs in the context
of an introspected guest and uses virtualized exceptions (#VE) and
alternate EPT views (VMFUNC #0) to filter converted VMEXITS. The events
of interest will be suppressed (after some appropriate guest-side
handling) while the rest will be sent to the introspector via a VMCALL.

Signed-off-by: Nicușor Cîțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 43 +++
 arch/x86/include/asm/kvmi_host.h  |  3 +
 arch/x86/include/uapi/asm/kvmi.h  | 13 
 arch/x86/kvm/kvmi.c   | 58 ++
 arch/x86/kvm/kvmi.h   |  1 +
 arch/x86/kvm/kvmi_msg.c   | 19 +
 arch/x86/kvm/svm/svm.c| 33 
 arch/x86/kvm/vmx/vmx.c| 23 ++
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 75 +++
 10 files changed, 269 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index b3527d10a44e..2bfb2bf0e778 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -540,6 +540,7 @@ the following events::
 
KVMI_VCPU_EVENT_BREAKPOINT
KVMI_VCPU_EVENT_CR
+   KVMI_VCPU_EVENT_DESCRIPTOR
KVMI_VCPU_EVENT_HYPERCALL
KVMI_VCPU_EVENT_XSETBV
 
@@ -563,6 +564,8 @@ the *KVMI_VM_CONTROL_EVENTS* command.
 * -KVM_EINVAL - the event ID is unknown (use *KVMI_VM_CHECK_EVENT* first)
 * -KVM_EPERM - the access is disallowed (use *KVMI_VM_CHECK_EVENT* first)
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EOPNOTSUPP - the event can't be intercepted in the current setup
+(e.g. KVMI_VCPU_EVENT_DESCRIPTOR with AMD)
 * -KVM_EBUSY - the event can't be intercepted right now
(e.g. KVMI_VCPU_EVENT_BREAKPOINT if the #BP event
 is already intercepted by userspace)
@@ -1217,3 +1220,43 @@ to be changed and the introspection has been enabled for 
this event
 ``kvmi_vcpu_event`` (with the vCPU state), the extended control register
 number (``xcr``), the old value (``old_value``) and the new value
 (``new_value``) are sent to the introspection tool.
+
+8. KVMI_VCPU_EVENT_DESCRIPTOR
+-
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, RETRY, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_event;
+   struct kvmi_vcpu_event_descriptor {
+   __u8 descriptor;
+   __u8 write;
+   __u8 padding[6];
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_event_reply;
+
+This event is sent when a descriptor table register is accessed and the
+introspection has been enabled for this event (see 
**KVMI_VCPU_CONTROL_EVENTS**).
+
+``kvmi_vcpu_event`` (with the vCPU state), the descriptor-table register
+(``descriptor``) and the access type (``write``) are sent to the
+introspection tool.
+
+``descriptor`` can be one of::
+
+   KVMI_DESC_IDTR
+   KVMI_DESC_GDTR
+   KVMI_DESC_LDTR
+   KVMI_DESC_TR
+
+``write`` is 1 if the descriptor was written, 0 otherwise.
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index d66349208a6b..a24ba87036f7 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -48,6 +48,7 @@ bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu, bool 
enable);
 void kvmi_enter_guest(struct kvm_vcpu *vcpu);
 void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
   u64 old_value, u64 new_value);
+bool kvmi_descriptor_event(struct kvm_vcpu *vcpu, u8 descriptor, bool write);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
@@ -63,6 +64,8 @@ static inline bool kvmi_monitor_cr3w_intercept(struct 
kvm_vcpu *vcpu,
 static inline void kvmi_enter_guest(struct kvm_vcpu *vcpu) { }
 static inline void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
u64 old_value, u64 new_value) { }
+static inline bool kvmi_descriptor_event(struct kvm_vcpu *vcpu, u8 descriptor,
+bool write) { return true; }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 7b93450d0d62..9c608ef5daa3 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -128,4 +128,17 @@ struct kvmi_vcpu_get_mtrr_type_reply {
__u8 padding[7];
 };
 
+enum {
+   KVMI_DESC_IDTR = 1,
+   KVMI_DESC_GDTR = 2,
+   KVMI_DESC_LDTR = 3,
+   KVMI_DESC_TR   = 4,
+};
+
+struct kvmi_vcpu_event_descriptor {
+   __u8 descriptor;
+   __u8 write;
+   __u8 padding[6];
+};
+
 #endif /* _UAPI_ASM_X86_KVM

[PATCH v11 63/81] KVM: introspection: add KVMI_VCPU_INJECT_EXCEPTION + KVMI_VCPU_EVENT_TRAP

2020-12-07 Thread Adalbert Lazăr
From: Mihai Donțu 

The KVMI_VCPU_INJECT_EXCEPTION command is used by the introspection tool
to inject exceptions, for example, to get a page from swap.

The exception is injected right before entering in guest unless there is
already an exception pending. The introspection tool is notified with
a KVMI_VCPU_EVENT_TRAP event about the success of the injection. In
case of failure, the introspection tool is expected to try again later.

Signed-off-by: Mihai Donțu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  76 +++
 arch/x86/include/asm/kvmi_host.h  |  11 ++
 arch/x86/include/uapi/asm/kvmi.h  |  16 +++
 arch/x86/kvm/kvmi.c   | 110 
 arch/x86/kvm/kvmi.h   |   3 +
 arch/x86/kvm/kvmi_msg.c   |  52 +++-
 arch/x86/kvm/x86.c|   2 +
 include/uapi/linux/kvmi.h |  14 +-
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 124 ++
 virt/kvm/introspection/kvmi.c |   2 +
 virt/kvm/introspection/kvmi_int.h |   4 +
 virt/kvm/introspection/kvmi_msg.c |  16 ++-
 12 files changed, 416 insertions(+), 14 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 85e14b82aa2f..e688ac387faf 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -550,6 +550,7 @@ because these are sent as a result of certain commands (but 
they can be
 disallowed by the device manager) ::
 
KVMI_VCPU_EVENT_PAUSE
+   KVMI_VCPU_EVENT_TRAP
 
 The VM events (e.g. *KVMI_VM_EVENT_UNHOOK*) are controlled with
 the *KVMI_VM_CONTROL_EVENTS* command.
@@ -736,6 +737,46 @@ ID set.
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+16. KVMI_VCPU_INJECT_EXCEPTION
+--
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_inject_exception {
+   __u8 nr;
+   __u8 padding1;
+   __u16 padding2;
+   __u32 error_code;
+   __u64 address;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Injects a vCPU exception (``nr``) with or without an error code 
(``error_code``).
+For page fault exceptions, the guest virtual address (``address``)
+has to be specified too.
+
+The *KVMI_VCPU_EVENT_TRAP* event will be sent with the effective injected
+exception.
+
+:Errors:
+
+* -KVM_EPERM  - the *KVMI_VCPU_EVENT_TRAP* event is disallowed
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EBUSY - another *KVMI_VCPU_INJECT_EXCEPTION*-*KVMI_VCPU_EVENT_TRAP*
+   pair is in progress
+
 Events
 ==
 
@@ -966,3 +1007,38 @@ register (see **KVMI_VCPU_CONTROL_EVENTS**).
 (``cr``), the old value (``old_value``) and the new value (``new_value``)
 are sent to the introspection tool. The *CONTINUE* action will set the
 ``new_val``.
+
+6. KVMI_VCPU_EVENT_TRAP
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_event;
+   struct kvmi_vcpu_event_trap {
+   __u8 nr;
+   __u8 padding1;
+   __u16 padding2;
+   __u32 error_code;
+   __u64 address;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_event_reply;
+
+This event is sent if a previous *KVMI_VCPU_INJECT_EXCEPTION* command
+took place. Because it has a high priority, it will be sent before any
+other vCPU introspection event.
+
+``kvmi_vcpu_event`` (with the vCPU state), exception/interrupt number
+(``nr``), exception code (``error_code``) and ``address`` are sent to
+the introspection tool, which should check if its exception has been
+injected or overridden.
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index edbedf031467..97f5b1a01c9e 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -24,6 +24,15 @@ struct kvm_vcpu_arch_introspection {
bool have_delayed_regs;
 
DECLARE_BITMAP(cr_mask, KVMI_NUM_CR);
+
+   struct {
+   u8 nr;
+   u32 error_code;
+   bool error_code_valid;
+   u64 address;
+   bool pending;
+   bool send_event;
+   } exception;
 };
 
 struct kvm_arch_introspection {
@@ -36,6 +45,7 @@ bool kvmi_cr_event(struct kvm_vcpu *vcpu, unsigned int cr,
   unsigned long old_value, unsigned long *new_value);
 bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu);
 bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu

[PATCH v11 37/81] KVM: introspection: add KVMI_VM_CHECK_COMMAND and KVMI_VM_CHECK_EVENT

2020-12-07 Thread Adalbert Lazăr
These commands are used to check what introspection commands and events
are supported (kernel) and allowed (device manager).

These are alternative methods to KVMI_GET_VERSION in checking if the
introspection supports a specific command/event.

As with the KVMI_GET_VERSION command, these two commands can never be
disallowed by the device manager.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 62 +++
 include/uapi/linux/kvmi.h | 16 -
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 45 ++
 virt/kvm/introspection/kvmi.c | 19 ++
 virt/kvm/introspection/kvmi_int.h |  2 +
 virt/kvm/introspection/kvmi_msg.c | 40 +++-
 6 files changed, 182 insertions(+), 2 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index d3d672a07872..13169575f75f 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -250,3 +250,65 @@ larger messages.
 The introspection tool should use this command to identify the features
 supported by the kernel side and what messages must be used for event
 replies.
+
+2. KVMI_VM_CHECK_COMMAND
+
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_check_command {
+   __u16 id;
+   __u16 padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+
+Checks if the command specified by ``id`` is supported and allowed.
+
+This command is always allowed.
+
+:Errors:
+
+* -KVM_ENOENT - the command specified by ``id`` is unsupported
+* -KVM_EPERM - the command specified by ``id`` is disallowed
+* -KVM_EINVAL - the padding is not zero
+
+3. KVMI_VM_CHECK_EVENT
+--
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_check_event {
+   __u16 id;
+   __u16 padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+
+Checks if the event specified by ``id`` is supported and allowed.
+
+This command is always allowed.
+
+:Errors:
+
+* -KVM_ENOENT - the event specified by ``id`` is unsupported
+* -KVM_EPERM - the event specified by ``id`` is disallowed
+* -KVM_EINVAL - the padding is not zero
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 77dd727dfe18..0c2d0cedde6f 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -17,7 +17,9 @@ enum {
 #define KVMI_VCPU_MESSAGE_ID(id) (((id) << 1) | 1)
 
 enum {
-   KVMI_GET_VERSION = KVMI_VM_MESSAGE_ID(1),
+   KVMI_GET_VERSION  = KVMI_VM_MESSAGE_ID(1),
+   KVMI_VM_CHECK_COMMAND = KVMI_VM_MESSAGE_ID(2),
+   KVMI_VM_CHECK_EVENT   = KVMI_VM_MESSAGE_ID(3),
 
KVMI_NEXT_VM_MESSAGE
 };
@@ -53,4 +55,16 @@ struct kvmi_get_version_reply {
__u32 max_msg_size;
 };
 
+struct kvmi_vm_check_command {
+   __u16 id;
+   __u16 padding1;
+   __u32 padding2;
+};
+
+struct kvmi_vm_check_event {
+   __u16 id;
+   __u16 padding1;
+   __u32 padding2;
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 30acd3a2d030..cd8f16a3ce3a 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -93,6 +93,8 @@ static void hook_introspection(struct kvm_vm *vm)
do_hook_ioctl(vm, Kvm_socket, EEXIST);
 
set_command_perm(vm, KVMI_GET_VERSION, disallow, EPERM);
+   set_command_perm(vm, KVMI_VM_CHECK_COMMAND, disallow, EPERM);
+   set_command_perm(vm, KVMI_VM_CHECK_EVENT, disallow, EPERM);
set_command_perm(vm, all_IDs, allow_inval, EINVAL);
set_command_perm(vm, all_IDs, disallow, 0);
set_command_perm(vm, all_IDs, allow, 0);
@@ -241,6 +243,47 @@ static void test_cmd_get_version(void)
pr_debug("Max message size: %u\n", rpl.max_msg_size);
 }
 
+static void cmd_vm_check_command(__u16 id, int expected_err)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vm_check_command cmd;
+   } req = {};
+
+   req.cmd.id = id;
+
+   test_vm_command(KVMI_VM_CHECK_COMMAND, &req, sizeof(req), NULL, 0,
+   expected_err);
+}
+
+static void test_cmd_vm_check_command(void)
+{
+   __u16 valid_id = KVMI_GET_VERSION, invalid_id = 0x;
+
+   cmd_vm_check_command(valid_id, 0);
+   cmd_vm_check_command(invalid_id, -KVM_ENOENT);
+}
+
+static void cmd_vm_check_event(__u16 id, int expected_err)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vm_check_event cmd;
+   } req = {};
+
+   req.cmd.id = id;
+
+   test_vm_command(KVMI_VM_CHECK_EVENT, &req, sizeof(req), NULL, 0,
+   expected_err);
+}
+
+static void t

[PATCH v11 64/81] KVM: introspection: add KVMI_VM_GET_MAX_GFN

2020-12-07 Thread Adalbert Lazăr
From: Ștefan Șicleru 

The introspection tool will use this command to get the memory address
range for which it can set access restrictions.

Signed-off-by: Ștefan Șicleru 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 19 +++
 include/uapi/linux/kvmi.h |  5 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 12 
 virt/kvm/introspection/kvmi_msg.c | 13 +
 4 files changed, 49 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index e688ac387faf..ecf4207b42d0 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -777,6 +777,25 @@ exception.
 * -KVM_EBUSY - another *KVMI_VCPU_INJECT_EXCEPTION*-*KVMI_VCPU_EVENT_TRAP*
pair is in progress
 
+17. KVMI_VM_GET_MAX_GFN
+---
+
+:Architectures: all
+:Versions: >= 1
+:Parameters: none
+:Returns:
+
+::
+
+struct kvmi_error_code;
+struct kvmi_vm_get_max_gfn_reply {
+__u64 gfn;
+};
+
+Provides the maximum GFN allocated to the VM by walking through all
+memory slots. Strictly speaking, the returned value refers to the first
+inaccessible GFN, next to the maximum accessible GFN.
+
 Events
 ==
 
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 263d98a5903e..d0e06363c407 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -29,6 +29,7 @@ enum {
KVMI_VM_WRITE_PHYSICAL  = KVMI_VM_MESSAGE_ID(7),
KVMI_VM_PAUSE_VCPU  = KVMI_VM_MESSAGE_ID(8),
KVMI_VM_CONTROL_CLEANUP = KVMI_VM_MESSAGE_ID(9),
+   KVMI_VM_GET_MAX_GFN = KVMI_VM_MESSAGE_ID(10),
 
KVMI_NEXT_VM_MESSAGE
 };
@@ -177,4 +178,8 @@ struct kvmi_vm_control_cleanup {
__u8 padding[7];
 };
 
+struct kvmi_vm_get_max_gfn_reply {
+   __u64 gfn;
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index dc9f2f0d99e8..b4565802db22 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -1322,6 +1322,17 @@ static void test_cmd_vcpu_inject_exception(struct kvm_vm 
*vm)
disable_vcpu_event(vm, KVMI_VCPU_EVENT_BREAKPOINT);
 }
 
+static void test_cmd_vm_get_max_gfn(void)
+{
+   struct kvmi_vm_get_max_gfn_reply rpl;
+   struct kvmi_msg_hdr req;
+
+   test_vm_command(KVMI_VM_GET_MAX_GFN, &req, sizeof(req),
+   &rpl, sizeof(rpl), 0);
+
+   pr_debug("max_gfn: 0x%llx\n", rpl.gfn);
+}
+
 static void test_introspection(struct kvm_vm *vm)
 {
srandom(time(0));
@@ -1347,6 +1358,7 @@ static void test_introspection(struct kvm_vm *vm)
test_cmd_vm_control_cleanup(vm);
test_cmd_vcpu_control_cr(vm);
test_cmd_vcpu_inject_exception(vm);
+   test_cmd_vm_get_max_gfn();
 
unhook_introspection(vm);
 }
diff --git a/virt/kvm/introspection/kvmi_msg.c 
b/virt/kvm/introspection/kvmi_msg.c
index 14ced3d8f648..30692d84a247 100644
--- a/virt/kvm/introspection/kvmi_msg.c
+++ b/virt/kvm/introspection/kvmi_msg.c
@@ -290,6 +290,18 @@ static int handle_vm_control_cleanup(struct 
kvm_introspection *kvmi,
return kvmi_msg_vm_reply(kvmi, msg, ec, NULL, 0);
 }
 
+static int handle_vm_get_max_gfn(struct kvm_introspection *kvmi,
+const struct kvmi_msg_hdr *msg,
+const void *req)
+{
+   struct kvmi_vm_get_max_gfn_reply rpl;
+
+   memset(&rpl, 0, sizeof(rpl));
+   rpl.gfn = kvm_get_max_gfn(kvmi->kvm);
+
+   return kvmi_msg_vm_reply(kvmi, msg, 0, &rpl, sizeof(rpl));
+}
+
 /*
  * These commands are executed by the receiving thread.
  */
@@ -300,6 +312,7 @@ static kvmi_vm_msg_fct const msg_vm[] = {
[KVMI_VM_CONTROL_CLEANUP] = handle_vm_control_cleanup,
[KVMI_VM_CONTROL_EVENTS]  = handle_vm_control_events,
[KVMI_VM_GET_INFO]= handle_vm_get_info,
+   [KVMI_VM_GET_MAX_GFN] = handle_vm_get_max_gfn,
[KVMI_VM_PAUSE_VCPU]  = handle_vm_pause_vcpu,
[KVMI_VM_READ_PHYSICAL]   = handle_vm_read_physical,
[KVMI_VM_WRITE_PHYSICAL]  = handle_vm_write_physical,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v11 65/81] KVM: introspection: add KVMI_VCPU_EVENT_XSETBV

2020-12-07 Thread Adalbert Lazăr
From: Mihai Donțu 

This event is sent when an extended control register XCR is going to
be changed.

Signed-off-by: Mihai Donțu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 34 
 arch/x86/include/asm/kvmi_host.h  |  4 +
 arch/x86/include/uapi/asm/kvmi.h  |  7 ++
 arch/x86/kvm/kvmi.c   | 30 +++
 arch/x86/kvm/kvmi.h   |  2 +
 arch/x86/kvm/kvmi_msg.c   | 20 +
 arch/x86/kvm/x86.c|  6 ++
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 84 +++
 9 files changed, 188 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index ecf4207b42d0..24dc1867c1f1 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -541,6 +541,7 @@ the following events::
KVMI_VCPU_EVENT_BREAKPOINT
KVMI_VCPU_EVENT_CR
KVMI_VCPU_EVENT_HYPERCALL
+   KVMI_VCPU_EVENT_XSETBV
 
 When an event is enabled, the introspection tool is notified and
 must reply with: continue, retry, crash, etc. (see **Events** below).
@@ -1061,3 +1062,36 @@ other vCPU introspection event.
 (``nr``), exception code (``error_code``) and ``address`` are sent to
 the introspection tool, which should check if its exception has been
 injected or overridden.
+
+7. KVMI_VCPU_EVENT_XSETBV
+-
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_event;
+   struct kvmi_vcpu_event_xsetbv {
+   __u8 xcr;
+   __u8 padding[7];
+   __u64 old_value;
+   __u64 new_value;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_event_reply;
+
+This event is sent when an extended control register XCR is going
+to be changed and the introspection has been enabled for this event
+(see *KVMI_VCPU_CONTROL_EVENTS*).
+
+``kvmi_vcpu_event`` (with the vCPU state), the extended control register
+number (``xcr``), the old value (``old_value``) and the new value
+(``new_value``) are sent to the introspection tool.
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 97f5b1a01c9e..d66349208a6b 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -46,6 +46,8 @@ bool kvmi_cr_event(struct kvm_vcpu *vcpu, unsigned int cr,
 bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu);
 bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu, bool enable);
 void kvmi_enter_guest(struct kvm_vcpu *vcpu);
+void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
+  u64 old_value, u64 new_value);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
@@ -59,6 +61,8 @@ static inline bool kvmi_cr3_intercepted(struct kvm_vcpu 
*vcpu) { return false; }
 static inline bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu,
bool enable) { return false; }
 static inline void kvmi_enter_guest(struct kvm_vcpu *vcpu) { }
+static inline void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
+   u64 old_value, u64 new_value) { }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index aa991fbab473..604a8b3d4ac2 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -95,4 +95,11 @@ struct kvmi_vcpu_inject_exception {
__u64 address;
 };
 
+struct kvmi_vcpu_event_xsetbv {
+   __u8 xcr;
+   __u8 padding[7];
+   __u64 old_value;
+   __u64 new_value;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 52b46d56ebb5..5219b6faf4b5 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -16,6 +16,7 @@ void kvmi_arch_init_vcpu_events_mask(unsigned long *supported)
set_bit(KVMI_VCPU_EVENT_CR, supported);
set_bit(KVMI_VCPU_EVENT_HYPERCALL, supported);
set_bit(KVMI_VCPU_EVENT_TRAP, supported);
+   set_bit(KVMI_VCPU_EVENT_XSETBV, supported);
 }
 
 static unsigned int kvmi_vcpu_mode(const struct kvm_vcpu *vcpu,
@@ -567,3 +568,32 @@ void kvmi_arch_send_pending_event(struct kvm_vcpu *vcpu)
kvmi_send_trap_event(vcpu);
}
 }
+
+static void __kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
+   u64 old_value, u64 new_value)
+{
+   u32 action;
+
+   action = kvmi_msg_send_vcpu_xsetbv(vcpu, xcr, old_value, new_value);
+   switch (action) {
+   case KVMI_EVENT_ACTION_CONTINUE:
+   break;
+   default:
+   kvmi_handle_common_event_actions(vcpu, action);
+   }
+}
+
+void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 

[PATCH v11 43/81] KVM: introspection: add vCPU related data

2020-12-07 Thread Adalbert Lazăr
From: Mircea Cîrjaliu 

Add an introspection structure to all vCPUs when the VM is hooked.

Signed-off-by: Mircea Cîrjaliu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvmi_host.h |  3 ++
 include/linux/kvm_host.h |  1 +
 include/linux/kvmi_host.h|  6 
 virt/kvm/introspection/kvmi.c| 51 
 virt/kvm/kvm_main.c  |  2 ++
 5 files changed, 63 insertions(+)

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 38c398262913..360a57dd9019 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -2,6 +2,9 @@
 #ifndef _ASM_X86_KVMI_HOST_H
 #define _ASM_X86_KVMI_HOST_H
 
+struct kvm_vcpu_arch_introspection {
+};
+
 struct kvm_arch_introspection {
 };
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f3110e092ad0..9441008b18be 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -321,6 +321,7 @@ struct kvm_vcpu {
bool ready;
struct kvm_vcpu_arch arch;
struct kvm_dirty_ring dirty_ring;
+   struct kvm_vcpu_introspection *kvmi;
 };
 
 static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index a59307dac6bf..9b0008c66321 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -6,6 +6,10 @@
 
 #include 
 
+struct kvm_vcpu_introspection {
+   struct kvm_vcpu_arch_introspection arch;
+};
+
 struct kvm_introspection {
struct kvm_arch_introspection arch;
struct kvm *kvm;
@@ -28,6 +32,7 @@ int kvmi_init(void);
 void kvmi_uninit(void);
 void kvmi_create_vm(struct kvm *kvm);
 void kvmi_destroy_vm(struct kvm *kvm);
+void kvmi_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 int kvmi_ioctl_hook(struct kvm *kvm,
const struct kvm_introspection_hook *hook);
@@ -45,6 +50,7 @@ static inline int kvmi_init(void) { return 0; }
 static inline void kvmi_uninit(void) { }
 static inline void kvmi_create_vm(struct kvm *kvm) { }
 static inline void kvmi_destroy_vm(struct kvm *kvm) { }
+static inline void kvmi_vcpu_uninit(struct kvm_vcpu *vcpu) { }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index c99c27c249ea..5bccc3fb9ff2 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -118,8 +118,41 @@ void kvmi_uninit(void)
kvmi_cache_destroy();
 }
 
+static bool kvmi_alloc_vcpui(struct kvm_vcpu *vcpu)
+{
+   struct kvm_vcpu_introspection *vcpui;
+
+   vcpui = kzalloc(sizeof(*vcpui), GFP_KERNEL);
+   if (!vcpui)
+   return false;
+
+   vcpu->kvmi = vcpui;
+
+   return true;
+}
+
+static int kvmi_create_vcpui(struct kvm_vcpu *vcpu)
+{
+   if (!kvmi_alloc_vcpui(vcpu))
+   return -ENOMEM;
+
+   return 0;
+}
+
+static void kvmi_free_vcpui(struct kvm_vcpu *vcpu)
+{
+   kfree(vcpu->kvmi);
+   vcpu->kvmi = NULL;
+}
+
 static void kvmi_free(struct kvm *kvm)
 {
+   struct kvm_vcpu *vcpu;
+   int i;
+
+   kvm_for_each_vcpu(i, vcpu, kvm)
+   kvmi_free_vcpui(vcpu);
+
bitmap_free(kvm->kvmi->cmd_allow_mask);
bitmap_free(kvm->kvmi->event_allow_mask);
bitmap_free(kvm->kvmi->vm_event_enable_mask);
@@ -128,10 +161,19 @@ static void kvmi_free(struct kvm *kvm)
kvm->kvmi = NULL;
 }
 
+void kvmi_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+   mutex_lock(&vcpu->kvm->kvmi_lock);
+   kvmi_free_vcpui(vcpu);
+   mutex_unlock(&vcpu->kvm->kvmi_lock);
+}
+
 static struct kvm_introspection *
 kvmi_alloc(struct kvm *kvm, const struct kvm_introspection_hook *hook)
 {
struct kvm_introspection *kvmi;
+   struct kvm_vcpu *vcpu;
+   int i;
 
kvmi = kzalloc(sizeof(*kvmi), GFP_KERNEL);
if (!kvmi)
@@ -157,6 +199,15 @@ kvmi_alloc(struct kvm *kvm, const struct 
kvm_introspection_hook *hook)
 
	atomic_set(&kvmi->ev_seq, 0);
 
+   kvm_for_each_vcpu(i, vcpu, kvm) {
+   int err = kvmi_create_vcpui(vcpu);
+
+   if (err) {
+   kvmi_free(kvm);
+   return NULL;
+   }
+   }
+
kvmi->kvm = kvm;
 
return kvmi;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0af64c12a2a9..783eec72bc73 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -421,6 +421,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm 
*kvm, unsigned id)
 
 void kvm_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+   kvmi_vcpu_uninit(vcpu);
	kvm_dirty_ring_free(&vcpu->dirty_ring);
kvm_arch_vcpu_destroy(vcpu);
 
@@ -3253,6 +3254,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 
id)
 
 unlock_vcpu_destroy:
	mutex_unlock(&kvm->lock);
+   kvmi_vcpu_uninit(vcpu);
kvm_dirty_ring_free(>dirty_ring

[PATCH v11 47/81] KVM: introspection: add KVMI_VCPU_GET_INFO

2020-12-07 Thread Adalbert Lazăr
From: Mihai Donțu 

This command returns the TSC frequency (in HZ) for the specified
vCPU if available (otherwise it returns zero).

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  29 
 arch/x86/include/asm/kvmi_host.h  |   2 +
 arch/x86/include/uapi/asm/kvmi.h  |  13 ++
 arch/x86/kvm/kvmi_msg.c   |  14 ++
 include/uapi/linux/kvmi.h |   2 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 144 +-
 virt/kvm/introspection/kvmi_int.h |   3 +
 virt/kvm/introspection/kvmi_msg.c |   9 ++
 8 files changed, 215 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/include/uapi/asm/kvmi.h

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 4d340528d2f4..902ced4dd0c4 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -441,6 +441,35 @@ one page (offset + size <= PAGE_SIZE).
 * -KVM_EINVAL - the specified gpa/size pair is invalid
 * -KVM_EINVAL - the padding is not zero
 
+8. KVMI_VCPU_GET_INFO
+-
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vcpu_get_info_reply {
+   __u64 tsc_speed;
+   };
+
+Returns the TSC frequency (in HZ) for the specified vCPU if available
+(otherwise it returns zero).
+
+:Errors:
+
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 360a57dd9019..05ade3a16b24 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_KVMI_HOST_H
 #define _ASM_X86_KVMI_HOST_H
 
+#include 
+
 struct kvm_vcpu_arch_introspection {
 };
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
new file mode 100644
index ..2b6192e1a9a4
--- /dev/null
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_X86_KVMI_H
+#define _UAPI_ASM_X86_KVMI_H
+
+/*
+ * KVM introspection - x86 specific structures and definitions
+ */
+
+struct kvmi_vcpu_get_info_reply {
+   __u64 tsc_speed;
+};
+
+#endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index 0f4717ca5fa8..77552bf50984 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg.c
@@ -8,7 +8,21 @@
 
 #include "../../../virt/kvm/introspection/kvmi_int.h"
 
+static int handle_vcpu_get_info(const struct kvmi_vcpu_msg_job *job,
+   const struct kvmi_msg_hdr *msg,
+   const void *req)
+{
+   struct kvmi_vcpu_get_info_reply rpl;
+
+   memset(&rpl, 0, sizeof(rpl));
+   if (kvm_has_tsc_control)
+   rpl.tsc_speed = 1000ul * job->vcpu->arch.virtual_tsc_khz;
+
+   return kvmi_msg_vcpu_reply(job, msg, 0, &rpl, sizeof(rpl));
+}
+
 static kvmi_vcpu_msg_job_fct const msg_vcpu[] = {
+   [KVMI_VCPU_GET_INFO] = handle_vcpu_get_info,
 };
 
 kvmi_vcpu_msg_job_fct kvmi_arch_vcpu_msg_handler(u16 id)
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 7ba1c8758aba..da766427231e 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -31,6 +31,8 @@ enum {
 };
 
 enum {
+   KVMI_VCPU_GET_INFO = KVMI_VCPU_MESSAGE_ID(1),
+
KVMI_NEXT_VCPU_MESSAGE
 };
 
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index b493edb534b0..9350ba8b7f9b 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "test_util.h"
 
@@ -18,6 +19,7 @@
 
 #include "linux/kvm_para.h"
 #include "linux/kvmi.h"
+#include "asm/kvmi.h"
 
 #define VCPU_ID 1
 
@@ -25,12 +27,46 @@ static int socket_pair[2];
 #define Kvm_socket   socket_pair[0]
 #define Userspace_socket socket_pair[1]
 
+static int test_id;
 static vm_vaddr_t test_gva;
 static void *test_hva;
 static vm_paddr_t test_gpa;
 
 static int page_size;
 
+struct vcpu_worker_data {
+   struct kvm_vm *vm;
+   int vcpu_id;
+   int test_id;
+};
+
+enum {
+   GUEST_TEST_NOOP = 0,
+};
+
+#define GUEST_REQUEST_TEST() GUEST_SYNC(0)
+#define GUEST_SIGNAL_TEST_DONE() GUEST_SYNC(1)
+
+#define HOST_SEND_TEST(uc)   (uc.cmd == UCALL_SYNC && uc.args[1] == 0)
+#define HOST_TEST_DONE(uc)   (uc.cmd == UCALL_SYNC && uc.args[1] == 1)
+
+static int guest_test_id(void)
+{
+   GUEST_REQUEST_TEST();
+   return

[PATCH v11 60/81] KVM: introspection: add KVMI_VM_CONTROL_CLEANUP

2020-12-07 Thread Adalbert Lazăr
This command will allow more control over the guest state on
unhook.  However, the memory restrictions (e.g. those set with
KVMI_VM_SET_PAGE_ACCESS) will be removed on unhook.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 28 +++
 arch/x86/include/asm/kvmi_host.h  |  1 +
 arch/x86/kvm/kvmi.c   | 17 +-
 include/linux/kvmi_host.h |  2 ++
 include/uapi/linux/kvmi.h | 22 +++-
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 24 +
 virt/kvm/introspection/kvmi.c | 18 +++---
 virt/kvm/introspection/kvmi_int.h | 12 ++-
 virt/kvm/introspection/kvmi_msg.c | 34 ++-
 9 files changed, 129 insertions(+), 29 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index c89f383e48f9..f9c10d27ce14 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -673,6 +673,34 @@ Returns a CPUID leaf (as seen by the guest OS).
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 * -KVM_ENOENT - the selected leaf is not present or is invalid
 
+14. KVMI_VM_CONTROL_CLEANUP
+---
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_control_cleanup {
+   __u8 enable;
+   __u8 padding[7];
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Enables/disables the automatic cleanup of the changes made by
+the introspection tool at the hypervisor level (e.g. CR/MSR/BP
+interceptions). By default it is enabled.
+
+:Errors:
+
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - ``enable`` is not 1 or 0
+
 Events
 ==
 
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index e008662f91a5..161d1ae5a7cf 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -11,6 +11,7 @@ struct kvmi_monitor_interception {
 };
 
 struct kvmi_interception {
+   bool cleanup;
bool restore_interception;
struct kvmi_monitor_interception breakpoint;
 };
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 3fd73087276e..e7a4ef48ed61 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -273,13 +273,11 @@ bool kvmi_arch_clean_up_interception(struct kvm_vcpu 
*vcpu)
 {
struct kvmi_interception *arch_vcpui = vcpu->arch.kvmi;
 
-   if (!arch_vcpui)
+   if (!arch_vcpui || !arch_vcpui->cleanup)
return false;
 
-   if (!arch_vcpui->restore_interception)
-   return false;
-
-   kvmi_arch_restore_interception(vcpu);
+   if (arch_vcpui->restore_interception)
+   kvmi_arch_restore_interception(vcpu);
 
return true;
 }
@@ -312,10 +310,13 @@ bool kvmi_arch_vcpu_introspected(struct kvm_vcpu *vcpu)
return !!READ_ONCE(vcpu->arch.kvmi);
 }
 
-void kvmi_arch_request_interception_cleanup(struct kvm_vcpu *vcpu)
+void kvmi_arch_request_interception_cleanup(struct kvm_vcpu *vcpu,
+   bool restore_interception)
 {
struct kvmi_interception *arch_vcpui = READ_ONCE(vcpu->arch.kvmi);
 
-   if (arch_vcpui)
-   arch_vcpui->restore_interception = true;
+   if (arch_vcpui) {
+   arch_vcpui->restore_interception = restore_interception;
+   arch_vcpui->cleanup = true;
+   }
 }
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index 30b7269468dd..7a7360306812 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -50,6 +50,8 @@ struct kvm_introspection {
unsigned long *vm_event_enable_mask;
 
atomic_t ev_seq;
+
+   bool restore_on_unhook;
 };
 
 int kvmi_version(void);
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index ea66f3f803e7..9e28961a8387 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -20,14 +20,15 @@ enum {
 enum {
KVMI_VM_EVENT = KVMI_VM_MESSAGE_ID(0),
 
-   KVMI_GET_VERSION   = KVMI_VM_MESSAGE_ID(1),
-   KVMI_VM_CHECK_COMMAND  = KVMI_VM_MESSAGE_ID(2),
-   KVMI_VM_CHECK_EVENT= KVMI_VM_MESSAGE_ID(3),
-   KVMI_VM_GET_INFO   = KVMI_VM_MESSAGE_ID(4),
-   KVMI_VM_CONTROL_EVENTS = KVMI_VM_MESSAGE_ID(5),
-   KVMI_VM_READ_PHYSICAL  = KVMI_VM_MESSAGE_ID(6),
-   KVMI_VM_WRITE_PHYSICAL = KVMI_VM_MESSAGE_ID(7),
-   KVMI_VM_PAUSE_VCPU = KVMI_VM_MESSAGE_ID(8),
+   KVMI_GET_VERSION= KVMI_VM_MESSAGE_ID(1),
+   KVMI_VM_CHECK_COMMAND   = KVMI_VM_MESSAGE_ID(2),
+   KVMI_VM_CHECK_EVENT = KVMI_VM_MESSAGE_ID(3),
+   KVMI_VM_GET_INFO= KVMI_VM_MESSAGE_ID(4),
+   KVMI_VM_CONTROL_EVENTS  = KVMI_VM_MESSAGE_ID(5),
+   KVMI_VM_READ_PHYSICAL   = KVMI_VM_MESSAGE_ID(6),
+   KVMI_VM_WRITE_PHYSICAL  = KVMI_VM_MESSAGE_ID(

[PATCH v11 80/81] KVM: introspection: emulate a guest page table walk on SPT violations due to A/D bit updates

2020-12-07 Thread Adalbert Lazăr
From: Mihai Donțu 

On SPT page faults caused by guest page table walks, use the existing
guest page table walk code to make the necessary adjustments to the A/D
bits and return to guest. This effectively bypasses the x86 emulator
who was making the wrong modifications leading one OS (Windows 8.1 x64)
to triple-fault very early in the boot process with the introspection
enabled.

With introspection disabled, these faults are handled by simply removing
the protection from the affected guest page and returning to guest.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvmi_host.h |  2 ++
 arch/x86/kvm/kvmi.c  | 30 ++
 arch/x86/kvm/mmu/mmu.c   | 12 ++--
 include/linux/kvmi_host.h|  3 +++
 virt/kvm/introspection/kvmi.c| 26 ++
 5 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 31500d3ff69d..0502293bd0c9 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -77,6 +77,7 @@ bool kvmi_descriptor_event(struct kvm_vcpu *vcpu, u8 
descriptor, bool write);
 bool kvmi_msr_event(struct kvm_vcpu *vcpu, struct msr_data *msr);
 bool kvmi_monitor_msrw_intercept(struct kvm_vcpu *vcpu, u32 msr, bool enable);
 bool kvmi_msrw_intercept_originator(struct kvm_vcpu *vcpu);
+bool kvmi_update_ad_flags(struct kvm_vcpu *vcpu);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
@@ -102,6 +103,7 @@ static inline bool kvmi_monitor_msrw_intercept(struct 
kvm_vcpu *vcpu, u32 msr,
   bool enable) { return false; }
 static inline bool kvmi_msrw_intercept_originator(struct kvm_vcpu *vcpu)
{ return false; }
+static inline bool kvmi_update_ad_flags(struct kvm_vcpu *vcpu) { return false; 
}
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index b010d2369756..6dc5df59f274 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1099,3 +1099,33 @@ void kvmi_arch_stop_singlestep(struct kvm_vcpu *vcpu)
 {
kvm_x86_ops.control_singlestep(vcpu, false);
 }
+
+bool kvmi_update_ad_flags(struct kvm_vcpu *vcpu)
+{
+   struct kvm_introspection *kvmi;
+   bool ret = false;
+   gva_t gva;
+   gpa_t gpa;
+
+   kvmi = kvmi_get(vcpu->kvm);
+   if (!kvmi)
+   return false;
+
+   gva = kvm_x86_ops.fault_gla(vcpu);
+   if (gva == ~0ull)
+   goto out;
+
+   gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, PFERR_WRITE_MASK, NULL);
+   if (gpa == UNMAPPED_GVA) {
+   struct x86_exception exception = { };
+
+   gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, 0, &exception);
+   }
+
+   ret = (gpa != UNMAPPED_GVA);
+
+out:
+   kvmi_put(vcpu->kvm);
+
+   return ret;
+}
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f79cf58a27dc..204e44d4e465 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -43,6 +43,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -5184,8 +5185,15 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t 
cr2_or_gpa, u64 error_code,
 */
if (vcpu->arch.mmu->direct_map &&
(error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
-   kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa));
-   return 1;
+   gfn_t gfn = gpa_to_gfn(cr2_or_gpa);
+
+   if (kvmi_tracked_gfn(vcpu, gfn)) {
+   if (kvmi_update_ad_flags(vcpu))
+   return 1;
+   } else {
+   kvm_mmu_unprotect_page(vcpu->kvm, gfn);
+   return 1;
+   }
}
 
/*
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index ec38e434c8e9..90647bb2a570 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -83,6 +83,7 @@ bool kvmi_breakpoint_event(struct kvm_vcpu *vcpu, u64 gva, u8 
insn_len);
 bool kvmi_vcpu_running_singlestep(struct kvm_vcpu *vcpu);
 void kvmi_singlestep_done(struct kvm_vcpu *vcpu);
 void kvmi_singlestep_failed(struct kvm_vcpu *vcpu);
+bool kvmi_tracked_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 #else
 
@@ -101,6 +102,8 @@ static inline bool kvmi_vcpu_running_singlestep(struct 
kvm_vcpu *vcpu)
{ return false; }
 static inline void kvmi_singlestep_done(struct kvm_vcpu *vcpu) { }
 static inline void kvmi_singlestep_failed(struct kvm_vcpu *vcpu) { }
+static inline bool kvmi_tracked_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+   { return false; }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index 4f9da76c6777..0474d85b54a4 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi

[PATCH v11 54/81] KVM: introspection: add KVMI_VCPU_SET_REGISTERS

2020-12-07 Thread Adalbert Lazăr
From: Mihai Donțu 

During an introspection event, the introspection tool might need to
change the vCPU state, for example, to skip the current instruction.

This command is allowed only during vCPU events and the registers will
be set when the reply has been received.

Signed-off-by: Mihai Donțu 
Co-developed-by: Mircea Cîrjaliu 
Signed-off-by: Mircea Cîrjaliu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 29 +++
 arch/x86/include/asm/kvmi_host.h  |  2 +
 arch/x86/kvm/kvmi.c   | 22 +
 arch/x86/kvm/kvmi.h   |  2 +
 arch/x86/kvm/kvmi_msg.c   | 21 +
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 83 +++
 virt/kvm/introspection/kvmi_int.h |  1 +
 virt/kvm/introspection/kvmi_msg.c |  6 +-
 9 files changed, 165 insertions(+), 2 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index dbaedbee9dee..178832304458 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -601,6 +601,35 @@ registers, the special registers and the requested set of 
MSRs.
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 * -KVM_ENOMEM - there is not enough memory to allocate the reply
 
+12. KVMI_VCPU_SET_REGISTERS
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvm_regs;
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Sets the general purpose registers for the given vCPU. The changes become
+visible to other threads accessing the KVM vCPU structure after the event
+currently being handled is replied to.
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EOPNOTSUPP - the command hasn't been received during an introspection 
event
+
 Events
 ==
 
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 05ade3a16b24..cc945151cb36 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -5,6 +5,8 @@
 #include 
 
 struct kvm_vcpu_arch_introspection {
+   struct kvm_regs delayed_regs;
+   bool have_delayed_regs;
 };
 
 struct kvm_arch_introspection {
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index fa9b20277dad..39638af7757e 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -118,3 +118,25 @@ int kvmi_arch_cmd_vcpu_get_registers(struct kvm_vcpu *vcpu,
 
return err ? -KVM_EINVAL : 0;
 }
+
+void kvmi_arch_cmd_vcpu_set_registers(struct kvm_vcpu *vcpu,
+ const struct kvm_regs *regs)
+{
+   struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu);
+   struct kvm_regs *dest = &vcpui->arch.delayed_regs;
+
+   memcpy(dest, regs, sizeof(*dest));
+
+   vcpui->arch.have_delayed_regs = true;
+}
+
+void kvmi_arch_post_reply(struct kvm_vcpu *vcpu)
+{
+   struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu);
+
+   if (!vcpui->arch.have_delayed_regs)
+   return;
+
+   kvm_arch_vcpu_set_regs(vcpu, &vcpui->arch.delayed_regs, false);
+   vcpui->arch.have_delayed_regs = false;
+}
diff --git a/arch/x86/kvm/kvmi.h b/arch/x86/kvm/kvmi.h
index 7aab4aaabcda..4eeb0c900083 100644
--- a/arch/x86/kvm/kvmi.h
+++ b/arch/x86/kvm/kvmi.h
@@ -5,5 +5,7 @@
 int kvmi_arch_cmd_vcpu_get_registers(struct kvm_vcpu *vcpu,
const struct kvmi_vcpu_get_registers *req,
struct kvmi_vcpu_get_registers_reply *rpl);
+void kvmi_arch_cmd_vcpu_set_registers(struct kvm_vcpu *vcpu,
+ const struct kvm_regs *regs);
 
 #endif
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index 4288a91937f6..8ff3aa936ccd 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg.c
@@ -90,9 +90,30 @@ static int handle_vcpu_get_registers(const struct 
kvmi_vcpu_msg_job *job,
return err;
 }
 
+static int handle_vcpu_set_registers(const struct kvmi_vcpu_msg_job *job,
+const struct kvmi_msg_hdr *msg,
+const void *req)
+{
+   const struct kvm_regs *regs = req;
+   size_t cmd_size;
+   int ec = 0;
+
+   cmd_size = sizeof(struct kvmi_vcpu_hdr) + sizeof(*regs);
+
+   if (cmd_size > msg->size)
+   ec = -KVM_EINVAL;
+   else if (!VCPUI(job->vcpu)->waiting_for_reply)
+   ec = -KVM_EOPNOTSUPP;
+   else
+   kvmi_arch_cmd_vcpu_set_registers(job->vcpu, regs);
+
+   return kvmi_msg_vcpu_reply(job, msg, ec, NULL, 0);
+}
+
 static kvmi_vcpu_msg_job_fct const msg_vcpu[] = {
[KVMI_VCPU_GET_INFO]  = handle_vcpu_get_info,

[PATCH v11 62/81] KVM: introspection: restore the state of CR3 interception on unhook

2020-12-07 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This commit also ensures that the introspection tool and the userspace
do not disable each other the CR3-write VM-exit.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvmi_host.h |  4 ++
 arch/x86/kvm/kvmi.c  | 67 ++--
 arch/x86/kvm/kvmi.h  |  4 +-
 arch/x86/kvm/kvmi_msg.c  |  4 +-
 arch/x86/kvm/svm/svm.c   |  5 +++
 arch/x86/kvm/vmx/vmx.c   |  5 +++
 6 files changed, 81 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 7613088d0ae2..edbedf031467 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -16,6 +16,7 @@ struct kvmi_interception {
bool cleanup;
bool restore_interception;
struct kvmi_monitor_interception breakpoint;
+   struct kvmi_monitor_interception cr3w;
 };
 
 struct kvm_vcpu_arch_introspection {
@@ -34,6 +35,7 @@ bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 
dbg);
 bool kvmi_cr_event(struct kvm_vcpu *vcpu, unsigned int cr,
   unsigned long old_value, unsigned long *new_value);
 bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu);
+bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu, bool enable);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
@@ -44,6 +46,8 @@ static inline bool kvmi_cr_event(struct kvm_vcpu *vcpu, 
unsigned int cr,
 unsigned long *new_value)
{ return true; }
 static inline bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu) { return false; 
}
+static inline bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu,
+   bool enable) { return false; }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 2bb6b4bb932b..8ad3698e5988 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -230,6 +230,59 @@ static void kvmi_arch_disable_bp_intercept(struct kvm_vcpu 
*vcpu)
vcpu->arch.kvmi->breakpoint.kvm_intercepted = false;
 }
 
+static bool monitor_cr3w_fct_kvmi(struct kvm_vcpu *vcpu, bool enable)
+{
+   vcpu->arch.kvmi->cr3w.kvmi_intercepted = enable;
+
+   if (enable)
+   vcpu->arch.kvmi->cr3w.kvm_intercepted =
+   kvm_x86_ops.cr3_write_intercepted(vcpu);
+   else if (vcpu->arch.kvmi->cr3w.kvm_intercepted)
+   return true;
+
+   return false;
+}
+
+static bool monitor_cr3w_fct_kvm(struct kvm_vcpu *vcpu, bool enable)
+{
+   if (!vcpu->arch.kvmi->cr3w.kvmi_intercepted)
+   return false;
+
+   vcpu->arch.kvmi->cr3w.kvm_intercepted = enable;
+
+   if (!enable)
+   return true;
+
+   return false;
+}
+
+/*
+ * Returns true if one side (kvm or kvmi) tries to disable the CR3 write
+ * interception while the other side is still tracking it.
+ */
+bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu, bool enable)
+{
+   struct kvmi_interception *arch_vcpui = READ_ONCE(vcpu->arch.kvmi);
+
+   return (arch_vcpui && arch_vcpui->cr3w.monitor_fct(vcpu, enable));
+}
+EXPORT_SYMBOL(kvmi_monitor_cr3w_intercept);
+
+static void kvmi_control_cr3w_intercept(struct kvm_vcpu *vcpu, bool enable)
+{
+   vcpu->arch.kvmi->cr3w.monitor_fct = monitor_cr3w_fct_kvmi;
+   kvm_x86_ops.control_cr3_intercept(vcpu, CR_TYPE_W, enable);
+   vcpu->arch.kvmi->cr3w.monitor_fct = monitor_cr3w_fct_kvm;
+}
+
+static void kvmi_arch_disable_cr3w_intercept(struct kvm_vcpu *vcpu)
+{
+   kvmi_control_cr3w_intercept(vcpu, false);
+
+   vcpu->arch.kvmi->cr3w.kvmi_intercepted = false;
+   vcpu->arch.kvmi->cr3w.kvm_intercepted = false;
+}
+
 int kvmi_arch_cmd_control_intercept(struct kvm_vcpu *vcpu,
unsigned int event_id, bool enable)
 {
@@ -269,6 +322,7 @@ void kvmi_arch_breakpoint_event(struct kvm_vcpu *vcpu, u64 
gva, u8 insn_len)
 static void kvmi_arch_restore_interception(struct kvm_vcpu *vcpu)
 {
kvmi_arch_disable_bp_intercept(vcpu);
+   kvmi_arch_disable_cr3w_intercept(vcpu);
 }
 
 bool kvmi_arch_clean_up_interception(struct kvm_vcpu *vcpu)
@@ -293,8 +347,13 @@ bool kvmi_arch_vcpu_alloc_interception(struct kvm_vcpu 
*vcpu)
return false;
 
arch_vcpui->breakpoint.monitor_fct = monitor_bp_fct_kvm;
+   arch_vcpui->cr3w.monitor_fct = monitor_cr3w_fct_kvm;
 
-   /* pair with kvmi_monitor_bp_intercept() */
+   /*
+* paired with:
+*  - kvmi_monitor_bp_intercept()
+*  - kvmi_monitor_cr3w_intercept()
+*/
smp_wmb();
WRITE_ONCE(vcpu->arch.kvmi, arch_vcpui);
 
@@ -326,7 +385,7 @@ void kvmi_arch_request_interception_cleanup(struct kvm_vcpu 
*vcpu,
 int kvmi_arch_cmd_vcpu_control_cr(struct kvm_vcpu *vcpu, int cr, bool enable)
 {
if 

[PATCH v11 55/81] KVM: introspection: add KVMI_VCPU_GET_CPUID

2020-12-07 Thread Adalbert Lazăr
From: Marian Rotariu 

This command returns a CPUID leaf (as seen by the guest OS).

Signed-off-by: Marian Rotariu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 36 +++
 arch/x86/include/uapi/asm/kvmi.h  | 12 +++
 arch/x86/kvm/kvmi_msg.c   | 26 ++
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 30 
 5 files changed, 105 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 178832304458..10966430621c 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -630,6 +630,42 @@ currently being handled is replied to.
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 * -KVM_EOPNOTSUPP - the command hasn't been received during an introspection 
event
 
+13. KVMI_VCPU_GET_CPUID
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_get_cpuid {
+   __u32 function;
+   __u32 index;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vcpu_get_cpuid_reply {
+   __u32 eax;
+   __u32 ebx;
+   __u32 ecx;
+   __u32 edx;
+   };
+
+Returns a CPUID leaf (as seen by the guest OS).
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_ENOENT - the selected leaf is not present or is invalid
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 11835bf9bdc6..3631da9eef8c 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -45,4 +45,16 @@ struct kvmi_vcpu_get_registers_reply {
struct kvm_msrs msrs;
 };
 
+struct kvmi_vcpu_get_cpuid {
+   __u32 function;
+   __u32 index;
+};
+
+struct kvmi_vcpu_get_cpuid_reply {
+   __u32 eax;
+   __u32 ebx;
+   __u32 ecx;
+   __u32 edx;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index 8ff3aa936ccd..c2fcfba9f315 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg.c
@@ -6,6 +6,7 @@
  *
  */
 
+#include "cpuid.h"
 #include "../../../virt/kvm/introspection/kvmi_int.h"
 #include "kvmi.h"
 
@@ -110,7 +111,32 @@ static int handle_vcpu_set_registers(const struct 
kvmi_vcpu_msg_job *job,
return kvmi_msg_vcpu_reply(job, msg, ec, NULL, 0);
 }
 
+static int handle_vcpu_get_cpuid(const struct kvmi_vcpu_msg_job *job,
+const struct kvmi_msg_hdr *msg,
+const void *_req)
+{
+   const struct kvmi_vcpu_get_cpuid *req = _req;
+   struct kvmi_vcpu_get_cpuid_reply rpl;
+   struct kvm_cpuid_entry2 *entry;
+   int ec = 0;
+
+   entry = kvm_find_cpuid_entry(job->vcpu, req->function, req->index);
+   if (!entry) {
+   ec = -KVM_ENOENT;
+   } else {
+   memset(&rpl, 0, sizeof(rpl));
+
+   rpl.eax = entry->eax;
+   rpl.ebx = entry->ebx;
+   rpl.ecx = entry->ecx;
+   rpl.edx = entry->edx;
+   }
+
+   return kvmi_msg_vcpu_reply(job, msg, ec, &rpl, sizeof(rpl));
+}
+
 static kvmi_vcpu_msg_job_fct const msg_vcpu[] = {
+   [KVMI_VCPU_GET_CPUID] = handle_vcpu_get_cpuid,
[KVMI_VCPU_GET_INFO]  = handle_vcpu_get_info,
[KVMI_VCPU_GET_REGISTERS] = handle_vcpu_get_registers,
[KVMI_VCPU_SET_REGISTERS] = handle_vcpu_set_registers,
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 4b756d388ad3..2c93a36bfa43 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -39,6 +39,7 @@ enum {
KVMI_VCPU_CONTROL_EVENTS = KVMI_VCPU_MESSAGE_ID(2),
KVMI_VCPU_GET_REGISTERS  = KVMI_VCPU_MESSAGE_ID(3),
KVMI_VCPU_SET_REGISTERS  = KVMI_VCPU_MESSAGE_ID(4),
+   KVMI_VCPU_GET_CPUID  = KVMI_VCPU_MESSAGE_ID(5),
 
KVMI_NEXT_VCPU_MESSAGE
 };
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 311a050c26c1..542b59466d12 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -948,6 +948,35 @@ static void test_cmd_vcpu_set_registers(struct kvm_vm *vm)
wait_vcpu_worker(vcpu_thread);
 }
 
+static void cmd_vcpu_get_cpuid(struct kvm_vm *vm,
+  __u32 function, __u32 index,
+  struct kvmi_vcpu_get_cpuid_reply *rpl)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   struct kvmi_vcpu_get_cpu

[PATCH v11 76/81] KVM: introspection: extend KVMI_GET_VERSION with struct kvmi_features

2020-12-07 Thread Adalbert Lazăr
This is used by the introspection tool to check the hardware support
for the single step feature.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst| 15 +--
 arch/x86/include/uapi/asm/kvmi.h   |  5 +
 arch/x86/kvm/kvmi.c|  5 +
 include/uapi/linux/kvmi.h  |  1 +
 tools/testing/selftests/kvm/x86_64/kvmi_test.c |  6 ++
 virt/kvm/introspection/kvmi_int.h  |  1 +
 virt/kvm/introspection/kvmi_msg.c  |  2 ++
 7 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index c5afb4c91ca2..a3c48cbd8c72 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -243,10 +243,21 @@ The vCPU commands start with::
struct kvmi_get_version_reply {
__u32 version;
__u32 max_msg_size;
+   struct kvmi_features features;
};
 
-Returns the introspection API version and the largest accepted message
-size (useful for variable length messages).
+For x86
+
+::
+
+   struct kvmi_features {
+   __u8 singlestep;
+   __u8 padding[7];
+   };
+
+Returns the introspection API version, the largest accepted message size
+(useful for variable length messages) and some of the hardware supported
+features.
 
 This command is always allowed and successful.
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 6ef144ddb4bb..c5a2cb1b54f1 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -159,4 +159,9 @@ struct kvmi_vcpu_event_msr_reply {
__u64 new_val;
 };
 
+struct kvmi_features {
+   __u8 singlestep;
+   __u8 padding[7];
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index cd64762643d6..e0302883aec5 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1081,3 +1081,8 @@ static void kvmi_track_flush_slot(struct kvm *kvm, struct 
kvm_memory_slot *slot,
 
kvmi_put(kvm);
 }
+
+void kvmi_arch_features(struct kvmi_features *feat)
+{
+   feat->singlestep = !!kvm_x86_ops.control_singlestep;
+}
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 3b432b37b17c..43631ed2b06c 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -103,6 +103,7 @@ struct kvmi_error_code {
 struct kvmi_get_version_reply {
__u32 version;
__u32 max_msg_size;
+   struct kvmi_features features;
 };
 
 struct kvmi_vm_check_command {
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index e36b574c264e..9984b0247ae9 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -59,6 +59,8 @@ struct vcpu_worker_data {
bool restart_on_shutdown;
 };
 
+static struct kvmi_features features;
+
 typedef void (*fct_pf_event)(struct kvm_vm *vm, struct kvmi_msg_hdr *hdr,
struct pf_ev *ev,
struct vcpu_reply *rpl);
@@ -443,6 +445,10 @@ static void test_cmd_get_version(void)
 
pr_debug("KVMI version: %u\n", rpl.version);
pr_debug("Max message size: %u\n", rpl.max_msg_size);
+
+   features = rpl.features;
+
+   pr_debug("singlestep support: %u\n", features.singlestep);
 }
 
 static void cmd_vm_check_command(__u16 id, int expected_err)
diff --git a/virt/kvm/introspection/kvmi_int.h 
b/virt/kvm/introspection/kvmi_int.h
index bf6545e66425..a51e7e4ed511 100644
--- a/virt/kvm/introspection/kvmi_int.h
+++ b/virt/kvm/introspection/kvmi_int.h
@@ -121,5 +121,6 @@ void kvmi_arch_update_page_tracking(struct kvm *kvm,
struct kvmi_mem_access *m);
 void kvmi_arch_hook(struct kvm *kvm);
 void kvmi_arch_unhook(struct kvm *kvm);
+void kvmi_arch_features(struct kvmi_features *feat);
 
 #endif
diff --git a/virt/kvm/introspection/kvmi_msg.c 
b/virt/kvm/introspection/kvmi_msg.c
index 1388b5c768af..fc03223d115f 100644
--- a/virt/kvm/introspection/kvmi_msg.c
+++ b/virt/kvm/introspection/kvmi_msg.c
@@ -134,6 +134,8 @@ static int handle_get_version(struct kvm_introspection 
*kvmi,
rpl.version = kvmi_version();
rpl.max_msg_size = KVMI_MAX_MSG_SIZE;
 
+   kvmi_arch_features(&rpl.features);
+
	return kvmi_msg_vm_reply(kvmi, msg, 0, &rpl, sizeof(rpl));
 }
 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v11 73/81] KVM: introspection: restore the state of MSR interception on unhook

2020-12-07 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This commit also ensures that the introspection tool and the userspace
do not disable each other the MSR access VM-exit.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvmi_host.h |  12 +++
 arch/x86/kvm/kvmi.c  | 124 +++
 arch/x86/kvm/svm/svm.c   |  10 +++
 arch/x86/kvm/vmx/vmx.c   |  11 +++
 4 files changed, 142 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 5a4fc5b80907..8822f0310156 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -26,6 +26,12 @@ struct kvmi_interception {
DECLARE_BITMAP(low, KVMI_NUM_MSR);
DECLARE_BITMAP(high, KVMI_NUM_MSR);
} kvmi_mask;
+   struct {
+   DECLARE_BITMAP(low, KVMI_NUM_MSR);
+   DECLARE_BITMAP(high, KVMI_NUM_MSR);
+   } kvm_mask;
+   bool (*monitor_fct)(struct kvm_vcpu *vcpu, u32 msr,
+   bool enable);
} msrw;
 };
 
@@ -61,6 +67,8 @@ void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
 bool kvmi_monitor_desc_intercept(struct kvm_vcpu *vcpu, bool enable);
 bool kvmi_descriptor_event(struct kvm_vcpu *vcpu, u8 descriptor, bool write);
 bool kvmi_msr_event(struct kvm_vcpu *vcpu, struct msr_data *msr);
+bool kvmi_monitor_msrw_intercept(struct kvm_vcpu *vcpu, u32 msr, bool enable);
+bool kvmi_msrw_intercept_originator(struct kvm_vcpu *vcpu);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
@@ -82,6 +90,10 @@ static inline bool kvmi_descriptor_event(struct kvm_vcpu 
*vcpu, u8 descriptor,
 bool write) { return true; }
 static inline bool kvmi_msr_event(struct kvm_vcpu *vcpu, struct msr_data *msr)
{ return true; }
+static inline bool kvmi_monitor_msrw_intercept(struct kvm_vcpu *vcpu, u32 msr,
+  bool enable) { return false; }
+static inline bool kvmi_msrw_intercept_originator(struct kvm_vcpu *vcpu)
+   { return false; }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index ce29e01ba7a6..e325dad88dbb 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -345,22 +345,25 @@ static void kvmi_arch_disable_desc_intercept(struct 
kvm_vcpu *vcpu)
vcpu->arch.kvmi->descriptor.kvm_intercepted = false;
 }
 
-static unsigned long *msr_mask(struct kvm_vcpu *vcpu, unsigned int *msr)
+static unsigned long *msr_mask(struct kvm_vcpu *vcpu, unsigned int *msr,
+  bool kvmi)
 {
switch (*msr) {
case 0 ... 0x1fff:
-   return vcpu->arch.kvmi->msrw.kvmi_mask.low;
+   return kvmi ? vcpu->arch.kvmi->msrw.kvmi_mask.low :
+ vcpu->arch.kvmi->msrw.kvm_mask.low;
case 0xc000 ... 0xc0001fff:
*msr &= 0x1fff;
-   return vcpu->arch.kvmi->msrw.kvmi_mask.high;
+   return kvmi ? vcpu->arch.kvmi->msrw.kvmi_mask.high :
+ vcpu->arch.kvmi->msrw.kvm_mask.high;
}
 
return NULL;
 }
 
-static bool test_msr_mask(struct kvm_vcpu *vcpu, unsigned int msr)
+static bool test_msr_mask(struct kvm_vcpu *vcpu, unsigned int msr, bool kvmi)
 {
-   unsigned long *mask = msr_mask(vcpu, &msr);
+   unsigned long *mask = msr_mask(vcpu, &msr, kvmi);
 
if (!mask)
return false;
@@ -368,9 +371,27 @@ static bool test_msr_mask(struct kvm_vcpu *vcpu, unsigned 
int msr)
return !!test_bit(msr, mask);
 }
 
-static bool msr_control(struct kvm_vcpu *vcpu, unsigned int msr, bool enable)
+/*
+ * Returns true if one side (kvm or kvmi) tries to disable the MSR write
+ * interception while the other side is still tracking it.
+ */
+bool kvmi_monitor_msrw_intercept(struct kvm_vcpu *vcpu, u32 msr, bool enable)
 {
-   unsigned long *mask = msr_mask(vcpu, &msr);
+   struct kvmi_interception *arch_vcpui;
+
+   if (!vcpu)
+   return false;
+
+   arch_vcpui = READ_ONCE(vcpu->arch.kvmi);
+
+   return (arch_vcpui && arch_vcpui->msrw.monitor_fct(vcpu, msr, enable));
+}
+EXPORT_SYMBOL(kvmi_monitor_msrw_intercept);
+
+static bool msr_control(struct kvm_vcpu *vcpu, unsigned int msr, bool enable,
+   bool kvmi)
+{
+   unsigned long *mask = msr_mask(vcpu, &msr, kvmi);
 
if (!mask)
return false;
@@ -383,6 +404,63 @@ static bool msr_control(struct kvm_vcpu *vcpu, unsigned 
int msr, bool enable)
return true;
 }
 
+static bool msr_intercepted_by_kvmi(struct kvm_vcpu *vcpu, u32 msr)
+{
+   return test_msr_mask(vcpu, msr, true);
+}
+
+static bool msr_intercepted_by_kvm(struct kvm_vcpu *vcpu, u32 msr)
+{
+

[PATCH v11 53/81] KVM: introspection: add KVMI_VCPU_GET_REGISTERS

2020-12-07 Thread Adalbert Lazăr
From: Mihai Donțu 

This command is used to get kvm_regs and kvm_sregs structures,
plus a list of struct kvm_msrs from a specific vCPU.

While the kvm_regs and kvm_sregs structures are included with every
event, this command allows reading any MSR.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 44 
 arch/x86/include/uapi/asm/kvmi.h  | 15 
 arch/x86/kvm/kvmi.c   | 25 +++
 arch/x86/kvm/kvmi.h   |  9 +++
 arch/x86/kvm/kvmi_msg.c   | 72 ++-
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 59 +++
 7 files changed, 224 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kvm/kvmi.h

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index a502cf9baead..dbaedbee9dee 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -557,6 +557,50 @@ the *KVMI_VM_CONTROL_EVENTS* command.
 * -KVM_EPERM - the access is disallowed (use *KVMI_VM_CHECK_EVENT* first)
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+11. KVMI_VCPU_GET_REGISTERS
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_get_registers {
+   __u16 nmsrs;
+   __u16 padding1;
+   __u32 padding2;
+   __u32 msrs_idx[0];
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vcpu_get_registers_reply {
+   __u32 mode;
+   __u32 padding;
+   struct kvm_regs regs;
+   struct kvm_sregs sregs;
+   struct kvm_msrs msrs;
+   };
+
+For the given vCPU and the ``nmsrs`` sized array of MSRs registers,
+returns the current vCPU mode (in bytes: 2, 4 or 8), the general purpose
+registers, the special registers and the requested set of MSRs.
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - one of the indicated MSRs is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - the reply size is larger than
+kvmi_get_version_reply.max_msg_size (too many MSRs)
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_ENOMEM - there is not enough memory to allocate the reply
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 9d9df09d381a..11835bf9bdc6 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -30,4 +30,19 @@ struct kvmi_vcpu_event_arch {
} msrs;
 };
 
+struct kvmi_vcpu_get_registers {
+   __u16 nmsrs;
+   __u16 padding1;
+   __u32 padding2;
+   __u32 msrs_idx[0];
+};
+
+struct kvmi_vcpu_get_registers_reply {
+   __u32 mode;
+   __u32 padding;
+   struct kvm_regs regs;
+   struct kvm_sregs sregs;
+   struct kvm_msrs msrs;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 383b19dcf054..fa9b20277dad 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -93,3 +93,28 @@ void kvmi_arch_setup_vcpu_event(struct kvm_vcpu *vcpu,
	ev->arch.mode = kvmi_vcpu_mode(vcpu, &ev->arch.sregs);
kvmi_get_msrs(vcpu, event);
 }
+
+int kvmi_arch_cmd_vcpu_get_registers(struct kvm_vcpu *vcpu,
+   const struct kvmi_vcpu_get_registers *req,
+   struct kvmi_vcpu_get_registers_reply *rpl)
+{
+   struct msr_data m = {.host_initiated = true};
+   int k, err = 0;
+
+   kvm_arch_vcpu_get_regs(vcpu, &rpl->regs);
+   kvm_arch_vcpu_get_sregs(vcpu, &rpl->sregs);
+   rpl->mode = kvmi_vcpu_mode(vcpu, &rpl->sregs);
+   rpl->msrs.nmsrs = req->nmsrs;
+
+   for (k = 0; k < req->nmsrs && !err; k++) {
+   m.index = req->msrs_idx[k];
+
+   err = kvm_x86_ops.get_msr(vcpu, &m);
+   if (!err) {
+   rpl->msrs.entries[k].index = m.index;
+   rpl->msrs.entries[k].data = m.data;
+   }
+   }
+
+   return err ? -KVM_EINVAL : 0;
+}
diff --git a/arch/x86/kvm/kvmi.h b/arch/x86/kvm/kvmi.h
new file mode 100644
index ..7aab4aaabcda
--- /dev/null
+++ b/arch/x86/kvm/kvmi.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_X86_KVM_KVMI_H
+#define ARCH_X86_KVM_KVMI_H
+
+int kvmi_arch_cmd_vcpu_get_registers(struct kvm_vcpu *vcpu,
+   const struct kvmi_vcpu_get_registers *req,
+   struct kvmi_vcpu_get_registers_reply *rpl);
+
+#endif
diff --git a/arch/x86/kvm/kvmi_msg.c b/arch/x86/kvm/kvmi_msg.c
index 77552bf50984..4288a91937f6 100644
--- a/arch/x86/kvm/kvmi_msg.c
+++ b/arch/x86/kvm/kvmi_msg.c
@

  1   2   3   4   5   6   7   >