[PATCH v9 68/84] KVM: introspection: add KVMI_EVENT_XSETBV

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

This event is sent when an extended control register XCR is going to
be changed.

Signed-off-by: Mihai Donțu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 33 
 arch/x86/include/asm/kvmi_host.h  |  4 +
 arch/x86/include/uapi/asm/kvmi.h  |  7 ++
 arch/x86/kvm/kvmi.c   | 48 +++
 arch/x86/kvm/x86.c|  6 ++
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 84 +++
 virt/kvm/introspection/kvmi.c |  1 +
 8 files changed, 184 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 7da8efd18b89..283e9a2dfda1 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -552,6 +552,7 @@ the following events::
KVMI_EVENT_BREAKPOINT
KVMI_EVENT_CR
KVMI_EVENT_HYPERCALL
+   KVMI_EVENT_XSETBV
 
 When an event is enabled, the introspection tool is notified and
 must reply with: continue, retry, crash, etc. (see **Events** below).
@@ -1053,3 +1054,35 @@ other vCPU introspection event.
 ``kvmi_event``, exception/interrupt number, exception code
 (``error_code``) and address are sent to the introspection tool,
 which should check if its exception has been injected or overridden.
+
+7. KVMI_EVENT_XSETBV
+
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_event;
+   struct kvmi_event_xsetbv {
+   __u8 xcr;
+   __u8 padding[7];
+   __u64 old_value;
+   __u64 new_value;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_event_reply;
+
+This event is sent when the extended control register XCR is going
+to be changed and the introspection has been enabled for this event
+(see *KVMI_VCPU_CONTROL_EVENTS*).
+
+``kvmi_event``, the extended control register number, the old value and
+the new value are sent to the introspection tool.
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 44580f77e34e..aed8a4b88a68 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -33,6 +33,8 @@ bool kvmi_cr_event(struct kvm_vcpu *vcpu, unsigned int cr,
   unsigned long old_value, unsigned long *new_value);
 bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu);
 bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu, bool enable);
+void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
+  u64 old_value, u64 new_value);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
@@ -45,6 +47,8 @@ static inline bool kvmi_cr_event(struct kvm_vcpu *vcpu, 
unsigned int cr,
 static inline bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu) { return false; 
}
 static inline bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu,
bool enable) { return false; }
+static inline void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
+   u64 old_value, u64 new_value) { }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 4c59c9fe6b00..2f69a4f5d2e0 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -83,4 +83,11 @@ struct kvmi_event_cr_reply {
__u64 new_val;
 };
 
+struct kvmi_event_xsetbv {
+   __u8 xcr;
+   __u8 padding[7];
+   __u64 old_value;
+   __u64 new_value;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 0c6ab136084f..55c5e290730c 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -672,3 +672,51 @@ void kvmi_arch_send_trap_event(struct kvm_vcpu *vcpu)
kvmi_handle_common_event_actions(vcpu->kvm, action);
}
 }
+
+static u32 kvmi_send_xsetbv(struct kvm_vcpu *vcpu, u8 xcr, u64 old_value,
+   u64 new_value)
+{
+   struct kvmi_event_xsetbv e;
+   int err, action;
+
+   memset(, 0, sizeof(e));
+   e.xcr = xcr;
+   e.old_value = old_value;
+   e.new_value = new_value;
+
+   err = kvmi_send_event(vcpu, KVMI_EVENT_XSETBV, , sizeof(e),
+ NULL, 0, );
+   if (err)
+   return KVMI_EVENT_ACTION_CONTINUE;
+
+   return action;
+}
+
+static void __kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
+   u64 old_value, u64 new_value)
+{
+   u32 action;
+
+   action = kvmi_send_xsetbv(vcpu, xcr, old_value, new_value);
+   switch (action) {
+   case KVMI_EVENT_ACTION_CONTINUE:
+   break;
+   default:
+   kvmi_handle_common_event_actions(vcpu->kvm, action);
+   }
+

[PATCH v9 73/84] KVM: introspection: add KVMI_EVENT_DESCRIPTOR

2020-07-21 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This event is sent when IDTR, GDTR, LDTR or TR are accessed.

These could be used to implement a tiny agent which runs in the context
of an introspected guest and uses virtualized exceptions (#VE) and
alternate EPT views (VMFUNC #0) to filter converted VMEXITS. The events
of interest will be suppressed (after some appropriate guest-side
handling) while the rest will be sent to the introspector via a VMCALL.

Signed-off-by: Nicușor Cîțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 41 ++
 arch/x86/include/asm/kvmi_host.h  |  3 +
 arch/x86/include/uapi/asm/kvmi.h  | 11 +++
 arch/x86/kvm/kvmi.c   | 75 +++
 arch/x86/kvm/svm/svm.c| 33 
 arch/x86/kvm/vmx/vmx.c| 23 ++
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 75 +++
 virt/kvm/introspection/kvmi.c |  1 +
 9 files changed, 263 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 6de260c09f3b..0294c141eb0a 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -551,6 +551,7 @@ the following events::
 
KVMI_EVENT_BREAKPOINT
KVMI_EVENT_CR
+   KVMI_EVENT_DESCRIPTOR
KVMI_EVENT_HYPERCALL
KVMI_EVENT_XSETBV
 
@@ -574,6 +575,8 @@ the *KVMI_VM_CONTROL_EVENTS* command.
 * -KVM_EINVAL - the event ID is unknown (use *KVMI_VM_CHECK_EVENT* first)
 * -KVM_EPERM - the access is disallowed (use *KVMI_VM_CHECK_EVENT* first)
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EOPNOTSUPP - the event can't be intercepted in the current setup
+(e.g. KVMI_EVENT_DESCRIPTOR with AMD)
 * -KVM_EBUSY - the event can't be intercepted right now
(e.g. KVMI_EVENT_BREAKPOINT if the #BP event is already 
intercepted
 by userspace)
@@ -1211,3 +1214,41 @@ to be changed and the introspection has been enabled for 
this event
 
 ``kvmi_event``, the extended control register number, the old value and
 the new value are sent to the introspection tool.
+
+8. KVMI_EVENT_DESCRIPTOR
+
+
+:Architecture: x86
+:Versions: >= 1
+:Actions: CONTINUE, RETRY, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_event;
+   struct kvmi_event_descriptor {
+   __u8 descriptor;
+   __u8 write;
+   __u8 padding[6];
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_event_reply;
+
+This event is sent when a descriptor table register is accessed and the
+introspection has been enabled for this event (see 
**KVMI_VCPU_CONTROL_EVENTS**).
+
+``kvmi_event`` and ``kvmi_event_descriptor`` are sent to the introspection 
tool.
+
+``descriptor`` can be one of::
+
+   KVMI_DESC_IDTR
+   KVMI_DESC_GDTR
+   KVMI_DESC_LDTR
+   KVMI_DESC_TR
+
+``write`` is 1 if the descriptor was written, 0 otherwise.
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index aed8a4b88a68..09ebed80a8cc 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -35,6 +35,7 @@ bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu);
 bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu, bool enable);
 void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
   u64 old_value, u64 new_value);
+bool kvmi_descriptor_event(struct kvm_vcpu *vcpu, u8 descriptor, bool write);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
@@ -49,6 +50,8 @@ static inline bool kvmi_monitor_cr3w_intercept(struct 
kvm_vcpu *vcpu,
bool enable) { return false; }
 static inline void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
u64 old_value, u64 new_value) { }
+static inline bool kvmi_descriptor_event(struct kvm_vcpu *vcpu, u8 descriptor,
+bool write) { return true; }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 33edbb5b32f4..b6ff39ba0ab3 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -116,4 +116,15 @@ struct kvmi_vcpu_get_mtrr_type_reply {
__u8 padding[7];
 };
 
+#define KVMI_DESC_IDTR  1
+#define KVMI_DESC_GDTR  2
+#define KVMI_DESC_LDTR  3
+#define KVMI_DESC_TR4
+
+struct kvmi_event_descriptor {
+   __u8 descriptor;
+   __u8 write;
+   __u8 padding[6];
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 80ad67e875c4..3ae43a4c8764 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -361,6 +361,21 @@ static void kvmi_arch_disable_cr3w_intercept(struct 
kvm_vcpu *vcpu)
vcpu->arc

[PATCH v9 82/84] KVM: introspection: add KVMI_VCPU_TRANSLATE_GVA

2020-07-21 Thread Adalbert Lazăr
This helps the introspection tool with the GVA to GPA translations
without the need to read or monitor the guest page tables.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 32 +++
 arch/x86/kvm/kvmi.c   |  5 +++
 include/uapi/linux/kvmi.h |  9 ++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 30 +
 virt/kvm/introspection/kvmi_int.h |  1 +
 virt/kvm/introspection/kvmi_msg.c | 15 +
 6 files changed, 92 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 3c481c1b2186..62138fa4b65c 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -1086,6 +1086,38 @@ to the introspection tool.
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+25. KVMI_VCPU_TRANSLATE_GVA
+---
+
+:Architecture: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_translate_gva {
+   __u64 gva;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vcpu_translate_gva_reply {
+   __u64 gpa;
+   };
+
+Translates a guest virtual address to a guest physical address or ~0 if
+the address cannot be translated.
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 18713004152d..8051d06064ab 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1373,3 +1373,8 @@ bool kvmi_arch_stop_singlestep(struct kvm_vcpu *vcpu)
kvm_x86_ops.control_singlestep(vcpu, false);
return true;
 }
+
+gpa_t kvmi_arch_cmd_translate_gva(struct kvm_vcpu *vcpu, gva_t gva)
+{
+   return kvm_mmu_gva_to_gpa_system(vcpu, gva, 0, NULL);
+}
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 040049abd450..3c15c17d28e3 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -48,6 +48,7 @@ enum {
KVMI_VM_SET_PAGE_ACCESS = 23,
 
KVMI_VCPU_CONTROL_SINGLESTEP = 24,
+   KVMI_VCPU_TRANSLATE_GVA  = 25,
 
KVMI_NUM_MESSAGES
 };
@@ -242,4 +243,12 @@ struct kvmi_event_singlestep {
__u8 padding[7];
 };
 
+struct kvmi_vcpu_translate_gva {
+   __u64 gva;
+};
+
+struct kvmi_vcpu_translate_gva_reply {
+   __u64 gpa;
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 967ea568d93c..e968b1a6f969 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -2040,6 +2040,35 @@ static void test_cmd_vcpu_control_singlestep(struct 
kvm_vm *vm)
test_unsupported_singlestep(vm);
 }
 
+static void cmd_translate_gva(struct kvm_vm *vm, vm_vaddr_t gva,
+ vm_paddr_t expected_gpa)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   struct kvmi_vcpu_translate_gva cmd;
+   } req = { 0 };
+   struct kvmi_vcpu_translate_gva_reply rpl;
+
+   req.cmd.gva = gva;
+
+   test_vcpu0_command(vm, KVMI_VCPU_TRANSLATE_GVA, , sizeof(req),
+ , sizeof(rpl));
+   TEST_ASSERT(rpl.gpa == expected_gpa,
+   "Translation failed for gva 0x%lx -> gpa 0x%llx instead of 
0x%lx\n",
+   gva, rpl.gpa, expected_gpa);
+}
+
+static void test_cmd_translate_gva(struct kvm_vm *vm)
+{
+   cmd_translate_gva(vm, test_gva, test_gpa);
+   pr_info("Tested gva 0x%lx to gpa 0x%lx\n", test_gva, test_gpa);
+
+   cmd_translate_gva(vm, -1, ~0);
+   pr_info("Tested gva 0x%lx to gpa 0x%lx\n",
+   (vm_vaddr_t)-1, (vm_paddr_t)-1);
+}
+
 static void test_introspection(struct kvm_vm *vm)
 {
srandom(time(0));
@@ -2075,6 +2104,7 @@ static void test_introspection(struct kvm_vm *vm)
test_cmd_vm_set_page_access(vm);
test_event_pf(vm);
test_cmd_vcpu_control_singlestep(vm);
+   test_cmd_translate_gva(vm);
 
unhook_introspection(vm);
 }
diff --git a/virt/kvm/introspection/kvmi_int.h 
b/virt/kvm/introspection/kvmi_int.h
index e5fca3502bab..cb8453f0fb87 100644
--- a/virt/kvm/introspection/kvmi_int.h
+++ b/virt/kvm/introspection/kvmi_int.h
@@ -141,5 +141,6 @@ void kvmi_arch_unhook(struct kvm *kvm);
 void kvmi_arch_features(struct kvmi_features *feat);
 bool kvmi_arch_start_singlestep(struct kvm_vcpu *vcpu);
 bool kvmi_arch_stop_singlestep(struct kvm_vcpu *vcpu);
+gpa_t kvmi_arch_cmd_translate_gva(struct kvm_vcpu *vcpu, gva_t gva);
 
 #endif
diff --git a/virt/kvm/introspection/kvmi_msg.c 
b/virt/kvm/introspection/kvmi_ms

[PATCH v9 18/84] KVM: svm: pass struct kvm_vcpu to set_msr_interception()

2020-07-21 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This is a preparatory patch to mediate the MSR interception between
the introspection tool and the device manager (one must not disable
the interception if the other one has enabled the interception).

Passing NULL during initialization is OK because a vCPU can be
introspected only after initialization.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/svm/svm.c | 27 ++-
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e16be80edd7e..dfa1a6e74bf7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -583,7 +583,8 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, 
unsigned msr)
return !!test_bit(bit_write,  );
 }
 
-static void set_msr_interception(u32 *msrpm, unsigned msr,
+static void set_msr_interception(struct kvm_vcpu *vcpu,
+u32 *msrpm, unsigned msr,
 int type, bool value)
 {
u8 bit_read, bit_write;
@@ -621,7 +622,7 @@ static void svm_vcpu_init_msrpm(u32 *msrpm)
if (!direct_access_msrs[i].always)
continue;
 
-   set_msr_interception(msrpm, direct_access_msrs[i].index,
+   set_msr_interception(NULL, msrpm, direct_access_msrs[i].index,
 MSR_TYPE_RW, 1);
}
 }
@@ -674,13 +675,13 @@ static void svm_enable_lbrv(struct vcpu_svm *svm)
u32 *msrpm = svm->msrpm;
 
svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
-   set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP,
+   set_msr_interception(>vcpu, msrpm, MSR_IA32_LASTBRANCHFROMIP,
 MSR_TYPE_RW, 1);
-   set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP,
+   set_msr_interception(>vcpu, msrpm, MSR_IA32_LASTBRANCHTOIP,
 MSR_TYPE_RW, 1);
-   set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP,
+   set_msr_interception(>vcpu, msrpm, MSR_IA32_LASTINTFROMIP,
 MSR_TYPE_RW, 1);
-   set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP,
+   set_msr_interception(>vcpu, msrpm, MSR_IA32_LASTINTTOIP,
 MSR_TYPE_RW, 1);
 }
 
@@ -689,13 +690,13 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
u32 *msrpm = svm->msrpm;
 
svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
-   set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP,
+   set_msr_interception(>vcpu, msrpm, MSR_IA32_LASTBRANCHFROMIP,
 MSR_TYPE_RW, 0);
-   set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP,
+   set_msr_interception(>vcpu, msrpm, MSR_IA32_LASTBRANCHTOIP,
 MSR_TYPE_RW, 0);
-   set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP,
+   set_msr_interception(>vcpu, msrpm, MSR_IA32_LASTINTFROMIP,
 MSR_TYPE_RW, 0);
-   set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP,
+   set_msr_interception(>vcpu, msrpm, MSR_IA32_LASTINTTOIP,
 MSR_TYPE_RW, 0);
 }
 
@@ -2629,7 +2630,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr)
 * We update the L1 MSR bit as well since it will end up
 * touching the MSR anyway now.
 */
-   set_msr_interception(svm->msrpm, MSR_IA32_SPEC_CTRL,
+   set_msr_interception(>vcpu, svm->msrpm, MSR_IA32_SPEC_CTRL,
 MSR_TYPE_RW, 1);
break;
case MSR_IA32_PRED_CMD:
@@ -2645,9 +2646,9 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr)
break;
 
wrmsrl(MSR_IA32_PRED_CMD, PRED_CMD_IBPB);
-   set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD,
+   set_msr_interception(>vcpu, svm->msrpm, MSR_IA32_PRED_CMD,
 MSR_TYPE_R, 0);
-   set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD,
+   set_msr_interception(>vcpu, svm->msrpm, MSR_IA32_PRED_CMD,
 MSR_TYPE_W, 1);
break;
case MSR_AMD64_VIRT_SPEC_CTRL:
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 64/84] KVM: introspection: add KVMI_VCPU_CONTROL_CR and KVMI_EVENT_CR

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

Using the KVMI_VCPU_CONTROL_CR command, the introspection tool subscribes
to KVMI_EVENT_CR events that will be sent when a control register (CR0,
CR3 or CR4) is going to be changed.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  70 +++
 arch/x86/include/asm/kvmi_host.h  |  11 ++
 arch/x86/include/uapi/asm/kvmi.h  |  18 +++
 arch/x86/kvm/kvmi.c   | 111 ++
 arch/x86/kvm/vmx/vmx.c|   6 +-
 arch/x86/kvm/x86.c|  12 +-
 include/uapi/linux/kvmi.h |   3 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 108 +
 virt/kvm/introspection/kvmi.c |   1 +
 virt/kvm/introspection/kvmi_int.h |   7 ++
 virt/kvm/introspection/kvmi_msg.c |  18 ++-
 11 files changed, 359 insertions(+), 6 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index f760957b27f4..e1f978fc799b 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -550,6 +550,7 @@ Enables/disables vCPU introspection events. This command 
can be used with
 the following events::
 
KVMI_EVENT_BREAKPOINT
+   KVMI_EVENT_CR
KVMI_EVENT_HYPERCALL
 
 When an event is enabled, the introspection tool is notified and
@@ -714,6 +715,40 @@ interceptions). By default it is disabled.
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EINVAL - 'enabled' is not 1 or 0
 
+15. KVMI_VCPU_CONTROL_CR
+
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_control_cr {
+   __u8 cr;
+   __u8 enable;
+   __u16 padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Enables/disables introspection for a specific control register and must
+be used in addition to *KVMI_VCPU_CONTROL_EVENTS* with the *KVMI_EVENT_CR*
+ID set.
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the specified control register is not CR0, CR3 or CR4
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
@@ -890,3 +925,38 @@ trying to perform a certain operation (like creating a 
process).
 ``kvmi_event`` and the guest physical address are sent to the introspection 
tool.
 
 The *RETRY* action is used by the introspection tool for its own breakpoints.
+
+5. KVMI_EVENT_CR
+
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_event;
+   struct kvmi_event_cr {
+   __u8 cr;
+   __u8 padding[7];
+   __u64 old_value;
+   __u64 new_value;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_event_reply;
+   struct kvmi_event_cr_reply {
+   __u64 new_val;
+   };
+
+This event is sent when a control register is going to be changed and the
+introspection has been enabled for this event and for this specific
+register (see **KVMI_VCPU_CONTROL_EVENTS**).
+
+``kvmi_event``, the control register number, the old value and the new value
+are sent to the introspection tool. The *CONTINUE* action will set the 
``new_val``.
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 3e85ae4fe5f0..1aff91ef8475 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -4,6 +4,8 @@
 
 #include 
 
+#define KVMI_NUM_CR 5
+
 struct kvmi_monitor_interception {
bool kvmi_intercepted;
bool kvm_intercepted;
@@ -17,6 +19,7 @@ struct kvmi_interception {
 };
 
 struct kvm_vcpu_arch_introspection {
+   DECLARE_BITMAP(cr_mask, KVMI_NUM_CR);
 };
 
 struct kvm_arch_introspection {
@@ -25,11 +28,19 @@ struct kvm_arch_introspection {
 #ifdef CONFIG_KVM_INTROSPECTION
 
 bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 dbg);
+bool kvmi_cr_event(struct kvm_vcpu *vcpu, unsigned int cr,
+  unsigned long old_value, unsigned long *new_value);
+bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
 static inline bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 dbg)
{ return false; }
+static inline bool kvmi_cr_event(struct kvm_vcpu *vcpu, unsigned int cr,
+unsigned long old_value,
+unsigned long *new_value)
+   { return true; }
+static inline bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu) { return false; 
}
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 1605777256a3..4c59c9fe6b00 100644
--- a/arch/x86/i

[PATCH v9 39/84] KVM: introspection: add permission access ioctls

2020-07-21 Thread Adalbert Lazăr
KVM_INTROSPECTION_COMMAND and KVM_INTROSPECTION_EVENTS ioctls are used
by the device manager to allow/disallow access to specific (or all)
introspection commands and events. The introspection tool will get the
KVM_EPERM error code on any attempt to use a disallowed command.

By default, all events and almost all commands are disallowed.
Some commands, those querying the introspection capabilities,
are always allowed.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/api.rst|  66 ++
 include/linux/kvmi_host.h |   7 ++
 include/uapi/linux/kvm.h  |   8 ++
 include/uapi/linux/kvmi.h |   8 ++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  |  48 +++
 virt/kvm/introspection/kvmi.c | 119 ++
 virt/kvm/kvm_main.c   |  14 +++
 7 files changed, 270 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index e34f20430eb1..174f13f2389d 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4753,6 +4753,72 @@ Errors:
 This ioctl is used to free all introspection structures
 related to this VM.
 
+4.128 KVM_INTROSPECTION_COMMAND
+---
+
+:Capability: KVM_CAP_INTROSPECTION
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_introspection_feature (in)
+:Returns: 0 on success, a negative value on error
+
+Errors:
+
+  == ===
+  EFAULT the VM is not introspected yet (use KVM_INTROSPECTION_HOOK)
+  EINVAL the command is unknown
+  EPERM  the command can't be disallowed (e.g. KVMI_GET_VERSION)
+  == ===
+
+This ioctl is used to allow or disallow introspection commands
+for the current VM. By default, almost all commands are disallowed
+except for those used to query the API.
+
+::
+
+  struct kvm_introspection_feature {
+   __u32 allow;
+   __s32 id;
+  };
+
+If allow is 1, the command specified by id is allowed. If allow is 0,
+the command is disallowed.
+
+Unless set to -1 (meaning all commands), id must be a command ID
+(e.g. KVMI_GET_VERSION)
+
+4.129 KVM_INTROSPECTION_EVENT
+-
+
+:Capability: KVM_CAP_INTROSPECTION
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_introspection_feature (in)
+:Returns: 0 on success, a negative value on error
+
+Errors:
+
+  == ===
+  EFAULT the VM is not introspected yet (use KVM_INTROSPECTION_HOOK)
+  EINVAL the event is unknown
+  == ===
+
+This ioctl is used to allow or disallow introspection events
+for the current VM. By default, all events are disallowed.
+
+::
+
+  struct kvm_introspection_feature {
+   __u32 allow;
+   __s32 id;
+  };
+
+If allow is 1, the event specified by id is allowed. If allow is 0,
+the event is disallowed.
+
+Unless set to -1 (meaning all events), id must be a event ID
+(e.g. KVMI_EVENT_UNHOOK, KVMI_EVENT_CR, etc.)
+
 5. The kvm_run structure
 
 
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index 55ff571db40d..7efd071e398d 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -14,6 +14,9 @@ struct kvm_introspection {
 
struct socket *sock;
struct task_struct *recv;
+
+   unsigned long *cmd_allow_mask;
+   unsigned long *event_allow_mask;
 };
 
 int kvmi_version(void);
@@ -25,6 +28,10 @@ void kvmi_destroy_vm(struct kvm *kvm);
 int kvmi_ioctl_hook(struct kvm *kvm,
const struct kvm_introspection_hook *hook);
 int kvmi_ioctl_unhook(struct kvm *kvm);
+int kvmi_ioctl_command(struct kvm *kvm,
+  const struct kvm_introspection_feature *feat);
+int kvmi_ioctl_event(struct kvm *kvm,
+const struct kvm_introspection_feature *feat);
 
 #else
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index dd84ebdfcd6d..17df03ceb483 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1622,6 +1622,14 @@ struct kvm_introspection_hook {
 #define KVM_INTROSPECTION_HOOK_IOW(KVMIO, 0xc3, struct 
kvm_introspection_hook)
 #define KVM_INTROSPECTION_UNHOOK  _IO(KVMIO, 0xc4)
 
+struct kvm_introspection_feature {
+   __u32 allow;
+   __s32 id;
+};
+
+#define KVM_INTROSPECTION_COMMAND _IOW(KVMIO, 0xc5, struct 
kvm_introspection_feature)
+#define KVM_INTROSPECTION_EVENT   _IOW(KVMIO, 0xc6, struct 
kvm_introspection_feature)
+
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
 #define KVM_DEV_ASSIGN_MASK_INTX   (1 << 2)
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 34dda91016db..d7b18ffef4fa 100644
--- a/include/uapi/

[PATCH v9 30/84] KVM: x86: export kvm_vcpu_ioctl_x86_get_xsave()

2020-07-21 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This function is needed for the KVMI_VCPU_GET_XSAVE command.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/x86.c   | 4 ++--
 include/linux/kvm_host.h | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 52181eb131dd..4d5be48b5239 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4207,8 +4207,8 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
}
 }
 
-static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
-struct kvm_xsave *guest_xsave)
+void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *guest_xsave)
 {
if (boot_cpu_has(X86_FEATURE_XSAVE)) {
memset(guest_xsave, 0, sizeof(struct kvm_xsave));
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 01628f7bcbcd..f138d56450c0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -883,6 +883,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu 
*vcpu,
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_set_guest_debug(struct kvm_vcpu *vcpu,
  struct kvm_guest_debug *dbg);
+void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *guest_xsave);
 
 int kvm_arch_init(void *opaque);
 void kvm_arch_exit(void);
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 43/84] KVM: introspection: add KVMI_VM_GET_INFO

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

For now, this command returns only the number of online vCPUs.

The introspection tool uses the vCPU index to specify the vCPU to
which the introspection command applies.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 18 ++
 include/uapi/linux/kvmi.h |  6 
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 35 +--
 virt/kvm/introspection/kvmi_msg.c | 13 +++
 4 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index a2cda3268da0..a81f22cb8c18 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -323,3 +323,21 @@ This command is always allowed.
 * -KVM_ENOENT - the event specified by ``id`` is unsupported
 * -KVM_EPERM - the event specified by ``id`` is disallowed
 * -KVM_EINVAL - the padding is not zero
+
+4. KVMI_VM_GET_INFO
+---
+
+:Architectures: all
+:Versions: >= 1
+:Parameters: none
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vm_get_info_reply {
+   __u32 vcpu_count;
+   __u32 padding[3];
+   };
+
+Returns the number of online vCPUs.
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index e55a0fa66ac5..eabaf7cea1df 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -17,6 +17,7 @@ enum {
KVMI_GET_VERSION  = 1,
KVMI_VM_CHECK_COMMAND = 2,
KVMI_VM_CHECK_EVENT   = 3,
+   KVMI_VM_GET_INFO  = 4,
 
KVMI_NUM_MESSAGES
 };
@@ -63,4 +64,9 @@ struct kvmi_vm_check_event {
__u32 padding2;
 };
 
+struct kvmi_vm_get_info_reply {
+   __u32 vcpu_count;
+   __u32 padding[3];
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 28216c4e8b9d..1f4a165ab640 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -84,6 +84,16 @@ static void set_command_perm(struct kvm_vm *vm, __s32 id, 
__u32 allow,
 "KVM_INTROSPECTION_COMMAND");
 }
 
+static void disallow_command(struct kvm_vm *vm, __s32 id)
+{
+   set_command_perm(vm, id, 0, 0);
+}
+
+static void allow_command(struct kvm_vm *vm, __s32 id)
+{
+   set_command_perm(vm, id, 1, 0);
+}
+
 static void hook_introspection(struct kvm_vm *vm)
 {
__u32 allow = 1, disallow = 0, allow_inval = 2;
@@ -258,14 +268,18 @@ static void cmd_vm_check_command(__u16 id, __u16 padding, 
int expected_err)
-r, kvm_strerror(-r), expected_err);
 }
 
-static void test_cmd_vm_check_command(void)
+static void test_cmd_vm_check_command(struct kvm_vm *vm)
 {
-   __u16 valid_id = KVMI_GET_VERSION, invalid_id = 0x;
+   __u16 valid_id = KVMI_VM_GET_INFO, invalid_id = 0x;
__u16 padding = 1, no_padding = 0;
 
cmd_vm_check_command(valid_id, no_padding, 0);
cmd_vm_check_command(valid_id, padding, -KVM_EINVAL);
cmd_vm_check_command(invalid_id, no_padding, -KVM_ENOENT);
+
+   disallow_command(vm, valid_id);
+   cmd_vm_check_command(valid_id, no_padding, -KVM_EPERM);
+   allow_command(vm, valid_id);
 }
 
 static void cmd_vm_check_event(__u16 id, __u16 padding, int expected_err)
@@ -295,6 +309,20 @@ static void test_cmd_vm_check_event(void)
cmd_vm_check_event(invalid_id, no_padding, -KVM_ENOENT);
 }
 
+static void test_cmd_vm_get_info(void)
+{
+   struct kvmi_vm_get_info_reply rpl;
+   struct kvmi_msg_hdr req;
+
+   test_vm_command(KVMI_VM_GET_INFO, , sizeof(req), ,
+   sizeof(rpl));
+   TEST_ASSERT(rpl.vcpu_count == 1,
+   "Unexpected number of vCPU count %u\n",
+   rpl.vcpu_count);
+
+   pr_info("vcpu count: %u\n", rpl.vcpu_count);
+}
+
 static void test_introspection(struct kvm_vm *vm)
 {
setup_socket();
@@ -302,8 +330,9 @@ static void test_introspection(struct kvm_vm *vm)
 
test_cmd_invalid();
test_cmd_get_version();
-   test_cmd_vm_check_command();
+   test_cmd_vm_check_command(vm);
test_cmd_vm_check_event();
+   test_cmd_vm_get_info();
 
unhook_introspection(vm);
 }
diff --git a/virt/kvm/introspection/kvmi_msg.c 
b/virt/kvm/introspection/kvmi_msg.c
index 86c356afc154..3df18f7965c0 100644
--- a/virt/kvm/introspection/kvmi_msg.c
+++ b/virt/kvm/introspection/kvmi_msg.c
@@ -150,6 +150,18 @@ static int handle_vm_check_event(struct kvm_introspection 
*kvmi,
return kvmi_msg_vm_reply(kvmi, msg, ec, NULL, 0);
 }
 
+static int handle_vm_get_info(struct kvm_introspection *kvmi,
+ const struct kvmi_msg_hdr *msg,
+ const void *req)
+{
+   struct kvmi_vm_get_info_reply rpl;
+
+   memset(&rpl, 0, sizeof(rpl));

[PATCH v9 14/84] KVM: x86: add .control_desc_intercept()

2020-07-21 Thread Adalbert Lazăr
This function is needed to intercept descriptor-table registers access.

Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm/svm.c  | 26 ++
 arch/x86/kvm/vmx/vmx.c  | 15 +--
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b3ca64a70bb5..83dfa0247130 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1124,6 +1124,7 @@ struct kvm_x86_ops {
void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
bool (*desc_ctrl_supported)(void);
+   void (*control_desc_intercept)(struct kvm_vcpu *vcpu, bool enable);
void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b540af04b384..c70c14461483 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1528,6 +1528,31 @@ static bool svm_desc_ctrl_supported(void)
return true;
 }
 
+static void svm_control_desc_intercept(struct kvm_vcpu *vcpu, bool enable)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   if (enable) {
+   set_intercept(svm, INTERCEPT_STORE_IDTR);
+   set_intercept(svm, INTERCEPT_STORE_GDTR);
+   set_intercept(svm, INTERCEPT_STORE_LDTR);
+   set_intercept(svm, INTERCEPT_STORE_TR);
+   set_intercept(svm, INTERCEPT_LOAD_IDTR);
+   set_intercept(svm, INTERCEPT_LOAD_GDTR);
+   set_intercept(svm, INTERCEPT_LOAD_LDTR);
+   set_intercept(svm, INTERCEPT_LOAD_TR);
+   } else {
+   clr_intercept(svm, INTERCEPT_STORE_IDTR);
+   clr_intercept(svm, INTERCEPT_STORE_GDTR);
+   clr_intercept(svm, INTERCEPT_STORE_LDTR);
+   clr_intercept(svm, INTERCEPT_STORE_TR);
+   clr_intercept(svm, INTERCEPT_LOAD_IDTR);
+   clr_intercept(svm, INTERCEPT_LOAD_GDTR);
+   clr_intercept(svm, INTERCEPT_LOAD_LDTR);
+   clr_intercept(svm, INTERCEPT_LOAD_TR);
+   }
+}
+
 static void update_cr0_intercept(struct vcpu_svm *svm)
 {
ulong gcr0 = svm->vcpu.arch.cr0;
@@ -4056,6 +4081,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.get_gdt = svm_get_gdt,
.set_gdt = svm_set_gdt,
.desc_ctrl_supported = svm_desc_ctrl_supported,
+   .control_desc_intercept = svm_control_desc_intercept,
.set_dr7 = svm_set_dr7,
.sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
.cache_reg = svm_cache_reg,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ecd4c50bf1a2..199ffd318145 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3151,6 +3151,16 @@ void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned 
long pgd)
vmcs_writel(GUEST_CR3, guest_cr3);
 }
 
+static void vmx_control_desc_intercept(struct kvm_vcpu *vcpu, bool enable)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   if (enable)
+   secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
+   else
+   secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC);
+}
+
 int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3171,11 +3181,11 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long 
cr4)
 
if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
if (cr4 & X86_CR4_UMIP) {
-   secondary_exec_controls_setbit(vmx, 
SECONDARY_EXEC_DESC);
+   vmx_control_desc_intercept(vcpu, true);
hw_cr4 &= ~X86_CR4_UMIP;
} else if (!is_guest_mode(vcpu) ||
!nested_cpu_has2(get_vmcs12(vcpu), 
SECONDARY_EXEC_DESC)) {
-   secondary_exec_controls_clearbit(vmx, 
SECONDARY_EXEC_DESC);
+   vmx_control_desc_intercept(vcpu, false);
}
}
 
@@ -7904,6 +7914,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.get_gdt = vmx_get_gdt,
.set_gdt = vmx_set_gdt,
.desc_ctrl_supported = vmx_desc_ctrl_supported,
+   .control_desc_intercept = vmx_control_desc_intercept,
.set_dr7 = vmx_set_dr7,
.sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
.cache_reg = vmx_cache_reg,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 46/84] KVM: introspection: add KVMI_VM_READ_PHYSICAL/KVMI_VM_WRITE_PHYSICAL

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

These commands allow the introspection tool to read/write from/to
the guest memory.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  68 +++
 include/uapi/linux/kvmi.h |  17 ++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 170 ++
 virt/kvm/introspection/kvmi.c | 108 +++
 virt/kvm/introspection/kvmi_int.h |   7 +
 virt/kvm/introspection/kvmi_msg.c |  44 +
 6 files changed, 414 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 4ec0046b4138..be5a92e20173 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -375,6 +375,74 @@ the following events::
 * -KVM_EINVAL - the event ID is unknown (use *KVMI_VM_CHECK_EVENT* first)
 * -KVM_EPERM - the access is disallowed (use *KVMI_VM_CHECK_EVENT* first)
 
+6. KVMI_VM_READ_PHYSICAL
+
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_read_physical {
+   __u64 gpa;
+   __u16 size;
+   __u16 padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   __u8 data[0];
+
+Reads from the guest memory.
+
+Currently, the size must be non-zero and the read must be restricted to
+one page (offset + size <= PAGE_SIZE).
+
+:Errors:
+
+* -KVM_ENOENT - the guest page doesn't exist
+* -KVM_EINVAL - the specified gpa/size pair is invalid
+* -KVM_EINVAL - the padding is not zero
+
+7. KVMI_VM_WRITE_PHYSICAL
+-
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_write_physical {
+   __u64 gpa;
+   __u16 size;
+   __u16 padding1;
+   __u32 padding2;
+   __u8  data[0];
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Writes into the guest memory.
+
+Currently, the size must be non-zero and the write must be restricted to
+one page (offset + size <= PAGE_SIZE).
+
+:Errors:
+
+* -KVM_ENOENT - the guest page doesn't exist
+* -KVM_EINVAL - the specified gpa/size pair is invalid
+* -KVM_EINVAL - the padding is not zero
+
 Events
 ==
 
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index f9e2cb8a2c5e..9b2428963994 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -22,6 +22,8 @@ enum {
KVMI_VM_CHECK_EVENT= 3,
KVMI_VM_GET_INFO   = 4,
KVMI_VM_CONTROL_EVENTS = 5,
+   KVMI_VM_READ_PHYSICAL  = 6,
+   KVMI_VM_WRITE_PHYSICAL = 7,
 
KVMI_NUM_MESSAGES
 };
@@ -82,6 +84,21 @@ struct kvmi_vm_control_events {
__u32 padding2;
 };
 
+struct kvmi_vm_read_physical {
+   __u64 gpa;
+   __u16 size;
+   __u16 padding1;
+   __u32 padding2;
+};
+
+struct kvmi_vm_write_physical {
+   __u64 gpa;
+   __u16 size;
+   __u16 padding1;
+   __u32 padding2;
+   __u8  data[0];
+};
+
 struct kvmi_event {
__u16 size;
__u16 vcpu;
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index bb2daaca0291..97dec49d52b7 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -8,6 +8,7 @@
 #define _GNU_SOURCE /* for program_invocation_short_name */
 #include 
 #include 
+#include 
 
 #include "test_util.h"
 
@@ -24,6 +25,13 @@ static int socket_pair[2];
 #define Kvm_socket   socket_pair[0]
 #define Userspace_socket socket_pair[1]
 
+static vm_vaddr_t test_gva;
+static void *test_hva;
+static vm_paddr_t test_gpa;
+
+static uint8_t test_write_pattern;
+static int page_size;
+
 void setup_socket(void)
 {
int r;
@@ -434,8 +442,154 @@ static void test_cmd_vm_control_events(struct kvm_vm *vm)
allow_event(vm, id);
 }
 
+static void cmd_vm_write_page(__u64 gpa, __u64 size, void *p, __u16 padding,
+ int expected_err)
+{
+   struct kvmi_vm_write_physical *cmd;
+   struct kvmi_msg_hdr *req;
+   size_t req_size;
+   int r;
+
+   req_size = sizeof(*req) + sizeof(*cmd) + size;
+
+   req = calloc(1, req_size);
+   TEST_ASSERT(req, "Insufficient Memory\n");
+
+   cmd = (struct kvmi_vm_write_physical *)(req + 1);
+   cmd->gpa = gpa;
+   cmd->size = size;
+   cmd->padding1 = padding;
+   cmd->padding2 = padding;
+
+   memcpy(cmd + 1, p, size);
+
+   r = do_command(KVMI_VM_WRITE_PHYSICAL, req, req_size, NULL, 0);
+
+   free(req);
+
+   TEST_ASSERT(r == expected_err,
+   "KVMI_VM_WRITE_PHYSICAL failed, gpa 0x%llx, error %d (%s), 
expected error %d\n",
+   gpa, -r, kvm_strerror(-r), expected_err);
+}
+
+static void write_guest_pag

[PATCH v9 03/84] KVM: add kvm_vcpu_kick_and_wait()

2020-07-21 Thread Adalbert Lazăr
This function is needed for the KVMI_VCPU_PAUSE command, which sets the
introspection request flag, kicks the vCPU out of guest and returns a
success error code (0). The vCPU will send the KVMI_EVENT_PAUSE event
as soon as possible. Once the introspection tool receives the event, it
knows that the vCPU doesn't run guest code and can handle introspection
commands (until the reply for the pause event is sent).

To implement the "pause VM" command, the userspace code will send a
KVMI_VCPU_PAUSE command for every vCPU. To know when the VM is paused,
userspace has to receive and "parse" all events. For example, with a
4 vCPU VM, if the "pause VM" was sent by userspace while handling an event
from vCPU0 and at the same time a new vCPU was hot-plugged (which could
send another event for vCPU4), the "pause VM" command has to receive
and check all events until it gets the pause events for vCPU1, vCPU2
and vCPU3 before returning to the upper layer.

In order to make it easier for userspace to implement the "pause VM"
command, the KVMI_VCPU_PAUSE has an optional 'wait' parameter. If this is
set, kvm_vcpu_kick_and_wait() will be used instead of kvm_vcpu_kick().
And because this vCPU command (KVMI_VCPU_PAUSE) is handled by the
receiving thread (instead of the vCPU thread), once a string of
KVMI_VCPU_PAUSE commands with the 'wait' flag set is handled, the
introspection tool can consider the VM paused, without the need to wait
and check events.

Signed-off-by: Adalbert Lazăr 
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c  | 10 ++
 2 files changed, 11 insertions(+)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 62ec926c78a0..92490279d65a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -810,6 +810,7 @@ void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
 bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
+void kvm_vcpu_kick_and_wait(struct kvm_vcpu *vcpu);
 int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0a68c9d3d3ab..4d965913d347 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2802,6 +2802,16 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
 #endif /* !CONFIG_S390 */
 
+void kvm_vcpu_kick_and_wait(struct kvm_vcpu *vcpu)
+{
+   if (kvm_vcpu_wake_up(vcpu))
+   return;
+
+   if (kvm_request_needs_ipi(vcpu, KVM_REQUEST_WAIT))
+   smp_call_function_single(vcpu->cpu, ack_flush, NULL, 1);
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_kick_and_wait);
+
 int kvm_vcpu_yield_to(struct kvm_vcpu *target)
 {
struct pid *pid;
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 48/84] KVM: introspection: add a jobs list to every introspected vCPU

2020-07-21 Thread Adalbert Lazăr
Every vCPU has a lock-protected list in which the receiving thread
places the jobs that has to be done by the vCPU thread
once it is kicked out of guest (KVM_REQ_INTROSPECTION).

Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 include/linux/kvmi_host.h | 10 +
 virt/kvm/introspection/kvmi.c | 72 ++-
 virt/kvm/introspection/kvmi_int.h |  1 +
 3 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index f96a9a3cfdd4..d3242a99f891 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -6,8 +6,18 @@
 
 #include 
 
+struct kvmi_job {
+   struct list_head link;
+   void *ctx;
+   void (*fct)(struct kvm_vcpu *vcpu, void *ctx);
+   void (*free_fct)(void *ctx);
+};
+
 struct kvm_vcpu_introspection {
struct kvm_vcpu_arch_introspection arch;
+
+   struct list_head job_list;
+   spinlock_t job_lock;
 };
 
 struct kvm_introspection {
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index a51e7342f837..b6595bca99f7 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -18,6 +18,7 @@ static DECLARE_BITMAP(Kvmi_known_vm_events, KVMI_NUM_EVENTS);
 static DECLARE_BITMAP(Kvmi_known_vcpu_events, KVMI_NUM_EVENTS);
 
 static struct kmem_cache *msg_cache;
+static struct kmem_cache *job_cache;
 
 void *kvmi_msg_alloc(void)
 {
@@ -34,14 +35,19 @@ static void kvmi_cache_destroy(void)
 {
kmem_cache_destroy(msg_cache);
msg_cache = NULL;
+   kmem_cache_destroy(job_cache);
+   job_cache = NULL;
 }
 
 static int kvmi_cache_create(void)
 {
msg_cache = kmem_cache_create("kvmi_msg", KVMI_MSG_SIZE_ALLOC,
  4096, SLAB_ACCOUNT, NULL);
+   job_cache = kmem_cache_create("kvmi_job",
+ sizeof(struct kvmi_job),
+ 0, SLAB_ACCOUNT, NULL);
 
-   if (!msg_cache) {
+   if (!msg_cache || !job_cache) {
kvmi_cache_destroy();
 
return -1;
@@ -107,6 +113,48 @@ void kvmi_uninit(void)
kvmi_cache_destroy();
 }
 
+static int __kvmi_add_job(struct kvm_vcpu *vcpu,
+ void (*fct)(struct kvm_vcpu *vcpu, void *ctx),
+ void *ctx, void (*free_fct)(void *ctx))
+{
+   struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu);
+   struct kvmi_job *job;
+
+   job = kmem_cache_zalloc(job_cache, GFP_KERNEL);
+   if (unlikely(!job))
+   return -ENOMEM;
+
+   INIT_LIST_HEAD(&job->link);
+   job->fct = fct;
+   job->ctx = ctx;
+   job->free_fct = free_fct;
+
+   spin_lock(&vcpui->job_lock);
+   list_add_tail(&job->link, &vcpui->job_list);
+   spin_unlock(&vcpui->job_lock);
+
+   return 0;
+}
+
+int kvmi_add_job(struct kvm_vcpu *vcpu,
+void (*fct)(struct kvm_vcpu *vcpu, void *ctx),
+void *ctx, void (*free_fct)(void *ctx))
+{
+   int err;
+
+   err = __kvmi_add_job(vcpu, fct, ctx, free_fct);
+
+   return err;
+}
+
+static void kvmi_free_job(struct kvmi_job *job)
+{
+   if (job->free_fct)
+   job->free_fct(job->ctx);
+
+   kmem_cache_free(job_cache, job);
+}
+
 static bool alloc_vcpui(struct kvm_vcpu *vcpu)
 {
struct kvm_vcpu_introspection *vcpui;
@@ -115,6 +163,9 @@ static bool alloc_vcpui(struct kvm_vcpu *vcpu)
if (!vcpui)
return false;
 
+   INIT_LIST_HEAD(&vcpui->job_list);
+   spin_lock_init(&vcpui->job_lock);
+
vcpu->kvmi = vcpui;
 
return true;
@@ -128,9 +179,26 @@ static int create_vcpui(struct kvm_vcpu *vcpu)
return 0;
 }
 
+static void free_vcpu_jobs(struct kvm_vcpu_introspection *vcpui)
+{
+   struct kvmi_job *cur, *next;
+
	list_for_each_entry_safe(cur, next, &vcpui->job_list, link) {
		list_del(&cur->link);
+   kvmi_free_job(cur);
+   }
+}
+
 static void free_vcpui(struct kvm_vcpu *vcpu)
 {
-   kfree(vcpu->kvmi);
+   struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu);
+
+   if (!vcpui)
+   return;
+
+   free_vcpu_jobs(vcpui);
+
+   kfree(vcpui);
vcpu->kvmi = NULL;
 }
 
diff --git a/virt/kvm/introspection/kvmi_int.h 
b/virt/kvm/introspection/kvmi_int.h
index 40e8647a6fd4..ceed50722dc1 100644
--- a/virt/kvm/introspection/kvmi_int.h
+++ b/virt/kvm/introspection/kvmi_int.h
@@ -19,6 +19,7 @@
	kvm_info("%pU ERROR: " fmt, &kvmi->uuid, ## __VA_ARGS__)
 
 #define KVMI(kvm) ((kvm)->kvmi)
+#define VCPUI(vcpu) ((vcpu)->kvmi)
 
 /* kvmi_msg.c */
 bool kvmi_sock_get(struct kvm_introspection *kvmi, int fd);
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 59/84] KVM: introspection: add KVMI_EVENT_HYPERCALL

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

This event is sent on a specific hypercall.

It is used by the code residing inside the introspected guest to call the
introspection tool and to report certain details about its operation.
For example, a classic antimalware remediation tool can report
what it has found during a scan.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/hypercalls.rst | 35 
 Documentation/virt/kvm/kvmi.rst   | 36 +++-
 arch/x86/include/uapi/asm/kvmi.h  |  2 +
 arch/x86/kvm/kvmi.c   | 32 ++
 arch/x86/kvm/x86.c| 18 ++--
 include/linux/kvmi_host.h |  2 +
 include/uapi/linux/kvm_para.h |  1 +
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 42 +++
 virt/kvm/introspection/kvmi.c | 23 ++
 virt/kvm/introspection/kvmi_int.h |  9 
 virt/kvm/introspection/kvmi_msg.c | 12 ++
 12 files changed, 208 insertions(+), 5 deletions(-)

diff --git a/Documentation/virt/kvm/hypercalls.rst 
b/Documentation/virt/kvm/hypercalls.rst
index 70e77c66b64c..abfbff96b9e3 100644
--- a/Documentation/virt/kvm/hypercalls.rst
+++ b/Documentation/virt/kvm/hypercalls.rst
@@ -169,3 +169,38 @@ a0: destination APIC ID
 
 :Usage example: When sending a call-function IPI-many to vCPUs, yield if
any of the IPI target vCPUs was preempted.
+
+9. KVM_HC_XEN_HVM_OP
+
+
+:Architecture: x86
+:Status: active
+:Purpose: To enable communication between a guest agent and a VMI application
+
+Usage:
+
+An event will be sent to the VMI application (see kvmi.rst) if the following
+registers, which differ between 32bit and 64bit, have the following values:
+
+   = =
+   32bit   64bit value
+   = =
+   ebx (a0)rdi   KVM_HC_XEN_HVM_OP_GUEST_REQUEST_VM_EVENT
+   ecx (a1)rsi   0
+   = =
+
+This specification copies Xen's { __HYPERVISOR_hvm_op,
+HVMOP_guest_request_vm_event } hypercall and can originate from kernel or
+userspace.
+
+It returns 0 if successful, or a negative POSIX.1 error code if it fails. The
+absence of an active VMI application is not signaled in any way.
+
+The following registers are clobbered:
+
+  * 32bit: edx, esi, edi, ebp
+  * 64bit: rdx, r10, r8, r9
+
+In particular, for KVM_HC_XEN_HVM_OP_GUEST_REQUEST_VM_EVENT, the last two
+registers can be poisoned deliberately and cannot be used for passing
+information.
diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index fc2e8c756191..d062f2ccf365 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -546,7 +546,10 @@ command) before returning to guest.
 
struct kvmi_error_code
 
-Enables/disables vCPU introspection events.
+Enables/disables vCPU introspection events. This command can be used with
+the following events::
+
+   KVMI_EVENT_HYPERCALL
 
 When an event is enabled, the introspection tool is notified and
 must reply with: continue, retry, crash, etc. (see **Events** below).
@@ -786,3 +789,34 @@ cannot be controlled with *KVMI_VCPU_CONTROL_EVENTS*.
 Because it has a low priority, it will be sent after any other vCPU
 introspection event and when no other vCPU introspection command is
 queued.
+
+3. KVMI_EVENT_HYPERCALL
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_event;
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_event_reply;
+
+This event is sent on a specific user hypercall when the introspection has
+been enabled for this event (see *KVMI_VCPU_CONTROL_EVENTS*).
+
+The hypercall number must be ``KVM_HC_XEN_HVM_OP`` with the
+``KVM_HC_XEN_HVM_OP_GUEST_REQUEST_VM_EVENT`` sub-function
+(see hypercalls.rst).
+
+It is used by the code residing inside the introspected guest to call the
+introspection tool and to report certain details about its operation. For
+example, a classic antimalware remediation tool can report what it has
+found during a scan.
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 57c48ace417f..9882e68cab75 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -8,6 +8,8 @@
 
 #include 
 
+#define KVM_HC_XEN_HVM_OP_GUEST_REQUEST_VM_EVENT 24
+
 struct kvmi_event_arch {
__u8 mode;  /* 2, 4 or 8 */
__u8 padding[7];
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 53c4a37e10c6..45f1a45d5c0f 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -210,3 +210,35 @@ int kvmi_arch_cmd_vcpu_get_cpuid(struct kvm_vcpu *vcpu,
 
return 0;
 }
+
+b

[PATCH v9 61/84] KVM: introspection: add cleanup support for vCPUs

2020-07-21 Thread Adalbert Lazăr
From: Nicușor Cîțu 

On unhook the introspection channel is closed. This will signal the
receiving thread to call kvmi_put() and exit. There might be vCPU threads
handling introspection commands or waiting for event replies. These will
also call kvmi_put() and re-enter in guest. Once the reference counter
reaches zero, the structures keeping the introspection data will be freed.

In order to restore the interception of CRs, MSRs, BP, descriptor-table
registers, from all vCPUs (some of which might run from userspace),
we keep the needed information in another structure (kvmi_interception)
which will be used and freed by each of them before re-entering in guest.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h   |  3 ++
 arch/x86/include/asm/kvmi_host.h  |  4 +++
 arch/x86/kvm/kvmi.c   | 49 +++
 virt/kvm/introspection/kvmi.c | 32 ++--
 virt/kvm/introspection/kvmi_int.h |  5 
 5 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8a119fb7c623..acfcebce51dd 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -840,6 +840,9 @@ struct kvm_vcpu_arch {
 
/* #PF translated error code from EPT/NPT exit reason */
u64 error_code;
+
+   /* Control the interception for KVM Introspection */
+   struct kvmi_interception *kvmi;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 05ade3a16b24..6d274f173fb5 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -4,6 +4,10 @@
 
 #include 
 
+struct kvmi_interception {
+   bool restore_interception;
+};
+
 struct kvm_vcpu_arch_introspection {
 };
 
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index f13272350bc9..ca2ce7498cfe 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -290,3 +290,52 @@ void kvmi_arch_breakpoint_event(struct kvm_vcpu *vcpu, u64 
gva, u8 insn_len)
kvmi_handle_common_event_actions(vcpu->kvm, action);
}
 }
+
+static void kvmi_arch_restore_interception(struct kvm_vcpu *vcpu)
+{
+}
+
+bool kvmi_arch_clean_up_interception(struct kvm_vcpu *vcpu)
+{
+   struct kvmi_interception *arch_vcpui = vcpu->arch.kvmi;
+
+   if (!arch_vcpui)
+   return false;
+
+   if (!arch_vcpui->restore_interception)
+   return false;
+
+   kvmi_arch_restore_interception(vcpu);
+
+   return true;
+}
+
+bool kvmi_arch_vcpu_alloc_interception(struct kvm_vcpu *vcpu)
+{
+   struct kvmi_interception *arch_vcpui;
+
+   arch_vcpui = kzalloc(sizeof(*arch_vcpui), GFP_KERNEL);
+   if (!arch_vcpui)
+   return false;
+
+   return true;
+}
+
+void kvmi_arch_vcpu_free_interception(struct kvm_vcpu *vcpu)
+{
+   kfree(vcpu->arch.kvmi);
+   WRITE_ONCE(vcpu->arch.kvmi, NULL);
+}
+
+bool kvmi_arch_vcpu_introspected(struct kvm_vcpu *vcpu)
+{
+   return !!READ_ONCE(vcpu->arch.kvmi);
+}
+
+void kvmi_arch_request_interception_cleanup(struct kvm_vcpu *vcpu)
+{
+   struct kvmi_interception *arch_vcpui = READ_ONCE(vcpu->arch.kvmi);
+
+   if (arch_vcpui)
+   arch_vcpui->restore_interception = true;
+}
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index a5264696c630..083dd8be9252 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -197,7 +197,7 @@ static bool alloc_vcpui(struct kvm_vcpu *vcpu)
 
vcpu->kvmi = vcpui;
 
-   return true;
+   return kvmi_arch_vcpu_alloc_interception(vcpu);
 }
 
 static int create_vcpui(struct kvm_vcpu *vcpu)
@@ -231,6 +231,9 @@ static void free_vcpui(struct kvm_vcpu *vcpu)
 
kfree(vcpui);
vcpu->kvmi = NULL;
+
+   kvmi_arch_request_interception_cleanup(vcpu);
+   kvmi_make_request(vcpu, false);
 }
 
 static void free_kvmi(struct kvm *kvm)
@@ -253,6 +256,7 @@ void kvmi_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
	mutex_lock(&vcpu->kvm->kvmi_lock);
free_vcpui(vcpu);
+   kvmi_arch_vcpu_free_interception(vcpu);
	mutex_unlock(&vcpu->kvm->kvmi_lock);
 }
 
@@ -404,6 +408,21 @@ static int kvmi_recv_thread(void *arg)
return 0;
 }
 
+static bool ready_to_hook(struct kvm *kvm)
+{
+   struct kvm_vcpu *vcpu;
+   int i;
+
+   if (kvm->kvmi)
+   return false;
+
+   kvm_for_each_vcpu(i, vcpu, kvm)
+   if (kvmi_arch_vcpu_introspected(vcpu))
+   return false;
+
+   return true;
+}
+
 int kvmi_hook(struct kvm *kvm, const struct kvm_introspection_hook *hook)
 {
struct kvm_introspection *kvmi;
@@ -411,7 +430,7 @@ int kvmi_hook(struct kvm *kvm, const struct 
kvm_introspection_hook *hook)
 
	mutex_lock(&kvm->kvmi_lock);
 
-   if (kvm->kvmi) {
+   if (!ready_to_hook(kvm)) {

[PATCH v9 22/84] KVM: x86: save the error code during EPT/NPF exits handling

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

This is needed for kvm_page_track_emulation_failure().

When the introspection tool {read,write,exec}-protect a guest memory
page, it is notified from the read/write/fetch callbacks used by
the KVM emulator. If the emulation fails it is possible that the
read/write callbacks were not used. In such cases, the emulator will
call kvm_page_track_emulation_failure() to ensure that the introspection
tool is notified of the read/write #PF (based on this saved error code),
which in turn can emulate the instruction or unprotect the memory page
(and let the guest execute the instruction).

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 3 +++
 arch/x86/kvm/svm/svm.c  | 2 ++
 arch/x86/kvm/vmx/vmx.c  | 1 +
 3 files changed, 6 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f04a01dac423..2530af4420cf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -837,6 +837,9 @@ struct kvm_vcpu_arch {
 
/* AMD MSRC001_0015 Hardware Configuration */
u64 msr_hwcr;
+
+   /* #PF translated error code from EPT/NPT exit reason */
+   u64 error_code;
 };
 
 struct kvm_lpage_info {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9c8e77193f98..1ec88ff241ab 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1799,6 +1799,8 @@ static int npf_interception(struct vcpu_svm *svm)
u64 fault_address = __sme_clr(svm->vmcb->control.exit_info_2);
u64 error_code = svm->vmcb->control.exit_info_1;
 
+   svm->vcpu.arch.error_code = error_code;
+
trace_kvm_page_fault(fault_address, error_code);
	return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
static_cpu_has(X86_FEATURE_DECODEASSISTS) ?
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cd498ece8b52..6554c2278176 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5365,6 +5365,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
  ? PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
 
vcpu->arch.exit_qualification = exit_qualification;
+   vcpu->arch.error_code = error_code;
return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
 }
 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 21/84] KVM: x86: vmx: use a symbolic constant when checking the exit qualifications

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

This should make the code more readable.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/vmx/vmx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index fed661eb65a7..cd498ece8b52 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5361,8 +5361,8 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
EPT_VIOLATION_EXECUTABLE))
  ? PFERR_PRESENT_MASK : 0;
 
-   error_code |= (exit_qualification & 0x100) != 0 ?
-  PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
+   error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)
+ ? PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK;
 
vcpu->arch.exit_qualification = exit_qualification;
return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 01/84] signal: export kill_pid_info()

2020-07-21 Thread Adalbert Lazăr
From: Mathieu Tarral 

This function is used by VM introspection code to ungracefully shutdown
a guest at the request of the introspection tool.

A security application will use this as the last resort to stop the
spread of a malware from a guest.

Signed-off-by: Mathieu Tarral 
Signed-off-by: Adalbert Lazăr 
---
 kernel/signal.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/signal.c b/kernel/signal.c
index 5ca48cc5da76..c3af81d7b62a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1456,6 +1456,7 @@ int kill_pid_info(int sig, struct kernel_siginfo *info, 
struct pid *pid)
 */
}
 }
+EXPORT_SYMBOL(kill_pid_info);
 
 static int kill_proc_info(int sig, struct kernel_siginfo *info, pid_t pid)
 {
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 65/84] KVM: introspection: restore the state of CR3 interception on unhook

2020-07-21 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This commit also ensures that the introspection tool and the userspace
do not disable each other the CR3-write VM-exit.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvmi_host.h |  4 ++
 arch/x86/kvm/kvmi.c  | 64 ++--
 arch/x86/kvm/svm/svm.c   |  5 +++
 arch/x86/kvm/vmx/vmx.c   |  5 +++
 4 files changed, 75 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 1aff91ef8475..44580f77e34e 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -16,6 +16,7 @@ struct kvmi_interception {
bool cleanup;
bool restore_interception;
struct kvmi_monitor_interception breakpoint;
+   struct kvmi_monitor_interception cr3w;
 };
 
 struct kvm_vcpu_arch_introspection {
@@ -31,6 +32,7 @@ bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 
dbg);
 bool kvmi_cr_event(struct kvm_vcpu *vcpu, unsigned int cr,
   unsigned long old_value, unsigned long *new_value);
 bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu);
+bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu, bool enable);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
@@ -41,6 +43,8 @@ static inline bool kvmi_cr_event(struct kvm_vcpu *vcpu, 
unsigned int cr,
 unsigned long *new_value)
{ return true; }
 static inline bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu) { return false; 
}
+static inline bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu,
+   bool enable) { return false; }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index e72b2ef5b28a..e340a2c3500f 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -308,6 +308,59 @@ static void kvmi_arch_disable_bp_intercept(struct kvm_vcpu 
*vcpu)
vcpu->arch.kvmi->breakpoint.kvm_intercepted = false;
 }
 
+static bool monitor_cr3w_fct_kvmi(struct kvm_vcpu *vcpu, bool enable)
+{
+   vcpu->arch.kvmi->cr3w.kvmi_intercepted = enable;
+
+   if (enable)
+   vcpu->arch.kvmi->cr3w.kvm_intercepted =
+   kvm_x86_ops.cr3_write_intercepted(vcpu);
+   else if (vcpu->arch.kvmi->cr3w.kvm_intercepted)
+   return true;
+
+   return false;
+}
+
+static bool monitor_cr3w_fct_kvm(struct kvm_vcpu *vcpu, bool enable)
+{
+   if (!vcpu->arch.kvmi->cr3w.kvmi_intercepted)
+   return false;
+
+   vcpu->arch.kvmi->cr3w.kvm_intercepted = enable;
+
+   if (!enable)
+   return true;
+
+   return false;
+}
+
+/*
+ * Returns true if one side (kvm or kvmi) tries to disable the CR3 write
+ * interception while the other side is still tracking it.
+ */
+bool kvmi_monitor_cr3w_intercept(struct kvm_vcpu *vcpu, bool enable)
+{
+   struct kvmi_interception *arch_vcpui = READ_ONCE(vcpu->arch.kvmi);
+
+   return (arch_vcpui && arch_vcpui->cr3w.monitor_fct(vcpu, enable));
+}
+EXPORT_SYMBOL(kvmi_monitor_cr3w_intercept);
+
+static void kvmi_control_cr3w_intercept(struct kvm_vcpu *vcpu, bool enable)
+{
+   vcpu->arch.kvmi->cr3w.monitor_fct = monitor_cr3w_fct_kvmi;
+   kvm_x86_ops.control_cr3_intercept(vcpu, CR_TYPE_W, enable);
+   vcpu->arch.kvmi->cr3w.monitor_fct = monitor_cr3w_fct_kvm;
+}
+
+static void kvmi_arch_disable_cr3w_intercept(struct kvm_vcpu *vcpu)
+{
+   kvmi_control_cr3w_intercept(vcpu, false);
+
+   vcpu->arch.kvmi->cr3w.kvmi_intercepted = false;
+   vcpu->arch.kvmi->cr3w.kvm_intercepted = false;
+}
+
 int kvmi_arch_cmd_control_intercept(struct kvm_vcpu *vcpu,
unsigned int event_id, bool enable)
 {
@@ -347,6 +400,7 @@ void kvmi_arch_breakpoint_event(struct kvm_vcpu *vcpu, u64 
gva, u8 insn_len)
 static void kvmi_arch_restore_interception(struct kvm_vcpu *vcpu)
 {
kvmi_arch_disable_bp_intercept(vcpu);
+   kvmi_arch_disable_cr3w_intercept(vcpu);
 }
 
 bool kvmi_arch_clean_up_interception(struct kvm_vcpu *vcpu)
@@ -371,8 +425,13 @@ bool kvmi_arch_vcpu_alloc_interception(struct kvm_vcpu 
*vcpu)
return false;
 
arch_vcpui->breakpoint.monitor_fct = monitor_bp_fct_kvm;
+   arch_vcpui->cr3w.monitor_fct = monitor_cr3w_fct_kvm;
 
-   /* pair with kvmi_monitor_bp_intercept() */
+   /*
+* paired with:
+*  - kvmi_monitor_bp_intercept()
+*  - kvmi_monitor_cr3w_intercept()
+*/
smp_wmb();
WRITE_ONCE(vcpu->arch.kvmi, arch_vcpui);
 
@@ -413,8 +472,7 @@ int kvmi_arch_cmd_vcpu_control_cr(struct kvm_vcpu *vcpu,
case 0:
break;
case 3:
-   kvm_x86_ops.control_cr3_intercept(vcpu, CR_TYPE_W,
- req->enable == 1);
+   

[PATCH v9 02/84] KVM: UAPI: add error codes used by the VM introspection code

2020-07-21 Thread Adalbert Lazăr
These new error codes help the introspection tool to identify the cause
of the introspection command failure and to recover from some error
cases or to give more information to the user.

Signed-off-by: Adalbert Lazăr 
---
 include/uapi/linux/kvm_para.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index 8b86609849b9..3ce388249682 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -17,6 +17,10 @@
 #define KVM_E2BIG  E2BIG
 #define KVM_EPERM  EPERM
 #define KVM_EOPNOTSUPP 95
+#define KVM_EAGAIN 11
+#define KVM_ENOENT ENOENT
+#define KVM_ENOMEM ENOMEM
+#define KVM_EBUSY  EBUSY
 
 #define KVM_HC_VAPIC_POLL_IRQ  1
 #define KVM_HC_MMU_OP  2
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 63/84] KVM: introspection: add KVMI_VM_CONTROL_CLEANUP

2020-07-21 Thread Adalbert Lazăr
This command will allow more control over the guest state on
unhook.  However, the memory restrictions (e.g. those set with
KVMI_VM_SET_PAGE_ACCESS) will be removed on unhook.

Signed-off-by: Adalbert Lazăr 

--
It will be more interesting if the userspace could control the cleanup
behavior through the use of the KVM_INTROSPECTION_COMMAND ioctl. Now, by
disallowing this command, the userspace can only keep the default behavior
(to not automatically clean up).

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 30 
 arch/x86/include/asm/kvmi_host.h  |  1 +
 arch/x86/kvm/kvmi.c   | 17 +-
 include/linux/kvmi_host.h |  2 ++
 include/uapi/linux/kvmi.h |  9 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 34 +++
 virt/kvm/introspection/kvmi.c | 14 +---
 virt/kvm/introspection/kvmi_int.h |  4 ++-
 virt/kvm/introspection/kvmi_msg.c | 34 ++-
 9 files changed, 124 insertions(+), 21 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 110a6e7a7d2a..f760957b27f4 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -684,6 +684,36 @@ Returns a CPUID leaf (as seen by the guest OS).
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 * -KVM_ENOENT - the selected leaf is not present or is invalid
 
+14. KVMI_VM_CONTROL_CLEANUP
+---
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_control_cleanup {
+   __u8 enable;
+   __u8 padding1;
+   __u16 padding2;
+   __u32 padding3;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Enables/disables the automatic cleanup of the changes made by
+the introspection tool at the hypervisor level (e.g. CR/MSR/BP
+interceptions). By default it is disabled.
+
+:Errors:
+
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - 'enable' is not 1 or 0
+
 Events
 ==
 
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 5f2a968831d3..3e85ae4fe5f0 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -11,6 +11,7 @@ struct kvmi_monitor_interception {
 };
 
 struct kvmi_interception {
+   bool cleanup;
bool restore_interception;
struct kvmi_monitor_interception breakpoint;
 };
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 56c02dad3b57..89fa158a6535 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -353,13 +353,11 @@ bool kvmi_arch_clean_up_interception(struct kvm_vcpu 
*vcpu)
 {
struct kvmi_interception *arch_vcpui = vcpu->arch.kvmi;
 
-   if (!arch_vcpui)
+   if (!arch_vcpui || !arch_vcpui->cleanup)
return false;
 
-   if (!arch_vcpui->restore_interception)
-   return false;
-
-   kvmi_arch_restore_interception(vcpu);
+   if (arch_vcpui->restore_interception)
+   kvmi_arch_restore_interception(vcpu);
 
return true;
 }
@@ -392,10 +390,13 @@ bool kvmi_arch_vcpu_introspected(struct kvm_vcpu *vcpu)
return !!READ_ONCE(vcpu->arch.kvmi);
 }
 
-void kvmi_arch_request_interception_cleanup(struct kvm_vcpu *vcpu)
+void kvmi_arch_request_interception_cleanup(struct kvm_vcpu *vcpu,
+   bool restore_interception)
 {
struct kvmi_interception *arch_vcpui = READ_ONCE(vcpu->arch.kvmi);
 
-   if (arch_vcpui)
-   arch_vcpui->restore_interception = true;
+   if (arch_vcpui) {
+   arch_vcpui->restore_interception = restore_interception;
+   arch_vcpui->cleanup = true;
+   }
 }
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index c4fac41bd5c7..01219c56d042 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -53,6 +53,8 @@ struct kvm_introspection {
unsigned long *vm_event_enable_mask;
 
atomic_t ev_seq;
+
+   bool cleanup_on_unhook;
 };
 
 int kvmi_version(void);
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 026ae5911b1c..20bf5bf194a4 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -32,6 +32,8 @@ enum {
KVMI_VCPU_SET_REGISTERS  = 12,
KVMI_VCPU_GET_CPUID  = 13,
 
+   KVMI_VM_CONTROL_CLEANUP = 14,
+
KVMI_NUM_MESSAGES
 };
 
@@ -135,6 +137,13 @@ struct kvmi_vcpu_control_events {
__u32 padding2;
 };
 
+struct kvmi_vm_control_cleanup {
+   __u8 enable;
+   __u8 padding1;
+   __u16 padding2;
+   __u32 padding3;
+};
+
 struct kvmi_event {
__u16 size;
__u16 vcpu;
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 1418e31918be..d3b7778a6

[PATCH v9 84/84] KVM: x86: call the page tracking code on emulation failure

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

The information we can provide this way is incomplete, but current users
of the page tracking code can work with it.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/x86.c | 49 ++
 1 file changed, 49 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 02b74a57ca01..feb20b29bb92 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6911,6 +6911,51 @@ static bool is_vmware_backdoor_opcode(struct 
x86_emulate_ctxt *ctxt)
return false;
 }
 
+/*
+ * With introspection enabled, emulation failures translate in events being
+ * missed because the read/write callbacks are not invoked. All we have is
+ * the fetch event (kvm_page_track_preexec). Below we use the EPT/NPT VMEXIT
+ * information to generate the events, but without providing accurate
+ * data and size (the emulator would have computed those). If an instruction
+ * would happen to read and write in the same page, the second event will
+ * initially be missed and we rely on the page tracking mechanism to bring
+ * us back here to send it.
+ */
+static bool kvm_page_track_emulation_failure(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+   u64 error_code = vcpu->arch.error_code;
+   u8 data = 0;
+   gva_t gva;
+   bool ret;
+
+   /* MMIO emulation failures should be treated the normal way */
+   if (unlikely(error_code & PFERR_RSVD_MASK))
+   return true;
+
+   /* EPT/NPT must be enabled */
+   if (unlikely(!vcpu->arch.mmu->direct_map))
+   return true;
+
+   /*
+* The A/D bit emulation should make this test unneeded, but just
+* in case
+*/
+   if (unlikely((error_code & PFERR_NESTED_GUEST_PAGE) ==
+PFERR_NESTED_GUEST_PAGE))
+   return true;
+
+   gva = kvm_x86_ops.fault_gla(vcpu);
+
+   if (error_code & PFERR_WRITE_MASK)
+   ret = kvm_page_track_prewrite(vcpu, gpa, gva, &data, 0);
+   else if (error_code & PFERR_USER_MASK)
+   ret = kvm_page_track_preread(vcpu, gpa, gva, 0);
+   else
+   ret = true;
+
+   return ret;
+}
+
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len)
 {
@@ -6960,6 +7005,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t 
cr2_or_gpa,
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
+   if (!kvm_page_track_emulation_failure(vcpu, cr2_or_gpa))
+   return 1;
if (reexecute_instruction(vcpu, cr2_or_gpa,
  write_fault_to_spt,
  emulation_type))
@@ -7029,6 +7076,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t 
cr2_or_gpa,
return 1;
 
if (r == EMULATION_FAILED) {
+   if (!kvm_page_track_emulation_failure(vcpu, cr2_or_gpa))
+   return 1;
if (reexecute_instruction(vcpu, cr2_or_gpa, write_fault_to_spt,
emulation_type))
return 1;
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 27/84] KVM: x86: export kvm_arch_vcpu_set_guest_debug()

2020-07-21 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This function is needed in order to notify the introspection tool
through KVMI_EVENT_BP events on guest breakpoints.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/x86.c   | 18 +-
 include/linux/kvm_host.h |  2 ++
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 23bce3ef26d8..5611b6cd6d19 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9254,14 +9254,12 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return ret;
 }
 
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-   struct kvm_guest_debug *dbg)
+int kvm_arch_vcpu_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg)
 {
unsigned long rflags;
int i, r;
 
-   vcpu_load(vcpu);
-
if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
r = -EBUSY;
if (vcpu->arch.exception.pending)
@@ -9307,10 +9305,20 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu 
*vcpu,
r = 0;
 
 out:
-   vcpu_put(vcpu);
return r;
 }
 
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+   struct kvm_guest_debug *dbg)
+{
+   int ret;
+
+   vcpu_load(vcpu);
+   ret = kvm_arch_vcpu_set_guest_debug(vcpu, dbg);
+   vcpu_put(vcpu);
+   return ret;
+}
+
 /*
  * Translate a guest virtual address to a guest physical address.
  */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 49cbd175f45b..01628f7bcbcd 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -881,6 +881,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg);
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu);
+int kvm_arch_vcpu_set_guest_debug(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *dbg);
 
 int kvm_arch_init(void *opaque);
 void kvm_arch_exit(void);
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 09/84] KVM: x86: add .bp_intercepted() to struct kvm_x86_ops

2020-07-21 Thread Adalbert Lazăr
From: Nicușor Cîțu 

Both, the introspection tool and the device manager can request #BP
interception. This function will be used to check if this interception
is enabled by either side.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 1 +
 arch/x86/kvm/svm/svm.c  | 8 
 arch/x86/kvm/svm/svm.h  | 7 +++
 arch/x86/kvm/vmx/vmx.c  | 6 ++
 4 files changed, 22 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index be5363b21540..78fe3c7c814c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1098,6 +1098,7 @@ struct kvm_x86_ops {
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
 
+   bool (*bp_intercepted)(struct kvm_vcpu *vcpu);
void (*update_bp_intercept)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index c0da4dd78ac5..23b3cd057753 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1627,6 +1627,13 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
mark_dirty(svm->vmcb, VMCB_SEG);
 }
 
+static bool svm_bp_intercepted(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   return get_exception_intercept(svm, BP_VECTOR);
+}
+
 static void update_bp_intercept(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3989,6 +3996,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.vcpu_blocking = svm_vcpu_blocking,
.vcpu_unblocking = svm_vcpu_unblocking,
 
+   .bp_intercepted = svm_bp_intercepted,
.update_bp_intercept = update_bp_intercept,
.get_msr_feature = svm_get_msr_feature,
.get_msr = svm_get_msr,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 6ac4c00a5d82..d5c956e07c12 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -293,6 +293,13 @@ static inline void clr_exception_intercept(struct vcpu_svm 
*svm, int bit)
recalc_intercepts(svm);
 }
 
+static inline bool get_exception_intercept(struct vcpu_svm *svm, int bit)
+{
+   struct vmcb *vmcb = get_host_vmcb(svm);
+
+   return (vmcb->control.intercept_exceptions & (1U << bit));
+}
+
 static inline void set_intercept(struct vcpu_svm *svm, int bit)
 {
struct vmcb *vmcb = get_host_vmcb(svm);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 13745f2a5ecd..069593f2f504 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -760,6 +760,11 @@ static u32 vmx_read_guest_seg_ar(struct vcpu_vmx *vmx, 
unsigned seg)
return *p;
 }
 
+static bool vmx_bp_intercepted(struct kvm_vcpu *vcpu)
+{
+   return (vmcs_read32(EXCEPTION_BITMAP) & (1u << BP_VECTOR));
+}
+
 void update_exception_bitmap(struct kvm_vcpu *vcpu)
 {
u32 eb;
@@ -7859,6 +7864,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.vcpu_load = vmx_vcpu_load,
.vcpu_put = vmx_vcpu_put,
 
+   .bp_intercepted = vmx_bp_intercepted,
.update_bp_intercept = update_exception_bitmap,
.get_msr_feature = vmx_get_msr_feature,
.get_msr = vmx_get_msr,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 16/84] KVM: x86: export .msr_write_intercepted()

2020-07-21 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This function will be used to check if the access for a specific MSR is
already intercepted.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 1 +
 arch/x86/kvm/svm/svm.c  | 1 +
 arch/x86/kvm/vmx/vmx.c  | 1 +
 3 files changed, 3 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2ed1e5621ccf..6be832ba9c97 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1106,6 +1106,7 @@ struct kvm_x86_ops {
void (*update_bp_intercept)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
+   bool (*msr_write_intercepted)(struct kvm_vcpu *vcpu, u32 msr);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
void (*get_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index cc55c571fe86..4e5b07606891 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4080,6 +4080,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.get_msr_feature = svm_get_msr_feature,
.get_msr = svm_get_msr,
.set_msr = svm_set_msr,
+   .msr_write_intercepted = msr_write_intercepted,
.get_segment_base = svm_get_segment_base,
.get_segment = svm_get_segment,
.set_segment = svm_set_segment,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3b5778003b58..cf07db129670 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7906,6 +7906,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.get_msr_feature = vmx_get_msr_feature,
.get_msr = vmx_get_msr,
.set_msr = vmx_set_msr,
+   .msr_write_intercepted = msr_write_intercepted,
.get_segment_base = vmx_get_segment_base,
.get_segment = vmx_get_segment,
.set_segment = vmx_set_segment,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 33/84] KVM: x86: page track: add track_create_slot() callback

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

This is used to add page access notifications as soon as a slot appears.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_page_track.h | 13 -
 arch/x86/kvm/mmu/page_track.c | 16 +++-
 arch/x86/kvm/x86.c|  7 ---
 3 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_page_track.h 
b/arch/x86/include/asm/kvm_page_track.h
index 9a261e463eb3..00a66c4d4d3c 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -36,6 +36,17 @@ struct kvm_page_track_notifier_node {
void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
const u8 *new, int bytes,
struct kvm_page_track_notifier_node *node);
+   /*
+* It is called when memory slot is being created
+*
+* @kvm: the kvm where the memory slot is being created
+* @slot: the memory slot being created
+* @npages: the number of pages
+* @node: this node
+*/
+   void (*track_create_slot)(struct kvm *kvm, struct kvm_memory_slot *slot,
+ unsigned long npages,
+ struct kvm_page_track_notifier_node *node);
/*
 * It is called when memory slot is being moved or removed
 * users can drop write-protection for the pages in that memory slot
@@ -52,7 +63,7 @@ void kvm_page_track_init(struct kvm *kvm);
 void kvm_page_track_cleanup(struct kvm *kvm);
 
 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
-int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
+int kvm_page_track_create_memslot(struct kvm *kvm, struct kvm_memory_slot 
*slot,
  unsigned long npages);
 
 void kvm_slot_page_track_add_page(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index 9642af1b2c21..02759b81a04c 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -28,9 +28,12 @@ void kvm_page_track_free_memslot(struct kvm_memory_slot 
*slot)
}
 }
 
-int kvm_page_track_create_memslot(struct kvm_memory_slot *slot,
+int kvm_page_track_create_memslot(struct kvm *kvm, struct kvm_memory_slot 
*slot,
  unsigned long npages)
 {
+   struct kvm_page_track_notifier_head *head;
+   struct kvm_page_track_notifier_node *n;
+   int idx;
int  i;
 
for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
@@ -41,6 +44,17 @@ int kvm_page_track_create_memslot(struct kvm_memory_slot 
*slot,
goto track_free;
}
 
+   head = &kvm->arch.track_notifier_head;
+
+   if (hlist_empty(&head->track_notifier_list))
+   return 0;
+
+   idx = srcu_read_lock(&head->track_srcu);
+   hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
+   if (n->track_create_slot)
+   n->track_create_slot(kvm, slot, npages, n);
+   srcu_read_unlock(&head->track_srcu, idx);
+
return 0;
 
 track_free:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a59c935f4bbe..83424339ea9d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10070,7 +10070,8 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct 
kvm_memory_slot *slot)
kvm_page_track_free_memslot(slot);
 }
 
-static int kvm_alloc_memslot_metadata(struct kvm_memory_slot *slot,
+static int kvm_alloc_memslot_metadata(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
  unsigned long npages)
 {
int i;
@@ -10122,7 +10123,7 @@ static int kvm_alloc_memslot_metadata(struct 
kvm_memory_slot *slot,
}
}
 
-   if (kvm_page_track_create_memslot(slot, npages))
+   if (kvm_page_track_create_memslot(kvm, slot, npages))
goto out_free;
 
return 0;
@@ -10162,7 +10163,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
enum kvm_mr_change change)
 {
if (change == KVM_MR_CREATE || change == KVM_MR_MOVE)
-   return kvm_alloc_memslot_metadata(memslot,
+   return kvm_alloc_memslot_metadata(kvm, memslot,
  mem->memory_size >> 
PAGE_SHIFT);
return 0;
 }
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 51/84] KVM: introspection: add KVMI_VCPU_GET_INFO

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

For now, this command returns the TSC frequency (in HZ) for the specified
vCPU if available (otherwise it returns zero).

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  29 
 arch/x86/include/uapi/asm/kvmi.h  |   4 +
 arch/x86/kvm/Makefile |   2 +-
 arch/x86/kvm/kvmi.c   |  19 +++
 include/uapi/linux/kvmi.h |   2 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 152 +-
 virt/kvm/introspection/kvmi_int.h |   4 +
 virt/kvm/introspection/kvmi_msg.c |  22 +++
 8 files changed, 232 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/kvm/kvmi.c

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 383bf39ec1e4..5ead29a7b2a7 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -451,6 +451,35 @@ one page (offset + size <= PAGE_SIZE).
 * -KVM_EINVAL - the specified gpa/size pair is invalid
 * -KVM_EINVAL - the padding is not zero
 
+8. KVMI_VCPU_GET_INFO
+-
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vcpu_get_info_reply {
+   __u64 tsc_speed;
+   };
+
+Returns the TSC frequency (in HZ) for the specified vCPU if available
+(otherwise it returns zero).
+
+:Errors:
+
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 551f9ed1ed9c..89adf84cefe4 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -26,4 +26,8 @@ struct kvmi_event_arch {
} msrs;
 };
 
+struct kvmi_vcpu_get_info_reply {
+   __u64 tsc_speed;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index fb0242032cd1..3cfe76299dee 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -13,7 +13,7 @@ KVMI := $(KVM)/introspection
 kvm-y  += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
 kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
-kvm-$(CONFIG_KVM_INTROSPECTION) += $(KVMI)/kvmi.o $(KVMI)/kvmi_msg.o
+kvm-$(CONFIG_KVM_INTROSPECTION) += $(KVMI)/kvmi.o $(KVMI)/kvmi_msg.o kvmi.o
 
 kvm-y  += x86.o emulate.o i8259.o irq.o lapic.o \
   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
new file mode 100644
index ..cf7bfff6c8c5
--- /dev/null
+++ b/arch/x86/kvm/kvmi.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM Introspection - x86
+ *
+ * Copyright (C) 2019-2020 Bitdefender S.R.L.
+ */
+
+#include "../../../virt/kvm/introspection/kvmi_int.h"
+
+int kvmi_arch_cmd_vcpu_get_info(struct kvm_vcpu *vcpu,
+   struct kvmi_vcpu_get_info_reply *rpl)
+{
+   if (kvm_has_tsc_control)
+   rpl->tsc_speed = 1000ul * vcpu->arch.virtual_tsc_khz;
+   else
+   rpl->tsc_speed = 0;
+
+   return 0;
+}
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index b206b7441859..a3dca420c887 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -25,6 +25,8 @@ enum {
KVMI_VM_READ_PHYSICAL  = 6,
KVMI_VM_WRITE_PHYSICAL = 7,
 
+   KVMI_VCPU_GET_INFO = 8,
+
KVMI_NUM_MESSAGES
 };
 
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 97dec49d52b7..107661fbe52f 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "test_util.h"
 
@@ -25,6 +26,7 @@ static int socket_pair[2];
 #define Kvm_socket   socket_pair[0]
 #define Userspace_socket socket_pair[1]
 
+static int test_id;
 static vm_vaddr_t test_gva;
 static void *test_hva;
 static vm_paddr_t test_gpa;
@@ -32,6 +34,39 @@ static vm_paddr_t test_gpa;
 static uint8_t test_write_pattern;
 static int page_size;
 
+struct vcpu_worker_data {
+   struct kvm_vm *vm;
+   int vcpu_id;
+   int test_id;
+   bool stop;
+};
+
+enum {
+   GUEST_TEST_NOOP = 0,
+};
+
+#define GUEST_REQUEST_TEST() GUEST_SYNC(0)
+#define GUEST_SIGNAL_TEST_DONE() GUEST_SYNC(1)
+
+#define HOST_SEND_TEST(uc)   (uc.cmd == UCALL_SYNC && uc.args[1] == 0)
+
+static int guest_test_id(void)
+{
+   GUEST_REQUEST_TEST();
+   return READ_ONCE(test_id);
+}
+
+static void guest_code(void)
+{
+   while (tr

[PATCH v9 76/84] KVM: introspection: restore the state of MSR interception on unhook

2020-07-21 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This commit also ensures that the introspection tool and the userspace
do not disable each other the MSR access VM-exit.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvmi_host.h |  12 +++
 arch/x86/kvm/kvmi.c  | 133 +++
 arch/x86/kvm/svm/svm.c   |  11 +++
 arch/x86/kvm/vmx/vmx.c   |  11 +++
 4 files changed, 150 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 5f2967d86b72..acc003403c95 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -26,6 +26,12 @@ struct kvmi_interception {
DECLARE_BITMAP(low, KVMI_NUM_MSR);
DECLARE_BITMAP(high, KVMI_NUM_MSR);
} kvmi_mask;
+   struct {
+   DECLARE_BITMAP(low, KVMI_NUM_MSR);
+   DECLARE_BITMAP(high, KVMI_NUM_MSR);
+   } kvm_mask;
+   bool (*monitor_fct)(struct kvm_vcpu *vcpu, u32 msr,
+   bool enable);
} msrw;
 };
 
@@ -48,6 +54,8 @@ void kvmi_xsetbv_event(struct kvm_vcpu *vcpu, u8 xcr,
 bool kvmi_monitor_desc_intercept(struct kvm_vcpu *vcpu, bool enable);
 bool kvmi_descriptor_event(struct kvm_vcpu *vcpu, u8 descriptor, bool write);
 bool kvmi_msr_event(struct kvm_vcpu *vcpu, struct msr_data *msr);
+bool kvmi_monitor_msrw_intercept(struct kvm_vcpu *vcpu, u32 msr, bool enable);
+bool kvmi_msrw_intercept_originator(struct kvm_vcpu *vcpu);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
@@ -68,6 +76,10 @@ static inline bool kvmi_descriptor_event(struct kvm_vcpu 
*vcpu, u8 descriptor,
 bool write) { return true; }
 static inline bool kvmi_msr_event(struct kvm_vcpu *vcpu, struct msr_data *msr)
{ return true; }
+static inline bool kvmi_monitor_msrw_intercept(struct kvm_vcpu *vcpu, u32 msr,
+  bool enable) { return false; }
+static inline bool kvmi_msrw_intercept_originator(struct kvm_vcpu *vcpu)
+   { return false; }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index a48e72f520da..0b1301ebafba 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -424,22 +424,25 @@ static bool kvmi_msr_valid(unsigned int msr)
return msr <= 0x1fff || (msr >= 0xc000 && msr <= 0xc0001fff);
 }
 
-static unsigned long *msr_mask(struct kvm_vcpu *vcpu, unsigned int *msr)
+static unsigned long *msr_mask(struct kvm_vcpu *vcpu, unsigned int *msr,
+  bool kvmi)
 {
switch (*msr) {
case 0 ... 0x1fff:
-   return vcpu->arch.kvmi->msrw.kvmi_mask.low;
+   return kvmi ? vcpu->arch.kvmi->msrw.kvmi_mask.low :
+ vcpu->arch.kvmi->msrw.kvm_mask.low;
case 0xc000 ... 0xc0001fff:
*msr &= 0x1fff;
-   return vcpu->arch.kvmi->msrw.kvmi_mask.high;
+   return kvmi ? vcpu->arch.kvmi->msrw.kvmi_mask.high :
+ vcpu->arch.kvmi->msrw.kvm_mask.high;
}
 
return NULL;
 }
 
-static bool test_msr_mask(struct kvm_vcpu *vcpu, unsigned int msr)
+static bool test_msr_mask(struct kvm_vcpu *vcpu, unsigned int msr, bool kvmi)
 {
-   unsigned long *mask = msr_mask(vcpu, &msr);
+   unsigned long *mask = msr_mask(vcpu, &msr, kvmi);
 
if (!mask)
return false;
@@ -447,9 +450,27 @@ static bool test_msr_mask(struct kvm_vcpu *vcpu, unsigned 
int msr)
return !!test_bit(msr, mask);
 }
 
-static bool msr_control(struct kvm_vcpu *vcpu, unsigned int msr, bool enable)
+/*
+ * Returns true if one side (kvm or kvmi) tries to disable the MSR write
+ * interception while the other side is still tracking it.
+ */
+bool kvmi_monitor_msrw_intercept(struct kvm_vcpu *vcpu, u32 msr, bool enable)
 {
-   unsigned long *mask = msr_mask(vcpu, &msr);
+   struct kvmi_interception *arch_vcpui;
+
+   if (!vcpu)
+   return false;
+
+   arch_vcpui = READ_ONCE(vcpu->arch.kvmi);
+
+   return (arch_vcpui && arch_vcpui->msrw.monitor_fct(vcpu, msr, enable));
+}
+EXPORT_SYMBOL(kvmi_monitor_msrw_intercept);
+
+static bool msr_control(struct kvm_vcpu *vcpu, unsigned int msr, bool enable,
+   bool kvmi)
+{
+   unsigned long *mask = msr_mask(vcpu, &msr, kvmi);
 
if (!mask)
return false;
@@ -462,6 +483,63 @@ static bool msr_control(struct kvm_vcpu *vcpu, unsigned 
int msr, bool enable)
return true;
 }
 
+static bool msr_intercepted_by_kvmi(struct kvm_vcpu *vcpu, u32 msr)
+{
+   return test_msr_mask(vcpu, msr, true);
+}
+
+static bool msr_intercepted_by_kvm(struct kvm_vcpu *vcpu, u32 ms

[PATCH v9 24/84] KVM: x86: add .spt_fault()

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

This function is needed for the KVMI_EVENT_PF event, to avoid sending
such events to the introspection tool if not caused by a SPT page fault.

The code path is: emulator -> {read,write,fetch} callbacks -> page tracking
-> page tracking callbacks -> KVMI_EVENT_PF.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 1 +
 arch/x86/kvm/svm/svm.c  | 9 +
 arch/x86/kvm/vmx/vmx.c  | 8 
 3 files changed, 18 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ccf2804f46b9..fb41199b33fc 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1292,6 +1292,7 @@ struct kvm_x86_ops {
void (*migrate_timers)(struct kvm_vcpu *vcpu);
 
u64 (*fault_gla)(struct kvm_vcpu *vcpu);
+   bool (*spt_fault)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 86b670ff33dd..7ecfa10dce5d 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4089,6 +4089,14 @@ static u64 svm_fault_gla(struct kvm_vcpu *vcpu)
return svm->vcpu.arch.cr2 ? svm->vcpu.arch.cr2 : ~0ull;
 }
 
+static bool svm_spt_fault(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+   struct vmcb *vmcb = get_host_vmcb(svm);
+
+   return (vmcb->control.exit_code == SVM_EXIT_NPF);
+}
+
 static struct kvm_x86_ops svm_x86_ops __initdata = {
.hardware_unsetup = svm_hardware_teardown,
.hardware_enable = svm_hardware_enable,
@@ -4217,6 +4225,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.apic_init_signal_blocked = svm_apic_init_signal_blocked,
 
.fault_gla = svm_fault_gla,
+   .spt_fault = svm_spt_fault,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a04c46cde5b3..17b88345dfb5 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7909,6 +7909,13 @@ static u64 vmx_fault_gla(struct kvm_vcpu *vcpu)
return ~0ull;
 }
 
+static bool vmx_spt_fault(struct kvm_vcpu *vcpu)
+{
+   const struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   return (vmx->exit_reason == EXIT_REASON_EPT_VIOLATION);
+}
+
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
.hardware_unsetup = hardware_unsetup,
 
@@ -8047,6 +8054,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.migrate_timers = vmx_migrate_timers,
 
.fault_gla = vmx_fault_gla,
+   .spt_fault = vmx_spt_fault,
 };
 
 static __init int hardware_setup(void)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 57/84] KVM: introspection: add KVMI_VCPU_SET_REGISTERS

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

During an introspection event, the introspection tool might need to
change the vCPU state, for example, to skip the current instruction.

This command is allowed only during vCPU events and the registers will
be set when the reply has been received.

Signed-off-by: Mihai Donțu 
Co-developed-by: Mircea Cîrjaliu 
Signed-off-by: Mircea Cîrjaliu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 29 
 include/linux/kvmi_host.h |  3 +
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 73 +++
 virt/kvm/introspection/kvmi.c | 24 ++
 virt/kvm/introspection/kvmi_int.h |  3 +
 virt/kvm/introspection/kvmi_msg.c | 17 -
 7 files changed, 149 insertions(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index f9095e1a9417..bd35002c3254 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -612,6 +612,35 @@ registers, the special registers and the requested set of 
MSRs.
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 * -KVM_ENOMEM - there is not enough memory to allocate the reply
 
+12. KVMI_VCPU_SET_REGISTERS
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvm_regs;
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Sets the general purpose registers for the given vCPU. The changes become
+visible to other threads accessing the KVM vCPU structure after the event
+currently being handled is replied to.
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EOPNOTSUPP - the command hasn't been received during an introspection 
event
+
 Events
 ==
 
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index 9625c8f19379..857b75a2664a 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -33,6 +33,9 @@ struct kvm_vcpu_introspection {
bool waiting_for_reply;
 
unsigned long *ev_enable_mask;
+
+   struct kvm_regs delayed_regs;
+   bool have_delayed_regs;
 };
 
 struct kvm_introspection {
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 39ff54b4b661..5f637a21a907 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -29,6 +29,7 @@ enum {
KVMI_VCPU_PAUSE  = 9,
KVMI_VCPU_CONTROL_EVENTS = 10,
KVMI_VCPU_GET_REGISTERS  = 11,
+   KVMI_VCPU_SET_REGISTERS  = 12,
 
KVMI_NUM_MESSAGES
 };
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 73aafc5d959a..ffd0337d0567 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -961,6 +961,78 @@ static void test_cmd_vcpu_get_registers(struct kvm_vm *vm)
test_invalid_vcpu_get_registers(vm);
 }
 
+static int __cmd_vcpu_set_registers(struct kvm_vm *vm,
+   struct kvm_regs *regs)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   struct kvm_regs regs;
+   } req = {};
+
+   memcpy(, regs, sizeof(req.regs));
+
+   return __do_vcpu0_command(KVMI_VCPU_SET_REGISTERS,
+ , sizeof(req), NULL, 0);
+}
+
+static void test_invalid_cmd_vcpu_set_registers(struct kvm_vm *vm)
+{
+   struct vcpu_worker_data data = {.vm = vm, .vcpu_id = VCPU_ID};
+   pthread_t vcpu_thread;
+   struct kvm_regs regs;
+   int r;
+
+   vcpu_thread = start_vcpu_worker();
+
+   r = __cmd_vcpu_set_registers(vm, );
+
+   stop_vcpu_worker(vcpu_thread, );
+
+   TEST_ASSERT(r == -KVM_EOPNOTSUPP,
+   "KVMI_VCPU_SET_REGISTERS didn't failed with KVM_EOPNOTSUPP, 
error %d(%s)\n",
+   -r, kvm_strerror(-r));
+}
+
+static void __set_registers(struct kvm_vm *vm,
+   struct kvm_regs *regs)
+{
+   int r;
+
+   r = __cmd_vcpu_set_registers(vm, regs);
+   TEST_ASSERT(r == 0,
+   "KVMI_VCPU_SET_REGISTERS failed, error %d(%s)\n",
+   -r, kvm_strerror(-r));
+}
+
+static void test_cmd_vcpu_set_registers(struct kvm_vm *vm)
+{
+   struct vcpu_worker_data data = {.vm = vm, .vcpu_id = VCPU_ID};
+   __u16 event_id = KVMI_EVENT_PAUSE_VCPU;
+   struct kvmi_msg_hdr hdr;
+   pthread_t vcpu_thread;
+   struct kvmi_event ev;
+   struct vcpu_reply rpl = {};
+   struct kvm_regs regs = {};
+
+   cmd_vcpu_get_registers(vm, );
+
+   test_invalid_cmd_vcpu_set_registers(vm);
+
+   pause_vcpu();
+
+   vcpu_thread = start_vcpu_worker();
+
+   rece

[PATCH v9 38/84] KVM: introspection: add hook/unhook ioctls

2020-07-21 Thread Adalbert Lazăr
On hook, a new thread is created to handle the messages coming from the
introspection tool (commands or event replies). The VM related commands
are handled by this thread, while the vCPU commands and events replies
are dispatched to the vCPU threads.

On unhook, the socket is shut down, which will signal: the receiving
thread to quit (because it might be blocked in recvmsg()) and the
introspection tool to clean up.

The mutex is used to protect the 'kvm->kvmi' pointer when accessed through
ioctls.

The reference counter is used by the receiving thread (for its entire
life time) and by the vCPU threads while sending introspection events
or handling introspection commands.

The completion object is set when the reference counter reaches zero and
the unhook process is waiting for it in order to free the introspection
structures.

Co-developed-by: Mircea Cîrjaliu 
Signed-off-by: Mircea Cîrjaliu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/api.rst|  55 ++
 arch/x86/include/asm/kvmi_host.h  |   8 +
 arch/x86/kvm/Makefile |   2 +-
 arch/x86/kvm/x86.c|   6 +
 include/linux/kvm_host.h  |   5 +
 include/linux/kvmi_host.h |  17 ++
 include/uapi/linux/kvm.h  |  10 ++
 include/uapi/linux/kvmi.h |  13 ++
 tools/testing/selftests/kvm/Makefile  |   1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  |  94 ++
 virt/kvm/introspection/kvmi.c | 162 ++
 virt/kvm/introspection/kvmi_int.h |  22 +++
 virt/kvm/introspection/kvmi_msg.c |  39 +
 virt/kvm/kvm_main.c   |  19 ++
 14 files changed, 452 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/include/asm/kvmi_host.h
 create mode 100644 include/uapi/linux/kvmi.h
 create mode 100644 tools/testing/selftests/kvm/x86_64/kvmi_test.c
 create mode 100644 virt/kvm/introspection/kvmi_msg.c

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 320788f81a05..e34f20430eb1 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4697,6 +4697,61 @@ KVM_PV_VM_VERIFY
   Verify the integrity of the unpacked image. Only if this succeeds,
   KVM is allowed to start protected VCPUs.
 
+4.126 KVM_INTROSPECTION_HOOK
+
+
+:Capability: KVM_CAP_INTROSPECTION
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_introspection (in)
+:Returns: 0 on success, a negative value on error
+
+Errors:
+
+  == ==
+  ENOMEM the memory allocation failed
+  EEXIST the VM is already introspected
+  EINVAL the file descriptor doesn't correspond to an active socket
+  EINVAL the padding is not zero
+  EPERM  the introspection is disabled (kvm.introspection=0)
+  == ==
+
+This ioctl is used to enable the introspection of the current VM.
+
+::
+
+  struct kvm_introspection {
+   __s32 fd;
+   __u32 padding;
+   __u8 uuid[16];
+  };
+
+fd is the file descriptor of a socket connected to the introspection tool,
+
+padding must be zero (it might be used in the future),
+
+uuid is used for debug and error messages.
+
+The KVMI version can be retrieved using the KVM_CAP_INTROSPECTION of
+the KVM_CHECK_EXTENSION ioctl() at run-time.
+
+4.127 KVM_INTROSPECTION_UNHOOK
+--
+
+:Capability: KVM_CAP_INTROSPECTION
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: none
+:Returns: 0 on success, a negative value on error
+
+Errors:
+
+  == ==
+  EPERM  the introspection is disabled (kvm.introspection=0)
+  == ==
+
+This ioctl is used to free all introspection structures
+related to this VM.
 
 5. The kvm_run structure
 
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
new file mode 100644
index ..38c398262913
--- /dev/null
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_KVMI_HOST_H
+#define _ASM_X86_KVMI_HOST_H
+
+struct kvm_arch_introspection {
+};
+
+#endif /* _ASM_X86_KVMI_HOST_H */
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 880b028c7f86..fb0242032cd1 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -13,7 +13,7 @@ KVMI := $(KVM)/introspection
 kvm-y  += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
 kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
-kvm-$(CONFIG_KVM_INTROSPECTION) += $(KVMI)/kvmi.o
+

[PATCH v9 06/84] KVM: x86: add kvm_arch_vcpu_get_regs() and kvm_arch_vcpu_get_sregs()

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

These functions are used by the VM introspection code
(for the KVMI_VCPU_GET_REGISTERS command and all events sending the vCPU
registers to the introspection tool).

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/x86.c   | 10 ++
 include/linux/kvm_host.h |  3 +++
 2 files changed, 13 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 88c593f83b28..10410ebda034 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8939,6 +8939,11 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, 
struct kvm_regs *regs)
return 0;
 }
 
+void kvm_arch_vcpu_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+   __get_regs(vcpu, regs);
+}
+
 static void __set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
@@ -9034,6 +9039,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
return 0;
 }
 
+void kvm_arch_vcpu_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+   __get_sregs(vcpu, sregs);
+}
+
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
 {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a4249fc88fc2..23ab4932f7e7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -864,9 +864,12 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr);
 
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
+void kvm_arch_vcpu_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
  struct kvm_sregs *sregs);
+void kvm_arch_vcpu_get_sregs(struct kvm_vcpu *vcpu,
+ struct kvm_sregs *sregs);
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
  struct kvm_sregs *sregs);
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 53/84] KVM: introspection: add KVMI_EVENT_PAUSE_VCPU

2020-07-21 Thread Adalbert Lazăr
This event is sent by the vCPU thread as a response to the KVMI_VCPU_PAUSE
command, but it has a lower priority, being sent after any other
introspection event and when no other introspection command is queued.

The number of KVMI_EVENT_PAUSE_VCPU events will match the number of
successful KVMI_VCPU_PAUSE commands.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  22 ++-
 arch/x86/kvm/kvmi.c   |  81 +
 include/linux/kvmi_host.h |  11 ++
 include/uapi/linux/kvmi.h |  13 ++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  |  46 ++
 virt/kvm/introspection/kvmi.c |  24 ++-
 virt/kvm/introspection/kvmi_int.h |   3 +
 virt/kvm/introspection/kvmi_msg.c | 155 +-
 8 files changed, 351 insertions(+), 4 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 502ee06d5e77..06c1cb34209e 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -563,6 +563,25 @@ On x86 the structure looks like this::
 
 It contains information about the vCPU state at the time of the event.
 
+An event reply begins with two common structures::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_event_reply {
+   __u8 action;
+   __u8 event;
+   __u16 padding1;
+   __u32 padding2;
+   };
+
+All events accept the KVMI_EVENT_ACTION_CRASH action, which stops the
+guest ungracefully, but as soon as possible.
+
+Most of the events accept the KVMI_EVENT_ACTION_CONTINUE action, which
+lets the instruction that caused the event to continue.
+
+Some of the events accept the KVMI_EVENT_ACTION_RETRY action, to continue
+by re-entering in guest.
+
 Specific event data can follow these common structures.
 
 1. KVMI_EVENT_UNHOOK
@@ -604,7 +623,8 @@ operation can proceed).
struct kvmi_vcpu_hdr;
struct kvmi_event_reply;
 
-This event is sent in response to a *KVMI_VCPU_PAUSE* command.
+This event is sent in response to a *KVMI_VCPU_PAUSE* command and
+cannot be controlled with *KVMI_VCPU_CONTROL_EVENTS*.
 Because it has a low priority, it will be sent after any other vCPU
 introspection event and when no other vCPU introspection command is
 queued.
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index cf7bfff6c8c5..ce7e2d5f2ab4 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -5,8 +5,89 @@
  * Copyright (C) 2019-2020 Bitdefender S.R.L.
  */
 
+#include "linux/kvm_host.h"
+#include "x86.h"
 #include "../../../virt/kvm/introspection/kvmi_int.h"
 
+static unsigned int kvmi_vcpu_mode(const struct kvm_vcpu *vcpu,
+  const struct kvm_sregs *sregs)
+{
+   unsigned int mode = 0;
+
+   if (is_long_mode((struct kvm_vcpu *) vcpu)) {
+   if (sregs->cs.l)
+   mode = 8;
+   else if (!sregs->cs.db)
+   mode = 2;
+   else
+   mode = 4;
+   } else if (sregs->cr0 & X86_CR0_PE) {
+   if (!sregs->cs.db)
+   mode = 2;
+   else
+   mode = 4;
+   } else if (!sregs->cs.db) {
+   mode = 2;
+   } else {
+   mode = 4;
+   }
+
+   return mode;
+}
+
+static void kvmi_get_msrs(struct kvm_vcpu *vcpu, struct kvmi_event_arch *event)
+{
+   struct msr_data msr;
+
+   msr.host_initiated = true;
+
+   msr.index = MSR_IA32_SYSENTER_CS;
+   kvm_x86_ops.get_msr(vcpu, );
+   event->msrs.sysenter_cs = msr.data;
+
+   msr.index = MSR_IA32_SYSENTER_ESP;
+   kvm_x86_ops.get_msr(vcpu, );
+   event->msrs.sysenter_esp = msr.data;
+
+   msr.index = MSR_IA32_SYSENTER_EIP;
+   kvm_x86_ops.get_msr(vcpu, );
+   event->msrs.sysenter_eip = msr.data;
+
+   msr.index = MSR_EFER;
+   kvm_x86_ops.get_msr(vcpu, );
+   event->msrs.efer = msr.data;
+
+   msr.index = MSR_STAR;
+   kvm_x86_ops.get_msr(vcpu, );
+   event->msrs.star = msr.data;
+
+   msr.index = MSR_LSTAR;
+   kvm_x86_ops.get_msr(vcpu, );
+   event->msrs.lstar = msr.data;
+
+   msr.index = MSR_CSTAR;
+   kvm_x86_ops.get_msr(vcpu, );
+   event->msrs.cstar = msr.data;
+
+   msr.index = MSR_IA32_CR_PAT;
+   kvm_x86_ops.get_msr(vcpu, );
+   event->msrs.pat = msr.data;
+
+   msr.index = MSR_KERNEL_GS_BASE;
+   kvm_x86_ops.get_msr(vcpu, );
+   event->msrs.shadow_gs = msr.data;
+}
+
+void kvmi_arch_setup_event(struct kvm_vcpu *vcpu, struct kvmi_event *ev)
+{
+   struct kvmi_event_arch *event = >arch;
+
+   kvm_arch_vcpu_get_regs(vcpu, >regs);
+   kvm_arch_vcpu_get_sregs(vcpu, >sregs);
+   ev->arch.mode = kvmi_vcpu_mode(vcpu, >sregs);
+   kvmi_get_msrs(vcpu, event);
+}
+
 in

[PATCH v9 66/84] KVM: introspection: add KVMI_VCPU_INJECT_EXCEPTION + KVMI_EVENT_TRAP

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

The KVMI_VCPU_INJECT_EXCEPTION command is used by the introspection tool
to inject exceptions, for example, to get a page from swap.

The exception is queued right before entering the guest unless there is
already an exception pending. The introspection tool is notified with
a KVMI_EVENT_TRAP event about the success of the injection.  In case
of failure, the introspection tool is expected to try again later.

Signed-off-by: Mihai Donțu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  74 +++
 arch/x86/kvm/kvmi.c   | 103 
 arch/x86/kvm/x86.c|   3 +
 include/linux/kvmi_host.h |  12 ++
 include/uapi/linux/kvmi.h |  20 ++-
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 115 +-
 virt/kvm/introspection/kvmi.c |  45 +++
 virt/kvm/introspection/kvmi_int.h |   8 ++
 virt/kvm/introspection/kvmi_msg.c |  50 ++--
 9 files changed, 418 insertions(+), 12 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index e1f978fc799b..4263a9ac90e4 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -561,6 +561,7 @@ because these are sent as a result of certain commands (but 
they can be
 disallowed by the device manager) ::
 
KVMI_EVENT_PAUSE_VCPU
+   KVMI_EVENT_TRAP
 
 The VM events (e.g. *KVMI_EVENT_UNHOOK*) are controlled with
 the *KVMI_VM_CONTROL_EVENTS* command.
@@ -749,6 +750,45 @@ ID set.
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+16. KVMI_VCPU_INJECT_EXCEPTION
+--
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_inject_exception {
+   __u8 nr;
+   __u8 padding1;
+   __u16 padding2;
+   __u32 error_code;
+   __u64 address;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Injects a vCPU exception with or without an error code. In case of page fault
+exception, the guest virtual address has to be specified.
+
+The *KVMI_EVENT_TRAP* event will be sent with the effective injected
+exception.
+
+:Errors:
+
+* -KVM_EPERM  - the *KVMI_EVENT_TRAP* event is disallowed
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EBUSY - another *KVMI_VCPU_INJECT_EXCEPTION*-*KVMI_EVENT_TRAP* pair
+   is in progress
+
 Events
 ==
 
@@ -960,3 +1000,37 @@ register (see **KVMI_VCPU_CONTROL_EVENTS**).
 
 ``kvmi_event``, the control register number, the old value and the new value
 are sent to the introspection tool. The *CONTINUE* action will set the 
``new_val``.
+
+6. KVMI_EVENT_TRAP
+--
+
+:Architectures: all
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_event;
+   struct kvmi_event_trap {
+   __u8 nr;
+   __u8 padding1;
+   __u16 padding2;
+   __u32 error_code;
+   __u64 address;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_event_reply;
+
+This event is sent if a previous *KVMI_VCPU_INJECT_EXCEPTION* command
+took place. Because it has a high priority, it will be sent before any
+other vCPU introspection event.
+
+``kvmi_event``, exception/interrupt number, exception code
+(``error_code``) and address are sent to the introspection tool,
+which should check if its exception has been injected or overridden.
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index e340a2c3500f..0c6ab136084f 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -569,3 +569,106 @@ bool kvmi_cr3_intercepted(struct kvm_vcpu *vcpu)
return ret;
 }
 EXPORT_SYMBOL(kvmi_cr3_intercepted);
+
+int kvmi_arch_cmd_vcpu_inject_exception(struct kvm_vcpu *vcpu, u8 vector,
+   u32 error_code, u64 address)
+{
+   struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu);
+   bool has_error;
+
+   if (vcpui->exception.pending || vcpui->exception.send_event)
+   return -KVM_EBUSY;
+
+   vcpui->exception.pending = true;
+
+   has_error = x86_exception_has_error_code(vector);
+
+   vcpui->exception.nr = vector;
+   vcpui->exception.error_code = has_error ? error_code : 0;
+   vcpui->exception.error_code_valid = has_error;
+   vcpui->exception.address = address;
+
+   return 0;
+}
+
+static void kvmi_arch_queue_exception(struct kvm_vcpu *vcpu)
+{
+   struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu);
+   struct x86_exception e = {
+   

[PATCH v9 49/84] KVM: introspection: handle vCPU introspection requests

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

The receiving thread dispatches the vCPU introspection commands by
adding them to the vCPU's jobs list and kicking the vCPU. Before
entering in guest, the vCPU thread checks the introspection request
(KVM_REQ_INTROSPECTION) and runs its queued jobs.

Signed-off-by: Mihai Donțu 
Co-developed-by: Mircea Cîrjaliu 
Signed-off-by: Mircea Cîrjaliu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/x86.c|  3 ++
 include/linux/kvm_host.h  |  1 +
 include/linux/kvmi_host.h |  4 +++
 virt/kvm/introspection/kvmi.c | 58 +++
 virt/kvm/kvm_main.c   |  2 ++
 5 files changed, 68 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ff0d3c82de64..7f56e2149f18 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8706,6 +8706,9 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.l1tf_flush_l1d = true;
 
for (;;) {
+   if (kvm_check_request(KVM_REQ_INTROSPECTION, vcpu))
+   kvmi_handle_requests(vcpu);
+
if (kvm_vcpu_running(vcpu)) {
r = vcpu_enter_guest(vcpu);
} else {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8509d8272466..296b59ecc540 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -146,6 +146,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_MMU_RELOAD(1 | KVM_REQUEST_WAIT | 
KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_PENDING_TIMER 2
 #define KVM_REQ_UNHALT3
+#define KVM_REQ_INTROSPECTION 4
 #define KVM_REQUEST_ARCH_BASE 8
 
 #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index d3242a99f891..956b8d5c51e3 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -53,6 +53,8 @@ int kvmi_ioctl_event(struct kvm *kvm,
 const struct kvm_introspection_feature *feat);
 int kvmi_ioctl_preunhook(struct kvm *kvm);
 
+void kvmi_handle_requests(struct kvm_vcpu *vcpu);
+
 #else
 
 static inline int kvmi_init(void) { return 0; }
@@ -61,6 +63,8 @@ static inline void kvmi_create_vm(struct kvm *kvm) { }
 static inline void kvmi_destroy_vm(struct kvm *kvm) { }
 static inline void kvmi_vcpu_uninit(struct kvm_vcpu *vcpu) { }
 
+static inline void kvmi_handle_requests(struct kvm_vcpu *vcpu) { }
+
 #endif /* CONFIG_KVM_INTROSPECTION */
 
 #endif
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index b6595bca99f7..a9d406f276f5 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -113,6 +113,12 @@ void kvmi_uninit(void)
kvmi_cache_destroy();
 }
 
+static void kvmi_make_request(struct kvm_vcpu *vcpu)
+{
+   kvm_make_request(KVM_REQ_INTROSPECTION, vcpu);
+   kvm_vcpu_kick(vcpu);
+}
+
 static int __kvmi_add_job(struct kvm_vcpu *vcpu,
  void (*fct)(struct kvm_vcpu *vcpu, void *ctx),
  void *ctx, void (*free_fct)(void *ctx))
@@ -144,6 +150,9 @@ int kvmi_add_job(struct kvm_vcpu *vcpu,
 
err = __kvmi_add_job(vcpu, fct, ctx, free_fct);
 
+   if (!err)
+   kvmi_make_request(vcpu);
+
return err;
 }
 
@@ -312,6 +321,14 @@ int kvmi_ioctl_unhook(struct kvm *kvm)
return 0;
 }
 
+struct kvm_introspection * __must_check kvmi_get(struct kvm *kvm)
+{
+   if (refcount_inc_not_zero(>kvmi_ref))
+   return kvm->kvmi;
+
+   return NULL;
+}
+
 void kvmi_put(struct kvm *kvm)
 {
if (refcount_dec_and_test(>kvmi_ref))
@@ -373,6 +390,10 @@ int kvmi_hook(struct kvm *kvm, const struct 
kvm_introspection_hook *hook)
init_completion(>kvmi_complete);
 
refcount_set(>kvmi_ref, 1);
+   /*
+* Paired with refcount_inc_not_zero() from kvmi_get().
+*/
+   smp_wmb();
 
kvmi->recv = kthread_run(kvmi_recv_thread, kvmi, "kvmi-recv");
if (IS_ERR(kvmi->recv)) {
@@ -672,3 +693,40 @@ int kvmi_cmd_write_physical(struct kvm *kvm, u64 gpa, 
size_t size,
 
return 0;
 }
+
+static struct kvmi_job *kvmi_pull_job(struct kvm_vcpu_introspection *vcpui)
+{
+   struct kvmi_job *job = NULL;
+
+   spin_lock(>job_lock);
+   job = list_first_entry_or_null(>job_list, typeof(*job), link);
+   if (job)
+   list_del(>link);
+   spin_unlock(>job_lock);
+
+   return job;
+}
+
+void kvmi_run_jobs(struct kvm_vcpu *vcpu)
+{
+   struct kvm_vcpu_introspection *vcpui = VCPUI(vcpu);
+   struct kvmi_job *job;
+
+   while ((job = kvmi_pull_job(vcpui))) {
+   job->fct(vcpu, job->ctx);
+   kvmi_free_job(job);
+   }
+}
+
+void kvmi_handle_requests(struct kvm_vcpu *vcpu)
+{
+   struct kvm_introspection *kvmi;
+
+   kvmi = kvmi_get(vcpu->kvm);
+   if (!kvmi)
+   return;
+
+   kvmi

[PATCH v9 60/84] KVM: introspection: add KVMI_EVENT_BREAKPOINT

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

This event is sent when a breakpoint was reached.

The introspection tool can place breakpoints and use them as notification
for when the OS or an application has reached a certain state or is
trying to perform a certain operation (eg. create a process).

Signed-off-by: Mihai Donțu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 40 
 arch/x86/include/uapi/asm/kvmi.h  |  6 +++
 arch/x86/kvm/kvmi.c   | 48 +++
 arch/x86/kvm/svm/svm.c| 34 +
 arch/x86/kvm/vmx/vmx.c| 17 +--
 include/linux/kvmi_host.h |  4 ++
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 46 ++
 virt/kvm/introspection/kvmi.c | 23 -
 virt/kvm/introspection/kvmi_int.h |  4 ++
 virt/kvm/introspection/kvmi_msg.c | 17 +++
 11 files changed, 235 insertions(+), 5 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index d062f2ccf365..110a6e7a7d2a 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -549,6 +549,7 @@ command) before returning to guest.
 Enables/disables vCPU introspection events. This command can be used with
 the following events::
 
+   KVMI_EVENT_BREAKPOINT
KVMI_EVENT_HYPERCALL
 
 When an event is enabled, the introspection tool is notified and
@@ -570,6 +571,9 @@ the *KVMI_VM_CONTROL_EVENTS* command.
 * -KVM_EINVAL - the event ID is unknown (use *KVMI_VM_CHECK_EVENT* first)
 * -KVM_EPERM - the access is disallowed (use *KVMI_VM_CHECK_EVENT* first)
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EBUSY - the event can't be intercepted right now
+   (e.g. KVMI_EVENT_BREAKPOINT if the #BP event is already 
intercepted
+by userspace)
 
 11. KVMI_VCPU_GET_REGISTERS
 ---
@@ -820,3 +824,39 @@ It is used by the code residing inside the introspected 
guest to call the
 introspection tool and to report certain details about its operation. For
 example, a classic antimalware remediation tool can report what it has
 found during a scan.
+
+4. KVMI_EVENT_BREAKPOINT
+
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, CRASH, RETRY
+:Parameters:
+
+::
+
+   struct kvmi_event;
+   struct kvmi_event_breakpoint {
+   __u64 gpa;
+   __u8 insn_len;
+   __u8 padding[7];
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_event_reply;
+
+This event is sent when a breakpoint was reached and the introspection has
+been enabled for this event (see *KVMI_VCPU_CONTROL_EVENTS*).
+
+Some of these breakpoints could have been injected by the introspection tool,
+placed in the slack space of various functions and used as notification
+for when the OS or an application has reached a certain state or is
+trying to perform a certain operation (like creating a process).
+
+``kvmi_event`` and the guest physical address are sent to the introspection 
tool.
+
+The *RETRY* action is used by the introspection tool for its own breakpoints.
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 9882e68cab75..1605777256a3 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -59,4 +59,10 @@ struct kvmi_vcpu_get_cpuid_reply {
__u32 edx;
 };
 
+struct kvmi_event_breakpoint {
+   __u64 gpa;
+   __u8 insn_len;
+   __u8 padding[7];
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 45f1a45d5c0f..f13272350bc9 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -242,3 +242,51 @@ void kvmi_arch_hypercall_event(struct kvm_vcpu *vcpu)
kvmi_handle_common_event_actions(vcpu->kvm, action);
}
 }
+
+static int kvmi_control_bp_intercept(struct kvm_vcpu *vcpu, bool enable)
+{
+   struct kvm_guest_debug dbg = {};
+   int err = 0;
+
+   if (enable)
+   dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
+   err = kvm_arch_vcpu_set_guest_debug(vcpu, );
+
+   return err;
+}
+
+int kvmi_arch_cmd_control_intercept(struct kvm_vcpu *vcpu,
+   unsigned int event_id, bool enable)
+{
+   int err = 0;
+
+   switch (event_id) {
+   case KVMI_EVENT_BREAKPOINT:
+   err = kvmi_control_bp_intercept(vcpu, enable);
+   break;
+   default:
+   break;
+   }
+
+   return err;
+}
+
+void kvmi_arch_breakpoint_event(struct kvm_vcpu *vcpu, u64 gva, u8 insn_len)
+{
+   u32 action;
+   u64 gpa;
+
+   gpa = kvm_mmu_gva_to_gpa_

[PATCH v9 12/84] KVM: x86: add .desc_ctrl_supported()

2020-07-21 Thread Adalbert Lazăr
When the introspection tool tries to enable the KVMI_EVENT_DESCRIPTOR
event, this function is used to check if it is supported.

Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 1 +
 arch/x86/kvm/svm/svm.c  | 6 ++
 arch/x86/kvm/vmx/capabilities.h | 7 ++-
 arch/x86/kvm/vmx/vmx.c  | 1 +
 4 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ac45aacc9fc0..b3ca64a70bb5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1123,6 +1123,7 @@ struct kvm_x86_ops {
void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
+   bool (*desc_ctrl_supported)(void);
void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7a4ec6fbffb9..f4d882ca0060 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1523,6 +1523,11 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct 
desc_ptr *dt)
mark_dirty(svm->vmcb, VMCB_DT);
 }
 
+static bool svm_desc_ctrl_supported(void)
+{
+   return true;
+}
+
 static void update_cr0_intercept(struct vcpu_svm *svm)
 {
ulong gcr0 = svm->vcpu.arch.cr0;
@@ -4035,6 +4040,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_idt = svm_set_idt,
.get_gdt = svm_get_gdt,
.set_gdt = svm_set_gdt,
+   .desc_ctrl_supported = svm_desc_ctrl_supported,
.set_dr7 = svm_set_dr7,
.sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
.cache_reg = svm_cache_reg,
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 4bbd8b448d22..e7d7fcb7e17f 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -142,12 +142,17 @@ static inline bool cpu_has_vmx_ept(void)
SECONDARY_EXEC_ENABLE_EPT;
 }
 
-static inline bool vmx_umip_emulated(void)
+static inline bool vmx_desc_ctrl_supported(void)
 {
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_DESC;
 }
 
+static inline bool vmx_umip_emulated(void)
+{
+   return vmx_desc_ctrl_supported();
+}
+
 static inline bool cpu_has_vmx_rdtscp(void)
 {
return vmcs_config.cpu_based_2nd_exec_ctrl &
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 61eb64cf25c7..ecd4c50bf1a2 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7903,6 +7903,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_idt = vmx_set_idt,
.get_gdt = vmx_get_gdt,
.set_gdt = vmx_set_gdt,
+   .desc_ctrl_supported = vmx_desc_ctrl_supported,
.set_dr7 = vmx_set_dr7,
.sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
.cache_reg = vmx_cache_reg,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 80/84] KVM: introspection: add KVMI_VCPU_CONTROL_SINGLESTEP

2020-07-21 Thread Adalbert Lazăr
From: Nicușor Cîțu 

The next commit that adds the KVMI_EVENT_SINGLESTEP event will make this
command more useful.

Signed-off-by: Nicușor Cîțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 32 ++
 arch/x86/kvm/kvmi.c   | 18 ++
 arch/x86/kvm/x86.c| 12 +++-
 include/linux/kvmi_host.h |  7 +++
 include/uapi/linux/kvmi.h |  7 +++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 46 ++
 virt/kvm/introspection/kvmi.c | 26 +++-
 virt/kvm/introspection/kvmi_int.h |  2 +
 virt/kvm/introspection/kvmi_msg.c | 60 +++
 9 files changed, 193 insertions(+), 17 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 47387f297029..0a07ef101302 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -1049,6 +1049,38 @@ In order to 'forget' an address, all three bits ('rwx') 
must be set.
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 * -KVM_ENOMEM - there is not enough memory to add the page tracking structures
 
+24. KVMI_VCPU_CONTROL_SINGLESTEP
+
+
+:Architectures: x86 (vmx)
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_control_singlestep {
+   __u8 enable;
+   __u8 padding[7];
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+
+Enables/disables singlestep for the selected vCPU.
+
+The introspection tool should use *KVMI_GET_VERSION*, to check
+if the hardware supports singlestep (see **KVMI_GET_VERSION**).
+
+:Errors:
+
+* -KVM_EOPNOTSUPP - the hardware doesn't support singlestep
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 672a113b3bf4..18713004152d 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1355,3 +1355,21 @@ void kvmi_arch_features(struct kvmi_features *feat)
 {
feat->singlestep = !!kvm_x86_ops.control_singlestep;
 }
+
+bool kvmi_arch_start_singlestep(struct kvm_vcpu *vcpu)
+{
+   if (!kvm_x86_ops.control_singlestep)
+   return false;
+
+   kvm_x86_ops.control_singlestep(vcpu, true);
+   return true;
+}
+
+bool kvmi_arch_stop_singlestep(struct kvm_vcpu *vcpu)
+{
+   if (!kvm_x86_ops.control_singlestep)
+   return false;
+
+   kvm_x86_ops.control_singlestep(vcpu, false);
+   return true;
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0add0b0b8f2d..02b74a57ca01 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8515,9 +8515,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
 
-   inject_pending_event(vcpu, &req_immediate_exit);
-   if (req_int_win)
-   kvm_x86_ops.enable_irq_window(vcpu);
+   if (!kvmi_vcpu_running_singlestep(vcpu)) {
+   /*
+* We cannot inject events during single-stepping.
+* Try again later.
+*/
+   inject_pending_event(vcpu, &req_immediate_exit);
+   if (req_int_win)
+   kvm_x86_ops.enable_irq_window(vcpu);
+   }
 
if (kvm_lapic_enabled(vcpu)) {
update_cr8_intercept(vcpu);
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index 11eb9b1c3c5e..a641768027cc 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -45,6 +45,10 @@ struct kvm_vcpu_introspection {
bool pending;
bool send_event;
} exception;
+
+   struct {
+   bool loop;
+   } singlestep;
 };
 
 struct kvm_introspection {
@@ -89,6 +93,7 @@ void kvmi_handle_requests(struct kvm_vcpu *vcpu);
 bool kvmi_hypercall_event(struct kvm_vcpu *vcpu);
 bool kvmi_breakpoint_event(struct kvm_vcpu *vcpu, u64 gva, u8 insn_len);
 bool kvmi_enter_guest(struct kvm_vcpu *vcpu);
+bool kvmi_vcpu_running_singlestep(struct kvm_vcpu *vcpu);
 
 #else
 
@@ -105,6 +110,8 @@ static inline bool kvmi_breakpoint_event(struct kvm_vcpu 
*vcpu, u64 gva,
{ return true; }
 static inline bool kvmi_enter_guest(struct kvm_vcpu *vcpu)
{ return true; }
+static inline bool kvmi_vcpu_running_singlestep(struct kvm_vcpu *vcpu)
+   { return false; }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index a84affbafa67..bc515237612a 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -47,6 +47,8 @@ enum {
 
KVMI_VM_SET_PAGE_ACCES

[PATCH v9 28/84] KVM: x86: extend kvm_mmu_gva_to_gpa_system() with the 'access' parameter

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

This is needed for kvmi_update_ad_flags() to emulate a guest page
table walk on SPT violations due to A/D bit updates.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 arch/x86/kvm/x86.c  | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 487d1fa6e76d..e92a12647f4d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1556,7 +1556,7 @@ gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, 
gva_t gva,
 gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
   struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
-   struct x86_exception *exception);
+   u32 access, struct x86_exception *exception);
 
 bool kvm_apicv_activated(struct kvm *kvm);
 void kvm_apicv_init(struct kvm *kvm, bool enable);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5611b6cd6d19..0bfa800d0ca8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5498,9 +5498,9 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, 
gva_t gva,
 
 /* uses this to access any guest's mapped memory without checking CPL */
 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
-   struct x86_exception *exception)
+   u32 access, struct x86_exception *exception)
 {
-   return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
+   return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
 }
 
 static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int 
bytes,
@@ -9332,7 +9332,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
vcpu_load(vcpu);
 
	idx = srcu_read_lock(&vcpu->kvm->srcu);
-   gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
+   gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, 0, NULL);
	srcu_read_unlock(&vcpu->kvm->srcu, idx);
tr->physical_address = gpa;
tr->valid = gpa != UNMAPPED_GVA;
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 35/84] KVM: x86: wire in the preread/prewrite/preexec page trackers

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

These are needed in order to notify the introspection tool when
read/write/execute access happens on one of the tracked memory pages.

Also, this patch adds the case when the introspection tool requests
that the vCPU re-enter in guest (and abort the emulation of the current
instruction).

Signed-off-by: Mihai Donțu 
Co-developed-by: Marian Rotariu 
Signed-off-by: Marian Rotariu 
Co-developed-by: Stefan Sicleru 
Signed-off-by: Stefan Sicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/emulate.c |  4 +++
 arch/x86/kvm/kvm_emulate.h |  1 +
 arch/x86/kvm/mmu/mmu.c | 58 +-
 arch/x86/kvm/x86.c | 45 +++--
 4 files changed, 85 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index d0e2825ae617..15a005a3b3f5 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -5442,6 +5442,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void 
*insn, int insn_len)
ctxt->memopp->addr.mem.ea + ctxt->_eip);
 
 done:
+   if (rc == X86EMUL_RETRY_INSTR)
+   return EMULATION_RETRY_INSTR;
if (rc == X86EMUL_PROPAGATE_FAULT)
ctxt->have_exception = true;
return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
@@ -5813,6 +5815,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
if (rc == X86EMUL_INTERCEPTED)
return EMULATION_INTERCEPTED;
 
+   if (rc == X86EMUL_RETRY_INSTR)
+   return EMULATION_RETRY_INSTR;
if (rc == X86EMUL_CONTINUE)
writeback_registers(ctxt);
 
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 43c93ffa76ed..5bfab8d65cd1 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -496,6 +496,7 @@ bool x86_page_table_writing_insn(struct x86_emulate_ctxt 
*ctxt);
 #define EMULATION_OK 0
 #define EMULATION_RESTART 1
 #define EMULATION_INTERCEPTED 2
+#define EMULATION_RETRY_INSTR 3
 void init_decode_cache(struct x86_emulate_ctxt *ctxt);
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt);
 int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index ede8ef6d1e34..da57321e0cec 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1226,9 +1226,13 @@ static void account_shadowed(struct kvm *kvm, struct 
kvm_mmu_page *sp)
slot = __gfn_to_memslot(slots, gfn);
 
/* the non-leaf shadow pages are keeping readonly. */
-   if (sp->role.level > PG_LEVEL_4K)
-   return kvm_slot_page_track_add_page(kvm, slot, gfn,
-   KVM_PAGE_TRACK_WRITE);
+   if (sp->role.level > PG_LEVEL_4K) {
+   kvm_slot_page_track_add_page(kvm, slot, gfn,
+KVM_PAGE_TRACK_PREWRITE);
+   kvm_slot_page_track_add_page(kvm, slot, gfn,
+KVM_PAGE_TRACK_WRITE);
+   return;
+   }
 
kvm_mmu_gfn_disallow_lpage(slot, gfn);
 }
@@ -1254,9 +1258,13 @@ static void unaccount_shadowed(struct kvm *kvm, struct 
kvm_mmu_page *sp)
gfn = sp->gfn;
slots = kvm_memslots_for_spte_role(kvm, sp->role);
slot = __gfn_to_memslot(slots, gfn);
-   if (sp->role.level > PG_LEVEL_4K)
-   return kvm_slot_page_track_remove_page(kvm, slot, gfn,
-  KVM_PAGE_TRACK_WRITE);
+   if (sp->role.level > PG_LEVEL_4K) {
+   kvm_slot_page_track_remove_page(kvm, slot, gfn,
+   KVM_PAGE_TRACK_PREWRITE);
+   kvm_slot_page_track_remove_page(kvm, slot, gfn,
+   KVM_PAGE_TRACK_WRITE);
+   return;
+   }
 
kvm_mmu_gfn_allow_lpage(slot, gfn);
 }
@@ -2987,7 +2995,8 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, 
gfn_t gfn,
 {
struct kvm_mmu_page *sp;
 
-   if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))
+   if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREWRITE) ||
+   kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_WRITE))
return true;
 
for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
@@ -3405,6 +3414,21 @@ static void disallowed_hugepage_adjust(struct 
kvm_shadow_walk_iterator it,
}
 }
 
+static unsigned int kvm_mmu_apply_introspection_access(struct kvm_vcpu *vcpu,
+   gfn_t gfn,
+   unsigned int acc)
+{
+   if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREREAD))
+   acc &= ~ACC_USER_MASK;
+   if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_PREWRITE) 

[PATCH v9 37/84] KVM: introduce VM introspection

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

The KVM introspection subsystem provides a facility for applications
to control the execution of any running VMs (pause, resume, shutdown),
query the state of the vCPUs (GPRs, MSRs etc.), alter the page access bits
in the shadow page tables and receive notifications when events of interest
have taken place (shadow page table level faults, key MSR writes,
hypercalls etc.). Some notifications can be responded to with an action
(like preventing an MSR from being written), others are merely informative
(like breakpoint events which can be used for execution tracing).

Signed-off-by: Mihai Donțu 
Co-developed-by: Marian Rotariu 
Signed-off-by: Marian Rotariu 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 140 ++
 arch/x86/kvm/Kconfig  |  13 +++
 arch/x86/kvm/Makefile |   2 +
 include/linux/kvmi_host.h |  21 +
 virt/kvm/introspection/kvmi.c |  25 ++
 virt/kvm/introspection/kvmi_int.h |   7 ++
 virt/kvm/kvm_main.c   |  15 
 7 files changed, 223 insertions(+)
 create mode 100644 Documentation/virt/kvm/kvmi.rst
 create mode 100644 include/linux/kvmi_host.h
 create mode 100644 virt/kvm/introspection/kvmi.c
 create mode 100644 virt/kvm/introspection/kvmi_int.h

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
new file mode 100644
index ..3a1b6c655de7
--- /dev/null
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -0,0 +1,140 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=
+KVMI - The kernel virtual machine introspection subsystem
+=
+
+The KVM introspection subsystem provides a facility for applications running
+on the host or in a separate VM, to control the execution of any running VMs
+(pause, resume, shutdown), query the state of the vCPUs (GPRs, MSRs etc.),
+alter the page access bits in the shadow page tables (only for the hardware
+backed ones, eg. Intel's EPT) and receive notifications when events of
+interest have taken place (shadow page table level faults, key MSR writes,
+hypercalls etc.). Some notifications can be responded to with an action
+(like preventing an MSR from being written), others are mere informative
+(like breakpoint events which can be used for execution tracing).
+With few exceptions, all events are optional. An application using this
+subsystem will explicitly register for them.
+
+The use case that gave rise to the creation of this subsystem is to monitor
+the guest OS and as such the ABI/API is highly influenced by how the guest
+software (kernel, applications) sees the world. For example, some events
+provide information specific for the host CPU architecture
+(eg. MSR_IA32_SYSENTER_EIP) merely because it's leveraged by guest software
+to implement a critical feature (fast system calls).
+
+At the moment, the target audience for KVMI are security software authors
+that wish to perform forensics on newly discovered threats (exploits) or
+to implement another layer of security like preventing a large set of
+kernel rootkits simply by "locking" the kernel image in the shadow page
+tables (ie. enforce .text r-x, .rodata rw- etc.). It's the latter case that
+made KVMI a separate subsystem, even though many of these features are
+available in the device manager (eg. QEMU). The ability to build a security
+application that does not interfere (in terms of performance) with the
+guest software asks for a specialized interface that is designed for minimum
+overhead.
+
+API/ABI
+===
+
+This chapter describes the VMI interface used to monitor and control local
+guests from a user application.
+
+Overview
+
+
+The interface is socket based, one connection for every VM. One end is in the
+host kernel while the other is held by the user application (introspection
+tool).
+
+The initial connection is established by an application running on the host
+(eg. QEMU) that connects to the introspection tool and after a handshake
+the socket is passed to the host kernel making all further communication
+take place between it and the introspection tool.
+
+The socket protocol allows for commands and events to be multiplexed over
+the same connection. As such, it is possible for the introspection tool to
+receive an event while waiting for the result of a command. Also, it can
+send a command while the host kernel is waiting for a reply to an event.
+
+The kernel side of the socket communication is blocking and will wait
+for an answer from its peer indefinitely or until the guest is powered
+off (killed), restarted or the peer goes away, at which point it will
+wake up and properly cleanup as if the introspection subsystem has never
+been used on that guest (if requested). Obviously, whether the guest can
+really continue normal execution depends on whether the introspection
+tool has made any modifications th

[PATCH v9 41/84] KVM: introspection: add KVMI_GET_VERSION

2020-07-21 Thread Adalbert Lazăr
The kernel side will accept older and newer versions of an introspection
command (having a smaller/larger message size), but it will not
accept newer versions for event replies (larger messages). Even if the
introspection tool can use the KVMI_GET_VERSION command to check the
supported features of the introspection API, the most important usage
of this command is to avoid sending newer versions of event replies that
the kernel side doesn't know.

Any attempt from the device manager to explicitly disallow this command
through the KVM_INTROSPECTION_COMMAND ioctl will get -EPERM, unless all
commands are disallowed (using id=-1) in which case KVMI_GET_VERSION is
silently allowed, without error.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 37 +++
 include/uapi/linux/kvmi.h | 10 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 26 +
 virt/kvm/introspection/kvmi.c | 27 --
 virt/kvm/introspection/kvmi_msg.c | 12 ++
 5 files changed, 108 insertions(+), 4 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index f3d16971ba2b..41fd48222bcb 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -224,3 +224,40 @@ device-specific memory (DMA, emulated MMIO, reserved by a 
passthrough
 device etc.). It is up to the user to determine, using the guest operating
 system data structures, the areas that are safe to access (code, stack, heap
 etc.).
+
+Commands
+
+
+The following C structures are meant to be used directly when communicating
+over the wire. The peer that detects any size mismatch should simply close
+the connection and report the error.
+
+1. KVMI_GET_VERSION
+---
+
+:Architectures: all
+:Versions: >= 1
+:Parameters: none
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_get_version_reply {
+   __u32 version;
+   __u32 padding;
+   };
+
+Returns the introspection API version.
+
+This command is always allowed and successful.
+
+The messages used for introspection commands/events might be extended
+in future versions and while the kernel will accept commands with
+shorter messages (older versions) or larger messages (newer versions,
+ignoring the extra information), it will not accept event replies with
+larger/newer messages.
+
+The introspection tool should use this command to identify the features
+supported by the kernel side and what messages must be used for event
+replies.
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 9bfff484fd6f..896fcb6abf2c 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -6,11 +6,16 @@
  * KVMI structures and definitions
  */
 
+#include 
+#include 
+
 enum {
KVMI_VERSION = 0x0001
 };
 
 enum {
+   KVMI_GET_VERSION = 1,
+
KVMI_NUM_MESSAGES
 };
 
@@ -39,4 +44,9 @@ struct kvmi_error_code {
__u32 padding;
 };
 
+struct kvmi_get_version_reply {
+   __u32 version;
+   __u32 padding;
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 9c591e0d9c8a..d15eccc330e5 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -98,6 +98,7 @@ static void hook_introspection(struct kvm_vm *vm)
do_hook_ioctl(vm, Kvm_socket, no_padding, 0);
do_hook_ioctl(vm, Kvm_socket, no_padding, EEXIST);
 
+   set_command_perm(vm, KVMI_GET_VERSION, disallow, EPERM);
set_command_perm(vm, all_IDs, allow_inval, EINVAL);
set_command_perm(vm, all_IDs, disallow, 0);
set_command_perm(vm, all_IDs, allow, 0);
@@ -213,12 +214,37 @@ static void test_cmd_invalid(void)
-r, kvm_strerror(-r));
 }
 
+static void test_vm_command(int cmd_id, struct kvmi_msg_hdr *req,
+   size_t req_size, void *rpl, size_t rpl_size)
+{
+   int r;
+
+   r = do_command(cmd_id, req, req_size, rpl, rpl_size);
+   TEST_ASSERT(r == 0,
+   "Command %d failed, error %d (%s)\n",
+   cmd_id, -r, kvm_strerror(-r));
+}
+
+static void test_cmd_get_version(void)
+{
+   struct kvmi_get_version_reply rpl;
+   struct kvmi_msg_hdr req;
+
+   test_vm_command(KVMI_GET_VERSION, &req, sizeof(req), &rpl, sizeof(rpl));
+   TEST_ASSERT(rpl.version == KVMI_VERSION,
+   "Unexpected KVMI version %d, expecting %d\n",
+   rpl.version, KVMI_VERSION);
+
+   pr_info("KVMI version: %u\n", rpl.version);
+}
+
 static void test_introspection(struct kvm_vm *vm)
 {
setup_socket();
hook_introspection(vm);
 
test_cmd_invalid();
+   test_cmd_get_version();
 
unhook_introspection(vm);
 }
diff --git a/virt/kvm/introspection/kvmi.

[PATCH v9 10/84] KVM: x86: add .control_cr3_intercept() to struct kvm_x86_ops

2020-07-21 Thread Adalbert Lazăr
This function is needed for the KVMI_VCPU_CONTROL_CR command, when the
introspection tool has to intercept the read/write access to CR3.

Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |  6 ++
 arch/x86/kvm/svm/svm.c  | 14 ++
 arch/x86/kvm/vmx/vmx.c  | 26 --
 3 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 78fe3c7c814c..89c0bd6529a5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -136,6 +136,10 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t 
base_gfn, int level)
 #define KVM_NR_FIXED_MTRR_REGION 88
 #define KVM_NR_VAR_MTRR 8
 
+#define CR_TYPE_R  1
+#define CR_TYPE_W  2
+#define CR_TYPE_RW 3
+
 #define ASYNC_PF_PER_VCPU 64
 
 enum kvm_reg {
@@ -1111,6 +1115,8 @@ struct kvm_x86_ops {
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
+   void (*control_cr3_intercept)(struct kvm_vcpu *vcpu, int type,
+ bool enable);
void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 23b3cd057753..f14fc940538b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1596,6 +1596,19 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 0;
 }
 
+static void svm_control_cr3_intercept(struct kvm_vcpu *vcpu, int type,
+ bool enable)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   if (type & CR_TYPE_R)
+   enable ? set_cr_intercept(svm, INTERCEPT_CR3_READ) :
+clr_cr_intercept(svm, INTERCEPT_CR3_READ);
+   if (type & CR_TYPE_W)
+   enable ? set_cr_intercept(svm, INTERCEPT_CR3_WRITE) :
+clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
+}
+
 static void svm_set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
 {
@@ -4008,6 +4021,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
.set_cr0 = svm_set_cr0,
.set_cr4 = svm_set_cr4,
+   .control_cr3_intercept = svm_control_cr3_intercept,
.set_efer = svm_set_efer,
.get_idt = svm_get_idt,
.set_idt = svm_set_idt,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 069593f2f504..6b9639703560 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3003,24 +3003,37 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
 }
 
+static void vmx_control_cr3_intercept(struct kvm_vcpu *vcpu, int type,
+ bool enable)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+   u32 cr3_exec_control = 0;
+
+   if (type & CR_TYPE_R)
+   cr3_exec_control |= CPU_BASED_CR3_STORE_EXITING;
+   if (type & CR_TYPE_W)
+   cr3_exec_control |= CPU_BASED_CR3_LOAD_EXITING;
+
+   if (enable)
+   exec_controls_setbit(vmx, cr3_exec_control);
+   else
+   exec_controls_clearbit(vmx, cr3_exec_control);
+}
+
 static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
unsigned long cr0,
struct kvm_vcpu *vcpu)
 {
-   struct vcpu_vmx *vmx = to_vmx(vcpu);
-
if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
vmx_cache_reg(vcpu, VCPU_EXREG_CR3);
if (!(cr0 & X86_CR0_PG)) {
/* From paging/starting to nonpaging */
-   exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
- CPU_BASED_CR3_STORE_EXITING);
+   vmx_control_cr3_intercept(vcpu, CR_TYPE_RW, true);
vcpu->arch.cr0 = cr0;
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
} else if (!is_paging(vcpu)) {
/* From nonpaging to paging */
-   exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
-   CPU_BASED_CR3_STORE_EXITING);
+   vmx_control_cr3_intercept(vcpu, CR_TYPE_RW, false);
vcpu->arch.cr0 = cr0;
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
}
@@ -7876,6 +7889,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
.set_cr0 = vmx_set_cr0,
.set_cr4 = vmx_set_cr4,
+   .control_cr3_intercept =

[PATCH v9 19/84] KVM: vmx: pass struct kvm_vcpu to the intercept msr related functions

2020-07-21 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This is preparatory patch to mediate the MSR interception between
the introspection tool and the device manager (one must not disable
the interception if the other one has enabled the interception).

Passing NULL during initialization is OK because a vCPU can be
introspected only after initialization.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/vmx/vmx.c | 74 --
 1 file changed, 42 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cf07db129670..ecf7fb21b812 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -342,7 +342,8 @@ module_param_cb(vmentry_l1d_flush, &vmentry_l1d_flush_ops, 
NULL, 0644);
 
 static bool guest_state_valid(struct kvm_vcpu *vcpu);
 static u32 vmx_segment_access_rights(struct kvm_segment *var);
-static __always_inline void vmx_disable_intercept_for_msr(unsigned long 
*msr_bitmap,
+static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu 
*vcpu,
+ unsigned long 
*msr_bitmap,
  u32 msr, int type);
 
 void vmx_vmexit(void);
@@ -2086,7 +2087,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
 * in the merging. We update the vmcs01 here for L1 as well
 * since it will end up touching the MSR anyway now.
 */
-   vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap,
+   vmx_disable_intercept_for_msr(vcpu, vmx->vmcs01.msr_bitmap,
  MSR_IA32_SPEC_CTRL,
  MSR_TYPE_RW);
break;
@@ -2122,8 +2123,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
 * vmcs02.msr_bitmap here since it gets completely overwritten
 * in the merging.
 */
-   vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, 
MSR_IA32_PRED_CMD,
- MSR_TYPE_W);
+   vmx_disable_intercept_for_msr(vcpu, vmx->vmcs01.msr_bitmap,
+ MSR_IA32_PRED_CMD, MSR_TYPE_W);
break;
case MSR_IA32_CR_PAT:
if (!kvm_pat_valid(data))
@@ -3733,7 +3734,8 @@ void free_vpid(int vpid)
	spin_unlock(&vmx_vpid_lock);
 }
 
-static __always_inline void vmx_disable_intercept_for_msr(unsigned long 
*msr_bitmap,
+static __always_inline void vmx_disable_intercept_for_msr(struct kvm_vcpu 
*vcpu,
+ unsigned long 
*msr_bitmap,
  u32 msr, int type)
 {
int f = sizeof(unsigned long);
@@ -3771,7 +3773,8 @@ static __always_inline void 
vmx_disable_intercept_for_msr(unsigned long *msr_bit
}
 }
 
-static __always_inline void vmx_enable_intercept_for_msr(unsigned long 
*msr_bitmap,
+static __always_inline void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu,
+unsigned long 
*msr_bitmap,
 u32 msr, int type)
 {
int f = sizeof(unsigned long);
@@ -3809,13 +3812,14 @@ static __always_inline void 
vmx_enable_intercept_for_msr(unsigned long *msr_bitm
}
 }
 
-static __always_inline void vmx_set_intercept_for_msr(unsigned long 
*msr_bitmap,
+static __always_inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu,
+ unsigned long *msr_bitmap,
  u32 msr, int type, bool 
value)
 {
if (value)
-   vmx_enable_intercept_for_msr(msr_bitmap, msr, type);
+   vmx_enable_intercept_for_msr(vcpu, msr_bitmap, msr, type);
else
-   vmx_disable_intercept_for_msr(msr_bitmap, msr, type);
+   vmx_disable_intercept_for_msr(vcpu, msr_bitmap, msr, type);
 }
 
 static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
@@ -3833,7 +3837,8 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
return mode;
 }
 
-static void vmx_update_msr_bitmap_x2apic(unsigned long *msr_bitmap,
+static void vmx_update_msr_bitmap_x2apic(struct kvm_vcpu *vcpu,
+unsigned long *msr_bitmap,
 u8 mode)
 {
int msr;
@@ -3849,11 +3854,11 @@ static void vmx_update_msr_bitmap_x2apic(unsigned long 
*msr_bitmap,
 * TPR reads and writes can be virtualized even if virtual 
interrupt
 * delivery is not in use.
 */
-   vmx_disable_intercept_for_msr(msr_bitmap, 
X2APIC_MSR(APIC_TASKPRI), MSR_TYPE_RW);
+   vmx_disable_intercept_for_msr(vcpu, msr_bitmap,

[PATCH v9 78/84] KVM: introspection: add KVMI_EVENT_PF

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

This event is sent when a #PF occurs due to a failed permission check
in the shadow page tables, for a page in which the introspection tool
has shown interest.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  51 +++
 arch/x86/include/asm/kvmi_host.h  |   1 +
 arch/x86/kvm/kvmi.c   | 141 ++
 include/uapi/linux/kvmi.h |  10 ++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  |  76 ++
 virt/kvm/introspection/kvmi.c | 115 ++
 virt/kvm/introspection/kvmi_int.h |   9 ++
 virt/kvm/introspection/kvmi_msg.c |  18 +++
 8 files changed, 421 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 123b2360d2e0..b2e2a9edda77 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -554,6 +554,7 @@ the following events::
KVMI_EVENT_DESCRIPTOR
KVMI_EVENT_HYPERCALL
KVMI_EVENT_MSR
+   KVMI_EVENT_PF
KVMI_EVENT_XSETBV
 
 When an event is enabled, the introspection tool is notified and
@@ -1387,3 +1388,53 @@ register (see **KVMI_VCPU_CONTROL_EVENTS**).
 
 ``kvmi_event``, the MSR number, the old value and the new value are
 sent to the introspection tool. The *CONTINUE* action will set the ``new_val``.
+
+10. KVMI_EVENT_PF
+-
+
+:Architectures: x86
+:Versions: >= 1
+:Actions: CONTINUE, CRASH, RETRY
+:Parameters:
+
+::
+
+   struct kvmi_event;
+   struct kvmi_event_pf {
+   __u64 gva;
+   __u64 gpa;
+   __u8 access;
+   __u8 padding1;
+   __u16 padding2;
+   __u32 padding3;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_event_reply;
+
+This event is sent when a hypervisor page fault occurs due to a failed
+permission check in the shadow page tables, the introspection has been
+enabled for this event (see *KVMI_VCPU_CONTROL_EVENTS*) and the event was
+generated for a page in which the introspection tool has shown interest
+(i.e. it has previously touched it by adjusting the spte permissions).
+
+The shadow page tables can be used by the introspection tool to guarantee
+the purpose of code areas inside the guest (code, rodata, stack, heap
+etc.) Each attempt at an operation unfitting for a certain memory
+range (e.g. executing code in the heap) triggers a page fault and gives the
+introspection tool the chance to audit the code attempting the operation.
+
+``kvmi_event``, guest virtual address (or 0x/UNMAPPED_GVA),
+guest physical address and the access flags (eg. KVMI_PAGE_ACCESS_R)
+are sent to the introspection tool.
+
+The *CONTINUE* action will continue the page fault handling (e.g. via
+emulation).
+
+The *RETRY* action is used by the introspection tool to retry the
+execution of the current instruction, usually because it changed the
+instruction pointer or the page restrictions.
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 98ea548c0b15..25c7bb8a9082 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -41,6 +41,7 @@ struct kvm_vcpu_arch_introspection {
 };
 
 struct kvm_arch_introspection {
+   struct kvm_page_track_notifier_node kptn_node;
 };
 
 #define SLOTS_SIZE BITS_TO_LONGS(KVM_MEM_SLOTS_NUM)
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index b233a3c5becb..8fbf1720749b 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -10,6 +10,21 @@
 #include "cpuid.h"
 #include "../../../virt/kvm/introspection/kvmi_int.h"
 
+static bool kvmi_track_preread(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+  int bytes,
+  struct kvm_page_track_notifier_node *node);
+static bool kvmi_track_prewrite(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+   const u8 *new, int bytes,
+   struct kvm_page_track_notifier_node *node);
+static bool kvmi_track_preexec(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+  struct kvm_page_track_notifier_node *node);
+static void kvmi_track_create_slot(struct kvm *kvm,
+  struct kvm_memory_slot *slot,
+  unsigned long npages,
+  struct kvm_page_track_notifier_node *node);
+static void kvmi_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot 
*slot,
+ struct kvm_page_track_notifier_node *node);
+
 static unsigned int kvmi_vcpu_mode(const struct kvm_vcpu *vcpu,
   const struct kvm_sregs *sregs)
 {
@@ -1209,3 +1224,129 @@ void kvmi_arch_update_page_tracking(struct kvm *kvm,
}
}
 }
+
+void kv

[PATCH v9 58/84] KVM: introspection: add KVMI_VCPU_GET_CPUID

2020-07-21 Thread Adalbert Lazăr
From: Marian Rotariu 

This command returns a CPUID leaf (as seen by the guest OS).

Signed-off-by: Marian Rotariu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 36 +++
 arch/x86/include/uapi/asm/kvmi.h  | 12 +++
 arch/x86/kvm/kvmi.c   | 19 ++
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 34 ++
 virt/kvm/introspection/kvmi_int.h |  3 ++
 virt/kvm/introspection/kvmi_msg.c | 15 
 7 files changed, 120 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index bd35002c3254..fc2e8c756191 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -641,6 +641,42 @@ currently being handled is replied to.
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 * -KVM_EOPNOTSUPP - the command hasn't been received during an introspection 
event
 
+13. KVMI_VCPU_GET_CPUID
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_get_cpuid {
+   __u32 function;
+   __u32 index;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vcpu_get_cpuid_reply {
+   __u32 eax;
+   __u32 ebx;
+   __u32 ecx;
+   __u32 edx;
+   };
+
+Returns a CPUID leaf (as seen by the guest OS).
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_ENOENT - the selected leaf is not present or is invalid
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index f14674c3c109..57c48ace417f 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -45,4 +45,16 @@ struct kvmi_vcpu_get_registers_reply {
struct kvm_msrs msrs;
 };
 
+struct kvmi_vcpu_get_cpuid {
+   __u32 function;
+   __u32 index;
+};
+
+struct kvmi_vcpu_get_cpuid_reply {
+   __u32 eax;
+   __u32 ebx;
+   __u32 ecx;
+   __u32 edx;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 4fd7a3c17ef5..53c4a37e10c6 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -7,6 +7,7 @@
 
 #include "linux/kvm_host.h"
 #include "x86.h"
+#include "cpuid.h"
 #include "../../../virt/kvm/introspection/kvmi_int.h"
 
 static unsigned int kvmi_vcpu_mode(const struct kvm_vcpu *vcpu,
@@ -191,3 +192,21 @@ int kvmi_arch_cmd_vcpu_get_registers(struct kvm_vcpu *vcpu,
 
return err;
 }
+
+int kvmi_arch_cmd_vcpu_get_cpuid(struct kvm_vcpu *vcpu,
+const struct kvmi_vcpu_get_cpuid *req,
+struct kvmi_vcpu_get_cpuid_reply *rpl)
+{
+   struct kvm_cpuid_entry2 *e;
+
+   e = kvm_find_cpuid_entry(vcpu, req->function, req->index);
+   if (!e)
+   return -KVM_ENOENT;
+
+   rpl->eax = e->eax;
+   rpl->ebx = e->ebx;
+   rpl->ecx = e->ecx;
+   rpl->edx = e->edx;
+
+   return 0;
+}
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 5f637a21a907..d7f4360e609e 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -30,6 +30,7 @@ enum {
KVMI_VCPU_CONTROL_EVENTS = 10,
KVMI_VCPU_GET_REGISTERS  = 11,
KVMI_VCPU_SET_REGISTERS  = 12,
+   KVMI_VCPU_GET_CPUID  = 13,
 
KVMI_NUM_MESSAGES
 };
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index ffd0337d0567..7269afd4c36d 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -1033,6 +1033,39 @@ static void test_cmd_vcpu_set_registers(struct kvm_vm 
*vm)
stop_vcpu_worker(vcpu_thread, );
 }
 
+static int cmd_vcpu_get_cpuid(struct kvm_vm *vm,
+ __u32 function, __u32 index,
+ struct kvmi_vcpu_get_cpuid_reply *rpl)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   struct kvmi_vcpu_get_cpuid cmd;
+   } req = {};
+
+   req.cmd.function = function;
+   req.cmd.index = index;
+
+   return do_vcpu0_command(vm, KVMI_VCPU_GET_CPUID, , sizeof(req),
+   rpl, sizeof(*rpl));
+}
+
+static void test_cmd_vcpu_get_cpuid(struct kvm_vm *vm)
+{
+   struct kvmi_vcpu_get_cpuid_reply rpl = {};
+   __u32 function = 0;
+   __u32 index = 0;
+   int r;
+
+   r = cmd_vcpu_get_cpuid(vm, function, index, );
+   TEST_ASSERT(r == 0,
+   "KV

[PATCH v9 42/84] KVM: introspection: add KVMI_VM_CHECK_COMMAND and KVMI_VM_CHECK_EVENT

2020-07-21 Thread Adalbert Lazăr
These commands are used to check what introspection commands and events
are supported (kernel) and allowed (device manager).

These are alternative methods to KVMI_GET_VERSION for checking whether the
introspection supports a specific command/event.

As with the KVMI_GET_VERSION command, these two commands can never be
disallowed by the device manager.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 62 +++
 include/uapi/linux/kvmi.h | 16 -
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 59 ++
 virt/kvm/introspection/kvmi.c | 14 +
 virt/kvm/introspection/kvmi_int.h |  1 +
 virt/kvm/introspection/kvmi_msg.c | 45 +-
 6 files changed, 195 insertions(+), 2 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 41fd48222bcb..a2cda3268da0 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -261,3 +261,65 @@ larger/newer messages.
 The introspection tool should use this command to identify the features
 supported by the kernel side and what messages must be used for event
 replies.
+
+2. KVMI_VM_CHECK_COMMAND
+
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_check_command {
+   __u16 id;
+   __u16 padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+
+Checks if the command specified by ``id`` is supported and allowed.
+
+This command is always allowed.
+
+:Errors:
+
+* -KVM_ENOENT - the command specified by ``id`` is unsupported
+* -KVM_EPERM - the command specified by ``id`` is disallowed
+* -KVM_EINVAL - the padding is not zero
+
+3. KVMI_VM_CHECK_EVENT
+--
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_check_event {
+   __u16 id;
+   __u16 padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+
+Checks if the event specified by ``id`` is supported and allowed.
+
+This command is always allowed.
+
+:Errors:
+
+* -KVM_ENOENT - the event specified by ``id`` is unsupported
+* -KVM_EPERM - the event specified by ``id`` is disallowed
+* -KVM_EINVAL - the padding is not zero
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 896fcb6abf2c..e55a0fa66ac5 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -14,7 +14,9 @@ enum {
 };
 
 enum {
-   KVMI_GET_VERSION = 1,
+   KVMI_GET_VERSION  = 1,
+   KVMI_VM_CHECK_COMMAND = 2,
+   KVMI_VM_CHECK_EVENT   = 3,
 
KVMI_NUM_MESSAGES
 };
@@ -49,4 +51,16 @@ struct kvmi_get_version_reply {
__u32 padding;
 };
 
+struct kvmi_vm_check_command {
+   __u16 id;
+   __u16 padding1;
+   __u32 padding2;
+};
+
+struct kvmi_vm_check_event {
+   __u16 id;
+   __u16 padding1;
+   __u32 padding2;
+};
+
 #endif /* _UAPI__LINUX_KVMI_H */
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index d15eccc330e5..28216c4e8b9d 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -99,6 +99,8 @@ static void hook_introspection(struct kvm_vm *vm)
do_hook_ioctl(vm, Kvm_socket, no_padding, EEXIST);
 
set_command_perm(vm, KVMI_GET_VERSION, disallow, EPERM);
+   set_command_perm(vm, KVMI_VM_CHECK_COMMAND, disallow, EPERM);
+   set_command_perm(vm, KVMI_VM_CHECK_EVENT, disallow, EPERM);
set_command_perm(vm, all_IDs, allow_inval, EINVAL);
set_command_perm(vm, all_IDs, disallow, 0);
set_command_perm(vm, all_IDs, allow, 0);
@@ -238,6 +240,61 @@ static void test_cmd_get_version(void)
pr_info("KVMI version: %u\n", rpl.version);
 }
 
+static void cmd_vm_check_command(__u16 id, __u16 padding, int expected_err)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vm_check_command cmd;
+   } req = {};
+   int r;
+
+   req.cmd.id = id;
+   req.cmd.padding1 = padding;
+   req.cmd.padding2 = padding;
+
+   r = do_command(KVMI_VM_CHECK_COMMAND, , sizeof(req), NULL, 0);
+   TEST_ASSERT(r == expected_err,
+   "KVMI_VM_CHECK_COMMAND failed, error %d (%s), expected %d\n",
+   -r, kvm_strerror(-r), expected_err);
+}
+
+static void test_cmd_vm_check_command(void)
+{
+   __u16 valid_id = KVMI_GET_VERSION, invalid_id = 0x;
+   __u16 padding = 1, no_padding = 0;
+
+   cmd_vm_check_command(valid_id, no_padding, 0);
+   cmd_vm_check_command(valid_id, padding, -KVM_EINVAL);
+   cmd_vm_check_command(invalid_id, no_padding, -KVM_ENOENT);
+}
+
+static void cmd_vm_check_event(__u16 id, __u16 padding, int expected_err)
+{
+   struc

[PATCH v9 83/84] KVM: introspection: emulate a guest page table walk on SPT violations due to A/D bit updates

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

On SPT page faults caused by guest page table walks, use the existing
guest page table walk code to make the necessary adjustments to the A/D
bits and return to guest. This effectively bypasses the x86 emulator
which was making the wrong modifications, leading one OS (Windows 8.1 x64)
to triple-fault very early in the boot process with the introspection
enabled.

With introspection disabled, these faults are handled by simply removing
the protection from the affected guest page and returning to guest.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvmi_host.h |  2 ++
 arch/x86/kvm/kvmi.c  | 33 
 arch/x86/kvm/mmu/mmu.c   | 12 ++--
 include/linux/kvmi_host.h|  3 +++
 virt/kvm/introspection/kvmi.c| 26 +
 5 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 25c7bb8a9082..509fa3fff5e7 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -64,6 +64,7 @@ bool kvmi_descriptor_event(struct kvm_vcpu *vcpu, u8 
descriptor, bool write);
 bool kvmi_msr_event(struct kvm_vcpu *vcpu, struct msr_data *msr);
 bool kvmi_monitor_msrw_intercept(struct kvm_vcpu *vcpu, u32 msr, bool enable);
 bool kvmi_msrw_intercept_originator(struct kvm_vcpu *vcpu);
+bool kvmi_update_ad_flags(struct kvm_vcpu *vcpu);
 
 #else /* CONFIG_KVM_INTROSPECTION */
 
@@ -88,6 +89,7 @@ static inline bool kvmi_monitor_msrw_intercept(struct 
kvm_vcpu *vcpu, u32 msr,
   bool enable) { return false; }
 static inline bool kvmi_msrw_intercept_originator(struct kvm_vcpu *vcpu)
{ return false; }
+static inline bool kvmi_update_ad_flags(struct kvm_vcpu *vcpu) { return false; 
}
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 8051d06064ab..4e75858c03b4 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1378,3 +1378,36 @@ gpa_t kvmi_arch_cmd_translate_gva(struct kvm_vcpu *vcpu, 
gva_t gva)
 {
return kvm_mmu_gva_to_gpa_system(vcpu, gva, 0, NULL);
 }
+
+bool kvmi_update_ad_flags(struct kvm_vcpu *vcpu)
+{
+   struct kvm_introspection *kvmi;
+   bool ret = false;
+   gva_t gva;
+   gpa_t gpa;
+
+   kvmi = kvmi_get(vcpu->kvm);
+   if (!kvmi)
+   return false;
+
+   gva = kvm_x86_ops.fault_gla(vcpu);
+   if (gva == ~0ull) {
+   kvmi_warn_once(kvmi, "%s: cannot perform translation\n",
+  __func__);
+   goto out;
+   }
+
+   gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, PFERR_WRITE_MASK, NULL);
+   if (gpa == UNMAPPED_GVA) {
+   struct x86_exception exception = { };
+
+   gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, 0, );
+   }
+
+   ret = (gpa != UNMAPPED_GVA);
+
+out:
+   kvmi_put(vcpu->kvm);
+
+   return ret;
+}
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 4df5b729e2c5..97766f34910d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -40,6 +40,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -5549,8 +5550,15 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t 
cr2_or_gpa, u64 error_code,
 */
if (vcpu->arch.mmu->direct_map &&
(error_code & PFERR_NESTED_GUEST_PAGE) == PFERR_NESTED_GUEST_PAGE) {
-   kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(cr2_or_gpa));
-   return 1;
+   gfn_t gfn = gpa_to_gfn(cr2_or_gpa);
+
+   if (kvmi_tracked_gfn(vcpu, gfn)) {
+   if (kvmi_update_ad_flags(vcpu))
+   return 1;
+   } else {
+   kvm_mmu_unprotect_page(vcpu->kvm, gfn);
+   return 1;
+   }
}
 
/*
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index b01e8505f493..5baef68d8cbe 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -96,6 +96,7 @@ bool kvmi_enter_guest(struct kvm_vcpu *vcpu);
 bool kvmi_vcpu_running_singlestep(struct kvm_vcpu *vcpu);
 void kvmi_singlestep_done(struct kvm_vcpu *vcpu);
 void kvmi_singlestep_failed(struct kvm_vcpu *vcpu);
+bool kvmi_tracked_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 #else
 
@@ -116,6 +117,8 @@ static inline bool kvmi_vcpu_running_singlestep(struct 
kvm_vcpu *vcpu)
{ return false; }
 static inline void kvmi_singlestep_done(struct kvm_vcpu *vcpu) { }
 static inline void kvmi_singlestep_failed(struct kvm_vcpu *vcpu) { }
+static inline bool kvmi_tracked_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+   { return false; }
 
 #endif /* CONFIG_KVM_INTROSPECTION */
 
diff --git a/virt/kvm/introspection/kvmi

[PATCH v9 77/84] KVM: introspection: add KVMI_VM_SET_PAGE_ACCESS

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

This command sets the spte access bits (rwx) for an array of guest
physical addresses (through the page tracking subsystem).

These GPAs, with the requested access bits, are also kept in a radix
tree in order to filter out the #PF events which are of no interest to
the introspection tool.

The access restrictions for pages that are not visible to the guest are
silently ignored by default (the tool might set restrictions for the
whole memory, based on KVMI_VM_GET_MAX_GFN).

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  60 ++
 arch/x86/include/asm/kvm_host.h   |   2 +
 arch/x86/include/asm/kvmi_host.h  |   7 +
 arch/x86/kvm/kvmi.c   |  40 
 include/linux/kvmi_host.h |   3 +
 include/uapi/linux/kvmi.h |  23 +++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  |  52 ++
 virt/kvm/introspection/kvmi.c | 174 +-
 virt/kvm/introspection/kvmi_int.h |  12 ++
 virt/kvm/introspection/kvmi_msg.c |  12 ++
 10 files changed, 384 insertions(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 536d6ecec026..123b2360d2e0 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -977,6 +977,66 @@ to control events for any other register will fail with 
-KVM_EINVAL::
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+23. KVMI_VM_SET_PAGE_ACCESS
+---
+
+:Architectures: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_set_page_access {
+   __u16 count;
+   __u16 padding1;
+   __u32 padding2;
+   struct kvmi_page_access_entry entries[0];
+   };
+
+where::
+
+   struct kvmi_page_access_entry {
+   __u64 gpa;
+   __u8 access;
+   __u8 visible;
+   __u16 padding1;
+   __u32 padding2;
+   };
+
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Sets the access bits (rwx) for an array of ``count`` guest physical
+addresses.
+
+The valid access bits are::
+
+   KVMI_PAGE_ACCESS_R
+   KVMI_PAGE_ACCESS_W
+   KVMI_PAGE_ACCESS_X
+
+
+The command will fail with -KVM_EINVAL if any of the specified combinations
+of access bits is not supported. It will also fail if ``visible`` is set
+to 1 but the page is not visible.
+
+The command will try to apply all changes and return the first error if
+some failed. The introspection tool should handle the rollback.
+
+In order to 'forget' an address, all three bits ('rwx') must be set.
+
+:Errors:
+
+* -KVM_EINVAL - the specified access bits combination is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - the message size is invalid
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_ENOMEM - there is not enough memory to add the page tracking structures
+
 Events
 ==
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index acfcebce51dd..d96bf0e15ea2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -45,6 +45,8 @@
 #define KVM_PRIVATE_MEM_SLOTS 3
 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 
+#include 
+
 #define KVM_HALT_POLL_NS_DEFAULT 20
 
 #define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index acc003403c95..98ea548c0b15 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -2,6 +2,7 @@
 #ifndef _ASM_X86_KVMI_HOST_H
 #define _ASM_X86_KVMI_HOST_H
 
+#include 
 #include 
 
 struct msr_data;
@@ -42,6 +43,12 @@ struct kvm_vcpu_arch_introspection {
 struct kvm_arch_introspection {
 };
 
+#define SLOTS_SIZE BITS_TO_LONGS(KVM_MEM_SLOTS_NUM)
+
+struct kvmi_arch_mem_access {
+   unsigned long active[KVM_PAGE_TRACK_MAX][SLOTS_SIZE];
+};
+
 #ifdef CONFIG_KVM_INTROSPECTION
 
 bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 dbg);
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 0b1301ebafba..b233a3c5becb 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1169,3 +1169,43 @@ bool kvmi_msr_event(struct kvm_vcpu *vcpu, struct 
msr_data *msr)
 
return ret;
 }
+
+static const struct {
+   unsigned int allow_bit;
+   enum kvm_page_track_mode track_mode;
+} track_modes[] = {
+   { KVMI_PAGE_ACCESS_R, KVM_PAGE_TRACK_PREREAD },
+   { KVMI_PAGE_ACCESS_W, KVM_PAGE_TRACK_PREWRITE },
+   { KVMI_PAGE_ACCESS_X, KVM_PAGE_TRACK_PREEXEC },
+};
+
+void kvmi_arch_update_page_tracking(struct kvm *kvm,
+   struct kvm_memory_slot *slot,
+   struct kvmi_mem_access *m)
+{
+   str

[PATCH v9 00/84] VM introspection

2020-07-21 Thread Adalbert Lazăr
The KVM introspection subsystem provides a facility for applications
running on the host or in a separate VM, to control the execution of
other VMs (pause, resume, shutdown), query the state of the vCPUs (GPRs,
MSRs etc.), alter the page access bits in the shadow page tables (only
for the hardware backed ones, eg. Intel's EPT) and receive notifications
when events of interest have taken place (shadow page table level faults,
key MSR writes, hypercalls etc.). Some notifications can be responded
to with an action (like preventing an MSR from being written), others
are merely informative (like breakpoint events, which can be used for
execution tracing).  With few exceptions, all events are optional. An
application using this subsystem will explicitly register for them.

The use case that gave way for the creation of this subsystem is to
monitor the guest OS and as such the ABI/API is highly influenced by how
the guest software (kernel, applications) sees the world. For example,
some events provide information specific for the host CPU architecture
(e.g. MSR_IA32_SYSENTER_EIP) merely because it's leveraged by guest software
to implement a critical feature (fast system calls).

At the moment, the target audience for KVMI are security software authors
that wish to perform forensics on newly discovered threats (exploits)
or to implement another layer of security like preventing a large set
of kernel rootkits simply by "locking" the kernel image in the shadow
page tables (ie. enforce .text r-x, .rodata rw- etc.). It's the latter
case that made KVMI a separate subsystem, even though many of these
features are available in the device manager. The ability to build a
security application that does not interfere (in terms of performance)
with the guest software asks for a specialized interface that is designed
for minimum overhead.

This patch series is based on kvm/master,
commit 3d9fdc252b52 ("KVM: MIPS: Fix build errors for 32bit kernel").

The previous version (v8) can be read here:


https://lore.kernel.org/kvm/20200330101308.21702-1-ala...@bitdefender.com/

Patches 1-36: make preparatory changes

Patches 38-82: add basic introspection capabilities

Patch 83: support introspection tools that write-protect guest page tables

Patch 84: notify the introspection tool even on emulation failures
  (when the read/write callbacks used by the emulator,
   kvm_page_preread/kvm_page_prewrite, are not invoked)

Changes since v8:
  - rebase on 5.8
  - fix non-x86 builds (avoid including the UAPI headers from kvmi_host.h)
  - fix the clean-up for KVMI_VCPU_SINGLESTEP [Mathieu]
  - extend KVMI_VM_SET_PAGE_ACCESS with the 'visible' option
  - improve KVMI_VM_GET_MAX_GFN (skip read-only, invalid or non-user memslots)
  - add KVMI_VM_CONTROL_CLEANUP [Tamas, Mathieu]
  - add KVMI_VCPU_GET_XCR and KVMI_VCPU_SET_XSAVE (SSE emulation)
  - move KVM_REQ_INTROSPECTION in the range of arch-independent requests
  - better split of x86 vs arch-independent code
  - cover more error codes with tools/testing/selftests/kvm/x86_64/kvmi_test.c
  - remove more error messages and close the introspection connection
when an error code can't be sent back or it doesn't make sense to send it
  - other small changes (code refactoring, message validation, etc.).

Adalbert Lazăr (22):
  KVM: UAPI: add error codes used by the VM introspection code
  KVM: add kvm_vcpu_kick_and_wait()
  KVM: doc: fix the hypercall numbering
  KVM: x86: add .control_cr3_intercept() to struct kvm_x86_ops
  KVM: x86: add .desc_ctrl_supported()
  KVM: x86: add .control_desc_intercept()
  KVM: x86: export kvm_vcpu_ioctl_x86_set_xsave()
  KVM: introspection: add hook/unhook ioctls
  KVM: introspection: add permission access ioctls
  KVM: introspection: add the read/dispatch message function
  KVM: introspection: add KVMI_GET_VERSION
  KVM: introspection: add KVMI_VM_CHECK_COMMAND and KVMI_VM_CHECK_EVENT
  KVM: introspection: add KVMI_EVENT_UNHOOK
  KVM: introspection: add KVMI_VM_CONTROL_EVENTS
  KVM: introspection: add a jobs list to every introspected vCPU
  KVM: introspection: add KVMI_VCPU_PAUSE
  KVM: introspection: add KVMI_EVENT_PAUSE_VCPU
  KVM: introspection: add KVMI_VM_CONTROL_CLEANUP
  KVM: introspection: add KVMI_VCPU_GET_XCR
  KVM: introspection: add KVMI_VCPU_SET_XSAVE
  KVM: introspection: extend KVMI_GET_VERSION with struct kvmi_features
  KVM: introspection: add KVMI_VCPU_TRANSLATE_GVA

Marian Rotariu (1):
  KVM: introspection: add KVMI_VCPU_GET_CPUID

Mathieu Tarral (1):
  signal: export kill_pid_info()

Mihai Donțu (35):
  KVM: x86: add kvm_arch_vcpu_get_regs() and kvm_arch_vcpu_get_sregs()
  KVM: x86: avoid injecting #PF when emulate the VMCALL instruction
  KVM: x86: add .control_msr_intercept()
  KVM: x86: vmx: use a symbolic constant when checking the exit
qualifications
  KVM: x86: save the error code during EPT/NPF exits handling
  KVM: x86: add .fault_gla()
  KVM: x86: add .spt_fault()
  KVM: x86: add .g

[PATCH v9 11/84] KVM: x86: add .cr3_write_intercepted()

2020-07-21 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This function will be used to allow the introspection tool to disable the
CR3-write interception when it is no longer interested in these events,
but only if nothing else depends on these VM-exits.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 1 +
 arch/x86/kvm/svm/svm.c  | 8 
 arch/x86/kvm/vmx/vmx.c  | 8 
 3 files changed, 17 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 89c0bd6529a5..ac45aacc9fc0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1117,6 +1117,7 @@ struct kvm_x86_ops {
int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
void (*control_cr3_intercept)(struct kvm_vcpu *vcpu, int type,
  bool enable);
+   bool (*cr3_write_intercepted)(struct kvm_vcpu *vcpu);
void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f14fc940538b..7a4ec6fbffb9 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1609,6 +1609,13 @@ static void svm_control_cr3_intercept(struct kvm_vcpu 
*vcpu, int type,
 clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
 }
 
+static bool svm_cr3_write_intercepted(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   return is_cr_intercept(svm, INTERCEPT_CR3_WRITE);
+}
+
 static void svm_set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
 {
@@ -4022,6 +4029,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_cr0 = svm_set_cr0,
.set_cr4 = svm_set_cr4,
.control_cr3_intercept = svm_control_cr3_intercept,
+   .cr3_write_intercepted = svm_cr3_write_intercepted,
.set_efer = svm_set_efer,
.get_idt = svm_get_idt,
.set_idt = svm_set_idt,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6b9639703560..61eb64cf25c7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3020,6 +3020,13 @@ static void vmx_control_cr3_intercept(struct kvm_vcpu 
*vcpu, int type,
exec_controls_clearbit(vmx, cr3_exec_control);
 }
 
+static bool vmx_cr3_write_intercepted(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   return !!(exec_controls_get(vmx) & CPU_BASED_CR3_LOAD_EXITING);
+}
+
 static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
unsigned long cr0,
struct kvm_vcpu *vcpu)
@@ -7890,6 +7897,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_cr0 = vmx_set_cr0,
.set_cr4 = vmx_set_cr4,
.control_cr3_intercept = vmx_control_cr3_intercept,
+   .cr3_write_intercepted = vmx_cr3_write_intercepted,
.set_efer = vmx_set_efer,
.get_idt = vmx_get_idt,
.set_idt = vmx_set_idt,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 13/84] KVM: svm: add support for descriptor-table exits

2020-07-21 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This function is needed for the KVMI_EVENT_DESCRIPTOR event.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/svm/svm.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f4d882ca0060..b540af04b384 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2220,6 +2220,13 @@ static int rsm_interception(struct vcpu_svm *svm)
return kvm_emulate_instruction_from_buffer(>vcpu, rsm_ins_bytes, 
2);
 }
 
+static int descriptor_access_interception(struct vcpu_svm *svm)
+{
+   struct kvm_vcpu *vcpu = >vcpu;
+
+   return kvm_emulate_instruction(vcpu, 0);
+}
+
 static int rdpmc_interception(struct vcpu_svm *svm)
 {
int err;
@@ -2815,6 +2822,14 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm 
*svm) = {
[SVM_EXIT_RSM]  = rsm_interception,
[SVM_EXIT_AVIC_INCOMPLETE_IPI]  = 
avic_incomplete_ipi_interception,
[SVM_EXIT_AVIC_UNACCELERATED_ACCESS]= 
avic_unaccelerated_access_interception,
+   [SVM_EXIT_IDTR_READ]= 
descriptor_access_interception,
+   [SVM_EXIT_GDTR_READ]= 
descriptor_access_interception,
+   [SVM_EXIT_LDTR_READ]= 
descriptor_access_interception,
+   [SVM_EXIT_TR_READ]  = 
descriptor_access_interception,
+   [SVM_EXIT_IDTR_WRITE]   = 
descriptor_access_interception,
+   [SVM_EXIT_GDTR_WRITE]   = 
descriptor_access_interception,
+   [SVM_EXIT_LDTR_WRITE]   = 
descriptor_access_interception,
+   [SVM_EXIT_TR_WRITE] = 
descriptor_access_interception,
 };
 
 static void dump_vmcb(struct kvm_vcpu *vcpu)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 79/84] KVM: introspection: extend KVMI_GET_VERSION with struct kvmi_features

2020-07-21 Thread Adalbert Lazăr
This is used by the introspection tool to check the hardware support
for the single step feature.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst| 13 -
 arch/x86/include/uapi/asm/kvmi.h   |  5 +
 arch/x86/kvm/kvmi.c|  5 +
 include/uapi/linux/kvmi.h  |  1 +
 tools/testing/selftests/kvm/x86_64/kvmi_test.c |  5 +
 virt/kvm/introspection/kvmi_int.h  |  1 +
 virt/kvm/introspection/kvmi_msg.c  |  2 ++
 7 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index b2e2a9edda77..47387f297029 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -254,9 +254,20 @@ The vCPU commands start with::
struct kvmi_get_version_reply {
__u32 version;
__u32 padding;
+   struct kvmi_features features;
};
 
-Returns the introspection API version.
+For x86
+
+::
+
+   struct kvmi_features {
+   __u8 singlestep;
+   __u8 padding[7];
+   };
+
+Returns the introspection API version and some of the features supported
+by the hardware.
 
 This command is always allowed and successful.
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 1bb13da61dbf..32af803f1d70 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -145,4 +145,9 @@ struct kvmi_event_msr_reply {
__u64 new_val;
 };
 
+struct kvmi_features {
+   __u8 singlestep;
+   __u8 padding[7];
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 8fbf1720749b..672a113b3bf4 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1350,3 +1350,8 @@ static void kvmi_track_flush_slot(struct kvm *kvm, struct 
kvm_memory_slot *slot,
 
kvmi_put(kvm);
 }
+
+void kvmi_arch_features(struct kvmi_features *feat)
+{
+   feat->singlestep = !!kvm_x86_ops.control_singlestep;
+}
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index dc7ba12498b7..a84affbafa67 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -101,6 +101,7 @@ struct kvmi_error_code {
 struct kvmi_get_version_reply {
__u32 version;
__u32 padding;
+   struct kvmi_features features;
 };
 
 struct kvmi_vm_check_command {
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 21b3f7a459c8..eabe7dae149e 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -56,6 +56,8 @@ struct vcpu_worker_data {
bool restart_on_shutdown;
 };
 
+static struct kvmi_features features;
+
 typedef void (*fct_pf_event)(struct kvm_vm *vm, struct kvmi_msg_hdr *hdr,
struct pf_ev *ev,
struct vcpu_reply *rpl);
@@ -437,7 +439,10 @@ static void test_cmd_get_version(void)
"Unexpected KVMI version %d, expecting %d\n",
rpl.version, KVMI_VERSION);
 
+   features = rpl.features;
+
pr_info("KVMI version: %u\n", rpl.version);
+   pr_info("\tsinglestep: %u\n", features.singlestep);
 }
 
 static void cmd_vm_check_command(__u16 id, __u16 padding, int expected_err)
diff --git a/virt/kvm/introspection/kvmi_int.h 
b/virt/kvm/introspection/kvmi_int.h
index 9f2341fe21d5..68b8d60a7fac 100644
--- a/virt/kvm/introspection/kvmi_int.h
+++ b/virt/kvm/introspection/kvmi_int.h
@@ -138,5 +138,6 @@ void kvmi_arch_update_page_tracking(struct kvm *kvm,
struct kvmi_mem_access *m);
 void kvmi_arch_hook(struct kvm *kvm);
 void kvmi_arch_unhook(struct kvm *kvm);
+void kvmi_arch_features(struct kvmi_features *feat);
 
 #endif
diff --git a/virt/kvm/introspection/kvmi_msg.c 
b/virt/kvm/introspection/kvmi_msg.c
index 0a0d10b43f2d..e754cee48912 100644
--- a/virt/kvm/introspection/kvmi_msg.c
+++ b/virt/kvm/introspection/kvmi_msg.c
@@ -148,6 +148,8 @@ static int handle_get_version(struct kvm_introspection 
*kvmi,
memset(, 0, sizeof(rpl));
rpl.version = kvmi_version();
 
+   kvmi_arch_features();
+
return kvmi_msg_vm_reply(kvmi, msg, 0, , sizeof(rpl));
 }
 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 62/84] KVM: introspection: restore the state of #BP interception on unhook

2020-07-21 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This commit also ensures that only the userspace or the introspection
tool can control the #BP interception exclusively at one time.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvmi_host.h  | 18 ++
 arch/x86/kvm/kvmi.c   | 60 +++
 arch/x86/kvm/x86.c|  5 ++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 16 +
 4 files changed, 99 insertions(+)

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 6d274f173fb5..5f2a968831d3 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -4,8 +4,15 @@
 
 #include 
 
+struct kvmi_monitor_interception {
+   bool kvmi_intercepted;
+   bool kvm_intercepted;
+   bool (*monitor_fct)(struct kvm_vcpu *vcpu, bool enable);
+};
+
 struct kvmi_interception {
bool restore_interception;
+   struct kvmi_monitor_interception breakpoint;
 };
 
 struct kvm_vcpu_arch_introspection {
@@ -14,4 +21,15 @@ struct kvm_vcpu_arch_introspection {
 struct kvm_arch_introspection {
 };
 
+#ifdef CONFIG_KVM_INTROSPECTION
+
+bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 dbg);
+
+#else /* CONFIG_KVM_INTROSPECTION */
+
+static inline bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 dbg)
+   { return false; }
+
+#endif /* CONFIG_KVM_INTROSPECTION */
+
 #endif /* _ASM_X86_KVMI_HOST_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index ca2ce7498cfe..56c02dad3b57 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -243,18 +243,71 @@ void kvmi_arch_hypercall_event(struct kvm_vcpu *vcpu)
}
 }
 
+/*
+ * Returns true if one side (kvm or kvmi) tries to enable/disable the 
breakpoint
+ * interception while the other side is still tracking it.
+ */
+bool kvmi_monitor_bp_intercept(struct kvm_vcpu *vcpu, u32 dbg)
+{
+   struct kvmi_interception *arch_vcpui = READ_ONCE(vcpu->arch.kvmi);
+   u32 bp_mask = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
+   bool enable = false;
+
+   if ((dbg & bp_mask) == bp_mask)
+   enable = true;
+
+   return (arch_vcpui && arch_vcpui->breakpoint.monitor_fct(vcpu, enable));
+}
+EXPORT_SYMBOL(kvmi_monitor_bp_intercept);
+
+static bool monitor_bp_fct_kvmi(struct kvm_vcpu *vcpu, bool enable)
+{
+   if (enable) {
+   if (kvm_x86_ops.bp_intercepted(vcpu))
+   return true;
+   } else if (!vcpu->arch.kvmi->breakpoint.kvmi_intercepted)
+   return true;
+
+   vcpu->arch.kvmi->breakpoint.kvmi_intercepted = enable;
+
+   return false;
+}
+
+static bool monitor_bp_fct_kvm(struct kvm_vcpu *vcpu, bool enable)
+{
+   if (enable) {
+   if (kvm_x86_ops.bp_intercepted(vcpu))
+   return true;
+   } else if (!vcpu->arch.kvmi->breakpoint.kvm_intercepted)
+   return true;
+
+   vcpu->arch.kvmi->breakpoint.kvm_intercepted = enable;
+
+   return false;
+}
+
 static int kvmi_control_bp_intercept(struct kvm_vcpu *vcpu, bool enable)
 {
struct kvm_guest_debug dbg = {};
int err = 0;
 
+   vcpu->arch.kvmi->breakpoint.monitor_fct = monitor_bp_fct_kvmi;
if (enable)
dbg.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
err = kvm_arch_vcpu_set_guest_debug(vcpu, );
+   vcpu->arch.kvmi->breakpoint.monitor_fct = monitor_bp_fct_kvm;
 
return err;
 }
 
+static void kvmi_arch_disable_bp_intercept(struct kvm_vcpu *vcpu)
+{
+   kvmi_control_bp_intercept(vcpu, false);
+
+   vcpu->arch.kvmi->breakpoint.kvmi_intercepted = false;
+   vcpu->arch.kvmi->breakpoint.kvm_intercepted = false;
+}
+
 int kvmi_arch_cmd_control_intercept(struct kvm_vcpu *vcpu,
unsigned int event_id, bool enable)
 {
@@ -293,6 +346,7 @@ void kvmi_arch_breakpoint_event(struct kvm_vcpu *vcpu, u64 
gva, u8 insn_len)
 
 static void kvmi_arch_restore_interception(struct kvm_vcpu *vcpu)
 {
+   kvmi_arch_disable_bp_intercept(vcpu);
 }
 
 bool kvmi_arch_clean_up_interception(struct kvm_vcpu *vcpu)
@@ -318,6 +372,12 @@ bool kvmi_arch_vcpu_alloc_interception(struct kvm_vcpu 
*vcpu)
if (!arch_vcpui)
return false;
 
+   arch_vcpui->breakpoint.monitor_fct = monitor_bp_fct_kvm;
+
+   /* pair with kvmi_monitor_bp_intercept() */
+   smp_wmb();
+   WRITE_ONCE(vcpu->arch.kvmi, arch_vcpui);
+
return true;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0d5ce07c4164..9c8b7a3c5758 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9325,6 +9325,11 @@ int kvm_arch_vcpu_set_guest_debug(struct kvm_vcpu *vcpu,
kvm_queue_exception(vcpu, BP_VECTOR);
}
 
+   if (kvmi_monitor_bp_intercept(vcpu, dbg->control)) {
+   r = -EBU

[PATCH v9 67/84] KVM: introspection: add KVMI_VM_GET_MAX_GFN

2020-07-21 Thread Adalbert Lazăr
From: Ștefan Șicleru 

The introspection tool will use this command to get the memory address
range for which it can set access restrictions.

Signed-off-by: Ștefan Șicleru 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 19 +++
 include/uapi/linux/kvmi.h |  6 ++
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 12 
 virt/kvm/introspection/kvmi_msg.c | 13 +
 4 files changed, 50 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 4263a9ac90e4..7da8efd18b89 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -789,6 +789,25 @@ exception.
 * -KVM_EBUSY - another *KVMI_VCPU_INJECT_EXCEPTION*-*KVMI_EVENT_TRAP* pair
is in progress
 
+17. KVMI_VM_GET_MAX_GFN
+---
+
+:Architecture: all
+:Versions: >= 1
+:Parameters: none
+:Returns:
+
+::
+
+struct kvmi_error_code;
+struct kvmi_vm_get_max_gfn_reply {
+__u64 gfn;
+};
+
+Provides the maximum GFN allocated to the VM by walking through all
+memory slots allocated by KVM. Strictly speaking, the returned value refers
+to the first inaccessible GFN, next to the maximum accessible GFN.
+
 Events
 ==
 
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index faf4624d7a97..2a4cc8c41465 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -37,6 +37,8 @@ enum {
KVMI_VCPU_CONTROL_CR   = 15,
KVMI_VCPU_INJECT_EXCEPTION = 16,
 
+   KVMI_VM_GET_MAX_GFN = 17,
+
KVMI_NUM_MESSAGES
 };
 
@@ -149,6 +151,10 @@ struct kvmi_vm_control_cleanup {
__u32 padding3;
 };
 
+struct kvmi_vm_get_max_gfn_reply {
+   __u64 gfn;
+};
+
 struct kvmi_event {
__u16 size;
__u16 vcpu;
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 9abf4ec0d09a..105adf75a68d 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -1420,6 +1420,17 @@ static void test_cmd_vcpu_inject_exception(struct kvm_vm 
*vm)
disable_vcpu_event(vm, KVMI_EVENT_BREAKPOINT);
 }
 
+static void test_cmd_vm_get_max_gfn(void)
+{
+   struct kvmi_vm_get_max_gfn_reply rpl;
+   struct kvmi_msg_hdr req;
+
+   test_vm_command(KVMI_VM_GET_MAX_GFN, , sizeof(req),
+   , sizeof(rpl));
+
+   pr_info("max_gfn: 0x%llx\n", rpl.gfn);
+}
+
 static void test_introspection(struct kvm_vm *vm)
 {
srandom(time(0));
@@ -1445,6 +1456,7 @@ static void test_introspection(struct kvm_vm *vm)
test_cmd_vm_control_cleanup(vm);
test_cmd_vcpu_control_cr(vm);
test_cmd_vcpu_inject_exception(vm);
+   test_cmd_vm_get_max_gfn();
 
unhook_introspection(vm);
 }
diff --git a/virt/kvm/introspection/kvmi_msg.c 
b/virt/kvm/introspection/kvmi_msg.c
index 63efb85ff1ae..18bc1a711845 100644
--- a/virt/kvm/introspection/kvmi_msg.c
+++ b/virt/kvm/introspection/kvmi_msg.c
@@ -322,6 +322,18 @@ static int handle_vm_control_cleanup(struct 
kvm_introspection *kvmi,
return kvmi_msg_vm_reply(kvmi, msg, ec, NULL, 0);
 }
 
+static int handle_vm_get_max_gfn(struct kvm_introspection *kvmi,
+const struct kvmi_msg_hdr *msg,
+const void *req)
+{
+   struct kvmi_vm_get_max_gfn_reply rpl;
+
+   memset(, 0, sizeof(rpl));
+   rpl.gfn = kvm_get_max_gfn(kvmi->kvm);
+
+   return kvmi_msg_vm_reply(kvmi, msg, 0, , sizeof(rpl));
+}
+
 /*
  * These commands are executed by the receiving thread.
  */
@@ -334,6 +346,7 @@ static int(*const msg_vm[])(struct kvm_introspection *,
[KVMI_VM_CONTROL_CLEANUP] = handle_vm_control_cleanup,
[KVMI_VM_CONTROL_EVENTS]  = handle_vm_control_events,
[KVMI_VM_GET_INFO]= handle_vm_get_info,
+   [KVMI_VM_GET_MAX_GFN] = handle_vm_get_max_gfn,
[KVMI_VM_READ_PHYSICAL]   = handle_vm_read_physical,
[KVMI_VM_WRITE_PHYSICAL]  = handle_vm_write_physical,
 };
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v9 55/84] KVM: introspection: add KVMI_VCPU_CONTROL_EVENTS

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

By default, all introspection vCPU events are disabled. The introspection
tool must explicitly enable the vCPU events it wants to receive. With
this command (KVMI_VCPU_CONTROL_EVENTS) it can enable/disable any vCPU
event if allowed by the device manager.

Some vCPU events don't have to be explicitly enabled (and can't be
disabled) with this command because they are implicitly enabled/requested
by the use of certain commands. For example, if the introspection
tool uses the KVMI_VCPU_PAUSE command, it wants to receive an
KVMI_EVENT_PAUSE_VCPU event.

Signed-off-by: Mihai Donțu 
Co-developed-by: Adalbert Lazăr 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 51 +-
 include/linux/kvmi_host.h |  2 +
 include/uapi/linux/kvmi.h | 12 -
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 54 +++
 virt/kvm/introspection/kvmi.c | 26 +
 virt/kvm/introspection/kvmi_int.h |  3 ++
 virt/kvm/introspection/kvmi_msg.c | 26 -
 7 files changed, 169 insertions(+), 5 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 06c1cb34209e..4393ce89b2fa 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -377,6 +377,9 @@ the following events::
 
KVMI_EVENT_UNHOOK
 
+The vCPU events (e.g. *KVMI_EVENT_PAUSE_VCPU*) are controlled with
+the *KVMI_VCPU_CONTROL_EVENTS* command.
+
 :Errors:
 
 * -KVM_EINVAL - the padding is not zero
@@ -520,12 +523,58 @@ command) before returning to guest.
 *KVMI_EVENT_PAUSE_VCPU* events
 * -KVM_EPERM  - the *KVMI_EVENT_PAUSE_VCPU* event is disallowed
 
+10. KVMI_VCPU_CONTROL_EVENTS
+
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_control_events {
+   __u16 event_id;
+   __u8 enable;
+   __u8 padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code
+
+Enables/disables vCPU introspection events.
+
+When an event is enabled, the introspection tool is notified and
+must reply with: continue, retry, crash, etc. (see **Events** below).
+
+The following vCPU events don't have to be enabled and can't be disabled,
+because these are sent as a result of certain commands (but they can be
+disallowed by the device manager) ::
+
+   KVMI_EVENT_PAUSE_VCPU
+
+The VM events (e.g. *KVMI_EVENT_UNHOOK*) are controlled with
+the *KVMI_VM_CONTROL_EVENTS* command.
+
+:Errors:
+
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the event ID is unknown (use *KVMI_VM_CHECK_EVENT* first)
+* -KVM_EPERM - the access is disallowed (use *KVMI_VM_CHECK_EVENT* first)
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
 All introspection events (VM or vCPU related) are sent
 using the *KVMI_EVENT* message id. No event will be sent unless
-it is explicitly enabled or requested (eg. *KVMI_EVENT_PAUSE_VCPU*).
+it is explicitly enabled (see *KVMI_VM_CONTROL_EVENTS*
+and *KVMI_VCPU_CONTROL_EVENTS*) or requested (eg. *KVMI_EVENT_PAUSE_VCPU*).
 
 The *KVMI_EVENT_UNHOOK* event doesn't have a reply and share the kvmi_event
 structure, for consistency with the vCPU events.
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index a87f0322c584..9625c8f19379 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -31,6 +31,8 @@ struct kvm_vcpu_introspection {
 
struct kvmi_vcpu_reply reply;
bool waiting_for_reply;
+
+   unsigned long *ev_enable_mask;
 };
 
 struct kvm_introspection {
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 5a5b01df7e3e..9ebf17fa9564 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -25,8 +25,9 @@ enum {
KVMI_VM_READ_PHYSICAL  = 6,
KVMI_VM_WRITE_PHYSICAL = 7,
 
-   KVMI_VCPU_GET_INFO = 8,
-   KVMI_VCPU_PAUSE= 9,
+   KVMI_VCPU_GET_INFO   = 8,
+   KVMI_VCPU_PAUSE  = 9,
+   KVMI_VCPU_CONTROL_EVENTS = 10,
 
KVMI_NUM_MESSAGES
 };
@@ -122,6 +123,13 @@ struct kvmi_vcpu_pause {
__u32 padding3;
 };
 
+struct kvmi_vcpu_control_events {
+   __u16 event_id;
+   __u8 enable;
+   __u8 padding1;
+   __u32 padding2;
+};
+
 struct kvmi_event {
__u16 size;
__u16 vcpu;
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 5c5c5018832d..da6a06fa0baa 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -834,6 +834,59 @@ static void test_pause(struct kvm_vm *vm)
allow_event(vm, KVMI_EVENT_PAUSE_VCPU);
 }
 
+static void cmd_vcpu_control_event(struct kvm_vm 

[PATCH v9 32/84] KVM: x86: page track: provide all callbacks with the guest virtual address

2020-07-21 Thread Adalbert Lazăr
From: Mihai Donțu 

This is needed because the emulator calls the page tracking code
irrespective of the current VM-exit reason or available information.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h   |  2 +-
 arch/x86/include/asm/kvm_page_track.h | 10 ++
 arch/x86/kvm/mmu/mmu.c|  2 +-
 arch/x86/kvm/mmu/page_track.c |  6 +++---
 arch/x86/kvm/x86.c| 16 
 drivers/gpu/drm/i915/gvt/kvmgt.c  |  2 +-
 6 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4992afc19cf6..b6a1704e0f89 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1384,7 +1384,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned 
long kvm_nr_mmu_pages);
 int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
 bool pdptrs_changed(struct kvm_vcpu *vcpu);
 
-int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
+int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
  const void *val, int bytes);
 
 struct kvm_irq_mask_notifier {
diff --git a/arch/x86/include/asm/kvm_page_track.h 
b/arch/x86/include/asm/kvm_page_track.h
index 87bd6025d91d..9a261e463eb3 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -28,12 +28,14 @@ struct kvm_page_track_notifier_node {
 *
 * @vcpu: the vcpu where the write access happened.
 * @gpa: the physical address written by guest.
+* @gva: the virtual address written by guest.
 * @new: the data was written to the address.
 * @bytes: the written length.
 * @node: this node
 */
-   void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
-   int bytes, struct kvm_page_track_notifier_node 
*node);
+   void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+   const u8 *new, int bytes,
+   struct kvm_page_track_notifier_node *node);
/*
 * It is called when memory slot is being moved or removed
 * users can drop write-protection for the pages in that memory slot
@@ -68,7 +70,7 @@ kvm_page_track_register_notifier(struct kvm *kvm,
 void
 kvm_page_track_unregister_notifier(struct kvm *kvm,
   struct kvm_page_track_notifier_node *n);
-void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
- int bytes);
+void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ const u8 *new, int bytes);
 void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
 #endif
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6d6a0ae7800c..038a0e028e77 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5318,7 +5318,7 @@ static const union kvm_mmu_page_role role_ign = {
.invalid = 0x1,
 };
 
-static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
  const u8 *new, int bytes,
  struct kvm_page_track_notifier_node *node)
 {
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index a7bcde34d1f2..9642af1b2c21 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -216,8 +216,8 @@ EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
  * The node should figure out if the written page is the one that node is
  * interested in by itself.
  */
-void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
- int bytes)
+void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ const u8 *new, int bytes)
 {
struct kvm_page_track_notifier_head *head;
struct kvm_page_track_notifier_node *n;
@@ -231,7 +231,7 @@ void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, 
const u8 *new,
idx = srcu_read_lock(>track_srcu);
hlist_for_each_entry_rcu(n, >track_notifier_list, node)
if (n->track_write)
-   n->track_write(vcpu, gpa, new, bytes, n);
+   n->track_write(vcpu, gpa, gva, new, bytes, n);
srcu_read_unlock(>track_srcu, idx);
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b7eb223dc1aa..a59c935f4bbe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5720,7 +5720,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, 
unsigned long gva,
return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
 }
 
-int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
+int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,

[PATCH v9 52/84] KVM: introspection: add KVMI_VCPU_PAUSE

2020-07-21 Thread Adalbert Lazăr
This is the only vCPU command handled by the receiving thread.
It increments a pause requests counter and kicks the vCPU out of guest.

The introspection tool can pause a VM by sending this command to all
vCPUs. If it sets 'wait=1', it can consider that the VM is paused when
it receives the reply for last KVMI_VCPU_PAUSE command.

Usually, a vCPU command is dispatched to the vCPU thread after being read
from socket. This new command only signals the vCPU. Once out of guest,
the vCPU will send the event that caused the VM-exit (if it is the case),
handle the queued commands and only then checks its pause counter in
order to send the pause events requested by the introspection tool.

Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 66 ++-
 include/linux/kvmi_host.h |  2 +
 include/uapi/linux/kvmi.h | 11 +++-
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 53 +++
 virt/kvm/introspection/kvmi.c | 63 --
 virt/kvm/introspection/kvmi_int.h |  1 +
 virt/kvm/introspection/kvmi_msg.c | 42 
 7 files changed, 232 insertions(+), 6 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 5ead29a7b2a7..502ee06d5e77 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -480,12 +480,52 @@ Returns the TSC frequency (in HZ) for the specified vCPU 
if available
 * -KVM_EINVAL - the selected vCPU is invalid
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+9. KVMI_VCPU_PAUSE
+--
+
+:Architecture: all
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_pause {
+   __u8 wait;
+   __u8 padding1;
+   __u16 padding2;
+   __u32 padding3;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+
+Kicks the vCPU out of guest.
+
+If `wait` is 1, the command will wait for vCPU to acknowledge the IPI.
+
+The vCPU will handle the pending commands/events and send the
+*KVMI_EVENT_PAUSE_VCPU* event (one for every successful *KVMI_VCPU_PAUSE*
+command) before returning to guest.
+
+:Errors:
+
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EBUSY  - the selected vCPU has too many queued
+*KVMI_EVENT_PAUSE_VCPU* events
+* -KVM_EPERM  - the *KVMI_EVENT_PAUSE_VCPU* event is disallowed
+
 Events
 ==
 
 All introspection events (VM or vCPU related) are sent
 using the *KVMI_EVENT* message id. No event will be sent unless
-it is explicitly enabled.
+it is explicitly enabled or requested (eg. *KVMI_EVENT_PAUSE_VCPU*).
 
 The *KVMI_EVENT_UNHOOK* event doesn't have a reply and share the kvmi_event
 structure, for consistency with the vCPU events.
@@ -544,3 +584,27 @@ the guest (see **Unhooking**) and the introspection has 
been enabled
 for this event (see **KVMI_VM_CONTROL_EVENTS**). The introspection tool
 has a chance to unhook and close the KVMI channel (signaling that the
 operation can proceed).
+
+2. KVMI_EVENT_PAUSE_VCPU
+
+
+:Architectures: all
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+   struct kvmi_event;
+
+:Returns:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_event_reply;
+
+This event is sent in response to a *KVMI_VCPU_PAUSE* command.
+Because it has a low priority, it will be sent after any other vCPU
+introspection event and when no other vCPU introspection command is
+queued.
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index 956b8d5c51e3..fdb8ce6fe6a5 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -18,6 +18,8 @@ struct kvm_vcpu_introspection {
 
struct list_head job_list;
spinlock_t job_lock;
+
+   atomic_t pause_requests;
 };
 
 struct kvm_introspection {
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index a3dca420c887..3ded22020bef 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -26,12 +26,14 @@ enum {
KVMI_VM_WRITE_PHYSICAL = 7,
 
KVMI_VCPU_GET_INFO = 8,
+   KVMI_VCPU_PAUSE= 9,
 
KVMI_NUM_MESSAGES
 };
 
 enum {
-   KVMI_EVENT_UNHOOK = 0,
+   KVMI_EVENT_UNHOOK = 0,
+   KVMI_EVENT_PAUSE_VCPU = 1,
 
KVMI_NUM_EVENTS
 };
@@ -107,6 +109,13 @@ struct kvmi_vcpu_hdr {
__u32 padding2;
 };
 
+struct kvmi_vcpu_pause {
+   __u8 wait;
+   __u8 padding1;
+   __u16 padding2;
+   __u32 padding3;
+};
+
 struct kvmi_event {
__u16 size;
__u16 vcpu;
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 107661fbe52f..0df890b4b440 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/t

[RFC PATCH v1 23/34] KVM: x86: mmu: fix: update present_mask in spte_read_protect()

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

shadow_present_mask is not 0ull if #VE support is enabled.
If #VE support is enabled, shadow_present_mask is updated in
vmx_enable_tdp() with VMX_EPT_SUPPRESS_VE_BIT.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/mmu/mmu.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 810e22f41306..28ab4a1ba25a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1601,7 +1601,13 @@ static bool spte_write_protect(u64 *sptep, bool 
pt_protect)
 static bool spte_read_protect(u64 *sptep)
 {
u64 spte = *sptep;
-   bool exec_only_supported = (shadow_present_mask == 0ull);
+   bool exec_only_supported;
+
+   if (kvm_ve_supported)
+   exec_only_supported =
+   (shadow_present_mask == VMX_EPT_SUPPRESS_VE_BIT);
+   else
+   exec_only_supported = (shadow_present_mask == 0ull);
 
rmap_printk("rmap_read_protect: spte %p %llx\n", sptep, *sptep);
 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 04/34] KVM: x86: mmu: reindent to avoid lines longer than 80 chars

2020-07-22 Thread Adalbert Lazăr
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/mmu/mmu.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 97766f34910d..f3ba4d0452c9 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2573,6 +2573,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct 
kvm_vcpu *vcpu,
bool flush = false;
int collisions = 0;
LIST_HEAD(invalid_list);
+   unsigned int pg_hash;
 
role = vcpu->arch.mmu->mmu_role.base;
role.level = level;
@@ -2623,8 +2624,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct 
kvm_vcpu *vcpu,
 
sp->gfn = gfn;
sp->role = role;
+   pg_hash = kvm_page_table_hashfn(gfn);
hlist_add_head(>hash_link,
-   >kvm->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)]);
+   >kvm->arch.mmu_page_hash[pg_hash]);
if (!direct) {
/*
 * we should do write protection before syncing pages
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 11/34] KVM: x86: mmu: allow zapping shadow pages for specific EPT views

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

Add a view mask for kvm_mmu_zap_all() in order to allow zapping
shadow pages for specific EPT views. This is required when an
introspected VM is unhooked. In that case, shadow pages that
belong to non-default views will be zapped.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 arch/x86/kvm/mmu/mmu.c  | 4 +++-
 arch/x86/kvm/x86.c  | 4 +++-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2fbb26b54cf1..519b8210b8ef 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1392,7 +1392,7 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
 void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
   struct kvm_memory_slot *slot,
   gfn_t gfn_offset, unsigned long mask);
-void kvm_mmu_zap_all(struct kvm *kvm);
+void kvm_mmu_zap_all(struct kvm *kvm, u16 view_mask);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
 unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index cca12982b795..22c83192bba1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6166,7 +6166,7 @@ void kvm_mmu_slot_set_dirty(struct kvm *kvm,
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty);
 
-void kvm_mmu_zap_all(struct kvm *kvm)
+void kvm_mmu_zap_all(struct kvm *kvm, u16 view_mask)
 {
struct kvm_mmu_page *sp, *node;
LIST_HEAD(invalid_list);
@@ -6175,6 +6175,8 @@ void kvm_mmu_zap_all(struct kvm *kvm)
spin_lock(>mmu_lock);
 restart:
list_for_each_entry_safe(sp, node, >arch.active_mmu_pages, link) {
+   if (!test_bit(sp->view, (unsigned long *)_mask))
+   continue;
if (sp->role.invalid && sp->root_count)
continue;
if (__kvm_mmu_prepare_zap_page(kvm, sp, _list, ))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2e2c56a37bdb..78aacac839bb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10406,7 +10406,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
-   kvm_mmu_zap_all(kvm);
+   u16 ept_views_to_keep = 0;
+
+   kvm_mmu_zap_all(kvm, ~ept_views_to_keep);
 }
 
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 10/34] KVM: x86: page track: allow page tracking for different EPT views

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

The introspection tool uses this to set distinct access rights on
different EPT views.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h   |  2 +-
 arch/x86/include/asm/kvm_page_track.h |  4 +-
 arch/x86/kvm/kvmi.c   |  6 ++-
 arch/x86/kvm/mmu.h|  9 ++--
 arch/x86/kvm/mmu/mmu.c| 60 +--
 arch/x86/kvm/mmu/page_track.c | 56 +
 drivers/gpu/drm/i915/gvt/kvmgt.c  |  8 ++--
 7 files changed, 86 insertions(+), 59 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5e241863153f..2fbb26b54cf1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -860,7 +860,7 @@ struct kvm_lpage_info {
 struct kvm_arch_memory_slot {
struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
-   unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
+   unsigned short *gfn_track[KVM_MAX_EPT_VIEWS][KVM_PAGE_TRACK_MAX];
 };
 
 /*
diff --git a/arch/x86/include/asm/kvm_page_track.h 
b/arch/x86/include/asm/kvm_page_track.h
index c10f0f65c77a..96d2ab7da4a7 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -109,10 +109,10 @@ int kvm_page_track_create_memslot(struct kvm *kvm, struct 
kvm_memory_slot *slot,
 
 void kvm_slot_page_track_add_page(struct kvm *kvm,
  struct kvm_memory_slot *slot, gfn_t gfn,
- enum kvm_page_track_mode mode);
+ enum kvm_page_track_mode mode, u16 view);
 void kvm_slot_page_track_remove_page(struct kvm *kvm,
 struct kvm_memory_slot *slot, gfn_t gfn,
-enum kvm_page_track_mode mode);
+enum kvm_page_track_mode mode, u16 view);
 bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
  enum kvm_page_track_mode mode);
 
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 4e75858c03b4..7b3b64d27d18 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1215,11 +1215,13 @@ void kvmi_arch_update_page_tracking(struct kvm *kvm,
if (m->access & allow_bit) {
if (slot_tracked) {
kvm_slot_page_track_remove_page(kvm, slot,
-   m->gfn, mode);
+   m->gfn, mode,
+   0);
clear_bit(slot->id, arch->active[mode]);
}
} else if (!slot_tracked) {
-   kvm_slot_page_track_add_page(kvm, slot, m->gfn, mode);
+   kvm_slot_page_track_add_page(kvm, slot, m->gfn, mode,
+0);
set_bit(slot->id, arch->active[mode]);
}
}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index e2c0518af750..2692b14fb605 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -221,11 +221,14 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, 
gfn_t gfn_end);
 void kvm_mmu_gfn_disallow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
-   struct kvm_memory_slot *slot, u64 gfn);
+   struct kvm_memory_slot *slot, u64 gfn,
+   u16 view);
 bool kvm_mmu_slot_gfn_read_protect(struct kvm *kvm,
-  struct kvm_memory_slot *slot, u64 gfn);
+  struct kvm_memory_slot *slot, u64 gfn,
+  u16 view);
 bool kvm_mmu_slot_gfn_exec_protect(struct kvm *kvm,
-  struct kvm_memory_slot *slot, u64 gfn);
+  struct kvm_memory_slot *slot, u64 gfn,
+  u16 view);
 int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
 
 int kvm_mmu_post_init_vm(struct kvm *kvm);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 70461c7ef58c..cca12982b795 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1231,9 +1231,9 @@ static void account_shadowed(struct kvm *kvm, struct 
kvm_mmu_page *sp)
/* the non-leaf shadow pages are keeping readonly. */
if (sp->role.level > PG_LEVEL_4K) {
kvm_slot_page_track_add_page(kvm, slot, gfn,
-  

[RFC PATCH v1 31/34] KVM: introspection: add #VE host capability checker

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

Add one more field to struct kvmi_features in order to publish #VE
capabilities on the host, as indicated by the kvm_ve_supported flag.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst| 5 +++--
 arch/x86/include/uapi/asm/kvmi.h   | 3 ++-
 arch/x86/kvm/kvmi.c| 1 +
 tools/testing/selftests/kvm/x86_64/kvmi_test.c | 1 +
 4 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 658c9df01469..caa51fccc463 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -265,11 +265,12 @@ For x86
__u8 singlestep;
__u8 vmfunc;
__u8 eptp;
-   __u8 padding[5];
+   __u8 ve;
+   __u8 padding[4];
};
 
 Returns the introspection API version and some of the features supported
-by the hardware (eg. alternate EPT views).
+by the hardware (eg. alternate EPT views, virtualization exception).
 
 This command is always allowed and successful.
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index fc35da900778..56992dacfb69 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -151,7 +151,8 @@ struct kvmi_features {
__u8 singlestep;
__u8 vmfunc;
__u8 eptp;
-   __u8 padding[5];
+   __u8 ve;
+   __u8 padding[4];
 };
 
 struct kvmi_vcpu_get_ept_view_reply {
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 27fd732cff29..3e8c83623703 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1383,6 +1383,7 @@ void kvmi_arch_features(struct kvmi_features *feat)
kvm_x86_ops.get_vmfunc_status();
feat->eptp = kvm_x86_ops.get_eptp_switching_status &&
kvm_x86_ops.get_eptp_switching_status();
+   feat->ve = kvm_ve_supported;
 }
 
 bool kvmi_arch_start_singlestep(struct kvm_vcpu *vcpu)
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index d808cb61463d..4e099cbfcf4e 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -459,6 +459,7 @@ static void test_cmd_get_version(void)
pr_info("\tsinglestep: %u\n", features.singlestep);
pr_info("\tvmfunc: %u\n", features.vmfunc);
pr_info("\teptp: %u\n", features.eptp);
+   pr_info("\tve: %u\n", features.ve);
 }
 
 static void cmd_vm_check_command(__u16 id, __u16 padding, int expected_err)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 02/34] KVM: x86: export .get_eptp_switching_status()

2020-07-22 Thread Adalbert Lazăr
From: Marian Rotariu 

The introspection tool uses this function to check the hardware support
for EPT switching, which can be used either to singlestep vCPUs
on an unprotected EPT view or to use #VE in order to avoid the
VM-exits caused by EPT violations.

Signed-off-by: Marian Rotariu 
Co-developed-by: Ștefan Șicleru 
Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 2 ++
 arch/x86/kvm/vmx/capabilities.h | 8 
 arch/x86/kvm/vmx/vmx.c  | 8 
 arch/x86/kvm/x86.c  | 3 +++
 4 files changed, 21 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ab6989745f9c..5eb26135e81b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1301,6 +1301,7 @@ struct kvm_x86_ops {
bool (*gpt_translation_fault)(struct kvm_vcpu *vcpu);
void (*control_singlestep)(struct kvm_vcpu *vcpu, bool enable);
bool (*get_vmfunc_status)(void);
+   bool (*get_eptp_switching_status)(void);
 };
 
 struct kvm_x86_nested_ops {
@@ -1422,6 +1423,7 @@ extern u64  kvm_max_tsc_scaling_ratio;
 extern u64  kvm_default_tsc_scaling_ratio;
 
 extern u64 kvm_mce_cap_supported;
+extern bool kvm_eptp_switching_supported;
 
 /*
  * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index e7d7fcb7e17f..92781e2c523e 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -219,6 +219,14 @@ static inline bool cpu_has_vmx_vmfunc(void)
SECONDARY_EXEC_ENABLE_VMFUNC;
 }
 
+static inline bool cpu_has_vmx_eptp_switching(void)
+{
+   u64 vmx_msr;
+
+   rdmsrl(MSR_IA32_VMX_VMFUNC, vmx_msr);
+   return vmx_msr & VMX_VMFUNC_EPTP_SWITCHING;
+}
+
 static inline bool cpu_has_vmx_shadow_vmcs(void)
 {
u64 vmx_msr;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ec4396d5f36f..ccbf561b0fc4 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7997,6 +7997,11 @@ static bool vmx_get_vmfunc_status(void)
return cpu_has_vmx_vmfunc();
 }
 
+static bool vmx_get_eptp_switching_status(void)
+{
+   return kvm_eptp_switching_supported;
+}
+
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
.hardware_unsetup = hardware_unsetup,
 
@@ -8139,6 +8144,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.gpt_translation_fault = vmx_gpt_translation_fault,
.control_singlestep = vmx_control_singlestep,
.get_vmfunc_status = vmx_get_vmfunc_status,
+   .get_eptp_switching_status = vmx_get_eptp_switching_status,
 };
 
 static __init int hardware_setup(void)
@@ -8178,6 +8184,8 @@ static __init int hardware_setup(void)
!cpu_has_vmx_invept_global())
enable_ept = 0;
 
+   kvm_eptp_switching_supported = cpu_has_vmx_eptp_switching();
+
if (!cpu_has_vmx_ept_ad_bits() || !enable_ept)
enable_ept_ad_bits = 0;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index feb20b29bb92..b16b018c74cc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -161,6 +161,9 @@ module_param(force_emulation_prefix, bool, S_IRUGO);
 int __read_mostly pi_inject_timer = -1;
 module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
 
+bool __read_mostly kvm_eptp_switching_supported;
+EXPORT_SYMBOL_GPL(kvm_eptp_switching_supported);
+
 #define KVM_NR_SHARED_MSRS 16
 
 struct kvm_shared_msrs_global {
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 17/34] KVM: introspection: extend the access rights database with EPT view info

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

On EPT violations, when we check if the introspection tool has shown
interest in the current guest page, we will take into consideration
the EPT view of the current vCPU too.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvmi_host.h  |   1 +
 arch/x86/kvm/kvmi.c   |   9 +--
 include/linux/kvmi_host.h |   2 +-
 virt/kvm/introspection/kvmi.c | 107 +-
 virt/kvm/introspection/kvmi_int.h |   4 +-
 5 files changed, 71 insertions(+), 52 deletions(-)

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
index 509fa3fff5e7..8af03ba38316 100644
--- a/arch/x86/include/asm/kvmi_host.h
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -9,6 +9,7 @@ struct msr_data;
 
 #define KVMI_NUM_CR 5
 #define KVMI_NUM_MSR 0x2000
+#define KVMI_MAX_ACCESS_TREES KVM_MAX_EPT_VIEWS
 
 struct kvmi_monitor_interception {
bool kvmi_intercepted;
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 06357b8ab54a..52885b9e5b6e 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1197,7 +1197,7 @@ static const struct {
 
 void kvmi_arch_update_page_tracking(struct kvm *kvm,
struct kvm_memory_slot *slot,
-   struct kvmi_mem_access *m)
+   struct kvmi_mem_access *m, u16 view)
 {
struct kvmi_arch_mem_access *arch = >arch;
int i;
@@ -1217,12 +1217,12 @@ void kvmi_arch_update_page_tracking(struct kvm *kvm,
if (slot_tracked) {
kvm_slot_page_track_remove_page(kvm, slot,
m->gfn, mode,
-   0);
+   view);
clear_bit(slot->id, arch->active[mode]);
}
} else if (!slot_tracked) {
kvm_slot_page_track_add_page(kvm, slot, m->gfn, mode,
-0);
+view);
set_bit(slot->id, arch->active[mode]);
}
}
@@ -1256,7 +1256,8 @@ static bool is_pf_of_interest(struct kvm_vcpu *vcpu, 
gpa_t gpa, u8 access)
if (kvm_x86_ops.gpt_translation_fault(vcpu))
return false;
 
-   return kvmi_restricted_page_access(KVMI(vcpu->kvm), gpa, access);
+   return kvmi_restricted_page_access(KVMI(vcpu->kvm), gpa, access,
+  kvm_get_ept_view(vcpu));
 }
 
 static bool handle_pf_event(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
diff --git a/include/linux/kvmi_host.h b/include/linux/kvmi_host.h
index 5baef68d8cbe..c38c7f16d5d0 100644
--- a/include/linux/kvmi_host.h
+++ b/include/linux/kvmi_host.h
@@ -69,7 +69,7 @@ struct kvm_introspection {
 
bool cleanup_on_unhook;
 
-   struct radix_tree_root access_tree;
+   struct radix_tree_root access_tree[KVMI_MAX_ACCESS_TREES];
rwlock_t access_tree_lock;
 };
 
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index 2a96b80bddb2..737fe3c7a956 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -258,20 +258,23 @@ static void kvmi_clear_mem_access(struct kvm *kvm)
struct kvm_introspection *kvmi = KVMI(kvm);
struct radix_tree_iter iter;
void **slot;
-   int idx;
+   int idx, view;
 
idx = srcu_read_lock(>srcu);
spin_lock(>mmu_lock);
 
-   radix_tree_for_each_slot(slot, >access_tree, , 0) {
-   struct kvmi_mem_access *m = *slot;
+   for (view = 0; view < KVMI_MAX_ACCESS_TREES; view++)
+   radix_tree_for_each_slot(slot, >access_tree[view],
+, 0) {
+   struct kvmi_mem_access *m = *slot;
 
-   m->access = full_access;
-   kvmi_arch_update_page_tracking(kvm, NULL, m);
+   m->access = full_access;
+   kvmi_arch_update_page_tracking(kvm, NULL, m, view);
 
-   radix_tree_iter_delete(>access_tree, , slot);
-   kmem_cache_free(radix_cache, m);
-   }
+   radix_tree_iter_delete(>access_tree[view],
+  , slot);
+   kmem_cache_free(radix_cache, m);
+   }
 
spin_unlock(>mmu_lock);
srcu_read_unlock(>srcu, idx);
@@ -335,8 +338,9 @@ alloc_kvmi(struct kvm *kvm, const struct 
kvm_introspection_hook *hook)
 
atomic_set(>ev_seq, 0);
 
-   INIT_RADIX_TREE(>access_tree,
-   GFP_KERNEL & ~__GFP_DIRECT_RECLAIM);
+   for (i = 0; i < ARRAY_SIZ

[RFC PATCH v1 29/34] KVM: vmx: make use of EPTP_INDEX in vmx_handle_exit()

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

If the guest has EPTP switching capabilities with VMFUNC, read the
current view from VMCS instead of walking through the EPTP list when #VE
support is active.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/vmx/vmx.c | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 96aa4b7e2857..035f6c43a2a4 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6269,15 +6269,21 @@ void dump_vmcs(void)
 
 static unsigned int update_ept_view(struct vcpu_vmx *vmx)
 {
-   u64 *eptp_list = phys_to_virt(page_to_phys(vmx->eptp_list_pg));
-   u64 eptp = vmcs_read64(EPT_POINTER);
-   unsigned int view;
+   /* if #VE support is active, read the EPT index from VMCS */
+   if (kvm_ve_supported &&
+   secondary_exec_controls_get(vmx) & SECONDARY_EXEC_EPT_VE) {
+   vmx->view = vmcs_read16(EPTP_INDEX);
+   } else {
+   u64 *eptp_list = phys_to_virt(page_to_phys(vmx->eptp_list_pg));
+   u64 eptp = vmcs_read64(EPT_POINTER);
+   unsigned int view;
 
-   for (view = 0; view < KVM_MAX_EPT_VIEWS; view++)
-   if (eptp_list[view] == eptp) {
-   vmx->view = view;
-   break;
-   }
+   for (view = 0; view < KVM_MAX_EPT_VIEWS; view++)
+   if (eptp_list[view] == eptp) {
+   vmx->view = view;
+   break;
+   }
+   }
 
return vmx->view;
 }
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 03/34] KVM: x86: add kvm_get_ept_view()

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

This function returns the EPT view of the current vCPU
or 0 if the hardware support is missing.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |  3 +++
 arch/x86/kvm/vmx/vmx.c  |  8 
 arch/x86/kvm/vmx/vmx.h  |  3 +++
 arch/x86/kvm/x86.c  | 10 ++
 4 files changed, 24 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5eb26135e81b..0acc21087caf 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1302,6 +1302,7 @@ struct kvm_x86_ops {
void (*control_singlestep)(struct kvm_vcpu *vcpu, bool enable);
bool (*get_vmfunc_status)(void);
bool (*get_eptp_switching_status)(void);
+   u16 (*get_ept_view)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_x86_nested_ops {
@@ -1773,4 +1774,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 #define GET_SMSTATE(type, buf, offset) \
(*(type *)((buf) + (offset) - 0x7e00))
 
+u16 kvm_get_ept_view(struct kvm_vcpu *vcpu);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index ccbf561b0fc4..0256c3a93c87 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8002,6 +8002,13 @@ static bool vmx_get_eptp_switching_status(void)
return kvm_eptp_switching_supported;
 }
 
+static u16 vmx_get_ept_view(struct kvm_vcpu *vcpu)
+{
+   const struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   return vmx->view;
+}
+
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
.hardware_unsetup = hardware_unsetup,
 
@@ -8145,6 +8152,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.control_singlestep = vmx_control_singlestep,
.get_vmfunc_status = vmx_get_vmfunc_status,
.get_eptp_switching_status = vmx_get_eptp_switching_status,
+   .get_ept_view = vmx_get_ept_view,
 };
 
 static __init int hardware_setup(void)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index aa0c7ffd588b..14f0b9102d58 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -296,6 +296,9 @@ struct vcpu_vmx {
u64 ept_pointer;
 
struct pt_desc pt_desc;
+
+   /* The view this vcpu operates on. */
+   u16 view;
 };
 
 enum ept_pointers_status {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b16b018c74cc..2e2c56a37bdb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10869,6 +10869,16 @@ u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_spec_ctrl_valid_bits);
 
+u16 kvm_get_ept_view(struct kvm_vcpu *vcpu)
+{
+   if (!kvm_x86_ops.get_ept_view)
+   return 0;
+
+   return kvm_x86_ops.get_ept_view(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_get_ept_view);
+
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 24/34] KVM: vmx: trigger vm-exits for mmio sptes by default when #VE is enabled

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

All SPTEs, including MMIO SPTEs, must have the SVE bit set by default,
in order to trigger VM-exits instead of #VEs (in case of an EPT violation).
MMIO sptes were overlooked in commit 28b8bc704111 ("KVM: VMX: Suppress EPT 
violation #VE by default (when enabled)")
which provided a new mask for non-mmio sptes.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/vmx/vmx.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3428857c6157..b65bd0d144e5 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4367,11 +4367,19 @@ static void vmx_compute_secondary_exec_control(struct 
vcpu_vmx *vmx)
 
 static void ept_set_mmio_spte_mask(void)
 {
+   u64 mmio_value = VMX_EPT_MISCONFIG_WX_VALUE;
+
+   /* All sptes, including mmio sptes should trigger vm-exits by
+* default, instead of #VE (when supported)
+*/
+   if (kvm_ve_supported)
+   mmio_value |= VMX_EPT_SUPPRESS_VE_BIT;
+
/*
 * EPT Misconfigurations can be generated if the value of bits 2:0
 * of an EPT paging-structure entry is 110b (write/execute).
 */
-   kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE, 0);
+   kvm_mmu_set_mmio_spte_mask(mmio_value, 0);
 }
 
 static int vmx_alloc_eptp_list_page(struct vcpu_vmx *vmx)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 30/34] KVM: vmx: make use of EPTP_INDEX in vmx_set_ept_view()

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/vmx/vmx.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 035f6c43a2a4..736b6cc6ca8f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4408,6 +4408,13 @@ static int vmx_set_ept_view(struct kvm_vcpu *vcpu, u16 
view)
kvm_mmu_unload(vcpu);
r = kvm_mmu_reload(vcpu);
WARN_ON_ONCE(r);
+
+   /* When #VE happens, current EPT index will be saved
+* by the logical processor into VE information area,
+* see chapter 24.6.18 and 25.5.6.2 from Intel SDM.
+*/
+   if (kvm_ve_supported)
+   vmcs_write16(EPTP_INDEX, view);
}
 
return 0;
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 27/34] KVM: x86: add .disable_ve()

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

This function is needed for the KVMI_VCPU_DISABLE_VE command.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx/vmx.c  | 10 ++
 2 files changed, 11 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4cee641af48e..54969c2e804e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1316,6 +1316,7 @@ struct kvm_x86_ops {
int (*control_ept_view)(struct kvm_vcpu *vcpu, u16 view, u8 visible);
int (*set_ve_info)(struct kvm_vcpu *vcpu, unsigned long ve_info,
bool trigger_vmexit);
+   int (*disable_ve)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 871cc49063d8..96aa4b7e2857 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4464,6 +4464,15 @@ static int vmx_set_ve_info(struct kvm_vcpu *vcpu, 
unsigned long ve_info,
return 0;
 }
 
+static int vmx_disable_ve(struct kvm_vcpu *vcpu)
+{
+   if (kvm_ve_supported)
+   secondary_exec_controls_clearbit(to_vmx(vcpu),
+SECONDARY_EXEC_EPT_VE);
+
+   return 0;
+}
+
 #define VMX_XSS_EXIT_BITMAP 0
 
 /*
@@ -8390,6 +8399,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_ept_view = vmx_set_ept_view,
.control_ept_view = vmx_control_ept_view,
.set_ve_info = vmx_set_ve_info,
+   .disable_ve = vmx_disable_ve,
 };
 
 static __init int hardware_setup(void)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 14/34] KVM: introspection: add 'view' field to struct kvmi_event_arch

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

Report the view a vCPU operates on when sending events to the
introspection tool.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/uapi/asm/kvmi.h | 4 +++-
 arch/x86/kvm/kvmi.c  | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 3087c685c232..a13a98fa863f 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -12,7 +12,9 @@
 
 struct kvmi_event_arch {
__u8 mode;  /* 2, 4 or 8 */
-   __u8 padding[7];
+   __u8 padding1;
+   __u16 view;
+   __u32 padding2;
struct kvm_regs regs;
struct kvm_sregs sregs;
struct {
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index bd31809ff812..292606902338 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -102,6 +102,7 @@ void kvmi_arch_setup_event(struct kvm_vcpu *vcpu, struct 
kvmi_event *ev)
kvm_arch_vcpu_get_sregs(vcpu, >sregs);
ev->arch.mode = kvmi_vcpu_mode(vcpu, >sregs);
kvmi_get_msrs(vcpu, event);
+   event->view = kvm_get_ept_view(vcpu);
 }
 
 int kvmi_arch_cmd_vcpu_get_info(struct kvm_vcpu *vcpu,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 28/34] KVM: x86: page_track: add support for suppress #VE bit

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

Setting SPTEs from rmaps is not enough because rmaps contain only
present SPTEs. If there is no mapping created for the GFN, SPTEs must
be configured when they are created. Use the page tracking mechanism in
order to configure the SVE bit when a PF occurs. This is similar to how
access rights are configured using the page tracking mechanism.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_page_track.h |  1 +
 arch/x86/kvm/mmu.h|  2 ++
 arch/x86/kvm/mmu/mmu.c| 38 +++
 arch/x86/kvm/mmu/page_track.c |  7 +
 4 files changed, 48 insertions(+)

diff --git a/arch/x86/include/asm/kvm_page_track.h 
b/arch/x86/include/asm/kvm_page_track.h
index 96d2ab7da4a7..108161f63a44 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -7,6 +7,7 @@ enum kvm_page_track_mode {
KVM_PAGE_TRACK_PREWRITE,
KVM_PAGE_TRACK_WRITE,
KVM_PAGE_TRACK_PREEXEC,
+   KVM_PAGE_TRACK_SVE,
KVM_PAGE_TRACK_MAX,
 };
 
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 02fa0d30407f..160e66ae9852 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -234,5 +234,7 @@ int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu, gpa_t 
l2_gpa);
 
 int kvm_mmu_post_init_vm(struct kvm *kvm);
 void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
+bool kvm_mmu_set_ept_page_sve(struct kvm *kvm, struct kvm_memory_slot *slot,
+ gfn_t gfn, u16 index, bool suppress);
 
 #endif
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 28ab4a1ba25a..7254f5679828 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1890,6 +1890,41 @@ bool kvm_mmu_slot_gfn_exec_protect(struct kvm *kvm,
return exec_protected;
 }
 
+static bool spte_suppress_ve(u64 *sptep, bool suppress)
+{
+   u64 spte = *sptep;
+
+   if (suppress)
+   spte |= VMX_EPT_SUPPRESS_VE_BIT;
+   else
+   spte &= ~VMX_EPT_SUPPRESS_VE_BIT;
+
+   return mmu_spte_update(sptep, spte);
+}
+
+bool kvm_mmu_set_ept_page_sve(struct kvm *kvm, struct kvm_memory_slot *slot,
+ gfn_t gfn, u16 index, bool suppress)
+{
+   struct kvm_rmap_head *rmap_head;
+   struct rmap_iterator iter;
+   struct kvm_mmu_page *sp;
+   bool flush = false;
+   u64 *sptep;
+   int i;
+
+   for (i = PG_LEVEL_4K; i <= KVM_MAX_HUGEPAGE_LEVEL; i++) {
+   rmap_head = __gfn_to_rmap(gfn, i, slot);
+   for_each_rmap_spte(rmap_head, , sptep) {
+   sp = page_header(__pa(sptep));
+   if (index == 0 || (index > 0 && index == sp->view))
+   flush |= spte_suppress_ve(sptep, suppress);
+   }
+   }
+
+   return flush;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_ept_page_sve);
+
 static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
 {
struct kvm_memory_slot *slot;
@@ -3171,6 +3206,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
spte |= (u64)pfn << PAGE_SHIFT;
 
+   if (kvm_page_track_is_active(vcpu, gfn, KVM_PAGE_TRACK_SVE))
+   spte &= ~VMX_EPT_SUPPRESS_VE_BIT;
+
if (pte_access & ACC_WRITE_MASK) {
spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
 
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index bf26b21cfeb8..153c5285361f 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -125,6 +125,9 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
} else if (mode == KVM_PAGE_TRACK_PREEXEC) {
if (kvm_mmu_slot_gfn_exec_protect(kvm, slot, gfn, view))
kvm_flush_remote_tlbs(kvm);
+   } else if (mode == KVM_PAGE_TRACK_SVE) {
+   if (kvm_mmu_set_ept_page_sve(kvm, slot, gfn, view, false))
+   kvm_flush_remote_tlbs(kvm);
}
 }
 EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);
@@ -151,6 +154,10 @@ void kvm_slot_page_track_remove_page(struct kvm *kvm,
 
update_gfn_track(slot, gfn, mode, -1, view);
 
+   if (mode == KVM_PAGE_TRACK_SVE)
+   if (kvm_mmu_set_ept_page_sve(kvm, slot, gfn, view, true))
+   kvm_flush_remote_tlbs(kvm);
+
/*
 * allow large page mapping for the tracked page
 * after the tracker is gone.
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 16/34] KVM: introspection: add KVMI_VCPU_CONTROL_EPT_VIEW

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

This will be used by the introspection tool to control the EPT views to
which the guest is allowed to switch.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   |  37 ++
 arch/x86/include/uapi/asm/kvmi.h  |   7 ++
 arch/x86/kvm/kvmi.c   |   9 ++
 include/uapi/linux/kvmi.h |   1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 118 ++
 virt/kvm/introspection/kvmi_int.h |   2 +
 virt/kvm/introspection/kvmi_msg.c |  19 +++
 7 files changed, 193 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 02f03c62adef..f4c60aba9b53 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -1190,6 +1190,43 @@ EPTP switching mechanism (see **KVMI_GET_VERSION**).
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 * -KVM_EOPNOTSUPP - an EPT view was selected but the hardware doesn't support 
it
 
+28. KVMI_VCPU_CONTROL_EPT_VIEW
+--
+
+:Architecture: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_control_ept_view {
+   __u16 view;
+   __u8  visible;
+   __u8  padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+
+Controls the capability of the guest to successfully change EPT views
+through VMFUNC instruction without triggering a vm-exit. If ``visible``
+is true, the guest will be capable to change EPT views through VMFUNC(0,
+``view``). If ``visible`` is false, VMFUNC(0, ``view``) triggers a
+vm-exit, a #UD exception is injected to guest and the guest application
+is terminated.
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EINVAL - padding is not zero
+* -KVM_EINVAL - the selected EPT view is not valid
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index f7a080d5e227..fc35da900778 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -166,4 +166,11 @@ struct kvmi_vcpu_set_ept_view {
__u32 padding2;
 };
 
+struct kvmi_vcpu_control_ept_view {
+   __u16 view;
+   __u8  visible;
+   __u8  padding1;
+   __u32 padding2;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 99ea8ef70be2..06357b8ab54a 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1432,3 +1432,12 @@ int kvmi_arch_cmd_set_ept_view(struct kvm_vcpu *vcpu, 
u16 view)
 
return kvm_x86_ops.set_ept_view(vcpu, view);
 }
+
+int kvmi_arch_cmd_control_ept_view(struct kvm_vcpu *vcpu, u16 view,
+  bool visible)
+{
+   if (!kvm_x86_ops.control_ept_view)
+   return -KVM_EINVAL;
+
+   return kvm_x86_ops.control_ept_view(vcpu, view, visible);
+}
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 8204661d944d..a72c536a2c80 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -51,6 +51,7 @@ enum {
KVMI_VCPU_TRANSLATE_GVA  = 25,
KVMI_VCPU_GET_EPT_VIEW   = 26,
KVMI_VCPU_SET_EPT_VIEW   = 27,
+   KVMI_VCPU_CONTROL_EPT_VIEW   = 28,
 
KVMI_NUM_MESSAGES
 };
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index c6f7d10563db..d808cb61463d 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -56,6 +56,7 @@ struct vcpu_worker_data {
bool stop;
bool shutdown;
bool restart_on_shutdown;
+   bool run_guest_once;
 };
 
 static struct kvmi_features features;
@@ -72,6 +73,7 @@ enum {
GUEST_TEST_HYPERCALL,
GUEST_TEST_MSR,
GUEST_TEST_PF,
+   GUEST_TEST_VMFUNC,
GUEST_TEST_XSETBV,
 };
 
@@ -130,6 +132,13 @@ static void guest_pf_test(void)
*((uint8_t *)test_gva) = READ_ONCE(test_write_pattern);
 }
 
+static void guest_vmfunc_test(void)
+{
+   asm volatile("mov $0, %rax");
+   asm volatile("mov $1, %rcx");
+   asm volatile(".byte 0x0f,0x01,0xd4");
+}
+
 /* from fpu/internal.h */
 static u64 xgetbv(u32 index)
 {
@@ -193,6 +202,9 @@ static void guest_code(void)
case GUEST_TEST_PF:
guest_pf_test();
break;
+   case GUEST_TEST_VMFUNC:
+   guest_vmfunc_test();
+   break;
case GUEST_TEST_XSETBV:
guest_xsetbv_test();
break;
@@ -777,6 +789,7 @@ static void test_memory_access(struct kvm_vm *vm)
 static void *vcpu_worker(void *data)
 {
struct vcpu_work

[RFC PATCH v1 19/34] KVM: introspection: clean non-default EPTs on unhook

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

When a guest is unhooked, the VM is brought to its default state and
uses the default EPT view. Delete all shadow pages that belong to
non-default EPT views in order to free unused shadow pages. They are
not used because the guest cannot VMFUNC to any EPT view.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/kvmi.c | 23 ++-
 virt/kvm/introspection/kvmi.c   |  3 +++
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 519b8210b8ef..086b6e2a2314 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1026,6 +1026,8 @@ struct kvm_arch {
 
struct kvm_pmu_event_filter *pmu_event_filter;
struct task_struct *nx_lpage_recovery_thread;
+
+   refcount_t kvmi_refcount;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 52885b9e5b6e..27fd732cff29 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -640,6 +640,25 @@ static void kvmi_arch_restore_interception(struct kvm_vcpu 
*vcpu)
kvmi_arch_disable_msrw_intercept(vcpu, arch_vcpui->msrw.kvmi_mask.high);
 }
 
+void kvmi_arch_restore_ept_view(struct kvm_vcpu *vcpu)
+{
+   struct kvm *kvm = vcpu->kvm;
+   u16 view, default_view = 0;
+   bool visible = false;
+
+   if (kvm_get_ept_view(vcpu) != default_view)
+   kvmi_arch_cmd_set_ept_view(vcpu, default_view);
+
+   for (view = 0; view < KVM_MAX_EPT_VIEWS; view++)
+   kvmi_arch_cmd_control_ept_view(vcpu, view, visible);
+
+   if (refcount_dec_and_test(>arch.kvmi_refcount)) {
+   u16 zap_mask = ~(1 << default_view);
+
+   kvm_mmu_zap_all(vcpu->kvm, zap_mask);
+   }
+}
+
 bool kvmi_arch_clean_up_interception(struct kvm_vcpu *vcpu)
 {
struct kvmi_interception *arch_vcpui = vcpu->arch.kvmi;
@@ -647,8 +666,10 @@ bool kvmi_arch_clean_up_interception(struct kvm_vcpu *vcpu)
if (!arch_vcpui || !arch_vcpui->cleanup)
return false;
 
-   if (arch_vcpui->restore_interception)
+   if (arch_vcpui->restore_interception) {
kvmi_arch_restore_interception(vcpu);
+   kvmi_arch_restore_ept_view(vcpu);
+   }
 
return true;
 }
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index 44b0092e304f..f3bdef3c54e6 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -288,6 +288,9 @@ static void free_kvmi(struct kvm *kvm)
 
kvmi_clear_mem_access(kvm);
 
+   refcount_set(>arch.kvmi_refcount,
+   atomic_read(>online_vcpus));
+
kvm_for_each_vcpu(i, vcpu, kvm)
free_vcpui(vcpu, restore_interception);
 
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 32/34] KVM: introspection: add KVMI_VCPU_SET_VE_INFO/KVMI_VCPU_DISABLE_VE

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

The introspection tool can use #VE to reduce the number of VM-exits
caused by SPT violations for some guests.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 63 +++
 arch/x86/include/uapi/asm/kvmi.h  |  8 +++
 arch/x86/kvm/kvmi.c   | 19 ++
 include/uapi/linux/kvmi.h |  2 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 52 +++
 virt/kvm/introspection/kvmi_int.h |  3 +
 virt/kvm/introspection/kvmi_msg.c | 30 +
 7 files changed, 177 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index caa51fccc463..c50c40638d46 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -1230,6 +1230,69 @@ is terminated.
 * -KVM_EINVAL - padding is not zero
 * -KVM_EINVAL - the selected EPT view is not valid
 
+29. KVMI_VCPU_SET_VE_INFO
+-
+
+:Architecture: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_set_ve_info {
+   __u64 gpa;
+   __u8 trigger_vmexit;
+   __u8 padding1;
+   __u16 padding2;
+   __u32 padding3;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+
+Configures the guest physical address for the #VE info page and enables
+the #VE mechanism. If ``trigger_vmexit`` is true, any virtualization
+exception will trigger a vm-exit. Otherwise, the exception is delivered
+using gate descriptor 20 from the Interrupt Descriptor Table (IDT).
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - one of the specified GPAs is invalid
+* -KVM_EOPNOTSUPP - the hardware does not support #VE
+* -KVM_EINVAL - padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
+30. KVMI_VCPU_DISABLE_VE
+
+
+:Architecture: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+
+Disables the #VE mechanism. All EPT violations will trigger a vm-exit,
+regardless of the corresponding spte 63rd bit (SVE) for the GPA that
+triggered the EPT violation within a specific EPT view.
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 56992dacfb69..d925e6d49f50 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -174,4 +174,12 @@ struct kvmi_vcpu_control_ept_view {
__u32 padding2;
 };
 
+struct kvmi_vcpu_set_ve_info {
+   __u64 gpa;
+   __u8 trigger_vmexit;
+   __u8 padding1;
+   __u16 padding2;
+   __u32 padding3;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 3e8c83623703..e101ac390809 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1464,3 +1464,22 @@ int kvmi_arch_cmd_control_ept_view(struct kvm_vcpu 
*vcpu, u16 view,
 
return kvm_x86_ops.control_ept_view(vcpu, view, visible);
 }
+
+int kvmi_arch_cmd_set_ve_info(struct kvm_vcpu *vcpu, u64 gpa,
+ bool trigger_vmexit)
+{
+   unsigned long ve_info = (unsigned long) gpa;
+
+   if (!kvm_x86_ops.set_ve_info)
+   return -KVM_EINVAL;
+
+   return kvm_x86_ops.set_ve_info(vcpu, ve_info, trigger_vmexit);
+}
+
+int kvmi_arch_cmd_disable_ve(struct kvm_vcpu *vcpu)
+{
+   if (!kvm_x86_ops.disable_ve)
+   return 0;
+
+   return kvm_x86_ops.disable_ve(vcpu);
+}
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 505a865cd115..a17cd1fa16d0 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -52,6 +52,8 @@ enum {
KVMI_VCPU_GET_EPT_VIEW   = 26,
KVMI_VCPU_SET_EPT_VIEW   = 27,
KVMI_VCPU_CONTROL_EPT_VIEW   = 28,
+   KVMI_VCPU_SET_VE_INFO= 29,
+   KVMI_VCPU_DISABLE_VE = 30,
 
KVMI_NUM_MESSAGES
 };
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 4e099cbfcf4e..a3ea22f546ec 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -35,6 +35,10 @@ static vm_vaddr_t test_gva;
 static void *test_hva;
 static vm_paddr_t test_gpa;
 
+static vm_vaddr_t test_ve_info_gva;
+static void *test_ve_info_hva;
+static vm_paddr_t test_ve_info_gpa;
+
 static uint8_t test_write_pattern;
 static int page_size;
 
@@ -2258,6 +2262,43 @@ static void test_cmd_vcpu_vmfunc(struct kvm_vm *vm)
test_guest_switch_to_visible_view(vm);
 }
 
+static void enable_ve(struct kvm_vm *vm)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   

[RFC PATCH v1 05/34] KVM: x86: mmu: add EPT view parameter to kvm_mmu_get_page()

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

This will be used to create root_hpa for all the EPT views.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |  7 +-
 arch/x86/kvm/mmu/mmu.c  | 43 -
 arch/x86/kvm/mmu/paging_tmpl.h  |  6 +++--
 3 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0acc21087caf..bd45778e0904 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -243,6 +243,8 @@ enum x86_intercept_stage;
 PFERR_WRITE_MASK | \
 PFERR_PRESENT_MASK)
 
+#define KVM_MAX_EPT_VIEWS  3
+
 /* apic attention bits */
 #define KVM_APIC_CHECK_VAPIC   0
 /*
@@ -349,6 +351,9 @@ struct kvm_mmu_page {
union kvm_mmu_page_role role;
gfn_t gfn;
 
+   /* The view this shadow page belongs to */
+   u16 view;
+
u64 *spt;
/* hold the gfn of each spte inside spt */
gfn_t *gfns;
@@ -936,7 +941,7 @@ struct kvm_arch {
unsigned long n_max_mmu_pages;
unsigned int indirect_shadow_pages;
u8 mmu_valid_gen;
-   struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
+   struct hlist_head mmu_page_hash[KVM_MAX_EPT_VIEWS][KVM_NUM_MMU_PAGES];
/*
 * Hash table of struct kvm_mmu_page.
 */
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f3ba4d0452c9..0b6527a1ebe6 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2349,14 +2349,14 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list);
 
 
-#define for_each_valid_sp(_kvm, _sp, _gfn) \
+#define for_each_valid_sp(_kvm, _sp, _gfn, view)   \
hlist_for_each_entry(_sp,   \
- &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \
+ &(_kvm)->arch.mmu_page_hash[view][kvm_page_table_hashfn(_gfn)], 
hash_link) \
if (is_obsolete_sp((_kvm), (_sp))) {\
} else
 
 #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn)
\
-   for_each_valid_sp(_kvm, _sp, _gfn)  \
+   for_each_valid_sp(_kvm, _sp, _gfn, 0)   \
if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else
 
 static inline bool is_ept_sp(struct kvm_mmu_page *sp)
@@ -2564,7 +2564,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct 
kvm_vcpu *vcpu,
 gva_t gaddr,
 unsigned level,
 int direct,
-unsigned int access)
+unsigned int access,
+u16 view)
 {
union kvm_mmu_page_role role;
unsigned quadrant;
@@ -2587,7 +2588,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct 
kvm_vcpu *vcpu,
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
role.quadrant = quadrant;
}
-   for_each_valid_sp(vcpu->kvm, sp, gfn) {
+   for_each_valid_sp(vcpu->kvm, sp, gfn, view) {
if (sp->gfn != gfn) {
collisions++;
continue;
@@ -2624,9 +2625,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct 
kvm_vcpu *vcpu,
 
sp->gfn = gfn;
sp->role = role;
+   sp->view = view;
pg_hash = kvm_page_table_hashfn(gfn);
hlist_add_head(>hash_link,
-   >kvm->arch.mmu_page_hash[pg_hash]);
+   >kvm->arch.mmu_page_hash[view][pg_hash]);
if (!direct) {
/*
 * we should do write protection before syncing pages
@@ -3463,7 +3465,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, 
int write,
drop_large_spte(vcpu, it.sptep);
if (!is_shadow_present_pte(*it.sptep)) {
sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr,
- it.level - 1, true, ACC_ALL);
+ it.level - 1, true, ACC_ALL,
+ kvm_get_ept_view(vcpu));
 
link_shadow_page(vcpu, it.sptep, sp);
if (account_disallowed_nx_lpage)
@@ -3788,7 +3791,7 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t 
root_gfn)
 }
 
 static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva,
-   u8 level, bool direct)
+   u8 level, bool direct, u16 view)
 {
  

[RFC PATCH v1 00/34] VM introspection - EPT Views and Virtualization Exceptions

2020-07-22 Thread Adalbert Lazăr
This patch series is based on the VM introspection patches
(https://lore.kernel.org/kvm/20200721210922.7646-1-ala...@bitdefender.com/),
extending the introspection API with EPT Views and Virtualization
Exceptions (#VE) support.

The purpose of this series is to get initial feedback and to see if
we are on the right track, especially because the changes made to add
the EPT views are not small (even if they add support only for different
access rights for now, not for different content).

One use case for these extensions is to run a guest agent, isolated in
another EPT view and using Virtualization Exceptions (#VE), to reduce
the number of VM-Exits caused by EPT violations.

Another case for EPT views is to single-step one vCPU on a different view
(with more relaxed page access restrictions) while all the others run
on a main/restricted view.

Patches 1-11 make preparatory changes for EPT views.

Patches 12-19 extend the VM introspection API with EPT-views related
commands and data. The Get/Set/Control EPT view commands are added,
the KVMI_VM_SET_PAGE_ACCESS command and the vCPU introspection events
are extended with the EPT view.

Patches 20-30 make preparatory changes for #VE.

Patches 31-34 extend the VM introspection API with #VE related commands.

Adalbert Lazăr (2):
  KVM: x86: mmu: reindent to avoid lines longer than 80 chars
  KVM: introspection: mask out non-rwx flags when reading/writing
from/to the internal database

Marian Rotariu (5):
  KVM: x86: export .get_vmfunc_status()
  KVM: x86: export .get_eptp_switching_status()
  KVM: x86: mmu: add support for EPT switching
  KVM: x86: add .set_ept_view()
  KVM: x86: vmx: add support for virtualization exceptions

Sean Christopherson (2):
  KVM: VMX: Define EPT suppress #VE bit (bit 63 in EPT leaf entries)
  KVM: VMX: Suppress EPT violation #VE by default (when enabled)

Ștefan Șicleru (25):
  KVM: x86: add kvm_get_ept_view()
  KVM: x86: mmu: add EPT view parameter to kvm_mmu_get_page()
  KVM: x86: mmu: increase mmu_memory_cache size
  KVM: x86: add .control_ept_view()
  KVM: x86: page track: allow page tracking for different EPT views
  KVM: x86: mmu: allow zapping shadow pages for specific EPT views
  KVM: introspection: extend struct kvmi_features with the EPT views
status support
  KVM: introspection: add KVMI_VCPU_GET_EPT_VIEW
  KVM: introspection: add 'view' field to struct kvmi_event_arch
  KVM: introspection: add KVMI_VCPU_SET_EPT_VIEW
  KVM: introspection: add KVMI_VCPU_CONTROL_EPT_VIEW
  KVM: introspection: extend the access rights database with EPT view
info
  KVM: introspection: extend KVMI_VM_SET_PAGE_ACCESS with EPT view info
  KVM: introspection: clean non-default EPTs on unhook
  KVM: x86: mmu: fix: update present_mask in spte_read_protect()
  KVM: vmx: trigger vm-exits for mmio sptes by default when #VE is
enabled
  KVM: x86: svm: set .clear_page()
  KVM: x86: add .set_ve_info()
  KVM: x86: add .disable_ve()
  KVM: x86: page_track: add support for suppress #VE bit
  KVM: vmx: make use of EPTP_INDEX in vmx_handle_exit()
  KVM: vmx: make use of EPTP_INDEX in vmx_set_ept_view()
  KVM: introspection: add #VE host capability checker
  KVM: introspection: add KVMI_VCPU_SET_VE_INFO/KVMI_VCPU_DISABLE_VE
  KVM: introspection: add KVMI_VM_SET_PAGE_SVE

 Documentation/virt/kvm/kvmi.rst   | 227 +++-
 arch/x86/include/asm/kvm_host.h   |  27 +-
 arch/x86/include/asm/kvm_page_track.h |   5 +-
 arch/x86/include/asm/kvmi_host.h  |   1 +
 arch/x86/include/asm/vmx.h|   5 +
 arch/x86/include/uapi/asm/kvmi.h  |  44 ++-
 arch/x86/kvm/Makefile |   2 +-
 arch/x86/kvm/kvmi.c   |  83 -
 arch/x86/kvm/mmu.h|  12 +-
 arch/x86/kvm/mmu/mmu.c| 191 +++---
 arch/x86/kvm/mmu/page_track.c |  63 ++--
 arch/x86/kvm/mmu/paging_tmpl.h|   6 +-
 arch/x86/kvm/svm/svm.c|   1 +
 arch/x86/kvm/vmx/capabilities.h   |  13 +
 arch/x86/kvm/vmx/clear_page.S |  17 +
 arch/x86/kvm/vmx/vmx.c| 291 ++-
 arch/x86/kvm/vmx/vmx.h|  18 +
 arch/x86/kvm/x86.c|  20 +-
 drivers/gpu/drm/i915/gvt/kvmgt.c  |   8 +-
 include/linux/kvmi_host.h |   2 +-
 include/uapi/linux/kvmi.h |  12 +-
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 335 +-
 virt/kvm/introspection/kvmi.c | 175 ++---
 virt/kvm/introspection/kvmi_int.h |  17 +-
 virt/kvm/introspection/kvmi_msg.c | 106 ++
 25 files changed, 1512 insertions(+), 169 deletions(-)
 create mode 100644 arch/x86/kvm/vmx/clear_page.S


base-commit: d9da9f5842e0697564f0f3e586d858f2626e8f92
Based-on: <20200721210922.7646-1-ala...@bitdefender.com>
CC: Sean Christop

[RFC PATCH v1 20/34] KVM: x86: vmx: add support for virtualization exceptions

2020-07-22 Thread Adalbert Lazăr
From: Marian Rotariu 

Only the hardware support check function and the #VE info page management
are introduced.

Signed-off-by: Marian Rotariu 
Co-developed-by: Ștefan Șicleru 
Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/include/asm/vmx.h  |  3 +++
 arch/x86/kvm/vmx/capabilities.h |  5 +
 arch/x86/kvm/vmx/vmx.c  | 31 +++
 arch/x86/kvm/vmx/vmx.h  | 12 
 arch/x86/kvm/x86.c  |  3 +++
 6 files changed, 55 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 086b6e2a2314..a9f225f9dd12 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1435,6 +1435,7 @@ extern u64  kvm_default_tsc_scaling_ratio;
 
 extern u64 kvm_mce_cap_supported;
 extern bool kvm_eptp_switching_supported;
+extern bool kvm_ve_supported;
 
 /*
  * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 04487eb38b5c..177500e9e68c 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -67,6 +67,7 @@
 #define SECONDARY_EXEC_ENCLS_EXITING   VMCS_CONTROL_BIT(ENCLS_EXITING)
 #define SECONDARY_EXEC_RDSEED_EXITING  VMCS_CONTROL_BIT(RDSEED_EXITING)
 #define SECONDARY_EXEC_ENABLE_PML   
VMCS_CONTROL_BIT(PAGE_MOD_LOGGING)
+#define SECONDARY_EXEC_EPT_VE  
VMCS_CONTROL_BIT(EPT_VIOLATION_VE)
 #define SECONDARY_EXEC_PT_CONCEAL_VMX  VMCS_CONTROL_BIT(PT_CONCEAL_VMX)
 #define SECONDARY_EXEC_XSAVES  VMCS_CONTROL_BIT(XSAVES)
 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 
VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
@@ -213,6 +214,8 @@ enum vmcs_field {
VMREAD_BITMAP_HIGH  = 0x2027,
VMWRITE_BITMAP  = 0x2028,
VMWRITE_BITMAP_HIGH = 0x2029,
+   VE_INFO_ADDRESS = 0x202A,
+   VE_INFO_ADDRESS_HIGH= 0x202B,
XSS_EXIT_BITMAP = 0x202C,
XSS_EXIT_BITMAP_HIGH= 0x202D,
ENCLS_EXITING_BITMAP= 0x202E,
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 92781e2c523e..bc5bbc41ca92 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -257,6 +257,11 @@ static inline bool cpu_has_vmx_pml(void)
return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_PML;
 }
 
+static inline bool cpu_has_vmx_ve(void)
+{
+   return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_EPT_VE;
+}
+
 static inline bool vmx_xsaves_supported(void)
 {
return vmcs_config.cpu_based_2nd_exec_ctrl &
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cbc943d217e3..1c1dda14d18d 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2463,6 +2463,7 @@ static __init int setup_vmcs_config(struct vmcs_config 
*vmcs_conf,
SECONDARY_EXEC_RDSEED_EXITING |
SECONDARY_EXEC_RDRAND_EXITING |
SECONDARY_EXEC_ENABLE_PML |
+   SECONDARY_EXEC_EPT_VE |
SECONDARY_EXEC_TSC_SCALING |
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
SECONDARY_EXEC_PT_USE_GPA |
@@ -4247,6 +4248,12 @@ static void vmx_compute_secondary_exec_control(struct 
vcpu_vmx *vmx)
*/
exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
 
+   /* #VE must be disabled by default.
+* Once enabled, all EPT violations on pages missing the SVE bit
+* will be delivered to the guest.
+*/
+   exec_control &= ~SECONDARY_EXEC_EPT_VE;
+
if (!enable_pml)
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
@@ -6019,6 +6026,28 @@ static void dump_eptp_list(void)
pr_err("%d: %016llx\n", i, *(eptp_list + i));
 }
 
+static void dump_ve_info(void)
+{
+   phys_addr_t ve_info_phys;
+   struct vcpu_ve_info *ve_info = NULL;
+
+   if (!cpu_has_vmx_ve())
+   return;
+
+   ve_info_phys = (phys_addr_t)vmcs_read64(VE_INFO_ADDRESS);
+   if (!ve_info_phys)
+   return;
+
+   ve_info = (struct vcpu_ve_info *)phys_to_virt(ve_info_phys);
+
+   pr_err("*** Virtualization Exception Info ***\n");
+   pr_err("ExitReason: %x\n", ve_info->exit_reason);
+   pr_err("ExitQualification: %llx\n", ve_info->exit_qualification);
+   pr_err("GVA: %llx\n", ve_info->gva);
+   pr_err("GPA: %llx\n", ve_info->gpa);
+   pr_err("EPTPIndex: %x\n", ve_info->eptp_index);
+}
+
 void dump_vmcs(void)
 {
u32 vmentry_ctl, vmexit_ctl;
@@ -6169,6 +6198,7 @@ void dump_vmcs(void)
   vmcs

[RFC PATCH v1 18/34] KVM: introspection: extend KVMI_VM_SET_PAGE_ACCESS with EPT view info

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

The introspection tool uses this command to set distinct access rights
on different EPT views.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst |  8 +---
 include/uapi/linux/kvmi.h   |  4 ++--
 virt/kvm/introspection/kvmi.c   | 10 --
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index f4c60aba9b53..658c9df01469 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -1003,8 +1003,8 @@ to control events for any other register will fail with 
-KVM_EINVAL::
 
struct kvmi_vm_set_page_access {
__u16 count;
-   __u16 padding1;
-   __u32 padding2;
+   __u16 view;
+   __u32 padding;
struct kvmi_page_access_entry entries[0];
};
 
@@ -1026,7 +1026,7 @@ where::
struct kvmi_error_code
 
 Sets the access bits (rwx) for an array of ``count`` guest physical
-addresses.
+addresses, for the selected view.
 
 The valid access bits are::
 
@@ -1048,7 +1048,9 @@ In order to 'forget' an address, all three bits ('rwx') 
must be set.
 
 * -KVM_EINVAL - the specified access bits combination is invalid
 * -KVM_EINVAL - the padding is not zero
+* -KVM_EINVAL - the selected EPT view is invalid
 * -KVM_EINVAL - the message size is invalid
+* -KVM_EOPNOTSUPP - an EPT view was selected but the hardware doesn't support 
it
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 * -KVM_ENOMEM - there is not enough memory to add the page tracking structures
 
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index a72c536a2c80..505a865cd115 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -191,8 +191,8 @@ struct kvmi_page_access_entry {
 
 struct kvmi_vm_set_page_access {
__u16 count;
-   __u16 padding1;
-   __u32 padding2;
+   __u16 view;
+   __u32 padding;
struct kvmi_page_access_entry entries[0];
 };
 
diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index 737fe3c7a956..44b0092e304f 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -1187,14 +1187,20 @@ int kvmi_cmd_set_page_access(struct kvm_introspection 
*kvmi,
const struct kvmi_page_access_entry *end = req->entries + req->count;
int ec = 0;
 
-   if (req->padding1 || req->padding2)
+   if (req->padding)
return -KVM_EINVAL;
 
if (msg->size < struct_size(req, entries, req->count))
return -KVM_EINVAL;
 
+   if (!is_valid_view(req->view))
+   return -KVM_EINVAL;
+
+   if (req->view != 0 && !kvm_eptp_switching_supported)
+   return -KVM_EOPNOTSUPP;
+
for (; entry < end; entry++) {
-   int r = set_page_access_entry(kvmi, 0, entry);
+   int r = set_page_access_entry(kvmi, req->view, entry);
 
if (r && !ec)
ec = r;
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 09/34] KVM: x86: add .control_ept_view()

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

This will be used by the introspection tool to control the EPT views to
which the guest is allowed to switch.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx/vmx.c  | 18 +-
 arch/x86/kvm/vmx/vmx.h  |  2 ++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 300f7fc43987..5e241863153f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1310,6 +1310,7 @@ struct kvm_x86_ops {
bool (*get_eptp_switching_status)(void);
u16 (*get_ept_view)(struct kvm_vcpu *vcpu);
int (*set_ept_view)(struct kvm_vcpu *vcpu, u16 view);
+   int (*control_ept_view)(struct kvm_vcpu *vcpu, u16 view, u8 visible);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0d39487ce5c6..cbc943d217e3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3138,8 +3138,11 @@ static void vmx_construct_eptp_with_index(struct 
kvm_vcpu *vcpu,
if (!eptp_list)
return;
 
-   eptp_list[view] = construct_eptp(vcpu,
+   if (test_bit(view, >allowed_views))
+   eptp_list[view] = construct_eptp(vcpu,
vcpu->arch.mmu->root_hpa_altviews[view]);
+   else
+   eptp_list[view] = (~0ULL);
 }
 
 static void vmx_construct_eptp_list(struct kvm_vcpu *vcpu)
@@ -4395,6 +4398,18 @@ static int vmx_set_ept_view(struct kvm_vcpu *vcpu, u16 
view)
return 0;
 }
 
+static int vmx_control_ept_view(struct kvm_vcpu *vcpu, u16 view, u8 visible)
+{
+   if (visible)
+   set_bit(view, _vmx(vcpu)->allowed_views);
+   else
+   clear_bit(view, _vmx(vcpu)->allowed_views);
+
+   vmx_construct_eptp_with_index(vcpu, view);
+
+   return 0;
+}
+
 #define VMX_XSS_EXIT_BITMAP 0
 
 /*
@@ -8284,6 +8299,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.get_eptp_switching_status = vmx_get_eptp_switching_status,
.get_ept_view = vmx_get_ept_view,
.set_ept_view = vmx_set_ept_view,
+   .control_ept_view = vmx_control_ept_view,
 };
 
 static __init int hardware_setup(void)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 4e2f86458ca2..38d50fc7357b 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -300,6 +300,8 @@ struct vcpu_vmx {
struct page *eptp_list_pg;
/* The view this vcpu operates on. */
u16 view;
+   /* Visible EPT views bitmap for in-guest VMFUNC. */
+   unsigned long allowed_views;
 };
 
 enum ept_pointers_status {
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 13/34] KVM: introspection: add KVMI_VCPU_GET_EPT_VIEW

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

The introspection tool uses this function to check the hardware support
for EPT switching, which can be used either to singlestep vCPUs
on an unprotected EPT view or to use #VE in order to filter out
VM-exits caused by EPT violations.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 34 +++
 arch/x86/include/uapi/asm/kvmi.h  |  6 
 arch/x86/kvm/kvmi.c   |  5 +++
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 28 +++
 virt/kvm/introspection/kvmi_int.h |  1 +
 virt/kvm/introspection/kvmi_msg.c | 14 
 7 files changed, 89 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 234eacec4db1..76a2d0125f78 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -1120,6 +1120,40 @@ the address cannot be translated.
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+26. KVMI_VCPU_GET_EPT_VIEW
+--
+
+:Architecture: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+   struct kvmi_vcpu_get_ept_view_reply {
+   __u16 view;
+   __u16 padding1;
+   __u32 padding2;
+   };
+
+Returns the EPT ``view`` the provided vCPU operates on.
+
+Before getting EPT views, the introspection tool should use
+*KVMI_GET_VERSION* to check if the hardware has support for VMFUNC and
+EPTP switching mechanism (see **KVMI_GET_VERSION**).  If the hardware
+does not provide support for these features, the returned EPT view will
+be zero.
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - the padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 51b399d50a2a..3087c685c232 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -152,4 +152,10 @@ struct kvmi_features {
__u8 padding[5];
 };
 
+struct kvmi_vcpu_get_ept_view_reply {
+   __u16 view;
+   __u16 padding1;
+   __u32 padding2;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 25c1f8f2e221..bd31809ff812 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1417,3 +1417,8 @@ bool kvmi_update_ad_flags(struct kvm_vcpu *vcpu)
 
return ret;
 }
+
+u16 kvmi_arch_cmd_get_ept_view(struct kvm_vcpu *vcpu)
+{
+   return kvm_get_ept_view(vcpu);
+}
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index 3c15c17d28e3..cf3422ec60a8 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -49,6 +49,7 @@ enum {
 
KVMI_VCPU_CONTROL_SINGLESTEP = 24,
KVMI_VCPU_TRANSLATE_GVA  = 25,
+   KVMI_VCPU_GET_EPT_VIEW   = 26,
 
KVMI_NUM_MESSAGES
 };
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 33fffcb3a171..74eafbcae14a 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -2071,6 +2071,33 @@ static void test_cmd_translate_gva(struct kvm_vm *vm)
(vm_vaddr_t)-1, (vm_paddr_t)-1);
 }
 
+static __u16 get_ept_view(struct kvm_vm *vm)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   } req = {};
+   struct kvmi_vcpu_get_ept_view_reply rpl;
+
+   test_vcpu0_command(vm, KVMI_VCPU_GET_EPT_VIEW,
+  , sizeof(req), , sizeof(rpl));
+
+   return rpl.view;
+}
+
+static void test_cmd_vcpu_get_ept_view(struct kvm_vm *vm)
+{
+   __u16 view;
+
+   if (!features.eptp) {
+   print_skip("EPT views not supported");
+   return;
+   }
+
+   view = get_ept_view(vm);
+   pr_info("EPT view %u\n", view);
+}
+
 static void test_introspection(struct kvm_vm *vm)
 {
srandom(time(0));
@@ -2107,6 +2134,7 @@ static void test_introspection(struct kvm_vm *vm)
test_event_pf(vm);
test_cmd_vcpu_control_singlestep(vm);
test_cmd_translate_gva(vm);
+   test_cmd_vcpu_get_ept_view(vm);
 
unhook_introspection(vm);
 }
diff --git a/virt/kvm/introspection/kvmi_int.h 
b/virt/kvm/introspection/kvmi_int.h
index cb8453f0fb87..f88999bf59e8 100644
--- a/virt/kvm/introspection/kvmi_int.h
+++ b/virt/kvm/introspection/kvmi_int.h
@@ -142,5 +142,6 @@ void kvmi_arch_features(struct kvmi_features *feat);
 bool kvmi_arch_start_singlestep(struct kvm_vcpu *vcpu);
 bool kvmi_arch_stop_singlestep(struct kvm_vcpu *vcpu);
 gpa_t kvmi_arch_cmd_translate_gva(struct kvm_vcpu 

[RFC PATCH v1 01/34] KVM: x86: export .get_vmfunc_status()

2020-07-22 Thread Adalbert Lazăr
From: Marian Rotariu 

The introspection tool uses this function to check the hardware support
for VMFUNC, which can be used either to singlestep vCPUs
on an unprotected EPT view or to use #VE in order to filter out
VM-exits caused by EPT violations.

Signed-off-by: Marian Rotariu 
Co-developed-by: Ștefan Șicleru 
Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 1 +
 arch/x86/kvm/vmx/vmx.c  | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d96bf0e15ea2..ab6989745f9c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1300,6 +1300,7 @@ struct kvm_x86_ops {
bool (*spt_fault)(struct kvm_vcpu *vcpu);
bool (*gpt_translation_fault)(struct kvm_vcpu *vcpu);
void (*control_singlestep)(struct kvm_vcpu *vcpu, bool enable);
+   bool (*get_vmfunc_status)(void);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 8c9ccd1ba0f0..ec4396d5f36f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7992,6 +7992,11 @@ static void vmx_control_singlestep(struct kvm_vcpu 
*vcpu, bool enable)
CPU_BASED_MONITOR_TRAP_FLAG);
 }
 
+static bool vmx_get_vmfunc_status(void)
+{
+   return cpu_has_vmx_vmfunc();
+}
+
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
.hardware_unsetup = hardware_unsetup,
 
@@ -8133,6 +8138,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.spt_fault = vmx_spt_fault,
.gpt_translation_fault = vmx_gpt_translation_fault,
.control_singlestep = vmx_control_singlestep,
+   .get_vmfunc_status = vmx_get_vmfunc_status,
 };
 
 static __init int hardware_setup(void)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 08/34] KVM: x86: add .set_ept_view()

2020-07-22 Thread Adalbert Lazăr
From: Marian Rotariu 

The introspection tool uses this function to check the hardware support
for EPT switching, which can be used to singlestep vCPUs
on an unprotected EPT view.

Signed-off-by: Marian Rotariu 
Co-developed-by: Ștefan Șicleru 
Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx/vmx.c  | 35 -
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1035308940fe..300f7fc43987 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1309,6 +1309,7 @@ struct kvm_x86_ops {
bool (*get_vmfunc_status)(void);
bool (*get_eptp_switching_status)(void);
u16 (*get_ept_view)(struct kvm_vcpu *vcpu);
+   int (*set_ept_view)(struct kvm_vcpu *vcpu, u16 view);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 2024ef4d9a74..0d39487ce5c6 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4373,6 +4373,28 @@ static int vmx_alloc_eptp_list_page(struct vcpu_vmx *vmx)
return 0;
 }
 
+static int vmx_set_ept_view(struct kvm_vcpu *vcpu, u16 view)
+{
+   if (view >= KVM_MAX_EPT_VIEWS)
+   return -EINVAL;
+
+   if (to_vmx(vcpu)->eptp_list_pg) {
+   int r;
+
+   to_vmx(vcpu)->view = view;
+
+   /*
+* Reload mmu and make sure vmx_load_mmu_pgd() is called so that
+* VMCS::EPT_POINTER is updated accordingly
+*/
+   kvm_mmu_unload(vcpu);
+   r = kvm_mmu_reload(vcpu);
+   WARN_ON_ONCE(r);
+   }
+
+   return 0;
+}
+
 #define VMX_XSS_EXIT_BITMAP 0
 
 /*
@@ -4463,9 +4485,15 @@ static void init_vmcs(struct vcpu_vmx *vmx)
if (cpu_has_vmx_encls_vmexit())
vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
 
-   if (vmx->eptp_list_pg)
+   if (vmx->eptp_list_pg) {
+   u64 vm_function_control;
+
vmcs_write64(EPTP_LIST_ADDRESS,
page_to_phys(vmx->eptp_list_pg));
+   vm_function_control = vmcs_read64(VM_FUNCTION_CONTROL);
+   vm_function_control |= VMX_VMFUNC_EPTP_SWITCHING;
+   vmcs_write64(VM_FUNCTION_CONTROL, vm_function_control);
+   }
 
if (vmx_pt_mode_is_host_guest()) {
memset(>pt_desc, 0, sizeof(vmx->pt_desc));
@@ -5965,6 +5993,10 @@ static void dump_eptp_list(void)
 
eptp_list = phys_to_virt(eptp_list_phys);
 
+   pr_err("VMFunctionControl=%08x VMFunctionControlHigh=%08x\n",
+  vmcs_read32(VM_FUNCTION_CONTROL),
+  vmcs_read32(VM_FUNCTION_CONTROL_HIGH));
+
pr_err("*** EPTP Switching ***\n");
pr_err("EPTP List Address: %p (phys %p)\n",
eptp_list, (void *)eptp_list_phys);
@@ -8251,6 +8283,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.get_vmfunc_status = vmx_get_vmfunc_status,
.get_eptp_switching_status = vmx_get_eptp_switching_status,
.get_ept_view = vmx_get_ept_view,
+   .set_ept_view = vmx_set_ept_view,
 };
 
 static __init int hardware_setup(void)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 33/34] KVM: introspection: mask out non-rwx flags when reading/writing from/to the internal database

2020-07-22 Thread Adalbert Lazăr
This is needed because the KVMI_VM_SET_PAGE_SVE command will use
the same database to keep the suppress #VE bit requested by the
introspection tool.

Signed-off-by: Adalbert Lazăr 
---
 virt/kvm/introspection/kvmi.c | 36 ---
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/virt/kvm/introspection/kvmi.c b/virt/kvm/introspection/kvmi.c
index f3bdef3c54e6..6bae2981cda7 100644
--- a/virt/kvm/introspection/kvmi.c
+++ b/virt/kvm/introspection/kvmi.c
@@ -23,9 +23,12 @@ static struct kmem_cache *msg_cache;
 static struct kmem_cache *job_cache;
 static struct kmem_cache *radix_cache;
 
-static const u8 full_access  = KVMI_PAGE_ACCESS_R |
-   KVMI_PAGE_ACCESS_W |
-   KVMI_PAGE_ACCESS_X;
+static const u8 rwx_access = KVMI_PAGE_ACCESS_R |
+KVMI_PAGE_ACCESS_W |
+KVMI_PAGE_ACCESS_X;
+static const u8 full_access = KVMI_PAGE_ACCESS_R |
+KVMI_PAGE_ACCESS_W |
+KVMI_PAGE_ACCESS_X;
 
 void *kvmi_msg_alloc(void)
 {
@@ -1100,7 +1103,7 @@ static void kvmi_insert_mem_access(struct kvm *kvm, 
struct kvmi_mem_access *m,
 }
 
 static void kvmi_set_mem_access(struct kvm *kvm, struct kvmi_mem_access *m,
-   u16 view, bool *used)
+   u8 mask, u16 view, bool *used)
 {
struct kvm_introspection *kvmi = KVMI(kvm);
struct kvmi_mem_access *found;
@@ -1112,11 +1115,14 @@ static void kvmi_set_mem_access(struct kvm *kvm, struct 
kvmi_mem_access *m,
 
found = __kvmi_get_gfn_access(kvmi, m->gfn, view);
if (found) {
-   found->access = m->access;
+   found->access = (m->access & mask) | (found->access & ~mask);
kvmi_update_mem_access(kvm, found, view);
-   } else if (m->access != full_access) {
-   kvmi_insert_mem_access(kvm, m, view);
-   *used = true;
+   } else {
+   m->access |= full_access & ~mask;
+   if (m->access != full_access) {
+   kvmi_insert_mem_access(kvm, m, view);
+   *used = true;
+   }
}
 
write_unlock(>access_tree_lock);
@@ -1141,7 +1147,7 @@ static int kvmi_set_gfn_access(struct kvm *kvm, gfn_t 
gfn, u8 access,
if (radix_tree_preload(GFP_KERNEL))
err = -KVM_ENOMEM;
else
-   kvmi_set_mem_access(kvm, m, view, );
+   kvmi_set_mem_access(kvm, m, rwx_access, view, );
 
radix_tree_preload_end();
 
@@ -1216,14 +1222,22 @@ static int kvmi_get_gfn_access(struct kvm_introspection 
*kvmi, const gfn_t gfn,
   u8 *access, u16 view)
 {
struct kvmi_mem_access *m;
+   u8 allowed = rwx_access;
+   bool restricted;
 
read_lock(>access_tree_lock);
m = __kvmi_get_gfn_access(kvmi, gfn, view);
if (m)
-   *access = m->access;
+   allowed = m->access;
read_unlock(>access_tree_lock);
 
-   return m ? 0 : -1;
+   restricted = (allowed & rwx_access) != rwx_access;
+
+   if (!restricted)
+   return -1;
+
+   *access = allowed;
+   return 0;
 }
 
 bool kvmi_restricted_page_access(struct kvm_introspection *kvmi, gpa_t gpa,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 22/34] KVM: VMX: Suppress EPT violation #VE by default (when enabled)

2020-07-22 Thread Adalbert Lazăr
From: Sean Christopherson 

Unfortunately (for software), EPT violation #VEs are opt-out on a per
page basis, e.g. a not-present EPT violation on a zeroed EPT entry will
be morphed to a #VE due to the "suppress #VE" bit not being set.

When EPT violation #VEs are enabled, use a variation of clear_page()
that sets bit 63 (suppress #VE) in all 8-byte entries.  To wire up the
new behavior in the x86 MMU, add a new kvm_x86_ops hook and a new mask
to define a "shadow init value", which is needed to express the concept
that a cleared spte has a non-zero value when EPT violation #VEs are in
use.

Signed-off-by: Sean Christopherson 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/Makefile   |  2 +-
 arch/x86/kvm/mmu.h  |  1 +
 arch/x86/kvm/mmu/mmu.c  | 22 +++---
 arch/x86/kvm/vmx/clear_page.S   | 17 +
 arch/x86/kvm/vmx/vmx.c  | 18 +++---
 6 files changed, 50 insertions(+), 11 deletions(-)
 create mode 100644 arch/x86/kvm/vmx/clear_page.S

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a9f225f9dd12..e89cea041ec9 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1168,6 +1168,7 @@ struct kvm_x86_ops {
 * the implementation may choose to ignore if appropriate.
 */
void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr);
+   void (*clear_page)(void *page);
 
/*
 * Flush any TLB entries created by the guest.  Like tlb_flush_gva(),
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 3cfe76299dee..b5972a3fdfee 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -19,7 +19,7 @@ kvm-y += x86.o emulate.o i8259.o irq.o 
lapic.o \
   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
   hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o
 
-kvm-intel-y+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o 
vmx/evmcs.o vmx/nested.o
+kvm-intel-y+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o 
vmx/evmcs.o vmx/nested.o vmx/clear_page.o
 kvm-amd-y  += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o 
svm/avic.o svm/sev.o
 
 obj-$(CONFIG_KVM)  += kvm.o
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2692b14fb605..02fa0d30407f 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -52,6 +52,7 @@ static inline u64 rsvd_bits(int s, int e)
 }
 
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask);
+void kvm_mmu_set_spte_init_value(u64 init_value);
 
 void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 22c83192bba1..810e22f41306 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -253,6 +253,7 @@ static u64 __read_mostly shadow_mmio_value;
 static u64 __read_mostly shadow_mmio_access_mask;
 static u64 __read_mostly shadow_present_mask;
 static u64 __read_mostly shadow_me_mask;
+static u64 __read_mostly shadow_init_value;
 
 /*
  * SPTEs used by MMUs without A/D bits are marked with SPTE_AD_DISABLED_MASK;
@@ -542,6 +543,12 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 
accessed_mask,
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
+void kvm_mmu_set_spte_init_value(u64 init_value)
+{
+   shadow_init_value = init_value;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_spte_init_value);
+
 static u8 kvm_get_shadow_phys_bits(void)
 {
/*
@@ -572,6 +579,7 @@ static void kvm_mmu_reset_all_pte_masks(void)
shadow_x_mask = 0;
shadow_present_mask = 0;
shadow_acc_track_mask = 0;
+   shadow_init_value = 0;
 
shadow_phys_bits = kvm_get_shadow_phys_bits();
 
@@ -612,7 +620,7 @@ static int is_nx(struct kvm_vcpu *vcpu)
 
 static int is_shadow_present_pte(u64 pte)
 {
-   return (pte != 0) && !is_mmio_spte(pte);
+   return (pte != 0) && pte != shadow_init_value && !is_mmio_spte(pte);
 }
 
 static int is_large_pte(u64 pte)
@@ -923,9 +931,9 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
u64 old_spte = *sptep;
 
if (!spte_has_volatile_bits(old_spte))
-   __update_clear_spte_fast(sptep, 0ull);
+   __update_clear_spte_fast(sptep, shadow_init_value);
else
-   old_spte = __update_clear_spte_slow(sptep, 0ull);
+   old_spte = __update_clear_spte_slow(sptep, shadow_init_value);
 
if (!is_shadow_present_pte(old_spte))
return 0;
@@ -955,7 +963,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
  */
 static void mmu_spte_clear_no_track(u64 *sptep)
 {
-   __update_clear_spte_fast(sptep, 0ull);
+   __update_clear_spte_fast(sptep, shadow_init_value);
 }
 
 static u64 mmu_spte_get_lockless(u64 *sptep)
@@ -2660,7 +2668,7 @@ static struct kvm_mmu_p

[RFC PATCH v1 26/34] KVM: x86: add .set_ve_info()

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

This function is needed for the KVMI_VCPU_SET_VE_INFO command.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/include/asm/vmx.h  |  1 +
 arch/x86/kvm/vmx/vmx.c  | 40 +
 3 files changed, 43 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e89cea041ec9..4cee641af48e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1314,6 +1314,8 @@ struct kvm_x86_ops {
u16 (*get_ept_view)(struct kvm_vcpu *vcpu);
int (*set_ept_view)(struct kvm_vcpu *vcpu, u16 view);
int (*control_ept_view)(struct kvm_vcpu *vcpu, u16 view, u8 visible);
+   int (*set_ve_info)(struct kvm_vcpu *vcpu, unsigned long ve_info,
+   bool trigger_vmexit);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 8082158e3e96..222fe9c7f463 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -157,6 +157,7 @@ static inline int vmx_misc_mseg_revid(u64 vmx_misc)
 enum vmcs_field {
VIRTUAL_PROCESSOR_ID= 0x,
POSTED_INTR_NV  = 0x0002,
+   EPTP_INDEX  = 0x0004,
GUEST_ES_SELECTOR   = 0x0800,
GUEST_CS_SELECTOR   = 0x0802,
GUEST_SS_SELECTOR   = 0x0804,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b65bd0d144e5..871cc49063d8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4425,6 +4425,45 @@ static int vmx_control_ept_view(struct kvm_vcpu *vcpu, 
u16 view, u8 visible)
return 0;
 }
 
+static int vmx_set_ve_info(struct kvm_vcpu *vcpu, unsigned long ve_info,
+   bool trigger_vmexit)
+{
+   struct page *ve_info_pg;
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+   int idx;
+   u32 eb;
+
+   if (!kvm_ve_supported)
+   return -KVM_EOPNOTSUPP;
+
+   idx = srcu_read_lock(>kvm->srcu);
+   ve_info_pg = kvm_vcpu_gpa_to_page(vcpu, ve_info);
+   srcu_read_unlock(>kvm->srcu, idx);
+
+   if (is_error_page(ve_info_pg))
+   return -KVM_EINVAL;
+
+   vmcs_write64(VE_INFO_ADDRESS, page_to_phys(ve_info_pg));
+
+   /* Make sure EPTP_INDEX is up-to-date before enabling #VE */
+   vmcs_write16(EPTP_INDEX, vmx->view);
+
+   /* Enable #VE mechanism */
+   secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_EPT_VE);
+
+   /* Decide if #VE exception should trigger a VM exit */
+   eb = vmcs_read32(EXCEPTION_BITMAP);
+
+   if (trigger_vmexit)
+   eb |= (1u << VE_VECTOR);
+   else
+   eb &= ~(1u << VE_VECTOR);
+
+   vmcs_write32(EXCEPTION_BITMAP, eb);
+
+   return 0;
+}
+
 #define VMX_XSS_EXIT_BITMAP 0
 
 /*
@@ -8350,6 +8389,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.get_ept_view = vmx_get_ept_view,
.set_ept_view = vmx_set_ept_view,
.control_ept_view = vmx_control_ept_view,
+   .set_ve_info = vmx_set_ve_info,
 };
 
 static __init int hardware_setup(void)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 34/34] KVM: introspection: add KVMI_VM_SET_PAGE_SVE

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

This command is used by the introspection tool to set/clear
the suppress-VE bit for specific guest memory pages.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 42 +
 arch/x86/include/uapi/asm/kvmi.h  |  8 ++
 arch/x86/kvm/kvmi.c   |  1 +
 include/uapi/linux/kvmi.h |  3 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 91 ++-
 virt/kvm/introspection/kvmi.c | 29 +-
 virt/kvm/introspection/kvmi_int.h |  1 +
 virt/kvm/introspection/kvmi_msg.c | 23 +
 8 files changed, 196 insertions(+), 2 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index c50c40638d46..0f87442f6881 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -1293,6 +1293,48 @@ triggered the EPT violation within a specific EPT view.
 * -KVM_EINVAL - the selected vCPU is invalid
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+31. KVMI_VM_SET_PAGE_SVE
+
+
+:Architecture: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vm_set_page_sve {
+   __u16 view;
+   __u8 suppress;
+   __u8 padding1;
+   __u32 padding2;
+   __u64 gpa;
+   };
+
+:Returns:
+
+::
+
+struct kvmi_error_code;
+
+Configures the spte 63rd bit (Suppress #VE, SVE) for ``gpa`` on the
+provided EPT ``view``. If the ``suppress`` field is 1, the SVE bit will be
+set. If it is 0, the SVE bit will be cleared.
+
+If the SVE bit is cleared, EPT violations generated by the provided
+guest physical address will trigger a #VE instead of a #PF, which is
+delivered using gate descriptor 20 in the IDT.
+
+Before configuring the SVE bit, the introspection tool should use
+*KVMI_GET_VERSION* to check if the hardware has support for the #VE
+mechanism (see **KVMI_GET_VERSION**).
+
+:Errors:
+
+* -KVM_EINVAL - padding is not zero
+* -KVM_ENOMEM - not enough memory to add the page tracking structures
+* -KVM_EOPNOTSUPP - an EPT view was selected but the hardware doesn't support 
it
+* -KVM_EINVAL - the selected EPT view is not valid
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index d925e6d49f50..17b02624cb4d 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -182,4 +182,12 @@ struct kvmi_vcpu_set_ve_info {
__u32 padding3;
 };
 
+struct kvmi_vm_set_page_sve {
+   __u16 view;
+   __u8 suppress;
+   __u8 padding1;
+   __u32 padding2;
+   __u64 gpa;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index e101ac390809..f3c488f703ec 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1214,6 +1214,7 @@ static const struct {
{ KVMI_PAGE_ACCESS_R, KVM_PAGE_TRACK_PREREAD },
{ KVMI_PAGE_ACCESS_W, KVM_PAGE_TRACK_PREWRITE },
{ KVMI_PAGE_ACCESS_X, KVM_PAGE_TRACK_PREEXEC },
+   { KVMI_PAGE_SVE,  KVM_PAGE_TRACK_SVE },
 };
 
 void kvmi_arch_update_page_tracking(struct kvm *kvm,
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index a17cd1fa16d0..110fb011260b 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -55,6 +55,8 @@ enum {
KVMI_VCPU_SET_VE_INFO= 29,
KVMI_VCPU_DISABLE_VE = 30,
 
+   KVMI_VM_SET_PAGE_SVE = 31,
+
KVMI_NUM_MESSAGES
 };
 
@@ -84,6 +86,7 @@ enum {
KVMI_PAGE_ACCESS_R = 1 << 0,
KVMI_PAGE_ACCESS_W = 1 << 1,
KVMI_PAGE_ACCESS_X = 1 << 2,
+   KVMI_PAGE_SVE  = 1 << 3,
 };
 
 struct kvmi_msg_hdr {
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index a3ea22f546ec..0dc5b150a739 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -19,6 +19,7 @@
 
 #include "linux/kvm_para.h"
 #include "linux/kvmi.h"
+#include "asm/kvmi.h"
 
 #define KVM_MAX_EPT_VIEWS 3
 
@@ -39,6 +40,15 @@ static vm_vaddr_t test_ve_info_gva;
 static void *test_ve_info_hva;
 static vm_paddr_t test_ve_info_gpa;
 
+struct vcpu_ve_info {
+   u32 exit_reason;
+   u32 unused;
+   u64 exit_qualification;
+   u64 gva;
+   u64 gpa;
+   u16 eptp_index;
+};
+
 static uint8_t test_write_pattern;
 static int page_size;
 
@@ -53,6 +63,11 @@ struct pf_ev {
struct kvmi_event_pf pf;
 };
 
+struct exception {
+   uint32_t exception;
+   uint32_t error_code;
+};
+
 struct vcpu_worker_data {
struct kvm_vm *vm;
int vcpu_id;
@@ -61,6 +76,8 @@ struct vcpu_worker_data {
bool shutdown;
bool restart_on_shutdown;
bool run_guest_once;
+   bool expect_exception;
+   struct 

[RFC PATCH v1 15/34] KVM: introspection: add KVMI_VCPU_SET_EPT_VIEW

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

The introspection tool uses this function to check the hardware support
for EPT switching, which can be used to single-step vCPUs
on an unprotected EPT view.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 36 
 arch/x86/include/uapi/asm/kvmi.h  |  6 +++
 arch/x86/kvm/kvmi.c   |  9 
 include/uapi/linux/kvmi.h |  1 +
 .../testing/selftests/kvm/x86_64/kvmi_test.c  | 43 +++
 virt/kvm/introspection/kvmi_int.h |  6 +++
 virt/kvm/introspection/kvmi_msg.c | 20 +
 7 files changed, 121 insertions(+)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 76a2d0125f78..02f03c62adef 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -1154,6 +1154,42 @@ be zero.
 * -KVM_EINVAL - the padding is not zero
 * -KVM_EAGAIN - the selected vCPU can't be introspected yet
 
+27. KVMI_VCPU_SET_EPT_VIEW
+--
+
+:Architecture: x86
+:Versions: >= 1
+:Parameters:
+
+::
+
+   struct kvmi_vcpu_hdr;
+   struct kvmi_vcpu_set_ept_view {
+   __u16 view;
+   __u16 padding1;
+   __u32 padding2;
+   };
+
+:Returns:
+
+::
+
+   struct kvmi_error_code;
+
+Configures the vCPU to use the provided ``view``.
+
+Before switching EPT views, the introspection tool should use
+*KVMI_GET_VERSION* to check if the hardware has support for VMFUNC and
+EPTP switching mechanism (see **KVMI_GET_VERSION**).
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - padding is not zero
+* -KVM_EINVAL - the selected EPT view is invalid
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EOPNOTSUPP - an EPT view was selected but the hardware doesn't support 
it
+
 Events
 ==
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index a13a98fa863f..f7a080d5e227 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -160,4 +160,10 @@ struct kvmi_vcpu_get_ept_view_reply {
__u32 padding2;
 };
 
+struct kvmi_vcpu_set_ept_view {
+   __u16 view;
+   __u16 padding1;
+   __u32 padding2;
+};
+
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 292606902338..99ea8ef70be2 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1423,3 +1423,12 @@ u16 kvmi_arch_cmd_get_ept_view(struct kvm_vcpu *vcpu)
 {
return kvm_get_ept_view(vcpu);
 }
+
+int kvmi_arch_cmd_set_ept_view(struct kvm_vcpu *vcpu, u16 view)
+{
+
+   if (!kvm_x86_ops.set_ept_view)
+   return -KVM_EINVAL;
+
+   return kvm_x86_ops.set_ept_view(vcpu, view);
+}
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index cf3422ec60a8..8204661d944d 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -50,6 +50,7 @@ enum {
KVMI_VCPU_CONTROL_SINGLESTEP = 24,
KVMI_VCPU_TRANSLATE_GVA  = 25,
KVMI_VCPU_GET_EPT_VIEW   = 26,
+   KVMI_VCPU_SET_EPT_VIEW   = 27,
 
KVMI_NUM_MESSAGES
 };
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index 74eafbcae14a..c6f7d10563db 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -20,6 +20,8 @@
 #include "linux/kvm_para.h"
 #include "linux/kvmi.h"
 
+#define KVM_MAX_EPT_VIEWS 3
+
 #define VCPU_ID 5
 
 #define X86_FEATURE_XSAVE  (1<<26)
@@ -2098,6 +2100,46 @@ static void test_cmd_vcpu_get_ept_view(struct kvm_vm *vm)
pr_info("EPT view %u\n", view);
 }
 
+static void set_ept_view(struct kvm_vm *vm, __u16 view)
+{
+   struct {
+   struct kvmi_msg_hdr hdr;
+   struct kvmi_vcpu_hdr vcpu_hdr;
+   struct kvmi_vcpu_set_ept_view cmd;
+   } req = {};
+
+   req.cmd.view = view;
+
+   test_vcpu0_command(vm, KVMI_VCPU_SET_EPT_VIEW,
+  , sizeof(req), NULL, 0);
+}
+
+static void test_cmd_vcpu_set_ept_view(struct kvm_vm *vm)
+{
+   __u16 old_view;
+   __u16 new_view;
+   __u16 check_view;
+
+   if (!features.eptp) {
+   print_skip("EPT views not supported");
+   return;
+   }
+
+   old_view = get_ept_view(vm);
+
+   new_view = (old_view + 1) % KVM_MAX_EPT_VIEWS;
+   pr_info("Change EPT view from %u to %u\n", old_view, new_view);
+   set_ept_view(vm, new_view);
+
+   check_view = get_ept_view(vm);
+   TEST_ASSERT(check_view == new_view,
+   "Switching EPT view failed, found ept view (%u), 
expected view (%u)\n",
+   check_view, new_view);
+
+   pr_info("Change EPT view from %u to %u\n"

[RFC PATCH v1 21/34] KVM: VMX: Define EPT suppress #VE bit (bit 63 in EPT leaf entries)

2020-07-22 Thread Adalbert Lazăr
From: Sean Christopherson 

VMX provides a capability that allows EPT violations to be reflected
into the guest as Virtualization Exceptions (#VE).  The primary use case
of EPT violation #VEs is to improve the performance of virtualization-
based security solutions, e.g. eliminate a VM-Exit -> VM-Exit roundtrip
when utilizing EPT to protect privileged data structures or code.

The "Suppress #VE" bit allows a VMM to opt-out of EPT violation #VEs on
a per page basis, e.g. when a page is marked not-present due to lazy
installation or is write-protected for dirty page logging.

The "Suppress #VE" bit is ignored:

  - By hardware that does not support EPT violation #VEs
  - When the EPT violation #VE VMCS control is disabled
  - On non-leaf EPT entries

Signed-off-by: Sean Christopherson 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/vmx.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 177500e9e68c..8082158e3e96 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -498,6 +498,7 @@ enum vmcs_field {
 #define VMX_EPT_IPAT_BIT   (1ull << 6)
 #define VMX_EPT_ACCESS_BIT (1ull << 8)
 #define VMX_EPT_DIRTY_BIT  (1ull << 9)
+#define VMX_EPT_SUPPRESS_VE_BIT(1ull << 63)
 #define VMX_EPT_RWX_MASK(VMX_EPT_READABLE_MASK |   
\
 VMX_EPT_WRITABLE_MASK |   \
 VMX_EPT_EXECUTABLE_MASK)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 12/34] KVM: introspection: extend struct kvmi_features with the EPT views status support

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

The introspection tool will use these new fields to check
the hardware support before using the related introspection commands.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst| 6 --
 arch/x86/include/uapi/asm/kvmi.h   | 4 +++-
 arch/x86/kvm/kvmi.c| 4 
 tools/testing/selftests/kvm/x86_64/kvmi_test.c | 2 ++
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
index 62138fa4b65c..234eacec4db1 100644
--- a/Documentation/virt/kvm/kvmi.rst
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -263,11 +263,13 @@ For x86
 
struct kvmi_features {
__u8 singlestep;
-   __u8 padding[7];
+   __u8 vmfunc;
+   __u8 eptp;
+   __u8 padding[5];
};
 
 Returns the introspection API version and some of the features supported
-by the hardware.
+by the hardware (e.g. alternate EPT views).
 
 This command is always allowed and successful.
 
diff --git a/arch/x86/include/uapi/asm/kvmi.h b/arch/x86/include/uapi/asm/kvmi.h
index 32af803f1d70..51b399d50a2a 100644
--- a/arch/x86/include/uapi/asm/kvmi.h
+++ b/arch/x86/include/uapi/asm/kvmi.h
@@ -147,7 +147,9 @@ struct kvmi_event_msr_reply {
 
 struct kvmi_features {
__u8 singlestep;
-   __u8 padding[7];
+   __u8 vmfunc;
+   __u8 eptp;
+   __u8 padding[5];
 };
 
 #endif /* _UAPI_ASM_X86_KVMI_H */
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 7b3b64d27d18..25c1f8f2e221 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -1356,6 +1356,10 @@ static void kvmi_track_flush_slot(struct kvm *kvm, 
struct kvm_memory_slot *slot,
 void kvmi_arch_features(struct kvmi_features *feat)
 {
feat->singlestep = !!kvm_x86_ops.control_singlestep;
+   feat->vmfunc = kvm_x86_ops.get_vmfunc_status &&
+   kvm_x86_ops.get_vmfunc_status();
+   feat->eptp = kvm_x86_ops.get_eptp_switching_status &&
+   kvm_x86_ops.get_eptp_switching_status();
 }
 
 bool kvmi_arch_start_singlestep(struct kvm_vcpu *vcpu)
diff --git a/tools/testing/selftests/kvm/x86_64/kvmi_test.c 
b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
index e968b1a6f969..33fffcb3a171 100644
--- a/tools/testing/selftests/kvm/x86_64/kvmi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvmi_test.c
@@ -443,6 +443,8 @@ static void test_cmd_get_version(void)
 
pr_info("KVMI version: %u\n", rpl.version);
pr_info("\tsinglestep: %u\n", features.singlestep);
+   pr_info("\tvmfunc: %u\n", features.vmfunc);
+   pr_info("\teptp: %u\n", features.eptp);
 }
 
 static void cmd_vm_check_command(__u16 id, __u16 padding, int expected_err)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 06/34] KVM: x86: mmu: add support for EPT switching

2020-07-22 Thread Adalbert Lazăr
From: Marian Rotariu 

The introspection tool uses this function to check the hardware support
for EPT switching, which can be used either to single-step vCPUs
on an unprotected EPT view or to use #VE in order to avoid
VM-exits caused by EPT violations.

Signed-off-by: Marian Rotariu 
Co-developed-by: Ștefan Șicleru 
Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu/mmu.c  | 12 ++--
 arch/x86/kvm/vmx/vmx.c  | 98 +
 arch/x86/kvm/vmx/vmx.h  |  1 +
 4 files changed, 108 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bd45778e0904..1035308940fe 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -421,6 +421,7 @@ struct kvm_mmu {
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
   u64 *spte, const void *pte);
hpa_t root_hpa;
+   hpa_t root_hpa_altviews[KVM_MAX_EPT_VIEWS];
gpa_t root_pgd;
union kvm_mmu_role mmu_role;
u8 root_level;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0b6527a1ebe6..553425ab3518 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3760,8 +3760,11 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct 
kvm_mmu *mmu,
if (free_active_root) {
if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
(mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
-   mmu_free_root_page(vcpu->kvm, >root_hpa,
-  _list);
+   for (i = 0; i < KVM_MAX_EPT_VIEWS; i++)
+   mmu_free_root_page(vcpu->kvm,
+  mmu->root_hpa_altviews + i,
+  _list);
+   mmu->root_hpa = INVALID_PAGE;
} else {
for (i = 0; i < 4; ++i)
if (mmu->pae_root[i] != 0)
@@ -3821,9 +3824,10 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
  shadow_root_level, true, i);
if (!VALID_PAGE(root))
return -ENOSPC;
-   if (i == 0)
-   vcpu->arch.mmu->root_hpa = root;
+   vcpu->arch.mmu->root_hpa_altviews[i] = root;
}
+   vcpu->arch.mmu->root_hpa =
+ vcpu->arch.mmu->root_hpa_altviews[kvm_get_ept_view(vcpu)];
} else if (shadow_root_level == PT32E_ROOT_LEVEL) {
for (i = 0; i < 4; ++i) {
MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu->pae_root[i]));
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0256c3a93c87..2024ef4d9a74 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3124,6 +3124,32 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long 
root_hpa)
return eptp;
 }
 
+static void vmx_construct_eptp_with_index(struct kvm_vcpu *vcpu,
+ unsigned short view)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+   u64 *eptp_list = NULL;
+
+   if (!vmx->eptp_list_pg)
+   return;
+
+   eptp_list = phys_to_virt(page_to_phys(vmx->eptp_list_pg));
+
+   if (!eptp_list)
+   return;
+
+   eptp_list[view] = construct_eptp(vcpu,
+   vcpu->arch.mmu->root_hpa_altviews[view]);
+}
+
+static void vmx_construct_eptp_list(struct kvm_vcpu *vcpu)
+{
+   unsigned short view;
+
+   for (view = 0; view < KVM_MAX_EPT_VIEWS; view++)
+   vmx_construct_eptp_with_index(vcpu, view);
+}
+
 void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd)
 {
struct kvm *kvm = vcpu->kvm;
@@ -3135,6 +3161,8 @@ void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned 
long pgd)
eptp = construct_eptp(vcpu, pgd);
vmcs_write64(EPT_POINTER, eptp);
 
+   vmx_construct_eptp_list(vcpu);
+
if (kvm_x86_ops.tlb_remote_flush) {
spin_lock(_kvm_vmx(kvm)->ept_pointer_lock);
to_vmx(vcpu)->ept_pointer = eptp;
@@ -4336,6 +4364,15 @@ static void ept_set_mmio_spte_mask(void)
kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE, 0);
 }
 
+static int vmx_alloc_eptp_list_page(struct vcpu_vmx *vmx)
+{
+   vmx->eptp_list_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
+   if (!vmx->eptp_list_pg)
+   return -ENOMEM;
+
+   return 0;
+}
+
 #define VMX_XSS_EXIT_BITMAP 0
 
 /*
@@ -4426,6 +4463,10 @@ static void init_vmcs(struct vcpu_vmx *vmx)
  

[RFC PATCH v1 07/34] KVM: x86: mmu: increase mmu_memory_cache size

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

We use/allocate more root_hpa's every time mmu_alloc_roots()
is called.

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/mmu/mmu.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 553425ab3518..70461c7ef58c 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1119,11 +1119,13 @@ static int mmu_topup_memory_caches(struct kvm_vcpu 
*vcpu)
   pte_list_desc_cache, 8 + PTE_PREFETCH_NUM);
if (r)
goto out;
-   r = mmu_topup_memory_cache_page(>arch.mmu_page_cache, 8);
+   r = mmu_topup_memory_cache_page(>arch.mmu_page_cache,
+   8 * KVM_MAX_EPT_VIEWS);
if (r)
goto out;
r = mmu_topup_memory_cache(>arch.mmu_page_header_cache,
-  mmu_page_header_cache, 4);
+  mmu_page_header_cache,
+  4 * KVM_MAX_EPT_VIEWS);
 out:
return r;
 }
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v1 25/34] KVM: x86: svm: set .clear_page()

2020-07-22 Thread Adalbert Lazăr
From: Ștefan Șicleru 

Signed-off-by: Ștefan Șicleru 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/svm/svm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 5c2d4a0c3d31..1c78b913eb5d 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4324,6 +4324,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.fault_gla = svm_fault_gla,
.spt_fault = svm_spt_fault,
.gpt_translation_fault = svm_gpt_translation_fault,
+   .clear_page = clear_page,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v11 09/81] KVM: x86: add kvm_x86_ops.control_cr3_intercept()

2020-12-07 Thread Adalbert Lazăr
This function is needed for the KVMI_VCPU_CONTROL_CR command, when the
introspection tool has to intercept the read/write access to CR3.

Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |  6 ++
 arch/x86/kvm/svm/svm.c  | 14 ++
 arch/x86/kvm/vmx/vmx.c  | 26 --
 3 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e46fee59d4ed..0eeb1d829a1d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -137,6 +137,10 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t 
base_gfn, int level)
 #define KVM_NR_FIXED_MTRR_REGION 88
 #define KVM_NR_VAR_MTRR 8
 
+#define CR_TYPE_R  1
+#define CR_TYPE_W  2
+#define CR_TYPE_RW 3
+
 #define ASYNC_PF_PER_VCPU 64
 
 enum kvm_reg {
@@ -1118,6 +1122,8 @@ struct kvm_x86_ops {
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
+   void (*control_cr3_intercept)(struct kvm_vcpu *vcpu, int type,
+ bool enable);
int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 95c7072cde8e..4f28fa035048 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1707,6 +1707,19 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long 
cr4)
kvm_update_cpuid_runtime(vcpu);
 }
 
+static void svm_control_cr3_intercept(struct kvm_vcpu *vcpu, int type,
+ bool enable)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   if (type & CR_TYPE_R)
+   enable ? svm_set_intercept(svm, INTERCEPT_CR3_READ) :
+svm_clr_intercept(svm, INTERCEPT_CR3_READ);
+   if (type & CR_TYPE_W)
+   enable ? svm_set_intercept(svm, INTERCEPT_CR3_WRITE) :
+svm_clr_intercept(svm, INTERCEPT_CR3_WRITE);
+}
+
 static void svm_set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
 {
@@ -4233,6 +4246,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_cr0 = svm_set_cr0,
.is_valid_cr4 = svm_is_valid_cr4,
.set_cr4 = svm_set_cr4,
+   .control_cr3_intercept = svm_control_cr3_intercept,
.set_efer = svm_set_efer,
.get_idt = svm_get_idt,
.set_idt = svm_set_idt,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 93a97aa3d847..c5a53642d1c0 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2978,24 +2978,37 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR);
 }
 
+static void vmx_control_cr3_intercept(struct kvm_vcpu *vcpu, int type,
+ bool enable)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+   u32 cr3_exec_control = 0;
+
+   if (type & CR_TYPE_R)
+   cr3_exec_control |= CPU_BASED_CR3_STORE_EXITING;
+   if (type & CR_TYPE_W)
+   cr3_exec_control |= CPU_BASED_CR3_LOAD_EXITING;
+
+   if (enable)
+   exec_controls_setbit(vmx, cr3_exec_control);
+   else
+   exec_controls_clearbit(vmx, cr3_exec_control);
+}
+
 static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
unsigned long cr0,
struct kvm_vcpu *vcpu)
 {
-   struct vcpu_vmx *vmx = to_vmx(vcpu);
-
if (!kvm_register_is_available(vcpu, VCPU_EXREG_CR3))
vmx_cache_reg(vcpu, VCPU_EXREG_CR3);
if (!(cr0 & X86_CR0_PG)) {
/* From paging/starting to nonpaging */
-   exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
- CPU_BASED_CR3_STORE_EXITING);
+   vmx_control_cr3_intercept(vcpu, CR_TYPE_RW, true);
vcpu->arch.cr0 = cr0;
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
} else if (!is_paging(vcpu)) {
/* From nonpaging to paging */
-   exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
-   CPU_BASED_CR3_STORE_EXITING);
+   vmx_control_cr3_intercept(vcpu, CR_TYPE_RW, false);
vcpu->arch.cr0 = cr0;
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
}
@@ -7629,6 +7642,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_cr0 = vmx_set_cr0,
.is_valid_cr4 = vmx_is_valid_cr4,
.set_cr4 = vmx_set_cr4,
+   

[PATCH v11 13/81] KVM: x86: add kvm_x86_ops.control_desc_intercept()

2020-12-07 Thread Adalbert Lazăr
This function is needed to intercept descriptor-table registers access.

Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm/svm.c  | 26 ++
 arch/x86/kvm/vmx/vmx.c  | 15 +--
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1e9cb521324e..730429cd2e3d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1131,6 +1131,7 @@ struct kvm_x86_ops {
void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
bool (*desc_ctrl_supported)(void);
+   void (*control_desc_intercept)(struct kvm_vcpu *vcpu, bool enable);
void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 00bda794609c..c8e56ad9cbb1 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1635,6 +1635,31 @@ static bool svm_desc_ctrl_supported(void)
return true;
 }
 
+static void svm_control_desc_intercept(struct kvm_vcpu *vcpu, bool enable)
+{
+   struct vcpu_svm *svm = to_svm(vcpu);
+
+   if (enable) {
+   svm_set_intercept(svm, INTERCEPT_STORE_IDTR);
+   svm_set_intercept(svm, INTERCEPT_STORE_GDTR);
+   svm_set_intercept(svm, INTERCEPT_STORE_LDTR);
+   svm_set_intercept(svm, INTERCEPT_STORE_TR);
+   svm_set_intercept(svm, INTERCEPT_LOAD_IDTR);
+   svm_set_intercept(svm, INTERCEPT_LOAD_GDTR);
+   svm_set_intercept(svm, INTERCEPT_LOAD_LDTR);
+   svm_set_intercept(svm, INTERCEPT_LOAD_TR);
+   } else {
+   svm_clr_intercept(svm, INTERCEPT_STORE_IDTR);
+   svm_clr_intercept(svm, INTERCEPT_STORE_GDTR);
+   svm_clr_intercept(svm, INTERCEPT_STORE_LDTR);
+   svm_clr_intercept(svm, INTERCEPT_STORE_TR);
+   svm_clr_intercept(svm, INTERCEPT_LOAD_IDTR);
+   svm_clr_intercept(svm, INTERCEPT_LOAD_GDTR);
+   svm_clr_intercept(svm, INTERCEPT_LOAD_LDTR);
+   svm_clr_intercept(svm, INTERCEPT_LOAD_TR);
+   }
+}
+
 static void update_cr0_intercept(struct vcpu_svm *svm)
 {
ulong gcr0 = svm->vcpu.arch.cr0;
@@ -4281,6 +4306,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.get_gdt = svm_get_gdt,
.set_gdt = svm_set_gdt,
.desc_ctrl_supported = svm_desc_ctrl_supported,
+   .control_desc_intercept = svm_control_desc_intercept,
.set_dr7 = svm_set_dr7,
.sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
.cache_reg = svm_cache_reg,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a5e1f61d2622..20351e027898 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3120,6 +3120,16 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, 
unsigned long pgd,
vmcs_writel(GUEST_CR3, guest_cr3);
 }
 
+static void vmx_control_desc_intercept(struct kvm_vcpu *vcpu, bool enable)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   if (enable)
+   secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
+   else
+   secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC);
+}
+
 static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
/*
@@ -3157,11 +3167,11 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long 
cr4)
 
if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
if (cr4 & X86_CR4_UMIP) {
-   secondary_exec_controls_setbit(vmx, 
SECONDARY_EXEC_DESC);
+   vmx_control_desc_intercept(vcpu, true);
hw_cr4 &= ~X86_CR4_UMIP;
} else if (!is_guest_mode(vcpu) ||
!nested_cpu_has2(get_vmcs12(vcpu), 
SECONDARY_EXEC_DESC)) {
-   secondary_exec_controls_clearbit(vmx, 
SECONDARY_EXEC_DESC);
+   vmx_control_desc_intercept(vcpu, false);
}
}
 
@@ -7657,6 +7667,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.get_gdt = vmx_get_gdt,
.set_gdt = vmx_set_gdt,
.desc_ctrl_supported = vmx_desc_ctrl_supported,
+   .control_desc_intercept = vmx_control_desc_intercept,
.set_dr7 = vmx_set_dr7,
.sync_dirty_debug_regs = vmx_sync_dirty_debug_regs,
.cache_reg = vmx_cache_reg,
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v11 20/81] KVM: x86: add kvm_x86_ops.fault_gla()

2020-12-07 Thread Adalbert Lazăr
From: Mihai Donțu 

This function is needed for kvmi_update_ad_flags()
and kvm_page_track_emulation_failure().

kvmi_update_ad_flags() uses the existing guest page table walk code to
update the A/D bits and return to guest (when the introspection tool
write-protects the guest page tables).

kvm_page_track_emulation_failure() calls the page tracking code, that
can trigger an event for the introspection tool (which might need the
GVA in addition to the GPA).

Signed-off-by: Mihai Donțu 
Co-developed-by: Nicușor Cîțu 
Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 2 ++
 arch/x86/include/asm/vmx.h  | 2 ++
 arch/x86/kvm/svm/svm.c  | 9 +
 arch/x86/kvm/vmx/vmx.c  | 9 +
 4 files changed, 22 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 86048037da23..45c72af05fa2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1303,6 +1303,8 @@ struct kvm_x86_ops {
 
void (*migrate_timers)(struct kvm_vcpu *vcpu);
void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
+
+   u64 (*fault_gla)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 38ca445a8429..5543332292b5 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -544,6 +544,7 @@ enum vm_entry_failure_code {
 #define EPT_VIOLATION_READABLE_BIT 3
 #define EPT_VIOLATION_WRITABLE_BIT 4
 #define EPT_VIOLATION_EXECUTABLE_BIT   5
+#define EPT_VIOLATION_GLA_VALID_BIT7
 #define EPT_VIOLATION_GVA_TRANSLATED_BIT 8
 #define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT)
 #define EPT_VIOLATION_ACC_WRITE(1 << 
EPT_VIOLATION_ACC_WRITE_BIT)
@@ -551,6 +552,7 @@ enum vm_entry_failure_code {
 #define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT)
 #define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT)
 #define EPT_VIOLATION_EXECUTABLE   (1 << EPT_VIOLATION_EXECUTABLE_BIT)
+#define EPT_VIOLATION_GLA_VALID(1 << 
EPT_VIOLATION_GLA_VALID_BIT)
 #define EPT_VIOLATION_GVA_TRANSLATED   (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT)
 
 /*
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 43a2e4ec6178..c6730ec39c58 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4314,6 +4314,13 @@ static int svm_vm_init(struct kvm *kvm)
return 0;
 }
 
+static u64 svm_fault_gla(struct kvm_vcpu *vcpu)
+{
+   const struct vcpu_svm *svm = to_svm(vcpu);
+
+   return svm->vcpu.arch.cr2 ? svm->vcpu.arch.cr2 : ~0ull;
+}
+
 static struct kvm_x86_ops svm_x86_ops __initdata = {
.hardware_unsetup = svm_hardware_teardown,
.hardware_enable = svm_hardware_enable,
@@ -4442,6 +4449,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.apic_init_signal_blocked = svm_apic_init_signal_blocked,
 
.msr_filter_changed = svm_msr_filter_changed,
+
+   .fault_gla = svm_fault_gla,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d5d4203378d3..41ea1ee9d419 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7641,6 +7641,13 @@ static int vmx_cpu_dirty_log_size(void)
return enable_pml ? PML_ENTITY_NUM : 0;
 }
 
+static u64 vmx_fault_gla(struct kvm_vcpu *vcpu)
+{
+   if (vcpu->arch.exit_qualification & EPT_VIOLATION_GLA_VALID)
+   return vmcs_readl(GUEST_LINEAR_ADDRESS);
+   return ~0ull;
+}
+
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
.hardware_unsetup = hardware_unsetup,
 
@@ -7779,6 +7786,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 
.msr_filter_changed = vmx_msr_filter_changed,
.cpu_dirty_log_size = vmx_cpu_dirty_log_size,
+
+   .fault_gla = vmx_fault_gla,
 };
 
 static __init int hardware_setup(void)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v11 23/81] KVM: x86: extend kvm_mmu_gva_to_gpa_system() with the 'access' parameter

2020-12-07 Thread Adalbert Lazăr
From: Mihai Donțu 

This is needed for kvmi_update_ad_flags() to emulate a guest page
table walk on SPT violations due to A/D bit updates.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h | 2 +-
 arch/x86/kvm/x86.c  | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c2da5c24e825..3a06a7799571 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1568,7 +1568,7 @@ gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, 
gva_t gva,
 gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
   struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
-   struct x86_exception *exception);
+   u32 access, struct x86_exception *exception);
 
 bool kvm_apicv_activated(struct kvm *kvm);
 void kvm_apicv_init(struct kvm *kvm, bool enable);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00ab76366868..8eda5c3bd244 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5890,9 +5890,9 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, 
gva_t gva,
 
 /* uses this to access any guest's mapped memory without checking CPL */
 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
-   struct x86_exception *exception)
+   u32 access, struct x86_exception *exception)
 {
-   return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
+   return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
 }
 
 static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int 
bytes,
@@ -9762,7 +9762,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
vcpu_load(vcpu);
 
idx = srcu_read_lock(>kvm->srcu);
-   gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
+   gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, 0, NULL);
srcu_read_unlock(>kvm->srcu, idx);
tr->physical_address = gpa;
tr->valid = gpa != UNMAPPED_GVA;
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v11 27/81] KVM: x86: page track: provide all callbacks with the guest virtual address

2020-12-07 Thread Adalbert Lazăr
From: Mihai Donțu 

This is needed because the emulator calls the page tracking code
irrespective of the current VM-exit reason or available information.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h   |  2 +-
 arch/x86/include/asm/kvm_page_track.h | 10 ++
 arch/x86/kvm/mmu/mmu.c|  2 +-
 arch/x86/kvm/mmu/page_track.c |  6 +++---
 arch/x86/kvm/x86.c| 16 
 drivers/gpu/drm/i915/gvt/kvmgt.c  |  2 +-
 6 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 65bce8aeede5..2ffc11c5c6c0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1396,7 +1396,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned 
long kvm_nr_mmu_pages);
 int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
 bool pdptrs_changed(struct kvm_vcpu *vcpu);
 
-int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
+int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
  const void *val, int bytes);
 
 struct kvm_irq_mask_notifier {
diff --git a/arch/x86/include/asm/kvm_page_track.h 
b/arch/x86/include/asm/kvm_page_track.h
index 87bd6025d91d..9a261e463eb3 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -28,12 +28,14 @@ struct kvm_page_track_notifier_node {
 *
 * @vcpu: the vcpu where the write access happened.
 * @gpa: the physical address written by guest.
+* @gva: the virtual address written by guest.
 * @new: the data was written to the address.
 * @bytes: the written length.
 * @node: this node
 */
-   void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
-   int bytes, struct kvm_page_track_notifier_node 
*node);
+   void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+   const u8 *new, int bytes,
+   struct kvm_page_track_notifier_node *node);
/*
 * It is called when memory slot is being moved or removed
 * users can drop write-protection for the pages in that memory slot
@@ -68,7 +70,7 @@ kvm_page_track_register_notifier(struct kvm *kvm,
 void
 kvm_page_track_unregister_notifier(struct kvm *kvm,
   struct kvm_page_track_notifier_node *n);
-void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
- int bytes);
+void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ const u8 *new, int bytes);
 void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
 #endif
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 5dfe0ede0e81..1631e2367085 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4963,7 +4963,7 @@ static const union kvm_mmu_page_role role_ign = {
.invalid = 0x1,
 };
 
-static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
  const u8 *new, int bytes,
  struct kvm_page_track_notifier_node *node)
 {
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index 8443a675715b..d7a591a85af8 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -216,8 +216,8 @@ EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
  * The node should figure out if the written page is the one that node is
  * interested in by itself.
  */
-void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
- int bytes)
+void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+ const u8 *new, int bytes)
 {
struct kvm_page_track_notifier_head *head;
struct kvm_page_track_notifier_node *n;
@@ -232,7 +232,7 @@ void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, 
const u8 *new,
hlist_for_each_entry_srcu(n, >track_notifier_list, node,
srcu_read_lock_held(>track_srcu))
if (n->track_write)
-   n->track_write(vcpu, gpa, new, bytes, n);
+   n->track_write(vcpu, gpa, gva, new, bytes, n);
srcu_read_unlock(>track_srcu, idx);
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f48603c8e44d..c2f13a275448 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6115,7 +6115,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, 
unsigned long gva,
return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write);
 }
 
-int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
+int emulator_write_phys(struct kvm_vcpu *vcpu, gp

[PATCH v11 21/81] KVM: x86: add kvm_x86_ops.control_singlestep()

2020-12-07 Thread Adalbert Lazăr
From: Nicușor Cîțu 

This function is needed for KVMI_VCPU_CONTROL_SINGLESTEP.

Signed-off-by: Nicușor Cîțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/vmx/vmx.c  | 11 +++
 2 files changed, 12 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 45c72af05fa2..c2da5c24e825 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1305,6 +1305,7 @@ struct kvm_x86_ops {
void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
 
u64 (*fault_gla)(struct kvm_vcpu *vcpu);
+   void (*control_singlestep)(struct kvm_vcpu *vcpu, bool enable);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 41ea1ee9d419..1c8fbd6209ce 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7648,6 +7648,16 @@ static u64 vmx_fault_gla(struct kvm_vcpu *vcpu)
return ~0ull;
 }
 
+static void vmx_control_singlestep(struct kvm_vcpu *vcpu, bool enable)
+{
+   if (enable)
+   exec_controls_setbit(to_vmx(vcpu),
+ CPU_BASED_MONITOR_TRAP_FLAG);
+   else
+   exec_controls_clearbit(to_vmx(vcpu),
+   CPU_BASED_MONITOR_TRAP_FLAG);
+}
+
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
.hardware_unsetup = hardware_unsetup,
 
@@ -7788,6 +7798,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.cpu_dirty_log_size = vmx_cpu_dirty_log_size,
 
.fault_gla = vmx_fault_gla,
+   .control_singlestep = vmx_control_singlestep,
 };
 
 static __init int hardware_setup(void)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[PATCH v11 32/81] KVM: introduce VM introspection

2020-12-07 Thread Adalbert Lazăr
From: Mihai Donțu 

The KVM introspection subsystem provides a facility for applications
to control the execution of any running VMs (pause, resume, shutdown),
query the state of the vCPUs (GPRs, MSRs etc.), alter the page access bits
in the shadow page tables and receive notifications when events of interest
have taken place (shadow page table level faults, key MSR writes,
hypercalls etc.). Some notifications can be responded to with an action
(like preventing an MSR from being written), others are merely informative
(like breakpoint events which can be used for execution tracing).

Signed-off-by: Mihai Donțu 
Co-developed-by: Marian Rotariu 
Signed-off-by: Marian Rotariu 
Signed-off-by: Adalbert Lazăr 
---
 Documentation/virt/kvm/kvmi.rst   | 139 ++
 arch/x86/include/asm/kvm_host.h   |   2 +
 arch/x86/kvm/Kconfig  |   9 ++
 arch/x86/kvm/Makefile |   2 +
 include/linux/kvmi_host.h |  21 +
 virt/kvm/introspection/kvmi.c |  25 ++
 virt/kvm/introspection/kvmi_int.h |   7 ++
 virt/kvm/kvm_main.c   |  15 
 8 files changed, 220 insertions(+)
 create mode 100644 Documentation/virt/kvm/kvmi.rst
 create mode 100644 include/linux/kvmi_host.h
 create mode 100644 virt/kvm/introspection/kvmi.c
 create mode 100644 virt/kvm/introspection/kvmi_int.h

diff --git a/Documentation/virt/kvm/kvmi.rst b/Documentation/virt/kvm/kvmi.rst
new file mode 100644
index ..59cc33a39f9f
--- /dev/null
+++ b/Documentation/virt/kvm/kvmi.rst
@@ -0,0 +1,139 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=
+KVMI - The kernel virtual machine introspection subsystem
+=
+
+The KVM introspection subsystem provides a facility for applications running
+on the host or in a separate VM, to control the execution of any running VMs
+(pause, resume, shutdown), query the state of the vCPUs (GPRs, MSRs etc.),
+alter the page access bits in the shadow page tables (only for the hardware
+backed ones, eg. Intel's EPT) and receive notifications when events of
+interest have taken place (shadow page table level faults, key MSR writes,
+hypercalls etc.). Some notifications can be responded to with an action
+(like preventing an MSR from being written), others are merely informative
+(like breakpoint events which can be used for execution tracing).
+With few exceptions, all events are optional. An application using this
+subsystem will explicitly register for them.
+
+The use case that gave rise to the creation of this subsystem is to monitor
+the guest OS and as such the ABI/API is highly influenced by how the guest
+software (kernel, applications) sees the world. For example, some events
+provide information specific for the host CPU architecture
+(eg. MSR_IA32_SYSENTER_EIP) merely because it's leveraged by guest software
+to implement a critical feature (fast system calls).
+
+At the moment, the target audience for KVMI are security software authors
+that wish to perform forensics on newly discovered threats (exploits) or
+to implement another layer of security like preventing a large set of
+kernel rootkits simply by "locking" the kernel image in the shadow page
+tables (ie. enforce .text r-x, .rodata rw- etc.). It's the latter case that
+made KVMI a separate subsystem, even though many of these features are
+available in the device manager (eg. QEMU). The ability to build a security
+application that does not interfere (in terms of performance) with the
+guest software asks for a specialized interface that is designed for minimum
+overhead.
+
+API/ABI
+===
+
+This chapter describes the VMI interface used to monitor and control local
+guests from a user application.
+
+Overview
+
+
+The interface is socket based, one connection for every VM. One end is in the
+host kernel while the other is held by the user application (introspection
+tool).
+
+The initial connection is established by an application running on the
+host (eg. QEMU) that connects to the introspection tool and after a
+handshake the file descriptor is passed to the host kernel making all
+further communication take place between it and the introspection tool.
+
+The socket protocol allows for commands and events to be multiplexed over
+the same connection. As such, it is possible for the introspection tool to
+receive an event while waiting for the result of a command. Also, it can
+send a command while the host kernel is waiting for a reply to an event.
+
+The kernel side of the socket communication is blocking and will wait
+for an answer from its peer indefinitely or until the guest is powered
+off (killed), restarted or the peer goes away, at which point it will
+wake up and properly cleanup as if the introspection subsystem has never
+been used on that guest (if requested). Obviously, whether the guest can
+really continue normal execution depends on whether the int

[PATCH v11 07/81] KVM: x86: avoid injecting #PF when emulate the VMCALL instruction

2020-12-07 Thread Adalbert Lazăr
From: Mihai Donțu 

It can happen that we end up emulating the VMCALL instruction as a result
of the handling of an EPT write fault. In this situation,
the emulator will try to unconditionally patch the correct hypercall
opcode bytes using emulator_write_emulated(). However, this last call
uses the fault GPA (if available) or walks the guest page tables at RIP,
otherwise. The trouble begins when using VM introspection,
when we forbid the use of the fault GPA and fallback to the guest pt walk:
in Windows (8.1 and newer) the page that we try to write into
is marked read-execute and as such emulator_write_emulated() fails
and we inject a write #PF, leading to a guest crash.

Signed-off-by: Mihai Donțu 
Signed-off-by: Adalbert Lazăr 
---
 arch/x86/kvm/x86.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5951458408fb..816801d6c95d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8144,11 +8144,15 @@ static int emulator_fix_hypercall(struct 
x86_emulate_ctxt *ctxt)
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
char instruction[3];
unsigned long rip = kvm_rip_read(vcpu);
+   int err;
 
kvm_x86_ops.patch_hypercall(vcpu, instruction);
 
-   return emulator_write_emulated(ctxt, rip, instruction, 3,
+   err = emulator_write_emulated(ctxt, rip, instruction, 3,
>exception);
+   if (err == X86EMUL_PROPAGATE_FAULT)
+   err = X86EMUL_CONTINUE;
+   return err;
 }
 
 static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
___
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

<    1   2   3   4   5   6   7   >