From: Mihai Donțu <[email protected]>

From the preread, prewrite and preexec page-track callbacks we will send
KVMI_EVENT_PF events for the guest page faults caused by the access
restrictions enforced by the introspection tool.

Signed-off-by: Mihai Donțu <[email protected]>
Co-developed-by: Nicușor Cîțu <[email protected]>
Signed-off-by: Nicușor Cîțu <[email protected]>
Co-developed-by: Marian Rotariu <[email protected]>
Signed-off-by: Marian Rotariu <[email protected]>
Co-developed-by: Adalbert Lazăr <[email protected]>
Signed-off-by: Adalbert Lazăr <[email protected]>
---
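The page access restrictions are kept in a per-VM radix tree indexed by
gfn and enforced through the kvm_page_track_* API. A minimal sketch of the
intended flow, assuming a kvmi_mem_access node for the gfn already exists
(the command that creates these nodes and sets the allow bits is added by
a later patch):

	/*
	 * Sketch only: with the write bit cleared,
	 * kvmi_arch_update_page_tracking() registers the gfn for
	 * KVM_PAGE_TRACK_PREWRITE, so a guest write faults into
	 * kvmi_track_prewrite() and, from there, kvmi_arch_pf_event().
	 */
	m->access = KVMI_PAGE_ACCESS_R | KVMI_PAGE_ACCESS_X;	/* no W */
	kvmi_arch_update_page_tracking(kvm, NULL, m);
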
 arch/x86/include/asm/kvmi_host.h |  12 ++
 arch/x86/kvm/kvmi.c              |  45 +++++
 include/uapi/linux/kvmi.h        |   4 +
 virt/kvm/kvmi.c                  | 293 ++++++++++++++++++++++++++++++-
 virt/kvm/kvmi_int.h              |  21 +++
 5 files changed, 374 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/include/asm/kvmi_host.h

diff --git a/arch/x86/include/asm/kvmi_host.h b/arch/x86/include/asm/kvmi_host.h
new file mode 100644
index 000000000000..7ab6dd71a0c2
--- /dev/null
+++ b/arch/x86/include/asm/kvmi_host.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_KVMI_HOST_H
+#define _ASM_X86_KVMI_HOST_H
+
+#include <asm/kvm_host.h>
+#include <asm/kvm_page_track.h>
+
+struct kvmi_arch_mem_access {
+       unsigned long active[KVM_PAGE_TRACK_MAX][BITS_TO_LONGS(KVM_MEM_SLOTS_NUM)];
+};
+
+#endif /* _ASM_X86_KVMI_HOST_H */
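
The active[][] bitmaps record, for each tracking mode, which memslots
currently have a gfn registered with the page tracker, so that the add and
remove calls in kvmi_arch_update_page_tracking() stay balanced across
memslot changes. A sketch of the invariant (illustrative, not part of the
patch):

	/* one bit per memslot id, per tracking mode */
	if (test_bit(slot->id, m->arch.active[KVM_PAGE_TRACK_PREWRITE]))
		; /* m->gfn is already write-tracked through this slot */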
diff --git a/arch/x86/kvm/kvmi.c b/arch/x86/kvm/kvmi.c
index 97c72cdc6fb0..d7b9201582b4 100644
--- a/arch/x86/kvm/kvmi.c
+++ b/arch/x86/kvm/kvmi.c
@@ -91,6 +91,12 @@ void kvmi_arch_setup_event(struct kvm_vcpu *vcpu, struct kvmi_event *ev)
        kvmi_get_msrs(vcpu, event);
 }
 
+bool kvmi_arch_pf_event(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+                       u8 access)
+{
+       return true; /* TODO */
+}
+
 int kvmi_arch_cmd_get_vcpu_info(struct kvm_vcpu *vcpu,
                                struct kvmi_get_vcpu_info_reply *rpl)
 {
@@ -102,3 +108,42 @@ int kvmi_arch_cmd_get_vcpu_info(struct kvm_vcpu *vcpu,
        return 0;
 }
 
+static const struct {
+       unsigned int allow_bit;
+       enum kvm_page_track_mode track_mode;
+} track_modes[] = {
+       { KVMI_PAGE_ACCESS_R, KVM_PAGE_TRACK_PREREAD },
+       { KVMI_PAGE_ACCESS_W, KVM_PAGE_TRACK_PREWRITE },
+       { KVMI_PAGE_ACCESS_X, KVM_PAGE_TRACK_PREEXEC },
+};
+
+void kvmi_arch_update_page_tracking(struct kvm *kvm,
+                                   struct kvm_memory_slot *slot,
+                                   struct kvmi_mem_access *m)
+{
+       struct kvmi_arch_mem_access *arch = &m->arch;
+       int i;
+
+       if (!slot) {
+               slot = gfn_to_memslot(kvm, m->gfn);
+               if (!slot)
+                       return;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(track_modes); i++) {
+               unsigned int allow_bit = track_modes[i].allow_bit;
+               enum kvm_page_track_mode mode = track_modes[i].track_mode;
+               bool slot_tracked = test_bit(slot->id, arch->active[mode]);
+
+               if (m->access & allow_bit) {
+                       if (slot_tracked) {
+                               kvm_slot_page_track_remove_page(kvm, slot,
+                                                               m->gfn, mode);
+                               clear_bit(slot->id, arch->active[mode]);
+                       }
+               } else if (!slot_tracked) {
+                       kvm_slot_page_track_add_page(kvm, slot, m->gfn, mode);
+                       set_bit(slot->id, arch->active[mode]);
+               }
+       }
+}
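
Each allow bit maps to a pre-access tracking mode: clearing a bit adds the
gfn to the corresponding tracker, setting it removes the gfn. A sketch of
the net effect, with illustrative values:

	/* execute-only page: reads and writes become tracked */
	m->access = KVMI_PAGE_ACCESS_X;
	kvmi_arch_update_page_tracking(kvm, slot, m);
	/* PREREAD and PREWRITE are now registered for m->gfn,
	 * PREEXEC is not */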
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index aa5bc909e278..c56e676ddb2b 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -70,6 +70,10 @@ enum {
 #define KVMI_EVENT_ACTION_RETRY         1
 #define KVMI_EVENT_ACTION_CRASH         2
 
+#define KVMI_PAGE_ACCESS_R (1 << 0)
+#define KVMI_PAGE_ACCESS_W (1 << 1)
+#define KVMI_PAGE_ACCESS_X (1 << 2)
+
 #define KVMI_MSG_SIZE (4096 - sizeof(struct kvmi_msg_hdr))
 
 struct kvmi_msg_hdr {
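
These are allow bits, not deny bits: the introspection tool restricts a
page by clearing bits from the full set. Hypothetical tool-side values,
for illustration only:

	__u8 read_only = KVMI_PAGE_ACCESS_R;
	__u8 no_exec   = KVMI_PAGE_ACCESS_R | KVMI_PAGE_ACCESS_W;
	__u8 no_access = 0;	/* every access generates KVMI_EVENT_PF */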
diff --git a/virt/kvm/kvmi.c b/virt/kvm/kvmi.c
index d0d9adf5b6ed..5cbc82b284f4 100644
--- a/virt/kvm/kvmi.c
+++ b/virt/kvm/kvmi.c
@@ -11,10 +11,27 @@
 #include <linux/bitmap.h>
 
 static struct kmem_cache *msg_cache;
+static struct kmem_cache *radix_cache;
 static struct kmem_cache *job_cache;
 
 static bool kvmi_create_vcpu_event(struct kvm_vcpu *vcpu);
 static void kvmi_abort_events(struct kvm *kvm);
+static bool kvmi_track_preread(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+       u8 *new, int bytes, struct kvm_page_track_notifier_node *node,
+       bool *data_ready);
+static bool kvmi_track_prewrite(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+       const u8 *new, int bytes, struct kvm_page_track_notifier_node *node);
+static bool kvmi_track_preexec(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+       struct kvm_page_track_notifier_node *node);
+static void kvmi_track_create_slot(struct kvm *kvm,
+       struct kvm_memory_slot *slot, unsigned long npages,
+       struct kvm_page_track_notifier_node *node);
+static void kvmi_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
+       struct kvm_page_track_notifier_node *node);
+
+static const u8 full_access  = KVMI_PAGE_ACCESS_R |
+                               KVMI_PAGE_ACCESS_W |
+                               KVMI_PAGE_ACCESS_X;
 
 void *kvmi_msg_alloc(void)
 {
@@ -34,23 +51,96 @@ void kvmi_msg_free(void *addr)
                kmem_cache_free(msg_cache, addr);
 }
 
+static struct kvmi_mem_access *__kvmi_get_gfn_access(struct kvmi *ikvm,
+                                                    const gfn_t gfn)
+{
+       return radix_tree_lookup(&ikvm->access_tree, gfn);
+}
+
+static int kvmi_get_gfn_access(struct kvmi *ikvm, const gfn_t gfn,
+                              u8 *access)
+{
+       struct kvmi_mem_access *m;
+
+       *access = full_access;
+
+       read_lock(&ikvm->access_tree_lock);
+       m = __kvmi_get_gfn_access(ikvm, gfn);
+       if (m)
+               *access = m->access;
+       read_unlock(&ikvm->access_tree_lock);
+
+       return m ? 0 : -1;
+}
+
+static bool kvmi_restricted_access(struct kvmi *ikvm, gpa_t gpa, u8 access)
+{
+       u8 allowed_access;
+       int err;
+
+       err = kvmi_get_gfn_access(ikvm, gpa_to_gfn(gpa), &allowed_access);
+
+       if (err)
+               return false;
+
+       /*
+        * We want to be notified only for violations involving access
+        * bits that we've specifically cleared.
+        */
+       if ((~allowed_access) & access)
+               return true;
+
+       return false;
+}
+
+static void kvmi_clear_mem_access(struct kvm *kvm)
+{
+       void **slot;
+       struct radix_tree_iter iter;
+       struct kvmi *ikvm = IKVM(kvm);
+       int idx;
+
+       idx = srcu_read_lock(&kvm->srcu);
+       spin_lock(&kvm->mmu_lock);
+       write_lock(&ikvm->access_tree_lock);
+
+       radix_tree_for_each_slot(slot, &ikvm->access_tree, &iter, 0) {
+               struct kvmi_mem_access *m = *slot;
+
+               m->access = full_access;
+               kvmi_arch_update_page_tracking(kvm, NULL, m);
+
+               radix_tree_iter_delete(&ikvm->access_tree, &iter, slot);
+               kmem_cache_free(radix_cache, m);
+       }
+
+       write_unlock(&ikvm->access_tree_lock);
+       spin_unlock(&kvm->mmu_lock);
+       srcu_read_unlock(&kvm->srcu, idx);
+}
+
 static void kvmi_cache_destroy(void)
 {
        kmem_cache_destroy(msg_cache);
        msg_cache = NULL;
+       kmem_cache_destroy(radix_cache);
+       radix_cache = NULL;
        kmem_cache_destroy(job_cache);
        job_cache = NULL;
 }
 
 static int kvmi_cache_create(void)
 {
+       radix_cache = kmem_cache_create("kvmi_radix_tree",
+                                       sizeof(struct kvmi_mem_access),
+                                       0, SLAB_ACCOUNT, NULL);
        job_cache = kmem_cache_create("kvmi_job",
                                      sizeof(struct kvmi_job),
                                      0, SLAB_ACCOUNT, NULL);
        msg_cache = kmem_cache_create("kvmi_msg", KVMI_MSG_SIZE_ALLOC,
                                      4096, SLAB_ACCOUNT, NULL);
 
-       if (!msg_cache || !job_cache) {
+       if (!msg_cache || !radix_cache || !job_cache) {
                kvmi_cache_destroy();
 
                return -1;
@@ -77,6 +167,10 @@ static bool alloc_kvmi(struct kvm *kvm, const struct kvm_introspection *qemu)
        if (!ikvm)
                return false;
 
+       /* see the comment on radix_tree_preload() - no direct reclaim */
+       INIT_RADIX_TREE(&ikvm->access_tree, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM);
+       rwlock_init(&ikvm->access_tree_lock);
+
        atomic_set(&ikvm->ev_seq, 0);
 
        set_bit(KVMI_GET_VERSION, ikvm->cmd_allow_mask);
@@ -85,6 +179,12 @@ static bool alloc_kvmi(struct kvm *kvm, const struct kvm_introspection *qemu)
 
        memcpy(&ikvm->uuid, &qemu->uuid, sizeof(ikvm->uuid));
 
+       ikvm->kptn_node.track_preread = kvmi_track_preread;
+       ikvm->kptn_node.track_prewrite = kvmi_track_prewrite;
+       ikvm->kptn_node.track_preexec = kvmi_track_preexec;
+       ikvm->kptn_node.track_create_slot = kvmi_track_create_slot;
+       ikvm->kptn_node.track_flush_slot = kvmi_track_flush_slot;
+
        ikvm->kvm = kvm;
        kvm->kvmi = ikvm;
 
@@ -276,6 +376,179 @@ void kvmi_vcpu_uninit(struct kvm_vcpu *vcpu)
        vcpu->kvmi = NULL;
 }
 
+static bool is_pf_of_interest(struct kvm_vcpu *vcpu, gpa_t gpa, u8 access)
+{
+       struct kvm *kvm = vcpu->kvm;
+
+       if (kvm_mmu_nested_pagefault(vcpu))
+               return false;
+
+       /* Have we shown interest in this page? */
+       return kvmi_restricted_access(IKVM(kvm), gpa, access);
+}
+
+static bool __kvmi_track_preread(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+       u8 *new, int bytes, struct kvm_page_track_notifier_node *node,
+       bool *data_ready)
+{
+       bool ret;
+
+       if (!is_pf_of_interest(vcpu, gpa, KVMI_PAGE_ACCESS_R))
+               return true;
+
+       ret = kvmi_arch_pf_event(vcpu, gpa, gva, KVMI_PAGE_ACCESS_R);
+
+       return ret;
+}
+
+static bool kvmi_track_preread(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+       u8 *new, int bytes, struct kvm_page_track_notifier_node *node,
+       bool *data_ready)
+{
+       struct kvmi *ikvm;
+       bool ret = true;
+
+       ikvm = kvmi_get(vcpu->kvm);
+       if (!ikvm)
+               return true;
+
+       if (is_event_enabled(vcpu, KVMI_EVENT_PF))
+               ret = __kvmi_track_preread(vcpu, gpa, gva, new, bytes, node,
+                                          data_ready);
+
+       kvmi_put(vcpu->kvm);
+
+       return ret;
+}
+
+static bool __kvmi_track_prewrite(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+       const u8 *new, int bytes,
+       struct kvm_page_track_notifier_node *node)
+{
+       if (!is_pf_of_interest(vcpu, gpa, KVMI_PAGE_ACCESS_W))
+               return true;
+
+       return kvmi_arch_pf_event(vcpu, gpa, gva, KVMI_PAGE_ACCESS_W);
+}
+
+static bool kvmi_track_prewrite(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+       const u8 *new, int bytes,
+       struct kvm_page_track_notifier_node *node)
+{
+       struct kvmi *ikvm;
+       bool ret = true;
+
+       ikvm = kvmi_get(vcpu->kvm);
+       if (!ikvm)
+               return true;
+
+       if (is_event_enabled(vcpu, KVMI_EVENT_PF))
+               ret = __kvmi_track_prewrite(vcpu, gpa, gva, new, bytes, node);
+
+       kvmi_put(vcpu->kvm);
+
+       return ret;
+}
+
+static bool __kvmi_track_preexec(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+       struct kvm_page_track_notifier_node *node)
+{
+       if (!is_pf_of_interest(vcpu, gpa, KVMI_PAGE_ACCESS_X))
+               return true;
+
+       return kvmi_arch_pf_event(vcpu, gpa, gva, KVMI_PAGE_ACCESS_X);
+}
+
+static bool kvmi_track_preexec(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+       struct kvm_page_track_notifier_node *node)
+{
+       struct kvmi *ikvm;
+       bool ret = true;
+
+       ikvm = kvmi_get(vcpu->kvm);
+       if (!ikvm)
+               return true;
+
+       if (is_event_enabled(vcpu, KVMI_EVENT_PF))
+               ret = __kvmi_track_preexec(vcpu, gpa, gva, node);
+
+       kvmi_put(vcpu->kvm);
+
+       return ret;
+}
+
+static void kvmi_track_create_slot(struct kvm *kvm,
+       struct kvm_memory_slot *slot,
+       unsigned long npages,
+       struct kvm_page_track_notifier_node *node)
+{
+       struct kvmi *ikvm;
+       gfn_t start = slot->base_gfn;
+       const gfn_t end = start + npages;
+       int idx;
+
+       ikvm = kvmi_get(kvm);
+       if (!ikvm)
+               return;
+
+       idx = srcu_read_lock(&kvm->srcu);
+       spin_lock(&kvm->mmu_lock);
+       read_lock(&ikvm->access_tree_lock);
+
+       while (start < end) {
+               struct kvmi_mem_access *m;
+
+               m = __kvmi_get_gfn_access(ikvm, start);
+               if (m)
+                       kvmi_arch_update_page_tracking(kvm, slot, m);
+               start++;
+       }
+
+       read_unlock(&ikvm->access_tree_lock);
+       spin_unlock(&kvm->mmu_lock);
+       srcu_read_unlock(&kvm->srcu, idx);
+
+       kvmi_put(kvm);
+}
+
+static void kvmi_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
+       struct kvm_page_track_notifier_node *node)
+{
+       struct kvmi *ikvm;
+       gfn_t start = slot->base_gfn;
+       const gfn_t end = start + slot->npages;
+       int idx;
+
+       ikvm = kvmi_get(kvm);
+       if (!ikvm)
+               return;
+
+       idx = srcu_read_lock(&kvm->srcu);
+       spin_lock(&kvm->mmu_lock);
+       write_lock(&ikvm->access_tree_lock);
+
+       while (start < end) {
+               struct kvmi_mem_access *m;
+
+               m = __kvmi_get_gfn_access(ikvm, start);
+               if (m) {
+                       u8 prev_access = m->access;
+
+                       m->access = full_access;
+                       kvmi_arch_update_page_tracking(kvm, slot, m);
+                       m->access = prev_access;
+               }
+
+               start++;
+       }
+
+       write_unlock(&ikvm->access_tree_lock);
+       spin_unlock(&kvm->mmu_lock);
+       srcu_read_unlock(&kvm->srcu, idx);
+
+       kvmi_put(kvm);
+}
+
 static void kvmi_end_introspection(struct kvmi *ikvm)
 {
        struct kvm *kvm = ikvm->kvm;
@@ -290,6 +563,22 @@ static void kvmi_end_introspection(struct kvmi *ikvm)
         */
        kvmi_abort_events(kvm);
 
+       /*
+        * This may sleep on synchronize_srcu() so it's not allowed to be
+        * called under kvmi_put().
+        * Also synchronize_srcu() may deadlock on (page tracking) read-side
+        * regions that are waiting for replies to events, so it must be
+        * called after kvmi_abort_events().
+        */
+       kvm_page_track_unregister_notifier(kvm, &ikvm->kptn_node);
+
+       /*
+        * This function uses kvm->mmu_lock so it's not allowed to be
+        * called under kvmi_put(). It can reach a deadlock if called
+        * from kvm_mmu_load -> kvmi_tracked_gfn -> kvmi_put.
+        */
+       kvmi_clear_mem_access(kvm);
+
        /*
         * At this moment the socket is shut down, no more commands will come
         * from the introspector, and the only way into the introspection is
@@ -351,6 +640,8 @@ int kvmi_hook(struct kvm *kvm, const struct kvm_introspection *qemu)
                goto err_alloc;
        }
 
+       kvm_page_track_register_notifier(kvm, &ikvm->kptn_node);
+
        /*
         * Make sure all the KVM/KVMI structures are linked and no pointer
         * is read as NULL after the reference count has been set.
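
kvmi_clear_mem_access(), kvmi_track_create_slot() and
kvmi_track_flush_slot() all nest their locks the same way; a sketch of the
ordering they rely on:

	idx = srcu_read_lock(&kvm->srcu);	/* keep memslots stable */
	spin_lock(&kvm->mmu_lock);		/* page-track add/remove */
	write_lock(&ikvm->access_tree_lock);	/* or read_lock() for lookups */
	/* ... walk or update the gfn radix tree ... */
	write_unlock(&ikvm->access_tree_lock);
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);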
diff --git a/virt/kvm/kvmi_int.h b/virt/kvm/kvmi_int.h
index 7cff91bc1acc..d798908d0f70 100644
--- a/virt/kvm/kvmi_int.h
+++ b/virt/kvm/kvmi_int.h
@@ -6,6 +6,7 @@
 #include <linux/kvm_host.h>
 
 #include <uapi/linux/kvmi.h>
+#include <asm/kvmi_host.h>
 
 #define kvmi_debug(ikvm, fmt, ...) \
        kvm_debug("%pU " fmt, &ikvm->uuid, ## __VA_ARGS__)
@@ -104,6 +105,10 @@ struct kvmi_vcpu {
 
 struct kvmi {
        struct kvm *kvm;
+       struct kvm_page_track_notifier_node kptn_node;
+
+       struct radix_tree_root access_tree;
+       rwlock_t access_tree_lock;
 
        struct socket *sock;
        struct task_struct *recv;
@@ -118,6 +123,17 @@ struct kvmi {
        bool cmd_reply_disabled;
 };
 
+struct kvmi_mem_access {
+       gfn_t gfn;
+       u8 access;
+       struct kvmi_arch_mem_access arch;
+};
+
+static inline bool is_event_enabled(struct kvm_vcpu *vcpu, int event)
+{
+       return false; /* TODO */
+}
+
 /* kvmi_msg.c */
 bool kvmi_sock_get(struct kvmi *ikvm, int fd);
 void kvmi_sock_shutdown(struct kvmi *ikvm);
@@ -138,7 +154,12 @@ int kvmi_add_job(struct kvm_vcpu *vcpu,
                 void *ctx, void (*free_fct)(void *ctx));
 
 /* arch */
+void kvmi_arch_update_page_tracking(struct kvm *kvm,
+                                   struct kvm_memory_slot *slot,
+                                   struct kvmi_mem_access *m);
 void kvmi_arch_setup_event(struct kvm_vcpu *vcpu, struct kvmi_event *ev);
+bool kvmi_arch_pf_event(struct kvm_vcpu *vcpu, gpa_t gpa, gva_t gva,
+                       u8 access);
 int kvmi_arch_cmd_get_vcpu_info(struct kvm_vcpu *vcpu,
                                struct kvmi_get_vcpu_info_reply *rpl);
 
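struct kvmi_mem_access is the radix tree payload: one node per restricted
gfn, holding the allow bits plus the per-arch tracking state. A sketch of
the lookup contract implemented by kvmi_get_gfn_access() (illustrative
caller):

	u8 access;

	if (kvmi_get_gfn_access(ikvm, gfn, &access))
		; /* no node: access was set to full_access */
	else
		; /* restricted: access holds the tool's allow bits */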