[RFC PATCH v6 49/92] kvm: introspection: add KVMI_PAUSE_VCPU and KVMI_EVENT_PAUSE_VCPU

Adalbert Lazăr Fri, 09 Aug 2019 09:16:07 -0700

KVMI_PAUSE_VCPU is the only vCPU command handled by the receiving worker.
It increments a pause request counter and kicks the vCPU.


The KVMI_EVENT_PAUSE_VCPU event is sent by the vCPU thread, but has a low
priority. It will be sent after any other vCPU introspection event and
when no vCPU introspection command is queued.

Signed-off-by: Adalbert Lazăr <[email protected]>
---
 Documentation/virtual/kvm/kvmi.rst | 68 ++++++++++++++++++++++++++++++
 include/uapi/linux/kvm_para.h      |  1 +
 include/uapi/linux/kvmi.h          |  7 +++
 virt/kvm/kvmi.c                    | 65 ++++++++++++++++++++++++++++
 virt/kvm/kvmi_int.h                |  4 ++
 virt/kvm/kvmi_msg.c                | 61 +++++++++++++++++++++++++++
 6 files changed, 206 insertions(+)

diff --git a/Documentation/virtual/kvm/kvmi.rst b/Documentation/virtual/kvm/kvmi.rst
index eef32107837a..558d3eb6007f 100644
--- a/Documentation/virtual/kvm/kvmi.rst
+++ b/Documentation/virtual/kvm/kvmi.rst
@@ -820,6 +820,48 @@ one page (offset + size <= PAGE_SIZE).
 
 * -KVM_EINVAL - the specified gpa is invalid
 
+16. KVMI_PAUSE_VCPU
+-------------------
+
+:Architectures: all
+:Versions: >= 1
+:Parameters:
+
+       struct kvmi_vcpu_hdr;
+       struct kvmi_pause_vcpu {
+               __u8 wait;
+               __u8 padding1;
+               __u16 padding2;
+               __u32 padding3;
+       };
+
+:Returns:
+
+::
+
+       struct kvmi_error_code;
+
+Kicks the vCPU out of the guest.
+
+If `wait` is 1, the command will wait for vCPU to acknowledge the IPI.
+
+The vCPU will handle the pending commands/events and send the
+*KVMI_EVENT_PAUSE_VCPU* event (one for every successful *KVMI_PAUSE_VCPU*
+command) before returning to guest.
+
+Please note that new vCPUs might be created at any time.
+The introspection tool should use *KVMI_CONTROL_VM_EVENTS* to enable the
+*KVMI_EVENT_CREATE_VCPU* event in order to stop these new vCPUs as well
+(by delaying the event reply).
+
+:Errors:
+
+* -KVM_EINVAL - the selected vCPU is invalid
+* -KVM_EINVAL - padding is not zero
+* -KVM_EAGAIN - the selected vCPU can't be introspected yet
+* -KVM_EBUSY  - the selected vCPU has too many queued *KVMI_EVENT_PAUSE_VCPU* events
+* -KVM_EPERM  - the *KVMI_EVENT_PAUSE_VCPU* event is disallowed (see *KVMI_CONTROL_EVENTS*)
+               and the introspection tool expects a reply.
 Events
 ======
 
@@ -992,3 +1034,29 @@ The *RETRY* action is used by the introspector to retry the execution of
 the current instruction. Either using single-step (if ``singlestep`` is
 not zero) or return to guest (if the introspector changed the instruction
 pointer or the page restrictions).
+
+4. KVMI_EVENT_PAUSE_VCPU
+------------------------
+
+:Architectures: all
+:Versions: >= 1
+:Actions: CONTINUE, CRASH
+:Parameters:
+
+::
+
+       struct kvmi_event;
+
+:Returns:
+
+::
+
+       struct kvmi_vcpu_hdr;
+       struct kvmi_event_reply;
+
+This event is sent in response to a *KVMI_PAUSE_VCPU* command and
+cannot be disabled via *KVMI_CONTROL_EVENTS*.
+
+This event has a low priority. It will be sent after any other vCPU
+introspection event and when no vCPU introspection command is queued.
+
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index 54c0e20f5b64..07e3f2662b36 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -18,6 +18,7 @@
 #define KVM_EPERM              EPERM
 #define KVM_EOPNOTSUPP         95
 #define KVM_EAGAIN             11
+#define KVM_EBUSY              EBUSY
 #define KVM_ENOMEM             ENOMEM
 
 #define KVM_HC_VAPIC_POLL_IRQ          1
diff --git a/include/uapi/linux/kvmi.h b/include/uapi/linux/kvmi.h
index be3f066f314e..ca9c6b6aeed5 100644
--- a/include/uapi/linux/kvmi.h
+++ b/include/uapi/linux/kvmi.h
@@ -177,6 +177,13 @@ struct kvmi_get_vcpu_info_reply {
        __u64 tsc_speed;
 };
 
+struct kvmi_pause_vcpu { /* payload of the KVMI_PAUSE_VCPU command */
+       __u8 wait; /* 1: kick the vCPU and wait; any other value: kick only */
+       __u8 padding1; /* padding1..padding3 must be zero, else -KVM_EINVAL */
+       __u16 padding2;
+       __u32 padding3;
+};
+
 struct kvmi_control_events {
        __u16 event_id;
        __u8 enable;
diff --git a/virt/kvm/kvmi.c b/virt/kvm/kvmi.c
index a84eb150e116..85de2da3eb7b 100644
--- a/virt/kvm/kvmi.c
+++ b/virt/kvm/kvmi.c
@@ -11,6 +11,8 @@
 #include <linux/kthread.h>
 #include <linux/bitmap.h>
 
+#define MAX_PAUSE_REQUESTS 1001 /* pause_requests above this => -KVM_EBUSY */
+
 static struct kmem_cache *msg_cache;
 static struct kmem_cache *radix_cache;
 static struct kmem_cache *job_cache;
@@ -1090,6 +1092,39 @@ static bool kvmi_create_vcpu_event(struct kvm_vcpu *vcpu)
        return ret;
 }
 
+/*
+ * Send KVMI_EVENT_PAUSE_VCPU for this vCPU and act on the reply.
+ *
+ * Returns true only when the reported action is
+ * KVMI_EVENT_ACTION_CONTINUE. Any other action is handed to
+ * kvmi_handle_common_event_actions() and false is returned.
+ */
+static bool __kvmi_pause_vcpu_event(struct kvm_vcpu *vcpu)
+{
+       u32 reply_action = kvmi_msg_send_pause_vcpu(vcpu);
+
+       if (reply_action == KVMI_EVENT_ACTION_CONTINUE)
+               return true;
+
+       kvmi_handle_common_event_actions(vcpu, reply_action, "PAUSE");
+
+       return false;
+}
+
+/*
+ * Deliver the pause event while holding a reference obtained with
+ * kvmi_get(). If no reference can be taken, nothing is sent and true
+ * is returned; otherwise the result of __kvmi_pause_vcpu_event() is
+ * returned after the reference is dropped with kvmi_put().
+ */
+static bool kvmi_pause_vcpu_event(struct kvm_vcpu *vcpu)
+{
+       bool handled;
+
+       if (!kvmi_get(vcpu->kvm))
+               return true;
+
+       handled = __kvmi_pause_vcpu_event(vcpu);
+       kvmi_put(vcpu->kvm);
+
+       return handled;
+}
+
 void kvmi_run_jobs(struct kvm_vcpu *vcpu)
 {
        struct kvmi_vcpu *ivcpu = IVCPU(vcpu);
@@ -1154,6 +1189,7 @@ int kvmi_run_jobs_and_wait(struct kvm_vcpu *vcpu)
 
 void kvmi_handle_requests(struct kvm_vcpu *vcpu)
 {
+       struct kvmi_vcpu *ivcpu = IVCPU(vcpu);
        struct kvmi *ikvm;
 
        ikvm = kvmi_get(vcpu->kvm);
@@ -1165,6 +1201,12 @@ void kvmi_handle_requests(struct kvm_vcpu *vcpu)
 
                if (err)
                        break;
+
+               if (!atomic_read(&ivcpu->pause_requests))
+                       break;
+
+               atomic_dec(&ivcpu->pause_requests);
+               kvmi_pause_vcpu_event(vcpu);
        }
 
        kvmi_put(vcpu->kvm);
@@ -1351,10 +1393,33 @@ int kvmi_cmd_control_vm_events(struct kvmi *ikvm, unsigned int event_id,
        return 0;
 }
 
+/*
+ * Queue one pause request for @vcpu and kick it.
+ *
+ * The per-vCPU pause_requests counter is incremented and
+ * KVM_REQ_INTROSPECTION is raised before the kick. When @wait is true
+ * the kick is done with kvm_vcpu_kick_and_wait(), otherwise with
+ * kvm_vcpu_kick().
+ *
+ * Returns 0 on success or -KVM_EBUSY when the counter is already above
+ * MAX_PAUSE_REQUESTS.
+ */
+int kvmi_cmd_pause_vcpu(struct kvm_vcpu *vcpu, bool wait)
+{
+       unsigned int request = KVM_REQ_INTROSPECTION;
+       struct kvmi_vcpu *ivcpu = IVCPU(vcpu);
+
+       if (atomic_read(&ivcpu->pause_requests) > MAX_PAUSE_REQUESTS)
+               return -KVM_EBUSY;
+
+       atomic_inc(&ivcpu->pause_requests);
+       kvm_make_request(request, vcpu);
+
+       if (!wait) {
+               kvm_vcpu_kick(vcpu);
+               return 0;
+       }
+
+       kvm_vcpu_kick_and_wait(vcpu);
+
+       return 0;
+}
+
 static void kvmi_job_abort(struct kvm_vcpu *vcpu, void *ctx)
 {
        struct kvmi_vcpu *ivcpu = IVCPU(vcpu);
 
+       /*
+        * Discard every pause request still counted for this vCPU.
+        * The thread that might increment this atomic is stopped
+        * and this thread is the only one that could decrement it.
+        */
+       atomic_set(&ivcpu->pause_requests, 0);
        ivcpu->reply_waiting = false;
 }
 
diff --git a/virt/kvm/kvmi_int.h b/virt/kvm/kvmi_int.h
index 7bdff70d4309..cb3b0ce87bc1 100644
--- a/virt/kvm/kvmi_int.h
+++ b/virt/kvm/kvmi_int.h
@@ -100,6 +100,8 @@ struct kvmi_vcpu {
        bool rep_complete;
        bool effective_rep_complete;
 
+       atomic_t pause_requests;
+
        bool reply_waiting;
        struct kvmi_vcpu_reply reply;
 
@@ -164,6 +166,7 @@ u32 kvmi_msg_send_pf(struct kvm_vcpu *vcpu, u64 gpa, u64 gva, u8 access,
                     bool *singlestep, bool *rep_complete,
                     u64 *ctx_addr, u8 *ctx, u32 *ctx_size);
 u32 kvmi_msg_send_create_vcpu(struct kvm_vcpu *vcpu);
+u32 kvmi_msg_send_pause_vcpu(struct kvm_vcpu *vcpu);
 int kvmi_msg_send_unhook(struct kvmi *ikvm);
 
 /* kvmi.c */
@@ -185,6 +188,7 @@ int kvmi_cmd_control_events(struct kvm_vcpu *vcpu, unsigned int event_id,
                            bool enable);
 int kvmi_cmd_control_vm_events(struct kvmi *ikvm, unsigned int event_id,
                               bool enable);
+int kvmi_cmd_pause_vcpu(struct kvm_vcpu *vcpu, bool wait);
 int kvmi_run_jobs_and_wait(struct kvm_vcpu *vcpu);
 int kvmi_add_job(struct kvm_vcpu *vcpu,
                 void (*fct)(struct kvm_vcpu *vcpu, void *ctx),
diff --git a/virt/kvm/kvmi_msg.c b/virt/kvm/kvmi_msg.c
index 9c20a9cfda42..a4446eed354d 100644
--- a/virt/kvm/kvmi_msg.c
+++ b/virt/kvm/kvmi_msg.c
@@ -34,6 +34,7 @@ static const char *const msg_IDs[] = {
        [KVMI_GET_PAGE_WRITE_BITMAP] = "KVMI_GET_PAGE_WRITE_BITMAP",
        [KVMI_GET_VCPU_INFO]         = "KVMI_GET_VCPU_INFO",
        [KVMI_GET_VERSION]           = "KVMI_GET_VERSION",
+       [KVMI_PAUSE_VCPU]            = "KVMI_PAUSE_VCPU",
        [KVMI_READ_PHYSICAL]         = "KVMI_READ_PHYSICAL",
        [KVMI_SET_PAGE_ACCESS]       = "KVMI_SET_PAGE_ACCESS",
        [KVMI_SET_PAGE_WRITE_BITMAP] = "KVMI_SET_PAGE_WRITE_BITMAP",
@@ -457,6 +458,53 @@ static bool invalid_vcpu_hdr(const struct kvmi_vcpu_hdr *hdr)
        return hdr->padding1 || hdr->padding2;
 }
 
+/*
+ * We handle this vCPU command on the receiving thread to make it easier
+ * for userspace to implement a 'pause VM' command. Usually, this is done
+ * by sending one 'pause vCPU' command for every vCPU. By handling the
+ * command here, the userspace can:
+ *    - optimize, by not requesting a reply for the first N-1 vCPU's
+ *    - consider the VM stopped once it receives the reply
+ *      for the last 'pause vCPU' command
+ *
+ * The request fails with -KVM_EINVAL when any padding field of
+ * struct kvmi_pause_vcpu is non-zero or when invalid_vcpu_hdr() rejects
+ * the vCPU header, and with -KVM_EPERM when KVMI_EVENT_PAUSE_VCPU is not
+ * allowed. Otherwise the error code is the result of kvmi_get_vcpu()
+ * or, if that succeeds, of kvmi_cmd_pause_vcpu().
+ *
+ * Returns the value returned by the reply helper that was used
+ * (kvmi_msg_vm_reply() or kvmi_msg_vm_maybe_reply()).
+ *
+ * NOTE(review): the pause payload is read through _req while the vCPU
+ * header is read from msg + 1; confirm against the dispatcher that _req
+ * points past the vCPU header rather than at msg + 1 as well.
+ */
+static int handle_pause_vcpu(struct kvmi *ikvm,
+                            const struct kvmi_msg_hdr *msg,
+                            const void *_req)
+{
+       const struct kvmi_pause_vcpu *req = _req;
+       const struct kvmi_vcpu_hdr *cmd;
+       struct kvm_vcpu *vcpu = NULL;
+       int err;
+
+       if (req->padding1 || req->padding2 || req->padding3) {
+               err = -KVM_EINVAL;
+               goto reply;
+       }
+
+       cmd = (const struct kvmi_vcpu_hdr *) (msg + 1); /* vCPU header follows the message header */
+
+       if (invalid_vcpu_hdr(cmd)) {
+               err = -KVM_EINVAL;
+               goto reply;
+       }
+
+       if (!is_event_allowed(ikvm, KVMI_EVENT_PAUSE_VCPU)) {
+               err = -KVM_EPERM;
+
+               if (ikvm->cmd_reply_disabled) /* replies are off: send this error with kvmi_msg_vm_reply() */
+                       return kvmi_msg_vm_reply(ikvm, msg, err, NULL, 0);
+
+               goto reply;
+       }
+
+       err = kvmi_get_vcpu(ikvm, cmd->vcpu, &vcpu);
+       if (!err)
+               err = kvmi_cmd_pause_vcpu(vcpu, req->wait == 1); /* only wait == 1 selects the kick-and-wait path */
+
+reply:
+       return kvmi_msg_vm_maybe_reply(ikvm, msg, err, NULL, 0);
+}
+
 /*
  * These commands are executed on the receiving thread/worker.
  */
@@ -471,6 +519,7 @@ static int(*const msg_vm[])(struct kvmi *, const struct kvmi_msg_hdr *,
        [KVMI_GET_PAGE_ACCESS]       = handle_get_page_access,
        [KVMI_GET_PAGE_WRITE_BITMAP] = handle_get_page_write_bitmap,
        [KVMI_GET_VERSION]           = handle_get_version,
+       [KVMI_PAUSE_VCPU]            = handle_pause_vcpu,
        [KVMI_READ_PHYSICAL]         = handle_read_physical,
        [KVMI_SET_PAGE_ACCESS]       = handle_set_page_access,
        [KVMI_SET_PAGE_WRITE_BITMAP] = handle_set_page_write_bitmap,
@@ -966,3 +1015,15 @@ u32 kvmi_msg_send_create_vcpu(struct kvm_vcpu *vcpu)
 
        return action;
 }
+
+/*
+ * Send KVMI_EVENT_PAUSE_VCPU (no event-specific payload, no reply
+ * payload) and return the action reported through kvmi_send_event().
+ * If kvmi_send_event() fails, KVMI_EVENT_ACTION_CONTINUE is returned.
+ */
+u32 kvmi_msg_send_pause_vcpu(struct kvm_vcpu *vcpu)
+{
+       int action;
+
+       if (kvmi_send_event(vcpu, KVMI_EVENT_PAUSE_VCPU, NULL, 0,
+                           NULL, 0, &action))
+               return KVMI_EVENT_ACTION_CONTINUE;
+
+       return action;
+}
_______________________________________________
Virtualization mailing list
[email protected]
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

[RFC PATCH v6 49/92] kvm: introspection: add KVMI_PAUSE_VCPU and KVMI_EVENT_PAUSE_VCPU

Reply via email to