The branch main has been updated by jhb:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=cd9618bdb274375139080ee4e33ccbdc980513f3

commit cd9618bdb274375139080ee4e33ccbdc980513f3
Author:     Vitaliy Gusev <gusev.vita...@gmail.com>
AuthorDate: 2022-06-23 18:46:06 +0000
Commit:     John Baldwin <j...@freebsd.org>
CommitDate: 2022-06-23 18:46:06 +0000

    bhyve: Snapshot impovements for 'blockif' backend
    
    When pausing a block I/O device model as part of suspending a VM, wait
    for all active block I/O requests to finish before saving snapshot
    data.  This avoids having to save information about in-flight requests
    both in the block_if layer and in storage device models.
    
    For the AHCI device model, the queues are now guaranteed to be idle
    when taking a snapshot, so remove the code to save queue state and
    rely on the initial state in a resumed VM having all queues already
    idle.
    
    This will also simplify adding NVMe snapshot support in the future.
    
    Reviewed by:    jhb
    Sponsored by:   vStack
    Differential Revision: https://reviews.freebsd.org/D26267
---
 usr.sbin/bhyve/block_if.c | 105 ++++------------------------------
 usr.sbin/bhyve/block_if.h |   4 --
 usr.sbin/bhyve/pci_ahci.c | 142 +---------------------------------------------
 3 files changed, 15 insertions(+), 236 deletions(-)

diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c
index 06c3cb50fb13..05c9d9731754 100644
--- a/usr.sbin/bhyve/block_if.c
+++ b/usr.sbin/bhyve/block_if.c
@@ -109,11 +109,9 @@ struct blockif_ctxt {
        int                     bc_psectoff;
        int                     bc_closing;
        int                     bc_paused;
-       int                     bc_work_count;
        pthread_t               bc_btid[BLOCKIF_NUMTHR];
        pthread_mutex_t         bc_mtx;
        pthread_cond_t          bc_cond;
-       pthread_cond_t          bc_paused_cond;
        pthread_cond_t          bc_work_done_cond;
        blockif_resize_cb       *bc_resize_cb;
        void                    *bc_resize_cb_arg;
@@ -362,6 +360,12 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem 
*be, uint8_t *buf)
        (*br->br_callback)(br, err);
 }
 
+static inline bool
+blockif_empty(const struct blockif_ctxt *bc)
+{
+       return (TAILQ_EMPTY(&bc->bc_pendq) && TAILQ_EMPTY(&bc->bc_busyq));
+}
+
 static void *
 blockif_thr(void *arg)
 {
@@ -379,30 +383,21 @@ blockif_thr(void *arg)
 
        pthread_mutex_lock(&bc->bc_mtx);
        for (;;) {
-               bc->bc_work_count++;
-
-               /* We cannot process work if the interface is paused */
-               while (!bc->bc_paused && blockif_dequeue(bc, t, &be)) {
+               while (blockif_dequeue(bc, t, &be)) {
                        pthread_mutex_unlock(&bc->bc_mtx);
                        blockif_proc(bc, be, buf);
                        pthread_mutex_lock(&bc->bc_mtx);
                        blockif_complete(bc, be);
                }
 
-               bc->bc_work_count--;
-
-               /* If none of the workers are busy, notify the main thread */
-               if (bc->bc_work_count == 0)
+               /* If none to work, notify the main thread */
+               if (blockif_empty(bc))
                        pthread_cond_broadcast(&bc->bc_work_done_cond);
 
                /* Check ctxt status here to see if exit requested */
                if (bc->bc_closing)
                        break;
 
-               /* Make all worker threads wait here if the device is paused */
-               while (bc->bc_paused)
-                       pthread_cond_wait(&bc->bc_paused_cond, &bc->bc_mtx);
-
                pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
        }
        pthread_mutex_unlock(&bc->bc_mtx);
@@ -638,8 +633,6 @@ blockif_open(nvlist_t *nvl, const char *ident)
        pthread_mutex_init(&bc->bc_mtx, NULL);
        pthread_cond_init(&bc->bc_cond, NULL);
        bc->bc_paused = 0;
-       bc->bc_work_count = 0;
-       pthread_cond_init(&bc->bc_paused_cond, NULL);
        pthread_cond_init(&bc->bc_work_done_cond, NULL);
        TAILQ_INIT(&bc->bc_freeq);
        TAILQ_INIT(&bc->bc_pendq);
@@ -737,6 +730,7 @@ blockif_request(struct blockif_ctxt *bc, struct blockif_req 
*breq,
        err = 0;
 
        pthread_mutex_lock(&bc->bc_mtx);
+       assert(!bc->bc_paused);
        if (!TAILQ_EMPTY(&bc->bc_freeq)) {
                /*
                 * Enqueue and inform the block i/o thread
@@ -761,7 +755,6 @@ blockif_request(struct blockif_ctxt *bc, struct blockif_req 
*breq,
 int
 blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
 {
-
        assert(bc->bc_magic == BLOCKIF_SIG);
        return (blockif_request(bc, breq, BOP_READ));
 }
@@ -769,7 +762,6 @@ blockif_read(struct blockif_ctxt *bc, struct blockif_req 
*breq)
 int
 blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
 {
-
        assert(bc->bc_magic == BLOCKIF_SIG);
        return (blockif_request(bc, breq, BOP_WRITE));
 }
@@ -777,7 +769,6 @@ blockif_write(struct blockif_ctxt *bc, struct blockif_req 
*breq)
 int
 blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
 {
-
        assert(bc->bc_magic == BLOCKIF_SIG);
        return (blockif_request(bc, breq, BOP_FLUSH));
 }
@@ -785,7 +776,6 @@ blockif_flush(struct blockif_ctxt *bc, struct blockif_req 
*breq)
 int
 blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
 {
-
        assert(bc->bc_magic == BLOCKIF_SIG);
        return (blockif_request(bc, breq, BOP_DELETE));
 }
@@ -955,7 +945,6 @@ blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t 
*h, uint8_t *s)
 off_t
 blockif_size(struct blockif_ctxt *bc)
 {
-
        assert(bc->bc_magic == BLOCKIF_SIG);
        return (bc->bc_size);
 }
@@ -963,7 +952,6 @@ blockif_size(struct blockif_ctxt *bc)
 int
 blockif_sectsz(struct blockif_ctxt *bc)
 {
-
        assert(bc->bc_magic == BLOCKIF_SIG);
        return (bc->bc_sectsz);
 }
@@ -971,7 +959,6 @@ blockif_sectsz(struct blockif_ctxt *bc)
 void
 blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off)
 {
-
        assert(bc->bc_magic == BLOCKIF_SIG);
        *size = bc->bc_psectsz;
        *off = bc->bc_psectoff;
@@ -980,7 +967,6 @@ blockif_psectsz(struct blockif_ctxt *bc, int *size, int 
*off)
 int
 blockif_queuesz(struct blockif_ctxt *bc)
 {
-
        assert(bc->bc_magic == BLOCKIF_SIG);
        return (BLOCKIF_MAXREQ - 1);
 }
@@ -988,7 +974,6 @@ blockif_queuesz(struct blockif_ctxt *bc)
 int
 blockif_is_ro(struct blockif_ctxt *bc)
 {
-
        assert(bc->bc_magic == BLOCKIF_SIG);
        return (bc->bc_rdonly);
 }
@@ -996,7 +981,6 @@ blockif_is_ro(struct blockif_ctxt *bc)
 int
 blockif_candelete(struct blockif_ctxt *bc)
 {
-
        assert(bc->bc_magic == BLOCKIF_SIG);
        return (bc->bc_candelete);
 }
@@ -1012,7 +996,7 @@ blockif_pause(struct blockif_ctxt *bc)
        bc->bc_paused = 1;
 
        /* The interface is paused. Wait for workers to finish their work */
-       while (bc->bc_work_count)
+       while (!blockif_empty(bc))
                pthread_cond_wait(&bc->bc_work_done_cond, &bc->bc_mtx);
        pthread_mutex_unlock(&bc->bc_mtx);
 
@@ -1029,71 +1013,6 @@ blockif_resume(struct blockif_ctxt *bc)
 
        pthread_mutex_lock(&bc->bc_mtx);
        bc->bc_paused = 0;
-       /* resume the threads waiting for paused */
-       pthread_cond_broadcast(&bc->bc_paused_cond);
-       /* kick the threads after restore */
-       pthread_cond_broadcast(&bc->bc_cond);
-       pthread_mutex_unlock(&bc->bc_mtx);
-}
-
-int
-blockif_snapshot_req(struct blockif_req *br, struct vm_snapshot_meta *meta)
-{
-       int i;
-       struct iovec *iov;
-       int ret;
-
-       SNAPSHOT_VAR_OR_LEAVE(br->br_iovcnt, meta, ret, done);
-       SNAPSHOT_VAR_OR_LEAVE(br->br_offset, meta, ret, done);
-       SNAPSHOT_VAR_OR_LEAVE(br->br_resid, meta, ret, done);
-
-       /*
-        * XXX: The callback and parameter must be filled by the virtualized
-        * device that uses the interface, during its init; we're not touching
-        * them here.
-        */
-
-       /* Snapshot the iovecs. */
-       for (i = 0; i < br->br_iovcnt; i++) {
-               iov = &br->br_iov[i];
-
-               SNAPSHOT_VAR_OR_LEAVE(iov->iov_len, meta, ret, done);
-
-               /* We assume the iov is a guest-mapped address. */
-               SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(iov->iov_base, iov->iov_len,
-                       false, meta, ret, done);
-       }
-
-done:
-       return (ret);
-}
-
-int
-blockif_snapshot(struct blockif_ctxt *bc, struct vm_snapshot_meta *meta)
-{
-       int ret;
-
-       if (bc->bc_paused == 0) {
-               fprintf(stderr, "%s: Snapshot failed: "
-                       "interface not paused.\r\n", __func__);
-               return (ENXIO);
-       }
-
-       pthread_mutex_lock(&bc->bc_mtx);
-
-       SNAPSHOT_VAR_OR_LEAVE(bc->bc_magic, meta, ret, done);
-       SNAPSHOT_VAR_OR_LEAVE(bc->bc_ischr, meta, ret, done);
-       SNAPSHOT_VAR_OR_LEAVE(bc->bc_isgeom, meta, ret, done);
-       SNAPSHOT_VAR_OR_LEAVE(bc->bc_candelete, meta, ret, done);
-       SNAPSHOT_VAR_OR_LEAVE(bc->bc_rdonly, meta, ret, done);
-       SNAPSHOT_VAR_OR_LEAVE(bc->bc_size, meta, ret, done);
-       SNAPSHOT_VAR_OR_LEAVE(bc->bc_sectsz, meta, ret, done);
-       SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectsz, meta, ret, done);
-       SNAPSHOT_VAR_OR_LEAVE(bc->bc_psectoff, meta, ret, done);
-       SNAPSHOT_VAR_OR_LEAVE(bc->bc_closing, meta, ret, done);
-
-done:
        pthread_mutex_unlock(&bc->bc_mtx);
-       return (ret);
 }
-#endif
+#endif /* BHYVE_SNAPSHOT */
diff --git a/usr.sbin/bhyve/block_if.h b/usr.sbin/bhyve/block_if.h
index 0407ff43cf94..cf3acc05c98f 100644
--- a/usr.sbin/bhyve/block_if.h
+++ b/usr.sbin/bhyve/block_if.h
@@ -87,10 +87,6 @@ int  blockif_close(struct blockif_ctxt *bc);
 #ifdef BHYVE_SNAPSHOT
 void   blockif_pause(struct blockif_ctxt *bc);
 void   blockif_resume(struct blockif_ctxt *bc);
-int    blockif_snapshot_req(struct blockif_req *br,
-    struct vm_snapshot_meta *meta);
-int    blockif_snapshot(struct blockif_ctxt *bc,
-    struct vm_snapshot_meta *meta);
 #endif
 
 #endif /* _BLOCK_IF_H_ */
diff --git a/usr.sbin/bhyve/pci_ahci.c b/usr.sbin/bhyve/pci_ahci.c
index d07b1f085e3d..ba8921ea3b6f 100644
--- a/usr.sbin/bhyve/pci_ahci.c
+++ b/usr.sbin/bhyve/pci_ahci.c
@@ -2562,114 +2562,14 @@ open_fail:
 }
 
 #ifdef BHYVE_SNAPSHOT
-static int
-pci_ahci_snapshot_save_queues(struct ahci_port *port,
-                             struct vm_snapshot_meta *meta)
-{
-       int ret;
-       int idx;
-       struct ahci_ioreq *ioreq;
-
-       STAILQ_FOREACH(ioreq, &port->iofhd, io_flist) {
-               idx = ((void *) ioreq - (void *) port->ioreq) / sizeof(*ioreq);
-               SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
-       }
-
-       idx = -1;
-       SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
-
-       TAILQ_FOREACH(ioreq, &port->iobhd, io_blist) {
-               idx = ((void *) ioreq - (void *) port->ioreq) / sizeof(*ioreq);
-               SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
-
-               /*
-                * Snapshot only the busy requests; other requests are
-                * not valid.
-                */
-               ret = blockif_snapshot_req(&ioreq->io_req, meta);
-               if (ret != 0) {
-                       fprintf(stderr, "%s: failed to snapshot req\r\n",
-                               __func__);
-                       goto done;
-               }
-       }
-
-       idx = -1;
-       SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
-
-done:
-       return (ret);
-}
-
-static int
-pci_ahci_snapshot_restore_queues(struct ahci_port *port,
-                                struct vm_snapshot_meta *meta)
-{
-       int ret;
-       int idx;
-       struct ahci_ioreq *ioreq;
-
-       /* Empty the free queue before restoring. */
-       while (!STAILQ_EMPTY(&port->iofhd))
-               STAILQ_REMOVE_HEAD(&port->iofhd, io_flist);
-
-       /* Restore the free queue. */
-       while (1) {
-               SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
-               if (idx == -1)
-                       break;
-
-               STAILQ_INSERT_TAIL(&port->iofhd, &port->ioreq[idx], io_flist);
-       }
-
-       /* Restore the busy queue. */
-       while (1) {
-               SNAPSHOT_VAR_OR_LEAVE(idx, meta, ret, done);
-               if (idx == -1)
-                       break;
-
-               ioreq = &port->ioreq[idx];
-               TAILQ_INSERT_TAIL(&port->iobhd, ioreq, io_blist);
-
-               /*
-                * Restore only the busy requests; other requests are
-                * not valid.
-                */
-               ret = blockif_snapshot_req(&ioreq->io_req, meta);
-               if (ret != 0) {
-                       fprintf(stderr, "%s: failed to restore request\r\n",
-                               __func__);
-                       goto done;
-               }
-
-               /* Re-enqueue the requests in the block interface. */
-               if (ioreq->readop)
-                       ret = blockif_read(port->bctx, &ioreq->io_req);
-               else
-                       ret = blockif_write(port->bctx, &ioreq->io_req);
-
-               if (ret != 0) {
-                       fprintf(stderr,
-                               "%s: failed to re-enqueue request\r\n",
-                               __func__);
-                       goto done;
-               }
-       }
-
-done:
-       return (ret);
-}
-
 static int
 pci_ahci_snapshot(struct vm_snapshot_meta *meta)
 {
-       int i, j, ret;
+       int i, ret;
        void *bctx;
        struct pci_devinst *pi;
        struct pci_ahci_softc *sc;
        struct ahci_port *port;
-       struct ahci_cmd_hdr *hdr;
-       struct ahci_ioreq *ioreq;
 
        pi = meta->dev_data;
        sc = pi->pi_arg;
@@ -2753,43 +2653,7 @@ pci_ahci_snapshot(struct vm_snapshot_meta *meta)
                SNAPSHOT_VAR_OR_LEAVE(port->fbs, meta, ret, done);
                SNAPSHOT_VAR_OR_LEAVE(port->ioqsz, meta, ret, done);
 
-               for (j = 0; j < port->ioqsz; j++) {
-                       ioreq = &port->ioreq[j];
-
-                       /* blockif_req snapshot done only for busy requests. */
-                       hdr = (struct ahci_cmd_hdr *)(port->cmd_lst +
-                               ioreq->slot * AHCI_CL_SIZE);
-                       SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(ioreq->cfis,
-                               0x80 + hdr->prdtl * sizeof(struct 
ahci_prdt_entry),
-                               false, meta, ret, done);
-
-                       SNAPSHOT_VAR_OR_LEAVE(ioreq->len, meta, ret, done);
-                       SNAPSHOT_VAR_OR_LEAVE(ioreq->done, meta, ret, done);
-                       SNAPSHOT_VAR_OR_LEAVE(ioreq->slot, meta, ret, done);
-                       SNAPSHOT_VAR_OR_LEAVE(ioreq->more, meta, ret, done);
-                       SNAPSHOT_VAR_OR_LEAVE(ioreq->readop, meta, ret, done);
-               }
-
-               /* Perform save / restore specific operations. */
-               if (meta->op == VM_SNAPSHOT_SAVE) {
-                       ret = pci_ahci_snapshot_save_queues(port, meta);
-                       if (ret != 0)
-                               goto done;
-               } else if (meta->op == VM_SNAPSHOT_RESTORE) {
-                       ret = pci_ahci_snapshot_restore_queues(port, meta);
-                       if (ret != 0)
-                               goto done;
-               } else {
-                       ret = EINVAL;
-                       goto done;
-               }
-
-               ret = blockif_snapshot(port->bctx, meta);
-               if (ret != 0) {
-                       fprintf(stderr, "%s: failed to restore blockif\r\n",
-                               __func__);
-                       goto done;
-               }
+               assert(TAILQ_EMPTY(&port->iobhd));
        }
 
 done:
@@ -2835,7 +2699,7 @@ pci_ahci_resume(struct vmctx *ctx, struct pci_devinst *pi)
 
        return (0);
 }
-#endif
+#endif /* BHYVE_SNAPSHOT */
 
 /*
  * Use separate emulation names to distinguish drive and atapi devices

Reply via email to