On Mon, Jun 01, 2015 at 01:11:44PM +0100, Patrick Welche wrote:
> On Fri, May 22, 2015 at 04:56:57PM +0000, Taylor R Campbell wrote:
> >    Date: Fri, 22 May 2015 13:09:58 +0100
> >    From: Patrick Welche <[email protected]>
> > 
> >    There are a load of locking PRs involving vnd. I thought I would have a
> >    stab at converting vnd to use condvars and mutexes (mutices?) as a first
> >    step. I cobbled something together which allows me to
> >    [...]
> >    with a LOCKDEBUG kernel, so I may have guessed right, but I haven't found
> >    any documentation on The Right Way, so please review carefully, assume
> >    the worst, and tell me how to do it better!
> 
> I may have regressed: with the attached patch, I now get a locking
> error in vndthread at:
> 
>         /*
>          * Allocate a header for this transfer and link it to the
>          * buffer
>          */
>         mutex_enter(&vnd->sc_sclock); <------------ XXX already locked?!
>         vnx = VND_GETXFER(vnd); /* may sleep */
>         mutex_exit(&vnd->sc_sclock);

Simply VND_GETXFER(vnd) is pool_get(&(vnd)->sc_vxpool, PR_WAITOK), and

                pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
                    0, 0, "vndxpl", NULL, IPL_BIO);

so I think the splbio replacement mutex_enter/exit is simply not
necessary. Removing them gets me a successful

  mkdir /mnt0 /mnt1 /mnt2 /mnt3
  dd if=/dev/zero of=/scratch/disk0.img bs=1m count=1100
  dd if=/dev/zero of=/scratch/disk1.img bs=1m count=1100
  dd if=/dev/zero of=/scratch2/disk2.img bs=1m count=1100
  dd if=/dev/zero of=/scratch3/disk3.img bs=1m count=1100
  vnconfig vnd0 /scratch/disk0.img
  vnconfig vnd1 /scratch/disk1.img
  vnconfig vnd2 /scratch2/disk2.img
  vnconfig vnd3 /scratch3/disk3.img
  for i in 0 1 2 3; do
          disklabel -rw vnd$i vndtest;
          newfs /dev/rvnd${i}a
          mount /dev/vnd${i}a /mnt${i}
  done
  for i in 0 1 2 3; do
          bonnie -d /mnt${i}&
  done

with a LOCKDEBUG kernel on a quad core computer, with scratch, scratch2
and scratch3 being on separate physical disks.

Cheers,

Patrick
Index: vndvar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/vndvar.h,v
retrieving revision 1.34
diff -u -p -r1.34 vndvar.h
--- vndvar.h    25 May 2015 20:57:18 -0000      1.34
+++ vndvar.h    23 Jun 2015 17:16:50 -0000
@@ -107,14 +107,19 @@ struct vnode;
  */
 struct vnd_softc {
        device_t         sc_dev;
+       kmutex_t         sc_sclock;     /* protect this softc */
        int              sc_flags;      /* flags */
+       kcondvar_t       sc_intercv;    /* interruptible lock condvar */
        uint64_t         sc_size;       /* size of vnd */
        struct vnode    *sc_vp;         /* vnode */
        kauth_cred_t     sc_cred;       /* credentials */
-       int              sc_maxactive;  /* max # of active requests */
        struct bufq_state *sc_tab;      /* transfer queue */
+       kcondvar_t       sc_bufqcv;     /* queue's condvar */
        int              sc_pending;    /* number of pending transfers */
+       kcondvar_t       sc_pendingcv;  /* pending transfer condvar */
+       int              sc_maxactive;  /* max # of active requests */
        int              sc_active;     /* number of active transfers */
+       kcondvar_t       sc_activitycv; /* activity condvar */
        struct disk      sc_dkdev;      /* generic disk device info */
        struct vndgeom   sc_geom;       /* virtual geometry */
        struct pool      sc_vxpool;     /* vndxfer pool */
@@ -140,7 +145,6 @@ struct vnd_softc {
 #define        VNF_KLABEL      0x040   /* keep label on close */
 #define        VNF_VLABEL      0x080   /* label is valid */
 #define        VNF_KTHREAD     0x100   /* thread is running */
-#define        VNF_VUNCONF     0x200   /* device is unconfiguring */
 #define VNF_COMP       0x400   /* file is compressed */
 #define VNF_CLEARING   0x800   /* unit is being torn down */
 #define VNF_USE_VN_RDWR        0x1000  /* have to use vn_rdwr() */
Index: vnd.c
===================================================================
RCS file: /cvsroot/src/sys/dev/vnd.c,v
retrieving revision 1.245
diff -u -p -r1.245 vnd.c
--- vnd.c       25 May 2015 21:02:37 -0000      1.245
+++ vnd.c       23 Jun 2015 17:16:51 -0000
@@ -204,7 +204,7 @@ const struct bdevsw vnd_bdevsw = {
        .d_dump = vnddump,
        .d_psize = vndsize,
        .d_discard = nodiscard,
-       .d_flag = D_DISK
+       .d_flag = D_DISK | D_MPSAFE
 };
 
 const struct cdevsw vnd_cdevsw = {
@@ -219,7 +219,7 @@ const struct cdevsw vnd_cdevsw = {
        .d_mmap = nommap,
        .d_kqfilter = nokqfilter,
        .d_discard = nodiscard,
-       .d_flag = D_DISK
+       .d_flag = D_DISK | D_MPSAFE
 };
 
 static int     vnd_match(device_t, cfdata_t, void *);
@@ -241,12 +241,9 @@ static struct      dkdriver vnddkdriver = {
 void
 vndattach(int num)
 {
-       int error;
 
-       error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
-       if (error)
-               aprint_error("%s: unable to register cfattach\n",
-                   vnd_cd.cd_name);
+       if (config_cfattach_attach(vnd_cd.cd_name, &vnd_ca))
+               aprint_error("%s: unable to register\n", vnd_cd.cd_name);
 }
 
 static int
@@ -265,6 +262,11 @@ vnd_attach(device_t parent, device_t sel
        sc->sc_comp_offsets = NULL;
        sc->sc_comp_buff = NULL;
        sc->sc_comp_decombuf = NULL;
+       mutex_init(&sc->sc_sclock, MUTEX_DEFAULT, IPL_BIO);
+       cv_init(&sc->sc_intercv, "vndlck");
+       cv_init(&sc->sc_activitycv, "vndac");
+       cv_init(&sc->sc_bufqcv, "vndbp");
+       cv_init(&sc->sc_pendingcv, "vndpc");
        bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
        disk_init(&sc->sc_dkdev, device_xname(self), &vnddkdriver);
        if (!pmf_device_register(self, NULL, NULL))
@@ -284,6 +286,11 @@ vnd_detach(device_t self, int flags)
        }
 
        pmf_device_deregister(self);
+       mutex_destroy(&sc->sc_sclock);
+       cv_destroy(&sc->sc_intercv);
+       cv_destroy(&sc->sc_activitycv);
+       cv_destroy(&sc->sc_bufqcv);
+       cv_destroy(&sc->sc_pendingcv);
        bufq_free(sc->sc_tab);
        disk_destroy(&sc->sc_dkdev);
 
@@ -470,7 +477,8 @@ vndstrategy(struct buf *bp)
            device_lookup_private(&vnd_cd, unit);
        struct disklabel *lp;
        daddr_t blkno;
-       int s = splbio();
+
+       mutex_enter(&vnd->sc_sclock);
 
        if (vnd == NULL) {
                bp->b_error = ENXIO;
@@ -545,18 +553,20 @@ vndstrategy(struct buf *bp)
                KASSERT(vnd->sc_pending >= 0 &&
                    vnd->sc_pending <= VND_MAXPENDING(vnd));
                while (vnd->sc_pending == VND_MAXPENDING(vnd))
-                       tsleep(&vnd->sc_pending, PRIBIO, "vndpc", 0);
+                       cv_wait(&vnd->sc_pendingcv, &vnd->sc_sclock);
                vnd->sc_pending++;
        }
+       mutex_exit(&vnd->sc_sclock);
        bufq_put(vnd->sc_tab, bp);
-       wakeup(&vnd->sc_tab);
-       splx(s);
+       mutex_enter(&vnd->sc_sclock);
+       cv_signal(&vnd->sc_bufqcv);
+       mutex_exit(&vnd->sc_sclock);
        return;
 
 done:
+       mutex_exit(&vnd->sc_sclock);
        bp->b_resid = bp->b_bcount;
        biodone(bp);
-       splx(s);
 }
 
 static bool
@@ -601,7 +611,8 @@ static void
 vndthread(void *arg)
 {
        struct vnd_softc *vnd = arg;
-       int s;
+
+       mutex_enter(&vnd->sc_sclock);
 
        /* Determine whether we can *use* VOP_BMAP and VOP_STRATEGY to
         * directly access the backing vnode.  If we can, use these two
@@ -620,31 +631,27 @@ vndthread(void *arg)
                    "using read/write operations");
 #endif
 
-       s = splbio();
-       vnd->sc_flags |= VNF_KTHREAD;
-       wakeup(&vnd->sc_kthread);
-
        /*
         * Dequeue requests and serve them depending on the available
         * vnode operations.
         */
-       while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
+       while ((vnd->sc_flags & VNF_KTHREAD) != 0) {
                struct vndxfer *vnx;
                struct buf *obp;
                struct buf *bp;
 
                obp = bufq_get(vnd->sc_tab);
                if (obp == NULL) {
-                       tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
+                       cv_wait(&vnd->sc_bufqcv, &vnd->sc_sclock);
                        continue;
                };
                if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
                        KASSERT(vnd->sc_pending > 0 &&
                            vnd->sc_pending <= VND_MAXPENDING(vnd));
                        if (vnd->sc_pending-- == VND_MAXPENDING(vnd))
-                               wakeup(&vnd->sc_pending);
+                               cv_signal(&vnd->sc_pendingcv);
                }
-               splx(s);
+               mutex_exit(&vnd->sc_sclock);
 #ifdef DEBUG
                if (vnddebug & VDB_FOLLOW)
                        printf("vndthread(%p)\n", obp);
@@ -672,17 +679,17 @@ vndthread(void *arg)
                 * Allocate a header for this transfer and link it to the
                 * buffer
                 */
-               s = splbio();
-               vnx = VND_GETXFER(vnd);
-               splx(s);
+               /* mutex_enter(&vnd->sc_sclock); */
+               vnx = VND_GETXFER(vnd); /* may sleep */
+               /* mutex_exit(&vnd->sc_sclock); */
                vnx->vx_vnd = vnd;
 
-               s = splbio();
+               mutex_enter(&vnd->sc_sclock);
                while (vnd->sc_active >= vnd->sc_maxactive) {
-                       tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
+                       cv_wait(&vnd->sc_activitycv, &vnd->sc_sclock);
                }
                vnd->sc_active++;
-               splx(s);
+               mutex_exit(&vnd->sc_sclock);
 
                /* Instrumentation. */
                disk_busy(&vnd->sc_dkdev);
@@ -706,17 +713,15 @@ vndthread(void *arg)
                else
                        handle_with_rdwr(vnd, obp, bp);
 
-               s = splbio();
+               mutex_enter(&vnd->sc_sclock);
                continue;
 
 done:
                biodone(obp);
-               s = splbio();
+               mutex_enter(&vnd->sc_sclock);
        }
 
-       vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
-       wakeup(&vnd->sc_kthread);
-       splx(s);
+       mutex_exit(&vnd->sc_sclock);
        kthread_exit(0);
 }
 
@@ -794,7 +799,7 @@ handle_with_rdwr(struct vnd_softc *vnd, 
 }
 
 /*
- * Handes the read/write request given in 'bp' using the vnode's VOP_BMAP
+ * Handles the read/write request given in 'bp' using the vnode's VOP_BMAP
  * and VOP_STRATEGY operations.
  *
  * 'obp' is a pointer to the original request fed to the vnd device.
@@ -917,7 +922,8 @@ vndiodone(struct buf *bp)
        struct vndxfer *vnx = VND_BUFTOXFER(bp);
        struct vnd_softc *vnd = vnx->vx_vnd;
        struct buf *obp = bp->b_private;
-       int s = splbio();
+
+       mutex_enter(&vnd->sc_sclock);
 
        KASSERT(&vnx->vx_buf == bp);
        KASSERT(vnd->sc_active > 0);
@@ -931,9 +937,9 @@ vndiodone(struct buf *bp)
            (bp->b_flags & B_READ));
        vnd->sc_active--;
        if (vnd->sc_active == 0) {
-               wakeup(&vnd->sc_tab);
+               cv_broadcast(&vnd->sc_activitycv);
        }
-       splx(s);
+       mutex_exit(&vnd->sc_sclock);
        obp->b_error = bp->b_error;
        obp->b_resid = bp->b_resid;
        buf_destroy(bp);
@@ -1163,7 +1169,7 @@ vndioctl(dev_t dev, u_long cmd, void *da
                }
                KASSERT(l);
                error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred);
-               if (!error && nd.ni_vp->v_type != VREG)
+               if (!error && vattr.va_type != VREG)
                        error = EOPNOTSUPP;
                if (!error && vattr.va_bytes < vattr.va_size)
                        /* File is definitely sparse, use vn_rdwr() */
@@ -1359,13 +1365,14 @@ vndioctl(dev_t dev, u_long cmd, void *da
                        vio->vnd_size = dbtob(vnd->sc_size);
                vnd->sc_flags |= VNF_INITED;
 
-               /* create the kernel thread, wait for it to be up */
-               error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd,
-                   &vnd->sc_kthread, "%s", device_xname(vnd->sc_dev));
-               if (error)
+               vnd->sc_flags |= VNF_KTHREAD;
+               error = kthread_create(PRI_BIO,
+                   KTHREAD_MPSAFE | KTHREAD_MUSTJOIN, NULL,
+                   vndthread, vnd, &vnd->sc_kthread, "%s",
+                   device_xname(vnd->sc_dev));
+               if (error) {
+                       vnd->sc_flags &= ~VNF_KTHREAD;
                        goto close_and_exit;
-               while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
-                       tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
                }
 #ifdef DEBUG
                if (vnddebug & VDB_INIT)
@@ -1657,10 +1664,10 @@ vndshutdown(void)
 static void
 vndclear(struct vnd_softc *vnd, int myminor)
 {
+       struct lwp *kt;
        struct vnode *vp = vnd->sc_vp;
        int fflags = FREAD;
        int bmaj, cmaj, i, mn;
-       int s;
 
 #ifdef DEBUG
        if (vnddebug & VDB_FOLLOW)
@@ -1678,17 +1685,25 @@ vndclear(struct vnd_softc *vnd, int mymi
                        vdevgone(cmaj, mn, mn, VCHR);
        }
 
+       mutex_enter(&vnd->sc_sclock);
+
        if ((vnd->sc_flags & VNF_READONLY) == 0)
                fflags |= FWRITE;
 
-       s = splbio();
        bufq_drain(vnd->sc_tab);
-       splx(s);
+       cv_broadcast(&vnd->sc_bufqcv);
+
+       mutex_exit(&vnd->sc_sclock);
 
-       vnd->sc_flags |= VNF_VUNCONF;
-       wakeup(&vnd->sc_tab);
-       while (vnd->sc_flags & VNF_KTHREAD)
-               tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);
+       if (vnd->sc_flags & VNF_KTHREAD) {
+               mutex_enter(&vnd->sc_sclock);
+               vnd->sc_flags &= ~VNF_KTHREAD;
+               kt = vnd->sc_kthread;
+               cv_signal(&vnd->sc_bufqcv);
+               cv_signal(&vnd->sc_activitycv);
+               mutex_exit(&vnd->sc_sclock);
+               kthread_join(kt);
+       }
 
 #ifdef VND_COMPRESSION
        /* free the compressed file buffers */
@@ -1709,7 +1724,7 @@ vndclear(struct vnd_softc *vnd, int mymi
 #endif /* VND_COMPRESSION */
        vnd->sc_flags &=
            ~(VNF_INITED | VNF_READONLY | VNF_KLABEL | VNF_VLABEL
-             | VNF_VUNCONF | VNF_COMP | VNF_CLEARING);
+             | VNF_COMP | VNF_CLEARING);
        if (vp == NULL)
                panic("vndclear: null vp");
        (void) vn_close(vp, fflags, vnd->sc_cred);
@@ -1802,6 +1817,50 @@ vndgetdefaultlabel(struct vnd_softc *sc,
 }
 
 /*
+ * Wait interruptibly for an exclusive lock.
+ *
+ * XXX
+ * Several drivers do this; it should be abstracted and made MP-safe.
+ */
+static int
+vndlock(struct vnd_softc *sc)
+{
+       int error;
+
+       mutex_enter(&sc->sc_sclock);
+
+       error = 0;
+       while ((sc->sc_flags & VNF_LOCKED) != 0) {
+               sc->sc_flags |= VNF_WANTED;
+               error = cv_wait_sig(&sc->sc_intercv, &sc->sc_sclock);
+               if (error != 0)
+                       break;
+       }
+       if (error == 0)
+               sc->sc_flags |= VNF_LOCKED;
+
+       mutex_exit(&sc->sc_sclock);
+
+       return error;
+}
+
+/*
+ * Unlock and wake up any waiters.
+ */
+static void
+vndunlock(struct vnd_softc *sc)
+{
+
+       mutex_enter(&sc->sc_sclock);
+       sc->sc_flags &= ~VNF_LOCKED;
+       if ((sc->sc_flags & VNF_WANTED) != 0) {
+               sc->sc_flags &= ~VNF_WANTED;
+               cv_broadcast(&sc->sc_intercv);
+       }
+       mutex_exit(&sc->sc_sclock);
+}
+
+/*
  * Read the disklabel from a vnd.  If one is not present, create a fake one.
  */
 static void
@@ -1853,40 +1912,6 @@ vndgetdisklabel(dev_t dev, struct vnd_so
        }
 }
 
-/*
- * Wait interruptibly for an exclusive lock.
- *
- * XXX
- * Several drivers do this; it should be abstracted and made MP-safe.
- */
-static int
-vndlock(struct vnd_softc *sc)
-{
-       int error;
-
-       while ((sc->sc_flags & VNF_LOCKED) != 0) {
-               sc->sc_flags |= VNF_WANTED;
-               if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
-                       return error;
-       }
-       sc->sc_flags |= VNF_LOCKED;
-       return 0;
-}
-
-/*
- * Unlock and wake up any waiters.
- */
-static void
-vndunlock(struct vnd_softc *sc)
-{
-
-       sc->sc_flags &= ~VNF_LOCKED;
-       if ((sc->sc_flags & VNF_WANTED) != 0) {
-               sc->sc_flags &= ~VNF_WANTED;
-               wakeup(sc);
-       }
-}
-
 #ifdef VND_COMPRESSION
 /* compressed file read */
 static void
@@ -2051,7 +2076,7 @@ vnd_modcmd(modcmd_t cmd, void *arg)
                error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
                if (error) {
                        config_cfdriver_detach(&vnd_cd);
-                       aprint_error("%s: unable to register cfattach\n",
+                       aprint_error("%s: unable to register\n",
                            vnd_cd.cd_name);
                        break;
                }

Reply via email to