On Fri, Aug 09, 2024 at 07:29:52PM +0000, Taylor R Campbell wrote: > I don't know what the locking protocol is, but any changes to it > absolutely must be accompanied by comments identifying the lock order > and, if applicable, any rules like `forbidden in softint'.
Right, I go for not changing it. > I don't understand, how did you come to this conclusion? Now I retry to reproduce the problem and I cannot get it to show up. But I was hit by it a long time ago, I should have check what I recalled before starting this async sapshot creation thread thing. Let us forget about the snapshot creatin thread, it seems there are many locks missing. Attached patch tries to address that. I wonder if I should not add a vfs_busy/vfs_unbusy in such situation: mount = vp->v_mount; (...) vrele(vp); (...) check for another vnode's ->v_mount == mount -- Emmanuel Dreyfus m...@netbsd.org
Index: fss.c =================================================================== RCS file: /cvsroot/src/sys/dev/fss.c,v retrieving revision 1.114 diff -U4 -r1.114 fss.c --- fss.c 22 Mar 2023 21:14:46 -0000 1.114 +++ fss.c 10 Aug 2024 00:28:37 -0000 @@ -219,9 +219,12 @@ if (sc == NULL) { mutex_exit(&fss_device_lock); return ENOMEM; } + + mutex_enter(&sc->sc_slock); sc->sc_state = FSS_IDLE; + mutex_exit(&sc->sc_slock); } mutex_enter(&sc->sc_slock); @@ -359,11 +362,19 @@ error = EPERM; if (error == 0 && sc->sc_state != FSS_IDLE) { error = EBUSY; } else { + char mntname[MNAMELEN]; + sc->sc_state = FSS_CREATING; - copyinstr(fss->fss_mount, sc->sc_mntname, - sizeof(sc->sc_mntname), NULL); + mutex_exit(&sc->sc_slock); + + copyinstr(fss->fss_mount, mntname, + sizeof(mntname), NULL); + + mutex_enter(&sc->sc_slock); + memcpy(sc->sc_mntname, mntname, + sizeof(sc->sc_mntname)); memset(&sc->sc_time, 0, sizeof(sc->sc_time)); sc->sc_clshift = 0; } mutex_exit(&sc->sc_slock); @@ -701,13 +712,13 @@ struct timespec ts; /* distinguish lookup 1 from lookup 2 to reduce mistakes */ struct pathbuf *pb2; struct vnode *vp, *vp2; + struct mount *mount; /* * Get the mounted file system. */ - error = namei_simple_user(fss->fss_mount, NSM_FOLLOW_NOEMULROOT, &vp); if (error != 0) return error; @@ -716,10 +727,13 @@ vrele(vp); return EINVAL; } - sc->sc_mount = vp->v_mount; - memcpy(sc->sc_mntname, sc->sc_mount->mnt_stat.f_mntonname, MNAMELEN); + mount = vp->v_mount; + mutex_enter(&sc->sc_slock); + sc->sc_mount = mount; + memcpy(sc->sc_mntname, mount->mnt_stat.f_mntonname, MNAMELEN); + mutex_exit(&sc->sc_slock); vrele(vp); /* @@ -730,23 +744,29 @@ NSM_FOLLOW_NOEMULROOT, &vp); if (error != 0) return error; - if (vp->v_type == VREG && vp->v_mount == sc->sc_mount) { + mutex_enter(&sc->sc_slock); + if (vp->v_type == VREG && vp->v_mount == mount) { sc->sc_flags |= FSS_PERSISTENT; sc->sc_bs_vp = vp; - fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize; + fsbsize = mount->mnt_stat.f_iosize; bits = sizeof(sc->sc_bs_bshift)*NBBY; + for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < bits; sc->sc_bs_bshift++) if (FSS_FSBSIZE(sc) == fsbsize) break; - if (sc->sc_bs_bshift >= bits) + + if (sc->sc_bs_bshift >= bits) { + mutex_exit(&sc->sc_slock); return EINVAL; + } sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; sc->sc_clshift = 0; + mutex_exit(&sc->sc_slock); if ((fss->fss_flags & FSS_UNLINK_ON_CREATE) != 0) { error = do_sys_unlink(fss->fss_bstore, UIO_USERSPACE); if (error) @@ -754,12 +774,14 @@ } error = vn_lock(vp, LK_EXCLUSIVE); if (error != 0) return error; - error = VFS_SNAPSHOT(sc->sc_mount, sc->sc_bs_vp, &ts); + error = VFS_SNAPSHOT(mount, vp, &ts); + mutex_enter(&sc->sc_slock); TIMESPEC_TO_TIMEVAL(&sc->sc_time, &ts); + mutex_exit(&sc->sc_slock); - VOP_UNLOCK(sc->sc_bs_vp); + VOP_UNLOCK(vp); return error; } vrele(vp); @@ -767,12 +789,15 @@ /* * Get the block device it is mounted on and its size. */ - error = spec_node_lookup_by_mount(sc->sc_mount, &vp); + error = spec_node_lookup_by_mount(mount, &vp); if (error) return error; + + mutex_enter(&sc->sc_slock); sc->sc_bdev = vp->v_rdev; + mutex_exit(&sc->sc_slock); error = getdisksize(vp, &numsec, &secsize); vrele(vp); if (error) @@ -784,19 +809,21 @@ * Get the backing store */ error = pathbuf_copyin(fss->fss_bstore, &pb2); - if (error) { + if (error) return error; - } + error = vn_open(NULL, pb2, 0, FREAD|FWRITE, 0, &vp2, NULL, NULL); if (error != 0) { pathbuf_destroy(pb2); return error; } VOP_UNLOCK(vp2); + mutex_enter(&sc->sc_slock); sc->sc_bs_vp = vp2; + mutex_exit(&sc->sc_slock); if (vp2->v_type != VREG && vp2->v_type != VCHR) { vrele(vp2); pathbuf_destroy(pb2); @@ -808,23 +835,30 @@ error = do_sys_unlink(fss->fss_bstore, UIO_USERSPACE); if (error) return error; } + + mutex_enter(&sc->sc_slock); if (sc->sc_bs_vp->v_type == VREG) { fsbsize = sc->sc_bs_vp->v_mount->mnt_stat.f_iosize; - if (fsbsize & (fsbsize-1)) /* No power of two */ + if (fsbsize & (fsbsize-1)) { /* No power of two */ + mutex_exit(&sc->sc_slock); return EINVAL; + } for (sc->sc_bs_bshift = 1; sc->sc_bs_bshift < 32; sc->sc_bs_bshift++) if (FSS_FSBSIZE(sc) == fsbsize) break; - if (sc->sc_bs_bshift >= 32) + if (sc->sc_bs_bshift >= 32) { + mutex_exit(&sc->sc_slock); return EINVAL; + } sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; } else { sc->sc_bs_bshift = DEV_BSHIFT; sc->sc_bs_bmask = FSS_FSBSIZE(sc)-1; } + mutex_exit(&sc->sc_slock); return 0; } @@ -833,8 +867,10 @@ */ static int fss_create_snapshot(struct fss_softc *sc, struct fss_set *fss, struct lwp *l) { + struct mount *mount = sc->sc_mount; + struct vnode *vp; int len, error; u_int32_t csize; off_t bsize; @@ -845,15 +881,16 @@ */ if ((error = fss_create_files(sc, fss, &bsize, l)) != 0) goto bad; + mutex_enter(&sc->sc_slock); if (sc->sc_flags & FSS_PERSISTENT) { - fss_softc_alloc(sc); - mutex_enter(&sc->sc_slock); sc->sc_state = FSS_ACTIVE; mutex_exit(&sc->sc_slock); + fss_softc_alloc(sc); return 0; } + mutex_exit(&sc->sc_slock); /* * Set cluster size. Must be a power of two and * a multiple of backing store block size. @@ -864,13 +901,15 @@ csize = fss->fss_csize; if (bsize/csize > FSS_CLUSTER_MAX) csize = bsize/FSS_CLUSTER_MAX+1; + mutex_enter(&sc->sc_slock); for (sc->sc_clshift = sc->sc_bs_bshift; sc->sc_clshift < 32; sc->sc_clshift++) if (FSS_CLSIZE(sc) >= csize) break; if (sc->sc_clshift >= 32) { + mutex_exit(&sc->sc_slock); error = EINVAL; goto bad; } sc->sc_clmask = FSS_CLSIZE(sc)-1; @@ -898,52 +937,67 @@ sc->sc_indir_size = FSS_BTOCL(sc, len)+1; sc->sc_clnext = sc->sc_indir_size; sc->sc_indir_cur = 0; - if ((error = fss_softc_alloc(sc)) != 0) + mutex_exit(&sc->sc_slock); + + if ((error = fss_softc_alloc(sc)) != 0) { goto bad; + } /* * Activate the snapshot. */ - if ((error = vfs_suspend(sc->sc_mount, 0)) != 0) + if ((error = vfs_suspend(mount, 0)) != 0) goto bad; + mutex_enter(&sc->sc_slock); microtime(&sc->sc_time); + mutex_exit(&sc->sc_slock); - vrele_flush(sc->sc_mount); - error = VFS_SYNC(sc->sc_mount, MNT_WAIT, curlwp->l_cred); - if (error == 0) - error = fscow_establish(sc->sc_mount, fss_copy_on_write, sc); + vrele_flush(mount); + error = VFS_SYNC(mount, MNT_WAIT, curlwp->l_cred); + if (error == 0) + error = fscow_establish(mount, fss_copy_on_write, sc); if (error == 0) { mutex_enter(&sc->sc_slock); sc->sc_state = FSS_ACTIVE; mutex_exit(&sc->sc_slock); } - vfs_resume(sc->sc_mount); + vfs_resume(mount); if (error != 0) goto bad; + mutex_enter(&sc->sc_slock); aprint_debug_dev(sc->sc_dev, "%s snapshot active\n", sc->sc_mntname); aprint_debug_dev(sc->sc_dev, "%u clusters of %u, %u cache slots, %u indir clusters\n", sc->sc_clcount, FSS_CLSIZE(sc), sc->sc_cache_size, sc->sc_indir_size); + mutex_exit(&sc->sc_slock); return 0; bad: fss_softc_free(sc); - if (sc->sc_bs_vp != NULL) { - if (sc->sc_flags & FSS_PERSISTENT) - vrele(sc->sc_bs_vp); - else - vn_close(sc->sc_bs_vp, FREAD|FWRITE, l->l_cred); + + mutex_enter(&sc->sc_slock); + if ((vp = sc->sc_bs_vp) != NULL) { + if (sc->sc_flags & FSS_PERSISTENT) { + mutex_exit(&sc->sc_slock); + vrele(vp); + } else { + mutex_exit(&sc->sc_slock); + vn_close(vp, FREAD|FWRITE, l->l_cred); + } } + + mutex_enter(&sc->sc_slock); sc->sc_bs_vp = NULL; + mutex_exit(&sc->sc_slock); return error; } @@ -1390,23 +1444,26 @@ error = devsw_attach(fss_cd.cd_name, &fss_bdevsw, &fss_bmajor, &fss_cdevsw, &fss_cmajor); if (error) { + cv_destroy(&fss_device_cv); mutex_destroy(&fss_device_lock); break; } error = config_cfdriver_attach(&fss_cd); if (error) { devsw_detach(&fss_bdevsw, &fss_cdevsw); + cv_destroy(&fss_device_cv); mutex_destroy(&fss_device_lock); break; } error = config_cfattach_attach(fss_cd.cd_name, &fss_ca); if (error) { config_cfdriver_detach(&fss_cd); devsw_detach(&fss_bdevsw, &fss_cdevsw); + cv_destroy(&fss_device_cv); mutex_destroy(&fss_device_lock); break; }