I've been working for about a month to ensure filesystems are maximally synchronized and/or clean on-disk through a suspend/resume cycle.
The idea is that if a suspend/resume or hibernate/resume sequence gets broken (by pulling the power+battery during suspend, or similar circumstances during the hibernate-write sequence), we can be assured that the filesystems are in the best shape. And if done correctly, we'll even have marked-clean filesystems which don't need a fsck, so that fresh boot is faster. There is also a similar case when softraid (layers) underlie the filesystems. These layers need proper synchronization to disk also. Previously we've been ignoring this issue, and frankly we've done mostly fine... The work starts with a series of changes to suspend. It is a bit tricky to synchronize the in-memory soft-state of the filesystems to disk, and block new in-memory changes from happening. New allocations of vnodes are caused to sleep-spin, so that other processes cannot advance creating new files. All mountpoints are told to non-lazy sync their filesystems and locks are held on these mountpoints so that no new activity can occur. During this phase, the number of dangling inodes (nlink == 0) is counted, and if any are found the on-disk filesystem is marked dirty, otherwise marked clean. Next, softraid can be told to save its state, but it uses vnodes so a hack allows it to bypass the sleep-spin mentioned earlier. Once the suspend code knows there are no more tsleep calls, it can unwind the mount locks so there is less to worry about upon resume. I would appreciate reports, and later I'll cut this into pieces and commit incremental changes. 
Index: dev/acpi/acpi.c =================================================================== RCS file: /cvs/src/sys/dev/acpi/acpi.c,v retrieving revision 1.335 diff -u -p -u -r1.335 acpi.c --- dev/acpi/acpi.c 29 Nov 2017 22:51:01 -0000 1.335 +++ dev/acpi/acpi.c 5 Jan 2018 17:29:37 -0000 @@ -30,6 +30,8 @@ #include <sys/sched.h> #include <sys/reboot.h> #include <sys/sysctl.h> +#include <sys/mount.h> +#include <sys/syscallargs.h> #ifdef HIBERNATE #include <sys/hibernate.h> @@ -61,6 +63,7 @@ #include "wd.h" #include "wsdisplay.h" +#include "softraid.h" #ifdef ACPI_DEBUG int acpi_debug = 16; @@ -2438,11 +2441,15 @@ int acpi_sleep_state(struct acpi_softc *sc, int sleepmode) { extern int perflevel; + extern int vnode_sleep; extern int lid_action; int error = ENXIO; size_t rndbuflen = 0; char *rndbuf = NULL; int state, s; +#if NSOFTRAID > 0 + extern void sr_quiesce(void); +#endif switch (sleepmode) { case ACPI_SLEEP_SUSPEND: @@ -2481,8 +2488,12 @@ acpi_sleep_state(struct acpi_softc *sc, #ifdef HIBERNATE if (sleepmode == ACPI_SLEEP_HIBERNATE) { - uvmpd_hibernate(); + /* + * Discard useless memory, then attempt to + * create a hibernate work area + */ hibernate_suspend_bufcache(); + uvmpd_hibernate(); if (hibernate_alloc()) { printf("%s: failed to allocate hibernate memory\n", sc->sc_dev.dv_xname); @@ -2495,18 +2506,38 @@ acpi_sleep_state(struct acpi_softc *sc, if (config_suspend_all(DVACT_QUIESCE)) goto fail_quiesce; - bufq_quiesce(); - #ifdef MULTIPROCESSOR acpi_sleep_mp(); #endif + vnode_sleep = 1; + vfs_stall(curproc, 1); +#if NSOFTRAID > 0 + sr_quiesce(); +#endif + bufq_quiesce(); + +#ifdef HIBERNATE + if (sleepmode == ACPI_SLEEP_HIBERNATE) { + /* + * VFS syncing churned lots of memory; so discard + * useless memory again, hoping no processes are + * still allocating.. 
+ */ + hibernate_suspend_bufcache(); + uvmpd_hibernate(); + } +#endif /* HIBERNATE */ + resettodr(); s = splhigh(); disable_intr(); /* PSL_I for resume; PIC/APIC broken until repair */ cold = 2; /* Force other code to delay() instead of tsleep() */ + vfs_stall(curproc, 0); + vnode_sleep = 0; + if (config_suspend_all(DVACT_SUSPEND) != 0) goto fail_suspend; acpi_sleep_clocks(sc, state); @@ -2568,6 +2599,7 @@ fail_suspend: #endif bufq_restart(); + wakeup(&vnode_sleep); fail_quiesce: config_suspend_all(DVACT_WAKEUP); @@ -2588,6 +2620,8 @@ fail_alloc: wsdisplay_resume(); rw_enter_write(&sc->sc_lck); #endif /* NWSDISPLAY > 0 */ + + sys_sync(curproc, NULL, NULL); /* Restore hw.setperf */ if (cpu_setperf != NULL) Index: dev/softraid.c =================================================================== RCS file: /cvs/src/sys/dev/softraid.c,v retrieving revision 1.389 diff -u -p -u -r1.389 softraid.c --- dev/softraid.c 21 Dec 2017 07:29:15 -0000 1.389 +++ dev/softraid.c 6 Jan 2018 00:24:52 -0000 @@ -33,6 +33,7 @@ #include <sys/fcntl.h> #include <sys/disklabel.h> #include <sys/vnode.h> +#include <sys/specdev.h> #include <sys/lock.h> #include <sys/mount.h> #include <sys/sensors.h> @@ -290,6 +291,39 @@ bad: return (rv); } +/* + * Create a vnode for a block device. + * Used for root filesystem, argdev, and swap areas. + * Also used for memory file system special devices. 
+ */ +int +srdevvp(dev_t dev, struct vnode **vpp) +{ + struct vnode *vp; + struct vnode *nvp; + int error; + + if (dev == NODEV) { + *vpp = NULLVP; + return (0); + } + error = getnewvnode(VT_SOFTRAID, NULL, &spec_vops, &nvp); + if (error) { + *vpp = NULLVP; + return (error); + } + vp = nvp; + vp->v_type = VBLK; + if ((nvp = checkalias(vp, dev, NULL)) != 0) { + vput(vp); + vp = nvp; + } + if (vp->v_type == VCHR && cdevsw[major(vp->v_rdev)].d_type == D_TTY) + vp->v_flag |= VISTTY; + *vpp = vp; + return (0); +} + int sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk) { @@ -325,7 +359,7 @@ sr_meta_probe(struct sr_discipline *sd, continue; } else { sr_meta_getdevname(sc, dev, devname, sizeof(devname)); - if (bdevvp(dev, &vn)) { + if (srdevvp(dev, &vn)) { sr_error(sc, "sr_meta_probe: cannot allocate " "vnode"); goto unwind; @@ -414,7 +448,7 @@ sr_rw(struct sr_softc *sc, dev_t dev, ch dma_bufsize = (size > MAXPHYS) ? MAXPHYS : size; dma_buf = dma_alloc(dma_bufsize, PR_WAITOK); - if (bdevvp(dev, &vp)) { + if (srdevvp(dev, &vp)) { printf("%s: sr_rw: failed to allocate vnode\n", DEVNAME(sc)); goto done; } @@ -1086,7 +1120,7 @@ sr_meta_native_bootprobe(struct sr_softc /* open partition */ rawdev = MAKEDISKDEV(major(devno), DISKUNIT(devno), i); - if (bdevvp(rawdev, &vn)) { + if (srdevvp(rawdev, &vn)) { sr_error(sc, "sr_meta_native_bootprobe: cannot " "allocate vnode for partition"); goto done; @@ -2843,7 +2877,7 @@ sr_hotspare(struct sr_softc *sc, dev_t d /* XXX - See if there is an existing degraded volume... */ /* Open device. 
*/ - if (bdevvp(dev, &vn)) { + if (srdevvp(dev, &vn)) { sr_error(sc, "sr_hotspare: cannot allocate vnode"); goto done; } @@ -3156,7 +3190,7 @@ sr_rebuild_init(struct sr_discipline *sd } sr_meta_getdevname(sc, dev, devname, sizeof(devname)); - if (bdevvp(dev, &vn)) { + if (srdevvp(dev, &vn)) { printf("%s: sr_rebuild_init: can't allocate vnode\n", DEVNAME(sc)); goto done; @@ -3930,6 +3964,12 @@ sr_discipline_shutdown(struct sr_discipl EWOULDBLOCK) break; + if (dying == -1) { + sd->sd_ready = 1; + splx(s); + return; + } + #ifndef SMALL_KERNEL sr_sensors_delete(sd); #endif /* SMALL_KERNEL */ @@ -4540,6 +4580,18 @@ sr_validate_stripsize(u_int32_t b) return(-1); return (s); +} + +void +sr_quiesce(void) +{ + struct sr_softc *sc = softraid0; + struct sr_discipline *sd, *nsd; + + /* Shutdown disciplines in reverse attach order. */ + TAILQ_FOREACH_REVERSE_SAFE(sd, &sc->sc_dis_list, + sr_discipline_list, sd_link, nsd) + sr_discipline_shutdown(sd, 1, -1); } void Index: dev/softraid_crypto.c =================================================================== RCS file: /cvs/src/sys/dev/softraid_crypto.c,v retrieving revision 1.137 diff -u -p -u -r1.137 softraid_crypto.c --- dev/softraid_crypto.c 12 Jun 2017 16:39:51 -0000 1.137 +++ dev/softraid_crypto.c 6 Jan 2018 00:05:32 -0000 @@ -640,7 +640,7 @@ sr_crypto_create_key_disk(struct sr_disc } /* Open device. */ - if (bdevvp(dev, &vn)) { + if (srdevvp(dev, &vn)) { sr_error(sc, "cannot open key disk %s", devname); goto done; } @@ -803,7 +803,7 @@ sr_crypto_read_key_disk(struct sr_discip } /* Open device. 
*/ - if (bdevvp(dev, &vn)) { + if (srdevvp(dev, &vn)) { sr_error(sc, "cannot open key disk %s", devname); goto done; } Index: dev/softraidvar.h =================================================================== RCS file: /cvs/src/sys/dev/softraidvar.h,v retrieving revision 1.167 diff -u -p -u -r1.167 softraidvar.h --- dev/softraidvar.h 12 Jun 2017 16:39:51 -0000 1.167 +++ dev/softraidvar.h 6 Jan 2018 00:06:02 -0000 @@ -464,6 +464,8 @@ struct sr_crypto { struct sr_concat { }; +int srdevvp(dev_t dev, struct vnode **vpp); + struct sr_chunk { struct sr_meta_chunk src_meta; /* chunk meta data */ Index: isofs/cd9660/cd9660_extern.h =================================================================== RCS file: /cvs/src/sys/isofs/cd9660/cd9660_extern.h,v retrieving revision 1.13 diff -u -p -u -r1.13 cd9660_extern.h --- isofs/cd9660/cd9660_extern.h 2 Jun 2013 01:07:39 -0000 1.13 +++ isofs/cd9660/cd9660_extern.h 5 Jan 2018 06:11:19 -0000 @@ -87,7 +87,7 @@ int cd9660_unmount(struct mount *, int, int cd9660_root(struct mount *, struct vnode **); int cd9660_quotactl(struct mount *, int, uid_t, caddr_t, struct proc *); int cd9660_statfs(struct mount *, struct statfs *, struct proc *); -int cd9660_sync(struct mount *, int, struct ucred *, struct proc *); +int cd9660_sync(struct mount *, int, int, struct ucred *, struct proc *); int cd9660_vget(struct mount *, ino_t, struct vnode **); int cd9660_fhtovp(struct mount *, struct fid *, struct vnode **); int cd9660_vptofh(struct vnode *, struct fid *); Index: isofs/cd9660/cd9660_vfsops.c =================================================================== RCS file: /cvs/src/sys/isofs/cd9660/cd9660_vfsops.c,v retrieving revision 1.86 diff -u -p -u -r1.86 cd9660_vfsops.c --- isofs/cd9660/cd9660_vfsops.c 30 Dec 2017 23:08:29 -0000 1.86 +++ isofs/cd9660/cd9660_vfsops.c 5 Jan 2018 06:11:19 -0000 @@ -644,9 +644,10 @@ cd9660_statfs(mp, sbp, p) /* ARGSUSED */ int -cd9660_sync(mp, waitfor, cred, p) +cd9660_sync(mp, waitfor, stall, cred, p) 
struct mount *mp; int waitfor; + int stall; struct ucred *cred; struct proc *p; { Index: isofs/udf/udf_extern.h =================================================================== RCS file: /cvs/src/sys/isofs/udf/udf_extern.h,v retrieving revision 1.13 diff -u -p -u -r1.13 udf_extern.h --- isofs/udf/udf_extern.h 2 Jun 2013 15:35:18 -0000 1.13 +++ isofs/udf/udf_extern.h 5 Jan 2018 06:11:19 -0000 @@ -26,7 +26,7 @@ int udf_root(struct mount *, struct vnod int udf_quotactl(struct mount *, int, uid_t, caddr_t, struct proc *); int udf_statfs(struct mount *, struct statfs *, struct proc *); int udf_vget(struct mount *, ino_t, struct vnode **); -int udf_sync(struct mount *, int, struct ucred *, struct proc *); +int udf_sync(struct mount *, int, int, struct ucred *, struct proc *); int udf_sysctl(int *, u_int, void *, size_t *, void *, size_t, struct proc *); int udf_checkexp(struct mount *, struct mbuf *, int *, struct ucred **); int udf_fhtovp(struct mount *, struct fid *, struct vnode **); Index: isofs/udf/udf_vfsops.c =================================================================== RCS file: /cvs/src/sys/isofs/udf/udf_vfsops.c,v retrieving revision 1.61 diff -u -p -u -r1.61 udf_vfsops.c --- isofs/udf/udf_vfsops.c 11 Dec 2017 05:27:40 -0000 1.61 +++ isofs/udf/udf_vfsops.c 5 Jan 2018 06:11:20 -0000 @@ -537,7 +537,7 @@ udf_statfs(struct mount *mp, struct stat } int -udf_sync(struct mount *mp, int waitfor, struct ucred *cred, struct proc *p) +udf_sync(struct mount *mp, int waitfor, int stall, struct ucred *cred, struct proc *p) { return (0); } Index: kern/vfs_subr.c =================================================================== RCS file: /cvs/src/sys/kern/vfs_subr.c,v retrieving revision 1.265 diff -u -p -u -r1.265 vfs_subr.c --- kern/vfs_subr.c 14 Dec 2017 20:23:15 -0000 1.265 +++ kern/vfs_subr.c 6 Jan 2018 00:25:27 -0000 @@ -72,7 +72,7 @@ #include "softraid.h" -void sr_shutdown(int); +void sr_quiesce(void); enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, 
VDIR, VNON, VBLK, VNON, @@ -330,6 +330,9 @@ vattr_null(struct vattr *vap) */ long numvnodes; +int vnode_sleep; + + /* * Return the next vnode from the free list. */ @@ -337,12 +340,20 @@ int getnewvnode(enum vtagtype tag, struct mount *mp, struct vops *vops, struct vnode **vpp) { + extern int vnode_sleep; struct proc *p = curproc; struct freelst *listhd; static int toggle; struct vnode *vp; int s; + if (tag == VT_SOFTRAID) { + tag = VT_NON; + } else { + while (vnode_sleep) + tsleep(&vnode_sleep, PINOD, "getnewvnode", 0); + } + /* * allow maxvnodes to increase if the buffer cache itself * is big enough to justify it. (we don't shrink it ever) @@ -1584,6 +1595,39 @@ vaccess(enum vtype type, mode_t file_mod } int +vfs_stall(struct proc *p, int stall) +{ + struct mount *mp, *nmp; + int allerror = 0, error; + + TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, nmp) { + if (stall) { + error = vfs_busy(mp, VB_WRITE|VB_WAIT); + if (error) { + printf("%s: busy\n", mp->mnt_stat.f_mntonname); + allerror = error; + continue; + } + uvm_vnp_sync(mp); + error = VFS_SYNC(mp, MNT_WAIT, stall, p->p_ucred, p); + if (error) { + printf("%s: failed to sync\n", mp->mnt_stat.f_mntonname); + vfs_unbusy(mp); + allerror = error; + continue; + } + mp->mnt_flag |= MNT_STALLED; + } else { + if (mp->mnt_flag & MNT_STALLED) { + vfs_unbusy(mp); + mp->mnt_flag &= ~MNT_STALLED; + } + } + } + return (allerror); +} + +int vfs_readonly(struct mount *mp, struct proc *p) { int error; @@ -1594,7 +1638,7 @@ vfs_readonly(struct mount *mp, struct pr return (error); } uvm_vnp_sync(mp); - error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p); + error = VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p); if (error) { printf("%s: failed to sync\n", mp->mnt_stat.f_mntonname); vfs_unbusy(mp); @@ -1627,10 +1671,8 @@ vfs_rofs(struct proc *p) { struct mount *mp, *nmp; - TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, nmp) { - /* XXX Here is a race, the next pointer is not locked. 
*/ + TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, nmp) (void) vfs_readonly(mp, p); - } } /* @@ -1651,14 +1693,14 @@ vfs_shutdown(struct proc *p) vfs_rofs(p); } +#if NSOFTRAID > 0 + sr_quiesce(); +#endif + if (vfs_syncwait(p, 1)) printf("giving up\n"); else printf("done\n"); - -#if NSOFTRAID > 0 - sr_shutdown(1); -#endif } /* Index: kern/vfs_sync.c =================================================================== RCS file: /cvs/src/sys/kern/vfs_sync.c,v retrieving revision 1.56 diff -u -p -u -r1.56 vfs_sync.c --- kern/vfs_sync.c 14 Feb 2017 10:31:15 -0000 1.56 +++ kern/vfs_sync.c 5 Jan 2018 06:11:20 -0000 @@ -339,7 +339,7 @@ sync_fsync(void *v) if (vfs_busy(mp, VB_READ|VB_NOWAIT) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; - VFS_SYNC(mp, MNT_LAZY, ap->a_cred, ap->a_p); + VFS_SYNC(mp, MNT_LAZY, 0, ap->a_cred, ap->a_p); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; vfs_unbusy(mp); Index: kern/vfs_syscalls.c =================================================================== RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v retrieving revision 1.274 diff -u -p -u -r1.274 vfs_syscalls.c --- kern/vfs_syscalls.c 2 Jan 2018 06:38:45 -0000 1.274 +++ kern/vfs_syscalls.c 5 Jan 2018 06:11:20 -0000 @@ -494,7 +494,7 @@ dounmount_leaf(struct mount *mp, int fla mp->mnt_syncer = NULL; } if (((mp->mnt_flag & MNT_RDONLY) || - (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || + (error = VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p)) == 0) || (flags & MNT_FORCE)) error = VFS_UNMOUNT(mp, flags, p); @@ -543,7 +543,7 @@ sys_sync(struct proc *p, void *v, regist asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; uvm_vnp_sync(mp); - VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p); + VFS_SYNC(mp, MNT_NOWAIT, 0, p->p_ucred, p); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; } Index: miscfs/fuse/fuse_vfsops.c =================================================================== RCS file: /cvs/src/sys/miscfs/fuse/fuse_vfsops.c,v retrieving revision 1.31 
diff -u -p -u -r1.31 fuse_vfsops.c --- miscfs/fuse/fuse_vfsops.c 4 Jan 2018 10:51:11 -0000 1.31 +++ miscfs/fuse/fuse_vfsops.c 5 Jan 2018 06:11:20 -0000 @@ -40,7 +40,7 @@ int fusefs_unmount(struct mount *, int, int fusefs_root(struct mount *, struct vnode **); int fusefs_quotactl(struct mount *, int, uid_t, caddr_t, struct proc *); int fusefs_statfs(struct mount *, struct statfs *, struct proc *); -int fusefs_sync(struct mount *, int, struct ucred *, struct proc *); +int fusefs_sync(struct mount *, int, int, struct ucred *, struct proc *); int fusefs_vget(struct mount *, ino_t, struct vnode **); int fusefs_fhtovp(struct mount *, struct fid *, struct vnode **); int fusefs_vptofh(struct vnode *, struct fid *); @@ -239,7 +239,7 @@ fusefs_statfs(struct mount *mp, struct s } int -fusefs_sync(struct mount *mp, int waitfor, struct ucred *cred, +fusefs_sync(struct mount *mp, int waitfor, int stall, struct ucred *cred, struct proc *p) { return (0); Index: msdosfs/msdosfs_vfsops.c =================================================================== RCS file: /cvs/src/sys/msdosfs/msdosfs_vfsops.c,v retrieving revision 1.86 diff -u -p -u -r1.86 msdosfs_vfsops.c --- msdosfs/msdosfs_vfsops.c 30 Dec 2017 23:08:29 -0000 1.86 +++ msdosfs/msdosfs_vfsops.c 5 Jan 2018 06:11:20 -0000 @@ -80,7 +80,7 @@ int msdosfs_start(struct mount *, int, s int msdosfs_unmount(struct mount *, int, struct proc *); int msdosfs_root(struct mount *, struct vnode **); int msdosfs_statfs(struct mount *, struct statfs *, struct proc *); -int msdosfs_sync(struct mount *, int, struct ucred *, struct proc *); +int msdosfs_sync(struct mount *, int, int, struct ucred *, struct proc *); int msdosfs_fhtovp(struct mount *, struct fid *, struct vnode **); int msdosfs_vptofh(struct vnode *, struct fid *); int msdosfs_check_export(struct mount *mp, struct mbuf *nam, @@ -118,7 +118,7 @@ msdosfs_mount(struct mount *mp, const ch if (!(pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_flag & MNT_RDONLY)) { mp->mnt_flag &= 
~MNT_RDONLY; - VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p); + VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p); mp->mnt_flag |= MNT_RDONLY; flags = WRITECLOSE; @@ -689,7 +689,8 @@ msdosfs_sync_vnode(struct vnode *vp, voi int -msdosfs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct proc *p) +msdosfs_sync(struct mount *mp, int waitfor, int stall, struct ucred *cred, + struct proc *p) { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); struct msdosfs_sync_arg msa; Index: msdosfs/msdosfsmount.h =================================================================== RCS file: /cvs/src/sys/msdosfs/msdosfsmount.h,v retrieving revision 1.21 diff -u -p -u -r1.21 msdosfsmount.h --- msdosfs/msdosfsmount.h 21 May 2016 18:11:36 -0000 1.21 +++ msdosfs/msdosfsmount.h 5 Jan 2018 06:11:20 -0000 @@ -206,7 +206,7 @@ int msdosfs_unmount(struct mount *, int, int msdosfs_root(struct mount *, struct vnode **); int msdosfs_quotactl(struct mount *, int, uid_t, caddr_t, struct proc *); int msdosfs_statfs(struct mount *, struct statfs *, struct proc *); -int msdosfs_sync(struct mount *, int, struct ucred *, struct proc *); +int msdosfs_sync(struct mount *, int, int, struct ucred *, struct proc *); int msdosfs_fhtovp(struct mount *, struct fid *, struct vnode **); int msdosfs_vptofh(struct vnode *, struct fid *); int msdosfs_init(struct vfsconf *); Index: nfs/nfs_vfsops.c =================================================================== RCS file: /cvs/src/sys/nfs/nfs_vfsops.c,v retrieving revision 1.115 diff -u -p -u -r1.115 nfs_vfsops.c --- nfs/nfs_vfsops.c 11 Dec 2017 05:27:40 -0000 1.115 +++ nfs/nfs_vfsops.c 5 Jan 2018 06:11:20 -0000 @@ -80,7 +80,7 @@ int nfs_quotactl(struct mount *, int, ui int nfs_root(struct mount *, struct vnode **); int nfs_start(struct mount *, int, struct proc *); int nfs_statfs(struct mount *, struct statfs *, struct proc *); -int nfs_sync(struct mount *, int, struct ucred *, struct proc *); +int nfs_sync(struct mount *, int, int, struct ucred *, struct proc *); int 
nfs_unmount(struct mount *, int, struct proc *); int nfs_vget(struct mount *, ino_t, struct vnode **); int nfs_vptofh(struct vnode *, struct fid *); @@ -729,7 +729,7 @@ nfs_root(struct mount *mp, struct vnode * Flush out the buffer cache */ int -nfs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct proc *p) +nfs_sync(struct mount *mp, int waitfor, int stall, struct ucred *cred, struct proc *p) { struct vnode *vp; int error, allerror = 0; Index: ntfs/ntfs_vfsops.c =================================================================== RCS file: /cvs/src/sys/ntfs/ntfs_vfsops.c,v retrieving revision 1.57 diff -u -p -u -r1.57 ntfs_vfsops.c --- ntfs/ntfs_vfsops.c 11 Dec 2017 05:27:40 -0000 1.57 +++ ntfs/ntfs_vfsops.c 5 Jan 2018 06:11:20 -0000 @@ -60,7 +60,7 @@ int ntfs_root(struct mount *, struct vno int ntfs_start(struct mount *, int, struct proc *); int ntfs_statfs(struct mount *, struct statfs *, struct proc *); -int ntfs_sync(struct mount *, int, struct ucred *, +int ntfs_sync(struct mount *, int, int, struct ucred *, struct proc *); int ntfs_unmount(struct mount *, int, struct proc *); int ntfs_vget(struct mount *mp, ino_t ino, @@ -612,7 +612,7 @@ ntfs_statfs(struct mount *mp, struct sta } int -ntfs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct proc *p) +ntfs_sync(struct mount *mp, int waitfor, int stall, struct ucred *cred, struct proc *p) { /*DPRINTF("ntfs_sync():\n");*/ return (0); Index: sys/mount.h =================================================================== RCS file: /cvs/src/sys/sys/mount.h,v retrieving revision 1.134 diff -u -p -u -r1.134 mount.h --- sys/mount.h 5 Jan 2018 05:54:36 -0000 1.134 +++ sys/mount.h 5 Jan 2018 06:11:20 -0000 @@ -389,6 +389,7 @@ struct mount { #define MNT_DELEXPORT 0x00020000 /* delete export host lists */ #define MNT_RELOAD 0x00040000 /* reload filesystem data */ #define MNT_FORCE 0x00080000 /* force unmount or readonly change */ +#define MNT_STALLED 0x00100000 /* filesystem stalled */ #define 
MNT_WANTRDWR 0x02000000 /* want upgrade to read/write */ #define MNT_SOFTDEP 0x04000000 /* soft dependencies being done */ #define MNT_DOOMED 0x08000000 /* device behind filesystem is gone */ @@ -505,7 +506,7 @@ struct vfsops { caddr_t arg, struct proc *p); int (*vfs_statfs)(struct mount *mp, struct statfs *sbp, struct proc *p); - int (*vfs_sync)(struct mount *mp, int waitfor, + int (*vfs_sync)(struct mount *mp, int waitfor, int stall, struct ucred *cred, struct proc *p); int (*vfs_vget)(struct mount *mp, ino_t ino, struct vnode **vpp); @@ -526,7 +527,7 @@ struct vfsops { #define VFS_ROOT(MP, VPP) (*(MP)->mnt_op->vfs_root)(MP, VPP) #define VFS_QUOTACTL(MP,C,U,A,P) (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A, P) #define VFS_STATFS(MP, SBP, P) (*(MP)->mnt_op->vfs_statfs)(MP, SBP, P) -#define VFS_SYNC(MP, WAIT, C, P) (*(MP)->mnt_op->vfs_sync)(MP, WAIT, C, P) +#define VFS_SYNC(MP, W, S, C, P) (*(MP)->mnt_op->vfs_sync)(MP, W, S, C, P) #define VFS_VGET(MP, INO, VPP) (*(MP)->mnt_op->vfs_vget)(MP, INO, VPP) #define VFS_FHTOVP(MP, FIDP, VPP) \ (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, VPP) @@ -573,6 +574,7 @@ int vfs_mountedon(struct vnode *); int vfs_rootmountalloc(char *, char *, struct mount **); void vfs_unbusy(struct mount *); extern TAILQ_HEAD(mntlist, mount) mountlist; +int vfs_stall(struct proc *, int); struct mount *getvfs(fsid_t *); /* return vfs given fsid */ /* process mount export info */ Index: sys/vnode.h =================================================================== RCS file: /cvs/src/sys/sys/vnode.h,v retrieving revision 1.142 diff -u -p -u -r1.142 vnode.h --- sys/vnode.h 14 Dec 2017 20:20:38 -0000 1.142 +++ sys/vnode.h 5 Jan 2018 23:56:40 -0000 @@ -67,12 +67,13 @@ enum vtagtype { VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_MSDOSFS, VT_PORTAL, VT_PROCFS, VT_AFS, VT_ISOFS, VT_ADOSFS, VT_EXT2FS, VT_VFS, VT_NTFS, VT_UDF, VT_FUSEFS, VT_TMPFS, + VT_SOFTRAID, }; #define VTAG_NAMES \ "NON", "UFS", "NFS", "MFS", "MSDOSFS", \ "unused", "unused", "unused", "ISOFS", "unused", \ 
- "EXT2FS", "VFS", "NTFS", "UDF", "FUSEFS", "TMPFS" + "EXT2FS", "VFS", "NTFS", "UDF", "FUSEFS", "TMPFS", "softraid" /* * Each underlying filesystem allocates its own private area and hangs Index: ufs/ext2fs/ext2fs_extern.h =================================================================== RCS file: /cvs/src/sys/ufs/ext2fs/ext2fs_extern.h,v retrieving revision 1.36 diff -u -p -u -r1.36 ext2fs_extern.h --- ufs/ext2fs/ext2fs_extern.h 10 Aug 2016 07:53:02 -0000 1.36 +++ ufs/ext2fs/ext2fs_extern.h 5 Jan 2018 06:11:20 -0000 @@ -105,7 +105,7 @@ int ext2fs_mountfs(struct vnode *, struc int ext2fs_unmount(struct mount *, int, struct proc *); int ext2fs_flushfiles(struct mount *, int, struct proc *); int ext2fs_statfs(struct mount *, struct statfs *, struct proc *); -int ext2fs_sync(struct mount *, int, struct ucred *, struct proc *); +int ext2fs_sync(struct mount *, int, int, struct ucred *, struct proc *); int ext2fs_vget(struct mount *, ino_t, struct vnode **); int ext2fs_fhtovp(struct mount *, struct fid *, struct vnode **); int ext2fs_vptofh(struct vnode *, struct fid *); Index: ufs/ext2fs/ext2fs_vfsops.c =================================================================== RCS file: /cvs/src/sys/ufs/ext2fs/ext2fs_vfsops.c,v retrieving revision 1.101 diff -u -p -u -r1.101 ext2fs_vfsops.c --- ufs/ext2fs/ext2fs_vfsops.c 30 Dec 2017 23:08:29 -0000 1.101 +++ ufs/ext2fs/ext2fs_vfsops.c 6 Jan 2018 17:35:59 -0000 @@ -696,6 +696,7 @@ int ext2fs_sync_vnode(struct vnode *vp, struct ext2fs_sync_args { int allerror; int waitfor; + int nlink0; struct proc *p; struct ucred *cred; }; @@ -707,16 +708,23 @@ ext2fs_sync_vnode(struct vnode *vp, void struct inode *ip; int error; + if (vp->v_type == VNON) + return (0); + ip = VTOI(vp); - if (vp->v_type == VNON || - ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && - LIST_EMPTY(&vp->v_dirtyblkhd)) || - esa->waitfor == MNT_LAZY) { + + if (ip->i_e2fs_nlink == 0) + esa->nlink0 = 1; + + if ((ip->i_flag & (IN_ACCESS | 
IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && + LIST_EMPTY(&vp->v_dirtyblkhd)) { return (0); } - if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT, esa->p)) + if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT, esa->p)) { + esa->nlink0 = 1; return (0); + } if ((error = VOP_FSYNC(vp, esa->cred, esa->waitfor, esa->p)) != 0) esa->allerror = error; @@ -731,11 +739,12 @@ ext2fs_sync_vnode(struct vnode *vp, void * Should always be called with the mount point locked. */ int -ext2fs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct proc *p) +ext2fs_sync(struct mount *mp, int waitfor, int stall, + struct ucred *cred, struct proc *p) { struct ufsmount *ump = VFSTOUFS(mp); struct m_ext2fs *fs; - int error, allerror = 0; + int error, allerror = 0, state, fmod; struct ext2fs_sync_args esa; fs = ump->um_e2fs; @@ -751,6 +760,7 @@ ext2fs_sync(struct mount *mp, int waitfo esa.cred = cred; esa.allerror = 0; esa.waitfor = waitfor; + esa.nlink0 = 0; vfs_mount_foreach_vnode(mp, ext2fs_sync_vnode, &esa); if (esa.allerror != 0) @@ -768,12 +778,33 @@ ext2fs_sync(struct mount *mp, int waitfo /* * Write back modified superblock. 
*/ + state = fs->e2fs.e2fs_state; + fmod = fs->e2fs_fmod; + if (stall && fs->e2fs_ronly == 0) { + fs->e2fs_fmod = 1; + if (allerror == 0 && esa.nlink0 == 0) { + if ((fs->e2fs.e2fs_state & E2FS_ERRORS) == 0) + fs->e2fs.e2fs_state = E2FS_ISCLEAN; + printf("%s force clean (%d %d): fmod %d clean %d\n", + mp->mnt_stat.f_mntonname, + allerror, esa.nlink0, + fs->e2fs_fmod, fs->e2fs.e2fs_state); + } else { + fs->e2fs.e2fs_state = 0; + printf("%s force dirty (%d %d): fmod %d clean %d\n", + mp->mnt_stat.f_mntonname, + allerror, esa.nlink0, + fs->e2fs_fmod, fs->e2fs.e2fs_state); + } + } if (fs->e2fs_fmod != 0) { fs->e2fs_fmod = 0; fs->e2fs.e2fs_wtime = time_second; if ((error = ext2fs_cgupdate(ump, waitfor))) allerror = error; } + fs->e2fs.e2fs_state = state; + fs->e2fs_fmod = fmod; return (allerror); } Index: ufs/ffs/ffs_extern.h =================================================================== RCS file: /cvs/src/sys/ufs/ffs/ffs_extern.h,v retrieving revision 1.43 diff -u -p -u -r1.43 ffs_extern.h --- ufs/ffs/ffs_extern.h 10 Aug 2016 08:04:57 -0000 1.43 +++ ufs/ffs/ffs_extern.h 5 Jan 2018 06:11:20 -0000 @@ -144,7 +144,7 @@ int ffs_oldfscompat(struct fs *); int ffs_unmount(struct mount *, int, struct proc *); int ffs_flushfiles(struct mount *, int, struct proc *); int ffs_statfs(struct mount *, struct statfs *, struct proc *); -int ffs_sync(struct mount *, int, struct ucred *, struct proc *); +int ffs_sync(struct mount *, int, int, struct ucred *, struct proc *); int ffs_vget(struct mount *, ino_t, struct vnode **); int ffs_fhtovp(struct mount *, struct fid *, struct vnode **); int ffs_vptofh(struct vnode *, struct fid *); Index: ufs/ffs/ffs_softdep.c =================================================================== RCS file: /cvs/src/sys/ufs/ffs/ffs_softdep.c,v retrieving revision 1.137 diff -u -p -u -r1.137 ffs_softdep.c --- ufs/ffs/ffs_softdep.c 13 Dec 2017 16:38:34 -0000 1.137 +++ ufs/ffs/ffs_softdep.c 5 Jan 2018 06:11:20 -0000 @@ -4945,7 +4945,7 @@ loop: */ if 
(vn_isdisk(vp, NULL) && vp->v_specmountpoint && !VOP_ISLOCKED(vp) && - (error = VFS_SYNC(vp->v_specmountpoint, MNT_WAIT, ap->a_cred, + (error = VFS_SYNC(vp->v_specmountpoint, MNT_WAIT, 0, ap->a_cred, ap->a_p)) != 0) return (error); return (0); Index: ufs/ffs/ffs_vfsops.c =================================================================== RCS file: /cvs/src/sys/ufs/ffs/ffs_vfsops.c,v retrieving revision 1.171 diff -u -p -u -r1.171 ffs_vfsops.c --- ufs/ffs/ffs_vfsops.c 30 Dec 2017 23:08:29 -0000 1.171 +++ ufs/ffs/ffs_vfsops.c 6 Jan 2018 16:51:11 -0000 @@ -242,8 +242,10 @@ ffs_mount(struct mount *mp, const char * ronly = fs->fs_ronly; if (ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { + mp->mnt_flag |= MNT_RDONLY; + /* Flush any dirty data */ - VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p); + VFS_SYNC(mp, MNT_WAIT, 0, p->p_ucred, p); /* * Get rid of files open for writing. @@ -258,7 +260,6 @@ ffs_mount(struct mount *mp, const char * mp->mnt_flag &= ~MNT_SOFTDEP; } else error = ffs_flushfiles(mp, flags, p); - mp->mnt_flag |= MNT_RDONLY; ronly = 1; } @@ -1141,11 +1142,13 @@ struct ffs_sync_args { int allerror; struct proc *p; int waitfor; + int nlink0; struct ucred *cred; }; int -ffs_sync_vnode(struct vnode *vp, void *arg) { +ffs_sync_vnode(struct vnode *vp, void *arg) +{ struct ffs_sync_args *fsa = arg; struct inode *ip; int error; @@ -1164,14 +1167,19 @@ ffs_sync_vnode(struct vnode *vp, void *a UFS_UPDATE(ip, 1); } + if (ip->i_effnlink == 0) + fsa->nlink0 = 1; + if ((ip->i_flag & - (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && LIST_EMPTY(&vp->v_dirtyblkhd)) { return (0); } - if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT, fsa->p)) + if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT, fsa->p)) { + fsa->nlink0 = 1; /* potentially.. 
*/ return (0); + } if ((error = VOP_FSYNC(vp, fsa->cred, fsa->waitfor, fsa->p))) fsa->allerror = error; @@ -1189,11 +1197,11 @@ ffs_sync_vnode(struct vnode *vp, void *a * Should always be called with the mount point locked. */ int -ffs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct proc *p) +ffs_sync(struct mount *mp, int waitfor, int stall, struct ucred *cred, struct proc *p) { struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; - int error, allerror = 0, count; + int error, allerror = 0, count, clean, fmod; struct ffs_sync_args fsa; fs = ump->um_fs; @@ -1214,6 +1222,7 @@ ffs_sync(struct mount *mp, int waitfor, fsa.p = p; fsa.cred = cred; fsa.waitfor = waitfor; + fsa.nlink0 = 0; /* * Don't traverse the vnode list if we want to skip all of them. @@ -1243,9 +1252,28 @@ ffs_sync(struct mount *mp, int waitfor, /* * Write back modified superblock. */ - + clean = fs->fs_clean; + fmod = fs->fs_fmod; + if (stall && fs->fs_ronly == 0) { + fs->fs_fmod = 1; + if (allerror == 0 && fsa.nlink0 == 0) { + fs->fs_clean = (fs->fs_flags & FS_UNCLEAN) ? 
0 : 1; + printf("%s force clean (%d %d): fmod %d clean %d\n", + mp->mnt_stat.f_mntonname, + allerror, fsa.nlink0, + fs->fs_fmod, fs->fs_clean); + } else { + fs->fs_clean = 0; + printf("%s force dirty (%d %d): fmod %d clean %d\n", + mp->mnt_stat.f_mntonname, + allerror, fsa.nlink0, + fs->fs_fmod, fs->fs_clean); + } + } if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0) allerror = error; + fs->fs_clean = clean; + fs->fs_fmod = fmod; return (allerror); } Index: ufs/ufs/ufs_lookup.c =================================================================== RCS file: /cvs/src/sys/ufs/ufs/ufs_lookup.c,v retrieving revision 1.50 diff -u -p -u -r1.50 ufs_lookup.c --- ufs/ufs/ufs_lookup.c 10 Sep 2016 16:53:30 -0000 1.50 +++ ufs/ufs/ufs_lookup.c 5 Jan 2018 06:11:20 -0000 @@ -786,7 +786,7 @@ ufs_direnter(struct vnode *dvp, struct v return (error); } error = VOP_BWRITE(bp); - ret = UFS_UPDATE(dp, !DOINGSOFTDEP(dvp)); + ret = UFS_UPDATE(dp, 1); if (error == 0) return (ret); return (error); Index: ufs/ufs/ufs_vnops.c =================================================================== RCS file: /cvs/src/sys/ufs/ufs/ufs_vnops.c,v retrieving revision 1.135 diff -u -p -u -r1.135 ufs_vnops.c --- ufs/ufs/ufs_vnops.c 2 Jan 2018 06:38:45 -0000 1.135 +++ ufs/ufs/ufs_vnops.c 5 Jan 2018 06:11:20 -0000 @@ -370,8 +370,9 @@ ufs_setattr(void *v) DIP_OR(ip, flags, vap->va_flags & UF_SETTABLE); } ip->i_flag |= IN_CHANGE; + error = UFS_UPDATE(ip, 0); if (vap->va_flags & (IMMUTABLE | APPEND)) - return (0); + return error; } if (DIP(ip, flags) & (IMMUTABLE | APPEND)) return (EPERM); @@ -477,7 +478,8 @@ ufs_chmod(struct vnode *vp, int mode, st ip->i_flag |= IN_CHANGE; if ((vp->v_flag & VTEXT) && (DIP(ip, mode) & S_ISTXT) == 0) (void) uvm_vnp_uncache(vp); - return (0); + error = UFS_UPDATE(ip, 0); + return error; } /* @@ -553,7 +555,8 @@ ufs_chown(struct vnode *vp, uid_t uid, g if (ogid != gid && cred->cr_uid != 0 && (vp->v_mount->mnt_flag & MNT_NOPERM) == 0) DIP_AND(ip, mode, ~ISGID); - 
return (0); + error = UFS_UPDATE(ip, 0); + return error; error: (void) ufs_quota_delete(ip); @@ -1362,6 +1365,7 @@ ufs_rmdir(void *v) DIP_ADD(dp, nlink, -1); dp->i_flag |= IN_CHANGE; + error = UFS_UPDATE(dp, 0); DIP_ADD(ip, nlink, -1); ip->i_flag |= IN_CHANGE; ioflag = DOINGASYNC(vp) ? 0 : IO_SYNC; @@ -1404,6 +1408,7 @@ ufs_symlink(void *v) memcpy(SHORTLINK(ip), ap->a_target, len); DIP_ASSIGN(ip, size, len); ip->i_flag |= IN_CHANGE | IN_UPDATE; + error = UFS_UPDATE(ip, 0); } else error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, NULL,