Just a follow-on for tech people - if you do use this and
hit a problem, I would very much appreciate your dmesg,
as well as the output of "ps", "trace", "show bcstats" and "show uvm" from ddb.
-Bob
On Wed, Nov 07, 2012 at 10:35:03AM -0800, Mike Larkin wrote:
> On Wed, Nov 07, 2012 at 11:25:55AM -0700, Bob Beck wrote:
> > Hello tech@.
> >
> > I just committed some significant fixes to the buffer cache in -current.
> > (you want to look for version 1.139 of vfs_bio.c)
> >
> > If you have such a current kernel, you may wish to try the following
> > diff - it is a backout of a previous commit that emasculated softdep
> > in order to make it safe with our buffer cache at the time. With
> > version 1.139 of vfs_bio.c I believe this is no longer needed.
> >
> > I have done extensive testing with the diff below and beating up
> > softdep filesystems - it works, and can actually provide
> > significantly increased performance in some situations.
> >
> > You too may want to try what is below. If you do, please drop me a
> > note indicating what you were doing with it, and what you noticed,
> > successes, failures, performance changes, etc. I would like to know
> > about all.
> >
> > WARNING - if you apply this to something without version 1.139 of
> > vfs_bio.c, you may hang your machine.
> >
> > Thanks
> >
> > -Bob
> >
>
> Been running on my main dev machine for the past day or so, no regressions
> seen. (Note I'm referring to the previous version of the diff circulated).
>
> -ml
>
> >
> > ---------------8<----------------
> > Index: ufs/ffs/ffs_inode.c
> > ===================================================================
> > RCS file: /cvs/src/sys/ufs/ffs/ffs_inode.c,v
> > retrieving revision 1.61
> > diff -u -p -r1.61 ffs_inode.c
> > --- ufs/ffs/ffs_inode.c 23 Mar 2012 15:51:26 -0000 1.61
> > +++ ufs/ffs/ffs_inode.c 20 Sep 2012 13:27:05 -0000
> > @@ -164,10 +164,8 @@ ffs_truncate(struct inode *oip, off_t le
> > struct buf *bp;
> > int offset, size, level;
> > long count, nblocks, vflags, blocksreleased = 0;
> > - int i, aflags, error, allerror, indirect = 0;
> > + int i, aflags, error, allerror;
> > off_t osize;
> > - extern int num_indirdep;
> > - extern int max_indirdep;
> >
> > if (length < 0)
> > return (EINVAL);
> > @@ -243,8 +241,6 @@ ffs_truncate(struct inode *oip, off_t le
> > cred, aflags, &bp);
> > if (error)
> > return (error);
> > - if (bp->b_lblkno >= NDADDR)
> > - indirect = 1;
> > DIP_ASSIGN(oip, size, length);
> > uvm_vnp_setsize(ovp, length);
> > (void) uvm_vnp_uncache(ovp);
> > @@ -253,20 +249,7 @@ ffs_truncate(struct inode *oip, off_t le
> > else
> > bawrite(bp);
> > oip->i_flag |= IN_CHANGE | IN_UPDATE;
> > - error = UFS_UPDATE(oip, MNT_WAIT);
> > - if (DOINGSOFTDEP(ovp) && num_indirdep > max_indirdep)
> > - if (indirect) {
> > - /*
> > - * If the number of pending indirect block
> > - * dependencies is sufficiently close to the
> > - * maximum number of simultaneously mappable
> > - * buffers force a sync on the vnode to prevent
> > - * buffer cache exhaustion.
> > - */
> > - VOP_FSYNC(ovp, curproc->p_ucred, MNT_WAIT,
> > - curproc);
> > - }
> > - return (error);
> > + return (UFS_UPDATE(oip, MNT_WAIT));
> > }
> > uvm_vnp_setsize(ovp, length);
> >
> > Index: ufs/ffs/ffs_softdep.c
> > ===================================================================
> > RCS file: /cvs/src/sys/ufs/ffs/ffs_softdep.c,v
> > retrieving revision 1.112
> > diff -u -p -r1.112 ffs_softdep.c
> > --- ufs/ffs/ffs_softdep.c 18 Sep 2011 23:20:28 -0000 1.112
> > +++ ufs/ffs/ffs_softdep.c 20 Sep 2012 13:27:05 -0000
> > @@ -526,8 +526,6 @@ workitem_free(struct worklist *item)
> > STATIC struct workhead softdep_workitem_pending;
> > STATIC struct worklist *worklist_tail;
> > STATIC int num_on_worklist; /* number of worklist items to be
> > processed */
> > -STATIC int num_indirdep; /* number of indirdep items to be processed */
> > -STATIC int max_indirdep; /* maximum number of indirdep items allowed */
> > STATIC int softdep_worklist_busy; /* 1 => trying to do unmount */
> > STATIC int softdep_worklist_req; /* serialized waiters */
> > STATIC int max_softdeps; /* maximum number of structs before slowdown */
> > @@ -1142,8 +1140,6 @@ top:
> > void
> > softdep_initialize(void)
> > {
> > - extern vsize_t bufkvm;
> > - max_indirdep = (int)bufkvm / MAXPHYS * 80 / 100;
> >
> > bioops.io_start = softdep_disk_io_initiation;
> > bioops.io_complete = softdep_disk_write_complete;
> > @@ -1819,14 +1815,12 @@ setup_allocindir_phase2(struct buf *bp,
> > if (indirdep->ir_savebp != NULL)
> > brelse(newindirdep->ir_savebp);
> > WORKITEM_FREE(newindirdep, D_INDIRDEP);
> > - num_indirdep--;
> > }
> > if (indirdep)
> > break;
> > newindirdep = pool_get(&indirdep_pool, PR_WAITOK);
> > newindirdep->ir_list.wk_type = D_INDIRDEP;
> > newindirdep->ir_state = ATTACHED;
> > - num_indirdep++;
> > if (ip->i_ump->um_fstype == UM_UFS1)
> > newindirdep->ir_state |= UFS1FMT;
> > LIST_INIT(&newindirdep->ir_deplisthd);
> > @@ -2446,7 +2440,6 @@ indir_trunc(struct inode *ip, daddr64_t
> > }
> > WORKLIST_REMOVE(wk);
> > WORKITEM_FREE(indirdep, D_INDIRDEP);
> > - num_indirdep--;
> > if (LIST_FIRST(&bp->b_dep) != NULL) {
> > FREE_LOCK(&lk);
> > panic("indir_trunc: dangling dep");
> > @@ -3288,7 +3281,6 @@ softdep_disk_io_initiation(struct buf *b
> > wk->wk_state &= ~ONWORKLIST;
> > LIST_REMOVE(wk, wk_list);
> > WORKITEM_FREE(indirdep, D_INDIRDEP);
> > - num_indirdep--;
> > FREE_LOCK(&lk);
> > brelse(sbp);
> > ACQUIRE_LOCK(&lk);
> > Index: ufs/ffs/ffs_vnops.c
> > ===================================================================
> > RCS file: /cvs/src/sys/ufs/ffs/ffs_vnops.c,v
> > retrieving revision 1.69
> > diff -u -p -r1.69 ffs_vnops.c
> > --- ufs/ffs/ffs_vnops.c 17 Sep 2012 14:22:23 -0000 1.69
> > +++ ufs/ffs/ffs_vnops.c 20 Sep 2012 13:37:00 -0000
> > @@ -295,12 +295,10 @@ ffs_write(void *v)
> > struct inode *ip;
> > struct fs *fs;
> > struct buf *bp;
> > - struct proc *p;
> > daddr64_t lbn;
> > off_t osize;
> > int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
> > int overrun;
> > - extern int num_indirdep, max_indirdep;
> >
> > extended = 0;
> > ioflag = ap->a_ioflag;
> > @@ -367,11 +365,7 @@ ffs_write(void *v)
> > if (uio->uio_offset + xfersize > DIP(ip, size)) {
> > DIP_ASSIGN(ip, size, uio->uio_offset + xfersize);
> > uvm_vnp_setsize(vp, DIP(ip, size));
> > - /* Are we extending into an indirect block? */
> > - if (bp->b_lblkno < NDADDR)
> > - extended = 1;
> > - else
> > - extended = 2;
> > + extended = 1;
> > }
> > (void)uvm_vnp_uncache(vp);
> >
> > @@ -415,21 +409,8 @@ ffs_write(void *v)
> > uio->uio_offset -= resid - uio->uio_resid;
> > uio->uio_resid = resid;
> > }
> > - } else if (resid > uio->uio_resid) {
> > - if (ioflag & IO_SYNC)
> > - error = UFS_UPDATE(ip, MNT_WAIT);
> > - if (DOINGSOFTDEP(vp) && num_indirdep > max_indirdep)
> > - if (extended > 1) {
> > - /*
> > - * If the number of pending indirect block
> > - * dependencies is sufficiently close to the
> > - * maximum number of simultaneously mappable
> > - * buffers force a sync on the vnode to prevent
> > - * buffer cache exhaustion.
> > - */
> > - p = uio->uio_procp;
> > - VOP_FSYNC(vp, NULL, MNT_WAIT, p);
> > - }
> > + } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
> > + error = UFS_UPDATE(ip, MNT_WAIT);
> > }
> > /* correct the result for writes clamped by vn_fsizechk() */
> > uio->uio_resid += overrun;