Re: vfs: drop proc from VOP_OPEN
Note, this opens up pending items for cleanup, from my notes: - fuse_file_open (into fb_setup) - nfs_vinvalbuf (and vinvalbuf itself). I'm skeptical of touching the latter until I have (again) some facsimile of the setup that lives in theo's basement :) On Sat, Jul 15, 2023 at 02:20:36PM +0200, Thordur Bjornsson wrote: > First of a few. > > Many thanks to mpi@ for adding the curproc assert's! > > Two questions: > - NOCRED: Best this should just be NULL ? > - FSCRED: This is only AFAICT used for mounts > Not 100% clear, but this could perhaps be done away with at the loss > of some metadata (it just forces spec_open to open up given secure > levels) in ufs. beck@, thoughts ? (i want to say the S word, but not > sure if it is strictly true). > > diff --git sys/dev/softraid.c sys/dev/softraid.c > index decea16cb24..2319b85b946 100644 > --- sys/dev/softraid.c > +++ sys/dev/softraid.c > @@ -333,7 +333,7 @@ sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int > no_chunk) >* XXX leaving dev open for now; move this to attach >* and figure out the open/close dance for unwind. 
>*/ > - error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc); > + error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED); > if (error) { > DNPRINTF(SR_D_META,"%s: sr_meta_probe can't " > "open %s\n", DEVNAME(sc), devname); > @@ -1037,7 +1037,7 @@ sr_meta_native_bootprobe(struct sr_softc *sc, dev_t > devno, > } > > /* open device */ > - error = VOP_OPEN(vn, FREAD, NOCRED, curproc); > + error = VOP_OPEN(vn, FREAD, NOCRED); > if (error) { > DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open " > "failed\n", DEVNAME(sc)); > @@ -1093,7 +1093,7 @@ sr_meta_native_bootprobe(struct sr_softc *sc, dev_t > devno, > "allocate vnode for partition"); > goto done; > } > - error = VOP_OPEN(vn, FREAD, NOCRED, curproc); > + error = VOP_OPEN(vn, FREAD, NOCRED); > if (error) { > DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe " > "open failed, partition %d\n", > @@ -2833,7 +2833,7 @@ sr_hotspare(struct sr_softc *sc, dev_t dev) > sr_error(sc, "sr_hotspare: cannot allocate vnode"); > goto done; > } > - if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) { > + if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED)) { > DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n", > DEVNAME(sc), devname); > vput(vn); > @@ -3147,7 +3147,7 @@ sr_rebuild_init(struct sr_discipline *sd, dev_t dev, > int hotspare) > DEVNAME(sc)); > goto done; > } > - if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) { > + if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED)) { > DNPRINTF(SR_D_META,"%s: sr_ioctl_setstate can't " > "open %s\n", DEVNAME(sc), devname); > vput(vn); > diff --git sys/dev/softraid_crypto.c sys/dev/softraid_crypto.c > index fbe8358e5dd..354c6560180 100644 > --- sys/dev/softraid_crypto.c > +++ sys/dev/softraid_crypto.c > @@ -665,7 +665,7 @@ sr_crypto_create_key_disk(struct sr_discipline *sd, > sr_error(sc, "cannot open key disk %s", devname); > goto done; > } > - if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) { > + if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED)) { > DNPRINTF(SR_D_META,"%s: sr_crypto_create_key_disk cannot " > 
"open %s\n", DEVNAME(sc), devname); > vput(vn); > @@ -829,7 +829,7 @@ sr_crypto_read_key_disk(struct sr_discipline *sd, struct > sr_crypto *mdd_crypto, > sr_error(sc, "cannot open key disk %s", devname); > goto done; > } > - if (VOP_OPEN(vn, FREAD, NOCRED, curproc)) { > + if (VOP_OPEN(vn, FREAD, NOCRED)) { > DNPRINTF(SR_D_META,"%s: sr_crypto_read_key_disk cannot " > "open %s\n", DEVNAME(sc), devname); > vput(vn); > diff --git sys/isofs/cd9660/cd9660_vfsops.c sys/isofs/cd9660/cd9660_vfsops.c > index b844a2ff709..601c761a000 100644 > --- sys/isofs/cd9660/cd9660_vfsops.c > +++ sys/isofs/cd9660/cd9660_vfsops.c > @@ -240,7 +240,7 @@ iso_mountfs(struct vnode *devvp, st
vfs: drop proc from VOP_OPEN
First of a few. Many thanks to mpi@ for adding the curproc assert's! Two questions: - NOCRED: Best this should just be NULL ? - FSCRED: This is only AFAICT used for mounts Not 100% clear, but this could perhaps be done away with at the loss of some metadata (it just forces spec_open to open up given secure levels) in ufs. beck@, thoughts ? (i want to say the S word, but not sure if it is strictly true). diff --git sys/dev/softraid.c sys/dev/softraid.c index decea16cb24..2319b85b946 100644 --- sys/dev/softraid.c +++ sys/dev/softraid.c @@ -333,7 +333,7 @@ sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int no_chunk) * XXX leaving dev open for now; move this to attach * and figure out the open/close dance for unwind. */ - error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc); + error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED); if (error) { DNPRINTF(SR_D_META,"%s: sr_meta_probe can't " "open %s\n", DEVNAME(sc), devname); @@ -1037,7 +1037,7 @@ sr_meta_native_bootprobe(struct sr_softc *sc, dev_t devno, } /* open device */ - error = VOP_OPEN(vn, FREAD, NOCRED, curproc); + error = VOP_OPEN(vn, FREAD, NOCRED); if (error) { DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open " "failed\n", DEVNAME(sc)); @@ -1093,7 +1093,7 @@ sr_meta_native_bootprobe(struct sr_softc *sc, dev_t devno, "allocate vnode for partition"); goto done; } - error = VOP_OPEN(vn, FREAD, NOCRED, curproc); + error = VOP_OPEN(vn, FREAD, NOCRED); if (error) { DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe " "open failed, partition %d\n", @@ -2833,7 +2833,7 @@ sr_hotspare(struct sr_softc *sc, dev_t dev) sr_error(sc, "sr_hotspare: cannot allocate vnode"); goto done; } - if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) { + if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED)) { DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n", DEVNAME(sc), devname); vput(vn); @@ -3147,7 +3147,7 @@ sr_rebuild_init(struct sr_discipline *sd, dev_t dev, int hotspare) DEVNAME(sc)); goto done; } - if (VOP_OPEN(vn, FREAD | FWRITE, 
NOCRED, curproc)) { + if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED)) { DNPRINTF(SR_D_META,"%s: sr_ioctl_setstate can't " "open %s\n", DEVNAME(sc), devname); vput(vn); diff --git sys/dev/softraid_crypto.c sys/dev/softraid_crypto.c index fbe8358e5dd..354c6560180 100644 --- sys/dev/softraid_crypto.c +++ sys/dev/softraid_crypto.c @@ -665,7 +665,7 @@ sr_crypto_create_key_disk(struct sr_discipline *sd, sr_error(sc, "cannot open key disk %s", devname); goto done; } - if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) { + if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED)) { DNPRINTF(SR_D_META,"%s: sr_crypto_create_key_disk cannot " "open %s\n", DEVNAME(sc), devname); vput(vn); @@ -829,7 +829,7 @@ sr_crypto_read_key_disk(struct sr_discipline *sd, struct sr_crypto *mdd_crypto, sr_error(sc, "cannot open key disk %s", devname); goto done; } - if (VOP_OPEN(vn, FREAD, NOCRED, curproc)) { + if (VOP_OPEN(vn, FREAD, NOCRED)) { DNPRINTF(SR_D_META,"%s: sr_crypto_read_key_disk cannot " "open %s\n", DEVNAME(sc), devname); vput(vn); diff --git sys/isofs/cd9660/cd9660_vfsops.c sys/isofs/cd9660/cd9660_vfsops.c index b844a2ff709..601c761a000 100644 --- sys/isofs/cd9660/cd9660_vfsops.c +++ sys/isofs/cd9660/cd9660_vfsops.c @@ -240,7 +240,7 @@ iso_mountfs(struct vnode *devvp, struct mount *mp, struct proc *p, if (error) return (error); - error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); + error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED); if (error) return (error); diff --git sys/isofs/udf/udf_vfsops.c sys/isofs/udf/udf_vfsops.c index c0fc5068e08..2a5b7f03314 100644 --- sys/isofs/udf/udf_vfsops.c +++ sys/isofs/udf/udf_vfsops.c @@ -243,7 +243,7 @@ udf_mountfs(struct vnode *devvp, struct mount *mp, uint32_t lb, struct proc *p) if (error) return (error); - error = VOP_OPEN(devvp, FREAD, FSCRED, p); + error = VOP_OPEN(devvp, FREAD, FSCRED); if (error) return (error); diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c index 3f3112b018f..7253bc3cc8e 100644 ---
Re: deadfs cleanup
ping beck@ stacking vp->v_lock (rwlock, couldn't bring myself to call it interlock) diffs ontop of this to kill v_id and VXLOCK. Could use an eyeball or two :) On Tue, Jul 11, 2023 at 09:34:01PM +0200, thib4711 wrote: > deadfs cleanup > > chkvnlock() is useless, since deadfs vops are only ever assigned > to a vnode at the tail end of vclean(), at which point the VXLOCK > has been cleared and won't be taken again for this particular > vnode until it is re-used through getnewvnode(). > > As a bonus, LK_DRAIN can soon retire as well. > Juggle the tail end (mtx enter/leave) and the knote at the tail > of vclean() for sanity while here. > > diff --git sys/kern/vfs_subr.c sys/kern/vfs_subr.c > index 650fe5b61a2..425b6871cdd 100644 > --- sys/kern/vfs_subr.c > +++ sys/kern/vfs_subr.c > @@ -1051,7 +1051,7 @@ vclean(struct vnode *vp, int flags, struct proc *p) >* For active vnodes, it ensures that no other activity can >* occur while the underlying object is being cleaned out. >*/ > - VOP_LOCK(vp, LK_EXCLUSIVE | LK_DRAIN); > + VOP_LOCK(vp, LK_EXCLUSIVE); > > /* >* Clean out any VM data associated with the vnode. > @@ -1099,19 +1099,21 @@ vclean(struct vnode *vp, int flags, struct proc *p) > /* >* Done with purge, notify sleepers of the grim news. 
>*/ > + mtx_enter(_mtx); > vp->v_op = _vops; > - VN_KNOTE(vp, NOTE_REVOKE); > vp->v_tag = VT_NON; > #ifdef VFSLCKDEBUG > vp->v_flag &= ~VLOCKSWORK; > #endif > - mtx_enter(_mtx); > vp->v_lflag &= ~VXLOCK; > if (vp->v_lflag & VXWANT) { > vp->v_lflag &= ~VXWANT; > do_wakeup = 1; > } > mtx_leave(_mtx); > + > + VN_KNOTE(vp, NOTE_REVOKE); > + > if (do_wakeup) > wakeup(vp); > } > diff --git sys/miscfs/deadfs/dead_vnops.c sys/miscfs/deadfs/dead_vnops.c > index 9711f1618be..44496815567 100644 > --- sys/miscfs/deadfs/dead_vnops.c > +++ sys/miscfs/deadfs/dead_vnops.c > @@ -49,16 +49,10 @@ int dead_ebadf(void *); > int dead_open(void *); > int dead_read(void *); > int dead_write(void *); > -int dead_ioctl(void *); > int dead_kqfilter(void *v); > -int dead_inactive(void *); > -int dead_lock(void *); > -int dead_bmap(void *); > int dead_strategy(void *); > int dead_print(void *); > > -int chkvnlock(struct vnode *); > - > const struct vops dead_vops = { > .vop_lookup = vop_generic_lookup, > .vop_create = vop_generic_badop, > @@ -70,7 +64,7 @@ const struct vops dead_vops = { > .vop_setattr= dead_ebadf, > .vop_read = dead_read, > .vop_write = dead_write, > - .vop_ioctl = dead_ioctl, > + .vop_ioctl = nullop, > .vop_kqfilter = dead_kqfilter, > .vop_revoke = NULL, > .vop_fsync = nullop, > @@ -83,12 +77,12 @@ const struct vops dead_vops = { > .vop_readdir= dead_ebadf, > .vop_readlink = dead_ebadf, > .vop_abortop= vop_generic_badop, > - .vop_inactive = dead_inactive, > + .vop_inactive = nullop, > .vop_reclaim= nullop, > - .vop_lock = dead_lock, > + .vop_lock = nullop, > .vop_unlock = nullop, > .vop_islocked = nullop, > - .vop_bmap = dead_bmap, > + .vop_bmap = nullop, > .vop_strategy = dead_strategy, > .vop_print = dead_print, > .vop_pathconf = dead_ebadf, > @@ -105,50 +99,25 @@ dead_open(void *v) > return (ENXIO); > } > > -/* > - * Vnode op for read > - */ > int > dead_read(void *v) > { > struct vop_read_args *ap = v; > > - if (chkvnlock(ap->a_vp)) > - panic("dead_read: lock"); > /* > - 
* Return EOF for tty devices, EIO for others > - */ > + * Return EOF for tty devices, EIO for others > + */ > if ((ap->a_vp->v_flag & VISTTY) == 0) > return (EIO); > return (0); > } > > -/* > - * Vnode op for write > - */ > int > dead_write(void *v) > { > - struct vop_write_args *ap = v; > - > - if (chkvnlock(ap->a_vp)) > - panic("dead_write: lock"); > return (EIO); > } > > -/* > - * Device ioctl operation. > - */ > -int > -dead_ioctl(void *v) > -{ > - struct vop_ioctl_args *ap = v; > - > - if (!chkvnlock(ap->a_vp)) > - return (EBADF); > - return ((ap->a_vp->v_op->vop_ioctl)(ap)); > -} > - > int > dead_kqfilter(void *v) > { > @@ -180,51 +149,11 @@ dead_strategy(void *v) > struct vop_strategy_args *ap = v; > int s; > > - if (ap->a_bp->b_vp == NULL || !chkvnlock(ap->a_bp->b_vp)) { > - ap->a_bp->b_flags |= B_ERROR; > - s = splbio(); > - biodone(ap->a_bp); > - splx(s); > - return (EIO); > - } > - return (VOP_STRATEGY(ap->a_bp->b_vp, ap->a_bp)); > -} > - > -int > -dead_inactive(void *v) > -{ > - struct vop_inactive_args *ap = v; > - > -
Re: vfs: drop unnecessary cache_purge()s
On Mon, Jul 17, 2023 at 11:38:49AM +0200, Sebastien Marie wrote: > On Sat, Jul 15, 2023 at 09:21:40AM +0200, Thordur Bjornsson wrote: > > yes, vclean() will call cache_purge() after calling VOP_RECLAIM(). so we > ended-up > to have called cache_purge() several times. > > but the vnode isn't in the same state inside VOP_RECLAIM() and after calling > it. > it seems fine as the *_reclaim() is freeing v_data contents, and > cache_purge() > doesn't touch that. I don't follow, that's exactly why this is redundant ? Barring bugs, no more cache entries will be added to vnode due to VXLOCK being held in vclean(). > also, you didn't change ufs_reclaim() to not call cache_purge() ? is it on > purpose ? Nope, straight up forgot it. diff --git sys/isofs/cd9660/cd9660_node.c sys/isofs/cd9660/cd9660_node.c index bce99d77c22..300277f3b37 100644 --- sys/isofs/cd9660/cd9660_node.c +++ sys/isofs/cd9660/cd9660_node.c @@ -218,7 +218,6 @@ cd9660_reclaim(void *v) /* * Purge old data structures associated with the inode. */ - cache_purge(vp); if (ip->i_devvp) { vrele(ip->i_devvp); ip->i_devvp = 0; diff --git sys/msdosfs/msdosfs_denode.c sys/msdosfs/msdosfs_denode.c index 7a33212b648..3707c97458e 100644 --- sys/msdosfs/msdosfs_denode.c +++ sys/msdosfs/msdosfs_denode.c @@ -600,7 +600,6 @@ msdosfs_reclaim(void *v) /* * Purge old data structures associated with the denode. */ - cache_purge(vp); if (dep->de_devvp) { vrele(dep->de_devvp); dep->de_devvp = 0; diff --git sys/nfs/nfs_node.c sys/nfs/nfs_node.c index c8ac3b9bb14..38ad5db82fc 100644 --- sys/nfs/nfs_node.c +++ sys/nfs/nfs_node.c @@ -237,7 +237,6 @@ nfs_reclaim(void *v) if (np->n_wcred) crfree(np->n_wcred); - cache_purge(vp); pool_put(_node_pool, vp->v_data); vp->v_data = NULL; diff --git sys/ntfs/ntfs_vnops.c sys/ntfs/ntfs_vnops.c index d239112e991..d40e3d254f6 100644 --- sys/ntfs/ntfs_vnops.c +++ sys/ntfs/ntfs_vnops.c @@ -221,8 +221,6 @@ ntfs_reclaim(void *v) return (error); /* Purge old data structures associated with the inode. 
*/ - cache_purge(vp); - ntfs_frele(fp); ntfs_ntput(ip); diff --git sys/tmpfs/tmpfs_vnops.c sys/tmpfs/tmpfs_vnops.c index bc1390d72c9..6ec13e686b2 100644 --- sys/tmpfs/tmpfs_vnops.c +++ sys/tmpfs/tmpfs_vnops.c @@ -1079,8 +1079,6 @@ tmpfs_reclaim(void *v) racing = TMPFS_NODE_RECLAIMING(node); rw_exit_write(>tn_nlock); - cache_purge(vp); - /* * If inode is not referenced, i.e. no links, then destroy it. * Note: if racing - inode is about to get a new vnode, leave it. diff --git sys/ufs/ext2fs/ext2fs_vnops.c sys/ufs/ext2fs/ext2fs_vnops.c index 235590d7c74..006a06b0dc8 100644 --- sys/ufs/ext2fs/ext2fs_vnops.c +++ sys/ufs/ext2fs/ext2fs_vnops.c @@ -1247,7 +1247,6 @@ ext2fs_reclaim(void *v) /* * Purge old data structures associated with the inode. */ - cache_purge(vp); if (ip->i_devvp) vrele(ip->i_devvp); diff --git ufs/ufs_inode.c ufs/ufs_inode.c index cc3b8b83229..f665c45176a 100644 --- ufs/ufs_inode.c +++ ufs/ufs_inode.c @@ -153,8 +153,6 @@ ufs_reclaim(struct vnode *vp) /* * Purge old data structures associated with the inode. */ - cache_purge(vp); - if (ip->i_devvp) { vrele(ip->i_devvp); }
vfs: drop unnecessary cache_purge()s
VOP_RECLAIM is only ever called from vclean() to cleanup fs dependent data, and vclean() calls cache_purge(). Makes all of the reclaim implementations the same in this regard. diff --git sys/isofs/cd9660/cd9660_node.c sys/isofs/cd9660/cd9660_node.c index bce99d77c22..300277f3b37 100644 --- sys/isofs/cd9660/cd9660_node.c +++ sys/isofs/cd9660/cd9660_node.c @@ -218,7 +218,6 @@ cd9660_reclaim(void *v) /* * Purge old data structures associated with the inode. */ - cache_purge(vp); if (ip->i_devvp) { vrele(ip->i_devvp); ip->i_devvp = 0; diff --git sys/msdosfs/msdosfs_denode.c sys/msdosfs/msdosfs_denode.c index 7a33212b648..3707c97458e 100644 --- sys/msdosfs/msdosfs_denode.c +++ sys/msdosfs/msdosfs_denode.c @@ -600,7 +600,6 @@ msdosfs_reclaim(void *v) /* * Purge old data structures associated with the denode. */ - cache_purge(vp); if (dep->de_devvp) { vrele(dep->de_devvp); dep->de_devvp = 0; diff --git sys/nfs/nfs_node.c sys/nfs/nfs_node.c index c8ac3b9bb14..38ad5db82fc 100644 --- sys/nfs/nfs_node.c +++ sys/nfs/nfs_node.c @@ -237,7 +237,6 @@ nfs_reclaim(void *v) if (np->n_wcred) crfree(np->n_wcred); - cache_purge(vp); pool_put(_node_pool, vp->v_data); vp->v_data = NULL; diff --git sys/ntfs/ntfs_vnops.c sys/ntfs/ntfs_vnops.c index d239112e991..d40e3d254f6 100644 --- sys/ntfs/ntfs_vnops.c +++ sys/ntfs/ntfs_vnops.c @@ -221,8 +221,6 @@ ntfs_reclaim(void *v) return (error); /* Purge old data structures associated with the inode. */ - cache_purge(vp); - ntfs_frele(fp); ntfs_ntput(ip); diff --git sys/tmpfs/tmpfs_vnops.c sys/tmpfs/tmpfs_vnops.c index bc1390d72c9..6ec13e686b2 100644 --- sys/tmpfs/tmpfs_vnops.c +++ sys/tmpfs/tmpfs_vnops.c @@ -1079,8 +1079,6 @@ tmpfs_reclaim(void *v) racing = TMPFS_NODE_RECLAIMING(node); rw_exit_write(>tn_nlock); - cache_purge(vp); - /* * If inode is not referenced, i.e. no links, then destroy it. * Note: if racing - inode is about to get a new vnode, leave it. 
diff --git sys/ufs/ext2fs/ext2fs_vnops.c sys/ufs/ext2fs/ext2fs_vnops.c index 235590d7c74..006a06b0dc8 100644 --- sys/ufs/ext2fs/ext2fs_vnops.c +++ sys/ufs/ext2fs/ext2fs_vnops.c @@ -1247,7 +1247,6 @@ ext2fs_reclaim(void *v) /* * Purge old data structures associated with the inode. */ - cache_purge(vp); if (ip->i_devvp) vrele(ip->i_devvp);
Re: Expose some scheduler statistics to userland via sysctl
On 2011 Nov 17 (Thu) at 21:18:24 -0200 (-0200), Christiano F. Haesbaert wrote: Moving this to tech@ Hi, I was studying the scheduler code after watching tedu's talk, I'd like to expose this statistics to userland so that I can try playing with cache affinity in the future: gimli:src: sysctl kern.schedstat kern.schedstat.nmigrations=23744 kern.schedstat.noidle=0 kern.schedstat.stolen=9170 kern.schedstat.choose=834843 kern.schedstat.wasidle=808711 kern.schedstat.nomigrations=2388 Opinions ? I see no point in exporting this out. They are essentially pointless knobs that people _will_ fiddle with without a clue. This belongs in a developers tree IMO. Index: sys/sys//sched.h === RCS file: /cvs/src/sys/sys/sched.h,v retrieving revision 1.29 diff -d -u -p -w -r1.29 sched.h --- sys/sys//sched.h 7 Jul 2011 18:00:33 - 1.29 +++ sys/sys//sched.h 12 Nov 2011 13:51:04 - @@ -75,6 +75,34 @@ * Posix defines a sched.h which may want to include sys/sched.h */ +struct schedstat { + u_int64_t scs_nmigrations; + u_int64_t scs_noidle; + u_int64_t scs_stolen; + + u_int64_t scs_choose; + u_int64_t scs_wasidle; + u_int64_t scs_nomigrations; +}; + +/* These sysctl names are only really used by sysctl(8) */ +#define KERN_SCHEDSTAT_NMIGRATIONS 1 +#define KERN_SCHEDSTAT_NOIDLE2 +#define KERN_SCHEDSTAT_STOLEN3 +#define KERN_SCHEDSTAT_CHOOSE4 +#define KERN_SCHEDSTAT_WASIDLE 5 +#define KERN_SCHEDSTAT_NOMIGRATIONS 6 +#define KERN_SCHEDSTAT_MAXID 7 + +#define CTL_KERN_SCHEDSTAT_NAMES { \ + { 0, 0 }, \ + { nmigrations, CTLTYPE_QUAD },\ + { noidle, CTLTYPE_QUAD }, \ + { stolen, CTLTYPE_QUAD }, \ + { choose, CTLTYPE_QUAD }, \ + { wasidle, CTLTYPE_QUAD },\ + { nomigrations, CTLTYPE_QUAD }\ +} /* * CPU states. 
* XXX Not really scheduler state, but no other good place to put Index: sys/sys//sysctl.h === RCS file: /cvs/src/sys/sys/sysctl.h,v retrieving revision 1.117 diff -d -u -p -w -r1.117 sysctl.h --- sys/sys//sysctl.h 30 Aug 2011 01:09:29 - 1.117 +++ sys/sys//sysctl.h 12 Nov 2011 13:40:45 - @@ -189,7 +189,8 @@ struct ctlname { #define KERN_CONSDEV75 /* dev_t: console terminal device */ #define KERN_NETLIVELOCKS 76 /* int: number of network livelocks */ #define KERN_POOL_DEBUG 77 /* int: enable pool_debug */ -#define KERN_MAXID 78 /* number of valid kern ids */ +#define KERN_SCHEDSTAT 78 /* struct: sched statistics */ +#define KERN_MAXID 79 /* number of valid kern ids */ #define CTL_KERN_NAMES { \ { 0, 0 }, \ @@ -270,6 +271,7 @@ struct ctlname { { consdev, CTLTYPE_STRUCT }, \ { netlivelocks, CTLTYPE_INT }, \ { pool_debug, CTLTYPE_INT }, \ + { schedstat, CTLTYPE_STRUCT }, \ } /* Index: sys/kern//kern_sched.c === RCS file: /cvs/src/sys/kern/kern_sched.c,v retrieving revision 1.24 diff -d -u -p -w -r1.24 kern_sched.c --- sys/kern//kern_sched.c12 Oct 2011 18:30:09 - 1.24 +++ sys/kern//kern_sched.c12 Nov 2011 14:41:59 - @@ -35,6 +35,8 @@ void sched_kthreads_create(void *); int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p); struct proc *sched_steal_proc(struct cpu_info *); +struct schedstat schedstat; + /* * To help choosing which cpu should run which process we keep track * of cpus which are currently idle and which cpus have processes @@ -301,14 +303,6 @@ again: return (p); } -uint64_t sched_nmigrations; -uint64_t sched_noidle; -uint64_t sched_stolen; - -uint64_t sched_choose; -uint64_t sched_wasidle; -uint64_t sched_nomigrations; - struct cpu_info * sched_choosecpu_fork(struct proc *parent, int flags) { @@ -374,7 +368,7 @@ sched_choosecpu(struct proc *p) if (p-p_flag P_CPUPEG) return (p-p_cpu); - sched_choose++; + schedstat.scs_choose++; /* * Look at all cpus that are currently idle and have nothing queued. 
@@ -393,7 +387,7 @@ sched_choosecpu(struct proc *p) if (cpuset_isset(set, p-p_cpu) || (p-p_cpu == curcpu() p-p_cpu-ci_schedstate.spc_nrun == 0 curproc == p)) { - sched_wasidle++; + schedstat.scs_wasidle++; return (p-p_cpu); } @@ -411,9 +405,9 @@ sched_choosecpu(struct proc
Re: dd(1) human-readable output
On 2011 Aug 23 (Tue) at 20:48:09 +0200 (+0200), Thomas Pfaff wrote: This patch makes dd(1) output change from e.g. $ dd if=/dev/sd0c of=/dev/null bs=512 count=16000 16000+0 records in 16000+0 records out 8192000 bytes transferred in 3.002 secs (2728488 bytes/sec) to $ obj/dd if=/dev/sd0c of=/dev/null bs=512 count=16000 16000+0 records in 16000+0 records out 8192000 bytes (7.8MB) transferred in 3.009 seconds (2.6MB/s) Any interest? Yes. But with a caveat. This is going to break at least 3 scripts that get run periodically on all of my development machines. While I'm happy with adjusting them (this change would make them smaller actually), I'm unsure if we want to throw this into the wild, since this output behaviour is _old_. Now, a -h button or similar would solve that, but that's fugly. So, while I'm all for it, I think the greybeards need to weigh in on this :) So, assuming we are fine with breaking script compatibility (that I personally hate), the diff looks OK to me. Index: Makefile === RCS file: /cvs/src/bin/dd/Makefile,v retrieving revision 1.5 diff -u -p -r1.5 Makefile --- Makefile 29 May 1998 04:34:20 - 1.5 +++ Makefile 23 Aug 2011 18:43:43 - @@ -2,5 +2,7 @@ PROG=dd SRCS=args.c conv.c conv_tab.c dd.c misc.c position.c +DPADD= ${LIBUTIL} +LDADD= -lutil .include bsd.prog.mk Index: misc.c === RCS file: /cvs/src/bin/dd/misc.c,v retrieving revision 1.16 diff -u -p -r1.16 misc.c --- misc.c27 Oct 2009 23:59:21 - 1.16 +++ misc.c23 Aug 2011 18:43:43 - @@ -45,6 +45,7 @@ #include errno.h #include time.h #include unistd.h +#include util.h #include dd.h #include extern.h @@ -57,6 +58,7 @@ summary(void) struct iovec iov[4]; double microsecs; int i = 0; + char sizebuf[FMT_SCALED_STRSIZE], ratebuf[FMT_SCALED_STRSIZE]; (void)gettimeofday(nowtv, (struct timezone *)NULL); timersub(nowtv, st.startv, nowtv); @@ -85,10 +87,19 @@ summary(void) iov[i].iov_base = buf[2]; iov[i++].iov_len = strlen(buf[2]); } + + strlcpy(sizebuf, ?, sizeof sizebuf); + fmt_scaled(st.bytes, sizebuf); + 
sizebuf[strcspn(sizebuf, B)] = '\0'; + + strlcpy(ratebuf, ?, sizeof ratebuf); + fmt_scaled(st.bytes * 100.0 / microsecs, ratebuf); + ratebuf[strcspn(ratebuf, B)] = '\0'; + (void)snprintf(buf[3], sizeof(buf[3]), - %qd bytes transferred in %ld.%03ld secs (%0.0f bytes/sec)\n, - (long long)st.bytes, nowtv.tv_sec, nowtv.tv_usec / 1000, - ((double)st.bytes * 100) / microsecs); + %qd bytes (%sB) transferred in %ld.%03ld seconds (%sB/s)\n, + (long long)st.bytes, sizebuf, nowtv.tv_sec, nowtv.tv_usec / 1000, + ratebuf); iov[i].iov_base = buf[3]; iov[i++].iov_len = strlen(buf[3]);
Re: kdump: resolve sysctl numbers
On 2011 Jul 27 (Wed) at 19:22:34 +0200 (+0200), Jasper Lievisse Adriaanse wrote: On Wed, Jul 27, 2011 at 10:58:22AM -0400, Ted Unangst wrote: On Wed, Jul 27, 2011, Otto Moerbeek wrote: +#define SETNAME(name) do { names = (name); limit = nitems(name); } while (0) userland is not supposed to use nitems I think? But it keeps sneaking in because the kernel headers don't protect it. That's right. It's used in some places like pcidump, npppd and tmux, but it's locally defined as: #ifndef nitems #define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) #endif What is the reason for this not being kosher yet, and if it's not meant to be, why isn't it protected by _KERNEL?
Re: vnode(9) man page
On Mon, Jul 18, 2011 at 07:11:54AM +0059, Jason McIntyre wrote: On Sun, Jul 17, 2011 at 03:25:29PM +, Thordur Bjornsson wrote: The commit comment indicates to me that the underlying mechanism changed significantly, so now I wonder if the information that once was in vnode_if.src (regarding vnode locking disciplines) is irrelevant, obsolete or if it needs to be updated and moved to somewhere more appropriate? The diff below just removes the reference to the file which may not be the right thing to do. The locking discipline in that file is subtly wrong in some places, overtly wrong in others. I'm reworking it actually, and I hope to have a man page for it at some point. For now, the best bet if you are messing with this is to check to see what UFS/FFS does, as it has the best chance of being the most correct. can we have the short term fix for now then, so at least the man page is not wrong? Err, yes. I intended to commit the diff from Benny last night. It's in now. Committed.
Introducing rrw locks;
) - wakeup((void *)(lkp)); - break; - - case LK_DRAIN: - /* -* Check that we do not already hold the lock, as it can -* never drain if we do. Unfortunately, we have no way to -* check for holding a shared lock, but at least we can -* check for an exclusive one. -*/ - if (WEHOLDIT(lkp, pid, cpu_id)) - panic(lockmgr: draining against myself); - /* -* If we are just polling, check to see if we will sleep. -*/ - if ((extflags LK_NOWAIT) ((lkp-lk_flags -(LK_HAVE_EXCL | LK_WANT_EXCL)) || -lkp-lk_sharecount != 0 || lkp-lk_waitcount != 0)) { - error = EBUSY; - break; - } - ACQUIRE(lkp, error, extflags, 1, - ((lkp-lk_flags -(LK_HAVE_EXCL | LK_WANT_EXCL)) || -lkp-lk_sharecount != 0 || -lkp-lk_waitcount != 0)); - if (error) - break; - lkp-lk_flags |= LK_DRAINING | LK_HAVE_EXCL; - SETHOLDER(lkp, pid, cpu_id); - lkp-lk_exclusivecount = 1; - break; - - default: - panic(lockmgr: unknown locktype request %d, - flags LK_TYPE_MASK); - /* NOTREACHED */ - } - if ((lkp-lk_flags LK_WAITDRAIN) != 0 - ((lkp-lk_flags - (LK_HAVE_EXCL | LK_WANT_EXCL)) == 0 - lkp-lk_sharecount == 0 lkp-lk_waitcount == 0)) { - lkp-lk_flags = ~LK_WAITDRAIN; - wakeup((void *)lkp-lk_flags); + if (flags LK_RELEASE) { + rrw_exit(lkp-lk_lck); + return (0); } - return (error); -} -#ifdef DIAGNOSTIC -/* - * Print out information about state of a lock. Used by VOP_PRINT - * routines to display status about contained locks. 
- */ -void -lockmgr_printinfo(__volatile struct lock *lkp) -{ + if (flags LK_SHARED) + rwflags |= RW_READ; + if (flags (LK_EXCLUSIVE|LK_DRAIN)) + rwflags |= RW_WRITE; - if (lkp-lk_sharecount) - printf( lock type %s: SHARED (count %d), lkp-lk_wmesg, - lkp-lk_sharecount); - else if (lkp-lk_flags LK_HAVE_EXCL) { - printf( lock type %s: EXCL (count %d) by , - lkp-lk_wmesg, lkp-lk_exclusivecount); - printf(pid %d, lkp-lk_lockholder); - } else - printf( not locked); - if (lkp-lk_waitcount 0) - printf( with %d pending, lkp-lk_waitcount); + if (flags LK_RECURSEFAIL) + rwflags |= RW_RECURSEFAIL; + if (flags LK_NOWAIT) + rwflags |= RW_NOSLEEP; + + return (rrw_enter(lkp-lk_lck, rwflags)); } -#endif /* DIAGNOSTIC */ #if defined(MULTIPROCESSOR) /* @@ -343,7 +104,7 @@ lockmgr_printinfo(__volatile struct lock *lkp) * so that they show up in profiles. */ -struct __mp_lock kernel_lock; +struct __mp_lock kernel_lock; void _kernel_lock_init(void) @@ -385,10 +146,4 @@ _kernel_proc_unlock(struct proc *p) { __mp_unlock(kernel_lock); } - -#ifdef MP_LOCKDEBUG -/* CPU-dependent timing, needs this to be settable from ddb. */ -int __mp_lock_spinout = 2; -#endif - #endif /* MULTIPROCESSOR */ diff --git a/kern/kern_rwlock.c b/kern/kern_rwlock.c index d22ae3a..a85f8b1 100644 --- a/kern/kern_rwlock.c +++ b/kern/kern_rwlock.c @@ -2,27 +2,19 @@ /* * Copyright (c) 2002, 2003 Artur Grabowski a...@openbsd.org - * All rights reserved. + * Copyright (c) 2011 Thordur Bjornsson t...@openbsd.org * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. * - * 1. Redistributions of source code must retain the above copyright - *notice, this list of conditions and the following disclaimer. - * 2. 
The name of the author may not be used to endorse or promote products - *derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, - * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY - * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL - * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER
Re: Move uvm_pglist* to uvm_page.c
On Thu, Jun 23, 2011 at 07:04:48PM +0100, Owain Ainsworth wrote: How about this now? On Tue, May 31, 2011 at 12:05:04AM +0100, Owain Ainsworth wrote: These functions used to be big and complicated, now they are glorified wrappers around pmemrange and don't really need their own file. Discussed with ariane@ a while ago. ok? OK. diff --git conf/files conf/files index 02da860..017e5f9 100644 --- conf/files +++ conf/files @@ -1007,7 +1007,6 @@ file uvm/uvm_object.c file uvm/uvm_page.c file uvm/uvm_pager.c file uvm/uvm_pdaemon.c -file uvm/uvm_pglist.c file uvm/uvm_pmemrange.c file uvm/uvm_stat.c file uvm/uvm_swap.c diff --git uvm/uvm_page.c uvm/uvm_page.c index 10ef7d1..ed8e6d4 100644 --- uvm/uvm_page.c +++ uvm/uvm_page.c @@ -806,6 +806,81 @@ uvm_pagealloc_pg(struct vm_page *pg, struct uvm_object *obj, voff_t off, } /* + * uvm_pglistalloc: allocate a list of pages + * + * = allocated pages are placed at the tail of rlist. rlist is + *assumed to be properly initialized by caller. + * = returns 0 on success or errno on failure + * = doesn't take into account clean non-busy pages on inactive list + * that could be used(?) + * = params: + * sizethe size of the allocation, rounded to page size. + * low the low address of the allowed allocation range. + * highthe high address of the allowed allocation range. + * alignment memory must be aligned to this power-of-two boundary. + * boundaryno segment in the allocation may cross this + * power-of-two boundary (relative to zero). 
+ * = flags: + * UVM_PLA_NOWAIT fail if allocation fails + * UVM_PLA_WAITOK wait for memory to become avail + * UVM_PLA_ZEROreturn zeroed memory + */ +int +uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment, +paddr_t boundary, struct pglist *rlist, int nsegs, int flags) +{ + UVMHIST_FUNC(uvm_pglistalloc); UVMHIST_CALLED(pghist); + + KASSERT((alignment (alignment - 1)) == 0); + KASSERT((boundary (boundary - 1)) == 0); + KASSERT(!(flags UVM_PLA_WAITOK) ^ !(flags UVM_PLA_NOWAIT)); + + if (size == 0) + return (EINVAL); + + if ((high PAGE_MASK) != PAGE_MASK) { + printf(uvm_pglistalloc: Upper boundary 0x%lx + not on pagemask.\n, (unsigned long)high); + } + + /* +* Our allocations are always page granularity, so our alignment +* must be, too. +*/ + if (alignment PAGE_SIZE) + alignment = PAGE_SIZE; + + low = atop(roundup(low, alignment)); + /* +* high + 1 may result in overflow, in which case high becomes 0x0, +* which is the 'don't care' value. +* The only requirement in that case is that low is also 0x0, or the +* lowhigh assert will fail. +*/ + high = atop(high + 1); + size = atop(round_page(size)); + alignment = atop(alignment); + if (boundary PAGE_SIZE boundary != 0) + boundary = PAGE_SIZE; + boundary = atop(boundary); + + return uvm_pmr_getpages(size, low, high, alignment, boundary, nsegs, + flags, rlist); +} + +/* + * uvm_pglistfree: free a list of pages + * + * = pages should already be unmapped + */ +void +uvm_pglistfree(struct pglist *list) +{ + UVMHIST_FUNC(uvm_pglistfree); UVMHIST_CALLED(pghist); + uvm_pmr_freepageq(list); +} + +/* * interface used by the buffer cache to allocate a buffer at a time. * The pages are allocated wired in DMA accessible memory */ diff --git uvm/uvm_pglist.c uvm/uvm_pglist.c deleted file mode 100644 index d29fb14..000 --- uvm/uvm_pglist.c +++ /dev/null @@ -1,136 +0,0 @@ -/* $OpenBSD$ */ -/* $NetBSD: uvm_pglist.c,v 1.13 2001/02/18 21:19:08 chs Exp $ */ - -/*- - * Copyright (c) 1997 The NetBSD Foundation, Inc. 
- * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, - * NASA Ames Research Center. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - *notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - *notice, this list of conditions and the following disclaimer in the - *documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
Re: Future of ccd(4) and raid(4)?
On Fri, Jun 24, 2011 at 03:38:48PM +0000, Christian Weisgerber wrote: Matthew Dempsky matt...@dempsky.org wrote: What should be done about ccd(4) and raid(4)? They both seem superseded in functionality by softraid(4), which also has much more developer interest and active development. Is softraid ready at all? I thought it was experimental, under construction, incomplete, don't-use-unless-you-want-to-contribute code. I'm pretty sure it left that state some time ago; in all fairness, I'd sooner trust softraid for my data than ccd/raidframe. softraid needs some of the bells and whistles raidframe has, as Benny already pointed out, but I think ccd(4) ought to go the way of the Dodo.
Re: Identifying disks by name
On Wed, Jun 22, 2011 at 09:07:41PM +0200, Wouter Coene wrote: On 22 jun 2011, at 20:53, Kenneth R Westerback wrote: On Wed, Jun 22, 2011 at 06:48:14PM +0200, Wouter Coene wrote: The patch below builds on the disk UID code to implement disk names. Disk names must match [a-zA-Z0-9_]{1,10} and are stored encoded as 6 bits per character into the disklabel UID field. With this patch, you can use disk names in /etc/fstab: bootdisk.a / ffs rw 1 1 Why? This seems to add nothing but extra code and increase the chances some twit will name multiple disks 'MYDISK' and screw her/himself royally. Also, DUIDs identify a DISKLABEL and not a disk. Right now, nothing is stopping you from naming multiple disks 1234567890abcdef either. Twits will be twits; I'm just looking for something that's easier to type (and remember) than a hex-string. Also, this is certainly not useless if you have more than a handful of disks or SAN volumes, or for removable media. Which of the following is more readable? mount 1234567890abcdef.a /mnt mount backups.a /mnt For this type of stuff, you are looking for hotplugd(8), and DUIDs make writing hotplug scripts much easier. While I do see the allure of having nice names for disks, this is problematic (like ken pointed out), so I think I'll have to agree with ken for now that this is extra code for a very minimal gain. Anyways, always nice to see diffs! cheers, thib.
Re: Identifying disks by name
On Wed, Jun 22, 2011 at 09:34:55PM +0200, Janjaap van Velthooven wrote: On Wed, Jun 22, 2011 at 08:12:28PM +0100, Stuart Henderson wrote: On 2011/06/22 21:07, Wouter Coene wrote: Also, this is certainly not useless if you have more than a handful of disks or SAN volumes, or for removable media. Which of the following is more readable? mount 1234567890abcdef.a /mnt mount backups.a /mnt mount bac1.a /mnt isn't too bad :-) Just a vague idea for the moment; How about some mechanism that can do number lookups by name for disks? ( just like is done for host protocols ports or users and groups and possibly more things.. ) for instance an /etc/disks with lines like: 1234567890abcdef backups bac1 Anyways, as I have no code for something like this at this moment I'll shut up for now on this. /etc/disks DUID myrootdisk /etc/fstab myrootdisk.a / ffs rw,softdep 1 1 And now I'm royally fucked.
Re: Identifying disks by name
On Wed, Jun 22, 2011 at 09:54:07PM +0200, Janjaap van Velthooven wrote: Thordur Bjornsson wrote and mailed: On Wed, Jun 22, 2011 at 09:34:55PM +0200, Janjaap van Velthooven wrote: Just a vague idea for the moment; How about some mechanism that can do number lookups by name for disks? ( just like is done for host protocols ports or users and groups and possibly more things.. ) for instance an /etc/disks with lines like: 1234567890abcdef backups bac1 Anyways, as I have no code for something like this at this moment I'll shut up for now on this. /etc/disks DUID myrootdisk /etc/fstab myrootdisk.a / ffs rw,softdep 1 1 And now I'm royally fucked. And I smoke too much dope (or not enough?).
Re: AVL tree
On Thu, May 19, 2011 at 07:52:44PM +0300, Michael Pounov wrote: Add AVL tree implementation and merge few RB tree related macros. If you have comments or any claims, please send me feedback and I will fix them. cool. but tech@ removes attachments, send your diffs inline. I'm assuming you implemented this as a macro a la RB/SPLAY in tree.h. That being said, there is already an AVL tree implementation floating around that's not macros. I've been beating on it (with some of the RB trees diffs we have in the kernel switched over) for some time, and hopefully it will be committable soon. I think I'm not alone when I say that usage of yet another macro tree is not welcome, at least not in the kernel. ciao! thib
Re: AVL tree
On Thu, May 19, 2011 at 07:21:21PM +0200, Mike Belopuhov wrote: On Thu, May 19, 2011 at 7:12 PM, Thordur Bjornsson t...@openbsd.org wrote: On Thu, May 19, 2011 at 07:52:44PM +0300, Michael Pounov wrote: Add AVL tree implementation and merge few RB tree related macros. If you have comments or any claims, please send me feedback and I will fix them. cool. but tech@ removes attachments, send your diffs inline. I'm assuming you implemented this as a macro a la RB/SPLAY in tree.h. That being said, there is already an AVL tree implementation floating around that's not macros. I've been beating on it (with some of the RB trees diffs we have in the kernel switched over) for some time, and hopefully it will be committable soon. what do you need it for? it's pretty much the same as r/b tree. do you think that lookup speed up is considerable? same questions apply to Michael. It's not the same as an r/b tree. The main reason for it is to cut down on the code bloat that the tree.h macros introduce. Also, my tests (limited, though; I have not done proper networking checks) show no performance difference.
Re: Filesystem Hierarchy Standard (FHS) and OpenBSD
On Mon, May 09, 2011 at 11:33:27PM -0400, Jeff Licquia wrote: (Sorry if this isn't the proper list for this discussion. If not, please point me in the right direction.) This is the proper list. Despite all the Linux in the names above, we're wanting to make sure that the FHS remains independent of any particular UNIX implementation, and continues to be useful to non-Linux UNIXes. Good, at least the Linux kids haven't totally forgotten the other grumpies out there :) My question to you is: do you consider the FHS to be relevant to current and future development of OpenBSD? If not, is this simply due to lack of maintenance; would your interest in the FHS be greater with more consistent updates? If you are interested, consider this an invitation to participate. We've set up a mailing list, Web site, etc., and are reviving the old bug tracker. More details can be found here: http://www.linuxfoundation.org/collaborate/workgroups/lsb/fhs There are numerous show stoppers, IMO. First off, the document is very Linux specific. Although I can't back up the claim, I'm pretty sure that other OSes weren't given much thought in the early days of this document. Here are what I would call show stoppers. And this applies to OpenBSD, as I view it. - OpenBSD has gone to great lengths to centralize all its configuration into one place: /etc so anything contrary to that is a simple no go. - A number of the directories do not make sense on OpenBSD: /lib For what libraries ? /bin and /sbin contains binaries that are statically linked (for a very good reason) so this is pointless. /opt Add-on application packages go into /usr/local/ on OpenBSD and the rest of the *BSDs Here there is one difference between Open and Free that I've come to dislike, FreeBSD stuffs configuration files into /usr/local/etc /media Mount point for removable media, okay; I thought that was what /mnt was for, and /mnt is still in the FHS ? 
(OK, I can see the point, just to help Gnome users :) /srv This doesn't even have a good rationale in the FHS; what exactly is this supposed to be? I think every *BSD Admin expects to find data for or from services provided by the system inside /var So the above things do not make sense in the general case, and as for the rest of the document, you can easily state that OpenBSD is at least partially compliant! Unfortunately, I don't think the FHS is relevant to current or future developments of OpenBSD; at least not in its current state. But I think the document is interesting, and maybe I'll butt in and offer some of my opinions :) Oh! And I almost forgot, we already have our very own FHS, it's in hier(7) :-) regards, thib.
Re: vnds considerd harmful.
On Wed, Apr 06, 2011 at 04:25:15PM -0400, Jonathan Thornburg wrote: In http://marc.info/?l=openbsd-tech&m=130200205608892&w=1, Thordur Bjornsson thib () openbsd ! org wrote: Now that I've disallowed swapping to vnd's the purpose of vnd (vs svnd) is suspect, it serves no purpose other than providing a different way of doing what svnd does (which imo, isn't even better). So, nuke vnds (keep svnds though!). This will make svndXn the same as vndXn etc. The idea is that in a few releases we'll simply remove the svnd0 notes. ^ - nodes. With this diff svnd0 == vnd0 in your /dev. Could you clarify the semantics of the in a few releases plan? That is, are you proposing that the in a few releases OpenBSD will have (a) vnd == today's svnd, bingo. (b) vnd == today's vnd, (c) vnd == some sort of merging of today's vnd and today's svnd, or (d) something else which hasn't occurred to me yet I sort of think you're proposing (a), but I'm not entirely sure that I'm parsing your wording correctly... [Hmm, I wonder if my failure-to-parse is related to a recent bout of perl hashes holding references to anonymous hashes holding references to anonymous lists. :) ] So, yeah. vnd's will become today's svnd0's and the old style bypassing of the buffer cache is gone (leaving only svnd0s). Then in a few releases, the svnd device nodes will be removed.
vnds considerd harmful.
Hi, Now that I've disallowed swapping to vnd's the purpose of vnd (vs svnd) is suspect, it serves no purpose other then providing a different way of doing what svnd does (which imo, isn't even better). So, nuke vnds (keep svnds though!). This will make svndXn the same as vndXn etc. The idea is that in a few releases we'll simply remove the svnd0 notes. comments/ok ? Index: dev/vnd.c === RCS file: /home/thib/cvs/src/sys/dev/vnd.c,v retrieving revision 1.108 diff -u -p -r1.108 vnd.c --- dev/vnd.c 2 Apr 2011 15:24:03 - 1.108 +++ dev/vnd.c 3 Apr 2011 18:29:52 - @@ -33,25 +33,11 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * from: Utah $Hdr: vn.c 1.13 94/04/02$ - * - * @(#)vn.c8.6 (Berkeley) 4/1/94 */ /* - * Vnode disk driver. - * - * Block/character interface to a vnode. Allows one to treat a file - * as a disk (e.g. build a filesystem in it, mount it, etc.). - * - * NOTE 1: This uses either the VOP_BMAP/VOP_STRATEGY interface to the - * vnode or simple VOP_READ/VOP_WRITE. The former is suitable for swapping - * as it doesn't distort the local buffer cache. The latter is good for - * building disk images as it keeps the cache consistent after the block - * device is closed. + * There is a security issue involved with this driver. * - * NOTE 2: There is a security issue involved with this driver. * Once mounted all access to the contents of the mapped file via * the special file is controlled by the permissions on the special * file, the protection of the mapped file is ignored (effectively, @@ -102,12 +88,8 @@ int vnddebug = 0x00; * DISKUNIT(), but with the minor masked off. */ #definevndunit(x) DISKUNIT(makedev(major(x), minor(x) 0x7ff)) -#definevndsimple(x)(minor(x) 0x800) - -/* same as MAKEDISKDEV, preserving the vndsimple() property */ #defineVNDLABELDEV(dev)\ - makedev(major(dev), DISKMINOR(vndunit(dev), RAW_PART) | \ - (vndsimple(dev) ? 
0x800 : 0)) + makedev(major(dev), DISKMINOR(vndunit(dev), RAW_PART)) struct vndbuf { struct buf vb_buf; @@ -145,7 +127,6 @@ struct vnd_softc { #defineVNF_LABELLING 0x0100 #defineVNF_WLABEL 0x0200 #defineVNF_HAVELABEL 0x0400 -#defineVNF_SIMPLE 0x1000 #defineVNF_READONLY0x2000 #defineVNDRW(v)((v)-sc_flags VNF_READONLY ? FREAD : FREAD|FWRITE) @@ -157,7 +138,6 @@ int numvnd = 0; void vndattach(int); void vndclear(struct vnd_softc *); -void vndstart(struct vnd_softc *, struct buf *); intvndsetcred(struct vnd_softc *, struct ucred *); void vndiodone(struct buf *); void vndshutdown(void); @@ -232,12 +212,6 @@ vndopen(dev_t dev, int flags, int mode, if ((error = vndlock(sc)) != 0) return (error); - if (!vndsimple(dev) sc-sc_vp != NULL - (sc-sc_vp-v_type != VREG || sc-sc_keyctx != NULL)) { - error = EINVAL; - goto bad; - } - if ((flags FWRITE) (sc-sc_flags VNF_READONLY)) { error = EROFS; goto bad; @@ -252,20 +226,11 @@ vndopen(dev_t dev, int flags, int mode, part = DISKPART(dev); pmask = 1 part; - /* -* If any partition is open, all succeeding openings must be of the -* same type or read-only. -*/ - if (sc-sc_dk.dk_openmask) { - if (((sc-sc_flags VNF_SIMPLE) != 0) != - (vndsimple(dev) != 0) (flags FWRITE)) { - error = EBUSY; - goto bad; - } - } else if (vndsimple(dev)) - sc-sc_flags |= VNF_SIMPLE; - else - sc-sc_flags = ~VNF_SIMPLE; + /* XXX: OK ?*/ + if (sc-sc_dk.dk_openmask (flags FWRITE)) { + error = EBUSY; + goto bad; + } /* Check that the partition exists. */ if (part != RAW_PART @@ -360,30 +325,13 @@ vndclose(dev_t dev, int flags, int mode, return (0); } -/* - * Two methods are used, the traditional buffercache bypassing and the - * newer, cache-coherent on unmount, one. - * - * Former method: - * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY. - * Note that this driver can only be used for swapping over NFS on the hp - * since nfs_strategy on the vax cannot handle u-areas and page tables. 
- * - * Latter method: - * Repack the buffer into an uio structure and use VOP_READ/VOP_WRITE to - * access the underlying file. - */ void vndstrategy(struct buf *bp) { int unit = vndunit(bp-b_dev); struct vnd_softc *vnd = vnd_softc[unit]; - struct vndbuf *nbp; - int bsize; off_t bn; - caddr_t addr; -
merge vfs_conf.c and vfs_init.c
no need to have two tiny files around. stuff everything into vfs_init, it belongs there (along with other stuff, that will get moved soonish). OK ? Index: conf/files === RCS file: /home/thib/cvs/src/sys/conf/files,v retrieving revision 1.511 diff -u -p -r1.511 files --- conf/files 5 Apr 2011 18:51:25 - 1.511 +++ conf/files 5 Apr 2011 19:43:16 - @@ -759,7 +759,6 @@ file kern/vfs_bio.c file kern/vfs_biomem.c file kern/vfs_cache.c file kern/vfs_cluster.c -file kern/vfs_conf.c file kern/vfs_default.c file kern/vfs_init.c file kern/vfs_lockf.c Index: kern/vfs_conf.c === RCS file: kern/vfs_conf.c diff -N kern/vfs_conf.c --- kern/vfs_conf.c 5 Apr 2011 18:51:25 - 1.41 +++ /dev/null 1 Jan 1970 00:00:00 - @@ -1,179 +0,0 @@ -/* $OpenBSD: vfs_conf.c,v 1.41 2011/04/05 18:51:25 thib Exp $ */ -/* $NetBSD: vfs_conf.c,v 1.21.4.1 1995/11/01 00:06:26 jtc Exp $*/ - -/* - * Copyright (c) 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - *notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - *notice, this list of conditions and the following disclaimer in the - *documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - *may be used to endorse or promote products derived from this software - *without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)vfs_conf.c 8.8 (Berkeley) 3/31/94 - */ - -#include sys/param.h -#include sys/mount.h -#include sys/vnode.h -#include sys/timeout.h - -#ifdef FFS -#include ufs/ufs/quota.h -#include ufs/ufs/inode.h -#include ufs/ffs/ffs_extern.h -#endif - -#ifdef EXT2FS -#include ufs/ext2fs/ext2fs_extern.h -#endif - -#ifdef CD9660 -#include isofs/cd9660/iso.h -#include isofs/cd9660/cd9660_extern.h -#endif - -#ifdef MFS -#include ufs/mfs/mfs_extern.h -#endif - -#ifdef NFSCLIENT -#include nfs/rpcv2.h -#include nfs/nfsproto.h -#include nfs/nfsnode.h -#include nfs/nfs.h -#include nfs/nfsmount.h -#endif - -/* - * This defines the root filesystem. - */ -struct vnode *rootvnode; - -/* - * Set up the filesystem operations for vnodes. - * The types are defined in mount.h. 
- */ - - -#ifdef FFS -extern const struct vfsops ffs_vfsops; -#endif - -#ifdef MFS -extern const struct vfsops mfs_vfsops; -#endif - -#ifdef MSDOSFS -extern const struct vfsops msdosfs_vfsops; -#endif - -#ifdef NFSCLIENT -extern const struct vfsops nfs_vfsops; -#endif - -#ifdef PROCFS -extern const struct vfsops procfs_vfsops; -#endif - -#ifdef CD9660 -extern const struct vfsops cd9660_vfsops; -#endif - -#ifdef EXT2FS -extern const struct vfsops ext2fs_vfsops; -#endif - -#ifdef NNPFS -extern const struct vfsops nnpfs_vfsops; -#endif - -#ifdef NTFS -extern const struct vfsops ntfs_vfsops; -#endif - -#ifdef UDF -extern const struct vfsops udf_vfsops; -#endif - -/* - * Set up the filesystem operations for vnodes. - */ -static struct vfsconf vfsconflist[] = { - -/* Fast Filesystem */ -#ifdef FFS -{ ffs_vfsops, MOUNT_FFS, 1, 0, MNT_LOCAL, NULL }, -#endif - -/* Memory-based Filesystem */ -#ifdef MFS -{ mfs_vfsops, MOUNT_MFS, 3, 0, MNT_LOCAL, NULL }, -#endif - -#ifdef EXT2FS - { ext2fs_vfsops, MOUNT_EXT2FS, 17, 0, MNT_LOCAL, NULL }, -#endif -/* ISO9660 (aka CDROM) Filesystem */ -#ifdef CD9660 -{ cd9660_vfsops, MOUNT_CD9660, 14, 0, MNT_LOCAL, NULL }, -#endif - -/* MSDOS Filesystem */ -#ifdef MSDOSFS -{ msdosfs_vfsops, MOUNT_MSDOS, 4, 0, MNT_LOCAL, NULL }, -#endif - -/* Sun-compatible Network Filesystem */ -#ifdef NFSCLIENT -{ nfs_vfsops, MOUNT_NFS, 2, 0, 0, NULL }, -#endif - - /* NNPFS */ -#ifdef NNPFS - { nnpfs_vfsops, MOUNT_NNPFS, 21,
no swapping to vnds
Hi, 1) Swapping to svnds has issues (pagedaemon deadlocks) and has been broken since forever. 2) Swapping to vnds makes no sense, why add another layer when you can just swap to a regular file instead ? so stop supporting swapping to vnds. If this turns out to be kosher I have a diff tested that removes vnds in favour of svnds. OK ? Index: uvm/uvm_swap.c === RCS file: /home/thib/cvs/src/sys/uvm/uvm_swap.c,v retrieving revision 1.100 diff -u -p -r1.100 uvm_swap.c --- uvm/uvm_swap.c 21 Dec 2010 20:14:44 -0000 1.100 +++ uvm/uvm_swap.c 4 Apr 2011 09:14:59 -0000 @@ -912,6 +912,10 @@ swap_on(struct proc *p, struct swapdev * vp = sdp->swd_vp; dev = sdp->swd_dev; + /* no swapping to vnds. */ + if (bdevsw[major(dev)].d_strategy == &vndstrategy) + return (EOPNOTSUPP); + /* * open the swap file (mostly useful for block device files to * let device driver know what is up).
Re: no swapping to vnds
On Mon, Apr 04, 2011 at 12:34:17PM +0200, Otto Moerbeek wrote: On Mon, Apr 04, 2011 at 09:22:41AM +0000, Thordur Bjornsson wrote: Hi, 1) Swapping to svnds has issues (pagedaemon deadlocks) and has been broken since forever. 2) Swapping to vnds makes no sense, why add another layer when you can just swap to a regular file instead ? so stop supporting swapping to vnds. If this turns out to be kosher I have a diff tested that removes vnds in favour of svnds. I don't know if this is the right check, but the & is redundant to get the address of a function. It's the easiest check. It's hard to map a dev_t to a device since it is MD, so checking for that function is the best way I could come up with. And doh on the '&'. I'll commit without it. -Otto OK ? Index: uvm/uvm_swap.c === RCS file: /home/thib/cvs/src/sys/uvm/uvm_swap.c,v retrieving revision 1.100 diff -u -p -r1.100 uvm_swap.c --- uvm/uvm_swap.c 21 Dec 2010 20:14:44 -0000 1.100 +++ uvm/uvm_swap.c 4 Apr 2011 09:14:59 -0000 @@ -912,6 +912,10 @@ swap_on(struct proc *p, struct swapdev * vp = sdp->swd_vp; dev = sdp->swd_dev; + /* no swapping to vnds. */ + if (bdevsw[major(dev)].d_strategy == &vndstrategy) + return (EOPNOTSUPP); + /* * open the swap file (mostly useful for block device files to * let device driver know what is up).
Re: pool_debug is good, but also bad
On Sun, Apr 03, 2011 at 06:38:51PM -0600, Theo de Raadt wrote: based on a conversation at the bar. POOL_DEBUG is expensive. But we really want it because it finds bugs before they hurt us. The solution to this is to make it simpler to turn off. This diff starts the kernel with pool debug on, but allows it to be turned off with sysctl kern.pool_debug=0. This does not gaurantee that all the pool pages will be unchecked, but it does help. This will let people who care about performance turn it off permanently in sysctl.conf; I think we will add a line there for people to know how to use it. I like this. Means I can turn it off and on easly when I'm testing diffs on my workstations. Index: kern/subr_pool.c === RCS file: /cvs/src/sys/kern/subr_pool.c,v retrieving revision 1.100 diff -u -r1.100 subr_pool.c --- kern/subr_pool.c 3 Apr 2011 22:07:37 - 1.100 +++ kern/subr_pool.c 3 Apr 2011 22:59:39 - @@ -42,7 +42,7 @@ #include sys/sysctl.h #include uvm/uvm.h - +#include dev/rndvar.h /* * Pool resource management utility. 
@@ -74,6 +74,7 @@ caddr_t ph_page;/* this page's address */ caddr_t ph_colored; /* page's colored address */ int ph_pagesize; + int ph_magic; }; struct pool_item { @@ -89,6 +90,7 @@ #else #define PI_MAGIC 0xdeafbeef #endif +int pool_debug = 1; #define POOL_NEEDS_CATCHUP(pp) \ ((pp)-pr_nitems (pp)-pr_minitems) @@ -441,7 +443,8 @@ else ph = pool_get(phpool, (flags ~(PR_WAITOK | PR_ZERO)) | PR_NOWAIT); - + if (pool_debug) + ph-ph_magic = PI_MAGIC; return (ph); } @@ -611,13 +614,15 @@ page %p; item addr %p; offset 0x%x=0x%x, pp-pr_wchan, ph-ph_page, pi, 0, pi-pi_magic); #ifdef POOL_DEBUG - for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int); - i pp-pr_size / sizeof(int); i++) { - if (ip[i] != PI_MAGIC) { - panic(pool_do_get(%s): free list modified: - page %p; item addr %p; offset 0x%x=0x%x, - pp-pr_wchan, ph-ph_page, pi, - i * sizeof(int), ip[i]); + if (pool_debug ph-ph_magic) { + for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int); + i pp-pr_size / sizeof(int); i++) { + if (ip[i] != ph-ph_magic) { + panic(pool_do_get(%s): free list modified: + page %p; item addr %p; offset 0x%x=0x%x, + pp-pr_wchan, ph-ph_page, pi, + i * sizeof(int), ip[i]); + } } } #endif /* POOL_DEBUG */ @@ -731,9 +736,11 @@ #ifdef DIAGNOSTIC pi-pi_magic = PI_MAGIC; #ifdef POOL_DEBUG - for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int); - i pp-pr_size / sizeof(int); i++) - ip[i] = PI_MAGIC; + if (ph-ph_magic) { + for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int); + i pp-pr_size / sizeof(int); i++) + ip[i] = ph-ph_magic; + } #endif /* POOL_DEBUG */ #endif /* DIAGNOSTIC */ @@ -886,9 +893,11 @@ #ifdef DIAGNOSTIC pi-pi_magic = PI_MAGIC; #ifdef POOL_DEBUG - for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int); - i pp-pr_size / sizeof(int); i++) - ip[i] = PI_MAGIC; + if (ph-ph_magic) { + for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int); + i pp-pr_size / sizeof(int); i++) + ip[i] = ph-ph_magic; + } #endif /* POOL_DEBUG */ #endif /* DIAGNOSTIC */ cp = (caddr_t)(cp + pp-pr_size); @@ -1273,14 +1282,16 @@ 0, 
pi-pi_magic); } #ifdef POOL_DEBUG - for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int); - i pp-pr_size / sizeof(int); i++) { - if (ip[i] != PI_MAGIC) { - printf(pool(%s): free list modified: - page %p; item ordinal %d; addr %p - (p %p); offset 0x%x=0x%x\n, - pp-pr_wchan, ph-ph_page, n, pi, - page, i * sizeof(int), ip[i]); + if (pool_debug ph-ph_magic) { + for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int); + i pp-pr_size / sizeof(int); i++) { + if (ip[i] != ph-ph_magic) { + printf(pool(%s): free
tweak for mount.h
o Collapse a few _KERNEL's and move some definitions to allow this. o Expose a few NFSMOUNT stuffs (one I will remove, the other I'm going to use later anyways). o Kill a protection for a forward decleration o Kill __STDC__ protection (which is under _KERNEL). OK ? Index: sys/mount.h === RCS file: /cvs/src/sys/sys/mount.h,v retrieving revision 1.101 diff -u -p -r1.101 mount.h --- sys/mount.h 4 Apr 2011 12:50:58 - 1.101 +++ sys/mount.h 4 Apr 2011 12:59:20 - @@ -156,9 +156,7 @@ struct nfs_args3 { /* * NFS mount option flags */ -#ifndef _KERNEL #defineNFSMNT_RESVPORT 0x /* always use reserved ports */ -#endif /* ! _KERNEL */ #defineNFSMNT_SOFT 0x0001 /* soft mount (hard is default) */ #defineNFSMNT_WSIZE0x0002 /* set write size */ #defineNFSMNT_RSIZE0x0004 /* set read size */ @@ -174,9 +172,7 @@ struct nfs_args3 { #defineNFSMNT_LEASETERM0x1000 /* set lease term (nqnfs) */ #defineNFSMNT_READAHEAD0x2000 /* set read ahead */ #defineNFSMNT_DEADTHRESH 0x4000 /* set dead server retry thresh */ -#ifdef _KERNEL /* Coming soon to a system call near you! */ #defineNFSMNT_NOAC 0x8000 /* disable attribute cache */ -#endif /* _KERNEL */ #defineNFSMNT_RDIRPLUS 0x0001 /* Use Readdirplus for V3 */ #defineNFSMNT_READDIRSIZE 0x0002 /* Set readdir size */ @@ -430,6 +426,24 @@ struct mount { #define MNT_DOOMED 0x0800 /* device behind filesystem is gone */ /* + * Flags for various system call interfaces. + * + * waitfor flags to vfs_sync() and getfsstat() + */ +#define MNT_WAIT 1 /* synchronously wait for I/O to complete */ +#define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */ +#define MNT_LAZY 3 /* push data not written by filesystem syncer */ + +/* + * Generic file handle + */ +struct fhandle { + fsid_t fh_fsid;/* File system id of mount point */ + struct fid fh_fid; /* File sys specific id */ +}; +typedef struct fhandle fhandle_t; + +/* * Sysctl CTL_VFS definitions. * * Second level identifier specifies which filesystem. 
Second level @@ -492,16 +506,12 @@ extern long buflowpages, bufhighpages, b extern int bufcachepercent; extern void bufadjust(int); extern int bufbackoff(void); -#endif /* * Operations supported on mounted file system. */ -#ifdef _KERNEL -#ifdef __STDC__ struct nameidata; struct mbuf; -#endif extern int maxvfsconf; /* highest defined filesystem type */ extern struct vfsconf *vfsconf;/* head of list of filesystem types */ @@ -547,27 +557,8 @@ struct vfsops { #defineVFS_VPTOFH(VP, FIDP) (*(VP)-v_mount-mnt_op-vfs_vptofh)(VP, FIDP) #define VFS_CHECKEXP(MP, NAM, EXFLG, CRED) \ (*(MP)-mnt_op-vfs_checkexp)(MP, NAM, EXFLG, CRED) -#endif /* _KERNEL */ -/* - * Flags for various system call interfaces. - * - * waitfor flags to vfs_sync() and getfsstat() - */ -#define MNT_WAIT 1 /* synchronously wait for I/O to complete */ -#define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */ -#define MNT_LAZY 3 /* push data not written by filesystem syncer */ -/* - * Generic file handle - */ -struct fhandle { - fsid_t fh_fsid;/* File system id of mount point */ - struct fid fh_fid; /* File sys specific id */ -}; -typedef struct fhandle fhandle_t; - -#ifdef _KERNEL #include net/radix.h #include sys/socket.h/* XXX for AF_MAX */ @@ -587,9 +578,7 @@ struct netexport { struct netcred ne_defexported; /* Default export */ struct radix_node_head *ne_rtable[AF_MAX+1]; /* Individual exports */ }; -#endif /* _KERNEL */ -#ifdef _KERNEL /* * exported vnode operations */ @@ -626,10 +615,7 @@ void vfsinit(void); intvfs_register(struct vfsconf *); intvfs_unregister(struct vfsconf *); #else /* _KERNEL */ - -#ifndef _SYS_STAT_H_ struct stat; -#endif __BEGIN_DECLS intfstatfs(int, struct statfs *);
Re: netinet6 bread crumbs
On Sat, Apr 02, 2011 at 02:13:45PM +0200, Stefan Sperling wrote: Feel free to ack or reject these individually. Kill redundant offsetof definitions. All of these files include sys/param.h. Those look fine. Index: in6.c === RCS file: /cvs/src/sys/netinet6/in6.c,v retrieving revision 1.89 diff -u -p -r1.89 in6.c --- in6.c 7 Oct 2010 22:07:06 - 1.89 +++ in6.c 2 Apr 2011 12:02:04 - @@ -910,14 +910,7 @@ in6_update_ifa(struct ifnet *ifp, struct */ if (ia == NULL) { hostIsNew = 1; - /* - * When in6_update_ifa() is called in a process of a received - * RA, it is called under an interrupt context. So, we should - * call malloc with M_NOWAIT. - */ - ia = malloc(sizeof(*ia), M_IFADDR, M_NOWAIT | M_ZERO); - if (ia == NULL) - return (ENOBUFS); + ia = malloc(sizeof(*ia), M_IFADDR, M_WAITOK | M_ZERO); This is a little bit suspect. But people who know the call path should look at this. typos look ok, but mah spjellingk is not vry good. (:
remove bufqs from vnds
Hi, So, it doesn't make sense to have a bufq for vnds. The disk that stores the image backing the vnd has it's own bufq ofcourse and what happens is that vnd puts a buf on it's bufq, which is promptly removed when we call vndstart, followed by a call to strategy so the buf ends up almost immediately on the bufq on the underlaying disk. Tested on vnd/svnd (and with the image on NFS. vnd is broken on nfs!). OK? Index: vnd.c === RCS file: /home/thib/cvs/src/sys/dev/vnd.c,v retrieving revision 1.107 diff -u -p -r1.107 vnd.c --- vnd.c 15 Feb 2011 20:02:11 - 1.107 +++ vnd.c 2 Apr 2011 11:34:38 - @@ -127,8 +127,6 @@ struct vnd_softc { struct disk sc_dk; char sc_dk_name[16]; - struct bufq sc_bufq; - char sc_file[VNDNLEN]; /* file we're covering */ int sc_flags; /* flags */ size_t sc_size; /* size of vnd in sectors */ @@ -159,7 +157,7 @@ int numvnd = 0; void vndattach(int); void vndclear(struct vnd_softc *); -void vndstart(struct vnd_softc *); +void vndstart(struct vnd_softc *, struct buf *); intvndsetcred(struct vnd_softc *, struct ucred *); void vndiodone(struct buf *); void vndshutdown(void); @@ -445,64 +443,50 @@ vndstrategy(struct buf *bp) /* No bypassing of buffer cache? */ if (vndsimple(bp-b_dev)) { - /* Loop until all queued requests are handled. 
*/ - for (;;) { - int part = DISKPART(bp-b_dev); - daddr64_t off = DL_SECTOBLK(vnd-sc_dk.dk_label, - DL_GETPOFFSET(vnd-sc_dk.dk_label-d_partitions[part])); - aiov.iov_base = bp-b_data; - auio.uio_resid = aiov.iov_len = bp-b_bcount; - auio.uio_iov = aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = dbtob((off_t)(bp-b_blkno + off)); - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = p; - - vn_lock(vnd-sc_vp, LK_EXCLUSIVE | LK_RETRY, p); - if (bp-b_flags B_READ) { - auio.uio_rw = UIO_READ; - bp-b_error = VOP_READ(vnd-sc_vp, auio, 0, - vnd-sc_cred); - if (vnd-sc_keyctx) - vndencrypt(vnd, bp-b_data, - bp-b_bcount, bp-b_blkno, 0); - } else { - if (vnd-sc_keyctx) - vndencrypt(vnd, bp-b_data, - bp-b_bcount, bp-b_blkno, 1); - auio.uio_rw = UIO_WRITE; - /* -* Upper layer has already checked I/O for -* limits, so there is no need to do it again. -*/ - bp-b_error = VOP_WRITE(vnd-sc_vp, auio, - IO_NOLIMIT, vnd-sc_cred); - /* Data in buffer cache needs to be in clear */ - if (vnd-sc_keyctx) - vndencrypt(vnd, bp-b_data, - bp-b_bcount, bp-b_blkno, 0); - } - VOP_UNLOCK(vnd-sc_vp, 0, p); - if (bp-b_error) - bp-b_flags |= B_ERROR; - bp-b_resid = auio.uio_resid; - s = splbio(); - biodone(bp); - splx(s); - - /* If nothing more is queued, we are done. */ - if (!bufq_peek(vnd-sc_bufq)) - return; - + int part = DISKPART(bp-b_dev); + daddr64_t off = DL_SECTOBLK(vnd-sc_dk.dk_label, + DL_GETPOFFSET(vnd-sc_dk.dk_label-d_partitions[part])); + aiov.iov_base = bp-b_data; + auio.uio_resid = aiov.iov_len = bp-b_bcount; + auio.uio_iov = aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = dbtob((off_t)(bp-b_blkno + off)); + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_procp = p; + + vn_lock(vnd-sc_vp, LK_EXCLUSIVE | LK_RETRY, p); + if (bp-b_flags B_READ) { + auio.uio_rw = UIO_READ; + bp-b_error = VOP_READ(vnd-sc_vp, auio, 0, + vnd-sc_cred); + if (vnd-sc_keyctx) + vndencrypt(vnd, bp-b_data, +
Re: atascsi dma_alloc() - make atascsi play nicer with bigmem
On Sat, Apr 02, 2011 at 09:15:37AM -0400, Kenneth R Westerback wrote: Another driver malloc'ing and passing potentially dma unsafe memory to do i/o into. ok? yub Ken Index: atascsi.c === RCS file: /cvs/src/sys/dev/ata/atascsi.c,v retrieving revision 1.101 diff -u -p -r1.101 atascsi.c --- atascsi.c 3 Feb 2011 21:22:19 - 1.101 +++ atascsi.c 2 Apr 2011 13:03:58 - @@ -26,6 +26,7 @@ #include sys/device.h #include sys/proc.h #include sys/queue.h +#include sys/pool.h #include scsi/scsi_all.h #include scsi/scsi_disk.h @@ -335,8 +336,8 @@ atascsi_probe(struct scsi_link *link) xa = scsi_io_get(ahp-ahp_iopool, SCSI_NOSLEEP); if (xa == NULL) panic(no free xfers on a new port); - /* XXX dma reachable */ - identify = malloc(sizeof(*identify), M_TEMP, M_WAITOK); + identify = dma_alloc(sizeof(*identify), + PR_WAITOK | PR_ZERO); xa-pmp_port = ap-ap_pmp_port; xa-data = identify; xa-datalen = sizeof(*identify); @@ -353,10 +354,10 @@ atascsi_probe(struct scsi_link *link) if (rv == 0) { bcopy(identify, ap-ap_identify, sizeof(ap-ap_identify)); - free(identify, M_TEMP); + dma_free(identify, sizeof(*identify)); break; } - free(identify, M_TEMP); + dma_free(identify, sizeof(*identify)); delay(500); } while (count--);
Re: Incorrect exit status from which(1)/whereis(1)
On Tue, Feb 15, 2011 at 05:30:11PM +, Jason McIntyre wrote: On Mon, Feb 14, 2011 at 11:31:18AM +, David Julio wrote: Is the exit status of which(1)/whereis(1) correct? $ which a b c which: a: Command not found which: b: Command not found which: c: Command not found $ echo $? 2 $ which -a a b c which: a: Command not found which: b: Command not found which: c: Command not found $ echo $? 1 If it is incorrect, below is my attempt to contribute. this command is not covered by posix, so there's no reference there. neither free nor netbsd document exit status for this command either. i have no access to such systems, but maybe someone who does can tell us how other bsd behave? SunOS 5.10 doesn't document the exit status, doesn't have a -a switch. A simple check shows that their exit status is 1 if none are found, 0 if all/some are found. Linux behaves the same way, but has an exit code of 2 if there were invalid arguments passed. DragonFly BSD (and FreeBSD) behave the same, but return 1 if there were invalid arguments. No access to a NetBSD box, and their man page doesn't document the exit status. if it's a doc bug, it can be fixed easy enough. that would seem strange behaviour though. if it's a software bug, any developer want to look at this? What's correct there, I've no idea. At the very least we should be consistent with our own man page, so the diff might be the right solution. 
kv, thib Index: which.c === RCS file: /cvs/src/usr.bin/which/which.c,v retrieving revision 1.16 diff -u -r1.16 which.c --- which.c 31 May 2010 14:01:49 - 1.16 +++ which.c 14 Feb 2011 11:02:10 - @@ -55,11 +55,7 @@ (void)setlocale(LC_ALL, ); - if (argc == 1) - usage(); - - /* Don't accept command args but check since old whereis(1) used to */ - while ((ch = getopt(argc, argv, a)) != -1) { + while ((ch = getopt(argc, argv, a)) != -1) switch (ch) { case 'a': allmatches = 1; @@ -67,7 +63,11 @@ default: usage(); } - } + argc -= optind; + argv += optind; + + if (argc == 0) + usage(); /* * which(1) uses user's $PATH. @@ -98,11 +98,11 @@ if (setuid(geteuid())) err(1, Can't set uid to %u, geteuid()); - for (n = optind; n argc; n++) + for (n = 0; n argc; n++) if (findprog(argv[n], path, progmode, allmatches) == 0) notfound++; - exit((notfound == 0) ? 0 : ((notfound == argc - 1) ? 2 : 1)); + exit((notfound == 0) ? 0 : ((notfound == argc) ? 2 : 1)); } int
Re: softraid clarification in manpage
On Thu, Jan 27, 2011 at 02:35:54PM -0500, Nick Guenther wrote: On Thu, Jan 27, 2011 at 9:39 AM, Jason McIntyre j...@kerhand.co.uk wrote: On Wed, Jan 26, 2011 at 04:24:07PM -0600, Amit Kulkarni wrote: Hi, I just configured a mirror using softraid, the manpage was extremely helpful. I just copy pasted the relevant commands. To a complete newbie, it was missing a few more lines. I just added those and sending the diff. I hope a variation of this is accepted to make the how to create mirror using softraid is complete. Googling brings a lot of RAIDFrame etc, there is very little out there on a pure data mirror. Thanks, amit Index: softraid.4 === RCS file: /cvs/src/share/man/man4/softraid.4,v retrieving revision 1.27 diff softraid.4 147a148,159 .Pp To use the freshly created mirror .Bd -literal -offset indent # mkdir /datamirror # mount /dev/sd0a /datamirror # chown normal_openbsd_user /datamirror .Ed .Pp Adding the following line to /etc/fstab will make it useful after every boot .Bd -literal -offset indent /dev/sd0a /datamirror ffs rw,softdep,nodev,nosuid 1 2 .Pp (as an aside, please send unified diffs (-u) in future) there is a line near the end of EXAMPLES: The RAID volume is now ready to be used as a normal disk device. for me, the instructions you've added come under the umbrella of using the raid partition as a normal disk partition. so i'd argue that it's outside the remit of this page. maybe other developers think otherwise though. As someone who has long been a newbie, this sort of trivial pointer can be a life-saver. If you think like a programmer and see your OS as a program this sort of thing comes naturally, but lots of people don't. IMO, these kinds of details are most suitable for a FAQ entry. Maybe section 14.13 of the FAQ could use some love?
cut vnd's over to bufqs, again.
hi, so cut vnds over to bufqs. this diff is similar to a diff that was commited, but got backed out after one of the hackathon fiasco's, with a small difference. there is no reason to keep an active count, bufq_peek is enough to figure out if the queue is empty or not. in vndiodone, there is no need to jump through hoops to figure out if we need to disk_unbusy(). We always need to there is a one-to-one against disk_busy() in vndstart, as we set the biodone callback to null so we don't end up there twice. OK? ciao, thib. Index: dev/vnd.c === RCS file: /usr/cvs/src/sys/dev/vnd.c,v retrieving revision 1.104 diff -u -p -r1.104 vnd.c --- dev/vnd.c 22 Dec 2010 13:12:14 - 1.104 +++ dev/vnd.c 28 Dec 2010 11:54:44 - @@ -1,4 +1,4 @@ -/* $OpenBSD: vnd.c,v 1.104 2010/12/22 13:12:14 jsing Exp $ */ +/* $OpenBSD: vnd.c,v 1.92 2009/06/04 05:57:27 krw Exp $*/ /* $NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $*/ /* @@ -127,6 +127,8 @@ struct vnd_softc { struct disk sc_dk; char sc_dk_name[16]; + struct bufq sc_bufq; + char sc_file[VNDNLEN]; /* file we're covering */ int sc_flags; /* flags */ size_t sc_size; /* size of vnd in sectors */ @@ -135,7 +137,6 @@ struct vnd_softc { size_t sc_ntracks;/* # of tracks per cylinder */ struct vnode*sc_vp; /* vnode */ struct ucred*sc_cred; /* credentials */ - struct buf sc_tab;/* transfer queue */ blf_ctx *sc_keyctx; /* key context */ struct rwlocksc_rwlock; }; @@ -209,6 +210,7 @@ vndattach(int num) vnd_softc = (struct vnd_softc *)mem; for (i = 0; i num; i++) { rw_init(vnd_softc[i].sc_rwlock, vndlock); + bufq_init(vnd_softc[i].sc_bufq, BUFQ_DEFAULT); } numvnd = num; @@ -489,8 +491,8 @@ vndstrategy(struct buf *bp) biodone(bp); splx(s); - /* If nothing more is queued, we are done. */ - if (!vnd-sc_tab.b_active) + /* If nothing more is queued, we are done. */ + if (!bufq_peek(vnd-sc_bufq)) return; /* @@ -498,9 +500,8 @@ vndstrategy(struct buf *bp) * routine might queue using same links. 
*/ s = splbio(); - bp = vnd-sc_tab.b_actf; - vnd-sc_tab.b_actf = bp-b_actf; - vnd-sc_tab.b_active--; + bp = bufq_dequeue(vnd-sc_bufq); + KASSERT(bp != NULL); splx(s); } } @@ -596,13 +597,9 @@ vndstrategy(struct buf *bp) splx(s); return; } - /* -* Just sort by block number -*/ - nbp-vb_buf.b_cylinder = nbp-vb_buf.b_blkno; + + bufq_queue(vnd-sc_bufq, nbp-vb_buf); s = splbio(); - disksort(vnd-sc_tab, nbp-vb_buf); - vnd-sc_tab.b_active++; vndstart(vnd); splx(s); bn += sz; @@ -625,8 +622,9 @@ vndstart(struct vnd_softc *vnd) * Dequeue now since lower level strategy routine might * queue using same links */ - bp = vnd-sc_tab.b_actf; - vnd-sc_tab.b_actf = bp-b_actf; + bp = bufq_dequeue(vnd-sc_bufq); + if (bp == NULL) + return; DNPRINTF(VDB_IO, vndstart(%d): bp %p vp %p blkno %lld addr %p cnt %lx\n, @@ -675,13 +673,8 @@ vndiodone(struct buf *bp) out: putvndbuf(vbp); - - if (vnd-sc_tab.b_active) { - disk_unbusy(vnd-sc_dk, (pbp-b_bcount - pbp-b_resid), - (pbp-b_flags B_READ)); - if (!vnd-sc_tab.b_actf) - vnd-sc_tab.b_active--; - } + disk_unbusy(vnd-sc_dk, (pbp-b_bcount - pbp-b_resid), + (pbp-b_flags B_READ)); } /* ARGSUSED */
Re: yield in long kernel loops
On Wed, Oct 13, 2010 at 08:08:34PM -0400, Ted Unangst wrote: So it's not a good idea to perform long lasting operations in the kernel. The scheduler doesn't deal well with it and nobody else gets to run. One of those long loops is loading a large table into pf. If you're lucky, you'll run out of memory and pool will finally sleep. I stuck a couple yield() calls into the long loops after sufficient iteration. I also zapped PFR_FLAG_ATOMIC because it's not really atomic anyway. I also couldn't find any callers. Leftover? Mixing two things in the same diff like this isn't helpful. Especially since the zapping of this flag is 80% of this diff or so. Another thing to fix at some point is that we call splsoftnet and splx multiple times per address in some cases, but fixing that was getting too complicated and requires some more code shuffling. Different thing. + if (++n % 1000 == 0) + yield(); While I see the point, this just screams HACK! Not sure if this helps. Might be better to do this to something that's way easier to instrument, maybe something similar to what the guy on misc@ was hitting with /dev/urandom. my two cents (note, they are Icelandic cents and so not really worth anything! ;)
Re: Slow I/O usb sticks
On Wed, Oct 06, 2010 at 04:06:47PM -0300, Gonzalo L. R. wrote: Hi guys, I have a slow I/O in usb sticks with big files, I use -current with the last weekend cvs code. If I cp a big file in the usb stick take several minutes (~10 minutes of 175M in a msdos stick, 2 minutes in a ffs stick), the usb stick have msdos fs, this not happend with a ffs usb stick. I have the same issue in my dell vostro 1510. Our MSDOS-FS code blows chunks, thats why. Since this isn't a bug report, not much else I can say. You can play around with profiling and read the code, it's in sys/msdosfs and it sure can use some lovin. Have fun! Regards OpenBSD 4.8-current (GENERIC.MP) #12: Sat Oct 2 15:49:43 ART 2010 r...@r0nin.sepp0.com.ar:/usr/src/sys/arch/i386/compile/GENERIC.MP cpu0: Intel(R) Core(TM)2 Duo CPU T7300 @ 2.00GHz (GenuineIntel 686-class) 2 GHz cpu0: FPU,V86,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,SBF,SSE3,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,CX16,xTPR,PDCM real mem = 1046761472 (998MB) avail mem = 1019613184 (972MB) mainbus0 at root bios0 at mainbus0: AT/286+ BIOS, date 04/18/08, BIOS32 rev. 0 @ 0xfdc80, SMBIOS rev. 2.4 @ 0xe0010 (63 entries) bios0: vendor LENOVO version 7NETB2WW (2.12 ) date 04/18/2008 bios0: LENOVO 767474Y acpi0 at bios0: rev 2 acpi0: sleep states S0 S3 S4 S5 acpi0: tables DSDT FACP SSDT ECDT TCPA APIC MCFG HPET SLIC BOOT ASF! 
SSDT SSDT SSDT SSDT acpi0: wakeup devices LID_(S3) SLPB(S3) DURT(S3) IGBE(S4) EXP0(S4) EXP1(S4) EXP2(S4) EXP3(S4) EXP4(S4) PCI1(S4) USB0(S3) USB1(S3) USB2(S3) USB3(S3) USB4(S3) EHC0(S3) EHC1(S3) HDEF(S4) acpitimer0 at acpi0: 3579545 Hz, 24 bits acpiec0 at acpi0 acpimadt0 at acpi0 addr 0xfee0: PC-AT compat cpu0 at mainbus0: apid 0 (boot processor) cpu0: apic clock running at 199MHz cpu1 at mainbus0: apid 1 (application processor) cpu1: Intel(R) Core(TM)2 Duo CPU T7300 @ 2.00GHz (GenuineIntel 686-class) 2 GHz cpu1: FPU,V86,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,SBF,SSE3,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,CX16,xTPR,PDCM ioapic0 at mainbus0: apid 1 pa 0xfec0, version 20, 24 pins ioapic0: misconfigured as apic 2, remapped to apid 1 acpihpet0 at acpi0: 14318179 Hz acpiprt0 at acpi0: bus 0 (PCI0) acpiprt1 at acpi0: bus -1 (AGP_) acpiprt2 at acpi0: bus 2 (EXP0) acpiprt3 at acpi0: bus 3 (EXP1) acpiprt4 at acpi0: bus -1 (EXP2) acpiprt5 at acpi0: bus -1 (EXP3) acpiprt6 at acpi0: bus -1 (EXP4) acpiprt7 at acpi0: bus 5 (PCI1) acpicpu0 at acpi0: C3, C2, C1, PSS acpicpu1 at acpi0: C3, C2, C1, PSS acpipwrres0 at acpi0: PUBS acpitz0 at acpi0: critical temperature 127 degC acpitz1 at acpi0: critical temperature 99 degC acpibtn0 at acpi0: LID_ acpibtn1 at acpi0: SLPB acpibat0 at acpi0: BAT0 model 42T4568 serial 6706 type LION oem SONY acpibat1 at acpi0: BAT1 not present acpibat2 at acpi0: BAT2 not present acpiac0 at acpi0: AC unit online acpithinkpad0 at acpi0 acpidock0 at acpi0: GDCK not docked (0) bios0: ROM list: 0xc/0x1! 0xe/0x1! 
cpu0: Enhanced SpeedStep 1996 MHz: speeds: 2001, 2000, 1600, 1200, 800 MHz pci0 at mainbus0 bus 0: configuration mode 1 (bios) pchb0 at pci0 dev 0 function 0 Intel GM965 Host rev 0x0c vga1 at pci0 dev 2 function 0 Intel GM965 Video rev 0x0c wsdisplay0 at vga1 mux 1: console (80x25, vt100 emulation) wsdisplay0: screen 1-5 added (80x25, vt100 emulation) intagp0 at vga1 agp0 at intagp0: aperture at 0xe000, size 0x1000 inteldrm0 at vga1: apic 1 int 16 (irq 10) drm0 at inteldrm0 Intel GM965 Video rev 0x0c at pci0 dev 2 function 1 not configured em0 at pci0 dev 25 function 0 Intel ICH8 IGP M AMT rev 0x03: apic 1 int 20 (irq 11), address 00:1d:72:92:6c:8f uhci0 at pci0 dev 26 function 0 Intel 82801H USB rev 0x03: apic 1 int 20 (irq 11) uhci1 at pci0 dev 26 function 1 Intel 82801H USB rev 0x03: apic 1 int 21 (irq 11) ehci0 at pci0 dev 26 function 7 Intel 82801H USB rev 0x03: apic 1 int 22 (irq 11) usb0 at ehci0: USB revision 2.0 uhub0 at usb0 Intel EHCI root hub rev 2.00/1.00 addr 1 azalia0 at pci0 dev 27 function 0 Intel 82801H HD Audio rev 0x03: apic 1 int 17 (irq 11) azalia0: codecs: Analog Devices AD1984, Conexant/0x2bfa, using Analog Devices AD1984 audio0 at azalia0 ppb0 at pci0 dev 28 function 0 Intel 82801H PCIE rev 0x03: apic 1 int 20 (irq 11) pci1 at ppb0 bus 2 ppb1 at pci0 dev 28 function 1 Intel 82801H PCIE rev 0x03: apic 1 int 21 (irq 11) pci2 at ppb1 bus 3 iwn0 at pci2 dev 0 function 0 Intel Wireless WiFi Link 4965 rev 0x61: apic 1 int 17 (irq 11), MIMO 2T3R, MoW1, address 00:1f:3b:a0:f8:55 uhci2 at pci0 dev 29 function 0 Intel 82801H USB rev 0x03: apic 1 int 16 (irq 10) uhci3 at pci0 dev 29 function 1 Intel 82801H USB rev 0x03: apic 1 int 17 (irq 11) ehci1 at pci0 dev 29 function 7 Intel 82801H USB rev 0x03: apic 1 int 19 (irq 11) usb1 at ehci1: USB revision 2.0
Re: more assertwaitok() love
On Thu, Sep 30, 2010 at 12:29:54AM +, Thordur Bjornsson wrote: Hi. Try to catch more places where we sleep and are not allowed. One thing of note, msleep() is missing in this diff, but there it is needed to call to sleep_setup routines with the mutex held, and after we release it we _will_ sleep so a sleep there with another mutex held will be caught by the assertwaitok() in mi_switch(). Also, define assertwaitok() out for !DIAGNOSTIC kernels. Noone wants to OK/comment on this besides matthew@ ? Comments/OKs? Index: kern/kern_rwlock.c === RCS file: /home/cvs/src/sys/kern/kern_rwlock.c,v retrieving revision 1.16 diff -u -p -r1.16 kern_rwlock.c --- kern/kern_rwlock.c24 Sep 2010 13:21:30 - 1.16 +++ kern/kern_rwlock.c30 Sep 2010 00:12:12 - @@ -87,6 +87,8 @@ rw_enter_read(struct rwlock *rwl) { unsigned long owner = rwl-rwl_owner; + assertwaitok(); + if (__predict_false((owner RWLOCK_WRLOCK) || rw_cas(rwl-rwl_owner, owner, owner + RWLOCK_READ_INCR))) rw_enter(rwl, RW_READ); @@ -97,6 +99,8 @@ rw_enter_write(struct rwlock *rwl) { struct proc *p = curproc; + assertwaitok(); + if (__predict_false(rw_cas(rwl-rwl_owner, 0, RW_PROC(p) | RWLOCK_WRLOCK))) rw_enter(rwl, RW_WRITE); @@ -190,6 +194,9 @@ rw_enter(struct rwlock *rwl, int flags) struct sleep_state sls; unsigned long inc, o; int error; + + if (!(flags RW_NOSLEEP)) + assertwaitok(); op = rw_ops[flags RW_OPMASK]; Index: kern/kern_synch.c === RCS file: /home/cvs/src/sys/kern/kern_synch.c,v retrieving revision 1.95 diff -u -p -r1.95 kern_synch.c --- kern/kern_synch.c 29 Jun 2010 00:28:14 - 1.95 +++ kern/kern_synch.c 29 Sep 2010 21:55:58 - @@ -121,6 +121,8 @@ tsleep(const volatile void *ident, int p return (0); } + assertwaitok(); + sleep_setup(sls, ident, priority, wmesg); sleep_setup_timeout(sls, timo); sleep_setup_signal(sls, priority); Index: kern/subr_pool.c === RCS file: /home/cvs/src/sys/kern/subr_pool.c,v retrieving revision 1.98 diff -u -p -r1.98 subr_pool.c --- kern/subr_pool.c 26 Sep 2010 21:03:57 - 1.98 +++ 
kern/subr_pool.c 30 Sep 2010 00:03:15 - @@ -455,10 +455,8 @@ pool_get(struct pool *pp, int flags) KASSERT(flags (PR_WAITOK | PR_NOWAIT)); -#ifdef DIAGNOSTIC if ((flags PR_WAITOK) != 0) assertwaitok(); -#endif /* DIAGNOSTIC */ mtx_enter(pp-pr_mtx); v = pool_do_get(pp, flags); Index: kern/subr_xxx.c === RCS file: /home/cvs/src/sys/kern/subr_xxx.c,v retrieving revision 1.12 diff -u -p -r1.12 subr_xxx.c --- kern/subr_xxx.c 28 Sep 2010 20:27:56 - 1.12 +++ kern/subr_xxx.c 29 Sep 2010 21:55:03 - @@ -156,13 +156,15 @@ blktochr(dev_t dev) /* * Check that we're in a context where it's okay to sleep. */ + +#ifdef DIAGNOSTIC void assertwaitok(void) { splassert(IPL_NONE); -#ifdef DIAGNOSTIC + if (curcpu()-ci_mutex_level != 0) panic(assertwaitok: non-zero mutex count: %d, curcpu()-ci_mutex_level); -#endif } +#endif Index: sys/systm.h === RCS file: /home/cvs/src/sys/sys/systm.h,v retrieving revision 1.86 diff -u -p -r1.86 systm.h --- sys/systm.h 21 Sep 2010 01:09:10 - 1.86 +++ sys/systm.h 30 Sep 2010 00:02:51 - @@ -179,7 +179,11 @@ void ttyprintf(struct tty *, const char void splassert_fail(int, int, const char *); extern int splassert_ctl; +#ifdef DIAGNOSTIC void assertwaitok(void); +#else +#define assertwaitok() do { /* nothing */ } while (0) +#endif void tablefull(const char *);
more assertwaitok() love
Hi. Try to catch more places where we sleep and are not allowed. One thing of note, msleep() is missing in this diff, but there it is needed to call to sleep_setup routines with the mutex held, and after we release it we _will_ sleep so a sleep there with another mutex held will be caught by the assertwaitok() in mi_switch(). Also, define assertwaitok() out for !DIAGNOSTIC kernels. Comments/OKs? Index: kern/kern_rwlock.c === RCS file: /home/cvs/src/sys/kern/kern_rwlock.c,v retrieving revision 1.16 diff -u -p -r1.16 kern_rwlock.c --- kern/kern_rwlock.c 24 Sep 2010 13:21:30 - 1.16 +++ kern/kern_rwlock.c 30 Sep 2010 00:12:12 - @@ -87,6 +87,8 @@ rw_enter_read(struct rwlock *rwl) { unsigned long owner = rwl-rwl_owner; + assertwaitok(); + if (__predict_false((owner RWLOCK_WRLOCK) || rw_cas(rwl-rwl_owner, owner, owner + RWLOCK_READ_INCR))) rw_enter(rwl, RW_READ); @@ -97,6 +99,8 @@ rw_enter_write(struct rwlock *rwl) { struct proc *p = curproc; + assertwaitok(); + if (__predict_false(rw_cas(rwl-rwl_owner, 0, RW_PROC(p) | RWLOCK_WRLOCK))) rw_enter(rwl, RW_WRITE); @@ -190,6 +194,9 @@ rw_enter(struct rwlock *rwl, int flags) struct sleep_state sls; unsigned long inc, o; int error; + + if (!(flags RW_NOSLEEP)) + assertwaitok(); op = rw_ops[flags RW_OPMASK]; Index: kern/kern_synch.c === RCS file: /home/cvs/src/sys/kern/kern_synch.c,v retrieving revision 1.95 diff -u -p -r1.95 kern_synch.c --- kern/kern_synch.c 29 Jun 2010 00:28:14 - 1.95 +++ kern/kern_synch.c 29 Sep 2010 21:55:58 - @@ -121,6 +121,8 @@ tsleep(const volatile void *ident, int p return (0); } + assertwaitok(); + sleep_setup(sls, ident, priority, wmesg); sleep_setup_timeout(sls, timo); sleep_setup_signal(sls, priority); Index: kern/subr_pool.c === RCS file: /home/cvs/src/sys/kern/subr_pool.c,v retrieving revision 1.98 diff -u -p -r1.98 subr_pool.c --- kern/subr_pool.c26 Sep 2010 21:03:57 - 1.98 +++ kern/subr_pool.c30 Sep 2010 00:03:15 - @@ -455,10 +455,8 @@ pool_get(struct pool *pp, int flags) KASSERT(flags (PR_WAITOK 
| PR_NOWAIT)); -#ifdef DIAGNOSTIC if ((flags PR_WAITOK) != 0) assertwaitok(); -#endif /* DIAGNOSTIC */ mtx_enter(pp-pr_mtx); v = pool_do_get(pp, flags); Index: kern/subr_xxx.c === RCS file: /home/cvs/src/sys/kern/subr_xxx.c,v retrieving revision 1.12 diff -u -p -r1.12 subr_xxx.c --- kern/subr_xxx.c 28 Sep 2010 20:27:56 - 1.12 +++ kern/subr_xxx.c 29 Sep 2010 21:55:03 - @@ -156,13 +156,15 @@ blktochr(dev_t dev) /* * Check that we're in a context where it's okay to sleep. */ + +#ifdef DIAGNOSTIC void assertwaitok(void) { splassert(IPL_NONE); -#ifdef DIAGNOSTIC + if (curcpu()-ci_mutex_level != 0) panic(assertwaitok: non-zero mutex count: %d, curcpu()-ci_mutex_level); -#endif } +#endif Index: sys/systm.h === RCS file: /home/cvs/src/sys/sys/systm.h,v retrieving revision 1.86 diff -u -p -r1.86 systm.h --- sys/systm.h 21 Sep 2010 01:09:10 - 1.86 +++ sys/systm.h 30 Sep 2010 00:02:51 - @@ -179,7 +179,11 @@ void ttyprintf(struct tty *, const char void splassert_fail(int, int, const char *); extern int splassert_ctl; +#ifdef DIAGNOSTIC void assertwaitok(void); +#else +#defineassertwaitok() do { /* nothing */ } while (0) +#endif void tablefull(const char *);
de-static uvm_swap
Hi, hitting some panics coming through the swap code and it is a bit annoying having them not show up in the ddb trace. OK ? Index: uvm/uvm_swap.c === RCS file: /home/cvs/src/sys/uvm/uvm_swap.c,v retrieving revision 1.97 diff -u -p -r1.97 uvm_swap.c --- uvm/uvm_swap.c 10 Sep 2010 16:34:09 - 1.97 +++ uvm/uvm_swap.c 24 Sep 2010 19:53:22 - @@ -218,11 +218,11 @@ struct pool vndbuf_pool; /* * local variables */ -static struct extent *swapmap; /* controls the mapping of /dev/drum */ +struct extent *swapmap;/* controls the mapping of /dev/drum */ /* list of all active swap devices [by priority] */ LIST_HEAD(swap_priority, swappri); -static struct swap_priority swap_priority; +struct swap_priority swap_priority; /* locks */ struct rwlock swap_syscall_lock = RWLOCK_INITIALIZER(swplk); @@ -230,25 +230,25 @@ struct rwlock swap_syscall_lock = RWLOCK /* * prototypes */ -static void swapdrum_add(struct swapdev *, int); -static struct swapdev *swapdrum_getsdp(int); +voidswapdrum_add(struct swapdev *, int); +struct swapdev *swapdrum_getsdp(int); -static struct swapdev *swaplist_find(struct vnode *, int); -static void swaplist_insert(struct swapdev *, -struct swappri *, int); -static void swaplist_trim(void); +struct swapdev *swaplist_find(struct vnode *, int); +voidswaplist_insert(struct swapdev *, +struct swappri *, int); +voidswaplist_trim(void); -static int swap_on(struct proc *, struct swapdev *); -static int swap_off(struct proc *, struct swapdev *); +int swap_on(struct proc *, struct swapdev *); +int swap_off(struct proc *, struct swapdev *); -static void sw_reg_strategy(struct swapdev *, struct buf *, int); +void sw_reg_strategy(struct swapdev *, struct buf *, int); void sw_reg_iodone(struct buf *); void sw_reg_iodone_internal(void *, void *); -static void sw_reg_start(struct swapdev *); +void sw_reg_start(struct swapdev *); -static int uvm_swap_io(struct vm_page **, int, int, int); +int uvm_swap_io(struct vm_page **, int, int, int); -static void swapmount(void); +void 
swapmount(void); boolean_t uvm_swap_allocpages(struct vm_page **, int); #ifdef UVM_SWAP_ENCRYPT @@ -481,7 +481,7 @@ uvm_swap_finicrypt_all(void) * FREE it if we don't need it... this it to prevent malloc blocking * here while adding swap) */ -static void +void swaplist_insert(struct swapdev *sdp, struct swappri *newspp, int priority) { struct swappri *spp, *pspp; @@ -533,7 +533,7 @@ swaplist_insert(struct swapdev *sdp, str * = caller must hold both swap_syscall_lock and uvm.swap_data_lock * = we return the swapdev we found (and removed) */ -static struct swapdev * +struct swapdev * swaplist_find(struct vnode *vp, boolean_t remove) { struct swapdev *sdp; @@ -566,7 +566,7 @@ swaplist_find(struct vnode *vp, boolean_ * * = caller must hold both swap_syscall_lock and uvm.swap_data_lock */ -static void +void swaplist_trim(void) { struct swappri *spp, *nextspp; @@ -587,7 +587,7 @@ swaplist_trim(void) * = caller must hold swap_syscall_lock * = uvm.swap_data_lock should be unlocked (we may sleep) */ -static void +void swapdrum_add(struct swapdev *sdp, int npages) { u_long result; @@ -607,7 +607,7 @@ swapdrum_add(struct swapdev *sdp, int np * = each swapdev takes one big contig chunk of the drum * = caller must hold uvm.swap_data_lock */ -static struct swapdev * +struct swapdev * swapdrum_getsdp(int pgno) { struct swapdev *sdp; @@ -895,7 +895,7 @@ out: * = caller should leave uvm.swap_data_lock unlocked, we may lock it * if needed. */ -static int +int swap_on(struct proc *p, struct swapdev *sdp) { static int count = 0; /* static */ @@ -1094,7 +1094,7 @@ bad: * * = swap data should be locked, we will unlock. 
*/ -static int +int swap_off(struct proc *p, struct swapdev *sdp) { int error = 0; @@ -1243,7 +1243,7 @@ swstrategy(struct buf *bp) /* * sw_reg_strategy: handle swap i/o to regular files */ -static void +void sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn) { struct vnode*vp; @@ -1416,7 +1416,7 @@ out: /* Arrive here at splbio */ } /* sw_reg_start: start an I/O request on the requested swapdev. */ -static void +void sw_reg_start(struct swapdev *sdp) { struct buf *bp; @@ -1762,7 +1762,7 @@ uvm_swap_get(struct vm_page *page, int s * uvm_swap_io: do an i/o operation to swap */ -static int +int uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags) { daddr64_t startblk; @@ -2086,7 +2086,7 @@ uvm_swap_io(struct vm_page **pps, int st return (result); } -static void +void swapmount(void) { struct swapdev *sdp;
Re: de-static uvm_swap
and I'd like to kill these to: Index: uvm_pdaemon.c === RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v retrieving revision 1.55 diff -u -p -r1.55 uvm_pdaemon.c --- uvm_pdaemon.c 14 Oct 2009 17:53:30 - 1.55 +++ uvm_pdaemon.c 24 Sep 2010 22:31:47 - @@ -96,9 +96,9 @@ * local prototypes */ -static voiduvmpd_scan(void); -static boolean_t uvmpd_scan_inactive(struct pglist *); -static voiduvmpd_tune(void); +void uvmpd_scan(void); +boolean_t uvmpd_scan_inactive(struct pglist *); +void uvmpd_tune(void); /* * uvm_wait: wait (sleep) for the page daemon to free some pages @@ -155,7 +155,7 @@ uvm_wait(const char *wmsg) * = caller must call with page queues locked */ -static void +void uvmpd_tune(void) { UVMHIST_FUNC(uvmpd_tune); UVMHIST_CALLED(pdhist); @@ -329,7 +329,7 @@ uvm_aiodone_daemon(void *arg) * = we return TRUE if we are exiting because we met our target */ -static boolean_t +boolean_t uvmpd_scan_inactive(struct pglist *pglst) { boolean_t retval = FALSE; /* assume we haven't hit target */
Re: fstab.5: FSTAB_RQ
On Thu, Sep 23, 2010 at 06:36:43PM +0059, Jason McIntyre wrote: is there a reason why we don;t document FSTAB_RQ? Not one that I can think of. If this works as intended go ahead (it should). jmc Index: fstab.5 === RCS file: /cvs/src/share/man/man5/fstab.5,v retrieving revision 1.42 diff -u -r1.42 fstab.5 --- fstab.5 8 Jun 2009 17:03:15 - 1.42 +++ fstab.5 23 Sep 2010 17:36:53 - @@ -183,7 +183,8 @@ If .Fa fs_type is -.Dq rw +.Dq rw , +.Dq rq , or .Dq ro then the filesystem whose name is given in the @@ -243,7 +244,8 @@ .Xr fsck 8 will assume that the filesystem does not need to be checked. .Bd -literal -#define FSTAB_RWrw/* read-write device */ +#define FSTAB_RWrw/* read/write device */ +#define FSTAB_RQrq/* read/write with quotas * #define FSTAB_ROro/* read-only device */ #define FSTAB_SWsw/* swap device */ #define FSTAB_XXxx/* ignore totally */ @@ -253,7 +255,7 @@ char*fs_file; /* filesystem path prefix */ char*fs_vfstype;/* type of filesystem */ char*fs_mntops; /* comma separated mount options */ - char*fs_type; /* rw, ro, sw, or xx */ + char*fs_type; /* rw, rq, ro, sw, or xx */ int fs_freq;/* dump frequency, in days */ int fs_passno; /* pass number on parallel fsck */ };
Re: Source Overview
And if you value your sanity, stay out of anything resembling filesystems. This is a lie. Hacking on filesystems, and the VFS layer in general, is a very rewarding experience, just ask Bob. NFS, for example, has been a source of joy for OpenBSD developers for years! 2) Is there something like an openbsd janitors project where newbies can start contributing small patches? similar to the Linux janitors project? Not at all. The philosophy behind not having one is that it's considered dangerous to farm out work to the inexperienced (and this exact topic has been brought up before, usually by people whining that we didn't make them feel special enough by not having one). Also it leads to people doing KNF style diffs, just to do KNF style diffs. No one learns anything. Most KNF style diffs you see coming from developers are due to them having to read some code, and they cleaned up a little while doing so. While KNF is great, doing KNF just for the sake of doing KNF is hardly ever worth it IMHO.