Re: vfs: drop proc from VOP_OPEN

2023-07-17 Thread Thordur Bjornsson
Note: this opens up pending items for cleanup. From my notes:
- fuse_file_open (into fb_setup)
- nfs_vinvalbuf (and vinvalbuf itself).

I'm skeptical of touching the latter until I have (again) some facsimile
of the setup that lives in theo's basement :)

On Sat, Jul 15, 2023 at 02:20:36PM +0200, Thordur Bjornsson wrote:
> First of a few.
> 
> Many thanks to mpi@ for adding the curproc asserts!
> 
> Two questions:
> - NOCRED: Best this should just be NULL ?
> - FSCRED: This is only AFAICT used for mounts
>   Not 100% clear, but this could perhaps be done away with at the loss
>   of some metadata (it just forces spec_open to open up given secure
>   levels) in ufs. beck@, thoughts ? (i want to say the S word, but not
>   sure if it is strictly true).
> 
> diff --git sys/dev/softraid.c sys/dev/softraid.c
> index decea16cb24..2319b85b946 100644
> --- sys/dev/softraid.c
> +++ sys/dev/softraid.c
> @@ -333,7 +333,7 @@ sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int 
> no_chunk)
>* XXX leaving dev open for now; move this to attach
>* and figure out the open/close dance for unwind.
>*/
> - error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc);
> + error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED);
>   if (error) {
>   DNPRINTF(SR_D_META,"%s: sr_meta_probe can't "
>   "open %s\n", DEVNAME(sc), devname);
> @@ -1037,7 +1037,7 @@ sr_meta_native_bootprobe(struct sr_softc *sc, dev_t 
> devno,
>   }
>  
>   /* open device */
> - error = VOP_OPEN(vn, FREAD, NOCRED, curproc);
> + error = VOP_OPEN(vn, FREAD, NOCRED);
>   if (error) {
>   DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open "
>   "failed\n", DEVNAME(sc));
> @@ -1093,7 +1093,7 @@ sr_meta_native_bootprobe(struct sr_softc *sc, dev_t 
> devno,
>   "allocate vnode for partition");
>   goto done;
>   }
> - error = VOP_OPEN(vn, FREAD, NOCRED, curproc);
> + error = VOP_OPEN(vn, FREAD, NOCRED);
>   if (error) {
>   DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe "
>   "open failed, partition %d\n",
> @@ -2833,7 +2833,7 @@ sr_hotspare(struct sr_softc *sc, dev_t dev)
>   sr_error(sc, "sr_hotspare: cannot allocate vnode");
>   goto done;
>   }
> - if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) {
> + if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED)) {
>   DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n",
>   DEVNAME(sc), devname);
>   vput(vn);
> @@ -3147,7 +3147,7 @@ sr_rebuild_init(struct sr_discipline *sd, dev_t dev, 
> int hotspare)
>   DEVNAME(sc));
>   goto done;
>   }
> - if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) {
> + if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED)) {
>   DNPRINTF(SR_D_META,"%s: sr_ioctl_setstate can't "
>   "open %s\n", DEVNAME(sc), devname);
>   vput(vn);
> diff --git sys/dev/softraid_crypto.c sys/dev/softraid_crypto.c
> index fbe8358e5dd..354c6560180 100644
> --- sys/dev/softraid_crypto.c
> +++ sys/dev/softraid_crypto.c
> @@ -665,7 +665,7 @@ sr_crypto_create_key_disk(struct sr_discipline *sd,
>   sr_error(sc, "cannot open key disk %s", devname);
>   goto done;
>   }
> - if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) {
> + if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED)) {
>   DNPRINTF(SR_D_META,"%s: sr_crypto_create_key_disk cannot "
>   "open %s\n", DEVNAME(sc), devname);
>   vput(vn);
> @@ -829,7 +829,7 @@ sr_crypto_read_key_disk(struct sr_discipline *sd, struct 
> sr_crypto *mdd_crypto,
>   sr_error(sc, "cannot open key disk %s", devname);
>   goto done;
>   }
> - if (VOP_OPEN(vn, FREAD, NOCRED, curproc)) {
> + if (VOP_OPEN(vn, FREAD, NOCRED)) {
>   DNPRINTF(SR_D_META,"%s: sr_crypto_read_key_disk cannot "
>   "open %s\n", DEVNAME(sc), devname);
>   vput(vn);
> diff --git sys/isofs/cd9660/cd9660_vfsops.c sys/isofs/cd9660/cd9660_vfsops.c
> index b844a2ff709..601c761a000 100644
> --- sys/isofs/cd9660/cd9660_vfsops.c
> +++ sys/isofs/cd9660/cd9660_vfsops.c
> @@ -240,7 +240,7 @@ iso_mountfs(struct vnode *devvp, st

vfs: drop proc from VOP_OPEN

2023-07-17 Thread Thordur Bjornsson
First of a few.

Many thanks to mpi@ for adding the curproc asserts!

Two questions:
- NOCRED: Best this should just be NULL ?
- FSCRED: This is only AFAICT used for mounts
  Not 100% clear, but this could perhaps be done away with at the loss
  of some metadata (it just forces spec_open to open up given secure
  levels) in ufs. beck@, thoughts ? (i want to say the S word, but not
  sure if it is strictly true).

diff --git sys/dev/softraid.c sys/dev/softraid.c
index decea16cb24..2319b85b946 100644
--- sys/dev/softraid.c
+++ sys/dev/softraid.c
@@ -333,7 +333,7 @@ sr_meta_probe(struct sr_discipline *sd, dev_t *dt, int 
no_chunk)
 * XXX leaving dev open for now; move this to attach
 * and figure out the open/close dance for unwind.
 */
-   error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc);
+   error = VOP_OPEN(vn, FREAD | FWRITE, NOCRED);
if (error) {
DNPRINTF(SR_D_META,"%s: sr_meta_probe can't "
"open %s\n", DEVNAME(sc), devname);
@@ -1037,7 +1037,7 @@ sr_meta_native_bootprobe(struct sr_softc *sc, dev_t devno,
}
 
/* open device */
-   error = VOP_OPEN(vn, FREAD, NOCRED, curproc);
+   error = VOP_OPEN(vn, FREAD, NOCRED);
if (error) {
DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe open "
"failed\n", DEVNAME(sc));
@@ -1093,7 +1093,7 @@ sr_meta_native_bootprobe(struct sr_softc *sc, dev_t devno,
"allocate vnode for partition");
goto done;
}
-   error = VOP_OPEN(vn, FREAD, NOCRED, curproc);
+   error = VOP_OPEN(vn, FREAD, NOCRED);
if (error) {
DNPRINTF(SR_D_META, "%s: sr_meta_native_bootprobe "
"open failed, partition %d\n",
@@ -2833,7 +2833,7 @@ sr_hotspare(struct sr_softc *sc, dev_t dev)
sr_error(sc, "sr_hotspare: cannot allocate vnode");
goto done;
}
-   if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) {
+   if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED)) {
DNPRINTF(SR_D_META,"%s: sr_hotspare cannot open %s\n",
DEVNAME(sc), devname);
vput(vn);
@@ -3147,7 +3147,7 @@ sr_rebuild_init(struct sr_discipline *sd, dev_t dev, int 
hotspare)
DEVNAME(sc));
goto done;
}
-   if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) {
+   if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED)) {
DNPRINTF(SR_D_META,"%s: sr_ioctl_setstate can't "
"open %s\n", DEVNAME(sc), devname);
vput(vn);
diff --git sys/dev/softraid_crypto.c sys/dev/softraid_crypto.c
index fbe8358e5dd..354c6560180 100644
--- sys/dev/softraid_crypto.c
+++ sys/dev/softraid_crypto.c
@@ -665,7 +665,7 @@ sr_crypto_create_key_disk(struct sr_discipline *sd,
sr_error(sc, "cannot open key disk %s", devname);
goto done;
}
-   if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED, curproc)) {
+   if (VOP_OPEN(vn, FREAD | FWRITE, NOCRED)) {
DNPRINTF(SR_D_META,"%s: sr_crypto_create_key_disk cannot "
"open %s\n", DEVNAME(sc), devname);
vput(vn);
@@ -829,7 +829,7 @@ sr_crypto_read_key_disk(struct sr_discipline *sd, struct 
sr_crypto *mdd_crypto,
sr_error(sc, "cannot open key disk %s", devname);
goto done;
}
-   if (VOP_OPEN(vn, FREAD, NOCRED, curproc)) {
+   if (VOP_OPEN(vn, FREAD, NOCRED)) {
DNPRINTF(SR_D_META,"%s: sr_crypto_read_key_disk cannot "
"open %s\n", DEVNAME(sc), devname);
vput(vn);
diff --git sys/isofs/cd9660/cd9660_vfsops.c sys/isofs/cd9660/cd9660_vfsops.c
index b844a2ff709..601c761a000 100644
--- sys/isofs/cd9660/cd9660_vfsops.c
+++ sys/isofs/cd9660/cd9660_vfsops.c
@@ -240,7 +240,7 @@ iso_mountfs(struct vnode *devvp, struct mount *mp, struct 
proc *p,
if (error)
return (error);
 
-   error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
+   error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED);
if (error)
return (error);
 
diff --git sys/isofs/udf/udf_vfsops.c sys/isofs/udf/udf_vfsops.c
index c0fc5068e08..2a5b7f03314 100644
--- sys/isofs/udf/udf_vfsops.c
+++ sys/isofs/udf/udf_vfsops.c
@@ -243,7 +243,7 @@ udf_mountfs(struct vnode *devvp, struct mount *mp, uint32_t 
lb, struct proc *p)
if (error)
return (error);
 
-   error = VOP_OPEN(devvp, FREAD, FSCRED, p);
+   error = VOP_OPEN(devvp, FREAD, FSCRED);
if (error)
return (error);
 
diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c
index 3f3112b018f..7253bc3cc8e 100644
--- 

Re: deadfs cleanup

2023-07-17 Thread Thordur Bjornsson
ping beck@

I'm stacking vp->v_lock (rwlock; couldn't bring myself to call it interlock)
diffs on top of this to kill v_id and VXLOCK. Could use an eyeball or two :)

On Tue, Jul 11, 2023 at 09:34:01PM +0200, thib4711 wrote:
> deadfs cleanup
> 
> chkvnlock() is useless, since deadfs vops are only ever assigned
> to a vnode at the tail end of vclean(), at which point the VXLOCK
> has been cleared and won't be taken again for this particular
> vnode until it is re-used through getnewvnode().
> 
> As a bonus, LK_DRAIN can soon retire as well.
> Juggle the tail end (mtx enter/leave) and the knote at the tail
> of vclean() for sanity while here.
> 
> diff --git sys/kern/vfs_subr.c sys/kern/vfs_subr.c
> index 650fe5b61a2..425b6871cdd 100644
> --- sys/kern/vfs_subr.c
> +++ sys/kern/vfs_subr.c
> @@ -1051,7 +1051,7 @@ vclean(struct vnode *vp, int flags, struct proc *p)
>* For active vnodes, it ensures that no other activity can
>* occur while the underlying object is being cleaned out.
>*/
> - VOP_LOCK(vp, LK_EXCLUSIVE | LK_DRAIN);
> + VOP_LOCK(vp, LK_EXCLUSIVE);
>  
>   /*
>* Clean out any VM data associated with the vnode.
> @@ -1099,19 +1099,21 @@ vclean(struct vnode *vp, int flags, struct proc *p)
>   /*
>* Done with purge, notify sleepers of the grim news.
>*/
> + mtx_enter(_mtx);
>   vp->v_op = _vops;
> - VN_KNOTE(vp, NOTE_REVOKE);
>   vp->v_tag = VT_NON;
>  #ifdef VFSLCKDEBUG
>   vp->v_flag &= ~VLOCKSWORK;
>  #endif
> - mtx_enter(_mtx);
>   vp->v_lflag &= ~VXLOCK;
>   if (vp->v_lflag & VXWANT) {
>   vp->v_lflag &= ~VXWANT;
>   do_wakeup = 1;
>   }
>   mtx_leave(_mtx);
> +
> + VN_KNOTE(vp, NOTE_REVOKE);
> +
>   if (do_wakeup)
>   wakeup(vp);
>  }
> diff --git sys/miscfs/deadfs/dead_vnops.c sys/miscfs/deadfs/dead_vnops.c
> index 9711f1618be..44496815567 100644
> --- sys/miscfs/deadfs/dead_vnops.c
> +++ sys/miscfs/deadfs/dead_vnops.c
> @@ -49,16 +49,10 @@ int   dead_ebadf(void *);
>  int  dead_open(void *);
>  int  dead_read(void *);
>  int  dead_write(void *);
> -int  dead_ioctl(void *);
>  int  dead_kqfilter(void *v);
> -int  dead_inactive(void *);
> -int  dead_lock(void *);
> -int  dead_bmap(void *);
>  int  dead_strategy(void *);
>  int  dead_print(void *);
>  
> -int  chkvnlock(struct vnode *);
> -
>  const struct vops dead_vops = {
>   .vop_lookup = vop_generic_lookup,
>   .vop_create = vop_generic_badop,
> @@ -70,7 +64,7 @@ const struct vops dead_vops = {
>   .vop_setattr= dead_ebadf,
>   .vop_read   = dead_read,
>   .vop_write  = dead_write,
> - .vop_ioctl  = dead_ioctl,
> + .vop_ioctl  = nullop,
>   .vop_kqfilter   = dead_kqfilter,
>   .vop_revoke = NULL,
>   .vop_fsync  = nullop,
> @@ -83,12 +77,12 @@ const struct vops dead_vops = {
>   .vop_readdir= dead_ebadf,
>   .vop_readlink   = dead_ebadf,
>   .vop_abortop= vop_generic_badop,
> - .vop_inactive   = dead_inactive,
> + .vop_inactive   = nullop,
>   .vop_reclaim= nullop,
> - .vop_lock   = dead_lock,
> + .vop_lock   = nullop,
>   .vop_unlock = nullop,
>   .vop_islocked   = nullop,
> - .vop_bmap   = dead_bmap,
> + .vop_bmap   = nullop,
>   .vop_strategy   = dead_strategy,
>   .vop_print  = dead_print,
>   .vop_pathconf   = dead_ebadf,
> @@ -105,50 +99,25 @@ dead_open(void *v)
>   return (ENXIO);
>  }
>  
> -/*
> - * Vnode op for read
> - */
>  int
>  dead_read(void *v)
>  {
>   struct vop_read_args *ap = v;
>  
> - if (chkvnlock(ap->a_vp))
> - panic("dead_read: lock");
>   /*
> -  * Return EOF for tty devices, EIO for others
> -  */
> + * Return EOF for tty devices, EIO for others
> + */
>   if ((ap->a_vp->v_flag & VISTTY) == 0)
>   return (EIO);
>   return (0);
>  }
>  
> -/*
> - * Vnode op for write
> - */
>  int
>  dead_write(void *v)
>  {
> - struct vop_write_args *ap = v;
> -
> - if (chkvnlock(ap->a_vp))
> - panic("dead_write: lock");
>   return (EIO);
>  }
>  
> -/*
> - * Device ioctl operation.
> - */
> -int
> -dead_ioctl(void *v)
> -{
> - struct vop_ioctl_args *ap = v;
> -
> - if (!chkvnlock(ap->a_vp))
> - return (EBADF);
> - return ((ap->a_vp->v_op->vop_ioctl)(ap));
> -}
> -
>  int
>  dead_kqfilter(void *v)
>  {
> @@ -180,51 +149,11 @@ dead_strategy(void *v)
>   struct vop_strategy_args *ap = v;
>   int s;
>  
> - if (ap->a_bp->b_vp == NULL || !chkvnlock(ap->a_bp->b_vp)) {
> - ap->a_bp->b_flags |= B_ERROR;
> - s = splbio();
> - biodone(ap->a_bp);
> - splx(s);
> - return (EIO);
> - }
> - return (VOP_STRATEGY(ap->a_bp->b_vp, ap->a_bp));
> -}
> -
> -int
> -dead_inactive(void *v)
> -{
> - struct vop_inactive_args *ap = v;
> -
> - 

Re: vfs: drop unnecessary cache_purge()s

2023-07-17 Thread Thordur Bjornsson
On Mon, Jul 17, 2023 at 11:38:49AM +0200, Sebastien Marie wrote:
> On Sat, Jul 15, 2023 at 09:21:40AM +0200, Thordur Bjornsson wrote:
>
> yes, vclean() will call cache_purge() after calling VOP_RECLAIM(), so we
> end up calling cache_purge() several times.
>
> but the vnode isn't in the same state inside VOP_RECLAIM() as it is after
> calling it. it seems fine, as *_reclaim() is freeing the v_data contents
> and cache_purge() doesn't touch that.

I don't follow; isn't that exactly why this is redundant?
Barring bugs, no more cache entries will be added to the vnode, since
VXLOCK is held in vclean().

> also, you didn't change ufs_reclaim() to not call cache_purge()? is that
> on purpose?

Nope, straight up forgot it.

diff --git sys/isofs/cd9660/cd9660_node.c sys/isofs/cd9660/cd9660_node.c
index bce99d77c22..300277f3b37 100644
--- sys/isofs/cd9660/cd9660_node.c
+++ sys/isofs/cd9660/cd9660_node.c
@@ -218,7 +218,6 @@ cd9660_reclaim(void *v)
/*
 * Purge old data structures associated with the inode.
 */
-   cache_purge(vp);
if (ip->i_devvp) {
vrele(ip->i_devvp);
ip->i_devvp = 0;
diff --git sys/msdosfs/msdosfs_denode.c sys/msdosfs/msdosfs_denode.c
index 7a33212b648..3707c97458e 100644
--- sys/msdosfs/msdosfs_denode.c
+++ sys/msdosfs/msdosfs_denode.c
@@ -600,7 +600,6 @@ msdosfs_reclaim(void *v)
/*
 * Purge old data structures associated with the denode.
 */
-   cache_purge(vp);
if (dep->de_devvp) {
vrele(dep->de_devvp);
dep->de_devvp = 0;
diff --git sys/nfs/nfs_node.c sys/nfs/nfs_node.c
index c8ac3b9bb14..38ad5db82fc 100644
--- sys/nfs/nfs_node.c
+++ sys/nfs/nfs_node.c
@@ -237,7 +237,6 @@ nfs_reclaim(void *v)
if (np->n_wcred)
crfree(np->n_wcred);
 
-   cache_purge(vp);
pool_put(_node_pool, vp->v_data);
vp->v_data = NULL;
 
diff --git sys/ntfs/ntfs_vnops.c sys/ntfs/ntfs_vnops.c
index d239112e991..d40e3d254f6 100644
--- sys/ntfs/ntfs_vnops.c
+++ sys/ntfs/ntfs_vnops.c
@@ -221,8 +221,6 @@ ntfs_reclaim(void *v)
return (error);

/* Purge old data structures associated with the inode. */
-   cache_purge(vp);
-
ntfs_frele(fp);
ntfs_ntput(ip);
 
diff --git sys/tmpfs/tmpfs_vnops.c sys/tmpfs/tmpfs_vnops.c
index bc1390d72c9..6ec13e686b2 100644
--- sys/tmpfs/tmpfs_vnops.c
+++ sys/tmpfs/tmpfs_vnops.c
@@ -1079,8 +1079,6 @@ tmpfs_reclaim(void *v)
racing = TMPFS_NODE_RECLAIMING(node);
rw_exit_write(>tn_nlock);
 
-   cache_purge(vp);
-
/*
 * If inode is not referenced, i.e. no links, then destroy it.
 * Note: if racing - inode is about to get a new vnode, leave it.
diff --git sys/ufs/ext2fs/ext2fs_vnops.c sys/ufs/ext2fs/ext2fs_vnops.c
index 235590d7c74..006a06b0dc8 100644
--- sys/ufs/ext2fs/ext2fs_vnops.c
+++ sys/ufs/ext2fs/ext2fs_vnops.c
@@ -1247,7 +1247,6 @@ ext2fs_reclaim(void *v)
/*
 * Purge old data structures associated with the inode.
 */
-   cache_purge(vp);
if (ip->i_devvp)
vrele(ip->i_devvp);
 
diff --git ufs/ufs_inode.c ufs/ufs_inode.c
index cc3b8b83229..f665c45176a 100644
--- ufs/ufs_inode.c
+++ ufs/ufs_inode.c
@@ -153,8 +153,6 @@ ufs_reclaim(struct vnode *vp)
/*
 * Purge old data structures associated with the inode.
 */
-   cache_purge(vp);
-
if (ip->i_devvp) {
vrele(ip->i_devvp);
}



vfs: drop unnecessary cache_purge()s

2023-07-16 Thread Thordur Bjornsson
VOP_RECLAIM is only ever called from vclean() to clean up fs-dependent
data, and vclean() calls cache_purge().

Makes all of the reclaim implementations the same in this regard.

diff --git sys/isofs/cd9660/cd9660_node.c sys/isofs/cd9660/cd9660_node.c
index bce99d77c22..300277f3b37 100644
--- sys/isofs/cd9660/cd9660_node.c
+++ sys/isofs/cd9660/cd9660_node.c
@@ -218,7 +218,6 @@ cd9660_reclaim(void *v)
/*
 * Purge old data structures associated with the inode.
 */
-   cache_purge(vp);
if (ip->i_devvp) {
vrele(ip->i_devvp);
ip->i_devvp = 0;
diff --git sys/msdosfs/msdosfs_denode.c sys/msdosfs/msdosfs_denode.c
index 7a33212b648..3707c97458e 100644
--- sys/msdosfs/msdosfs_denode.c
+++ sys/msdosfs/msdosfs_denode.c
@@ -600,7 +600,6 @@ msdosfs_reclaim(void *v)
/*
 * Purge old data structures associated with the denode.
 */
-   cache_purge(vp);
if (dep->de_devvp) {
vrele(dep->de_devvp);
dep->de_devvp = 0;
diff --git sys/nfs/nfs_node.c sys/nfs/nfs_node.c
index c8ac3b9bb14..38ad5db82fc 100644
--- sys/nfs/nfs_node.c
+++ sys/nfs/nfs_node.c
@@ -237,7 +237,6 @@ nfs_reclaim(void *v)
if (np->n_wcred)
crfree(np->n_wcred);
 
-   cache_purge(vp);
pool_put(_node_pool, vp->v_data);
vp->v_data = NULL;
 
diff --git sys/ntfs/ntfs_vnops.c sys/ntfs/ntfs_vnops.c
index d239112e991..d40e3d254f6 100644
--- sys/ntfs/ntfs_vnops.c
+++ sys/ntfs/ntfs_vnops.c
@@ -221,8 +221,6 @@ ntfs_reclaim(void *v)
return (error);

/* Purge old data structures associated with the inode. */
-   cache_purge(vp);
-
ntfs_frele(fp);
ntfs_ntput(ip);
 
diff --git sys/tmpfs/tmpfs_vnops.c sys/tmpfs/tmpfs_vnops.c
index bc1390d72c9..6ec13e686b2 100644
--- sys/tmpfs/tmpfs_vnops.c
+++ sys/tmpfs/tmpfs_vnops.c
@@ -1079,8 +1079,6 @@ tmpfs_reclaim(void *v)
racing = TMPFS_NODE_RECLAIMING(node);
rw_exit_write(>tn_nlock);
 
-   cache_purge(vp);
-
/*
 * If inode is not referenced, i.e. no links, then destroy it.
 * Note: if racing - inode is about to get a new vnode, leave it.
diff --git sys/ufs/ext2fs/ext2fs_vnops.c sys/ufs/ext2fs/ext2fs_vnops.c
index 235590d7c74..006a06b0dc8 100644
--- sys/ufs/ext2fs/ext2fs_vnops.c
+++ sys/ufs/ext2fs/ext2fs_vnops.c
@@ -1247,7 +1247,6 @@ ext2fs_reclaim(void *v)
/*
 * Purge old data structures associated with the inode.
 */
-   cache_purge(vp);
if (ip->i_devvp)
vrele(ip->i_devvp);
 



Re: Expose some scheduler statistics to userland via sysctl

2011-11-17 Thread Thordur Bjornsson
On 2011 Nov 17 (Thu) at 21:18:24 -0200 (-0200), Christiano F. Haesbaert wrote:
 Moving this to tech@
 
 Hi,
 
 I was studying the scheduler code after watching tedu's talk, I'd like
 to expose this statistics to userland so that I can try playing with
 cache affinity in the future:
 
 gimli:src: sysctl kern.schedstat  
   
   
 kern.schedstat.nmigrations=23744
 kern.schedstat.noidle=0
 kern.schedstat.stolen=9170
 kern.schedstat.choose=834843
 kern.schedstat.wasidle=808711
 kern.schedstat.nomigrations=2388
 
 Opinions ?

I see no point in exporting this. They are essentially pointless
knobs that people _will_ fiddle with without a clue.

This belongs in a developer's tree IMO.

 
 Index: sys/sys//sched.h
 ===
 RCS file: /cvs/src/sys/sys/sched.h,v
 retrieving revision 1.29
 diff -d -u -p -w -r1.29 sched.h
 --- sys/sys//sched.h  7 Jul 2011 18:00:33 -   1.29
 +++ sys/sys//sched.h  12 Nov 2011 13:51:04 -
 @@ -75,6 +75,34 @@
   * Posix defines a sched.h which may want to include sys/sched.h
   */
  
 +struct schedstat {
 + u_int64_t scs_nmigrations;
 + u_int64_t scs_noidle;
 + u_int64_t scs_stolen;
 +
 + u_int64_t scs_choose;
 + u_int64_t scs_wasidle;
 + u_int64_t scs_nomigrations;
 +};
 +
 +/* These sysctl names are only really used by sysctl(8) */
 +#define KERN_SCHEDSTAT_NMIGRATIONS   1
 +#define KERN_SCHEDSTAT_NOIDLE2
 +#define KERN_SCHEDSTAT_STOLEN3
 +#define KERN_SCHEDSTAT_CHOOSE4
 +#define KERN_SCHEDSTAT_WASIDLE   5
 +#define KERN_SCHEDSTAT_NOMIGRATIONS  6
 +#define KERN_SCHEDSTAT_MAXID 7
 +
 +#define CTL_KERN_SCHEDSTAT_NAMES {   \
 + { 0, 0 },   \
 + { nmigrations, CTLTYPE_QUAD },\
 + { noidle, CTLTYPE_QUAD }, \
 + { stolen, CTLTYPE_QUAD }, \
 + { choose, CTLTYPE_QUAD }, \
 + { wasidle, CTLTYPE_QUAD },\
 + { nomigrations, CTLTYPE_QUAD }\
 +}
  /*
   * CPU states.
   * XXX Not really scheduler state, but no other good place to put
 Index: sys/sys//sysctl.h
 ===
 RCS file: /cvs/src/sys/sys/sysctl.h,v
 retrieving revision 1.117
 diff -d -u -p -w -r1.117 sysctl.h
 --- sys/sys//sysctl.h 30 Aug 2011 01:09:29 -  1.117
 +++ sys/sys//sysctl.h 12 Nov 2011 13:40:45 -
 @@ -189,7 +189,8 @@ struct ctlname {
  #define  KERN_CONSDEV75  /* dev_t: console terminal 
 device */
  #define  KERN_NETLIVELOCKS   76  /* int: number of network 
 livelocks */
  #define  KERN_POOL_DEBUG 77  /* int: enable pool_debug */
 -#define  KERN_MAXID  78  /* number of valid kern ids */
 +#define KERN_SCHEDSTAT   78  /* struct: sched statistics */
 +#define  KERN_MAXID  79  /* number of valid kern ids */
  
  #define  CTL_KERN_NAMES { \
   { 0, 0 }, \
 @@ -270,6 +271,7 @@ struct ctlname {
   { consdev, CTLTYPE_STRUCT }, \
   { netlivelocks, CTLTYPE_INT }, \
   { pool_debug, CTLTYPE_INT }, \
 + { schedstat, CTLTYPE_STRUCT }, \
  }
  
  /*
 Index: sys/kern//kern_sched.c
 ===
 RCS file: /cvs/src/sys/kern/kern_sched.c,v
 retrieving revision 1.24
 diff -d -u -p -w -r1.24 kern_sched.c
 --- sys/kern//kern_sched.c12 Oct 2011 18:30:09 -  1.24
 +++ sys/kern//kern_sched.c12 Nov 2011 14:41:59 -
 @@ -35,6 +35,8 @@ void sched_kthreads_create(void *);
  int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
  struct proc *sched_steal_proc(struct cpu_info *);
  
 +struct schedstat schedstat;
 +
  /*
   * To help choosing which cpu should run which process we keep track
   * of cpus which are currently idle and which cpus have processes
 @@ -301,14 +303,6 @@ again:
   return (p); 
  }
  
 -uint64_t sched_nmigrations;
 -uint64_t sched_noidle;
 -uint64_t sched_stolen;
 -
 -uint64_t sched_choose;
 -uint64_t sched_wasidle;
 -uint64_t sched_nomigrations;
 -
  struct cpu_info *
  sched_choosecpu_fork(struct proc *parent, int flags)
  {
 @@ -374,7 +368,7 @@ sched_choosecpu(struct proc *p)
   if (p-p_flag  P_CPUPEG)
   return (p-p_cpu);
  
 - sched_choose++;
 + schedstat.scs_choose++;
  
   /*
* Look at all cpus that are currently idle and have nothing queued.
 @@ -393,7 +387,7 @@ sched_choosecpu(struct proc *p)
   if (cpuset_isset(set, p-p_cpu) ||
   (p-p_cpu == curcpu()  p-p_cpu-ci_schedstate.spc_nrun == 0 
   curproc == p)) {
 - sched_wasidle++;
 + schedstat.scs_wasidle++;
   return (p-p_cpu);
   }
  
 @@ -411,9 +405,9 @@ sched_choosecpu(struct proc 

Re: dd(1) human-readable output

2011-08-23 Thread Thordur Bjornsson
On 2011 Aug 23 (Tue) at 20:48:09 +0200 (+0200), Thomas Pfaff wrote:
 This patch makes dd(1) output change from e.g.
 
 $ dd if=/dev/sd0c of=/dev/null bs=512 count=16000
 16000+0 records in
 16000+0 records out
 8192000 bytes transferred in 3.002 secs (2728488 bytes/sec)
 
 to
 
 $ obj/dd if=/dev/sd0c of=/dev/null bs=512 count=16000
 16000+0 records in
 16000+0 records out
 8192000 bytes (7.8MB) transferred in 3.009 seconds (2.6MB/s)
 
 Any interest?
Yes. But with a caveat.

This is going to break at least 3 scripts that get run periodically
on all of my development machines. While I'm happy with adjusting
them (this change would actually make them smaller), I'm unsure if
we want to throw this into the wild, since this output behaviour
is _old_.

Now, a -h button or similar would solve that, but that's fugly.

So, while I'm all for it, I think the greybeards need to weigh
in on this :)

So, assuming we are fine with breaking script compatibility
(which I personally hate), the diff looks OK to me.


 Index: Makefile
 ===
 RCS file: /cvs/src/bin/dd/Makefile,v
 retrieving revision 1.5
 diff -u -p -r1.5 Makefile
 --- Makefile  29 May 1998 04:34:20 -  1.5
 +++ Makefile  23 Aug 2011 18:43:43 -
 @@ -2,5 +2,7 @@
  
  PROG=dd
  SRCS=args.c conv.c conv_tab.c dd.c misc.c position.c
 +DPADD= ${LIBUTIL}
 +LDADD= -lutil
  
  .include bsd.prog.mk
 Index: misc.c
 ===
 RCS file: /cvs/src/bin/dd/misc.c,v
 retrieving revision 1.16
 diff -u -p -r1.16 misc.c
 --- misc.c27 Oct 2009 23:59:21 -  1.16
 +++ misc.c23 Aug 2011 18:43:43 -
 @@ -45,6 +45,7 @@
  #include errno.h
  #include time.h
  #include unistd.h
 +#include util.h
  
  #include dd.h
  #include extern.h
 @@ -57,6 +58,7 @@ summary(void)
   struct iovec iov[4];
   double microsecs;
   int i = 0;
 + char sizebuf[FMT_SCALED_STRSIZE], ratebuf[FMT_SCALED_STRSIZE];
  
   (void)gettimeofday(nowtv, (struct timezone *)NULL);
   timersub(nowtv, st.startv, nowtv);
 @@ -85,10 +87,19 @@ summary(void)
   iov[i].iov_base = buf[2];
   iov[i++].iov_len = strlen(buf[2]);
   }
 +
 + strlcpy(sizebuf, ?, sizeof sizebuf);
 + fmt_scaled(st.bytes, sizebuf);
 + sizebuf[strcspn(sizebuf, B)] = '\0';
 +
 + strlcpy(ratebuf, ?, sizeof ratebuf);
 + fmt_scaled(st.bytes * 100.0 / microsecs, ratebuf);
 + ratebuf[strcspn(ratebuf, B)] = '\0';
 +
   (void)snprintf(buf[3], sizeof(buf[3]),
 - %qd bytes transferred in %ld.%03ld secs (%0.0f bytes/sec)\n,
 - (long long)st.bytes, nowtv.tv_sec, nowtv.tv_usec / 1000,
 - ((double)st.bytes * 100) / microsecs);
 + %qd bytes (%sB) transferred in %ld.%03ld seconds (%sB/s)\n,
 + (long long)st.bytes, sizebuf, nowtv.tv_sec, nowtv.tv_usec / 1000,
 + ratebuf);
  
   iov[i].iov_base = buf[3];
   iov[i++].iov_len = strlen(buf[3]);



Re: kdump: resolve sysctl numbers

2011-07-27 Thread Thordur Bjornsson
On 2011 Jul 27 (Wed) at 19:22:34 +0200 (+0200), Jasper Lievisse Adriaanse wrote:
 On Wed, Jul 27, 2011 at 10:58:22AM -0400, Ted Unangst wrote:
  On Wed, Jul 27, 2011, Otto Moerbeek wrote:
  
   +#define SETNAME(name) do { names = (name); limit = nitems(name); } while 
   (0)
  
  userland is not supposed to use nitems I think?  But it keeps sneaking
  in because the kernel headers don't protect it.
 That's right. It's used in some places like pcidump, npppd and tmux, but it's
 locally defined as:
 
 #ifndef nitems
 #define nitems(_a)(sizeof((_a)) / sizeof((_a)[0]))
 #endif

What is the reason for this not being kosher yet,
and if it's not meant to be, why isn't it protected by _KERNEL ?



Re: vnode(9) man page

2011-07-18 Thread Thordur Bjornsson
On Mon, Jul 18, 2011 at 07:11:54AM +0059, Jason McIntyre wrote:
 On Sun, Jul 17, 2011 at 03:25:29PM +, Thordur Bjornsson wrote:
   
   The commit comment indicates to me that the underlying mechanism
   changed significantly, so now I wonder if the information that
   once was in vnode_if.src (regarding vnode locking disciplines) is
   irrelevant, obsolete or if it needs to be updated and moved to
   somewhere more appropriate? The diff below just removes the
   reference to the file which may not be the right thing to do.
  
  The locking discipline in that file is subtly wrong in some places,
  overtly wrong in others.
  
  I'm reworking it actually, and I hope to have a man page for it
  at some point.
  
  For now, the best bet if you are messing with this is to check
  to see what UFS/FFS does, as it has the best chance of being the
  most correct.
   
 
 can we have the short term fix for now then, so at least the man page is
 not wrong?
Err, yes. I intended to commit the diff from Benny last night. It's in now.

Committed.



Introducing rrw locks;

2011-07-06 Thread Thordur Bjornsson
)
-   wakeup((void *)(lkp));
-   break;
-
-   case LK_DRAIN:
-   /*
-* Check that we do not already hold the lock, as it can 
-* never drain if we do. Unfortunately, we have no way to
-* check for holding a shared lock, but at least we can
-* check for an exclusive one.
-*/
-   if (WEHOLDIT(lkp, pid, cpu_id))
-   panic(lockmgr: draining against myself);
-   /*
-* If we are just polling, check to see if we will sleep.
-*/
-   if ((extflags  LK_NOWAIT)  ((lkp-lk_flags 
-(LK_HAVE_EXCL | LK_WANT_EXCL)) ||
-lkp-lk_sharecount != 0 || lkp-lk_waitcount != 0)) {
-   error = EBUSY;
-   break;
-   }
-   ACQUIRE(lkp, error, extflags, 1,
-   ((lkp-lk_flags 
-(LK_HAVE_EXCL | LK_WANT_EXCL)) ||
-lkp-lk_sharecount != 0 ||
-lkp-lk_waitcount != 0));
-   if (error)
-   break;
-   lkp-lk_flags |= LK_DRAINING | LK_HAVE_EXCL;
-   SETHOLDER(lkp, pid, cpu_id);
-   lkp-lk_exclusivecount = 1;
-   break;
-
-   default:
-   panic(lockmgr: unknown locktype request %d,
-   flags  LK_TYPE_MASK);
-   /* NOTREACHED */
-   }
-   if ((lkp-lk_flags  LK_WAITDRAIN) != 0 
-   ((lkp-lk_flags 
-   (LK_HAVE_EXCL | LK_WANT_EXCL)) == 0 
-   lkp-lk_sharecount == 0  lkp-lk_waitcount == 0)) {
-   lkp-lk_flags = ~LK_WAITDRAIN;
-   wakeup((void *)lkp-lk_flags);
+   if (flags  LK_RELEASE) {
+   rrw_exit(lkp-lk_lck);
+   return (0);
}
-   return (error);
-}
 
-#ifdef DIAGNOSTIC
-/*
- * Print out information about state of a lock. Used by VOP_PRINT
- * routines to display status about contained locks.
- */
-void
-lockmgr_printinfo(__volatile struct lock *lkp)
-{
+   if (flags  LK_SHARED)
+   rwflags |= RW_READ;
+   if (flags  (LK_EXCLUSIVE|LK_DRAIN))
+   rwflags |= RW_WRITE;
 
-   if (lkp-lk_sharecount)
-   printf( lock type %s: SHARED (count %d), lkp-lk_wmesg,
-   lkp-lk_sharecount);
-   else if (lkp-lk_flags  LK_HAVE_EXCL) {
-   printf( lock type %s: EXCL (count %d) by ,
-   lkp-lk_wmesg, lkp-lk_exclusivecount);
-   printf(pid %d, lkp-lk_lockholder);
-   } else
-   printf( not locked);
-   if (lkp-lk_waitcount  0)
-   printf( with %d pending, lkp-lk_waitcount);
+   if (flags  LK_RECURSEFAIL)
+   rwflags |= RW_RECURSEFAIL;
+   if (flags  LK_NOWAIT)
+   rwflags |= RW_NOSLEEP;
+
+   return (rrw_enter(lkp-lk_lck, rwflags));
 }
-#endif /* DIAGNOSTIC */
 
 #if defined(MULTIPROCESSOR)
 /*
@@ -343,7 +104,7 @@ lockmgr_printinfo(__volatile struct lock *lkp)
  * so that they show up in profiles.
  */
 
-struct __mp_lock kernel_lock; 
+struct __mp_lock kernel_lock;
 
 void
 _kernel_lock_init(void)
@@ -385,10 +146,4 @@ _kernel_proc_unlock(struct proc *p)
 {
__mp_unlock(kernel_lock);
 }
-
-#ifdef MP_LOCKDEBUG
-/* CPU-dependent timing, needs this to be settable from ddb. */
-int __mp_lock_spinout = 2;
-#endif
-
 #endif /* MULTIPROCESSOR */
diff --git a/kern/kern_rwlock.c b/kern/kern_rwlock.c
index d22ae3a..a85f8b1 100644
--- a/kern/kern_rwlock.c
+++ b/kern/kern_rwlock.c
@@ -2,27 +2,19 @@
 
 /*
  * Copyright (c) 2002, 2003 Artur Grabowski a...@openbsd.org
- * All rights reserved. 
+ * Copyright (c) 2011 Thordur Bjornsson t...@openbsd.org
  *
- * Redistribution and use in source and binary forms, with or without 
- * modification, are permitted provided that the following conditions 
- * are met: 
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
  *
- * 1. Redistributions of source code must retain the above copyright 
- *notice, this list of conditions and the following disclaimer. 
- * 2. The name of the author may not be used to endorse or promote products
- *derived from this software without specific prior written permission. 
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
- * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
- * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER

Re: Move uvm_pglist* to uvm_page.c

2011-06-24 Thread Thordur Bjornsson
On Thu, Jun 23, 2011 at 07:04:48PM +0100, Owain Ainsworth wrote:
 How about this now? 
 
 On Tue, May 31, 2011 at 12:05:04AM +0100, Owain Ainsworth wrote:
  These functions used to be big and complicated, now they are glorified
  wrappers around pmemrange and don't really need their own file.
  Discussed with ariane@ a while ago.
  
  ok?

OK.

  diff --git conf/files conf/files
  index 02da860..017e5f9 100644
  --- conf/files
  +++ conf/files
  @@ -1007,7 +1007,6 @@ file uvm/uvm_object.c
   file uvm/uvm_page.c
   file uvm/uvm_pager.c
   file uvm/uvm_pdaemon.c
  -file uvm/uvm_pglist.c
   file uvm/uvm_pmemrange.c
   file uvm/uvm_stat.c
   file uvm/uvm_swap.c
  diff --git uvm/uvm_page.c uvm/uvm_page.c
  index 10ef7d1..ed8e6d4 100644
  --- uvm/uvm_page.c
  +++ uvm/uvm_page.c
  @@ -806,6 +806,81 @@ uvm_pagealloc_pg(struct vm_page *pg, struct uvm_object 
  *obj, voff_t off,
   }
   
   /*
  + * uvm_pglistalloc: allocate a list of pages
  + *
  + * = allocated pages are placed at the tail of rlist.  rlist is
  + *assumed to be properly initialized by caller.
  + * = returns 0 on success or errno on failure
  + * = doesn't take into account clean non-busy pages on inactive list
  + * that could be used(?)
  + * = params:
  + * sizethe size of the allocation, rounded to page size.
  + * low the low address of the allowed allocation range.
  + * highthe high address of the allowed allocation range.
  + * alignment   memory must be aligned to this power-of-two boundary.
  + * boundaryno segment in the allocation may cross this 
  + * power-of-two boundary (relative to zero).
  + * = flags:
  + * UVM_PLA_NOWAIT  fail if allocation fails
  + * UVM_PLA_WAITOK  wait for memory to become avail
  + * UVM_PLA_ZEROreturn zeroed memory
  + */
  +int
  +uvm_pglistalloc(psize_t size, paddr_t low, paddr_t high, paddr_t alignment,
  +paddr_t boundary, struct pglist *rlist, int nsegs, int flags)
  +{
  +   UVMHIST_FUNC(uvm_pglistalloc); UVMHIST_CALLED(pghist);
  +
  +   KASSERT((alignment  (alignment - 1)) == 0);
  +   KASSERT((boundary  (boundary - 1)) == 0);
  +   KASSERT(!(flags  UVM_PLA_WAITOK) ^ !(flags  UVM_PLA_NOWAIT));
  +
  +   if (size == 0)
  +   return (EINVAL);
  +
  +   if ((high  PAGE_MASK) != PAGE_MASK) {
  +   printf(uvm_pglistalloc: Upper boundary 0x%lx 
  +   not on pagemask.\n, (unsigned long)high);
  +   }
  +
  +   /*
  +* Our allocations are always page granularity, so our alignment
  +* must be, too.
  +*/
  +   if (alignment  PAGE_SIZE)
  +   alignment = PAGE_SIZE;
  +
  +   low = atop(roundup(low, alignment));
  +   /*
  +* high + 1 may result in overflow, in which case high becomes 0x0,
  +* which is the 'don't care' value.
  +* The only requirement in that case is that low is also 0x0, or the
  +* lowhigh assert will fail.
  +*/
  +   high = atop(high + 1);
  +   size = atop(round_page(size));
  +   alignment = atop(alignment);
  +   if (boundary  PAGE_SIZE  boundary != 0)
  +   boundary = PAGE_SIZE;
  +   boundary = atop(boundary);
  +
  +   return uvm_pmr_getpages(size, low, high, alignment, boundary, nsegs,
  +   flags, rlist);
  +}
  +
  +/*
  + * uvm_pglistfree: free a list of pages
  + *
  + * = pages should already be unmapped
  + */
  +void
  +uvm_pglistfree(struct pglist *list)
  +{
  +   UVMHIST_FUNC(uvm_pglistfree); UVMHIST_CALLED(pghist);
  +   uvm_pmr_freepageq(list);
  +}
  +
  +/*
* interface used by the buffer cache to allocate a buffer at a time.
* The pages are allocated wired in DMA accessible memory
*/
  diff --git uvm/uvm_pglist.c uvm/uvm_pglist.c
  deleted file mode 100644
  index d29fb14..000
  --- uvm/uvm_pglist.c
  +++ /dev/null
  @@ -1,136 +0,0 @@
  -/* $OpenBSD$   */
  -/* $NetBSD: uvm_pglist.c,v 1.13 2001/02/18 21:19:08 chs Exp $  */
  -
  -/*-
  - * Copyright (c) 1997 The NetBSD Foundation, Inc.
  - * All rights reserved.
  - *  
  - * This code is derived from software contributed to The NetBSD Foundation
  - * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
  - * NASA Ames Research Center.  
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - * 1. Redistributions of source code must retain the above copyright
  - *notice, this list of conditions and the following disclaimer.
  - * 2. Redistributions in binary form must reproduce the above copyright 
  - *notice, this list of conditions and the following disclaimer in the
  - *documentation and/or other materials provided with the distribution.
  - *  
  - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND 
  CONTRIBUTORS
  - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
  LIMITED
  - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 
  

Re: Future of ccd(4) and raid(4)?

2011-06-24 Thread Thordur Bjornsson
On Fri, Jun 24, 2011 at 03:38:48PM +, Christian Weisgerber wrote:
 Matthew Dempsky matt...@dempsky.org wrote:
 
  What should be done about ccd(4) and raid(4)?  They both seem
  superseded in functionality by softraid(4), which also has much more
  developer interest and active development.
 
 Is softraid ready at all?  I thought it was experimental, under
 construction, incomplete, don't-use-unless-you-want-to-contribute
 code.
I'm pretty sure it left that state some time ago; in all fairness,
I'd sooner trust softraid for my data than ccd/raidframe.

softraid needs some of the bells and whistles raidframe has as
Benny already pointed out, but I think ccd(4) ought to go the
way of the Dodo.



Re: Identifying disks by name

2011-06-22 Thread Thordur Bjornsson
On Wed, Jun 22, 2011 at 09:07:41PM +0200, Wouter Coene wrote:
 On 22 jun 2011, at 20:53, Kenneth R Westerback wrote:
  On Wed, Jun 22, 2011 at 06:48:14PM +0200, Wouter Coene wrote:
  The patch below builds on the disk UID code to implement disk names. Disk
  names must match [a-zA-Z0-9_]{1,10} and are stored encoded as 6 bits per
  character into the disklabel UID field. With this patch, you can use disk
  names in /etc/fstab:
 
 bootdisk.a / ffs rw 1 1
 
  Why? This seems to add nothing but extra code and increase the
  chances some twit will name multiple disks 'MYDISK' and screw
  her/himself royally. Also, DUIDs identify a DISKLABEL and not a
  disk.
 
 Right now, nothing is stopping you from naming multiple disks 1234567890abcdef
 either. Twits will be twits, I'm just looking for something that's easier to
 type (and remember) than a hex-string.
 
 Also, this is certainly not useless if you have more than a handfull of disks
 or SAN volumes, or for removable media. Which of the following is more
 readable?
 
   mount 1234567890abcdef.a /mnt
   mount backups.a /mnt
For this type of stuff, you are looking for hotplugd(8); And duids make
writing hotplug scripts much easier.

While I do see the allure of having nice names for disks, this is
problematic (like ken pointed out) so I think I'll have to agree with
ken for now that this is extra code for a very minimal gain.

Anyways, always nice to see diffs!

cheers, thib.



Re: Identifying disks by name

2011-06-22 Thread Thordur Bjornsson
On Wed, Jun 22, 2011 at 09:34:55PM +0200, Janjaap van Velthooven wrote:
 On Wed, Jun 22, 2011 at 08:12:28PM +0100, Stuart Henderson wrote:
  On 2011/06/22 21:07, Wouter Coene wrote:
   Also, this is certainly not useless if you have more than a handfull of 
   disks
   or SAN volumes, or for removable media. Which of the following is more
   readable?
   
 mount 1234567890abcdef.a /mnt
 mount backups.a /mnt
  
  mount bac1.a /mnt isn't too bad :-)
 
 Just a vague idea for the moment;
 
 How aboot some mechanism that can do number lookups by name for disks?
 ( just like is done for host protocols ports or users and groups and possibly
 more things.. )
 
 for instance an /etc/disks with lines like:
 1234567890abcdef  backups bac1
 
 Anyways, as I have no code for something like this at this moment I'll shut up
 for now on this.

/etc/disks
DUIDmyrootdisk

/etc/fstab
myrootdisk.a / ffs rw,softdep 1 1

And now I'm royally fucked.



Re: Identifying disks by name

2011-06-22 Thread Thordur Bjornsson
On Wed, Jun 22, 2011 at 09:54:07PM +0200, Janjaap van Velthooven wrote:
 Thordur Bjornsson wrote and mailed:
  On Wed, Jun 22, 2011 at 09:34:55PM +0200, Janjaap van Velthooven wrote:
   Just a vague idea for the moment;
   
   How aboot some mechanism that can do number lookups by name for disks?
   ( just like is done for host protocols ports or users and groups and 
   possibly
   more things.. )
   
   for instance an /etc/disks with lines like:
   1234567890abcdef  backups bac1
   
   Anyways, as I have no code for something like this at this moment I'll 
   shut up
   for now on this.
 
  /etc/disks
  DUIDmyrootdisk
  
  /etc/fstab
  myrootdisk.a / ffs rw,softdep 1 1
  
  And now I'm royally fucked.

And I smoke too much dope (or not enough?).



Re: AVL tree

2011-05-19 Thread Thordur Bjornsson
On Thu, May 19, 2011 at 07:52:44PM +0300, Michael Pounov wrote:
 Add AVL tree implementation and merge few RB tree related macros.
 
 If you have comments or any claims, please send me feedback
 and I will fix them. 
cool. but tech@ removes attachments, send your diffs inline.

I'm assuming you implemented this as a macro a la RB/SPLAY in
tree.h;

That being said, there is already an AVL tree implementation
floating around, that's not macros.

I've been beating on it (with some of the RB trees diffs we
have in the kernel switched over) for some time, and hopefully
it will be committable soon.


I think I'm not alone when I say that usage of yet another 
macro tree is not welcome, at least not in the kernel.

ciao!
thib



Re: AVL tree

2011-05-19 Thread Thordur Bjornsson
On Thu, May 19, 2011 at 07:21:21PM +0200, Mike Belopuhov wrote:
 On Thu, May 19, 2011 at 7:12 PM, Thordur Bjornsson t...@openbsd.org wrote:
  On Thu, May 19, 2011 at 07:52:44PM +0300, Michael Pounov wrote:
  Add AVL tree implementation and merge few RB tree related macros.
 
  If you have comments or any claims, please send me feedback
  and I will fix them.
  cool. but tech@ removes attachments, send your diffs inline.
 
  I'm assuming you implemented this as a macro a la RB/SPAY in
  tree.h;
 
  That being said, there is already an AVL tree implementation
  floating around, that's not macros.
 
  I've been beating on it (with some of the RB trees diffs we
  have in the kernel switched over) for some time, and hopefully
  it will be committable soon.
 
 
 what do you need it for? it's pretty much the same as r/b tree.
 do you think that lookup speed up is considerable?
 same questions apply to Michael.

It's not the same as an r/b tree.

The main reason for it is to cut down on the code bloat that
the tree.h macros introduce.

Also, my tests (limited though; I have not done proper networking checks)
show no performance difference.



Re: Filesystem Hierarchy Standard (FHS) and OpenBSD

2011-05-10 Thread Thordur Bjornsson
On Mon, May 09, 2011 at 11:33:27PM -0400, Jeff Licquia wrote:
 (Sorry if this isn't the proper list for this discussion.  If not,
 please point me in the right direction.)
This is the proper list.
 
 Despite all the Linux in the names above, we're wanting to make
 sure that the FHS remains independent of any particular UNIX
 implementation, and continues to be useful to non-Linux UNIXes.
Good, at least the Linux kids haven't totally forgotten the other
grumpies out there :) 

 My question to you is: do you consider the FHS to be relevant to
 current and future development of OpenBSD?  If not, is this simply
 due to lack of maintenance; would your interest in the FHS be
 greater with more consistent updates?
 If you are interested, consider this an invitation to participate.
 We've set up a mailing list, Web site, etc., and are reviving the
 old bug tracker.  More details can be found here:
 
 http://www.linuxfoundation.org/collaborate/workgroups/lsb/fhs

There are numerous show stoppers, IMO.

First off, the document is very Linux specific. Although I can't
back up the claim, I'm pretty sure that other OSes weren't given
much thought in the early days of this document.

Here are what I would call, show stoppers. And this applies to
OpenBSD, as I view it.

- OpenBSD has gone to great lengths to centralize all its configuration
  into one place: /etc
  so anything contrary to that is a simple no go.

- A number of the directories do not make sense on OpenBSD:

  /lib
  For what libraries ? /bin and /sbin contains binaries that
  are statically linked (for a very good reason) so this is
  pointless.

  /opt
  Add-on application packages go into /usr/local/ on OpenBSD
  and the rest of the *BSDs
  Here there is one difference between Open and Free that I've
  come to dislike, FreeBSD stuffs configuration files into /usr/local/etc

  /media
  Mount point for removable media, okay; I thought that was
  what /mnt was for, and /mnt is still in the FHS ?
  (OK, I can see the point, just to help Gnome users :)

  /srv
  This doesn't even have a good rationale in the FHS; what exactly
  is this supposed to be? I think every *BSD admin expects to find
  data for or from services provided by the system inside /var

  So the above things do not make sense in the general case, and
  as for the rest of the document, you can easily state that OpenBSD
  is at least partially compliant!

Unfortunately, I don't think the FHS is relevant to current or
future developments of OpenBSD; at least not in its current state.

But I think the document is interesting, and maybe I'll butt in and
offer some of my opinions :)

Oh! And I almost forgot, we already have our very own FHS, it's
in hier(7) :-)

regards, thib.



Re: vnds considerd harmful.

2011-04-07 Thread Thordur Bjornsson
On Wed, Apr 06, 2011 at 04:25:15PM -0400, Jonathan Thornburg wrote:
 In http://marc.info/?l=openbsd-techm=130200205608892w=1,
 Thordur Bjornsson thib () openbsd ! org wrote:
  Now that I've disallowed swapping to vnd's the purpose
  of vnd (vs svnd) is suspect, it serves no purpose other
  then providing a different way of doing what svnd does
  (which imo, isn't even better).
  
  So, nuke vnds (keep svnds though!).
  
  This will make svndXn the same as vndXn etc. The idea is
  that in a few releases we'll simply remove the svnd0 notes.
 ^ - nodes.

With this diff svnd0 == vnd0 in your /dev.
 
 Could you clarify the semantics of the in a few releases plan?
 That is, are you proposing that the in a few releases OpenBSD will have
 (a) vnd == today's svnd,
bingo.

 (b) vnd == today's vnd,
 (c) vnd == some sort of merging of today's vnd and today's svnd, or
 (d) something else which hasn't occured to me yet
 
 I sort of think you're proposing (a), but I'm not entirely sure that I'm
 parsing your wording correctly...  [Hmm, I wonder if my failure-to-parse
 is related to a recent bout of perl hashes holding references to anonymous
 hashes holding references to anonymous lists. :) ]

So, yeah. vnd's will become today's svnd0's and the old style
bypassing of the buffer cache is gone (leaving only svnd0s).

Then in a few releases, the svnd device nodes will be removed.



vnds considerd harmful.

2011-04-05 Thread Thordur Bjornsson
Hi,

Now that I've disallowed swapping to vnd's the purpose
of vnd (vs svnd) is suspect, it serves no purpose other
than providing a different way of doing what svnd does
(which imo, isn't even better).

So, nuke vnds (keep svnds though!).

This will make svndXn the same as vndXn etc. The idea is
that in a few releases we'll simply remove the svnd0 notes.

comments/ok ?


Index: dev/vnd.c
===
RCS file: /home/thib/cvs/src/sys/dev/vnd.c,v
retrieving revision 1.108
diff -u -p -r1.108 vnd.c
--- dev/vnd.c   2 Apr 2011 15:24:03 -   1.108
+++ dev/vnd.c   3 Apr 2011 18:29:52 -
@@ -33,25 +33,11 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
- *
- * from: Utah $Hdr: vn.c 1.13 94/04/02$
- *
- * @(#)vn.c8.6 (Berkeley) 4/1/94
  */
 
 /*
- * Vnode disk driver.
- *
- * Block/character interface to a vnode.  Allows one to treat a file
- * as a disk (e.g. build a filesystem in it, mount it, etc.).
- *
- * NOTE 1: This uses either the VOP_BMAP/VOP_STRATEGY interface to the
- * vnode or simple VOP_READ/VOP_WRITE.  The former is suitable for swapping
- * as it doesn't distort the local buffer cache.  The latter is good for
- * building disk images as it keeps the cache consistent after the block
- * device is closed.
+ * There is a security issue involved with this driver.
  *
- * NOTE 2: There is a security issue involved with this driver.
  * Once mounted all access to the contents of the mapped file via
  * the special file is controlled by the permissions on the special
  * file, the protection of the mapped file is ignored (effectively,
@@ -102,12 +88,8 @@ int vnddebug = 0x00;
  * DISKUNIT(), but with the minor masked off.
  */
 #definevndunit(x)  DISKUNIT(makedev(major(x), minor(x)  0x7ff))
-#definevndsimple(x)(minor(x)  0x800)
-
-/* same as MAKEDISKDEV, preserving the vndsimple() property */
 #defineVNDLABELDEV(dev)\
-   makedev(major(dev), DISKMINOR(vndunit(dev), RAW_PART) | \
-   (vndsimple(dev) ? 0x800 : 0))
+   makedev(major(dev), DISKMINOR(vndunit(dev), RAW_PART))
 
 struct vndbuf {
struct buf  vb_buf;
@@ -145,7 +127,6 @@ struct vnd_softc {
 #defineVNF_LABELLING   0x0100
 #defineVNF_WLABEL  0x0200
 #defineVNF_HAVELABEL   0x0400
-#defineVNF_SIMPLE  0x1000
 #defineVNF_READONLY0x2000
 
 #defineVNDRW(v)((v)-sc_flags  VNF_READONLY ? FREAD : 
FREAD|FWRITE)
@@ -157,7 +138,6 @@ int numvnd = 0;
 void   vndattach(int);
 
 void   vndclear(struct vnd_softc *);
-void   vndstart(struct vnd_softc *, struct buf *);
 intvndsetcred(struct vnd_softc *, struct ucred *);
 void   vndiodone(struct buf *);
 void   vndshutdown(void);
@@ -232,12 +212,6 @@ vndopen(dev_t dev, int flags, int mode, 
if ((error = vndlock(sc)) != 0)
return (error);
 
-   if (!vndsimple(dev)  sc-sc_vp != NULL 
-   (sc-sc_vp-v_type != VREG || sc-sc_keyctx != NULL)) {
-   error = EINVAL;
-   goto bad;
-   }
-
if ((flags  FWRITE)  (sc-sc_flags  VNF_READONLY)) {
error = EROFS;
goto bad;
@@ -252,20 +226,11 @@ vndopen(dev_t dev, int flags, int mode, 
part = DISKPART(dev);
pmask = 1  part;
 
-   /*
-* If any partition is open, all succeeding openings must be of the
-* same type or read-only.
-*/
-   if (sc-sc_dk.dk_openmask) {
-   if (((sc-sc_flags  VNF_SIMPLE) != 0) !=
-   (vndsimple(dev) != 0)  (flags  FWRITE)) {
-   error = EBUSY;
-   goto bad;
-   }
-   } else if (vndsimple(dev))
-   sc-sc_flags |= VNF_SIMPLE;
-   else
-   sc-sc_flags = ~VNF_SIMPLE;
+   /* XXX: OK ?*/
+   if (sc-sc_dk.dk_openmask  (flags  FWRITE)) {
+   error = EBUSY;
+   goto bad;
+   }
 
/* Check that the partition exists. */
if (part != RAW_PART 
@@ -360,30 +325,13 @@ vndclose(dev_t dev, int flags, int mode,
return (0);
 }
 
-/*
- * Two methods are used, the traditional buffercache bypassing and the
- * newer, cache-coherent on unmount, one.
- *
- * Former method:
- * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY.
- * Note that this driver can only be used for swapping over NFS on the hp
- * since nfs_strategy on the vax cannot handle u-areas and page tables.
- *
- * Latter method:
- * Repack the buffer into an uio structure and use VOP_READ/VOP_WRITE to
- * access the underlying file.
- */
 void
 vndstrategy(struct buf *bp)
 {
int unit = vndunit(bp-b_dev);
struct vnd_softc *vnd = vnd_softc[unit];
-   struct vndbuf *nbp;
-   int bsize;
off_t bn;
-   caddr_t addr;
-   

merge vfs_conf.c and vfs_init.c

2011-04-05 Thread Thordur Bjornsson
no need to have two tiny files around.

stuff everything into vfs_init, it belongs there
(along with other stuff, that will get moved soonish).

OK ?


Index: conf/files
===
RCS file: /home/thib/cvs/src/sys/conf/files,v
retrieving revision 1.511
diff -u -p -r1.511 files
--- conf/files  5 Apr 2011 18:51:25 -   1.511
+++ conf/files  5 Apr 2011 19:43:16 -
@@ -759,7 +759,6 @@ file kern/vfs_bio.c
 file kern/vfs_biomem.c
 file kern/vfs_cache.c
 file kern/vfs_cluster.c
-file kern/vfs_conf.c
 file kern/vfs_default.c
 file kern/vfs_init.c
 file kern/vfs_lockf.c
Index: kern/vfs_conf.c
===
RCS file: kern/vfs_conf.c
diff -N kern/vfs_conf.c
--- kern/vfs_conf.c 5 Apr 2011 18:51:25 -   1.41
+++ /dev/null   1 Jan 1970 00:00:00 -
@@ -1,179 +0,0 @@
-/* $OpenBSD: vfs_conf.c,v 1.41 2011/04/05 18:51:25 thib Exp $  */
-/* $NetBSD: vfs_conf.c,v 1.21.4.1 1995/11/01 00:06:26 jtc Exp $*/
-
-/*
- * Copyright (c) 1989, 1993
- * The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *notice, this list of conditions and the following disclaimer in the
- *documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *may be used to endorse or promote products derived from this software
- *without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)vfs_conf.c  8.8 (Berkeley) 3/31/94
- */
-
-#include sys/param.h
-#include sys/mount.h
-#include sys/vnode.h
-#include sys/timeout.h
-
-#ifdef FFS
-#include ufs/ufs/quota.h
-#include ufs/ufs/inode.h
-#include ufs/ffs/ffs_extern.h
-#endif
-
-#ifdef EXT2FS
-#include ufs/ext2fs/ext2fs_extern.h
-#endif
-
-#ifdef CD9660
-#include isofs/cd9660/iso.h
-#include isofs/cd9660/cd9660_extern.h
-#endif
-
-#ifdef MFS
-#include ufs/mfs/mfs_extern.h
-#endif
-
-#ifdef NFSCLIENT
-#include nfs/rpcv2.h
-#include nfs/nfsproto.h
-#include nfs/nfsnode.h
-#include nfs/nfs.h
-#include nfs/nfsmount.h
-#endif
-
-/*
- * This defines the root filesystem.
- */
-struct vnode *rootvnode;
-
-/*
- * Set up the filesystem operations for vnodes.
- * The types are defined in mount.h.
- */
-
-
-#ifdef FFS
-extern const struct vfsops ffs_vfsops;
-#endif
-
-#ifdef MFS
-extern const struct vfsops mfs_vfsops;
-#endif
-
-#ifdef MSDOSFS
-extern const struct vfsops msdosfs_vfsops;
-#endif
-
-#ifdef NFSCLIENT
-extern const struct vfsops nfs_vfsops;
-#endif
-
-#ifdef PROCFS
-extern const struct vfsops procfs_vfsops;
-#endif
-
-#ifdef CD9660
-extern const struct vfsops cd9660_vfsops;
-#endif
-
-#ifdef EXT2FS
-extern const struct vfsops ext2fs_vfsops;
-#endif
-
-#ifdef NNPFS
-extern  const struct vfsops nnpfs_vfsops;
-#endif
-
-#ifdef NTFS
-extern  const struct vfsops ntfs_vfsops;
-#endif
-
-#ifdef UDF
-extern  const struct vfsops udf_vfsops;
-#endif
-
-/*
- * Set up the filesystem operations for vnodes.
- */
-static struct vfsconf vfsconflist[] = {
-
-/* Fast Filesystem */
-#ifdef FFS
-{ ffs_vfsops, MOUNT_FFS, 1, 0, MNT_LOCAL, NULL },
-#endif
-
-/* Memory-based Filesystem */
-#ifdef MFS
-{ mfs_vfsops, MOUNT_MFS, 3, 0, MNT_LOCAL, NULL },
-#endif
-
-#ifdef EXT2FS
-   { ext2fs_vfsops, MOUNT_EXT2FS, 17, 0, MNT_LOCAL, NULL },
-#endif
-/* ISO9660 (aka CDROM) Filesystem */
-#ifdef CD9660
-{ cd9660_vfsops, MOUNT_CD9660, 14, 0, MNT_LOCAL, NULL },
-#endif
-
-/* MSDOS Filesystem */
-#ifdef MSDOSFS
-{ msdosfs_vfsops, MOUNT_MSDOS, 4, 0, MNT_LOCAL, NULL },
-#endif
-
-/* Sun-compatible Network Filesystem */
-#ifdef NFSCLIENT
-{ nfs_vfsops, MOUNT_NFS, 2, 0, 0, NULL },
-#endif
-
-   /* NNPFS */
-#ifdef NNPFS
-   { nnpfs_vfsops, MOUNT_NNPFS, 21, 

no swapping to vnds

2011-04-04 Thread Thordur Bjornsson
Hi,

1) Swapping to svnds has issues (pagedaemon deadlocks) and has been
   broken since forever.
2) Swapping to vnds makes no sense, why add another layer when you
   can just swap to a regular file instead ?

so stop supporting swapping to vnds. If this turns out to be kosher
I have a diff tested that removes vnds in favour of svnds.


OK ?


Index: uvm/uvm_swap.c
===
RCS file: /home/thib/cvs/src/sys/uvm/uvm_swap.c,v
retrieving revision 1.100
diff -u -p -r1.100 uvm_swap.c
--- uvm/uvm_swap.c  21 Dec 2010 20:14:44 -  1.100
+++ uvm/uvm_swap.c  4 Apr 2011 09:14:59 -
@@ -912,6 +912,10 @@ swap_on(struct proc *p, struct swapdev *
vp = sdp-swd_vp;
dev = sdp-swd_dev;
 
+   /* no swapping to vnds. */
+   if (bdevsw[major(dev)].d_strategy == vndstrategy)
+   return (EOPNOTSUPP);
+
/*
 * open the swap file (mostly useful for block device files to
 * let device driver know what is up).



Re: no swapping to vnds

2011-04-04 Thread Thordur Bjornsson
On Mon, Apr 04, 2011 at 12:34:17PM +0200, Otto Moerbeek wrote:
 On Mon, Apr 04, 2011 at 09:22:41AM +, Thordur Bjornsson wrote:
 
  Hi,
  
  1) Swapping to svnds has issues (pagedaemon deadlocks) and has been
 broken since forever.
  2) Swapping to vnds makes no sense, why add another layer when you
 can just swap to a regular file instead ?
  
  so stop supporting swapping to vnds. If this turns out to be kosher
  I have a diff tested that removes vnds in favour of svnds.
 
 I don't know if this is the right check, but the & is redundant to get
 the address of a function.
It's the easiest check. It's hard to map a dev_t to a device since
it is MD, so checking for that function is the best way I could
come up with.

And doh on the '&'. I'll commit without it.

 
   -Otto
 
  
  
  OK ?
  
  
  Index: uvm/uvm_swap.c
  ===
  RCS file: /home/thib/cvs/src/sys/uvm/uvm_swap.c,v
  retrieving revision 1.100
  diff -u -p -r1.100 uvm_swap.c
  --- uvm/uvm_swap.c  21 Dec 2010 20:14:44 -  1.100
  +++ uvm/uvm_swap.c  4 Apr 2011 09:14:59 -
  @@ -912,6 +912,10 @@ swap_on(struct proc *p, struct swapdev *
  vp = sdp-swd_vp;
  dev = sdp-swd_dev;
   
  +   /* no swapping to vnds. */
  +   if (bdevsw[major(dev)].d_strategy == vndstrategy)
  +   return (EOPNOTSUPP);
  +
  /*
   * open the swap file (mostly useful for block device files to
   * let device driver know what is up).



Re: pool_debug is good, but also bad

2011-04-04 Thread Thordur Bjornsson
On Sun, Apr 03, 2011 at 06:38:51PM -0600, Theo de Raadt wrote:
 based on a conversation at the bar.
 
 POOL_DEBUG is expensive.  But we really want it because it finds bugs
 before they hurt us. The solution to this is to make it simpler to
 turn off.
 
 This diff starts the kernel with pool debug on, but allows it to be
 turned off with sysctl kern.pool_debug=0.  This does not gaurantee
 that all the pool pages will be unchecked, but it does help.
 
 This will let people who care about performance turn it off permanently
 in sysctl.conf; I think we will add a line there for people to know how
 to use it.

I like this. Means I can turn it off and on easily when I'm testing
diffs on my workstations.

 
 Index: kern/subr_pool.c
 ===
 RCS file: /cvs/src/sys/kern/subr_pool.c,v
 retrieving revision 1.100
 diff -u -r1.100 subr_pool.c
 --- kern/subr_pool.c  3 Apr 2011 22:07:37 -   1.100
 +++ kern/subr_pool.c  3 Apr 2011 22:59:39 -
 @@ -42,7 +42,7 @@
  #include sys/sysctl.h
  
  #include uvm/uvm.h
 -
 +#include dev/rndvar.h
  
  /*
   * Pool resource management utility.
 @@ -74,6 +74,7 @@
   caddr_t ph_page;/* this page's address */
   caddr_t ph_colored; /* page's colored address */
   int ph_pagesize;
 + int ph_magic;
  };
  
  struct pool_item {
 @@ -89,6 +90,7 @@
  #else
  #define  PI_MAGIC 0xdeafbeef
  #endif
 +int  pool_debug = 1;
  
  #define  POOL_NEEDS_CATCHUP(pp)  
 \
   ((pp)-pr_nitems  (pp)-pr_minitems)
 @@ -441,7 +443,8 @@
   else
   ph = pool_get(phpool, (flags  ~(PR_WAITOK | PR_ZERO)) |
   PR_NOWAIT);
 -
 + if (pool_debug)
 + ph-ph_magic = PI_MAGIC;
   return (ph);
  }
  
 @@ -611,13 +614,15 @@
   page %p; item addr %p; offset 0x%x=0x%x,
   pp-pr_wchan, ph-ph_page, pi, 0, pi-pi_magic);
  #ifdef POOL_DEBUG
 - for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
 - i  pp-pr_size / sizeof(int); i++) {
 - if (ip[i] != PI_MAGIC) {
 - panic(pool_do_get(%s): free list modified: 
 - page %p; item addr %p; offset 0x%x=0x%x,
 - pp-pr_wchan, ph-ph_page, pi,
 - i * sizeof(int), ip[i]);
 + if (pool_debug  ph-ph_magic) {
 + for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
 + i  pp-pr_size / sizeof(int); i++) {
 + if (ip[i] != ph-ph_magic) {
 + panic(pool_do_get(%s): free list modified: 
 + page %p; item addr %p; offset 0x%x=0x%x,
 + pp-pr_wchan, ph-ph_page, pi,
 + i * sizeof(int), ip[i]);
 + }
   }
   }
  #endif /* POOL_DEBUG */
 @@ -731,9 +736,11 @@
  #ifdef DIAGNOSTIC
   pi-pi_magic = PI_MAGIC;
  #ifdef POOL_DEBUG
 - for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
 - i  pp-pr_size / sizeof(int); i++)
 - ip[i] = PI_MAGIC;
 + if (ph-ph_magic) {
 + for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
 + i  pp-pr_size / sizeof(int); i++)
 + ip[i] = ph-ph_magic;
 + }
  #endif /* POOL_DEBUG */
  #endif /* DIAGNOSTIC */
  
 @@ -886,9 +893,11 @@
  #ifdef DIAGNOSTIC
   pi-pi_magic = PI_MAGIC;
  #ifdef POOL_DEBUG
 - for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
 - i  pp-pr_size / sizeof(int); i++)
 - ip[i] = PI_MAGIC;
 + if (ph-ph_magic) {
 + for (ip = (int *)pi, i = sizeof(*pi)/sizeof(int);
 + i  pp-pr_size / sizeof(int); i++)
 + ip[i] = ph-ph_magic;
 + }
  #endif /* POOL_DEBUG */
  #endif /* DIAGNOSTIC */
   cp = (caddr_t)(cp + pp-pr_size);
 @@ -1273,14 +1282,16 @@
   0, pi-pi_magic);
   }
  #ifdef POOL_DEBUG
 - for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
 - i  pp-pr_size / sizeof(int); i++) {
 - if (ip[i] != PI_MAGIC) {
 - printf(pool(%s): free list modified: 
 - page %p; item ordinal %d; addr %p 
 - (p %p); offset 0x%x=0x%x\n,
 - pp-pr_wchan, ph-ph_page, n, pi,
 - page, i * sizeof(int), ip[i]);
 + if (pool_debug  ph-ph_magic) {
 + for (ip = (int *)pi, i = sizeof(*pi) / sizeof(int);
 + i  pp-pr_size / sizeof(int); i++) {
 + if (ip[i] != ph-ph_magic) {
 + printf(pool(%s): free 

tweak for mount.h

2011-04-04 Thread Thordur Bjornsson
o Collapse a few _KERNEL's and move some definitions to
  allow this.
o Expose a few NFSMOUNT stuffs (one I will remove, the other
  I'm going to use later anyways).
o Kill a protection for a forward declaration
o Kill __STDC__ protection (which is under _KERNEL).

OK ?

Index: sys/mount.h
===
RCS file: /cvs/src/sys/sys/mount.h,v
retrieving revision 1.101
diff -u -p -r1.101 mount.h
--- sys/mount.h 4 Apr 2011 12:50:58 -   1.101
+++ sys/mount.h 4 Apr 2011 12:59:20 -
@@ -156,9 +156,7 @@ struct nfs_args3 {
 /*
  * NFS mount option flags
  */
-#ifndef _KERNEL
 #defineNFSMNT_RESVPORT 0x  /* always use reserved 
ports */
-#endif /* ! _KERNEL */
 #defineNFSMNT_SOFT 0x0001  /* soft mount (hard is 
default) */
 #defineNFSMNT_WSIZE0x0002  /* set write size */
 #defineNFSMNT_RSIZE0x0004  /* set read size */
@@ -174,9 +172,7 @@ struct nfs_args3 {
 #defineNFSMNT_LEASETERM0x1000  /* set lease term (nqnfs) */
 #defineNFSMNT_READAHEAD0x2000  /* set read ahead */
 #defineNFSMNT_DEADTHRESH   0x4000  /* set dead server retry 
thresh */
-#ifdef _KERNEL /* Coming soon to a system call near you! */
 #defineNFSMNT_NOAC 0x8000  /* disable attribute cache 
*/
-#endif /* _KERNEL */
 #defineNFSMNT_RDIRPLUS 0x0001  /* Use Readdirplus for V3 */
 #defineNFSMNT_READDIRSIZE  0x0002  /* Set readdir size */
 
@@ -430,6 +426,24 @@ struct mount {
 #define MNT_DOOMED 0x0800  /* device behind filesystem is gone */
 
 /*
+ * Flags for various system call interfaces.
+ *
+ * waitfor flags to vfs_sync() and getfsstat()
+ */
+#define MNT_WAIT   1   /* synchronously wait for I/O to complete */
+#define MNT_NOWAIT 2   /* start all I/O, but do not wait for it */
+#define MNT_LAZY   3   /* push data not written by filesystem syncer */
+
+/*
+ * Generic file handle
+ */
+struct fhandle {
+   fsid_t  fh_fsid;/* File system id of mount point */
+   struct  fid fh_fid; /* File sys specific id */
+};
+typedef struct fhandle fhandle_t;
+
+/*
  * Sysctl CTL_VFS definitions.
  *
  * Second level identifier specifies which filesystem. Second level
@@ -492,16 +506,12 @@ extern long buflowpages, bufhighpages, b
 extern int bufcachepercent;
 extern void bufadjust(int);
 extern int bufbackoff(void);
-#endif
 
 /*
  * Operations supported on mounted file system.
  */
-#ifdef _KERNEL
-#ifdef __STDC__
 struct nameidata;
 struct mbuf;
-#endif
 
 extern int maxvfsconf; /* highest defined filesystem type */
 extern struct vfsconf *vfsconf;/* head of list of filesystem types */
@@ -547,27 +557,8 @@ struct vfsops {
 #defineVFS_VPTOFH(VP, FIDP)  
(*(VP)-v_mount-mnt_op-vfs_vptofh)(VP, FIDP)
 #define VFS_CHECKEXP(MP, NAM, EXFLG, CRED) \
(*(MP)-mnt_op-vfs_checkexp)(MP, NAM, EXFLG, CRED)
-#endif /* _KERNEL */
 
-/*
- * Flags for various system call interfaces.
- *
- * waitfor flags to vfs_sync() and getfsstat()
- */
-#define MNT_WAIT   1   /* synchronously wait for I/O to complete */
-#define MNT_NOWAIT 2   /* start all I/O, but do not wait for it */
-#define MNT_LAZY   3   /* push data not written by filesystem syncer */
 
-/*
- * Generic file handle
- */
-struct fhandle {
-   fsid_t  fh_fsid;/* File system id of mount point */
-   struct  fid fh_fid; /* File sys specific id */
-};
-typedef struct fhandle fhandle_t;
-
-#ifdef _KERNEL
 #include net/radix.h
 #include sys/socket.h/* XXX for AF_MAX */
 
@@ -587,9 +578,7 @@ struct netexport {
struct  netcred ne_defexported;   /* Default export */
struct  radix_node_head *ne_rtable[AF_MAX+1]; /* Individual exports */
 };
-#endif /* _KERNEL */
 
-#ifdef _KERNEL
 /*
  * exported vnode operations
  */
@@ -626,10 +615,7 @@ void   vfsinit(void);
 intvfs_register(struct vfsconf *);
 intvfs_unregister(struct vfsconf *);
 #else /* _KERNEL */
-
-#ifndef _SYS_STAT_H_
 struct stat;
-#endif
 
 __BEGIN_DECLS
 intfstatfs(int, struct statfs *);



Re: netinet6 bread crumbs

2011-04-02 Thread Thordur Bjornsson
On Sat, Apr 02, 2011 at 02:13:45PM +0200, Stefan Sperling wrote:
 Feel free to ack or reject these individually.
 
 
 Kill redundant offsetof definitions. All of these files include sys/param.h.
Those look fine.
 
 Index: in6.c
 ===
 RCS file: /cvs/src/sys/netinet6/in6.c,v
 retrieving revision 1.89
 diff -u -p -r1.89 in6.c
 --- in6.c 7 Oct 2010 22:07:06 -   1.89
 +++ in6.c 2 Apr 2011 12:02:04 -
 @@ -910,14 +910,7 @@ in6_update_ifa(struct ifnet *ifp, struct
*/
   if (ia == NULL) {
   hostIsNew = 1;
 - /*
 -  * When in6_update_ifa() is called in a process of a received
 -  * RA, it is called under an interrupt context.  So, we should
 -  * call malloc with M_NOWAIT.
 -  */
 - ia = malloc(sizeof(*ia), M_IFADDR, M_NOWAIT | M_ZERO);
 - if (ia == NULL)
 - return (ENOBUFS);
 + ia = malloc(sizeof(*ia), M_IFADDR, M_WAITOK | M_ZERO);
This is a little bit suspect. But people who know the call path should
look at this.

typos look ok, but mah spjellingk is not vry good. (:



remove bufqs from vnds

2011-04-02 Thread Thordur Bjornsson
Hi,

So, it doesn't make sense to have a bufq for vnds.
  
The disk that stores the image backing the vnd has its own bufq,
of course, and what happens is that vnd puts a buf on its bufq,
which is promptly removed when we call vndstart, followed by a call
to strategy, so the buf ends up almost immediately on the bufq
of the underlying disk.

Tested on vnd/svnd (and with the image on NFS. vnd is broken on nfs!).

OK?


Index: vnd.c
===
RCS file: /home/thib/cvs/src/sys/dev/vnd.c,v
retrieving revision 1.107
diff -u -p -r1.107 vnd.c
--- vnd.c   15 Feb 2011 20:02:11 -  1.107
+++ vnd.c   2 Apr 2011 11:34:38 -
@@ -127,8 +127,6 @@ struct vnd_softc {
struct disk  sc_dk;
char sc_dk_name[16];
 
-   struct bufq  sc_bufq;
-
char sc_file[VNDNLEN];  /* file we're covering */
int  sc_flags;  /* flags */
size_t   sc_size;   /* size of vnd in sectors */
@@ -159,7 +157,7 @@ int numvnd = 0;
 void   vndattach(int);
 
 void   vndclear(struct vnd_softc *);
-void   vndstart(struct vnd_softc *);
+void   vndstart(struct vnd_softc *, struct buf *);
 intvndsetcred(struct vnd_softc *, struct ucred *);
 void   vndiodone(struct buf *);
 void   vndshutdown(void);
@@ -445,64 +443,50 @@ vndstrategy(struct buf *bp)
 
/* No bypassing of buffer cache?  */
if (vndsimple(bp-b_dev)) {
-   /* Loop until all queued requests are handled.  */
-   for (;;) {
-   int part = DISKPART(bp-b_dev);
-   daddr64_t off = DL_SECTOBLK(vnd-sc_dk.dk_label,
-   
DL_GETPOFFSET(vnd-sc_dk.dk_label-d_partitions[part]));
-   aiov.iov_base = bp-b_data;
-   auio.uio_resid = aiov.iov_len = bp-b_bcount;
-   auio.uio_iov = aiov;
-   auio.uio_iovcnt = 1;
-   auio.uio_offset = dbtob((off_t)(bp-b_blkno + off));
-   auio.uio_segflg = UIO_SYSSPACE;
-   auio.uio_procp = p;
-
-   vn_lock(vnd-sc_vp, LK_EXCLUSIVE | LK_RETRY, p);
-   if (bp-b_flags  B_READ) {
-   auio.uio_rw = UIO_READ;
-   bp-b_error = VOP_READ(vnd-sc_vp, auio, 0,
-   vnd-sc_cred);
-   if (vnd-sc_keyctx)
-   vndencrypt(vnd, bp-b_data,
-  bp-b_bcount, bp-b_blkno, 0);
-   } else {
-   if (vnd-sc_keyctx)
-   vndencrypt(vnd, bp-b_data,
-  bp-b_bcount, bp-b_blkno, 1);
-   auio.uio_rw = UIO_WRITE;
-   /*
-* Upper layer has already checked I/O for
-* limits, so there is no need to do it again.
-*/
-   bp-b_error = VOP_WRITE(vnd-sc_vp, auio,
-   IO_NOLIMIT, vnd-sc_cred);
-   /* Data in buffer cache needs to be in clear */
-   if (vnd-sc_keyctx)
-   vndencrypt(vnd, bp-b_data,
-  bp-b_bcount, bp-b_blkno, 0);
-   }
-   VOP_UNLOCK(vnd-sc_vp, 0, p);
-   if (bp-b_error)
-   bp-b_flags |= B_ERROR;
-   bp-b_resid = auio.uio_resid;
-   s = splbio();
-   biodone(bp);
-   splx(s);
-
-   /* If nothing more is queued, we are done. */
-   if (!bufq_peek(vnd-sc_bufq))
-   return;
-
+   int part = DISKPART(bp-b_dev);
+   daddr64_t off = DL_SECTOBLK(vnd-sc_dk.dk_label,
+   DL_GETPOFFSET(vnd-sc_dk.dk_label-d_partitions[part]));
+   aiov.iov_base = bp-b_data;
+   auio.uio_resid = aiov.iov_len = bp-b_bcount;
+   auio.uio_iov = aiov;
+   auio.uio_iovcnt = 1;
+   auio.uio_offset = dbtob((off_t)(bp-b_blkno + off));
+   auio.uio_segflg = UIO_SYSSPACE;
+   auio.uio_procp = p;
+
+   vn_lock(vnd-sc_vp, LK_EXCLUSIVE | LK_RETRY, p);
+   if (bp-b_flags  B_READ) {
+   auio.uio_rw = UIO_READ;
+   bp-b_error = VOP_READ(vnd-sc_vp, auio, 0,
+   vnd-sc_cred);
+   if (vnd-sc_keyctx)
+   vndencrypt(vnd, bp-b_data,
+   

Re: atascsi dma_alloc() - make atascsi play nicer with bigmem

2011-04-02 Thread Thordur Bjornsson
On Sat, Apr 02, 2011 at 09:15:37AM -0400, Kenneth R Westerback wrote:
 Another driver malloc'ing and passing potentially dma unsafe memory
 to do i/o into.
 
 ok?
yub
 
  Ken
 
 Index: atascsi.c
 ===
 RCS file: /cvs/src/sys/dev/ata/atascsi.c,v
 retrieving revision 1.101
 diff -u -p -r1.101 atascsi.c
 --- atascsi.c 3 Feb 2011 21:22:19 -   1.101
 +++ atascsi.c 2 Apr 2011 13:03:58 -
 @@ -26,6 +26,7 @@
  #include sys/device.h
  #include sys/proc.h
  #include sys/queue.h
 +#include sys/pool.h
  
  #include scsi/scsi_all.h
  #include scsi/scsi_disk.h
 @@ -335,8 +336,8 @@ atascsi_probe(struct scsi_link *link)
   xa = scsi_io_get(ahp-ahp_iopool, SCSI_NOSLEEP);
   if (xa == NULL)
   panic(no free xfers on a new port);
 - /* XXX dma reachable */
 - identify = malloc(sizeof(*identify), M_TEMP, M_WAITOK);
 + identify = dma_alloc(sizeof(*identify),
 + PR_WAITOK | PR_ZERO);
   xa-pmp_port = ap-ap_pmp_port;
   xa-data = identify;
   xa-datalen = sizeof(*identify);
 @@ -353,10 +354,10 @@ atascsi_probe(struct scsi_link *link)
   if (rv == 0) {
   bcopy(identify, ap-ap_identify,
   sizeof(ap-ap_identify));
 - free(identify, M_TEMP);
 + dma_free(identify, sizeof(*identify));
   break;
   }
 - free(identify, M_TEMP);
 + dma_free(identify, sizeof(*identify));
   delay(500);
   } while (count--);



Re: Incorrect exit status from which(1)/whereis(1)

2011-02-15 Thread Thordur Bjornsson
On Tue, Feb 15, 2011 at 05:30:11PM +, Jason McIntyre wrote:
 On Mon, Feb 14, 2011 at 11:31:18AM +, David Julio wrote:
  Is the exit status of which(1)/whereis(1) correct?
  
  $ which a b c
  which: a: Command not found
  which: b: Command not found
  which: c: Command not found
  
  $ echo $?
  2
 
  $ which -a a b c
  which: a: Command not found
  which: b: Command not found
  which: c: Command not found
  
  $ echo $?
  1
 
  If it is incorrect, below is my attempt to contribute.
 
 this command is not covered by posix, so there's no reference there.
 neither free nor netbsd document exit status for this command either.
 
 i have no access to such systems, but maybe someone who does can tell us
 how other bsd behave?
SunOS 5.10 doesn't document the exit status and doesn't have a -a switch.
A simple check shows that the exit status is 1 if none are found, 0
if all/some are found.

Linux behaves the same way, but has an exit code of 2 if there were
invalid arguments passed.

DragonFly BSD (and FreeBSD) behave the same, but return 1 if there
were invalid arguments.

No access to a NetBSD box, and their man page doesn't document the
exit status.
   
 if it's a doc bug, it can be fixed easy enough. that would seem strange
 behaviour though. if it's a software bug, any developer want to look at
 this?
What's correct there, I've no idea.

At the very least we should be consistent with our own man page, so the
diff might be the right solution.

kv, thib

  Index: which.c
  ===
  RCS file: /cvs/src/usr.bin/which/which.c,v
  retrieving revision 1.16
  diff -u -r1.16 which.c
  --- which.c 31 May 2010 14:01:49 -  1.16
  +++ which.c 14 Feb 2011 11:02:10 -
  @@ -55,11 +55,7 @@
  
  (void)setlocale(LC_ALL, );
  
  -   if (argc == 1)
  -   usage();
  -
  -   /* Don't accept command args but check since old whereis(1) used to */
  -   while ((ch = getopt(argc, argv, a)) != -1) {
  +   while ((ch = getopt(argc, argv, a)) != -1)
  switch (ch) {
  case 'a':
  allmatches = 1;
  @@ -67,7 +63,11 @@
  default:
  usage();
  }
  -   }
  +   argc -= optind;
  +   argv += optind;
  +
  +   if (argc == 0)
  +   usage();
  
  /*
   * which(1) uses user's $PATH.
  @@ -98,11 +98,11 @@
  if (setuid(geteuid()))
  err(1, Can't set uid to %u, geteuid());
  
  -   for (n = optind; n  argc; n++)
  +   for (n = 0; n  argc; n++)
  if (findprog(argv[n], path, progmode, allmatches) == 0)
  notfound++;
  
  -   exit((notfound == 0) ? 0 : ((notfound == argc - 1) ? 2 : 1));
  +   exit((notfound == 0) ? 0 : ((notfound == argc) ? 2 : 1));
   }
  
   int



Re: softraid clarification in manpage

2011-01-27 Thread Thordur Bjornsson
On Thu, Jan 27, 2011 at 02:35:54PM -0500, Nick Guenther wrote:
 On Thu, Jan 27, 2011 at 9:39 AM, Jason McIntyre j...@kerhand.co.uk wrote:
  On Wed, Jan 26, 2011 at 04:24:07PM -0600, Amit Kulkarni wrote:
  Hi,
 
  I just configured a mirror using softraid, the manpage was extremely
  helpful. I just copy pasted the relevant commands. To a complete
  newbie, it was missing a few more lines. I just added those and
  sending the diff. I hope a variation of this is accepted to make the
  how to create mirror using softraid is complete. Googling brings a
  lot of RAIDFrame etc, there is very little out there on a pure data
  mirror.
 
  Thanks,
  amit
 
 
  Index: softraid.4
  ===
  RCS file: /cvs/src/share/man/man4/softraid.4,v
  retrieving revision 1.27
  diff softraid.4
  147a148,159
   .Pp
   To use the freshly created mirror
   .Bd -literal -offset indent
   # mkdir /datamirror
   # mount /dev/sd0a /datamirror
   # chown normal_openbsd_user /datamirror
   .Ed
   .Pp
   Adding the following line to /etc/fstab will make it useful after every
 boot
   .Bd -literal -offset indent
   /dev/sd0a /datamirror ffs rw,softdep,nodev,nosuid 1 2
   .Pp
 
  (as an aside, please send unified diffs (-u) in future)
 
  there is a line near the end of EXAMPLES:
 
 The RAID volume is now ready to be used as a normal disk device.
 
  for me, the instructions you've added come under the umbrella of
  using the raid partition as a normal disk partition. so i'd argue
  that it's outside the remit of this page.
 
  maybe other developers think otherwise though.
 
 As someone who has long been a newbie, this sort of trivial pointer
 can be a life-saver. If you think like a programmer and see your OS as
 a program this sort of thing comes naturally, but lots of people
 don't.

IMO, these kinds of details are most suitable for a FAQ entry.
Maybe section 14.13 of the FAQ could use some love?



cut vnd's over to bufqs, again.

2010-12-29 Thread Thordur Bjornsson
hi,

so cut vnds over to bufqs. this diff is similar to a diff
that was committed, but got backed out after one of the
hackathon fiascos, with a small difference.

there is no reason to keep an active count, bufq_peek is
enough to figure out if the queue is empty or not.

in vndiodone, there is no need to jump through hoops to
figure out if we need to disk_unbusy(). We always need to:
there is a one-to-one pairing with disk_busy() in vndstart, as
we set the biodone callback to null so we don't end up there
twice.

OK?

ciao, thib. 


Index: dev/vnd.c
===
RCS file: /usr/cvs/src/sys/dev/vnd.c,v
retrieving revision 1.104
diff -u -p -r1.104 vnd.c
--- dev/vnd.c   22 Dec 2010 13:12:14 -  1.104
+++ dev/vnd.c   28 Dec 2010 11:54:44 -
@@ -1,4 +1,4 @@
-/* $OpenBSD: vnd.c,v 1.104 2010/12/22 13:12:14 jsing Exp $ */
+/* $OpenBSD: vnd.c,v 1.92 2009/06/04 05:57:27 krw Exp $*/
 /* $NetBSD: vnd.c,v 1.26 1996/03/30 23:06:11 christos Exp $*/
 
 /*
@@ -127,6 +127,8 @@ struct vnd_softc {
struct disk  sc_dk;
char sc_dk_name[16];
 
+   struct bufq  sc_bufq;
+
char sc_file[VNDNLEN];  /* file we're covering */
int  sc_flags;  /* flags */
size_t   sc_size;   /* size of vnd in sectors */
@@ -135,7 +137,6 @@ struct vnd_softc {
size_t   sc_ntracks;/* # of tracks per cylinder */
struct vnode*sc_vp; /* vnode */
struct ucred*sc_cred;   /* credentials */
-   struct buf   sc_tab;/* transfer queue */
blf_ctx *sc_keyctx; /* key context */
struct rwlocksc_rwlock;
 };
@@ -209,6 +210,7 @@ vndattach(int num)
vnd_softc = (struct vnd_softc *)mem;
for (i = 0; i  num; i++) {
rw_init(vnd_softc[i].sc_rwlock, vndlock);
+   bufq_init(vnd_softc[i].sc_bufq, BUFQ_DEFAULT);
}
numvnd = num;
 
@@ -489,8 +491,8 @@ vndstrategy(struct buf *bp)
biodone(bp);
splx(s);
 
-   /* If nothing more is queued, we are done.  */
-   if (!vnd-sc_tab.b_active)
+   /* If nothing more is queued, we are done. */
+   if (!bufq_peek(vnd-sc_bufq))
return;
 
/*
@@ -498,9 +500,8 @@ vndstrategy(struct buf *bp)
 * routine might queue using same links.
 */
s = splbio();
-   bp = vnd-sc_tab.b_actf;
-   vnd-sc_tab.b_actf = bp-b_actf;
-   vnd-sc_tab.b_active--;
+   bp = bufq_dequeue(vnd-sc_bufq);
+   KASSERT(bp != NULL);
splx(s);
}
}
@@ -596,13 +597,9 @@ vndstrategy(struct buf *bp)
splx(s);
return;
}
-   /*
-* Just sort by block number
-*/
-   nbp-vb_buf.b_cylinder = nbp-vb_buf.b_blkno;
+
+   bufq_queue(vnd-sc_bufq, nbp-vb_buf);
s = splbio();
-   disksort(vnd-sc_tab, nbp-vb_buf);
-   vnd-sc_tab.b_active++;
vndstart(vnd);
splx(s);
bn += sz;
@@ -625,8 +622,9 @@ vndstart(struct vnd_softc *vnd)
 * Dequeue now since lower level strategy routine might
 * queue using same links
 */
-   bp = vnd-sc_tab.b_actf;
-   vnd-sc_tab.b_actf = bp-b_actf;
+   bp = bufq_dequeue(vnd-sc_bufq);
+   if (bp == NULL)
+   return;
 
DNPRINTF(VDB_IO,
vndstart(%d): bp %p vp %p blkno %lld addr %p cnt %lx\n,
@@ -675,13 +673,8 @@ vndiodone(struct buf *bp)
 
 out:
putvndbuf(vbp);
-
-   if (vnd-sc_tab.b_active) {
-   disk_unbusy(vnd-sc_dk, (pbp-b_bcount - pbp-b_resid),
-   (pbp-b_flags  B_READ));
-   if (!vnd-sc_tab.b_actf)
-   vnd-sc_tab.b_active--;
-   }
+   disk_unbusy(vnd-sc_dk, (pbp-b_bcount - pbp-b_resid),
+   (pbp-b_flags  B_READ));
 }
 
 /* ARGSUSED */



Re: yield in long kernel loops

2010-10-14 Thread Thordur Bjornsson
On Wed, Oct 13, 2010 at 08:08:34PM -0400, Ted Unangst wrote:
 So it's not a good idea to perform long lasting operations in the kernel.  
 The scheduler doesn't deal well with it and nobody else gets to run.
 
 One of those long loops is loading a large table into pf.  If you're 
 lucky, you'll run out of memory and pool will finally sleep.
 
 I stuck a couple yield() calls into the long loops after sufficient 
 iteration.
 
 I also zapped PFR_FLAG_ATOMIC because it's not really atomic anyway.  I 
 also couldn't find any callers.  Leftover?
Mixing two things in the same diff like this isn't helpful,
especially since the zapping of this flag is 80% of this diff or so.
 
 Another thing to fix at some point is that we call splsoftnet and splx 
 multiple times per address in some cases, but fixing that was getting too 
 complicated and requires some more code shuffling.
Different thing.


 + if (++n % 1000 == 0)
 + yield();
While I see the point, this just screams HACK!

Not sure if this helps. Might be better to do this to something that's way
easier to instrument, maybe something similar to what the guy on misc@ was
hitting with /dev/urandom.

my two cents (note, they are icelandic cents and so not really worth anything! 
;)



Re: Slow I/O usb sticks

2010-10-06 Thread Thordur Bjornsson
On Wed, Oct 06, 2010 at 04:06:47PM -0300, Gonzalo L. R. wrote:
 Hi guys,
 
 I have a slow I/O in usb sticks with big files, I use -current with the 
 last weekend cvs code.
 
 If I cp a big file in the usb stick take several minutes (~10 minutes of 
 175M in a msdos stick, 2 minutes in a ffs stick), the usb stick have 
 msdos fs, this not happend with a ffs usb stick.
 
 I have the same issue in my dell vostro 1510.

Our MSDOS-FS code blows chunks, that's why.

Since this isn't a bug report, not much else I can say. You can play around
with profiling and read the code; it's in sys/msdosfs and it sure can use
some lovin'.

Have fun!
 
 Regards
 OpenBSD 4.8-current (GENERIC.MP) #12: Sat Oct  2 15:49:43 ART 2010
 r...@r0nin.sepp0.com.ar:/usr/src/sys/arch/i386/compile/GENERIC.MP
 cpu0: Intel(R) Core(TM)2 Duo CPU T7300 @ 2.00GHz (GenuineIntel 686-class) 2 
 GHz
 cpu0: 
 FPU,V86,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,SBF,SSE3,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,CX16,xTPR,PDCM
 real mem  = 1046761472 (998MB)
 avail mem = 1019613184 (972MB)
 mainbus0 at root
 bios0 at mainbus0: AT/286+ BIOS, date 04/18/08, BIOS32 rev. 0 @ 0xfdc80, 
 SMBIOS rev. 2.4 @ 0xe0010 (63 entries)
 bios0: vendor LENOVO version 7NETB2WW (2.12 ) date 04/18/2008
 bios0: LENOVO 767474Y
 acpi0 at bios0: rev 2
 acpi0: sleep states S0 S3 S4 S5
 acpi0: tables DSDT FACP SSDT ECDT TCPA APIC MCFG HPET SLIC BOOT ASF! SSDT 
 SSDT SSDT SSDT
 acpi0: wakeup devices LID_(S3) SLPB(S3) DURT(S3) IGBE(S4) EXP0(S4) EXP1(S4) 
 EXP2(S4) EXP3(S4) EXP4(S4) PCI1(S4) USB0(S3) USB1(S3) USB2(S3) USB3(S3) 
 USB4(S3) EHC0(S3) EHC1(S3) HDEF(S4)
 acpitimer0 at acpi0: 3579545 Hz, 24 bits
 acpiec0 at acpi0
 acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
 cpu0 at mainbus0: apid 0 (boot processor)
 cpu0: apic clock running at 199MHz
 cpu1 at mainbus0: apid 1 (application processor)
 cpu1: Intel(R) Core(TM)2 Duo CPU T7300 @ 2.00GHz (GenuineIntel 686-class) 2 
 GHz
 cpu1: 
 FPU,V86,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,SBF,SSE3,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,CX16,xTPR,PDCM
 ioapic0 at mainbus0: apid 1 pa 0xfec0, version 20, 24 pins
 ioapic0: misconfigured as apic 2, remapped to apid 1
 acpihpet0 at acpi0: 14318179 Hz
 acpiprt0 at acpi0: bus 0 (PCI0)
 acpiprt1 at acpi0: bus -1 (AGP_)
 acpiprt2 at acpi0: bus 2 (EXP0)
 acpiprt3 at acpi0: bus 3 (EXP1)
 acpiprt4 at acpi0: bus -1 (EXP2)
 acpiprt5 at acpi0: bus -1 (EXP3)
 acpiprt6 at acpi0: bus -1 (EXP4)
 acpiprt7 at acpi0: bus 5 (PCI1)
 acpicpu0 at acpi0: C3, C2, C1, PSS
 acpicpu1 at acpi0: C3, C2, C1, PSS
 acpipwrres0 at acpi0: PUBS
 acpitz0 at acpi0: critical temperature 127 degC
 acpitz1 at acpi0: critical temperature 99 degC
 acpibtn0 at acpi0: LID_
 acpibtn1 at acpi0: SLPB
 acpibat0 at acpi0: BAT0 model 42T4568 serial  6706 type LION oem SONY
 acpibat1 at acpi0: BAT1 not present
 acpibat2 at acpi0: BAT2 not present
 acpiac0 at acpi0: AC unit online
 acpithinkpad0 at acpi0
 acpidock0 at acpi0: GDCK not docked (0)
 bios0: ROM list: 0xc/0x1! 0xe/0x1!
 cpu0: Enhanced SpeedStep 1996 MHz: speeds: 2001, 2000, 1600, 1200, 800 MHz
 pci0 at mainbus0 bus 0: configuration mode 1 (bios)
 pchb0 at pci0 dev 0 function 0 Intel GM965 Host rev 0x0c
 vga1 at pci0 dev 2 function 0 Intel GM965 Video rev 0x0c
 wsdisplay0 at vga1 mux 1: console (80x25, vt100 emulation)
 wsdisplay0: screen 1-5 added (80x25, vt100 emulation)
 intagp0 at vga1
 agp0 at intagp0: aperture at 0xe000, size 0x1000
 inteldrm0 at vga1: apic 1 int 16 (irq 10)
 drm0 at inteldrm0
 Intel GM965 Video rev 0x0c at pci0 dev 2 function 1 not configured
 em0 at pci0 dev 25 function 0 Intel ICH8 IGP M AMT rev 0x03: apic 1 int 20 
 (irq 11), address 00:1d:72:92:6c:8f
 uhci0 at pci0 dev 26 function 0 Intel 82801H USB rev 0x03: apic 1 int 20 
 (irq 11)
 uhci1 at pci0 dev 26 function 1 Intel 82801H USB rev 0x03: apic 1 int 21 
 (irq 11)
 ehci0 at pci0 dev 26 function 7 Intel 82801H USB rev 0x03: apic 1 int 22 
 (irq 11)
 usb0 at ehci0: USB revision 2.0
 uhub0 at usb0 Intel EHCI root hub rev 2.00/1.00 addr 1
 azalia0 at pci0 dev 27 function 0 Intel 82801H HD Audio rev 0x03: apic 1 
 int 17 (irq 11)
 azalia0: codecs: Analog Devices AD1984, Conexant/0x2bfa, using Analog Devices 
 AD1984
 audio0 at azalia0
 ppb0 at pci0 dev 28 function 0 Intel 82801H PCIE rev 0x03: apic 1 int 20 
 (irq 11)
 pci1 at ppb0 bus 2
 ppb1 at pci0 dev 28 function 1 Intel 82801H PCIE rev 0x03: apic 1 int 21 
 (irq 11)
 pci2 at ppb1 bus 3
 iwn0 at pci2 dev 0 function 0 Intel Wireless WiFi Link 4965 rev 0x61: apic 
 1 int 17 (irq 11), MIMO 2T3R, MoW1, address 00:1f:3b:a0:f8:55
 uhci2 at pci0 dev 29 function 0 Intel 82801H USB rev 0x03: apic 1 int 16 
 (irq 10)
 uhci3 at pci0 dev 29 function 1 Intel 82801H USB rev 0x03: apic 1 int 17 
 (irq 11)
 ehci1 at pci0 dev 29 function 7 Intel 82801H USB rev 0x03: apic 1 int 19 
 (irq 11)
 usb1 at ehci1: USB revision 2.0
 

Re: more assertwaitok() love

2010-10-06 Thread Thordur Bjornsson
On Thu, Sep 30, 2010 at 12:29:54AM +, Thordur Bjornsson wrote:
 Hi.
 
 Try to catch more places where we sleep and are not allowed.
 
 One thing of note: msleep() is missing in this diff, but there
 it is necessary to call the sleep_setup routines with the mutex
 held, and after we release it we _will_ sleep, so a sleep there
 with another mutex held will be caught by the assertwaitok()
 in mi_switch().
 
 Also, define assertwaitok() out for !DIAGNOSTIC kernels.

No one wants to OK/comment on this besides matthew@?

 
 Comments/OKs?
 Index: kern/kern_rwlock.c
 ===
 RCS file: /home/cvs/src/sys/kern/kern_rwlock.c,v
 retrieving revision 1.16
 diff -u -p -r1.16 kern_rwlock.c
 --- kern/kern_rwlock.c24 Sep 2010 13:21:30 -  1.16
 +++ kern/kern_rwlock.c30 Sep 2010 00:12:12 -
 @@ -87,6 +87,8 @@ rw_enter_read(struct rwlock *rwl)
  {
   unsigned long owner = rwl-rwl_owner;
  
 + assertwaitok();
 +
   if (__predict_false((owner  RWLOCK_WRLOCK) ||
   rw_cas(rwl-rwl_owner, owner, owner + RWLOCK_READ_INCR)))
   rw_enter(rwl, RW_READ);
 @@ -97,6 +99,8 @@ rw_enter_write(struct rwlock *rwl)
  {
   struct proc *p = curproc;
  
 + assertwaitok();
 +
   if (__predict_false(rw_cas(rwl-rwl_owner, 0,
   RW_PROC(p) | RWLOCK_WRLOCK)))
   rw_enter(rwl, RW_WRITE);
 @@ -190,6 +194,9 @@ rw_enter(struct rwlock *rwl, int flags)
   struct sleep_state sls;
   unsigned long inc, o;
   int error;
 +
 + if (!(flags  RW_NOSLEEP))
 + assertwaitok();
  
   op = rw_ops[flags  RW_OPMASK];
  
 Index: kern/kern_synch.c
 ===
 RCS file: /home/cvs/src/sys/kern/kern_synch.c,v
 retrieving revision 1.95
 diff -u -p -r1.95 kern_synch.c
 --- kern/kern_synch.c 29 Jun 2010 00:28:14 -  1.95
 +++ kern/kern_synch.c 29 Sep 2010 21:55:58 -
 @@ -121,6 +121,8 @@ tsleep(const volatile void *ident, int p
   return (0);
   }
  
 + assertwaitok();
 +
   sleep_setup(sls, ident, priority, wmesg);
   sleep_setup_timeout(sls, timo);
   sleep_setup_signal(sls, priority);
 Index: kern/subr_pool.c
 ===
 RCS file: /home/cvs/src/sys/kern/subr_pool.c,v
 retrieving revision 1.98
 diff -u -p -r1.98 subr_pool.c
 --- kern/subr_pool.c  26 Sep 2010 21:03:57 -  1.98
 +++ kern/subr_pool.c  30 Sep 2010 00:03:15 -
 @@ -455,10 +455,8 @@ pool_get(struct pool *pp, int flags)
  
   KASSERT(flags  (PR_WAITOK | PR_NOWAIT));
  
 -#ifdef DIAGNOSTIC
   if ((flags  PR_WAITOK) != 0)
   assertwaitok();
 -#endif /* DIAGNOSTIC */
  
   mtx_enter(pp-pr_mtx);
   v = pool_do_get(pp, flags);
 Index: kern/subr_xxx.c
 ===
 RCS file: /home/cvs/src/sys/kern/subr_xxx.c,v
 retrieving revision 1.12
 diff -u -p -r1.12 subr_xxx.c
 --- kern/subr_xxx.c   28 Sep 2010 20:27:56 -  1.12
 +++ kern/subr_xxx.c   29 Sep 2010 21:55:03 -
 @@ -156,13 +156,15 @@ blktochr(dev_t dev)
  /*
   * Check that we're in a context where it's okay to sleep.
   */
 +
 +#ifdef DIAGNOSTIC
  void
  assertwaitok(void)
  {
   splassert(IPL_NONE);
 -#ifdef DIAGNOSTIC
 +
   if (curcpu()-ci_mutex_level != 0)
   panic(assertwaitok: non-zero mutex count: %d,
   curcpu()-ci_mutex_level);
 -#endif
  }
 +#endif
 Index: sys/systm.h
 ===
 RCS file: /home/cvs/src/sys/sys/systm.h,v
 retrieving revision 1.86
 diff -u -p -r1.86 systm.h
 --- sys/systm.h   21 Sep 2010 01:09:10 -  1.86
 +++ sys/systm.h   30 Sep 2010 00:02:51 -
 @@ -179,7 +179,11 @@ void ttyprintf(struct tty *, const char 
  void splassert_fail(int, int, const char *);
  extern   int splassert_ctl;
  
 +#ifdef DIAGNOSTIC
  void assertwaitok(void);
 +#else
 +#define  assertwaitok()  do { /* nothing */ } while (0)
 +#endif
  
  void tablefull(const char *);



more assertwaitok() love

2010-09-29 Thread Thordur Bjornsson
Hi.

Try to catch more places where we sleep and are not allowed.

One thing of note: msleep() is missing in this diff, but there
it is necessary to call the sleep_setup routines with the mutex
held, and after we release it we _will_ sleep, so a sleep there
with another mutex held will be caught by the assertwaitok()
in mi_switch().

Also, define assertwaitok() out for !DIAGNOSTIC kernels.

Comments/OKs?
Index: kern/kern_rwlock.c
===
RCS file: /home/cvs/src/sys/kern/kern_rwlock.c,v
retrieving revision 1.16
diff -u -p -r1.16 kern_rwlock.c
--- kern/kern_rwlock.c  24 Sep 2010 13:21:30 -  1.16
+++ kern/kern_rwlock.c  30 Sep 2010 00:12:12 -
@@ -87,6 +87,8 @@ rw_enter_read(struct rwlock *rwl)
 {
unsigned long owner = rwl-rwl_owner;
 
+   assertwaitok();
+
if (__predict_false((owner  RWLOCK_WRLOCK) ||
rw_cas(rwl-rwl_owner, owner, owner + RWLOCK_READ_INCR)))
rw_enter(rwl, RW_READ);
@@ -97,6 +99,8 @@ rw_enter_write(struct rwlock *rwl)
 {
struct proc *p = curproc;
 
+   assertwaitok();
+
if (__predict_false(rw_cas(rwl-rwl_owner, 0,
RW_PROC(p) | RWLOCK_WRLOCK)))
rw_enter(rwl, RW_WRITE);
@@ -190,6 +194,9 @@ rw_enter(struct rwlock *rwl, int flags)
struct sleep_state sls;
unsigned long inc, o;
int error;
+
+   if (!(flags  RW_NOSLEEP))
+   assertwaitok();
 
op = rw_ops[flags  RW_OPMASK];
 
Index: kern/kern_synch.c
===
RCS file: /home/cvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.95
diff -u -p -r1.95 kern_synch.c
--- kern/kern_synch.c   29 Jun 2010 00:28:14 -  1.95
+++ kern/kern_synch.c   29 Sep 2010 21:55:58 -
@@ -121,6 +121,8 @@ tsleep(const volatile void *ident, int p
return (0);
}
 
+   assertwaitok();
+
sleep_setup(sls, ident, priority, wmesg);
sleep_setup_timeout(sls, timo);
sleep_setup_signal(sls, priority);
Index: kern/subr_pool.c
===
RCS file: /home/cvs/src/sys/kern/subr_pool.c,v
retrieving revision 1.98
diff -u -p -r1.98 subr_pool.c
--- kern/subr_pool.c26 Sep 2010 21:03:57 -  1.98
+++ kern/subr_pool.c30 Sep 2010 00:03:15 -
@@ -455,10 +455,8 @@ pool_get(struct pool *pp, int flags)
 
KASSERT(flags  (PR_WAITOK | PR_NOWAIT));
 
-#ifdef DIAGNOSTIC
if ((flags  PR_WAITOK) != 0)
assertwaitok();
-#endif /* DIAGNOSTIC */
 
mtx_enter(pp-pr_mtx);
v = pool_do_get(pp, flags);
Index: kern/subr_xxx.c
===
RCS file: /home/cvs/src/sys/kern/subr_xxx.c,v
retrieving revision 1.12
diff -u -p -r1.12 subr_xxx.c
--- kern/subr_xxx.c 28 Sep 2010 20:27:56 -  1.12
+++ kern/subr_xxx.c 29 Sep 2010 21:55:03 -
@@ -156,13 +156,15 @@ blktochr(dev_t dev)
 /*
  * Check that we're in a context where it's okay to sleep.
  */
+
+#ifdef DIAGNOSTIC
 void
 assertwaitok(void)
 {
splassert(IPL_NONE);
-#ifdef DIAGNOSTIC
+
if (curcpu()-ci_mutex_level != 0)
panic(assertwaitok: non-zero mutex count: %d,
curcpu()-ci_mutex_level);
-#endif
 }
+#endif
Index: sys/systm.h
===
RCS file: /home/cvs/src/sys/sys/systm.h,v
retrieving revision 1.86
diff -u -p -r1.86 systm.h
--- sys/systm.h 21 Sep 2010 01:09:10 -  1.86
+++ sys/systm.h 30 Sep 2010 00:02:51 -
@@ -179,7 +179,11 @@ void   ttyprintf(struct tty *, const char 
 void   splassert_fail(int, int, const char *);
 extern int splassert_ctl;
 
+#ifdef DIAGNOSTIC
 void   assertwaitok(void);
+#else
+#defineassertwaitok()  do { /* nothing */ } while (0)
+#endif
 
 void   tablefull(const char *);



de-static uvm_swap

2010-09-24 Thread Thordur Bjornsson
Hi,

I'm hitting some panics coming through the swap code, and it is a bit
annoying that the static functions do not show up in the ddb trace.

OK ?

Index: uvm/uvm_swap.c
===
RCS file: /home/cvs/src/sys/uvm/uvm_swap.c,v
retrieving revision 1.97
diff -u -p -r1.97 uvm_swap.c
--- uvm/uvm_swap.c  10 Sep 2010 16:34:09 -  1.97
+++ uvm/uvm_swap.c  24 Sep 2010 19:53:22 -
@@ -218,11 +218,11 @@ struct pool vndbuf_pool;
 /*
  * local variables
  */
-static struct extent *swapmap; /* controls the mapping of /dev/drum */
+struct extent *swapmap;/* controls the mapping of /dev/drum */
 
 /* list of all active swap devices [by priority] */
 LIST_HEAD(swap_priority, swappri);
-static struct swap_priority swap_priority;
+struct swap_priority swap_priority;
 
 /* locks */
 struct rwlock swap_syscall_lock = RWLOCK_INITIALIZER(swplk);
@@ -230,25 +230,25 @@ struct rwlock swap_syscall_lock = RWLOCK
 /*
  * prototypes
  */
-static void swapdrum_add(struct swapdev *, int);
-static struct swapdev  *swapdrum_getsdp(int);
+voidswapdrum_add(struct swapdev *, int);
+struct swapdev *swapdrum_getsdp(int);
 
-static struct swapdev  *swaplist_find(struct vnode *, int);
-static void swaplist_insert(struct swapdev *, 
-struct swappri *, int);
-static void swaplist_trim(void);
+struct swapdev *swaplist_find(struct vnode *, int);
+voidswaplist_insert(struct swapdev *, 
+struct swappri *, int);
+voidswaplist_trim(void);
 
-static int swap_on(struct proc *, struct swapdev *);
-static int swap_off(struct proc *, struct swapdev *);
+int swap_on(struct proc *, struct swapdev *);
+int swap_off(struct proc *, struct swapdev *);
 
-static void sw_reg_strategy(struct swapdev *, struct buf *, int);
+void sw_reg_strategy(struct swapdev *, struct buf *, int);
 void sw_reg_iodone(struct buf *);
 void sw_reg_iodone_internal(void *, void *);
-static void sw_reg_start(struct swapdev *);
+void sw_reg_start(struct swapdev *);
 
-static int uvm_swap_io(struct vm_page **, int, int, int);
+int uvm_swap_io(struct vm_page **, int, int, int);
 
-static void swapmount(void);
+void swapmount(void);
 boolean_t uvm_swap_allocpages(struct vm_page **, int);
 
 #ifdef UVM_SWAP_ENCRYPT
@@ -481,7 +481,7 @@ uvm_swap_finicrypt_all(void)
  * FREE it if we don't need it... this it to prevent malloc blocking
  * here while adding swap)
  */
-static void
+void
 swaplist_insert(struct swapdev *sdp, struct swappri *newspp, int priority)
 {
struct swappri *spp, *pspp;
@@ -533,7 +533,7 @@ swaplist_insert(struct swapdev *sdp, str
  * = caller must hold both swap_syscall_lock and uvm.swap_data_lock
  * = we return the swapdev we found (and removed)
  */
-static struct swapdev *
+struct swapdev *
 swaplist_find(struct vnode *vp, boolean_t remove)
 {
struct swapdev *sdp;
@@ -566,7 +566,7 @@ swaplist_find(struct vnode *vp, boolean_
  *
  * = caller must hold both swap_syscall_lock and uvm.swap_data_lock
  */
-static void
+void
 swaplist_trim(void)
 {
struct swappri *spp, *nextspp;
@@ -587,7 +587,7 @@ swaplist_trim(void)
  * = caller must hold swap_syscall_lock
  * = uvm.swap_data_lock should be unlocked (we may sleep)
  */
-static void
+void
 swapdrum_add(struct swapdev *sdp, int npages)
 {
u_long result;
@@ -607,7 +607,7 @@ swapdrum_add(struct swapdev *sdp, int np
  * = each swapdev takes one big contig chunk of the drum
  * = caller must hold uvm.swap_data_lock
  */
-static struct swapdev *
+struct swapdev *
 swapdrum_getsdp(int pgno)
 {
struct swapdev *sdp;
@@ -895,7 +895,7 @@ out:
  * = caller should leave uvm.swap_data_lock unlocked, we may lock it
  * if needed.
  */
-static int
+int
 swap_on(struct proc *p, struct swapdev *sdp)
 {
static int count = 0;   /* static */
@@ -1094,7 +1094,7 @@ bad:
  *
  * = swap data should be locked, we will unlock.
  */
-static int
+int
 swap_off(struct proc *p, struct swapdev *sdp)
 {
int error = 0;
@@ -1243,7 +1243,7 @@ swstrategy(struct buf *bp)
 /*
  * sw_reg_strategy: handle swap i/o to regular files
  */
-static void
+void
 sw_reg_strategy(struct swapdev *sdp, struct buf *bp, int bn)
 {
struct vnode*vp;
@@ -1416,7 +1416,7 @@ out: /* Arrive here at splbio */
 }
 
 /* sw_reg_start: start an I/O request on the requested swapdev. */
-static void
+void
 sw_reg_start(struct swapdev *sdp)
 {
struct buf  *bp;
@@ -1762,7 +1762,7 @@ uvm_swap_get(struct vm_page *page, int s
  * uvm_swap_io: do an i/o operation to swap
  */
 
-static int
+int
 uvm_swap_io(struct vm_page **pps, int startslot, int npages, int flags)
 {
daddr64_t startblk;
@@ -2086,7 +2086,7 @@ uvm_swap_io(struct vm_page **pps, int st
return (result);
 }
 
-static void
+void
 swapmount(void)
 {
struct swapdev *sdp;



Re: de-static uvm_swap

2010-09-24 Thread Thordur Bjornsson
and I'd like to kill these too:


Index: uvm_pdaemon.c
===
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
retrieving revision 1.55
diff -u -p -r1.55 uvm_pdaemon.c
--- uvm_pdaemon.c   14 Oct 2009 17:53:30 -  1.55
+++ uvm_pdaemon.c   24 Sep 2010 22:31:47 -
@@ -96,9 +96,9 @@
  * local prototypes
  */
 
-static voiduvmpd_scan(void);
-static boolean_t   uvmpd_scan_inactive(struct pglist *);
-static voiduvmpd_tune(void);
+void   uvmpd_scan(void);
+boolean_t  uvmpd_scan_inactive(struct pglist *);
+void   uvmpd_tune(void);
 
 /*
  * uvm_wait: wait (sleep) for the page daemon to free some pages
@@ -155,7 +155,7 @@ uvm_wait(const char *wmsg)
  * = caller must call with page queues locked
  */
 
-static void
+void
 uvmpd_tune(void)
 {
UVMHIST_FUNC(uvmpd_tune); UVMHIST_CALLED(pdhist);
@@ -329,7 +329,7 @@ uvm_aiodone_daemon(void *arg)
  * = we return TRUE if we are exiting because we met our target
  */
 
-static boolean_t
+boolean_t
 uvmpd_scan_inactive(struct pglist *pglst)
 {
boolean_t retval = FALSE;   /* assume we haven't hit target */



Re: fstab.5: FSTAB_RQ

2010-09-23 Thread Thordur Bjornsson
On Thu, Sep 23, 2010 at 06:36:43PM +0059, Jason McIntyre wrote:
 is there a reason why we don't document FSTAB_RQ?
Not one that I can think of. If this works as intended
go ahead (it should).

 jmc
 
 Index: fstab.5
 ===
 RCS file: /cvs/src/share/man/man5/fstab.5,v
 retrieving revision 1.42
 diff -u -r1.42 fstab.5
 --- fstab.5   8 Jun 2009 17:03:15 -   1.42
 +++ fstab.5   23 Sep 2010 17:36:53 -
 @@ -183,7 +183,8 @@
  If
  .Fa fs_type
  is
 -.Dq rw
 +.Dq rw ,
 +.Dq rq ,
  or
  .Dq ro
  then the filesystem whose name is given in the
 @@ -243,7 +244,8 @@
  .Xr fsck 8
  will assume that the filesystem does not need to be checked.
  .Bd -literal
 -#define  FSTAB_RWrw/* read-write device */
 +#define  FSTAB_RWrw/* read/write device */
 +#define  FSTAB_RQrq/* read/write with quotas */
  #define  FSTAB_ROro/* read-only device */
  #define  FSTAB_SWsw/* swap device */
  #define  FSTAB_XXxx/* ignore totally */
 @@ -253,7 +255,7 @@
   char*fs_file;   /* filesystem path prefix */
   char*fs_vfstype;/* type of filesystem */
   char*fs_mntops; /* comma separated mount options */
 - char*fs_type;   /* rw, ro, sw, or xx */
 + char*fs_type;   /* rw, rq, ro, sw, or xx */
   int fs_freq;/* dump frequency, in days */
   int fs_passno;  /* pass number on parallel fsck */
  };



Re: Source Overview

2010-04-19 Thread Thordur Bjornsson
 And if you value your sanity, stay out of anything resembling filesystems.
This is a lie.

Hacking on filesystems, and the VFS layer in general, is a very rewarding
experience, just ask Bob.

NFS for example, has been a source of joy for OpenBSD developers for
years!

 2)  Is there something like an openbsd janitors project where newbies can
 start contributing small patches? similar to the Linux janitors project?

 Not at all. The philosophy behind not having one is that it's considered
 dangerous to farm out work to the inexperienced (and this exact topic has
 been brought up before, usually by people whining that we didn't make them
 feel special enough by not having one).
Also, it leads to people doing KNF style diffs just to do KNF style
diffs. No one learns anything. Most KNF style diffs you see coming from
developers are due to them having to read some code, and they cleaned up a
little while doing so.

While KNF is great, doing KNF just for the sake of doing KNF is hardly
ever worth it, IMHO.