Module Name:    src
Committed By:   pooka
Date:           Mon Mar 23 11:48:33 UTC 2009

Modified Files:
        src/sys/rump/librump/rumpuser: rumpuser_pth.c
        src/sys/rump/librump/rumpvfs: genfs_io.c rumpblk.c

Log Message:
Support async writes; this can benefit the mmio case.


To generate a diff of this commit:
cvs rdiff -u -r1.30 -r1.31 src/sys/rump/librump/rumpuser/rumpuser_pth.c
cvs rdiff -u -r1.8 -r1.9 src/sys/rump/librump/rumpvfs/genfs_io.c
cvs rdiff -u -r1.12 -r1.13 src/sys/rump/librump/rumpvfs/rumpblk.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/rump/librump/rumpuser/rumpuser_pth.c
diff -u src/sys/rump/librump/rumpuser/rumpuser_pth.c:1.30 src/sys/rump/librump/rumpuser/rumpuser_pth.c:1.31
--- src/sys/rump/librump/rumpuser/rumpuser_pth.c:1.30	Wed Mar 18 10:22:45 2009
+++ src/sys/rump/librump/rumpuser/rumpuser_pth.c	Mon Mar 23 11:48:32 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: rumpuser_pth.c,v 1.30 2009/03/18 10:22:45 cegger Exp $	*/
+/*	$NetBSD: rumpuser_pth.c,v 1.31 2009/03/23 11:48:32 pooka Exp $	*/
 
 /*
  * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
@@ -30,7 +30,7 @@
 
 #include <sys/cdefs.h>
 #if !defined(lint)
-__RCSID("$NetBSD: rumpuser_pth.c,v 1.30 2009/03/18 10:22:45 cegger Exp $");
+__RCSID("$NetBSD: rumpuser_pth.c,v 1.31 2009/03/23 11:48:32 pooka Exp $");
 #endif /* !lint */
 
 #ifdef __linux__
@@ -151,6 +151,7 @@
 
 		NOFAIL_ERRNO(pthread_mutex_lock(&rumpuser_aio_mtx.pthmtx));
 		rumpuser_aio_tail = (rumpuser_aio_tail+1) % N_AIOS;
+		pthread_cond_signal(&rumpuser_aio_cv.pthcv);
 	}
 }
 

Index: src/sys/rump/librump/rumpvfs/genfs_io.c
diff -u src/sys/rump/librump/rumpvfs/genfs_io.c:1.8 src/sys/rump/librump/rumpvfs/genfs_io.c:1.9
--- src/sys/rump/librump/rumpvfs/genfs_io.c:1.8	Sun Mar 22 13:38:54 2009
+++ src/sys/rump/librump/rumpvfs/genfs_io.c	Mon Mar 23 11:48:33 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: genfs_io.c,v 1.8 2009/03/22 13:38:54 pooka Exp $	*/
+/*	$NetBSD: genfs_io.c,v 1.9 2009/03/23 11:48:33 pooka Exp $	*/
 
 /*
  * Copyright (c) 2007 Antti Kantee.  All Rights Reserved.
@@ -29,7 +29,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.8 2009/03/22 13:38:54 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.9 2009/03/23 11:48:33 pooka Exp $");
 
 #include <sys/param.h>
 #include <sys/buf.h>
@@ -219,8 +219,10 @@
 
 		VOP_STRATEGY(devvp, bp);
 		
-		if (!async)
+		if (!async) {
+			biowait(bp);
 			putiobuf(bp);
+		}
 	}
 
 	/* skip to beginning of pages we're interested in */
@@ -274,6 +276,14 @@
 	    ap->a_flags, NULL);
 }
 
+static void
+rump_putiodone(struct buf *bp)
+{
+
+	kmem_free(bp->b_data, bp->b_bufsize);
+	putiobuf(bp);
+}
+
 /*
  * This is a slightly strangely structured routine.  It always puts
  * all the pages for a vnode.  It starts by releasing pages which
@@ -287,20 +297,17 @@
 genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags,
 	struct vm_page **busypg)
 {
-	char databuf[MAXPHYS];
+	uint8_t databuf[MAXPHYS], *datap;
 	struct uvm_object *uobj = &vp->v_uobj;
 	struct vm_page *pg, *pg_next;
+	struct buf *bp, *mbp;
 	voff_t smallest;
 	voff_t curoff, bufoff;
 	off_t eof;
-	size_t xfersize;
+	size_t xfersize, skipbytes = 0;
 	int bshift = vp->v_mount->mnt_fs_bshift;
 	int bsize = 1 << bshift;
-#if 0
 	int async = (flags & PGO_SYNCIO) == 0;
-#else
-	int async = 0;
-#endif
 
 	DPRINTF(("genfs_do_putpages: vnode %p, startoff %lld, endoff %lld\n",
 	    vp, (long long)startoff, (long long)endoff));
@@ -337,6 +344,12 @@
 		return 0;
 	}
 
+	if (async) {
+		datap = kmem_alloc(MAXPHYS, KM_SLEEP);
+	} else {
+		datap = databuf;
+	}
+
 	/* we need to flush */
 	GOP_SIZE(vp, vp->v_writesize, &eof, 0);
 	for (curoff = smallest; curoff < eof; curoff += PAGE_SIZE) {
@@ -351,21 +364,34 @@
 		/* XXX: see comment about above KASSERT */
 		KASSERT((pg->flags & PG_BUSY) == 0);
 
-		curva = databuf + (curoff-smallest);
+		curva = datap + (curoff-smallest);
 		memcpy(curva, (void *)pg->uanon, PAGE_SIZE);
 		rumpvm_enterva((vaddr_t)curva, pg);
 
 		pg->flags |= PG_CLEAN | PG_BUSY;
 	}
 	KASSERT(curoff > smallest);
-
 	mutex_exit(&uobj->vmobjlock);
 
+	mbp = getiobuf(vp, true);
+	mbp->b_bufsize = MAXPHYS;
+	mbp->b_data = datap;
+	mbp->b_resid = mbp->b_bcount = curoff-smallest;
+	mbp->b_cflags |= BC_BUSY;
+	mbp->b_flags = B_WRITE;
+	if (async) {
+		mbp->b_flags |= B_ASYNC;
+		mbp->b_iodone = rump_putiodone;
+	}
+	mutex_enter(&vp->v_interlock);
+	++vp->v_numoutput;
+	mutex_exit(&vp->v_interlock);
+
 	/* then we write */
 	for (bufoff = 0; bufoff < curoff-smallest; bufoff+=xfersize) {
-		struct buf *bp;
 		struct vnode *devvp;
 		daddr_t bn, lbn;
+		size_t iotodo;
 		int run, error;
 
 		lbn = (smallest + bufoff) >> bshift;
@@ -382,46 +408,45 @@
 		 * in the kernel page cache while truncate has already
 		 * enlarged the file.  So just ignore those ranges.
 		 */
-		if (bn == -1)
+		if (bn == -1) {
+			skipbytes += xfersize;
 			continue;
-
-		bp = getiobuf(vp, true);
+		}
 
 		/* only write max what we are allowed to write */
-		bp->b_bcount = xfersize;
+		iotodo = xfersize;
 		if (smallest + bufoff + xfersize > eof)
-			bp->b_bcount -= (smallest+bufoff+xfersize) - eof;
-		bp->b_bcount = (bp->b_bcount + DEV_BSIZE-1) & ~(DEV_BSIZE-1);
+			iotodo -= (smallest+bufoff+xfersize) - eof;
+		iotodo = (iotodo + DEV_BSIZE-1) & ~(DEV_BSIZE-1);
 
-		KASSERT(bp->b_bcount > 0);
+		/*
+		 * Compensate for potentially smaller write.  This will
+		 * be zero except near eof.
+		 */
+		skipbytes += xfersize - iotodo;
+
+		KASSERT(iotodo > 0);
 		KASSERT(smallest >= 0);
 
 		DPRINTF(("putpages writing from %x to %x (vp size %x)\n",
 		    (int)(smallest + bufoff),
-		    (int)(smallest + bufoff + bp->b_bcount),
+		    (int)(smallest + bufoff + iotodo),
 		    (int)eof));
 
-		bp->b_bufsize = round_page(bp->b_bcount);
+		bp = getiobuf(vp, true);
+		nestiobuf_setup(mbp, bp, bufoff, iotodo);
 		bp->b_lblkno = 0;
 		bp->b_blkno = bn + (((smallest+bufoff)&(bsize-1))>>DEV_BSHIFT);
-		bp->b_data = databuf + bufoff;
-		bp->b_flags = B_WRITE;
-		bp->b_cflags |= BC_BUSY;
-
-		if (async) {
-			bp->b_flags |= B_ASYNC;
-			bp->b_iodone = uvm_aio_biodone;
-		}
 
-		vp->v_numoutput++;
 		VOP_STRATEGY(devvp, bp);
-		if (bp->b_error)
-			panic("%s: VOP_STRATEGY lazy bum %d",
-			    __func__, bp->b_error);
-		if (!async)
-			putiobuf(bp);
 	}
 	rumpvm_flushva(uobj);
+	nestiobuf_done(mbp, skipbytes, 0);
+
+	if (!async) {
+		biowait(mbp);
+		putiobuf(mbp);
+	}
 
 	mutex_enter(&uobj->vmobjlock);
 	goto restart;

Index: src/sys/rump/librump/rumpvfs/rumpblk.c
diff -u src/sys/rump/librump/rumpvfs/rumpblk.c:1.12 src/sys/rump/librump/rumpvfs/rumpblk.c:1.13
--- src/sys/rump/librump/rumpvfs/rumpblk.c:1.12	Mon Mar 23 10:26:49 2009
+++ src/sys/rump/librump/rumpvfs/rumpblk.c	Mon Mar 23 11:48:33 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: rumpblk.c,v 1.12 2009/03/23 10:26:49 pooka Exp $	*/
+/*	$NetBSD: rumpblk.c,v 1.13 2009/03/23 11:48:33 pooka Exp $	*/
 
 /*
  * Copyright (c) 2009 Antti Kantee.  All Rights Reserved.
@@ -34,7 +34,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.12 2009/03/23 10:26:49 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.13 2009/03/23 11:48:33 pooka Exp $");
 
 #include <sys/param.h>
 #include <sys/buf.h>
@@ -195,7 +195,8 @@
 		rblk->rblk_dl.d_secsize = DEV_BSIZE;
 		rblk->rblk_curpi = &rblk->rblk_pi;
 	} else {
-		if (rumpuser_ioctl(fd,DIOCGDINFO, &rblk->rblk_dl, &error) != -1) {
+		if (rumpuser_ioctl(fd, DIOCGDINFO, &rblk->rblk_dl,
+		    &error) != -1) {
 			rumpuser_close(fd, &dummy);
 			return error;
 		}
@@ -204,6 +205,9 @@
 	}
 	rblk->rblk_fd = fd;
 	rblk->rblk_mem = mem;
+	if (rblk->rblk_mem != NULL)
+		printf("rumpblk%d: using mmio for %s\n",
+		    minor(dev), rblk->rblk_path);
 
 	return 0;
 }
@@ -332,19 +336,12 @@
 	 * Synchronous I/O is done directly in the context mainly to
 	 * avoid unnecessary scheduling with the I/O thread.
 	 */
-	if (async && rump_threads) {
+	if (rump_threads) {
 		struct rumpuser_aio *rua;
 
 		rumpuser_mutex_enter(&rumpuser_aio_mtx);
-		/*
-		 * Check if our buffer is full.  Doing it this way
-		 * throttles the I/O a bit if we have a massive
-		 * async I/O burst.
-		 */
-		if ((rumpuser_aio_head+1) % N_AIOS == rumpuser_aio_tail) {
-			rumpuser_mutex_exit(&rumpuser_aio_mtx);
-			goto syncfallback;
-		}
+		while ((rumpuser_aio_head+1) % N_AIOS == rumpuser_aio_tail)
+			rumpuser_cv_wait(&rumpuser_aio_cv, &rumpuser_aio_mtx);
 
 		rua = &rumpuser_aios[rumpuser_aio_head];
 		KASSERT(rua->rua_bp == NULL);
@@ -359,8 +356,13 @@
 		rumpuser_aio_head = (rumpuser_aio_head+1) % N_AIOS;
 		rumpuser_cv_signal(&rumpuser_aio_cv);
 		rumpuser_mutex_exit(&rumpuser_aio_mtx);
+
+		/* make sure non-async writes end up on backing media */
+		if (BUF_ISWRITE(bp) && !async) {
+			biowait(bp);
+			rumpuser_fsync(rblk->rblk_fd, &error);
+		}
 	} else {
- syncfallback:
 		if (BUF_ISREAD(bp)) {
 			rumpuser_read_bio(rblk->rblk_fd, bp->b_data,
 			    bp->b_bcount, off, rump_biodone, bp);

Reply via email to