Module Name: src
Committed By: pooka
Date: Mon Mar 23 11:48:33 UTC 2009
Modified Files:
src/sys/rump/librump/rumpuser: rumpuser_pth.c
src/sys/rump/librump/rumpvfs: genfs_io.c rumpblk.c
Log Message:
Support async writes, which can benefit the mmio case.
To generate a diff of this commit:
cvs rdiff -u -r1.30 -r1.31 src/sys/rump/librump/rumpuser/rumpuser_pth.c
cvs rdiff -u -r1.8 -r1.9 src/sys/rump/librump/rumpvfs/genfs_io.c
cvs rdiff -u -r1.12 -r1.13 src/sys/rump/librump/rumpvfs/rumpblk.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/rump/librump/rumpuser/rumpuser_pth.c
diff -u src/sys/rump/librump/rumpuser/rumpuser_pth.c:1.30 src/sys/rump/librump/rumpuser/rumpuser_pth.c:1.31
--- src/sys/rump/librump/rumpuser/rumpuser_pth.c:1.30 Wed Mar 18 10:22:45 2009
+++ src/sys/rump/librump/rumpuser/rumpuser_pth.c Mon Mar 23 11:48:32 2009
@@ -1,4 +1,4 @@
-/* $NetBSD: rumpuser_pth.c,v 1.30 2009/03/18 10:22:45 cegger Exp $ */
+/* $NetBSD: rumpuser_pth.c,v 1.31 2009/03/23 11:48:32 pooka Exp $ */
/*
* Copyright (c) 2007 Antti Kantee. All Rights Reserved.
@@ -30,7 +30,7 @@
#include <sys/cdefs.h>
#if !defined(lint)
-__RCSID("$NetBSD: rumpuser_pth.c,v 1.30 2009/03/18 10:22:45 cegger Exp $");
+__RCSID("$NetBSD: rumpuser_pth.c,v 1.31 2009/03/23 11:48:32 pooka Exp $");
#endif /* !lint */
#ifdef __linux__
@@ -151,6 +151,7 @@
NOFAIL_ERRNO(pthread_mutex_lock(&rumpuser_aio_mtx.pthmtx));
rumpuser_aio_tail = (rumpuser_aio_tail+1) % N_AIOS;
+ pthread_cond_signal(&rumpuser_aio_cv.pthcv);
}
}
Index: src/sys/rump/librump/rumpvfs/genfs_io.c
diff -u src/sys/rump/librump/rumpvfs/genfs_io.c:1.8 src/sys/rump/librump/rumpvfs/genfs_io.c:1.9
--- src/sys/rump/librump/rumpvfs/genfs_io.c:1.8 Sun Mar 22 13:38:54 2009
+++ src/sys/rump/librump/rumpvfs/genfs_io.c Mon Mar 23 11:48:33 2009
@@ -1,4 +1,4 @@
-/* $NetBSD: genfs_io.c,v 1.8 2009/03/22 13:38:54 pooka Exp $ */
+/* $NetBSD: genfs_io.c,v 1.9 2009/03/23 11:48:33 pooka Exp $ */
/*
* Copyright (c) 2007 Antti Kantee. All Rights Reserved.
@@ -29,7 +29,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.8 2009/03/22 13:38:54 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.9 2009/03/23 11:48:33 pooka Exp $");
#include <sys/param.h>
#include <sys/buf.h>
@@ -219,8 +219,10 @@
VOP_STRATEGY(devvp, bp);
- if (!async)
+ if (!async) {
+ biowait(bp);
putiobuf(bp);
+ }
}
/* skip to beginning of pages we're interested in */
@@ -274,6 +276,14 @@
ap->a_flags, NULL);
}
+static void
+rump_putiodone(struct buf *bp)
+{
+
+ kmem_free(bp->b_data, bp->b_bufsize);
+ putiobuf(bp);
+}
+
/*
* This is a slightly strangely structured routine. It always puts
* all the pages for a vnode. It starts by releasing pages which
@@ -287,20 +297,17 @@
genfs_do_putpages(struct vnode *vp, off_t startoff, off_t endoff, int flags,
struct vm_page **busypg)
{
- char databuf[MAXPHYS];
+ uint8_t databuf[MAXPHYS], *datap;
struct uvm_object *uobj = &vp->v_uobj;
struct vm_page *pg, *pg_next;
+ struct buf *bp, *mbp;
voff_t smallest;
voff_t curoff, bufoff;
off_t eof;
- size_t xfersize;
+ size_t xfersize, skipbytes = 0;
int bshift = vp->v_mount->mnt_fs_bshift;
int bsize = 1 << bshift;
-#if 0
int async = (flags & PGO_SYNCIO) == 0;
-#else
- int async = 0;
-#endif
DPRINTF(("genfs_do_putpages: vnode %p, startoff %lld, endoff %lld\n",
vp, (long long)startoff, (long long)endoff));
@@ -337,6 +344,12 @@
return 0;
}
+ if (async) {
+ datap = kmem_alloc(MAXPHYS, KM_SLEEP);
+ } else {
+ datap = databuf;
+ }
+
/* we need to flush */
GOP_SIZE(vp, vp->v_writesize, &eof, 0);
for (curoff = smallest; curoff < eof; curoff += PAGE_SIZE) {
@@ -351,21 +364,34 @@
/* XXX: see comment about above KASSERT */
KASSERT((pg->flags & PG_BUSY) == 0);
- curva = databuf + (curoff-smallest);
+ curva = datap + (curoff-smallest);
memcpy(curva, (void *)pg->uanon, PAGE_SIZE);
rumpvm_enterva((vaddr_t)curva, pg);
pg->flags |= PG_CLEAN | PG_BUSY;
}
KASSERT(curoff > smallest);
-
mutex_exit(&uobj->vmobjlock);
+ mbp = getiobuf(vp, true);
+ mbp->b_bufsize = MAXPHYS;
+ mbp->b_data = datap;
+ mbp->b_resid = mbp->b_bcount = curoff-smallest;
+ mbp->b_cflags |= BC_BUSY;
+ mbp->b_flags = B_WRITE;
+ if (async) {
+ mbp->b_flags |= B_ASYNC;
+ mbp->b_iodone = rump_putiodone;
+ }
+ mutex_enter(&vp->v_interlock);
+ ++vp->v_numoutput;
+ mutex_exit(&vp->v_interlock);
+
/* then we write */
for (bufoff = 0; bufoff < curoff-smallest; bufoff+=xfersize) {
- struct buf *bp;
struct vnode *devvp;
daddr_t bn, lbn;
+ size_t iotodo;
int run, error;
lbn = (smallest + bufoff) >> bshift;
@@ -382,46 +408,45 @@
* in the kernel page cache while truncate has already
* enlarged the file. So just ignore those ranges.
*/
- if (bn == -1)
+ if (bn == -1) {
+ skipbytes += xfersize;
continue;
-
- bp = getiobuf(vp, true);
+ }
/* only write max what we are allowed to write */
- bp->b_bcount = xfersize;
+ iotodo = xfersize;
if (smallest + bufoff + xfersize > eof)
- bp->b_bcount -= (smallest+bufoff+xfersize) - eof;
- bp->b_bcount = (bp->b_bcount + DEV_BSIZE-1) & ~(DEV_BSIZE-1);
+ iotodo -= (smallest+bufoff+xfersize) - eof;
+ iotodo = (iotodo + DEV_BSIZE-1) & ~(DEV_BSIZE-1);
- KASSERT(bp->b_bcount > 0);
+ /*
+ * Compensate for potentially smaller write. This will
+ * be zero except near eof.
+ */
+ skipbytes += xfersize - iotodo;
+
+ KASSERT(iotodo > 0);
KASSERT(smallest >= 0);
DPRINTF(("putpages writing from %x to %x (vp size %x)\n",
(int)(smallest + bufoff),
- (int)(smallest + bufoff + bp->b_bcount),
+ (int)(smallest + bufoff + iotodo),
(int)eof));
- bp->b_bufsize = round_page(bp->b_bcount);
+ bp = getiobuf(vp, true);
+ nestiobuf_setup(mbp, bp, bufoff, iotodo);
bp->b_lblkno = 0;
bp->b_blkno = bn + (((smallest+bufoff)&(bsize-1))>>DEV_BSHIFT);
- bp->b_data = databuf + bufoff;
- bp->b_flags = B_WRITE;
- bp->b_cflags |= BC_BUSY;
-
- if (async) {
- bp->b_flags |= B_ASYNC;
- bp->b_iodone = uvm_aio_biodone;
- }
- vp->v_numoutput++;
VOP_STRATEGY(devvp, bp);
- if (bp->b_error)
- panic("%s: VOP_STRATEGY lazy bum %d",
- __func__, bp->b_error);
- if (!async)
- putiobuf(bp);
}
rumpvm_flushva(uobj);
+ nestiobuf_done(mbp, skipbytes, 0);
+
+ if (!async) {
+ biowait(mbp);
+ putiobuf(mbp);
+ }
mutex_enter(&uobj->vmobjlock);
goto restart;
Index: src/sys/rump/librump/rumpvfs/rumpblk.c
diff -u src/sys/rump/librump/rumpvfs/rumpblk.c:1.12 src/sys/rump/librump/rumpvfs/rumpblk.c:1.13
--- src/sys/rump/librump/rumpvfs/rumpblk.c:1.12 Mon Mar 23 10:26:49 2009
+++ src/sys/rump/librump/rumpvfs/rumpblk.c Mon Mar 23 11:48:33 2009
@@ -1,4 +1,4 @@
-/* $NetBSD: rumpblk.c,v 1.12 2009/03/23 10:26:49 pooka Exp $ */
+/* $NetBSD: rumpblk.c,v 1.13 2009/03/23 11:48:33 pooka Exp $ */
/*
* Copyright (c) 2009 Antti Kantee. All Rights Reserved.
@@ -34,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.12 2009/03/23 10:26:49 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rumpblk.c,v 1.13 2009/03/23 11:48:33 pooka Exp $");
#include <sys/param.h>
#include <sys/buf.h>
@@ -195,7 +195,8 @@
rblk->rblk_dl.d_secsize = DEV_BSIZE;
rblk->rblk_curpi = &rblk->rblk_pi;
} else {
- if (rumpuser_ioctl(fd,DIOCGDINFO, &rblk->rblk_dl, &error) != -1) {
+ if (rumpuser_ioctl(fd, DIOCGDINFO, &rblk->rblk_dl,
+ &error) != -1) {
rumpuser_close(fd, &dummy);
return error;
}
@@ -204,6 +205,9 @@
}
rblk->rblk_fd = fd;
rblk->rblk_mem = mem;
+ if (rblk->rblk_mem != NULL)
+ printf("rumpblk%d: using mmio for %s\n",
+ minor(dev), rblk->rblk_path);
return 0;
}
@@ -332,19 +336,12 @@
* Synchronous I/O is done directly in the context mainly to
* avoid unnecessary scheduling with the I/O thread.
*/
- if (async && rump_threads) {
+ if (rump_threads) {
struct rumpuser_aio *rua;
rumpuser_mutex_enter(&rumpuser_aio_mtx);
- /*
- * Check if our buffer is full. Doing it this way
- * throttles the I/O a bit if we have a massive
- * async I/O burst.
- */
- if ((rumpuser_aio_head+1) % N_AIOS == rumpuser_aio_tail) {
- rumpuser_mutex_exit(&rumpuser_aio_mtx);
- goto syncfallback;
- }
+ while ((rumpuser_aio_head+1) % N_AIOS == rumpuser_aio_tail)
+ rumpuser_cv_wait(&rumpuser_aio_cv, &rumpuser_aio_mtx);
rua = &rumpuser_aios[rumpuser_aio_head];
KASSERT(rua->rua_bp == NULL);
@@ -359,8 +356,13 @@
rumpuser_aio_head = (rumpuser_aio_head+1) % N_AIOS;
rumpuser_cv_signal(&rumpuser_aio_cv);
rumpuser_mutex_exit(&rumpuser_aio_mtx);
+
+ /* make sure non-async writes end up on backing media */
+ if (BUF_ISWRITE(bp) && !async) {
+ biowait(bp);
+ rumpuser_fsync(rblk->rblk_fd, &error);
+ }
} else {
- syncfallback:
if (BUF_ISREAD(bp)) {
rumpuser_read_bio(rblk->rblk_fd, bp->b_data,
bp->b_bcount, off, rump_biodone, bp);