I needed the iovec versions of pread and pwrite (preadv and pwritev) and have a patch for people to try. The changes are loosely based on the work done in FreeBSD current and have been tested against an enhanced version of Iozone. This testing was done on 1.4.x, but there doesn't appear to be a big difference between HEAD and the 1.4 slip tag, so I would expect the patch to work for HEAD as well.

After applying the patch you will also need to

cd src/sys/kern
make sysent

to update all the auto-magic system call gunk.

---chuck
Index: sys/kern/sys_generic.c
===================================================================
RCS file: /local/dcvs/src/sys/kern/sys_generic.c,v
retrieving revision 1.23
diff -u -r1.23 sys_generic.c
--- sys/kern/sys_generic.c      14 Nov 2005 18:50:05 -0000      1.23
+++ sys/kern/sys_generic.c      24 Apr 2006 17:27:44 -0000
@@ -80,6 +80,9 @@
 static int     pollscan (struct proc *, struct pollfd *, u_int, int *);
 static int     selscan (struct proc *, fd_mask **, fd_mask **,
                        int, int *);
+static int     dofileread(int, struct file *, struct uio *, int, int *);
+static int     dofilewrite(int, struct file *, struct uio *, int, int *);
+
 
 struct file*
 holdfp(fdp, fd, flag)
@@ -108,6 +111,8 @@
        struct iovec aiov;
        int error;
 
+       if (uap->nbyte > INT_MAX)
+               return (EINVAL);
        aiov.iov_base = uap->buf;
        aiov.iov_len = uap->nbyte;
        auio.uio_iov = &aiov;
@@ -118,13 +123,13 @@
        auio.uio_segflg = UIO_USERSPACE;
        auio.uio_td = td;
 
-       error = kern_readv(uap->fd, &auio, 0, &uap->sysmsg_result);
+       error = kern_preadv(uap->fd, &auio, 0, &uap->sysmsg_result);
 
        return(error);
 }
 
 /*
- * Pread system call
+ * Positioned (Pread) read system call
  */
 int
 pread(struct pread_args *uap)
@@ -134,6 +139,8 @@
        struct iovec aiov;
        int error;
 
+       if (uap->nbyte > INT_MAX)
+               return (EINVAL);
        aiov.iov_base = uap->buf;
        aiov.iov_len = uap->nbyte;
        auio.uio_iov = &aiov;
@@ -144,11 +151,14 @@
        auio.uio_segflg = UIO_USERSPACE;
        auio.uio_td = td;
 
-       error = kern_readv(uap->fd, &auio, FOF_OFFSET, &uap->sysmsg_result);
+       error = kern_preadv(uap->fd, &auio, FOF_OFFSET, &uap->sysmsg_result);
 
        return(error);
 }
 
+/*
+ * Scatter read system call.
+ */
 int
 readv(struct readv_args *uap)
 {
@@ -168,24 +178,49 @@
        auio.uio_segflg = UIO_USERSPACE;
        auio.uio_td = td;
 
-       error = kern_readv(uap->fd, &auio, 0, &uap->sysmsg_result);
+       error = kern_preadv(uap->fd, &auio, 0, &uap->sysmsg_result);
 
        iovec_free(&iov, aiov);
        return (error);
 }
 
+
+/*
+ * Scatter positioned read system call.
+ */
+int
+preadv(struct preadv_args *uap)
+{
+       struct thread *td = curthread;
+       struct uio auio;
+       struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
+       int error;
+
+       error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt,
+           &auio.uio_resid);
+       if (error)
+               return (error);
+       auio.uio_iov = iov;
+       auio.uio_iovcnt = uap->iovcnt;
+       auio.uio_offset = uap->offset;
+       auio.uio_rw = UIO_READ;
+       auio.uio_segflg = UIO_USERSPACE;
+       auio.uio_td = td;
+
+       error = kern_preadv(uap->fd, &auio, FOF_OFFSET, &uap->sysmsg_result);
+
+       iovec_free(&iov, aiov);
+       return(error);
+}
+
 int
-kern_readv(int fd, struct uio *auio, int flags, int *res)
+kern_preadv(int fd, struct uio *auio, int flags, int *res)
 {
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct file *fp;
        struct filedesc *fdp = p->p_fd;
-       int len, error;
-#ifdef KTRACE
-       struct iovec *ktriov = NULL;
-       struct uio ktruio;
-#endif
+       int error;
 
        KKASSERT(p);
 
@@ -194,12 +229,37 @@
                return (EBADF);
        if (flags & FOF_OFFSET && fp->f_type != DTYPE_VNODE) {
                error = ESPIPE;
-               goto done;
-       }
-       if (auio->uio_resid < 0) {
+       } else if (auio->uio_resid < 0) {
                error = EINVAL;
-               goto done;
+       } else {
+               error = dofileread(fd, fp, auio, flags, res);
+       }
+       fdrop(fp, td);
+       return(error);
+}
+
+/*
+ * Common code for readv and preadv that reads data in
+ * from a file using the passed in uio, offset, and flags.
+ */
+static int
+dofileread(int fd, struct file *fp, struct uio *auio, int flags, int *res)
+{
+       struct thread *td = curthread;
+       struct proc *p = td->td_proc;
+       ssize_t len;
+       int error;
+#ifdef KTRACE
+       struct iovec *ktriov = NULL;
+       struct uio ktruio;
+#endif
+
+       /* Finish zero length reads right here */
+       if (auio->uio_resid == 0) {
+               *res = 0;
+               return(0);
        }
+
 #ifdef KTRACE
        /*
         * if tracing, save a copy of iovec
@@ -231,9 +291,8 @@
 #endif
        if (error == 0)
                *res = len - auio->uio_resid;
-done:
-       fdrop(fp, td);
-       return (error);
+
+       return(error);
 }
 
 /*
@@ -257,7 +316,7 @@
        auio.uio_segflg = UIO_USERSPACE;
        auio.uio_td = td;
 
-       error = kern_writev(uap->fd, &auio, 0, &uap->sysmsg_result);
+       error = kern_pwritev(uap->fd, &auio, 0, &uap->sysmsg_result);
 
        return(error);
 }
@@ -283,7 +342,7 @@
        auio.uio_segflg = UIO_USERSPACE;
        auio.uio_td = td;
 
-       error = kern_writev(uap->fd, &auio, FOF_OFFSET, &uap->sysmsg_result);
+       error = kern_pwritev(uap->fd, &auio, FOF_OFFSET, &uap->sysmsg_result);
 
        return(error);
 }
@@ -307,41 +366,85 @@
        auio.uio_segflg = UIO_USERSPACE;
        auio.uio_td = td;
 
-       error = kern_writev(uap->fd, &auio, 0, &uap->sysmsg_result);
+       error = kern_pwritev(uap->fd, &auio, 0, &uap->sysmsg_result);
 
        iovec_free(&iov, aiov);
        return (error);
 }
 
+
 /*
- * Gather write system call
+ * Gather positioned write system call
  */
 int
-kern_writev(int fd, struct uio *auio, int flags, int *res)
+pwritev(struct pwritev_args *uap)
+{
+       struct thread *td = curthread;
+       struct uio auio;
+       struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
+       int error;
+
+       error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt,
+           &auio.uio_resid);
+       if (error)
+               return (error);
+       auio.uio_iov = iov;
+       auio.uio_iovcnt = uap->iovcnt;
+       auio.uio_offset = uap->offset;
+       auio.uio_rw = UIO_WRITE;
+       auio.uio_segflg = UIO_USERSPACE;
+       auio.uio_td = td;
+
+       error = kern_pwritev(uap->fd, &auio, FOF_OFFSET, &uap->sysmsg_result);
+
+       iovec_free(&iov, aiov);
+       return(error);
+}
+
+int
+kern_pwritev(int fd, struct uio *auio, int flags, int *res)
 {
        struct thread *td = curthread;
        struct proc *p = td->td_proc;
        struct file *fp;
        struct filedesc *fdp = p->p_fd;
-       long len, error;
-#ifdef KTRACE
-       struct iovec *ktriov = NULL;
-       struct uio ktruio;
-#endif
+       int error;
 
        KKASSERT(p);
 
        fp = holdfp(fdp, fd, FWRITE);
        if (fp == NULL)
                return (EBADF);
-       if ((flags & FOF_OFFSET) && fp->f_type != DTYPE_VNODE) {
+       else if ((flags & FOF_OFFSET) && fp->f_type != DTYPE_VNODE) {
                error = ESPIPE;
-               goto done;
+       } else {
+               error = dofilewrite(fd, fp, auio, flags, res);
        }
+       
+       fdrop(fp, td);
+       return (error);
+}
+
+/*
+ * Common code for writev and pwritev that writes data to
+ * a file using the passed in uio, offset, and flags.
+ */
+static int
+dofilewrite(int fd, struct file *fp, struct uio *auio, int flags, int *res)
+{      
+       struct thread *td = curthread;
+       struct proc *p = td->td_proc;
+       ssize_t len;
+       int error;
+#ifdef KTRACE
+       struct iovec *ktriov = NULL;
+       struct uio ktruio;
+#endif
+
        if (auio->uio_resid < 0) {
-               error = EINVAL;
-               goto done;
+               return(EINVAL);
        }
+
 #ifdef KTRACE
        /*
         * if tracing, save a copy of iovec and uio
@@ -362,6 +465,7 @@
                if (auio->uio_resid != len && (error == ERESTART ||
                    error == EINTR || error == EWOULDBLOCK))
                        error = 0;
+               /* Post SIGPIPE to the process when the write fails with EPIPE. */
                if (error == EPIPE)
                        psignal(p, SIGPIPE);
        }
@@ -377,9 +481,8 @@
 #endif
        if (error == 0)
                *res = len - auio->uio_resid;
-done:
-       fdrop(fp, td);
-       return (error);
+
+       return(error);
 }
 
 /*
Index: sys/kern/syscalls.master
===================================================================
RCS file: /local/dcvs/src/sys/kern/syscalls.master,v
retrieving revision 1.28
diff -u -r1.28 syscalls.master
--- sys/kern/syscalls.master    16 Nov 2005 02:24:30 -0000      1.28
+++ sys/kern/syscalls.master    22 Apr 2006 00:17:09 -0000
@@ -428,8 +428,9 @@
 286    UNIMPL  NOHIDE  nosys
 287    UNIMPL  NOHIDE  nosys
 288    UNIMPL  NOHIDE  nosys
-289    UNIMPL  NOHIDE  nosys
-290    UNIMPL  NOHIDE  nosys
+; 289 and 290 from NetBSD (OpenBSD: 267 and 268)
+289    STD     BSD     { ssize_t preadv(int fd, struct iovec *iovp, u_int iovcnt, off_t offset); }
+290    STD     BSD     { ssize_t pwritev(int fd, struct iovec *iovp, u_int iovcnt, off_t offset); }
 291    UNIMPL  NOHIDE  nosys
 292    UNIMPL  NOHIDE  nosys
 293    UNIMPL  NOHIDE  nosys
Index: sys/sys/kern_syscall.h
===================================================================
RCS file: /local/dcvs/src/sys/sys/kern_syscall.h,v
retrieving revision 1.28
diff -u -r1.28 kern_syscall.h
--- sys/sys/kern_syscall.h      9 Aug 2005 20:14:16 -0000       1.28
+++ sys/sys/kern_syscall.h      24 Apr 2006 19:46:01 -0000
@@ -91,8 +91,8 @@
 /*
  * Prototypes for syscalls in kern/sys_generic.c
  */
-int kern_readv(int fd, struct uio *auio, int flags, int *res);
-int kern_writev(int fd, struct uio *auio, int flags, int *res);
+int kern_preadv(int fd, struct uio *auio, int flags, int *res);
+int kern_pwritev(int fd, struct uio *auio, int flags, int *res);
 
 /*
  * Prototypes for syscalls in kern/kern_resource.c
Index: sys/sys/uio.h
===================================================================
RCS file: /local/dcvs/src/sys/sys/uio.h,v
retrieving revision 1.9
diff -u -r1.9 uio.h
--- sys/sys/uio.h       27 Jul 2004 13:11:22 -0000      1.9
+++ sys/sys/uio.h       17 Apr 2006 18:27:35 -0000
@@ -110,8 +110,10 @@
 #include <sys/cdefs.h>
 
 __BEGIN_DECLS
-ssize_t        readv (int, const struct iovec *, int);
-ssize_t        writev (int, const struct iovec *, int);
+ssize_t        readv(int, const struct iovec *, int);
+ssize_t        writev(int, const struct iovec *, int);
+ssize_t        preadv(int, const struct iovec *, int, off_t);
+ssize_t        pwritev(int, const struct iovec *, int, off_t);
 __END_DECLS
 
 #endif /* _KERNEL */

Reply via email to