Hi everyone,
I've written a basic implementation of posix_fallocate(2). I looked
at both the FreeBSD and NetBSD code while doing this. My knowledge of
the VFS layer is very limited, so please let me know if anything looks
wrong.
I added a generic function in vfs_vnops.c called vn_fallocate() that
uses VOP_READ() and VOP_WRITE() underneath. It does not take
advantage of filesystem specific optimizations (there's no
VOP_FALLOCATE or similar).
If this looks good, I will go on to write a manpage and add the
necessary libc glue.
In theory this diff could have been split in two, one that adds
vn_fallocate() and acts as a no-op and another patch on top of it that
implements posix_fallocate(2). Let me know if you need me to split
the diff.
Thanks,
Dimitris
Index: kern/init_sysent.c
===================================================================
RCS file: /cvs/src/sys/kern/init_sysent.c,v
retrieving revision 1.171
diff -u -p -r1.171 init_sysent.c
--- kern/init_sysent.c 9 Sep 2015 17:57:57 -0000 1.171
+++ kern/init_sysent.c 12 Sep 2015 17:37:54 -0000
@@ -1,4 +1,4 @@
-/* $OpenBSD: init_sysent.c,v 1.171 2015/09/09 17:57:57 deraadt Exp $ */
+/* $OpenBSD$ */
/*
* System call switch table.
@@ -751,5 +751,7 @@ struct sysent sysent[] = {
sys___set_tcb }, /* 329 = __set_tcb */
{ 0, 0, SY_NOLOCK | 0,
sys___get_tcb }, /* 330 = __get_tcb */
+ { 3, s(struct sys_posix_fallocate_args), 0,
+ sys_posix_fallocate }, /* 331 = posix_fallocate */
};
Index: kern/syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.c,v
retrieving revision 1.170
diff -u -p -r1.170 syscalls.c
--- kern/syscalls.c 9 Sep 2015 17:57:57 -0000 1.170
+++ kern/syscalls.c 12 Sep 2015 17:37:54 -0000
@@ -1,4 +1,4 @@
-/* $OpenBSD: syscalls.c,v 1.170 2015/09/09 17:57:57 deraadt Exp $ */
+/* $OpenBSD$ */
/*
* System call names.
@@ -393,4 +393,5 @@ char *syscallnames[] = {
"#328 (obsolete __tfork51)", /* 328 = obsolete __tfork51 */
"__set_tcb", /* 329 = __set_tcb */
"__get_tcb", /* 330 = __get_tcb */
+ "posix_fallocate", /* 331 = posix_fallocate */
};
Index: kern/syscalls.master
===================================================================
RCS file: /cvs/src/sys/kern/syscalls.master,v
retrieving revision 1.158
diff -u -p -r1.158 syscalls.master
--- kern/syscalls.master 9 Sep 2015 17:56:59 -0000 1.158
+++ kern/syscalls.master 12 Sep 2015 17:37:54 -0000
@@ -561,3 +561,4 @@
328 OBSOL __tfork51
329 STD NOLOCK { void sys___set_tcb(void *tcb); }
330 STD NOLOCK { void *sys___get_tcb(void); }
+331 STD { int sys_posix_fallocate(int fd, off_t offset, off_t len); }
Index: kern/vfs_syscalls.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.227
diff -u -p -r1.227 vfs_syscalls.c
--- kern/vfs_syscalls.c 31 Aug 2015 16:13:11 -0000 1.227
+++ kern/vfs_syscalls.c 12 Sep 2015 17:37:54 -0000
@@ -3023,3 +3023,52 @@ sys_pwritev(struct proc *p, void *v, reg
1, &offset, retval));
}
+/*
+ * posix_fallocate(2): make sure storage is allocated for the byte range
+ * [offset, offset + len) of the regular file open on fd.
+ *
+ * Returns 0 on success or an errno value:
+ *	EINVAL	offset or len is negative
+ *	EFBIG	offset + len does not fit in an off_t
+ *	EBADF	fd is not open for writing
+ *	ESPIPE	fd refers to a pipe or a FIFO
+ *	ENODEV	fd does not refer to a regular file
+ * plus whatever getvnode(), vn_writechk() or vn_fallocate() report.
+ */
+int
+sys_posix_fallocate(struct proc *p, void *v, register_t *retval)
+{
+	struct sys_posix_fallocate_args /* {
+		syscallarg(int) fd;
+		syscallarg(off_t) offset;
+		syscallarg(off_t) len;
+	} */ *uap = v;
+	struct vnode *vp;
+	struct file *fp;
+	int error;
+
+	if (SCARG(uap, offset) < 0 || SCARG(uap, len) < 0)
+		return (EINVAL);
+	/* Written as a subtraction so the check itself cannot overflow. */
+	if (SCARG(uap, offset) > LLONG_MAX - SCARG(uap, len))
+		return (EFBIG);
+
+	if ((error = getvnode(p, SCARG(uap, fd), &fp)) != 0)
+		return (error);
+
+	/* From here on fp is referenced; every exit goes through "bad". */
+	switch (fp->f_type) {
+	case DTYPE_VNODE:
+		break;
+	case DTYPE_PIPE:
+		error = ESPIPE;
+		goto bad;
+	default:
+		error = ENODEV;
+		goto bad;
+	}
+
+	if ((fp->f_flag & FWRITE) == 0) {
+		error = EBADF;
+		goto bad;
+	}
+
+	vp = fp->f_data;
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+	/*
+	 * VFIFO is a vnode type, not a file type, so a FIFO has to be
+	 * recognized by v_type here rather than in the f_type switch above.
+	 */
+	if (vp->v_type == VFIFO)
+		error = ESPIPE;
+	else if (vp->v_type != VREG)
+		error = ENODEV;
+	else if ((error = vn_writechk(vp)) == 0)
+		error = vn_fallocate(vp, SCARG(uap, offset),
+		    SCARG(uap, len), p);
+	VOP_UNLOCK(vp, 0, p);
+bad:
+	FRELE(fp, p);
+	return (error);
+}
Index: kern/vfs_vnops.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_vnops.c,v
retrieving revision 1.82
diff -u -p -r1.82 vfs_vnops.c
--- kern/vfs_vnops.c 1 May 2015 01:30:58 -0000 1.82
+++ kern/vfs_vnops.c 12 Sep 2015 17:37:54 -0000
@@ -573,3 +573,73 @@ vn_isunder(struct vnode *lvp, struct vno
return (0);
}
+
+/*
+ * Make sure storage backs the byte range [offset, offset + len) of vp by
+ * rewriting it block by block: data below the current end of file is read
+ * and written back unchanged, anything beyond it is written as zeroes.
+ * Only VOP_GETATTR/VOP_SETATTR and vn_rdwr() are used; there is no
+ * filesystem-specific allocation hook.
+ *
+ * All I/O is issued with IO_NODELOCKED, so the caller is expected to pass
+ * vp already locked.  The caller must also have verified that offset and
+ * len are non-negative and that offset + len does not overflow.
+ *
+ * Returns 0 on success or an errno value.  On failure part of the range
+ * may already have been written.
+ */
+int
+vn_fallocate(struct vnode *vp, off_t offset, off_t len, struct proc *p)
+{
+	struct vattr va;
+	uint8_t *buf;
+	off_t cur, fsize;
+	long blksize;
+	size_t resid;
+	int error;
+
+	error = VOP_GETATTR(vp, &va, p->p_ucred, p);
+	if (error != 0)
+		return (error);
+	fsize = va.va_size;
+	blksize = va.va_blocksize;
+	/*
+	 * blksize is used below as a divisor and as the buffer size;
+	 * refuse a filesystem that does not report a usable value.
+	 */
+	if (blksize <= 0)
+		return (EINVAL);
+
+	if (offset + len > fsize) {
+		/*
+		 * Check that the filesystem supports the resulting file
+		 * size by asking it to extend the file that far.
+		 */
+		VATTR_NULL(&va);
+		va.va_size = offset + len;
+		error = VOP_SETATTR(vp, &va, p->p_ucred, p);
+		if (error != 0)
+			return (error);
+
+		/* All good, restore the original file size. */
+		VATTR_NULL(&va);
+		va.va_size = fsize;
+		error = VOP_SETATTR(vp, &va, p->p_ucred, p);
+		if (error != 0)
+			return (error);
+	}
+
+	buf = malloc(blksize, M_TEMP, M_WAITOK);
+	/*
+	 * We have to go through the entire region because there is no
+	 * way to know at this level whether it has holes or not.
+	 */
+	while (len != 0) {
+		/* Go up to the next block boundary or the end of the range. */
+		cur = blksize - (offset % blksize);
+		if (cur > len)
+			cur = len;
+		if (offset < fsize) {
+			error = vn_rdwr(UIO_READ, vp, buf, cur, offset,
+			    UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred,
+			    &resid, p);
+			if (error != 0)
+				break;
+			/* A short read (chunk crosses EOF) is padded with zeroes. */
+			if (resid > 0)
+				memset(buf + cur - resid, 0, resid);
+		} else {
+			memset(buf, 0, cur);
+		}
+		error = vn_rdwr(UIO_WRITE, vp, buf, cur, offset,
+		    UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred,
+		    &resid, p);
+		if (error != 0)
+			break;
+		/*
+		 * A short write left part of this chunk unallocated; do not
+		 * report it as done.
+		 */
+		if (resid != 0) {
+			error = EIO;
+			break;
+		}
+		len -= cur;
+		offset += cur;
+	}
+	free(buf, M_TEMP, blksize);
+	return (error);
+}
Index: sys/fcntl.h
===================================================================
RCS file: /cvs/src/sys/sys/fcntl.h,v
retrieving revision 1.21
diff -u -p -r1.21 fcntl.h
--- sys/fcntl.h 17 May 2015 01:22:01 -0000 1.21
+++ sys/fcntl.h 12 Sep 2015 17:37:54 -0000
@@ -207,6 +207,7 @@ __BEGIN_DECLS
int open(const char *, int, ...);
int creat(const char *, mode_t);
int fcntl(int, int, ...);
+int posix_fallocate(int, off_t, off_t);
#if __BSD_VISIBLE
int flock(int, int);
#endif
Index: sys/syscall.h
===================================================================
RCS file: /cvs/src/sys/sys/syscall.h,v
retrieving revision 1.169
diff -u -p -r1.169 syscall.h
--- sys/syscall.h 9 Sep 2015 17:57:57 -0000 1.169
+++ sys/syscall.h 12 Sep 2015 17:37:54 -0000
@@ -1,4 +1,4 @@
-/* $OpenBSD: syscall.h,v 1.169 2015/09/09 17:57:57 deraadt Exp $ */
+/* $OpenBSD$ */
/*
* System call numbers.
@@ -696,4 +696,7 @@
/* syscall: "__get_tcb" ret: "void *" args: */
#define SYS___get_tcb 330
-#define SYS_MAXSYSCALL 331
+/* syscall: "posix_fallocate" ret: "int" args: "int" "off_t" "off_t" */
+#define SYS_posix_fallocate 331
+
+#define SYS_MAXSYSCALL 332
Index: sys/syscallargs.h
===================================================================
RCS file: /cvs/src/sys/sys/syscallargs.h,v
retrieving revision 1.172
diff -u -p -r1.172 syscallargs.h
--- sys/syscallargs.h 9 Sep 2015 17:57:57 -0000 1.172
+++ sys/syscallargs.h 12 Sep 2015 17:37:54 -0000
@@ -1,4 +1,4 @@
-/* $OpenBSD: syscallargs.h,v 1.172 2015/09/09 17:57:57 deraadt Exp $ */
+/* $OpenBSD$ */
/*
* System call argument lists.
@@ -1081,6 +1081,12 @@ struct sys___set_tcb_args {
syscallarg(void *) tcb;
};
+/* Argument block for sys_posix_fallocate(): fd, offset and len. */
+struct sys_posix_fallocate_args {
+ syscallarg(int) fd;
+ syscallarg(off_t) offset;
+ syscallarg(off_t) len;
+};
+
/*
* System call prototypes.
*/
@@ -1327,3 +1333,4 @@ int sys_symlinkat(struct proc *, void *,
int sys_unlinkat(struct proc *, void *, register_t *);
int sys___set_tcb(struct proc *, void *, register_t *);
int sys___get_tcb(struct proc *, void *, register_t *);
+int sys_posix_fallocate(struct proc *, void *, register_t *);
Index: sys/vnode.h
===================================================================
RCS file: /cvs/src/sys/sys/vnode.h,v
retrieving revision 1.132
diff -u -p -r1.132 vnode.h
--- sys/vnode.h 7 May 2015 08:53:33 -0000 1.132
+++ sys/vnode.h 12 Sep 2015 17:37:54 -0000
@@ -633,6 +633,7 @@ int vop_generic_kqfilter(void *);
int vop_generic_lookup(void *);
/* vfs_vnops.c */
+int vn_fallocate(struct vnode *, off_t, off_t, struct proc *);
int vn_isunder(struct vnode *, struct vnode *, struct proc *);
int vn_close(struct vnode *, int, struct ucred *, struct proc *);
int vn_open(struct nameidata *, int, int);