Re: [PATCH 2/4] vfs: pull btrfs clone API to vfs layer

2015-12-14 Thread Christoph Hellwig
On Wed, Dec 09, 2015 at 12:40:33PM -0800, Darrick J. Wong wrote:
> I tried this patch series on ppc64 (w/ 32-bit powerpc userland) and I think
> it needs to fix up the compat ioctl to make the vfs call...

Might need a proper signoff for Al, unless he wants to directly fold it..
--
To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/4] vfs: pull btrfs clone API to vfs layer

2015-12-14 Thread Darrick J. Wong
On Wed, Dec 09, 2015 at 12:40:33PM -0800, Darrick J. Wong wrote:
> On Thu, Dec 03, 2015 at 12:59:50PM +0100, Christoph Hellwig wrote:
> > The btrfs clone ioctls are now adopted by other file systems, with NFS
> > and CIFS already having support for them, and XFS being under active
> > development.  To avoid growth of various slightly incompatible
> > implementations, add one to the VFS.  Note that clones are different from
> > file copies in several ways:
> > 
> >  - they are atomic vs other writers
> >  - they support whole file clones
> >  - they support 64-bit legth clones
> >  - they do not allow partial success (aka short writes)
> >  - clones are expected to be a fast metadata operation
> > 
> > Because of that it would be rather cumbersome to try to piggyback them on
> > top of the recent clone_file_range infrastructure.  The converse isn't
> > true and the clone_file_range system call could try clone file range as
> > a first attempt to copy, something that further patches will enable.
> > 
> > Based on earlier work from Peng Tao.
> > 
> > Signed-off-by: Christoph Hellwig 
> > ---
> >  fs/btrfs/ctree.h|   3 +-
> >  fs/btrfs/file.c |   1 +
> >  fs/btrfs/ioctl.c|  49 ++-
> >  fs/cifs/cifsfs.c|  63 
> >  fs/cifs/cifsfs.h|   1 -
> >  fs/cifs/ioctl.c | 126 
> > +++-
> >  fs/ioctl.c  |  29 +++
> 
> I tried this patch series on ppc64 (w/ 32-bit powerpc userland) and I think
> it needs to fix up the compat ioctl to make the vfs call...

Bah, forgot to add:
Signed-off-by: Darrick J. Wong 

(Feel free to fold this three line chunk into the original patch...)

--D

> diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
> index dcf2653..70d4b10 100644
> --- a/fs/compat_ioctl.c
> +++ b/fs/compat_ioctl.c
> @@ -1580,6 +1580,10 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, 
> unsigned int, cmd,
> goto out_fput;
>  #endif
>  
> +   case FICLONE:
> +   case FICLONERANGE:
> +   goto do_ioctl;
> +
> case FIBMAP:
> case FIGETBSZ:
> case FIONREAD:
> 
> --D
> 
> >  fs/nfs/nfs4file.c   |  87 -
> >  fs/read_write.c |  72 +++
> >  include/linux/fs.h  |   7 ++-
> >  include/uapi/linux/fs.h |   9 
> >  11 files changed, 254 insertions(+), 193 deletions(-)
> > 
> > diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> > index ede7277..dd4733f 100644
> > --- a/fs/btrfs/ctree.h
> > +++ b/fs/btrfs/ctree.h
> > @@ -4025,7 +4025,6 @@ void btrfs_get_block_group_info(struct list_head 
> > *groups_list,
> >  void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
> >struct btrfs_ioctl_balance_args *bargs);
> >  
> > -
> >  /* file.c */
> >  int btrfs_auto_defrag_init(void);
> >  void btrfs_auto_defrag_exit(void);
> > @@ -4058,6 +4057,8 @@ int btrfs_fdatawrite_range(struct inode *inode, 
> > loff_t start, loff_t end);
> >  ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
> >   struct file *file_out, loff_t pos_out,
> >   size_t len, unsigned int flags);
> > +int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
> > +  struct file *file_out, loff_t pos_out, u64 len);
> >  
> >  /* tree-defrag.c */
> >  int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
> > diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> > index e67fe6a..232e300 100644
> > --- a/fs/btrfs/file.c
> > +++ b/fs/btrfs/file.c
> > @@ -2925,6 +2925,7 @@ const struct file_operations btrfs_file_operations = {
> > .compat_ioctl   = btrfs_ioctl,
> >  #endif
> > .copy_file_range = btrfs_copy_file_range,
> > +   .clone_file_range = btrfs_clone_file_range,
> >  };
> >  
> >  void btrfs_auto_defrag_exit(void)
> > diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> > index 0f92735..85b1cae 100644
> > --- a/fs/btrfs/ioctl.c
> > +++ b/fs/btrfs/ioctl.c
> > @@ -3906,49 +3906,10 @@ ssize_t btrfs_copy_file_range(struct file *file_in, 
> > loff_t pos_in,
> > return ret;
> >  }
> >  
> > -static noinline long btrfs_ioctl_clone(struct file *file, unsigned long 
> > srcfd,
> > -  u64 off, u64 olen, u64 destoff)
> > +int btrfs_clone_file_range(struct file *src_file, loff_t off,
> > +   struct file *dst_file, loff_t destoff, u64 len)
> >  {
> > -   struct fd src_file;
> > -   int ret;
> > -
> > -   /* the destination must be opened for writing */
> > -   if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
> > -   return -EINVAL;
> > -
> > -   ret = mnt_want_write_file(file);
> > -   if (ret)
> > -   return ret;
> > -
> > -   src_file = fdget(srcfd);
> > -   if (!src_file.file) {
> > -   ret = -EBADF;
> > -   goto out_drop_write;
> > -   }
> 

Re: [PATCH 2/4] vfs: pull btrfs clone API to vfs layer

2015-12-09 Thread Darrick J. Wong
On Thu, Dec 03, 2015 at 12:59:50PM +0100, Christoph Hellwig wrote:
> The btrfs clone ioctls are now adopted by other file systems, with NFS
> and CIFS already having support for them, and XFS being under active
> development.  To avoid growth of various slightly incompatible
> implementations, add one to the VFS.  Note that clones are different from
> file copies in several ways:
> 
>  - they are atomic vs other writers
>  - they support whole file clones
>  - they support 64-bit legth clones
>  - they do not allow partial success (aka short writes)
>  - clones are expected to be a fast metadata operation
> 
> Because of that it would be rather cumbersome to try to piggyback them on
> top of the recent clone_file_range infrastructure.  The converse isn't
> true and the clone_file_range system call could try clone file range as
> a first attempt to copy, something that further patches will enable.
> 
> Based on earlier work from Peng Tao.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  fs/btrfs/ctree.h|   3 +-
>  fs/btrfs/file.c |   1 +
>  fs/btrfs/ioctl.c|  49 ++-
>  fs/cifs/cifsfs.c|  63 
>  fs/cifs/cifsfs.h|   1 -
>  fs/cifs/ioctl.c | 126 
> +++-
>  fs/ioctl.c  |  29 +++

I tried this patch series on ppc64 (w/ 32-bit powerpc userland) and I think
it needs to fix up the compat ioctl to make the vfs call...

diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index dcf2653..70d4b10 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1580,6 +1580,10 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned 
int, cmd,
goto out_fput;
 #endif
 
+   case FICLONE:
+   case FICLONERANGE:
+   goto do_ioctl;
+
case FIBMAP:
case FIGETBSZ:
case FIONREAD:

--D

>  fs/nfs/nfs4file.c   |  87 -
>  fs/read_write.c |  72 +++
>  include/linux/fs.h  |   7 ++-
>  include/uapi/linux/fs.h |   9 
>  11 files changed, 254 insertions(+), 193 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index ede7277..dd4733f 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -4025,7 +4025,6 @@ void btrfs_get_block_group_info(struct list_head 
> *groups_list,
>  void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
>  struct btrfs_ioctl_balance_args *bargs);
>  
> -
>  /* file.c */
>  int btrfs_auto_defrag_init(void);
>  void btrfs_auto_defrag_exit(void);
> @@ -4058,6 +4057,8 @@ int btrfs_fdatawrite_range(struct inode *inode, loff_t 
> start, loff_t end);
>  ssize_t btrfs_copy_file_range(struct file *file_in, loff_t pos_in,
> struct file *file_out, loff_t pos_out,
> size_t len, unsigned int flags);
> +int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
> +struct file *file_out, loff_t pos_out, u64 len);
>  
>  /* tree-defrag.c */
>  int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index e67fe6a..232e300 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -2925,6 +2925,7 @@ const struct file_operations btrfs_file_operations = {
>   .compat_ioctl   = btrfs_ioctl,
>  #endif
>   .copy_file_range = btrfs_copy_file_range,
> + .clone_file_range = btrfs_clone_file_range,
>  };
>  
>  void btrfs_auto_defrag_exit(void)
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 0f92735..85b1cae 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -3906,49 +3906,10 @@ ssize_t btrfs_copy_file_range(struct file *file_in, 
> loff_t pos_in,
>   return ret;
>  }
>  
> -static noinline long btrfs_ioctl_clone(struct file *file, unsigned long 
> srcfd,
> -u64 off, u64 olen, u64 destoff)
> +int btrfs_clone_file_range(struct file *src_file, loff_t off,
> + struct file *dst_file, loff_t destoff, u64 len)
>  {
> - struct fd src_file;
> - int ret;
> -
> - /* the destination must be opened for writing */
> - if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
> - return -EINVAL;
> -
> - ret = mnt_want_write_file(file);
> - if (ret)
> - return ret;
> -
> - src_file = fdget(srcfd);
> - if (!src_file.file) {
> - ret = -EBADF;
> - goto out_drop_write;
> - }
> -
> - /* the src must be open for reading */
> - if (!(src_file.file->f_mode & FMODE_READ)) {
> - ret = -EINVAL;
> - goto out_fput;
> - }
> -
> - ret = btrfs_clone_files(file, src_file.file, off, olen, destoff);
> -
> -out_fput:
> - fdput(src_file);
> -out_drop_write:
> - mnt_drop_write_file(file);
> - return ret;
> -}
> -
> -static long 

Re: [PATCH 2/4] vfs: pull btrfs clone API to vfs layer

2015-12-07 Thread Christoph Hellwig
On Sun, Dec 06, 2015 at 04:53:31PM -0800, Darrick J. Wong wrote:
> > +   if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
> > +   return -EISDIR;
> > +   if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
> > +   return -EOPNOTSUPP;
> 
> I thought we were moving to -EINVAL for wrong file types?
> 
> Though, perhaps "I've also prepared a btrfs patch for this and clone" from the
> earlier thread about generic/157 wasn't referring to /this/ patch. :)
> 
> In any case, I'm ok with EINVAL, and I haven't heard any objections to
> changing -EOPNOTSUPP -> -EINVAL when trying to reflink/dedupe/whatever
> non-file non-dir fds.

I'm fine with with EINVAL - not sure why I ended up with EOPNOTSUP,
probably because 157 is already failing as in general the errors for
something in the VFS vs a specific ioctl handler are just too different.
--
To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/4] vfs: pull btrfs clone API to vfs layer

2015-12-07 Thread Darrick J. Wong
On Mon, Dec 07, 2015 at 04:13:19PM +0100, Christoph Hellwig wrote:
> On Sun, Dec 06, 2015 at 04:53:31PM -0800, Darrick J. Wong wrote:
> > > + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
> > > + return -EISDIR;
> > > + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
> > > + return -EOPNOTSUPP;
> > 
> > I thought we were moving to -EINVAL for wrong file types?
> > 
> > Though, perhaps "I've also prepared a btrfs patch for this and clone" from 
> > the
> > earlier thread about generic/157 wasn't referring to /this/ patch. :)
> > 
> > In any case, I'm ok with EINVAL, and I haven't heard any objections to
> > changing -EOPNOTSUPP -> -EINVAL when trying to reflink/dedupe/whatever
> > non-file non-dir fds.
> 
> I'm fine with with EINVAL - not sure why I ended up with EOPNOTSUP,
> probably because 157 is already failing as in general the errors for
> something in the VFS vs a specific ioctl handler are just too different.

Ok, I'm going to ensure that generic/1[57-60] all look for EINVAL when
the file type is wrong, and resend the xfstests patches.  I'll also
patch them up to accept the error codes that btrfs spit out before the
ioctl hoist.

--D
--
To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/4] vfs: pull btrfs clone API to vfs layer

2015-12-07 Thread Darrick J. Wong
On Mon, Dec 07, 2015 at 04:13:19PM +0100, Christoph Hellwig wrote:
> On Sun, Dec 06, 2015 at 04:53:31PM -0800, Darrick J. Wong wrote:
> > > + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
> > > + return -EISDIR;
> > > + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
> > > + return -EOPNOTSUPP;
> > 
> > I thought we were moving to -EINVAL for wrong file types?
> > 
> > Though, perhaps "I've also prepared a btrfs patch for this and clone" from 
> > the
> > earlier thread about generic/157 wasn't referring to /this/ patch. :)
> > 
> > In any case, I'm ok with EINVAL, and I haven't heard any objections to
> > changing -EOPNOTSUPP -> -EINVAL when trying to reflink/dedupe/whatever
> > non-file non-dir fds.
> 
> I'm fine with with EINVAL - not sure why I ended up with EOPNOTSUP,
> probably because 157 is already failing as in general the errors for
> something in the VFS vs a specific ioctl handler are just too different.

Ok, will have respun fixes for 157/158 soon.

--D
--
To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/4] vfs: pull btrfs clone API to vfs layer

2015-12-06 Thread Darrick J. Wong
On Thu, Dec 03, 2015 at 12:59:50PM +0100, Christoph Hellwig wrote:
> The btrfs clone ioctls are now adopted by other file systems, with NFS
> and CIFS already having support for them, and XFS being under active
> development.  To avoid growth of various slightly incompatible
> implementations, add one to the VFS.  Note that clones are different from
> file copies in several ways:
> 
>  - they are atomic vs other writers
>  - they support whole file clones
>  - they support 64-bit legth clones
>  - they do not allow partial success (aka short writes)
>  - clones are expected to be a fast metadata operation
> 
> Because of that it would be rather cumbersome to try to piggyback them on
> top of the recent clone_file_range infrastructure.  The converse isn't
> true and the clone_file_range system call could try clone file range as
> a first attempt to copy, something that further patches will enable.
> 
> Based on earlier work from Peng Tao.



> diff --git a/fs/read_write.c b/fs/read_write.c
> index 6c1aa73..9e3dd8f 100644
> --- a/fs/read_write.c
> +++ b/fs/read_write.c
> @@ -1451,3 +1451,75 @@ out1:
>  out2:
>   return ret;
>  }
> +
> +static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool 
> write)
> +{
> + struct inode *inode = file_inode(file);
> +
> + if (unlikely(pos < 0))
> + return -EINVAL;
> +
> +  if (unlikely((loff_t) (pos + len) < 0))
> + return -EINVAL;
> +
> + if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
> + loff_t end = len ? pos + len - 1 : OFFSET_MAX;
> + int retval;
> +
> + retval = locks_mandatory_area(file, pos, end,
> + write ? F_WRLCK : F_RDLCK);
> + if (retval < 0)
> + return retval;
> + }
> +
> + return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
> +}
> +
> +int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
> + struct file *file_out, loff_t pos_out, u64 len)
> +{
> + struct inode *inode_in = file_inode(file_in);
> + struct inode *inode_out = file_inode(file_out);
> + int ret;
> +
> + if (inode_in->i_sb != inode_out->i_sb ||
> + file_in->f_path.mnt != file_out->f_path.mnt)
> + return -EXDEV;
> +
> + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
> + return -EISDIR;
> + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
> + return -EOPNOTSUPP;

I thought we were moving to -EINVAL for wrong file types?

Though, perhaps "I've also prepared a btrfs patch for this and clone" from the
earlier thread about generic/157 wasn't referring to /this/ patch. :)

In any case, I'm ok with EINVAL, and I haven't heard any objections to
changing -EOPNOTSUPP -> -EINVAL when trying to reflink/dedupe/whatever
non-file non-dir fds.

 Anyone object?

--D

> +
> + if (!(file_in->f_mode & FMODE_READ) ||
> + !(file_out->f_mode & FMODE_WRITE) ||
> + (file_out->f_flags & O_APPEND) ||
> + !file_in->f_op->clone_file_range)
> + return -EBADF;
> +
> + ret = clone_verify_area(file_in, pos_in, len, false);
> + if (ret)
> + return ret;
> +
> + ret = clone_verify_area(file_out, pos_out, len, true);
> + if (ret)
> + return ret;
> +
> + if (pos_in + len > i_size_read(inode_in))
> + return -EINVAL;
> +
> + ret = mnt_want_write_file(file_out);
> + if (ret)
> + return ret;
> +
> + ret = file_in->f_op->clone_file_range(file_in, pos_in,
> + file_out, pos_out, len);
> + if (!ret) {
> + fsnotify_access(file_in);
> + fsnotify_modify(file_out);
> + }
> +
> + mnt_drop_write_file(file_out);
> + return ret;
> +}
> +EXPORT_SYMBOL(vfs_clone_file_range);
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index af559ac..59bf96d 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1629,7 +1629,10 @@ struct file_operations {
>  #ifndef CONFIG_MMU
>   unsigned (*mmap_capabilities)(struct file *);
>  #endif
> - ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, 
> loff_t, size_t, unsigned int);
> + ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
> + loff_t, size_t, unsigned int);
> + int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
> + u64);
>  };
>  
>  struct inode_operations {
> @@ -1683,6 +1686,8 @@ extern ssize_t vfs_writev(struct file *, const struct 
> iovec __user *,
>   unsigned long, loff_t *);
>  extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
>  loff_t, size_t, unsigned int);
> +extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
> + struct file *file_out, loff_t pos_out, u64 len);
>  
>  struct super_operations {
>