[PATCH 03/25] vfs: check file ranges before cloning files

2018-10-12 Thread Darrick J. Wong
From: Darrick J. Wong 

Move the file range checks from vfs_clone_file_prep into a separate
generic_remap_checks function so that all the checks are collected in a
central location.  This forms the basis for adding more checks from
generic_write_checks that will make cloning's input checking more
consistent with write input checking.

Signed-off-by: Darrick J. Wong 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Amir Goldstein 
---
 fs/ocfs2/refcounttree.c |2 +
 fs/read_write.c |   55 +
 fs/xfs/xfs_reflink.c|2 +
 include/linux/fs.h  |9 --
 mm/filemap.c|   69 +++
 5 files changed, 90 insertions(+), 47 deletions(-)


diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 7a5ee145c733..19e03936c5e1 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4850,7 +4850,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
(OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
goto out_unlock;
 
-   ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
+   ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out,
&len, is_dedupe);
if (ret <= 0)
goto out_unlock;
diff --git a/fs/read_write.c b/fs/read_write.c
index 260797b01851..d6e8e242a15f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1717,13 +1717,12 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
  * Returns: 0 for "nothing to clone", 1 for "something to clone", or
  * the usual negative error code.
  */
-int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
-  struct inode *inode_out, loff_t pos_out,
-  u64 *len, bool is_dedupe)
+int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
+   struct file *file_out, loff_t pos_out,
+   u64 *len, bool is_dedupe)
 {
-   loff_t bs = inode_out->i_sb->s_blocksize;
-   loff_t blen;
-   loff_t isize;
+   struct inode *inode_in = file_inode(file_in);
+   struct inode *inode_out = file_inode(file_out);
bool same_inode = (inode_in == inode_out);
int ret;
 
@@ -1740,10 +1739,10 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
return -EINVAL;
 
-   isize = i_size_read(inode_in);
-
/* Zero length dedupe exits immediately; reflink goes to EOF. */
if (*len == 0) {
+   loff_t isize = i_size_read(inode_in);
+
if (is_dedupe || pos_in == isize)
return 0;
if (pos_in > isize)
@@ -1751,36 +1750,11 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
*len = isize - pos_in;
}
 
-   /* Ensure offsets don't wrap and the input is inside i_size */
-   if (pos_in + *len < pos_in || pos_out + *len < pos_out ||
-   pos_in + *len > isize)
-   return -EINVAL;
-
-   /* Don't allow dedupe past EOF in the dest file */
-   if (is_dedupe) {
-   loff_t  disize;
-
-   disize = i_size_read(inode_out);
-   if (pos_out >= disize || pos_out + *len > disize)
-   return -EINVAL;
-   }
-
-   /* If we're linking to EOF, continue to the block boundary. */
-   if (pos_in + *len == isize)
-   blen = ALIGN(isize, bs) - pos_in;
-   else
-   blen = *len;
-
-   /* Only reflink if we're aligned to block boundaries */
-   if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
-   !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
-   return -EINVAL;
-
-   /* Don't allow overlapped reflink within the same file */
-   if (same_inode) {
-   if (pos_out + blen > pos_in && pos_out < pos_in + blen)
-   return -EINVAL;
-   }
+   /* Check that we don't violate system file offset limits. */
+   ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
+   is_dedupe);
+   if (ret)
+   return ret;
 
/* Wait for the completion of any pending IOs on both files */
inode_dio_wait(inode_in);
@@ -1813,7 +1787,7 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 
return 1;
 }
-EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
+EXPORT_SYMBOL(vfs_clone_file_prep);
 
 int do_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len)
@@ -1851,9 +1825,6 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in,
if (ret)
return ret;
 
-   if (pos_in + len > i_size_read(inode_in))
- 

Re: [PATCH 03/25] vfs: check file ranges before cloning files

2018-10-11 Thread Amir Goldstein
On Thu, Oct 11, 2018 at 4:43 PM Christoph Hellwig wrote:
>
> > -EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
> > +EXPORT_SYMBOL(vfs_clone_file_prep);
>
> Btw, why isn't this EXPORT_SYMBOL_GPL?  It is rather Linux internal
> code, including some that I wrote which you lifted into the core
> in "vfs: refactor clone/dedupe_file_range common functions".

Because Al will shoot down any attempt at those in vfs code:
https://lkml.org/lkml/2018/6/10/4

Thanks,
Amir.


Re: [PATCH 03/25] vfs: check file ranges before cloning files

2018-10-11 Thread Christoph Hellwig
> -EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
> +EXPORT_SYMBOL(vfs_clone_file_prep);

Btw, why isn't this EXPORT_SYMBOL_GPL?  It is rather Linux internal
code, including some that I wrote which you lifted into the core
in "vfs: refactor clone/dedupe_file_range common functions".


[PATCH 03/25] vfs: check file ranges before cloning files

2018-10-10 Thread Darrick J. Wong
From: Darrick J. Wong 

Move the file range checks from vfs_clone_file_prep into a separate
generic_remap_checks function so that all the checks are collected in a
central location.  This forms the basis for adding more checks from
generic_write_checks that will make cloning's input checking more
consistent with write input checking.

Signed-off-by: Darrick J. Wong 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Amir Goldstein 
---
 fs/ocfs2/refcounttree.c |2 +
 fs/read_write.c |   55 +
 fs/xfs/xfs_reflink.c|2 +
 include/linux/fs.h  |9 --
 mm/filemap.c|   69 +++
 5 files changed, 90 insertions(+), 47 deletions(-)


diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 7a5ee145c733..19e03936c5e1 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4850,7 +4850,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
(OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
goto out_unlock;
 
-   ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
+   ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out,
&len, is_dedupe);
if (ret <= 0)
goto out_unlock;
diff --git a/fs/read_write.c b/fs/read_write.c
index 260797b01851..d6e8e242a15f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1717,13 +1717,12 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
  * Returns: 0 for "nothing to clone", 1 for "something to clone", or
  * the usual negative error code.
  */
-int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
-  struct inode *inode_out, loff_t pos_out,
-  u64 *len, bool is_dedupe)
+int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
+   struct file *file_out, loff_t pos_out,
+   u64 *len, bool is_dedupe)
 {
-   loff_t bs = inode_out->i_sb->s_blocksize;
-   loff_t blen;
-   loff_t isize;
+   struct inode *inode_in = file_inode(file_in);
+   struct inode *inode_out = file_inode(file_out);
bool same_inode = (inode_in == inode_out);
int ret;
 
@@ -1740,10 +1739,10 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
return -EINVAL;
 
-   isize = i_size_read(inode_in);
-
/* Zero length dedupe exits immediately; reflink goes to EOF. */
if (*len == 0) {
+   loff_t isize = i_size_read(inode_in);
+
if (is_dedupe || pos_in == isize)
return 0;
if (pos_in > isize)
@@ -1751,36 +1750,11 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
*len = isize - pos_in;
}
 
-   /* Ensure offsets don't wrap and the input is inside i_size */
-   if (pos_in + *len < pos_in || pos_out + *len < pos_out ||
-   pos_in + *len > isize)
-   return -EINVAL;
-
-   /* Don't allow dedupe past EOF in the dest file */
-   if (is_dedupe) {
-   loff_t  disize;
-
-   disize = i_size_read(inode_out);
-   if (pos_out >= disize || pos_out + *len > disize)
-   return -EINVAL;
-   }
-
-   /* If we're linking to EOF, continue to the block boundary. */
-   if (pos_in + *len == isize)
-   blen = ALIGN(isize, bs) - pos_in;
-   else
-   blen = *len;
-
-   /* Only reflink if we're aligned to block boundaries */
-   if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
-   !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
-   return -EINVAL;
-
-   /* Don't allow overlapped reflink within the same file */
-   if (same_inode) {
-   if (pos_out + blen > pos_in && pos_out < pos_in + blen)
-   return -EINVAL;
-   }
+   /* Check that we don't violate system file offset limits. */
+   ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
+   is_dedupe);
+   if (ret)
+   return ret;
 
/* Wait for the completion of any pending IOs on both files */
inode_dio_wait(inode_in);
@@ -1813,7 +1787,7 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 
return 1;
 }
-EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
+EXPORT_SYMBOL(vfs_clone_file_prep);
 
 int do_clone_file_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len)
@@ -1851,9 +1825,6 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in,
if (ret)
return ret;
 
-   if (pos_in + len > i_size_read(inode_in))
-