[PATCH 3/9] iov_iter: refactor rw_copy_check_uvector and import_iovec

2020-09-24 Thread Christoph Hellwig
Split rw_copy_check_uvector into two new helpers with more sensible
calling conventions:

 - iovec_from_user copies a iovec from userspace either into the provided
   stack buffer if it fits, or allocates a new buffer for it.  Returns
   the actually used iovec.  It also verifies that iov_len does fit a
   signed type, and handles compat iovecs if the compat flag is set.
 - __import_iovec consolidates the native and compat versions of
   import_iovec. It calls iovec_from_user, then validates each iovec
   actually points to user addresses, and ensures the total length
   doesn't overflow.

This has two major implications:

 - the access_process_vm case loses the total lenght checking, which
   wasn't required anyway, given that each call receives two iovecs
   for the local and remote side of the operation, and it verifies
   the total length on the local side already.
 - instead of a single loop there now are two loops over the iovecs.
   Given that the iovecs are cache hot this doesn't make a major
   difference

Signed-off-by: Christoph Hellwig 
---
 include/linux/compat.h |   6 -
 include/linux/fs.h |  13 --
 include/linux/uio.h|  12 +-
 lib/iov_iter.c | 300 -
 mm/process_vm_access.c |  34 +++--
 5 files changed, 138 insertions(+), 227 deletions(-)

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 654c1ec36671a4..b930de791ff16b 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -451,12 +451,6 @@ extern long compat_arch_ptrace(struct task_struct *child, 
compat_long_t request,
 
 struct epoll_event;/* fortunately, this one is fixed-layout */
 
-extern ssize_t compat_rw_copy_check_uvector(int type,
-   const struct compat_iovec __user *uvector,
-   unsigned long nr_segs,
-   unsigned long fast_segs, struct iovec *fast_pointer,
-   struct iovec **ret_pointer);
-
 extern void __user *compat_alloc_user_space(unsigned long len);
 
 int compat_restore_altstack(const compat_stack_t __user *uss);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7519ae003a082c..e69b45b6cc7b5f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -178,14 +178,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t 
offset,
 /* File supports async buffered reads */
 #define FMODE_BUF_RASYNC   ((__force fmode_t)0x4000)
 
-/*
- * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
- * that indicates that they should check the contents of the iovec are
- * valid, but not check the memory that the iovec elements
- * points too.
- */
-#define CHECK_IOVEC_ONLY -1
-
 /*
  * Attribute flags.  These should be or-ed together to figure out what
  * has been changed!
@@ -1887,11 +1879,6 @@ static inline int call_mmap(struct file *file, struct 
vm_area_struct *vma)
return file->f_op->mmap(file, vma);
 }
 
-ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
- unsigned long nr_segs, unsigned long fast_segs,
- struct iovec *fast_pointer,
- struct iovec **ret_pointer);
-
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 3835a8a8e9eae0..92c11fe41c6228 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -266,9 +266,15 @@ bool csum_and_copy_from_iter_full(void *addr, size_t 
bytes, __wsum *csum, struct
 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
struct iov_iter *i);
 
-ssize_t import_iovec(int type, const struct iovec __user * uvector,
-unsigned nr_segs, unsigned fast_segs,
-struct iovec **iov, struct iov_iter *i);
+struct iovec *iovec_from_user(const struct iovec __user *uvector,
+   unsigned long nr_segs, unsigned long fast_segs,
+   struct iovec *fast_iov, bool compat);
+ssize_t import_iovec(int type, const struct iovec __user *uvec,
+unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+struct iov_iter *i);
+ssize_t __import_iovec(int type, const struct iovec __user *uvec,
+unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+struct iov_iter *i, bool compat);
 
 #ifdef CONFIG_COMPAT
 struct compat_iovec;
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index ccea9db3f72be8..d5d8afe31fca16 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -7,6 +7,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1650,107 +1651,133 @@ const void *dup_iter(struct iov_iter *new, struct 
iov_iter *old, gfp_t flags)
 }
 EXPORT_SYMBOL(dup_iter);
 
-/**
- * rw_copy_check_uvector() - Copy an array of  iovec from userspace
- * 

Re: [PATCH 3/9] iov_iter: refactor rw_copy_check_uvector and import_iovec

2020-09-23 Thread Al Viro
On Wed, Sep 23, 2020 at 02:38:24PM +, David Laight wrote:
> From: Al Viro
> > Sent: 23 September 2020 15:17
> > 
> > On Wed, Sep 23, 2020 at 08:05:41AM +0200, Christoph Hellwig wrote:
> > 
> > > +struct iovec *iovec_from_user(const struct iovec __user *uvec,
> > > + unsigned long nr_segs, unsigned long fast_segs,
> > 
> > Hmm...  For fast_segs unsigned long had always been ridiculous
> > (4G struct iovec on caller stack frame?), but that got me wondering about
> > nr_segs and I wish I'd thought of that when introducing import_iovec().
> > 
> > The thing is, import_iovec() takes unsigned int there.  Which is fine
> > (hell, the maximal value that can be accepted in 1024), except that
> > we do pass unsigned long syscall argument to it in some places.
> 
> It will make diddly-squit difference.
> The parameters end up in registers on most calling conventions.
> Plausibly you get an extra 'REX' byte on x86 for the 64bit value.
> What you want to avoid is explicit sign/zero extension and value
> masking after arithmetic.

Don't tell me what I want; your telepathic abilities are consistently sucky.

I am *NOT* talking about microoptimization here.  I have described
the behaviour change of syscall caused by commit 5 years ago.  Which is
generally considered a problem.  Then I asked whether that behaviour
change would fall under the "if nobody noticed, it's not a userland ABI
breakage" exception.

Could you show me the point where I have expressed concerns about
the quality of amd64 code generated for that thing, before or after
the change in question?


Re: [PATCH 3/9] iov_iter: refactor rw_copy_check_uvector and import_iovec

2020-09-23 Thread Al Viro
On Wed, Sep 23, 2020 at 03:16:54PM +0100, Al Viro wrote:
> On Wed, Sep 23, 2020 at 08:05:41AM +0200, Christoph Hellwig wrote:
> 
> > +struct iovec *iovec_from_user(const struct iovec __user *uvec,
> > +   unsigned long nr_segs, unsigned long fast_segs,
> 
> Hmm...  For fast_segs unsigned long had always been ridiculous
> (4G struct iovec on caller stack frame?), but that got me wondering about
> nr_segs and I wish I'd thought of that when introducing import_iovec().
> 
> The thing is, import_iovec() takes unsigned int there.  Which is fine
> (hell, the maximal value that can be accepted in 1024), except that
> we do pass unsigned long syscall argument to it in some places.
> 
> E.g. vfs_readv() quietly truncates vlen to 32 bits, and vlen can
> come unchanged through sys_readv() -> do_readv() -> vfs_readv().
> With unsigned long passed by syscall glue.
> 
> AFAICS, passing 4G+1 as the third argument to readv(2) on 64bit box
> will be quietly treated as 1 these days.  Which would be fine, except
> that before "switch {compat_,}do_readv_writev() to {compat_,}import_iovec()"
> it used to fail with -EINVAL.
> 
> Userland, BTW, describes readv(2) iovcnt as int; process_vm_readv(),
> OTOH, has these counts unsigned long from the userland POV...
> 
> I suppose we ought to switch import_iovec() to unsigned long for nr_segs ;-/
> Strictly speaking that had been a userland ABI change, even though nothing
> except regression tests checking for expected errors would've been likely
> to notice.  And it looks like no regression tests covered that one...
> 
> Linus, does that qualify for your "if no userland has noticed the change,
> it's not a breakage"?

Egads...  We have sys_readv() with unsigned long for file descriptor, since
1.3.31 when it had been introduced.  And originally it did comparison with
NR_OPEN right in sys_readv().  Then in 2.1.60 it had been switched to
fget(), which used to take unsigned long at that point.  And in 2.1.90pre1
it went unsigned int, so non-zero upper 32 bits in readv(2) first argument
ceased to cause EBADF...

Of course, libc had it as int fd all along.


RE: [PATCH 3/9] iov_iter: refactor rw_copy_check_uvector and import_iovec

2020-09-23 Thread David Laight
From: Al Viro
> Sent: 23 September 2020 15:17
> 
> On Wed, Sep 23, 2020 at 08:05:41AM +0200, Christoph Hellwig wrote:
> 
> > +struct iovec *iovec_from_user(const struct iovec __user *uvec,
> > +   unsigned long nr_segs, unsigned long fast_segs,
> 
> Hmm...  For fast_segs unsigned long had always been ridiculous
> (4G struct iovec on caller stack frame?), but that got me wondering about
> nr_segs and I wish I'd thought of that when introducing import_iovec().
> 
> The thing is, import_iovec() takes unsigned int there.  Which is fine
> (hell, the maximal value that can be accepted in 1024), except that
> we do pass unsigned long syscall argument to it in some places.

It will make diddly-squit difference.
The parameters end up in registers on most calling conventions.
Plausibly you get an extra 'REX' byte on x86 for the 64bit value.
What you want to avoid is explicit sign/zero extension and value
masking after arithmetic.

On x86-64 the 'horrid' type is actually 'signed int'.
It often needs sign extending to 64bits (eg when being
used as an array subscript).

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)



Re: [PATCH 3/9] iov_iter: refactor rw_copy_check_uvector and import_iovec

2020-09-23 Thread Al Viro
On Wed, Sep 23, 2020 at 08:05:41AM +0200, Christoph Hellwig wrote:

> +struct iovec *iovec_from_user(const struct iovec __user *uvec,
> + unsigned long nr_segs, unsigned long fast_segs,

Hmm...  For fast_segs unsigned long had always been ridiculous
(4G struct iovec on caller stack frame?), but that got me wondering about
nr_segs and I wish I'd thought of that when introducing import_iovec().

The thing is, import_iovec() takes unsigned int there.  Which is fine
(hell, the maximal value that can be accepted in 1024), except that
we do pass unsigned long syscall argument to it in some places.

E.g. vfs_readv() quietly truncates vlen to 32 bits, and vlen can
come unchanged through sys_readv() -> do_readv() -> vfs_readv().
With unsigned long passed by syscall glue.

AFAICS, passing 4G+1 as the third argument to readv(2) on 64bit box
will be quietly treated as 1 these days.  Which would be fine, except
that before "switch {compat_,}do_readv_writev() to {compat_,}import_iovec()"
it used to fail with -EINVAL.

Userland, BTW, describes readv(2) iovcnt as int; process_vm_readv(),
OTOH, has these counts unsigned long from the userland POV...

I suppose we ought to switch import_iovec() to unsigned long for nr_segs ;-/
Strictly speaking that had been a userland ABI change, even though nothing
except regression tests checking for expected errors would've been likely
to notice.  And it looks like no regression tests covered that one...

Linus, does that qualify for your "if no userland has noticed the change,
it's not a breakage"?


[PATCH 3/9] iov_iter: refactor rw_copy_check_uvector and import_iovec

2020-09-23 Thread Christoph Hellwig
Split rw_copy_check_uvector into two new helpers with more sensible
calling conventions:

 - iovec_from_user copies a iovec from userspace either into the provided
   stack buffer if it fits, or allocates a new buffer for it.  Returns
   the actually used iovec.  It also verifies that iov_len does fit a
   signed type, and handles compat iovecs if the compat flag is set.
 - __import_iovec consolidates the native and compat versions of
   import_iovec. It calls iovec_from_user, then validates each iovec
   actually points to user addresses, and ensures the total length
   doesn't overflow.

This has two major implications:

 - the access_process_vm case loses the total lenght checking, which
   wasn't required anyway, given that each call receives two iovecs
   for the local and remote side of the operation, and it verifies
   the total length on the local side already.
 - instead of a single loop there now are two loops over the iovecs.
   Given that the iovecs are cache hot this doesn't make a major
   difference

Signed-off-by: Christoph Hellwig 
---
 include/linux/compat.h |   6 -
 include/linux/fs.h |  13 --
 include/linux/uio.h|  12 +-
 lib/iov_iter.c | 300 -
 mm/process_vm_access.c |  34 +++--
 5 files changed, 138 insertions(+), 227 deletions(-)

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 654c1ec36671a4..b930de791ff16b 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -451,12 +451,6 @@ extern long compat_arch_ptrace(struct task_struct *child, 
compat_long_t request,
 
 struct epoll_event;/* fortunately, this one is fixed-layout */
 
-extern ssize_t compat_rw_copy_check_uvector(int type,
-   const struct compat_iovec __user *uvector,
-   unsigned long nr_segs,
-   unsigned long fast_segs, struct iovec *fast_pointer,
-   struct iovec **ret_pointer);
-
 extern void __user *compat_alloc_user_space(unsigned long len);
 
 int compat_restore_altstack(const compat_stack_t __user *uss);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7519ae003a082c..e69b45b6cc7b5f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -178,14 +178,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t 
offset,
 /* File supports async buffered reads */
 #define FMODE_BUF_RASYNC   ((__force fmode_t)0x4000)
 
-/*
- * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
- * that indicates that they should check the contents of the iovec are
- * valid, but not check the memory that the iovec elements
- * points too.
- */
-#define CHECK_IOVEC_ONLY -1
-
 /*
  * Attribute flags.  These should be or-ed together to figure out what
  * has been changed!
@@ -1887,11 +1879,6 @@ static inline int call_mmap(struct file *file, struct 
vm_area_struct *vma)
return file->f_op->mmap(file, vma);
 }
 
-ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
- unsigned long nr_segs, unsigned long fast_segs,
- struct iovec *fast_pointer,
- struct iovec **ret_pointer);
-
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 3835a8a8e9eae0..92c11fe41c6228 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -266,9 +266,15 @@ bool csum_and_copy_from_iter_full(void *addr, size_t 
bytes, __wsum *csum, struct
 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
struct iov_iter *i);
 
-ssize_t import_iovec(int type, const struct iovec __user * uvector,
-unsigned nr_segs, unsigned fast_segs,
-struct iovec **iov, struct iov_iter *i);
+struct iovec *iovec_from_user(const struct iovec __user *uvector,
+   unsigned long nr_segs, unsigned long fast_segs,
+   struct iovec *fast_iov, bool compat);
+ssize_t import_iovec(int type, const struct iovec __user *uvec,
+unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+struct iov_iter *i);
+ssize_t __import_iovec(int type, const struct iovec __user *uvec,
+unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+struct iov_iter *i, bool compat);
 
 #ifdef CONFIG_COMPAT
 struct compat_iovec;
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index ccea9db3f72be8..d5d8afe31fca16 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -7,6 +7,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1650,107 +1651,133 @@ const void *dup_iter(struct iov_iter *new, struct 
iov_iter *old, gfp_t flags)
 }
 EXPORT_SYMBOL(dup_iter);
 
-/**
- * rw_copy_check_uvector() - Copy an array of  iovec from userspace
- *