Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

2021-04-17 Thread Al Viro
On Sat, Apr 17, 2021 at 03:11:49PM -0700, Linus Torvalds wrote:
> On Sat, Apr 17, 2021 at 1:35 PM Al Viro  wrote:
> >
> > No, wait - we have non-NULL buf->prev_reclen, so we'll hit
> > with buf->error completely ignored.  Nevermind.
> 
> Yeah, I'm pretty sure I even tested that -EINTR case at one point.
> 
> Of course, it could easily have gotten broken again, so that's not a
> strong argument.
> 
> That said, the "buf->err" handling has always been very confusing, and
> it would be lovely to get rid of that confusion.
> 
> I don't remember why we did it that way, but I think it's because
> low-level filesystems ended up changing the error that the filldir()
> function returned or something like that.

Propagating errors from e.g. filldir() out through ->iterate() instances 
turns out to be painful.  If anything, considering that there's a lot
more ->iterate/->iterate_shared instances than ->actor ones, it would
make sense to change the calling conventions for the latter.  IOW,
stop pretending that they return an error value and just have them
return a bool: "should the caller keep going".

Here's what I've got sitting in a local branch; not sure if it would
be better to invert the rules, though - I went for "should we keep
going", but we could do "should we stop" instead.

Change calling conventions for filldir_t

filldir_t instances (directory iteration callbacks) used to return 0 for
"OK, keep going" or -E... for "stop".  Note that it's *NOT* how the
error values are reported - the rules for those are callback-dependent
and ->iterate{,_shared}() instances only care about zero vs. non-zero
(look at emit_dir() and friends).

So let's just return bool - it's less confusing that way.

Signed-off-by: Al Viro 
---
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 0302035781be..268c2ac616d1 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -890,3 +890,14 @@ been called or returned with non -EIOCBQUEUED code.
 
 mnt_want_write_file() can now only be paired with mnt_drop_write_file(),
 whereas previously it could be paired with mnt_drop_write() as well.
+
+---
+
+*mandatory*
+
+filldir_t (readdir callbacks) calling conventions have changed.  Instead of
+returning 0 or -E... it returns bool now.  true means "keep going" (as 0 used
+to) and false - "no more" (as -E... in old calling conventions).  Rationale:
+callers never looked at specific -E... values anyway.  ->iterate() and
+->iterate_shared() instances require no changes at all, all filldir_t ones in
+the tree converted.
diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c
index d5367a1c6300..6864794b663a 100644
--- a/arch/alpha/kernel/osf_sys.c
+++ b/arch/alpha/kernel/osf_sys.c
@@ -107,7 +107,7 @@ struct osf_dirent_callback {
 	int error;
 };
 
-static int
+static bool
 osf_filldir(struct dir_context *ctx, const char *name, int namlen,
 	    loff_t offset, u64 ino, unsigned int d_type)
 {
@@ -119,11 +119,11 @@ osf_filldir(struct dir_context *ctx, const char *name, int namlen,
 
 	buf->error = -EINVAL;	/* only used if we fail */
 	if (reclen > buf->count)
-		return -EINVAL;
+		return false;
 	d_ino = ino;
 	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
 		buf->error = -EOVERFLOW;
-		return -EOVERFLOW;
+		return false;
 	}
 	if (buf->basep) {
 		if (put_user(offset, buf->basep))
@@ -140,10 +140,10 @@ osf_filldir(struct dir_context *ctx, const char *name, int namlen,
 	dirent = (void __user *)dirent + reclen;
 	buf->dirent = dirent;
 	buf->count -= reclen;
-	return 0;
+	return true;
 Efault:
 	buf->error = -EFAULT;
-	return -EFAULT;
+	return false;
 }
 
 SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd,
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 714fcca9af99..aa3bdf389d47 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -24,9 +24,9 @@ static int afs_readdir(struct file *file, struct dir_context *ctx);
 static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
 static int afs_d_delete(const struct dentry *dentry);
 static void afs_d_iput(struct dentry *dentry, struct inode *inode);
-static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen,
+static bool afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int nlen,
 				  loff_t fpos, u64 ino, unsigned dtype);
-static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
+static bool afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
 			      loff_t fpos, u64 ino, unsigned dtype);
 static int afs_create(struct user_namespace *mnt_userns, struct inode *dir,
 		      struct dentry *dentry, umode_t mode, bool excl);
@@ -527,7 +527,7 @@ static int 

Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

2021-04-17 Thread Linus Torvalds
On Sat, Apr 17, 2021 at 1:35 PM Al Viro  wrote:
>
> No, wait - we have non-NULL buf->prev_reclen, so we'll hit
> with buf->error completely ignored.  Nevermind.

Yeah, I'm pretty sure I even tested that -EINTR case at one point.

Of course, it could easily have gotten broken again, so that's not a
strong argument.

That said, the "buf->err" handling has always been very confusing, and
it would be lovely to get rid of that confusion.

I don't remember why we did it that way, but I think it's because
low-level filesystems ended up changing the error that the filldir()
function returned or something like that.

 Linus


Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

2021-04-17 Thread Al Viro
On Sat, Apr 17, 2021 at 08:30:42PM +, Al Viro wrote:

> and that thing is still there.  However, it does *NOT* do what it might
> appear to be doing; it ends up with getdents() returning -EINVAL instead.
> What we need is
>   buf->error = -EINTR;
> in addition to that return (in all 3 such places).  Do you have any problems with
> the following delta?

No, wait - we have non-NULL buf->prev_reclen, so we'll hit
	if (buf.prev_reclen) {
		struct compat_linux_dirent __user * lastdirent;
		lastdirent = (void __user *)buf.current_dir - buf.prev_reclen;

		if (put_user(buf.ctx.pos, &lastdirent->d_off))
			error = -EFAULT;
		else
			error = count - buf.count;
	}
with buf->error completely ignored.  Nevermind.
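
(For context: in that function the callback's return value and buf.error are
merged just before the block quoted above, roughly - paraphrased from
fs/readdir.c, not quoted verbatim:

	error = iterate_dir(f.file, &buf.ctx);	/* -EINTR from the callback lands here */
	if (error >= 0)
		error = buf.error;		/* ...or here, had the callback set it */

and then the buf.prev_reclen branch above overwrites error with the byte
count anyway, which is why the missing buf->error assignment turns out not
to matter.)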


Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

2021-04-17 Thread Al Viro
[tytso Cc'd]

On Sat, Apr 17, 2021 at 06:09:44PM +, Al Viro wrote:

> > Al - fairly trivial patch applied, comments?
> 
> Should be fine...  FWIW, I've a patch in the same area, making those suckers
> return bool.  Seeing that they are only ever called via dir_emit(), dir_emit_dot()
> and dir_emit_dotdot() and all of those return ->actor(...) == 0...
> 
> Anyway, that'd be trivial to rebase on top of yours.

Actually... looking through that patch now I've noticed something fishy:
in 1f60fbe72749 ("ext4: allow readdir()'s of large empty directories to
be interrupted" we had
@@ -169,6 +169,8 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
 	}
 	dirent = buf->previous;
 	if (dirent) {
+		if (signal_pending(current))
+			return -EINTR;

and that thing is still there.  However, it does *NOT* do what it might
appear to be doing; it ends up with getdents() returning -EINVAL instead.
What we need is
buf->error = -EINTR;
in addition to that return (in all 3 such places).  Do you have any problems with
the following delta?

diff --git a/fs/readdir.c b/fs/readdir.c
index 19434b3c982c..c742db935847 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -239,8 +239,10 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
 		return -EOVERFLOW;
 	}
 	prev_reclen = buf->prev_reclen;
-	if (prev_reclen && signal_pending(current))
+	if (prev_reclen && signal_pending(current)) {
+		buf->error = -EINTR;
 		return -EINTR;
+	}
 	dirent = buf->current_dir;
 	prev = (void __user *) dirent - prev_reclen;
 	if (!user_write_access_begin(prev, reclen + prev_reclen))
@@ -321,8 +323,10 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
 	if (reclen > buf->count)
 		return -EINVAL;
 	prev_reclen = buf->prev_reclen;
-	if (prev_reclen && signal_pending(current))
+	if (prev_reclen && signal_pending(current)) {
+		buf->error = -EINTR;
 		return -EINTR;
+	}
 	dirent = buf->current_dir;
 	prev = (void __user *)dirent - prev_reclen;
 	if (!user_write_access_begin(prev, reclen + prev_reclen))
@@ -488,8 +492,10 @@ static int compat_filldir(struct dir_context *ctx, const char *name, int namlen,
 		return -EOVERFLOW;
 	}
 	prev_reclen = buf->prev_reclen;
-	if (prev_reclen && signal_pending(current))
+	if (prev_reclen && signal_pending(current)) {
+		buf->error = -EINTR;
 		return -EINTR;
+	}
 	dirent = buf->current_dir;
 	prev = (void __user *) dirent - prev_reclen;
 	if (!user_write_access_begin(prev, reclen + prev_reclen))


Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

2021-04-17 Thread Linus Torvalds
On Sat, Apr 17, 2021 at 12:44 PM Eric Dumazet  wrote:
>
> I thought put_cmsg() callers were from the kernel, with no possibility
> for user to abuse this interface trying to push GB of data.

My point is that "I thought" is not good enough for the unsafe interfaces.

It needs to be "I can see that the arguments are properly verified".

That is literally why they are called "unsafe". You need to make the
uses obviously safe. Because the functions themselves don't do that.

Linus


Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

2021-04-17 Thread Eric Dumazet
On Sat, Apr 17, 2021 at 6:03 PM Linus Torvalds wrote:
>
> On Fri, Apr 16, 2021 at 12:24 PM Eric Dumazet  wrote:
> >
> > From: Eric Dumazet 
> >
> > We have to loop only to copy u64 values.
> > After this first loop, we copy at most one u32, one u16 and one byte.
>
> As Al mentioned, at least in trivial cases the compiler understands
> that the subsequent loops can only be executed once, because earlier
> loops made sure that 'len' is always smaller than 2*size.
>
> But apparently the problem is the slightly more complex cases where
> the compiler just messes up and loses sight of that. Oh well.
>
> So the patch looks fine to me.
>
> HOWEVER.
>
> Looking at the put_cmsg() case in net-next, I'm very *VERY* unhappy
> about how you use those "unsafe" user access functions.
>
> Why? Because the point of the "unsafe" is to be a big red flag and
> make sure you are very VERY careful with it.
>
> And that code isn't.
>
> In particular, what if the "int len" argument is negative? Maybe it
> cannot happen, but when it comes to things like those unsafe user
> accessors, I really really want to see that all the arguments are
> *checked*.
>
> And you don't.
>
> You do
>
> if (!user_write_access_begin(cm, cmlen))
>
> ahead of time, and that will do basic range checking, but "cmlen" is
>
> sizeof(struct cmsghdr) + (len))
>
> so it's entirely possible that "cmlen" has a valid value, but "len"
> (and thus "cmlen - sizeof(*cm)", which is the length argument to the
> unsafe user copy) might be negative and that is never checked.
>
> End result: I want people to be a LOT more careful when they use those
> unsafe user space accessors. You need to make it REALLY REALLY obvious
> that everything you do is safe. And it's not at all obvious in the
> context of put_cmsg() - the safety currently relies on every single
> caller getting it right.

I thought put_cmsg() callers were from the kernel, with no possibility
for user to abuse this interface trying to push GB of data.

Which would be terrible even if we 'fix' possible overflows.

Maybe I was wrong.


>
> So either fix "len" to be some restricted type (ie "unsigned short"),
> or make really really sure that "len" is valid (ie never negative, and
> the cmlen addition didn't overflow.
>
> Really. The "unsafe" user accesses are named that way very explicitly,
> and for a very very good reason: the safety needs to be guaranteed and
> obvious within the context of those accesses. Not within some "oh,
> nobody will ever call this with a negative argument" garbage bullshit.
>
>   Linus


Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

2021-04-17 Thread Al Viro
On Sat, Apr 17, 2021 at 09:27:04AM -0700, Linus Torvalds wrote:
> On Sat, Apr 17, 2021 at 9:08 AM Linus Torvalds wrote:
> >
> > Side note: I'm, looking at the readdir cases that I wrote, and I have
> > to just say that is broken too. So "stones and glass houses" etc, and
> > I'll have to fix that too.
> 
> In particular, the very very old OLD_READDIR interface that only fills
> in one dirent at a time didn't call verify_dirent_name(). Same for the
> compat version.
> 
> This requires a corrupt filesystem to be an issue (and even then,
> most/all would have the length of a directory entry in an 'unsigned
> char', so even corrupt filesystems would generally never have a
> negative name length).
> 
> So I don't think it's an issue in _practice_, but at the same time it
> is very much an example of the same issue that put_cmsg() has in
> net-next: unsafe user copies should be fully guarded and not have some
> "but this would never happen because callers would never do anything
> bad".
> 
> Al - fairly trivial patch applied, comments?

Should be fine...  FWIW, I've a patch in the same area, making those suckers
return bool.  Seeing that they are only ever called via dir_emit(), dir_emit_dot()
and dir_emit_dotdot() and all of those return ->actor(...) == 0...
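
(For reference, those helpers in include/linux/fs.h are essentially
one-liners of this shape - paraphrased rather than quoted:

static inline bool dir_emit(struct dir_context *ctx, const char *name,
			    int namelen, u64 ino, unsigned type)
{
	return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0;
}

so the only thing ->iterate() instances ever see is "zero vs. non-zero",
which is what makes the switch to bool mechanical.)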

Anyway, that'd be trivial to rebase on top of yours.


Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

2021-04-17 Thread Linus Torvalds
On Sat, Apr 17, 2021 at 9:08 AM Linus Torvalds wrote:
>
> Side note: I'm, looking at the readdir cases that I wrote, and I have
> to just say that is broken too. So "stones and glass houses" etc, and
> I'll have to fix that too.

In particular, the very very old OLD_READDIR interface that only fills
in one dirent at a time didn't call verify_dirent_name(). Same for the
compat version.

This requires a corrupt filesystem to be an issue (and even then,
most/all would have the length of a directory entry in an 'unsigned
char', so even corrupt filesystems would generally never have a
negative name length).
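
(For reference, the missing check is a small one - roughly this shape,
paraphrased from fs/readdir.c rather than quoted:

static int verify_dirent_name(const char *name, int len)
{
	if (len <= 0 || len >= PATH_MAX)
		return -EIO;
	if (memchr(name, '/', len))
		return -EIO;
	return 0;
}

i.e. it rejects non-positive or absurdly long name lengths and names
containing '/', which is exactly the sort of garbage a corrupt filesystem
could otherwise feed into the user copy.)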

So I don't think it's an issue in _practice_, but at the same time it
is very much an example of the same issue that put_cmsg() has in
net-next: unsafe user copies should be fully guarded and not have some
"but this would never happen because callers would never do anything
bad".

Al - fairly trivial patch applied, comments?

 Linus
 fs/readdir.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/readdir.c b/fs/readdir.c
index 19434b3c982c..09e8ed7d4161 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -150,6 +150,9 @@ static int fillonedir(struct dir_context *ctx, const char *name, int namlen,
 
 	if (buf->result)
 		return -EINVAL;
+	buf->result = verify_dirent_name(name, namlen);
+	if (buf->result < 0)
+		return buf->result;
 	d_ino = ino;
 	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
 		buf->result = -EOVERFLOW;
@@ -405,6 +408,9 @@ static int compat_fillonedir(struct dir_context *ctx, const char *name,
 
 	if (buf->result)
 		return -EINVAL;
+	buf->result = verify_dirent_name(name, namlen);
+	if (buf->result < 0)
+		return buf->result;
 	d_ino = ino;
 	if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) {
 		buf->result = -EOVERFLOW;


Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

2021-04-17 Thread Linus Torvalds
On Sat, Apr 17, 2021 at 9:03 AM Linus Torvalds wrote:
>
> Really. The "unsafe" user accesses are named that way very explicitly,
> and for a very very good reason: the safety needs to be guaranteed and
> obvious within the context of those accesses. Not within some "oh,
> nobody will ever call this with a negative argument" garbage bullshit.

Side note: I'm looking at the readdir cases that I wrote, and I have
to just say that is broken too. So "stones and glass houses" etc, and
I'll have to fix that too.

 Linus


Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

2021-04-17 Thread Linus Torvalds
On Fri, Apr 16, 2021 at 12:24 PM Eric Dumazet  wrote:
>
> From: Eric Dumazet 
>
> We have to loop only to copy u64 values.
> After this first loop, we copy at most one u32, one u16 and one byte.

As Al mentioned, at least in trivial cases the compiler understands
that the subsequent loops can only be executed once, because earlier
loops made sure that 'len' is always smaller than 2*size.

But apparently the problem is the slightly more complex cases where
the compiler just messes up and loses sight of that. Oh well.
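
To spell the invariant out (ordinary C for illustration only, not the actual
unsafe_copy_to_user() macro): after the 8-byte loop, len is necessarily below
8, so the 4-byte step can run at most once, after which len is below 4, and
so on - which is why the later loops can become plain ifs:

#include <string.h>

static void copy_tail(unsigned char *dst, const unsigned char *src,
		      unsigned long len)
{
	while (len >= 8) { memcpy(dst, src, 8); dst += 8; src += 8; len -= 8; }
	/* here len < 8, so each of the remaining steps happens at most once */
	if (len >= 4) { memcpy(dst, src, 4); dst += 4; src += 4; len -= 4; }
	if (len >= 2) { memcpy(dst, src, 2); dst += 2; src += 2; len -= 2; }
	if (len)
		*dst = *src;
}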

So the patch looks fine to me.

HOWEVER.

Looking at the put_cmsg() case in net-next, I'm very *VERY* unhappy
about how you use those "unsafe" user access functions.

Why? Because the point of the "unsafe" is to be a big red flag and
make sure you are very VERY careful with it.

And that code isn't.

In particular, what if the "int len" argument is negative? Maybe it
cannot happen, but when it comes to things like those unsafe user
accessors, I really really want to see that all the arguments are
*checked*.

And you don't.

You do

if (!user_write_access_begin(cm, cmlen))

ahead of time, and that will do basic range checking, but "cmlen" is

sizeof(struct cmsghdr) + (len))

so it's entirely possible that "cmlen" has a valid value, but "len"
(and thus "cmlen - sizeof(*cm)", which is the length argument to the
unsafe user copy) might be negative and that is never checked.

End result: I want people to be a LOT more careful when they use those
unsafe user space accessors. You need to make it REALLY REALLY obvious
that everything you do is safe. And it's not at all obvious in the
context of put_cmsg() - the safety currently relies on every single
caller getting it right.

So either fix "len" to be some restricted type (ie "unsigned short"),
or make really really sure that "len" is valid (ie never negative, and
the cmlen addition didn't overflow).
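
A sketch of what that second option could look like (illustrative only - not
how put_cmsg() in net-next is actually written; "cm", "len" and "cmlen" are
the ones discussed above, and check_add_overflow() is the helper from
linux/overflow.h):

	size_t cmlen;

	if (len < 0)
		return -EINVAL;
	if (check_add_overflow(sizeof(struct cmsghdr), (size_t)len, &cmlen))
		return -EINVAL;
	if (!user_write_access_begin(cm, cmlen))
		return -EFAULT;
	/* only now is "cmlen - sizeof(*cm)" known to be a sane copy length */

That keeps the proof of safety next to the unsafe copy instead of spread
across every caller.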

Really. The "unsafe" user accesses are named that way very explicitly,
and for a very very good reason: the safety needs to be guaranteed and
obvious within the context of those accesses. Not within some "oh,
nobody will ever call this with a negative argument" garbage bullshit.

  Linus


RE: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

2021-04-17 Thread David Laight
From: Al Viro  On Behalf Of Al Viro
> Sent: 16 April 2021 20:44
> On Fri, Apr 16, 2021 at 12:24:13PM -0700, Eric Dumazet wrote:
> > From: Eric Dumazet 
> >
> > We have to loop only to copy u64 values.
> > After this first loop, we copy at most one u32, one u16 and one byte.
> 
> Does it actually yield a better code?
> 
> FWIW, this
> void bar(unsigned);
> void foo(unsigned n)
> {
>   while (n >= 8) {
>   bar(n);
>   n -= 8;
>   }
>   while (n >= 4) {
>   bar(n);
>   n -= 4;
>   }
>   while (n >= 2) {
>   bar(n);
>   n -= 2;
>   }
>   while (n >= 1) {
>   bar(n);
>   n -= 1;
>   }
> }

This variant might be better:

void foo(unsigned n)
{
	while (n >= 8) {
		bar(8);
		n -= 8;
	}
	if (likely(!n))
		return;
	if (n & 4)
		bar(4);
	if (n & 2)
		bar(2);
	if (n & 1)
		bar(1);
}

I think Al's version might have optimised down to this,
but Eric's asm contains the n -= 4/2/1;

OTOH gcc can make a real pig's breakfast of code like this!

David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, 
UK
Registration No: 1397386 (Wales)



Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

2021-04-16 Thread Eric Dumazet
On Fri, Apr 16, 2021 at 10:11 PM Eric Dumazet  wrote:
>
> On Fri, Apr 16, 2021 at 9:44 PM Al Viro  wrote:
> >
> > On Fri, Apr 16, 2021 at 12:24:13PM -0700, Eric Dumazet wrote:
> > > From: Eric Dumazet 
> > >
> > > We have to loop only to copy u64 values.
> > > After this first loop, we copy at most one u32, one u16 and one byte.
> >
> > Does it actually yield a better code?
> >
>
> Yes, my patch gives a better code, on actual kernel use-case
>
> (net-next tree, look at put_cmsg())
>
 5ca: 48 89 0f                mov    %rcx,(%rdi)
 5cd: 89 77 08                mov    %esi,0x8(%rdi)
 5d0: 89 57 0c                mov    %edx,0xc(%rdi)
 5d3: 48 83 c7 10             add    $0x10,%rdi
 5d7: 48 83 c1 f0             add    $0xfff0,%rcx
 5db: 48 83 f9 07             cmp    $0x7,%rcx
 5df: 76 40                   jbe    621
 5e1: 66 66 66 66 66 66 2e    data16 data16 data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
 5e8: 0f 1f 84 00 00 00 00
 5ef: 00
 5f0: 49 8b 10                mov    (%r8),%rdx
 5f3: 48 89 17                mov    %rdx,(%rdi)
 5f6: 48 83 c7 08             add    $0x8,%rdi
 5fa: 49 83 c0 08             add    $0x8,%r8
 5fe: 48 83 c1 f8             add    $0xfff8,%rcx
 602: 48 83 f9 07             cmp    $0x7,%rcx
 606: 77 e8                   ja     5f0
 608: eb 17                   jmp    621
 60a: 66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
 610: 41 8b 10                mov    (%r8),%edx
 613: 89 17                   mov    %edx,(%rdi)
 615: 48 83 c7 04             add    $0x4,%rdi
 619: 49 83 c0 04             add    $0x4,%r8
 61d: 48 83 c1 fc             add    $0xfffc,%rcx
 621: 48 83 f9 03             cmp    $0x3,%rcx
 625: 77 e9                   ja     610
 627: eb 1a                   jmp    643
 629: 0f 1f 80 00 00 00 00    nopl   0x0(%rax)
 630: 41 0f b7 10             movzwl (%r8),%edx
 634: 66 89 17                mov    %dx,(%rdi)
 637: 48 83 c7 02             add    $0x2,%rdi
 63b: 49 83 c0 02             add    $0x2,%r8
 63f: 48 83 c1 fe             add    $0xfffe,%rcx
 643: 48 83 f9 01             cmp    $0x1,%rcx
 647: 77 e7                   ja     630
 649: eb 15                   jmp    660
 64b: 0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
 650: 41 0f b6 08             movzbl (%r8),%ecx
 654: 88 0f                   mov    %cl,(%rdi)
 656: 48 83 c7 01             add    $0x1,%rdi
 65a: 49 83 c0 01             add    $0x1,%r8
 65e: 31 c9                   xor    %ecx,%ecx
 660: 48 85 c9                test   %rcx,%rcx
 663: 75 eb                   jne    650

After the change, the code is now what we would expect (no jmp around):
 5db: 48 83 f9 08             cmp    $0x8,%rcx
 5df: 72 27                   jb     608
 5e1: 66 66 66 66 66 66 2e    data16 data16 data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
 5e8: 0f 1f 84 00 00 00 00
 5ef: 00
 5f0: 49 8b 10                mov    (%r8),%rdx
 5f3: 48 89 17                mov    %rdx,(%rdi)
 5f6: 48 83 c7 08             add    $0x8,%rdi
 5fa: 49 83 c0 08             add    $0x8,%r8
 5fe: 48 83 c1 f8             add    $0xfff8,%rcx
 602: 48 83 f9 08             cmp    $0x8,%rcx
 606: 73 e8                   jae    5f0
 608: 48 83 f9 04             cmp    $0x4,%rcx
 60c: 72 11                   jb     61f
 60e: 41 8b 10                mov    (%r8),%edx
 611: 89 17                   mov    %edx,(%rdi)
 613: 48 83 c7 04             add    $0x4,%rdi
 617: 49 83 c0 04             add    $0x4,%r8
 61b: 48 83 c1 fc             add    $0xfffc,%rcx
 61f: 48 83 f9 02             cmp    $0x2,%rcx
 623: 72 13                   jb     638
 625: 41 0f b7 10             movzwl (%r8),%edx
 629: 66 89 17                mov    %dx,(%rdi)
 62c: 48 83 c7 02             add    $0x2,%rdi
 630: 49 83 c0 02             add    $0x2,%r8
 634: 48 83 c1 fe             add    $0xfffe,%rcx
 638: 48 85 c9                test   %rcx,%rcx
 63b: 74 05                   je     642
 63d: 41 8a 08                mov    (%r8),%cl
 640: 88 0f                   mov    %cl,(%rdi)

As I said, it's minor, I am sure you can come up with something much better!

Thanks !

>

>
> > FWIW, this
> > void bar(unsigned);
> > void foo(unsigned n)
> > {
> > while (n >= 8) {
> > bar(n);
> > n -= 8;
> > }
> > while (n >= 4) {
> > bar(n);
> > n -= 4;
> > }
> > while (n >= 2) {
> > bar(n);
> > n -= 2;
> > }
> > while (n >= 1) {
> > bar(n);
> > n -= 1;
> > }
> > }
> >
> > will compile (with -O2) to
> > pushq   %rbp
> > pushq   %rbx
> > movl%edi, %ebx
> > subq$8, %rsp
> > cmpl$7, %edi
> > jbe .L2
> > movl%edi, %ebp
> > .L3:
> > movl%ebp, %edi
> > subl$8, %ebp
> > callbar@PLT
> > cmpl$7, %ebp
> > ja  .L3
> > andl$7, %ebx
> > .L2:
> > cmpl$3, %ebx
> >

Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

2021-04-16 Thread Eric Dumazet
On Fri, Apr 16, 2021 at 9:44 PM Al Viro  wrote:
>
> On Fri, Apr 16, 2021 at 12:24:13PM -0700, Eric Dumazet wrote:
> > From: Eric Dumazet 
> >
> > We have to loop only to copy u64 values.
> > After this first loop, we copy at most one u32, one u16 and one byte.
>
> Does it actually yield a better code?
>

Yes, my patch gives better code on an actual kernel use-case

(net-next tree, look at put_cmsg())

 5ca: 48 89 0f                mov    %rcx,(%rdi)
 5cd: 89 77 08                mov    %esi,0x8(%rdi)
 5d0: 89 57 0c                mov    %edx,0xc(%rdi)
 5d3: 48 83 c7 10             add    $0x10,%rdi
 5d7: 48 83 c1 f0             add    $0xfff0,%rcx
 5db: 48 83 f9 07             cmp    $0x7,%rcx
 5df: 76 40                   jbe    621
 5e1: 66 66 66 66 66 66 2e    data16 data16 data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
 5e8: 0f 1f 84 00 00 00 00
 5ef: 00
 5f0: 49 8b 10                mov    (%r8),%rdx
 5f3: 48 89 17                mov    %rdx,(%rdi)
 5f6: 48 83 c7 08             add    $0x8,%rdi
 5fa: 49 83 c0 08             add    $0x8,%r8
 5fe: 48 83 c1 f8             add    $0xfff8,%rcx
 602: 48 83 f9 07             cmp    $0x7,%rcx
 606: 77 e8                   ja     5f0
 608: eb 17                   jmp    621
 60a: 66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
 610: 41 8b 10                mov    (%r8),%edx
 613: 89 17                   mov    %edx,(%rdi)
 615: 48 83 c7 04             add    $0x4,%rdi
 619: 49 83 c0 04             add    $0x4,%r8
 61d: 48 83 c1 fc             add    $0xfffc,%rcx
 621: 48 83 f9 03             cmp    $0x3,%rcx
 625: 77 e9                   ja     610
 627: eb 1a                   jmp    643
 629: 0f 1f 80 00 00 00 00    nopl   0x0(%rax)
 630: 41 0f b7 10             movzwl (%r8),%edx
 634: 66 89 17                mov    %dx,(%rdi)
 637: 48 83 c7 02             add    $0x2,%rdi
 63b: 49 83 c0 02             add    $0x2,%r8
 63f: 48 83 c1 fe             add    $0xfffe,%rcx
 643: 48 83 f9 01             cmp    $0x1,%rcx
 647: 77 e7                   ja     630
 649: eb 15                   jmp    660
 64b: 0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
 650: 41 0f b6 08             movzbl (%r8),%ecx
 654: 88 0f                   mov    %cl,(%rdi)
 656: 48 83 c7 01             add    $0x1,%rdi
 65a: 49 83 c0 01             add    $0x1,%r8
 65e: 31 c9                   xor    %ecx,%ecx
 660: 48 85 c9                test   %rcx,%rcx
 663: 75 eb                   jne    650


> FWIW, this
> void bar(unsigned);
> void foo(unsigned n)
> {
> while (n >= 8) {
> bar(n);
> n -= 8;
> }
> while (n >= 4) {
> bar(n);
> n -= 4;
> }
> while (n >= 2) {
> bar(n);
> n -= 2;
> }
> while (n >= 1) {
> bar(n);
> n -= 1;
> }
> }
>
> will compile (with -O2) to
> pushq   %rbp
> pushq   %rbx
> movl%edi, %ebx
> subq$8, %rsp
> cmpl$7, %edi
> jbe .L2
> movl%edi, %ebp
> .L3:
> movl%ebp, %edi
> subl$8, %ebp
> callbar@PLT
> cmpl$7, %ebp
> ja  .L3
> andl$7, %ebx
> .L2:
> cmpl$3, %ebx
> jbe .L4
> movl%ebx, %edi
> andl$3, %ebx
> callbar@PLT
> .L4:
> cmpl$1, %ebx
> jbe .L5
> movl%ebx, %edi
> andl$1, %ebx
> callbar@PLT
> .L5:
> testl   %ebx, %ebx
> je  .L1
> addq$8, %rsp
> movl$1, %edi
> popq%rbx
> popq%rbp
> jmp bar@PLT
> .L1:
> addq$8, %rsp
> popq%rbx
> popq%rbp
> ret
>
> i.e. loop + if + if + if...


Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

2021-04-16 Thread Al Viro
On Fri, Apr 16, 2021 at 12:24:13PM -0700, Eric Dumazet wrote:
> From: Eric Dumazet 
> 
> We have to loop only to copy u64 values.
> After this first loop, we copy at most one u32, one u16 and one byte.

Does it actually yield a better code?

FWIW, this
void bar(unsigned);
void foo(unsigned n)
{
	while (n >= 8) {
		bar(n);
		n -= 8;
	}
	while (n >= 4) {
		bar(n);
		n -= 4;
	}
	while (n >= 2) {
		bar(n);
		n -= 2;
	}
	while (n >= 1) {
		bar(n);
		n -= 1;
	}
}

will compile (with -O2) to
	pushq   %rbp
	pushq   %rbx
	movl    %edi, %ebx
	subq    $8, %rsp
	cmpl    $7, %edi
	jbe     .L2
	movl    %edi, %ebp
.L3:
	movl    %ebp, %edi
	subl    $8, %ebp
	call    bar@PLT
	cmpl    $7, %ebp
	ja      .L3
	andl    $7, %ebx
.L2:
	cmpl    $3, %ebx
	jbe     .L4
	movl    %ebx, %edi
	andl    $3, %ebx
	call    bar@PLT
.L4:
	cmpl    $1, %ebx
	jbe     .L5
	movl    %ebx, %edi
	andl    $1, %ebx
	call    bar@PLT
.L5:
	testl   %ebx, %ebx
	je      .L1
	addq    $8, %rsp
	movl    $1, %edi
	popq    %rbx
	popq    %rbp
	jmp     bar@PLT
.L1:
	addq    $8, %rsp
	popq    %rbx
	popq    %rbp
	ret

i.e. loop + if + if + if...