[PATCH v2 3/3] powerpc/irq: don't use current_stack_pointer() in do_IRQ()

2020-01-23 Thread Christophe Leroy
Until commit 7306e83ccf5c ("powerpc: Don't use CURRENT_THREAD_INFO to
find the stack"), the current stack base address was obtained by
calling current_thread_info(). That inline function was simply masking
out the value of r1.

In that commit, it was changed to using current_stack_pointer(), which
is a heavier function: it is an out-of-line assembly function which
cannot be inlined and which reads the content of the stack at 0(r1).

Revert to just using get_sp() for getting r1 and masking its value
to obtain the base address of the stack as before.
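
For reference, a sketch of the difference (illustrative, not the exact
kernel code):

	/* out-of-line: costs a branch-and-link plus a load of the
	 * back chain at 0(r1) */
	unsigned long a = current_stack_pointer();

	/* register variable: no call, GCC uses r1 directly and the
	 * mask compiles to a single clrlwi/clrldi */
	register unsigned long __sp asm("r1");
	unsigned long b = __sp & ~(THREAD_SIZE - 1);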

Signed-off-by: Christophe Leroy 
Fixes: 7306e83ccf5c ("powerpc: Don't use CURRENT_THREAD_INFO to find the stack")
---
v2: New in this series. Using get_sp()
---
 arch/powerpc/kernel/irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 9333e115418f..193c2b64ce37 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -647,7 +647,7 @@ void do_IRQ(struct pt_regs *regs)
void *cursp, *irqsp, *sirqsp;
 
/* Switch to the irq stack to handle this */
-   cursp = (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1));
+   cursp = (void *)(get_sp() & ~(THREAD_SIZE - 1));
irqsp = hardirq_ctx[raw_smp_processor_id()];
sirqsp = softirq_ctx[raw_smp_processor_id()];
 
-- 
2.25.0



[PATCH v2 2/3] powerpc/irq: use IS_ENABLED() in check_stack_overflow()

2020-01-23 Thread Christophe Leroy
Instead of #ifdef, use IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW).
This enables GCC to check the code for validity even when the option
is not selected.
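
As an illustration of the pattern (generic sketch; CONFIG_FOO and
foo_debug_check() are made up):

	static inline void check_foo(void)
	{
		if (!IS_ENABLED(CONFIG_FOO))
			return;

		/* GCC still parses and type-checks this even when
		 * CONFIG_FOO is unset, then drops it as dead code */
		foo_debug_check();
	}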
---
v2: rebased

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/irq.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index cd29c2eb2d8e..9333e115418f 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -598,9 +598,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 
 static inline void check_stack_overflow(void)
 {
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
long sp;
 
+   if (!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW))
+   return;
+
sp = get_sp() & (THREAD_SIZE - 1);
 
/* check for stack overflow: is there less than 2KB free? */
@@ -608,7 +610,6 @@ static inline void check_stack_overflow(void)
pr_err("do_IRQ: stack overflow: %ld\n", sp);
dump_stack();
}
-#endif
 }
 
 void __do_irq(struct pt_regs *regs)
-- 
2.25.0



[PATCH v2 1/3] powerpc/irq: don't use current_stack_pointer() in check_stack_overflow()

2020-01-23 Thread Christophe Leroy
current_stack_pointer() doesn't return the stack pointer, but the
caller's stack frame. See commit bfe9a2cfe91a ("powerpc: Reimplement
__get_SP() as a function not a define") and commit acf620ecf56c
("powerpc: Rename __get_SP() to current_stack_pointer()") for details.

The purpose of check_stack_overflow() is to verify that the stack has
not overflowed.

To really know whether the stack pointer is still within boundaries,
the check must be done directly on the value of r1.

Define a get_sp() macro to get the value of r1 directly. (Adapted from
arch/powerpc/boot/reg.h)
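
To illustrate the difference (sketch; foo() is a made-up caller):

	void foo(void)
	{
		/* value derived from the back chain read at 0(r1) by
		 * the out-of-line helper: a stack frame address, not
		 * the live stack pointer of foo() */
		unsigned long a = current_stack_pointer();

		/* the actual value of r1 in foo() */
		unsigned long b = get_sp();
	}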

Signed-off-by: Christophe Leroy 
---
v2: Define a get_sp() macro instead of using asm("r1") locally.
---
 arch/powerpc/include/asm/reg.h | 3 +++
 arch/powerpc/kernel/irq.c  | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 1aa46dff0957..d585f2566338 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1450,6 +1450,9 @@ static inline void mtsrin(u32 val, u32 idx)
 
 extern unsigned long current_stack_pointer(void);
 
+register unsigned long __stack_pointer asm("r1");
+#define get_sp()   (__stack_pointer)
+
 extern unsigned long scom970_read(unsigned int address);
 extern void scom970_write(unsigned int address, unsigned long value);
 
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index add67498c126..cd29c2eb2d8e 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -601,7 +601,7 @@ static inline void check_stack_overflow(void)
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
long sp;
 
-   sp = current_stack_pointer() & (THREAD_SIZE-1);
+   sp = get_sp() & (THREAD_SIZE - 1);
 
/* check for stack overflow: is there less than 2KB free? */
if (unlikely(sp < 2048)) {
-- 
2.25.0



Re: [PATCH 1/2] powerpc/irq: don't use current_stack_pointer() in check_stack_overflow()

2020-01-23 Thread Christophe Leroy




On 01/24/2020 06:19 AM, Christophe Leroy wrote:



Le 24/01/2020 à 06:46, Michael Ellerman a écrit :


If I do this it seems to work, but feels a little dicey:

asm ("" : "=r" (r1));
sp = r1 & (THREAD_SIZE - 1);



Or we could add to asm/reg.h what we have in boot/reg.h:

register void *__stack_pointer asm("r1");
#define get_sp()    (__stack_pointer)

And use get_sp()



It works, and I guess doing it this way is acceptable as it's exactly 
what's done for current in asm/current.h with register r2.


Now I (still) get:

sp = get_sp() & (THREAD_SIZE - 1);
 b9c:   54 24 04 fe clrlwi  r4,r1,19
if (unlikely(sp < 2048)) {
 ba4:   2f 84 07 ff cmpwi   cr7,r4,2047





Although GCC 8.1 was doing exactly the same with the form clang doesn't 
like:


register unsigned long r1 asm("r1");
long sp = r1 & (THREAD_SIZE - 1);
 b84:   54 24 04 fe clrlwi  r4,r1,19
if (unlikely(sp < 2048)) {
 b8c:   2f 84 07 ff cmpwi   cr7,r4,2047


Christophe


Re: [PATCH] lib: Reduce user_access_begin() boundaries in strncpy_from_user() and strnlen_user()

2020-01-23 Thread Christophe Leroy




Le 23/01/2020 à 19:47, Linus Torvalds a écrit :

On Thu, Jan 23, 2020 at 12:34 AM Christophe Leroy
 wrote:


The range passed to user_access_begin() by strncpy_from_user() and
strnlen_user() starts at 'src' and goes up to the limit of userspace
although reads will be limited by the 'count' param.

On 32 bits powerpc (book3s/32) access has to be granted for each 256Mbytes
segment and the cost increases with the number of segments to unlock.

Limit the range with 'count' param.


Ack. I'm tempted to take this for 5.5 too, just so that the
unquestionably trivial fixes are in that baseline, and the
infrastructure is ready for any architecture that has issues like
this.


It would be nice, then the user_access_begin stuff for powerpc could go 
in 5.6 without worrying about it.


Thanks
Christophe


Re: [PATCH 1/2] powerpc/irq: don't use current_stack_pointer() in check_stack_overflow()

2020-01-23 Thread Christophe Leroy




Le 24/01/2020 à 06:46, Michael Ellerman a écrit :

Christophe Leroy  writes:


current_stack_pointer() doesn't return the stack pointer, but the
caller's stack frame. See commit bfe9a2cfe91a ("powerpc: Reimplement
__get_SP() as a function not a define") and commit acf620ecf56c
("powerpc: Rename __get_SP() to current_stack_pointer()") for details.

The purpose of check_stack_overflow() is to verify that the stack has
not overflowed.

To really know whether the stack pointer is still within boundaries,
the check must be done directly on the value of r1.

Signed-off-by: Christophe Leroy 
---
  arch/powerpc/kernel/irq.c | 5 ++---
  1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index bb34005ff9d2..4d468d835558 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -599,9 +599,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
  static inline void check_stack_overflow(void)
  {
  #ifdef CONFIG_DEBUG_STACKOVERFLOW
-   long sp;
-
-   sp = current_stack_pointer() & (THREAD_SIZE-1);
+   register unsigned long r1 asm("r1");
+   long sp = r1 & (THREAD_SIZE - 1);


This appears to work but seems to be "unsupported" by GCC, and clang
actually complains about it:

   /linux/arch/powerpc/kernel/irq.c:603:12: error: variable 'r1' is 
uninitialized when used here [-Werror,-Wuninitialized]
   long sp = r1 & (THREAD_SIZE - 1);
 ^~

The GCC docs say:

   The only supported use for this feature is to specify registers for
   input and output operands when calling Extended asm (see Extended
   Asm).

https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/Local-Register-Variables.html#Local-Register-Variables


If I do this it seems to work, but feels a little dicey:

asm ("" : "=r" (r1));
sp = r1 & (THREAD_SIZE - 1);



Or we could add to asm/reg.h what we have in boot/reg.h:

register void *__stack_pointer asm("r1");
#define get_sp()    (__stack_pointer)

And use get_sp()

I'll try it.

Christophe


Re: [PATCH 1/2] powerpc/irq: don't use current_stack_pointer() in check_stack_overflow()

2020-01-23 Thread Michael Ellerman
Christophe Leroy  writes:

> current_stack_pointer() doesn't return the stack pointer, but the
> caller's stack frame. See commit bfe9a2cfe91a ("powerpc: Reimplement
> __get_SP() as a function not a define") and commit acf620ecf56c
> ("powerpc: Rename __get_SP() to current_stack_pointer()") for details.
>
> The purpose of check_stack_overflow() is to verify that the stack has
> not overflowed.
>
> To really know whether the stack pointer is still within boundaries,
> the check must be done directly on the value of r1.
>
> Signed-off-by: Christophe Leroy 
> ---
>  arch/powerpc/kernel/irq.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> index bb34005ff9d2..4d468d835558 100644
> --- a/arch/powerpc/kernel/irq.c
> +++ b/arch/powerpc/kernel/irq.c
> @@ -599,9 +599,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
>  static inline void check_stack_overflow(void)
>  {
>  #ifdef CONFIG_DEBUG_STACKOVERFLOW
> - long sp;
> -
> - sp = current_stack_pointer() & (THREAD_SIZE-1);
> + register unsigned long r1 asm("r1");
> + long sp = r1 & (THREAD_SIZE - 1);

This appears to work but seems to be "unsupported" by GCC, and clang
actually complains about it:

  /linux/arch/powerpc/kernel/irq.c:603:12: error: variable 'r1' is 
uninitialized when used here [-Werror,-Wuninitialized]
  long sp = r1 & (THREAD_SIZE - 1);
^~

The GCC docs say:

  The only supported use for this feature is to specify registers for
  input and output operands when calling Extended asm (see Extended
  Asm).

https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/Local-Register-Variables.html#Local-Register-Variables


If I do this it seems to work, but feels a little dicey:

asm ("" : "=r" (r1));
sp = r1 & (THREAD_SIZE - 1);


Generated code looks OK ish:

clang:

sp = r1 & (THREAD_SIZE - 1);
22e0:   a0 04 24 78 clrldi  r4,r1,50
if (unlikely(sp < 2048)) {
22e4:   ff 07 24 28 cmpldi  r4,2047
22e8:   58 00 81 40 ble 2340 


gcc:
if (unlikely(sp < 2048)) {
2eb4:   00 38 28 70 andi.   r8,r1,14336
...
2ecc:   24 00 82 40 bne c0002ef0 


cheers


Re: [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not

2020-01-23 Thread hpa
On January 23, 2020 11:57:57 AM PST, Linus Torvalds 
 wrote:
>On Thu, Jan 23, 2020 at 11:47 AM christophe leroy
> wrote:
>>
>> I'm going to leave it aside, at least for the time being, and do it
>as a
>> second step later after evaluating the real performance impact. I'll
>> respin tomorrow in that way.
>
>Ok, good.
>
>From a "narrow the access window type" standpoint it does seem to be a
>good idea to specify what kind of user accesses will be done, so I
>don't hate the idea, it's more that I'm not convinced it matters
>enough.
>
>On x86, we have made the rule that user_access_begin/end() can contain
>_very_ few operations, and objtool really does enforce that. With
>objtool and KASAN, you really end up with very small ranges of
>user_access_begin/end().
>
>And since we actually verify it statically on x86-64, I would say that
>the added benefit of narrowing by access type is fairly small. We're
>not going to have complicated code in that user access region, at
>least in generic code.
>
>> > Also, it shouldn't be a "is this a write". What if it's a read
>_and_ a
>> > write? Only a write? Only a read?
>>
>> Indeed that was more: does it include a write. It's either RO or RW
>
>I would expect that most actual users would be RO or WO, so it's a bit
>odd to have those choices.
>
>Of course, often writing ends up requiring read permissions anyway if
>the architecture has problems with alignment handling or similar, but
>still... The real RW case does exist conceptually (we have
>"copy_in_user()", after all), but still feels like it shouldn't be
>seen as the only _interface_ choice.
>
>IOW, an architecture may decide to turn WO into RW because of
>architecture limitations (or, like x86 and arm, ignore the whole
>RO/RW/WO _entirely_ because there's just a single "allow user space
>accesses" flag), but on an interface layer if we add this flag, I
>really think it should be an explicit "read or write or both".
>
>So thus my "let's try to avoid doing it in the first place, but if we
>_do_ do this, then do it right" plea.
>
> Linus

I'm wondering if we should make it a static part of the API instead of a 
variable.

I have *deep* concern with carrying state in a "key" variable: it's a direct 
attack vector for a crowbar attack, especially since it is by definition live 
inside a user access region.

One major reason x86 restricts the regions like this is to minimize the amount 
of unconstrained state: we don't save and restore the state around, but enter 
and exit unconditionally, which means that a leaked state will end up having a 
limited lifespan. Nor is there any state inside the user access region which 
could be corrupted to leave the region open.
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.


Re: [PATCH v2 02/27] nvdimm: remove prototypes for nonexistent functions

2020-01-23 Thread Dan Williams
[ add Aneesh ]


On Tue, Dec 3, 2019 at 4:10 PM Dan Williams  wrote:
>
> On Mon, Dec 2, 2019 at 7:48 PM Alastair D'Silva  wrote:
> >
> > From: Alastair D'Silva 
> >
> > These functions don't exist, so remove the prototypes for them.
> >
> > Signed-off-by: Alastair D'Silva 
> > Reviewed-by: Frederic Barrat 
> > ---
>
> This was already merged as commit:
>
> cda93d6965a1 libnvdimm: Remove prototypes for nonexistent functions
>
> You should have received a notification from the patchwork bot that it
> was already accepted.
>
> What baseline did you use for this submission?

I never got an answer to this, and I have not seen any updates. Can I
ask you to get an initial review from Aneesh who has been doing good
work in the nvdimm core, and then we can look to get this in the
pipeline for the v5.7 kernel?


Re: [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not

2020-01-23 Thread Linus Torvalds
On Thu, Jan 23, 2020 at 11:47 AM christophe leroy
 wrote:
>
> I'm going to leave it aside, at least for the time being, and do it as a
> second step later after evaluating the real performance impact. I'll
> respin tomorrow in that way.

Ok, good.

From a "narrow the access window type" standpoint it does seem to be a
good idea to specify what kind of user accesses will be done, so I
don't hate the idea, it's more that I'm not convinced it matters
enough.

On x86, we have made the rule that user_access_begin/end() can contain
_very_ few operations, and objtool really does enforce that. With
objtool and KASAN, you really end up with very small ranges of
user_access_begin/end().

And since we actually verify it statically on x86-64, I would say that
the added benefit of narrowing by access type is fairly small. We're
not going to have complicated code in that user access region, at
least in generic code.
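
(The shape that passes objtool is roughly the following sketch, where
val, uptr and Efault are placeholders:

	if (!user_access_begin(uptr, sizeof(*uptr)))
		return -EFAULT;
	unsafe_put_user(val, uptr, Efault);
	user_access_end();
	return 0;
Efault:
	user_access_end();
	return -EFAULT;

i.e. only unsafe_*_user() accessors between begin and end, no arbitrary
function calls.)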

> > Also, it shouldn't be a "is this a write". What if it's a read _and_ a
> > write? Only a write? Only a read?
>
> Indeed that was more: does it include a write. It's either RO or RW

I would expect that most actual users would be RO or WO, so it's a bit
odd to have those choices.

Of course, often writing ends up requiring read permissions anyway if
the architecture has problems with alignment handling or similar, but
still... The real RW case does exist conceptually (we have
"copy_in_user()", after all), but still feels like it shouldn't be
seen as the only _interface_ choice.

IOW, an architecture may decide to turn WO into RW because of
architecture limitations (or, like x86 and arm, ignore the whole
RO/RW/WO _entirely_ because there's just a single "allow user space
accesses" flag), but on an interface layer if we add this flag, I
really think it should be an explicit "read or write or both".

So thus my "let's try to avoid doing it in the first place, but if we
_do_ do this, then do it right" plea.

 Linus


Re: [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not

2020-01-23 Thread christophe leroy




Le 23/01/2020 à 19:02, Linus Torvalds a écrit :

On Thu, Jan 23, 2020 at 4:59 AM Christophe Leroy
 wrote:


On 32 bits powerPC (book3s/32), only write accesses to user are
protected and there is no point spending time on unlocking for reads.


Honestly, I'm starting to think that 32-bit ppc just needs to look
more like everybody else, than make these changes.


Well, besides ppc32, I also saw it as an opportunity for the modern 
ppc64. On it, you can unlock either read or write or both. And this is 
what is done for get_user() / put_user() and friends: unlock only reads 
for get_user() and only writes for put_user().


It could also be a compromise between performance and security: keep 
reads allowed at all times and only protect against writes on modern 
architectures which support it, like ppc64.




We used to have a read/write argument to the old "verify_area()" and
"access_ok()" model, and it was a mistake. It was due to odd i386 user
access issues. We got rid of it. I'm not convinced this is any better
- it looks very similar and for odd ppc access issues.


I'm going to leave it aside, at least for the time being, and do it as a 
second step later after evaluating the real performance impact. I'll 
respin tomorrow in that way.




But if we really do want to do this, then:


Indeed I took the idea from a discussion last October (Subject: 
"book3s/32 KUAP (was Re: [PATCH] Convert filldir[64]() from __put_user() 
to unsafe_put_user())" )


https://lore.kernel.org/lkml/87h84avffi@mpe.ellerman.id.au/





Add an argument to user_access_begin() to tell when it's for write and
return an opaque key that will be used by user_access_end() to know
what was done by user_access_begin().


You should make it more opaque than "unsigned long".

Also, it shouldn't be a "is this a write". What if it's a read _and_ a
write? Only a write? Only a read?


Indeed that was more: does it include a write. It's either RO or RW

Christophe


Re: [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not

2020-01-23 Thread Linus Torvalds
On Thu, Jan 23, 2020 at 4:59 AM Christophe Leroy
 wrote:
>
> On 32 bits powerPC (book3s/32), only write accesses to user are
> protected and there is no point spending time on unlocking for reads.

Honestly, I'm starting to think that 32-bit ppc just needs to look
more like everybody else, than make these changes.

We used to have a read/write argument to the old "verify_area()" and
"access_ok()" model, and it was a mistake. It was due to odd i386 user
access issues. We got rid of it. I'm not convinced this is any better
- it looks very similar and for odd ppc access issues.

But if we really do want to do this, then:

> Add an argument to user_access_begin() to tell when it's for write and
> return an opaque key that will be used by user_access_end() to know
> what was done by user_access_begin().

You should make it more opaque than "unsigned long".

Also, it shouldn't be a "is this a write". What if it's a read _and_ a
write? Only a write? Only a read?

Linus


Re: [PATCH] lib: Reduce user_access_begin() boundaries in strncpy_from_user() and strnlen_user()

2020-01-23 Thread Linus Torvalds
On Thu, Jan 23, 2020 at 12:34 AM Christophe Leroy
 wrote:
>
> The range passed to user_access_begin() by strncpy_from_user() and
> strnlen_user() starts at 'src' and goes up to the limit of userspace
> although reads will be limited by the 'count' param.
>
> On 32 bits powerpc (book3s/32) access has to be granted for each 256Mbytes
> segment and the cost increases with the number of segments to unlock.
>
> Limit the range with 'count' param.
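
(For reference, the fix is essentially the following sketch, not
necessarily the exact code:

	unsigned long max = user_addr_max() - (unsigned long)src;

	if (max > count)
		max = count;		/* the fix: bound by 'count' */
	if (user_access_begin(src, max)) {
		/* ... unsafe reads of at most 'max' bytes ... */
		user_access_end();
	}

so book3s/32 only unlocks the 256M segments the bounded read can
actually touch.)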

Ack. I'm tempted to take this for 5.5 too, just so that the
unquestionably trivial fixes are in that baseline, and the
infrastructure is ready for any architecture that has issues like
this.

Adding 'linux-arch' to the participants, to see if other architectures
are at all looking at actually implementing the whole
user_access_begin/end() dance too..

   Linus


Re: [PATCH v2 1/6] fs/readdir: Fix filldir() and filldir64() use of user_access_begin()

2020-01-23 Thread Linus Torvalds
On Thu, Jan 23, 2020 at 4:00 AM Michael Ellerman  wrote:
>
> So I guess I'll wait and see what happens with patch 1.

I've committed my fixes to filldir[64]() directly - they really were
fixing me being lazy about the range, and the name length checking
really is a theoretical "access wrong user space pointer" issue with
corrupted filesystems regardless (even though I suspect it's entirely
theoretical - even a corrupt filesystem hopefully won't be passing in
negative directory entry lengths or something like that).

The "pass in read/write" part I'm not entirely convinced about.
Honestly, if this is just for ppc32 and nobody else really needs it,
make the ppc32s thing always just enable both user space reads and
writes. That's the semantics for x86 and arm as is, I'm not convinced
that we should complicate this for a legacy platform.

Linus


[PATCH v4] powerpc: use probe_user_read() and probe_user_write()

2020-01-23 Thread Christophe Leroy
Instead of open-coding it, use probe_user_read() to read a user location
without faulting and probe_user_write() for writing to user.
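
The transformation is essentially (sketch):

	/* before: opencoded */
	pagefault_disable();
	ret = __copy_from_user_inatomic(&val, uptr, sizeof(val));
	pagefault_enable();

	/* after */
	ret = probe_user_read(&val, uptr, sizeof(val));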

Signed-off-by: Christophe Leroy 
---
Link to v3: https://patchwork.ozlabs.org/patch/1026042/
v4: Reviving this patch after one year. Now probe_user_read/write() is in the 
kernel so patch 1 is gone.
v3: No change
v2: Using probe_user_read() instead of probe_user_address()
---
 arch/powerpc/kernel/process.c  | 12 +---
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  6 ++
 arch/powerpc/mm/fault.c|  6 +-
 arch/powerpc/oprofile/backtrace.c  | 14 ++
 arch/powerpc/perf/callchain.c  | 20 +++-
 arch/powerpc/perf/core-book3s.c|  8 +---
 arch/powerpc/sysdev/fsl_pci.c  | 10 --
 7 files changed, 14 insertions(+), 62 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4df94b6e2f32..79f2cb6ecf87 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1264,16 +1264,6 @@ void show_user_instructions(struct pt_regs *regs)
 
pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
 
-   /*
-* Make sure the NIP points at userspace, not kernel text/data or
-* elsewhere.
-*/
-   if (!__access_ok(pc, NR_INSN_TO_PRINT * sizeof(int), USER_DS)) {
-   pr_info("%s[%d]: Bad NIP, not dumping instructions.\n",
-   current->comm, current->pid);
-   return;
-   }
-
seq_buf_init(&s, buf, sizeof(buf));
 
while (n) {
@@ -1284,7 +1274,7 @@ void show_user_instructions(struct pt_regs *regs)
for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) {
int instr;
 
-   if (probe_kernel_address((const void *)pc, instr)) {
+   if (probe_user_read(&instr, (void __user *)pc, sizeof(instr))) {
seq_buf_printf(&s, "XXXXXXXX ");
continue;
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index da857c8ba6e4..231410dc9db4 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -63,12 +63,10 @@ unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int 
pid,
}
isync();
 
-   pagefault_disable();
if (is_load)
-   ret = raw_copy_from_user(to, from, n);
+   ret = probe_user_read(to, (const void __user *)from, n);
else
-   ret = raw_copy_to_user(to, from, n);
-   pagefault_enable();
+   ret = probe_user_write((void __user *)to, from, n);
 
/* switch the pid first to avoid running host with unallocated pid */
if (quadrant == 1 && pid != old_pid)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index b5047f9b5dec..9e119f98a725 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -279,12 +279,8 @@ static bool bad_stack_expansion(struct pt_regs *regs, 
unsigned long address,
if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
access_ok(nip, sizeof(*nip))) {
unsigned int inst;
-   int res;
 
-   pagefault_disable();
-   res = __get_user_inatomic(inst, nip);
-   pagefault_enable();
-   if (!res)
+   if (!probe_user_read(&inst, nip, sizeof(inst)))
return !store_updates_sp(inst);
*must_retry = true;
}
diff --git a/arch/powerpc/oprofile/backtrace.c 
b/arch/powerpc/oprofile/backtrace.c
index 43245f4a9bcb..2799b922f780 100644
--- a/arch/powerpc/oprofile/backtrace.c
+++ b/arch/powerpc/oprofile/backtrace.c
@@ -28,15 +28,12 @@ static unsigned int user_getsp32(unsigned int sp, int 
is_first)
unsigned int stack_frame[2];
void __user *p = compat_ptr(sp);
 
-   if (!access_ok(p, sizeof(stack_frame)))
-   return 0;
-
/*
 * The most likely reason for this is that we returned -EFAULT,
 * which means that we've done all that we can do from
 * interrupt context.
 */
-   if (__copy_from_user_inatomic(stack_frame, p, sizeof(stack_frame)))
+   if (probe_user_read(stack_frame, (void __user *)p, sizeof(stack_frame)))
return 0;
 
if (!is_first)
@@ -54,11 +51,7 @@ static unsigned long user_getsp64(unsigned long sp, int 
is_first)
 {
unsigned long stack_frame[3];
 
-   if (!access_ok((void __user *)sp, sizeof(stack_frame)))
-   return 0;
-
-   if (__copy_from_user_inatomic(stack_frame, (void __user *)sp,
-   sizeof(stack_frame)))
+   if (probe_user_read(stack_frame, (void __user *)sp, 

Re: [PATCH] powerpc: drmem: avoid NULL pointer dereference when drmem is unavailable

2020-01-23 Thread Michal Suchánek
On Thu, Jan 23, 2020 at 09:56:10AM -0600, Nathan Lynch wrote:
> Hello and thanks for the patch.
> 
> Libor Pechacek  writes:
> > In KVM guests drmem structure is only zero initialized. Trying to
> > manipulate DLPAR parameters results in a crash in this environment.
> 
> I think this statement needs qualification. Unless I'm mistaken, this
> happens only when you boot a guest without any hotpluggable memory
> configured, and then try to add or remove memory.
> 
> 
> > diff --git a/arch/powerpc/include/asm/drmem.h 
> > b/arch/powerpc/include/asm/drmem.h
> > index 3d76e1c388c2..28c3d936fdf3 100644
> > --- a/arch/powerpc/include/asm/drmem.h
> > +++ b/arch/powerpc/include/asm/drmem.h
> > @@ -27,12 +27,12 @@ struct drmem_lmb_info {
> >  extern struct drmem_lmb_info *drmem_info;
> >  
> >  #define for_each_drmem_lmb_in_range(lmb, start, end)   \
> > -   for ((lmb) = (start); (lmb) <= (end); (lmb)++)
> > +   for ((lmb) = (start); (lmb) < (end); (lmb)++)
> >  
> >  #define for_each_drmem_lmb(lmb)\
> > for_each_drmem_lmb_in_range((lmb),  \
> > &drmem_info->lmbs[0],   \
> > -   &drmem_info->lmbs[drmem_info->n_lmbs - 1])
> > +   &drmem_info->lmbs[drmem_info->n_lmbs])
> >  
> >  /*
> >   * The of_drconf_cell_v1 struct defines the layout of the LMB data
> > diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> > b/arch/powerpc/platforms/pseries/hotplug-memory.c
> > index c126b94d1943..4ea6af002e27 100644
> > --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> > +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> > @@ -236,9 +236,9 @@ static int get_lmb_range(u32 drc_index, int n_lmbs,
> > if (!start)
> > return -EINVAL;
> >  
> > -   end = &start[n_lmbs - 1];
> > +   end = &start[n_lmbs];
> > 
> > -   last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1];
> > +   last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs];
> > if (end > last_lmb)
> > return -EINVAL;
> 
> Is this not undefined behavior? I'd rather do this in a way that does
> not involve forming out-of-bounds pointers. Even if it's safe, naming
> that pointer "last_lmb" now actively hinders understanding of the code;
> it should be named "limit" or something.

Indeed, the name might be misleading now.

However, the loop differs from anything else we have in the kernel.

The standard explicitly allows the pointer to point just after the last
element to allow expressing the iteration limit without danger of
overflow.
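
For example (sketch):

	struct drmem_lmb *lmb;
	struct drmem_lmb *limit = &drmem_info->lmbs[drmem_info->n_lmbs];

	/* forming 'limit' is well defined; only dereferencing it is not */
	for (lmb = &drmem_info->lmbs[0]; lmb < limit; lmb++)
		handle(lmb);	/* handle() is a placeholder */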

Thanks

Michal


Re: [PATCH] powerpc: drmem: avoid NULL pointer dereference when drmem is unavailable

2020-01-23 Thread Nathan Lynch
Hello and thanks for the patch.

Libor Pechacek  writes:
> In KVM guests drmem structure is only zero initialized. Trying to
> manipulate DLPAR parameters results in a crash in this environment.

I think this statement needs qualification. Unless I'm mistaken, this
happens only when you boot a guest without any hotpluggable memory
configured, and then try to add or remove memory.


> diff --git a/arch/powerpc/include/asm/drmem.h 
> b/arch/powerpc/include/asm/drmem.h
> index 3d76e1c388c2..28c3d936fdf3 100644
> --- a/arch/powerpc/include/asm/drmem.h
> +++ b/arch/powerpc/include/asm/drmem.h
> @@ -27,12 +27,12 @@ struct drmem_lmb_info {
>  extern struct drmem_lmb_info *drmem_info;
>  
>  #define for_each_drmem_lmb_in_range(lmb, start, end) \
> - for ((lmb) = (start); (lmb) <= (end); (lmb)++)
> + for ((lmb) = (start); (lmb) < (end); (lmb)++)
>  
>  #define for_each_drmem_lmb(lmb)  \
>   for_each_drmem_lmb_in_range((lmb),  \
>   &drmem_info->lmbs[0],   \
> - &drmem_info->lmbs[drmem_info->n_lmbs - 1])
> + &drmem_info->lmbs[drmem_info->n_lmbs])
>  
>  /*
>   * The of_drconf_cell_v1 struct defines the layout of the LMB data
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c 
> b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index c126b94d1943..4ea6af002e27 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -236,9 +236,9 @@ static int get_lmb_range(u32 drc_index, int n_lmbs,
>   if (!start)
>   return -EINVAL;
>  
> - end = &start[n_lmbs - 1];
> + end = &start[n_lmbs];
> 
> - last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1];
> + last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs];
>   if (end > last_lmb)
>   return -EINVAL;

Is this not undefined behavior? I'd rather do this in a way that does
not involve forming out-of-bounds pointers. Even if it's safe, naming
that pointer "last_lmb" now actively hinders understanding of the code;
it should be named "limit" or something.

Anyway, I confess I've had the following patch sitting unsubmitted due
to a combination of perceived low urgency and lack of time. I've
verified it addresses your reported problem but I need a day or two to
check that it doesn't regress memory add/remove on suitably configured
guests:



powerpc/drmem: make drmem list traversal safe on non-drmem systems

A user has reported a crash when attempting to remove an LMB on a KVM
guest where the device tree lacks the nodes and properties
(ibm,dynamic-reconfiguration-memory etc) which allow the drmem code to
support dynamic memory removal:

pseries-hotplug-mem: Attempting to hot-remove 1 LMB(s)
Unable to handle kernel paging request for data at address 0x0010
Faulting instruction address: 0xc00f0a30
Oops: Kernel access of bad area, sig: 11 [#1]
[... regs etc ...]
NIP [c00f0a30] dlpar_memory+0x510/0xb50
LR [c00f09e8] dlpar_memory+0x4c8/0xb50
Call Trace:
[c001edf97ba0] [c00f09e8] dlpar_memory+0x4c8/0xb50 (unreliable)
[c001edf97c20] [c00e8390] handle_dlpar_errorlog+0x60/0x140
[c001edf97c90] [c00e85e0] dlpar_store+0x170/0x490
[c001edf97d40] [c0cb0c90] kobj_attr_store+0x30/0x50
[c001edf97d60] [c0591598] sysfs_kf_write+0x68/0xa0
[c001edf97d80] [c058fec4] kernfs_fop_write+0x104/0x270
[c001edf97dd0] [c04a1078] sys_write+0x128/0x380
[c001edf97e30] [c000b388] system_call+0x5c/0x70

Make for_each_drmem_lmb() safe for the case where the list of drmem
LMBs is unpopulated.

Signed-off-by: Nathan Lynch 

 arch/powerpc/include/asm/drmem.h | 43 +---
 1 file changed, 36 insertions(+), 7 deletions(-)

modified   arch/powerpc/include/asm/drmem.h
@@ -20,19 +20,48 @@ struct drmem_lmb {
 
 struct drmem_lmb_info {
struct drmem_lmb*lmbs;
-   int n_lmbs;
+   unsigned int    n_lmbs;
u32 lmb_size;
 };
 
 extern struct drmem_lmb_info *drmem_info;
 
-#define for_each_drmem_lmb_in_range(lmb, start, end)   \
-   for ((lmb) = (start); (lmb) <= (end); (lmb)++)
+static inline bool drmem_present(void)
+{
+   return drmem_info->lmbs != NULL;
+}
+
+static inline struct drmem_lmb *drmem_lmb_index(unsigned int index)
+{
+   if (!drmem_present())
+   return NULL;
 
-#define for_each_drmem_lmb(lmb)\
-   for_each_drmem_lmb_in_range((lmb),  \
-   &drmem_info->lmbs[0],   \
-   &drmem_info->lmbs[drmem_info->n_lmbs - 1])
+   if (WARN_ON(index >= drmem_info->n_lmbs))
+   return NULL;
+
+   return &drmem_info->lmbs[index];
+}
+
+static inline struct drmem_lmb *drmem_first_lmb(void)

Re: [PATCH v2 07/10] powerpc/configs/skiroot: Enable security features

2020-01-23 Thread Joel Stanley
On Tue, 21 Jan 2020 at 04:30, Michael Ellerman  wrote:
>
> From: Joel Stanley 
>
> This turns on HARDENED_USERCOPY with HARDENED_USERCOPY_PAGESPAN, and
> FORTIFY_SOURCE.
>
> It also enables SECURITY_LOCKDOWN_LSM with _EARLY and
> LOCK_DOWN_KERNEL_FORCE_INTEGRITY options enabled. This still allows
> xmon to be used in read-only mode.
>
> MODULE_SIG is selected by lockdown, so it is still enabled.
>
> Signed-off-by: Joel Stanley 
> [mpe: Switch to lockdown integrity mode per oohal]
> Signed-off-by: Michael Ellerman 

I did some testing and with this change we break kexec. As it's critical
for this kernel to be able to kexec, we need to set KEXEC_FILE=y if
we're setting FORCE_INTEGRITY=y.

I've tested your series with that modification made and userspace was
once again able to kexec (with -s).
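
(i.e., roughly this pair in skiroot_defconfig:

	CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY=y
	CONFIG_KEXEC_FILE=y

so the signature-verifying kexec_file_load() path stays available once
lockdown blocks the plain kexec_load() syscall.)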

Cheers,

Joel

> ---
>  arch/powerpc/configs/skiroot_defconfig | 11 ++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
>
> v2: Switch to lockdown integrity mode rather than confidentiality as noticed 
> by
> dja and discussed with jms and oohal.
>
> diff --git a/arch/powerpc/configs/skiroot_defconfig 
> b/arch/powerpc/configs/skiroot_defconfig
> index 24a210fe0049..93b478436a2b 100644
> --- a/arch/powerpc/configs/skiroot_defconfig
> +++ b/arch/powerpc/configs/skiroot_defconfig
> @@ -49,7 +49,6 @@ CONFIG_JUMP_LABEL=y
>  CONFIG_STRICT_KERNEL_RWX=y
>  CONFIG_MODULES=y
>  CONFIG_MODULE_UNLOAD=y
> -CONFIG_MODULE_SIG=y
>  CONFIG_MODULE_SIG_FORCE=y
>  CONFIG_MODULE_SIG_SHA512=y
>  CONFIG_PARTITION_ADVANCED=y
> @@ -272,6 +271,16 @@ CONFIG_NLS_ASCII=y
>  CONFIG_NLS_ISO8859_1=y
>  CONFIG_NLS_UTF8=y
>  CONFIG_ENCRYPTED_KEYS=y
> +CONFIG_SECURITY=y
> +CONFIG_HARDENED_USERCOPY=y
> +# CONFIG_HARDENED_USERCOPY_FALLBACK is not set
> +CONFIG_HARDENED_USERCOPY_PAGESPAN=y
> +CONFIG_FORTIFY_SOURCE=y
> +CONFIG_SECURITY_LOCKDOWN_LSM=y
> +CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
> +CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY=y
> +# CONFIG_INTEGRITY is not set
> +CONFIG_LSM="yama,loadpin,safesetid,integrity"
>  # CONFIG_CRYPTO_HW is not set
>  CONFIG_CRC16=y
>  CONFIG_CRC_ITU_T=y
> --
> 2.21.1
>


Re: [PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not

2020-01-23 Thread Jani Nikula
On Thu, 23 Jan 2020, Christophe Leroy  wrote:
> On 32 bits powerPC (book3s/32), only write accesses to user are
> protected and there is no point spending time on unlocking for reads.
>
> On 64 bits powerpc (book3s/64 at least), access can be granted
> read only, write only or read/write.
>
> Add an argument to user_access_begin() to tell when it's for write and
> return an opaque key that will be used by user_access_end() to know
> what was done by user_access_begin().

IMHO an opaque key is a prime example of a case where the use of an
opaque typedef is warranted. Nobody needs to know or care it's
specifically an unsigned long.
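
Something like this sketch, with a made-up name:

	typedef struct {
		unsigned long v;
	} user_access_key_t;

so callers can only hand the key back to user_access_end(), not inspect
or fabricate it.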

BR,
Jani.


-- 
Jani Nikula, Intel Open Source Graphics Center


[PATCH v3 7/7] powerpc: Implement user_access_begin and friends

2020-01-23 Thread Christophe Leroy
Today, when a function like strncpy_from_user() is called,
the userspace access protection is de-activated and re-activated
for every word read.

By implementing user_access_begin and friends, the protection
is de-activated at the beginning of the copy and re-activated at the
end.

Implement user_access_begin(), user_access_end(),
unsafe_get_user(), unsafe_put_user() and unsafe_copy_to_user().

For the time being, we keep user_access_save() and
user_access_restore() as nops.
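
The resulting usage pattern is roughly (sketch, using the key-returning
user_access_begin() from earlier in this series; val, uaddr and Efault
are placeholders):

	unsigned long key = user_access_begin(uaddr, len, true);

	if (!key)
		return -EFAULT;
	unsafe_put_user(val, uaddr, Efault);	/* protection stays open */
	user_access_end(key);			/* re-activated once, here */
	return 0;
Efault:
	user_access_end(key);
	return -EFAULT;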

Signed-off-by: Christophe Leroy 
---
v2: no change
v3: adapted to the new format of user_access_begin/end()
---
 arch/powerpc/include/asm/uaccess.h | 100 ++---
 1 file changed, 90 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/uaccess.h 
b/arch/powerpc/include/asm/uaccess.h
index cafad1960e76..30204e80df1b 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -91,9 +91,14 @@ static inline int __access_ok(unsigned long addr, unsigned 
long size,
__put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
 
 #define __get_user(x, ptr) \
-   __get_user_nocheck((x), (ptr), sizeof(*(ptr)))
+   __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true)
 #define __put_user(x, ptr) \
-   __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
+   __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), true)
+
+#define __get_user_allowed(x, ptr) \
+   __get_user_nocheck((x), (ptr), sizeof(*(ptr)), false)
+#define __put_user_allowed(x, ptr) \
+   __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), 
false)
 
 #define __get_user_inatomic(x, ptr) \
__get_user_nosleep((x), (ptr), sizeof(*(ptr)))
@@ -138,10 +143,9 @@ extern long __put_user_bad(void);
: "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
 #endif /* __powerpc64__ */
 
-#define __put_user_size(x, ptr, size, retval)  \
+#define __put_user_size_allowed(x, ptr, size, retval)  \
 do {   \
retval = 0; \
-   allow_write_to_user(ptr, size); \
switch (size) { \
  case 1: __put_user_asm(x, ptr, retval, "stb"); break; \
  case 2: __put_user_asm(x, ptr, retval, "sth"); break; \
@@ -149,17 +153,26 @@ do {  
\
  case 8: __put_user_asm2(x, ptr, retval); break;   \
  default: __put_user_bad();\
}   \
+} while (0)
+
+#define __put_user_size(x, ptr, size, retval)  \
+do {   \
+   allow_write_to_user(ptr, size); \
+   __put_user_size_allowed(x, ptr, size, retval);  \
prevent_write_to_user(ptr, size);   \
 } while (0)
 
-#define __put_user_nocheck(x, ptr, size)   \
+#define __put_user_nocheck(x, ptr, size, allow)\
 ({ \
long __pu_err;  \
__typeof__(*(ptr)) __user *__pu_addr = (ptr);   \
if (!is_kernel_addr((unsigned long)__pu_addr))  \
might_fault();  \
__chk_user_ptr(ptr);\
-   __put_user_size((x), __pu_addr, (size), __pu_err);  \
+   if (allow)  \
+   __put_user_size((x), __pu_addr, (size), __pu_err);  \
+   else    \
+   __put_user_size_allowed((x), __pu_addr, (size), __pu_err);  \
__pu_err;   \
 })
 
@@ -236,13 +249,12 @@ extern long __get_user_bad(void);
: "b" (addr), "i" (-EFAULT), "0" (err))
 #endif /* __powerpc64__ */
 
-#define __get_user_size(x, ptr, size, retval)  \
+#define __get_user_size_allowed(x, ptr, size, retval)  \
 do {   \
retval = 0; \
__chk_user_ptr(ptr);\
if (size > sizeof(x))   \
(x) = __get_user_bad(); \
-   allow_read_from_user(ptr, size);\
switch (size) { \
case 1: __get_user_asm(x, ptr, retval, "lbz"); break;   \
case 2: __get_user_asm(x, ptr, retval, "lhz"); break;   \
@@ -250,6 +262,12 @@ do {  

[PATCH v3 6/7] powerpc/32s: Prepare allow_user_access() for user_access_begin()

2020-01-23 Thread Christophe Leroy
In preparation for implementing user_access_begin and friends
on powerpc, allow_user_access() needs to be prepared for
user_access_begin().

user_access_end() doesn't provide the address and size which
were passed to user_access_begin(), required by prevent_user_access()
to know which segment to modify. But user_access_end() takes an
opaque value returned by user_access_begin().

Make allow_user_access() return the value it writes to current->kuap.
This will allow user_access_end() to recalculate the segment range
without having to read current->kuap.

Signed-off-by: Christophe Leroy 
---
v3: Replaces the patch "Prepare prevent_user_access() for user_access_end()"
---
 arch/powerpc/include/asm/book3s/32/kup.h   | 15 ++-
 arch/powerpc/include/asm/book3s/64/kup-radix.h |  7 +--
 arch/powerpc/include/asm/kup.h |  8 ++--
 arch/powerpc/include/asm/nohash/32/kup-8xx.h   |  6 --
 4 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h 
b/arch/powerpc/include/asm/book3s/32/kup.h
index 3c1798e56b55..128dcbf3a19d 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -102,23 +102,28 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 
end)
isync();/* Context sync required after mtsrin() */
 }
 
-static __always_inline void allow_user_access(void __user *to, const void 
__user *from,
- u32 size, unsigned long dir)
+/* Make sure we never return 0. We only use top and bottom 4 bits */
+static __always_inline unsigned long
+allow_user_access(void __user *to, const void __user *from, u32 size, unsigned 
long dir)
 {
u32 addr, end;
+   unsigned long kuap;
 
BUILD_BUG_ON(!__builtin_constant_p(dir));
if (!(dir & KUAP_W))
-   return;
+   return 0x100;
 
addr = (__force u32)to;
 
if (unlikely(addr >= TASK_SIZE || !size))
-   return;
+   return 0x100;
 
end = min(addr + size, TASK_SIZE);
-   current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf);
+   kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf);
+   current->thread.kuap = kuap;
kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end);   /* Clear Ks */
+
+   return kuap;
 }
 
 static __always_inline void prevent_user_access(void __user *to, const void 
__user *from,
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h 
b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index f11315306d41..183f0c87017b 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -77,8 +77,9 @@ static inline void set_kuap(unsigned long value)
isync();
 }
 
-static __always_inline void allow_user_access(void __user *to, const void 
__user *from,
- unsigned long size, unsigned long 
dir)
+static __always_inline unsigned long
+allow_user_access(void __user *to, const void __user *from,
+ unsigned long size, unsigned long dir)
 {
// This is written so we can resolve to a single case at build time
BUILD_BUG_ON(!__builtin_constant_p(dir));
@@ -88,6 +89,8 @@ static __always_inline void allow_user_access(void __user 
*to, const void __user
set_kuap(AMR_KUAP_BLOCK_READ);
else
set_kuap(0);
+
+   return 1;
 }
 
 static inline void prevent_user_access(void __user *to, const void __user 
*from,
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index ff57bfcb88f7..691fec5afd4a 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -45,8 +45,12 @@ static inline void setup_kuep(bool disabled) { }
 void setup_kuap(bool disabled);
 #else
 static inline void setup_kuap(bool disabled) { }
-static inline void allow_user_access(void __user *to, const void __user *from,
-unsigned long size, unsigned long dir) { }
+static inline unsigned long allow_user_access(void __user *to, const void 
__user *from,
+ unsigned long size, unsigned long 
dir)
+{
+   return 1;
+}
+
 static inline void prevent_user_access(void __user *to, const void __user 
*from,
   unsigned long size, unsigned long dir) { 
}
 static inline bool
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h 
b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 1d70c80366fd..ee673d3c0ab6 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -34,10 +34,12 @@
 
 #include 
 
-static inline void allow_user_access(void __user *to, const void __user *from,
-unsigned long size, unsigned long dir)
+static inline unsigned long allow_user_access(void __user 

[PATCH v3 3/7] powerpc/32s: Fix bad_kuap_fault()

2020-01-23 Thread Christophe Leroy
At the moment, bad_kuap_fault() reports a fault only if a bad access
to userspace occurred while access to userspace was not granted.

But if a fault occurs for a write outside the allowed userspace
segment(s) that have been unlocked, bad_kuap_fault() fails to
detect it and the kernel loops forever in do_page_fault().

Fix it by checking that the accessed address is within the allowed
range.

Fixes: a68c31fc01ef ("powerpc/32s: Implement Kernel Userspace Access 
Protection")
Cc: stable@vger.kernel.org
Signed-off-by: Christophe Leroy 
---
v2: added missing address parametre to bad_kuap_fault() in asm/kup.h
v3: no change
---
 arch/powerpc/include/asm/book3s/32/kup.h   | 9 +++--
 arch/powerpc/include/asm/book3s/64/kup-radix.h | 3 ++-
 arch/powerpc/include/asm/kup.h | 6 +-
 arch/powerpc/include/asm/nohash/32/kup-8xx.h   | 3 ++-
 arch/powerpc/mm/fault.c| 2 +-
 5 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h 
b/arch/powerpc/include/asm/book3s/32/kup.h
index f9dc597b0b86..d88008c8eb85 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -131,12 +131,17 @@ static inline void prevent_user_access(void __user *to, 
const void __user *from,
kuap_update_sr(mfsrin(addr) | SR_KS, addr, end);/* set Ks */
 }
 
-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
+   unsigned long begin = regs->kuap & 0xf0000000;
+   unsigned long end = regs->kuap << 28;
+
if (!is_write)
return false;
 
-   return WARN(!regs->kuap, "Bug: write fault blocked by segment registers 
!");
+   return WARN(address < begin || address >= end,
+   "Bug: write fault blocked by segment registers !");
 }
 
 #endif /* CONFIG_PPC_KUAP */
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h 
b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index f254de956d6a..dbbd22cb80f5 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -95,7 +95,8 @@ static inline void prevent_user_access(void __user *to, const 
void __user *from,
set_kuap(AMR_KUAP_BLOCKED);
 }
 
-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
(regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : 
AMR_KUAP_BLOCK_READ)),
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index 5b5e39643a27..812e66f31934 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -45,7 +45,11 @@ static inline void allow_user_access(void __user *to, const 
void __user *from,
 unsigned long size) { }
 static inline void prevent_user_access(void __user *to, const void __user 
*from,
   unsigned long size) { }
-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { 
return false; }
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+   return false;
+}
 #endif /* CONFIG_PPC_KUAP */
 
 static inline void allow_read_from_user(const void __user *from, unsigned long 
size)
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h 
b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 1006a427e99c..f2fea603b929 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -46,7 +46,8 @@ static inline void prevent_user_access(void __user *to, const 
void __user *from,
mtspr(SPRN_MD_AP, MD_APG_KUAP);
 }
 
-static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000),
"Bug: fault blocked by AP register !");
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index b5047f9b5dec..1baeb045f7f4 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -233,7 +233,7 @@ static bool bad_kernel_fault(struct pt_regs *regs, unsigned 
long error_code,
 
// Read/write fault in a valid region (the exception table search passed
// above), but blocked by KUAP is bad, it can never succeed.
-   if (bad_kuap_fault(regs, is_write))
+   if (bad_kuap_fault(regs, address, is_write))
return true;
 
// What's left? Kernel fault on user in well defined regions (extable
-- 
2.25.0



[PATCH v3 2/7] uaccess: Tell user_access_begin() if it's for a write or not

2020-01-23 Thread Christophe Leroy
On 32-bit powerpc (book3s/32), only write accesses to user are
protected and there is no point spending time on unlocking for reads.

On 64-bit powerpc (book3s/64 at least), access can be granted
read only, write only or read/write.

Add an argument to user_access_begin() to tell when it's for write and
return an opaque key that will be used by user_access_end() to know
what was done by user_access_begin().

Signed-off-by: Christophe Leroy 
---
v3: new
---
 arch/x86/include/asm/uaccess.h |  5 +++--
 drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 15 ++-
 fs/readdir.c   | 16 ++--
 include/linux/uaccess.h|  4 ++--
 kernel/compat.c| 16 ++--
 kernel/exit.c  | 17 +++--
 lib/strncpy_from_user.c|  6 --
 lib/strnlen_user.c |  6 --
 lib/usercopy.c |  8 +---
 9 files changed, 59 insertions(+), 34 deletions(-)

diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 61d93f062a36..05eccdc0366a 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -709,7 +709,8 @@ extern struct movsl_mask {
  * checking before using them, but you have to surround them with the
  * user_access_begin/end() pair.
  */
-static __must_check __always_inline bool user_access_begin(const void __user 
*ptr, size_t len)
+static __must_check __always_inline unsigned long
+user_access_begin(const void __user *ptr, size_t len, bool write)
 {
if (unlikely(!access_ok(ptr,len)))
return 0;
@@ -717,7 +718,7 @@ static __must_check __always_inline bool 
user_access_begin(const void __user *pt
return 1;
 }
 #define user_access_begin(a,b) user_access_begin(a,b)
-#define user_access_end()  __uaccess_end()
+#define user_access_end(x) __uaccess_end()
 
 #define user_access_save() smap_save()
 #define user_access_restore(x) smap_restore(x)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index bc3a67226163..509bfb6116ac 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1615,6 +1615,7 @@ static int eb_copy_relocations(const struct 
i915_execbuffer *eb)
const unsigned int count = eb->buffer_count;
unsigned int i;
int err;
+   unsigned long key;
 
for (i = 0; i < count; i++) {
const unsigned int nreloc = eb->exec[i].relocation_count;
@@ -1662,14 +1663,15 @@ static int eb_copy_relocations(const struct 
i915_execbuffer *eb)
 * happened we would make the mistake of assuming that the
 * relocations were valid.
 */
-   if (!user_access_begin(urelocs, size))
+   key = user_access_begin(urelocs, size, true);
+   if (!key)
goto end;
 
for (copied = 0; copied < nreloc; copied++)
unsafe_put_user(-1,
&urelocs[copied].presumed_offset,
end_user);
-   user_access_end();
+   user_access_end(key);
 
eb->exec[i].relocs_ptr = (uintptr_t)relocs;
}
@@ -1677,7 +1679,7 @@ static int eb_copy_relocations(const struct 
i915_execbuffer *eb)
return 0;
 
 end_user:
-   user_access_end();
+   user_access_end(key);
 end:
kvfree(relocs);
err = -EFAULT;
@@ -2906,6 +2908,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void 
*data,
struct drm_i915_gem_exec_object2 __user *user_exec_list =
u64_to_user_ptr(args->buffers_ptr);
unsigned int i;
+   unsigned long key;
 
/* Copy the new buffer offsets back to the user's exec list. */
/*
@@ -2915,7 +2918,9 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void 
*data,
 * And this range already got effectively checked earlier
 * when we did the "copy_from_user()" above.
 */
-   if (!user_access_begin(user_exec_list, count * 
sizeof(*user_exec_list)))
+   key = user_access_begin(user_exec_list,
+   count * sizeof(*user_exec_list), true);
+   if (!key)
goto end;
 
for (i = 0; i < args->buffer_count; i++) {
@@ -2929,7 +2934,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void 
*data,
end_user);
}
 end_user:
-   user_access_end();
+   user_access_end(key);
 end:;
}
 
diff --git a/fs/readdir.c b/fs/readdir.c
index 4b466cbb0f3a..47b9ef97e16e 100644
--- 

[PATCH v3 4/7] powerpc/kuap: Fix set direction in allow/prevent_user_access()

2020-01-23 Thread Christophe Leroy
__builtin_constant_p() always returns 0 for pointers, so on RADIX
we always end up opening both directions (by writing 0 in SPR29):

0170 <._copy_to_user>:
...
 1b0:   4c 00 01 2c isync
 1b4:   39 20 00 00 li  r9,0
 1b8:   7d 3d 03 a6 mtspr   29,r9
 1bc:   4c 00 01 2c isync
 1c0:   48 00 00 01 bl  1c0 <._copy_to_user+0x50>
1c0: R_PPC64_REL24  .__copy_tofrom_user
...
0220 <._copy_from_user>:
...
 2ac:   4c 00 01 2c isync
 2b0:   39 20 00 00 li  r9,0
 2b4:   7d 3d 03 a6 mtspr   29,r9
 2b8:   4c 00 01 2c isync
 2bc:   7f c5 f3 78 mr  r5,r30
 2c0:   7f 83 e3 78 mr  r3,r28
 2c4:   48 00 00 01 bl  2c4 <._copy_from_user+0xa4>
2c4: R_PPC64_REL24  .__copy_tofrom_user
...

Use an explicit parameter for direction selection, so that GCC
is able to see it is a constant:

01b0 <._copy_to_user>:
...
 1f0:   4c 00 01 2c isync
 1f4:   3d 20 40 00 lis r9,16384
 1f8:   79 29 07 c6 rldicr  r9,r9,32,31
 1fc:   7d 3d 03 a6 mtspr   29,r9
 200:   4c 00 01 2c isync
 204:   48 00 00 01 bl  204 <._copy_to_user+0x54>
204: R_PPC64_REL24  .__copy_tofrom_user
...
0260 <._copy_from_user>:
...
 2ec:   4c 00 01 2c isync
 2f0:   39 20 ff ff li  r9,-1
 2f4:   79 29 00 04 rldicr  r9,r9,0,0
 2f8:   7d 3d 03 a6 mtspr   29,r9
 2fc:   4c 00 01 2c isync
 300:   7f c5 f3 78 mr  r5,r30
 304:   7f 83 e3 78 mr  r3,r28
 308:   48 00 00 01 bl  308 <._copy_from_user+0xa8>
308: R_PPC64_REL24  .__copy_tofrom_user
...
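
A minimal illustration of the underlying issue (sketch; open_window()
is a made-up function):

	static inline void open_window(const void __user *to, unsigned long dir)
	{
		/* OK: 'dir' is passed as an integer literal, so after
		 * inlining it is seen as a build-time constant */
		BUILD_BUG_ON(!__builtin_constant_p(dir));

		/* never selected at build time: for a pointer argument
		 * __builtin_constant_p(to) evaluates to 0 */
		if (__builtin_constant_p(to) && to == NULL)
			return;
	}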

Signed-off-by: Christophe Leroy 
---
v2: no change
v3: no change
---
 arch/powerpc/include/asm/book3s/32/kup.h  | 13 ++--
 .../powerpc/include/asm/book3s/64/kup-radix.h | 11 +++
 arch/powerpc/include/asm/kup.h| 30 ++-
 arch/powerpc/include/asm/nohash/32/kup-8xx.h  |  4 +--
 arch/powerpc/include/asm/uaccess.h|  4 +--
 5 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h 
b/arch/powerpc/include/asm/book3s/32/kup.h
index d88008c8eb85..d765515bd1c1 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -102,11 +102,13 @@ static inline void kuap_update_sr(u32 sr, u32 addr, u32 
end)
isync();/* Context sync required after mtsrin() */
 }
 
-static inline void allow_user_access(void __user *to, const void __user *from, 
u32 size)
+static __always_inline void allow_user_access(void __user *to, const void 
__user *from,
+ u32 size, unsigned long dir)
 {
u32 addr, end;
 
-   if (__builtin_constant_p(to) && to == NULL)
+   BUILD_BUG_ON(!__builtin_constant_p(dir));
+   if (!(dir & KUAP_W))
return;
 
addr = (__force u32)to;
@@ -119,11 +121,16 @@ static inline void allow_user_access(void __user *to, 
const void __user *from, u
kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end);   /* Clear Ks */
 }
 
-static inline void prevent_user_access(void __user *to, const void __user 
*from, u32 size)
+static __always_inline void prevent_user_access(void __user *to, const void 
__user *from,
+   u32 size, unsigned long dir)
 {
u32 addr = (__force u32)to;
u32 end = min(addr + size, TASK_SIZE);
 
+   BUILD_BUG_ON(!__builtin_constant_p(dir));
+   if (!(dir & KUAP_W))
+   return;
+
if (!addr || addr >= TASK_SIZE || !size)
return;
 
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h 
b/arch/powerpc/include/asm/book3s/64/kup-radix.h
index dbbd22cb80f5..f11315306d41 100644
--- a/arch/powerpc/include/asm/book3s/64/kup-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -77,20 +77,21 @@ static inline void set_kuap(unsigned long value)
isync();
 }
 
-static inline void allow_user_access(void __user *to, const void __user *from,
-unsigned long size)
+static __always_inline void allow_user_access(void __user *to, const void 
__user *from,
+ unsigned long size, unsigned long 
dir)
 {
// This is written so we can resolve to a single case at build time
-   if (__builtin_constant_p(to) && to == NULL)
+   BUILD_BUG_ON(!__builtin_constant_p(dir));
+   if (dir == KUAP_R)
set_kuap(AMR_KUAP_BLOCK_WRITE);
-   else if (__builtin_constant_p(from) && from == NULL)
+   else if (dir == KUAP_W)
set_kuap(AMR_KUAP_BLOCK_READ);
else
set_kuap(0);
 }
 
 static inline void prevent_user_access(void __user *to, const void __user *from,
-  unsigned long size)
+  

[PATCH v3 1/7] fs/readdir: Fix filldir() and filldir64() use of user_access_begin()

2020-01-23 Thread Christophe Leroy
Some architectures grant full access to userspace regardless of the
address/len passed to user_access_begin(), but other architectures
only grant access to the requested area.

For example, on 32-bit powerpc (book3s/32), access is granted in
segments of 256 Mbytes.

Modify filldir() and filldir64() to request the real area they need
to get access to, i.e. the area covering the parent dirent (if any)
and the contiguous current dirent.
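
In code terms, the requested window is just the parent's d_off field
plus the current record; a sketch of the idea (simplified from the
patch below):

	/* prev_reclen is 0 on the first call, so the window then covers
	 * only the current dirent. */
	struct linux_dirent __user *dirent = buf->current_dir;
	struct linux_dirent __user *prev = (void __user *)dirent - prev_reclen;

	if (!user_access_begin(prev, reclen + prev_reclen))
		goto efault;
	/* ... unsafe_put_user() / unsafe_copy_dirent_name() writes ... */
	user_access_end();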

Suggested-by: Linus Torvalds 
Fixes: 9f79b78ef744 ("Convert filldir[64]() from __put_user() to unsafe_put_user()")
Signed-off-by: Christophe Leroy 
---
v2: have user_access_begin() cover both parent dirent (if any) and current dirent
v3: replaced by patch from Linus
---
 fs/readdir.c | 70 +---
 1 file changed, 34 insertions(+), 36 deletions(-)

diff --git a/fs/readdir.c b/fs/readdir.c
index d26d5ea4de7b..4b466cbb0f3a 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -206,7 +206,7 @@ struct linux_dirent {
 struct getdents_callback {
struct dir_context ctx;
struct linux_dirent __user * current_dir;
-   struct linux_dirent __user * previous;
+   int prev_reclen;
int count;
int error;
 };
@@ -214,12 +214,13 @@ struct getdents_callback {
 static int filldir(struct dir_context *ctx, const char *name, int namlen,
   loff_t offset, u64 ino, unsigned int d_type)
 {
-   struct linux_dirent __user * dirent;
+   struct linux_dirent __user *dirent, *prev;
struct getdents_callback *buf =
container_of(ctx, struct getdents_callback, ctx);
unsigned long d_ino;
int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2,
sizeof(long));
+   int prev_reclen;
 
buf->error = verify_dirent_name(name, namlen);
if (unlikely(buf->error))
@@ -232,28 +233,24 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
buf->error = -EOVERFLOW;
return -EOVERFLOW;
}
-   dirent = buf->previous;
-   if (dirent && signal_pending(current))
+   prev_reclen = buf->prev_reclen;
+   if (prev_reclen && signal_pending(current))
return -EINTR;
-
-   /*
-* Note! This range-checks 'previous' (which may be NULL).
-* The real range was checked in getdents
-*/
-   if (!user_access_begin(dirent, sizeof(*dirent)))
-   goto efault;
-   if (dirent)
-		unsafe_put_user(offset, &dirent->d_off, efault_end);
dirent = buf->current_dir;
+   prev = (void __user *)dirent - prev_reclen;
+   if (!user_access_begin(prev, reclen + prev_reclen))
+   goto efault;
+
+   /* This might be 'dirent->d_off', but if so it will get overwritten */
+	unsafe_put_user(offset, &prev->d_off, efault_end);
 	unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
 	unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
	unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end);
user_access_end();
 
-   buf->previous = dirent;
-   dirent = (void __user *)dirent + reclen;
-   buf->current_dir = dirent;
+   buf->current_dir = (void __user *)dirent + reclen;
+   buf->prev_reclen = reclen;
buf->count -= reclen;
return 0;
 efault_end:
@@ -267,7 +264,6 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
struct linux_dirent __user *, dirent, unsigned int, count)
 {
struct fd f;
-   struct linux_dirent __user * lastdirent;
struct getdents_callback buf = {
.ctx.actor = filldir,
.count = count,
@@ -285,8 +281,10 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
	error = iterate_dir(f.file, &buf.ctx);
if (error >= 0)
error = buf.error;
-   lastdirent = buf.previous;
-   if (lastdirent) {
+   if (buf.prev_reclen) {
+   struct linux_dirent __user *lastdirent;
+   lastdirent = (void __user *)buf.current_dir - buf.prev_reclen;
+
		if (put_user(buf.ctx.pos, &lastdirent->d_off))
error = -EFAULT;
else
@@ -299,7 +297,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd,
 struct getdents_callback64 {
struct dir_context ctx;
struct linux_dirent64 __user * current_dir;
-   struct linux_dirent64 __user * previous;
+   int prev_reclen;
int count;
int error;
 };
@@ -307,11 +305,12 @@ struct getdents_callback64 {
 static int filldir64(struct dir_context *ctx, const char *name, int namlen,
 loff_t offset, u64 ino, unsigned int d_type)
 {
-   struct linux_dirent64 __user *dirent;
+   struct linux_dirent64 __user *dirent, *prev;
struct getdents_callback64 *buf =
container_of(ctx, struct getdents_callback64, ctx);
int reclen = 

[PATCH v3 5/7] powerpc/32s: Drop NULL addr verification

2020-01-23 Thread Christophe Leroy
NULL addr is a user address. Don't waste time checking it. If
someone tries to access it, it will SIGFAULT the same way as for
address 1, so no need to make it special.

The special case is when not doing a write; in that case we want
to drop the entire function. This is now handled by the 'dir' param
and not by the nullity of 'to' anymore.

Also make the beginning of prevent_user_access() similar
to the beginning of allow_user_access(), and tell the compiler
that writing to kernel space or with a zero length is unlikely.
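
(For reference, unlikely() is the standard hint from
include/linux/compiler.h; it only affects code placement, not
correctness:)

	#define unlikely(x)	__builtin_expect(!!(x), 0)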

Signed-off-by: Christophe Leroy 
---
v2: no change
v3: no change
---
 arch/powerpc/include/asm/book3s/32/kup.h | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index d765515bd1c1..3c1798e56b55 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -113,7 +113,7 @@ static __always_inline void allow_user_access(void __user *to, const void __user
 
addr = (__force u32)to;
 
-   if (!addr || addr >= TASK_SIZE || !size)
+   if (unlikely(addr >= TASK_SIZE || !size))
return;
 
end = min(addr + size, TASK_SIZE);
@@ -124,16 +124,18 @@ static __always_inline void allow_user_access(void __user *to, const void __user
 static __always_inline void prevent_user_access(void __user *to, const void __user *from,
u32 size, unsigned long dir)
 {
-   u32 addr = (__force u32)to;
-   u32 end = min(addr + size, TASK_SIZE);
+   u32 addr, end;
 
BUILD_BUG_ON(!__builtin_constant_p(dir));
if (!(dir & KUAP_W))
return;
 
-   if (!addr || addr >= TASK_SIZE || !size)
+   addr = (__force u32)to;
+
+   if (unlikely(addr >= TASK_SIZE || !size))
return;
 
+   end = min(addr + size, TASK_SIZE);
current->thread.kuap = 0;
kuap_update_sr(mfsrin(addr) | SR_KS, addr, end);/* set Ks */
 }
-- 
2.25.0



Re: [PATCH v2 1/6] fs/readdir: Fix filldir() and filldir64() use of user_access_begin()

2020-01-23 Thread Christophe Leroy




On 23/01/2020 at 13:00, Michael Ellerman wrote:

Michael Ellerman  writes:

Hi Christophe,

This patch is independent of the rest of the series AFAICS


And of course having hit send I immediately realise that's not true.


Without this, book3s/32 fails booting. (And without patch 2, it even 
hangs, looping forever in do_page_fault()).






So I'll take patches 2-6 via powerpc and assume this patch will go via
Linus or Al or elsewhere.


So I guess I'll wait and see what happens with patch 1.


We could opt user_access_begin() out for CONFIG_PPC_BOOK3S_32; then 
you could take patches 3 and 6. That's enough to have 
user_access_begin() and friends for 8xx and RADIX.


Patch 2 should be taken as well, as a fix, and can be kept independent 
of the series (once we have patch 1, we normally don't hit the problem 
fixed by patch 2).


We don't need patch 4 until we want user_access_begin() supported by 
book3s/32.



However, I'm about to send out a v3 with a different approach. It 
modifies the core so that user_access_begin() returns an opaque value 
which is then used by user_access_end(). It also tells 
user_access_begin() whether the access is a read or a write, so that 
we can limit unlocking to write accesses on book3s/32 and fine-grain 
rights on book3s/64.
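
Purely as a sketch of that proposal (the names and signatures are 
hypothetical, not the final code), the usage would look like:

	/* begin returns an opaque key describing what was unlocked;
	 * end uses it to restore the previous state (0 = failure here). */
	unsigned long key = ua_begin_write(to, len);

	if (!key)
		return -EFAULT;
	/* ... unsafe_put_user() calls ... */
	ua_end(key);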


Maybe you would prefer this change on top of the first step, in which 
case I'll make a v4 rebasing all of this on top of patches 3 and 6 of 
the v3 series. Tell me what you prefer.


Christophe


Re: [PATCH v2 6/6] powerpc: Implement user_access_begin and friends

2020-01-23 Thread Michael Ellerman
Michael Ellerman  writes:
> Christophe Leroy  writes:
>> Today, when a function like strncpy_from_user() is called,
>> the userspace access protection is de-activated and re-activated
>> for every word read.
>>
>> By implementing user_access_begin and friends, the protection
>> is de-activated at the beginning of the copy and re-activated at the
>> end.
>>
>> Implement user_access_begin(), user_access_end() and
>> unsafe_get_user(), unsafe_put_user() and unsafe_copy_to_user()
>>
>> For the time being, we keep user_access_save() and
>> user_access_restore() as nops.
>
> That means we will run with user access enabled in a few more places, but
> it's only used sparingly AFAICS:
>
>   kernel/trace/trace_branch.c:unsigned long flags = user_access_save();
>   lib/ubsan.c:unsigned long flags = user_access_save();
>   lib/ubsan.c:unsigned long ua_flags = user_access_save();
>   mm/kasan/common.c:  unsigned long flags = user_access_save();
>
> And we don't have objtool checking that user access enablement isn't
> leaking in the first place, so I guess it's OK for us not to implement
> these to begin with?

It looks like we can implement them on all three KUAP
implementations.

For radix and 8xx we just return/set the relevant SPR.

For book3s/32/kup.h I think we'd just need to add a KUAP_CURRENT case to
allow_user_access()?
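
A sketch of what that could look like on radix (assuming the AMR SPR
carries the whole KUAP state; untested):

	static __always_inline unsigned long user_access_save(void)
	{
		unsigned long amr = mfspr(SPRN_AMR);

		set_kuap(AMR_KUAP_BLOCKED);	/* lock while saved */
		return amr;
	}

	static __always_inline void user_access_restore(unsigned long amr)
	{
		set_kuap(amr);
	}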

cheers


Re: [PATCH v2 6/6] powerpc: Implement user_access_begin and friends

2020-01-23 Thread Michael Ellerman
Christophe Leroy  writes:
> Today, when a function like strncpy_from_user() is called,
> the userspace access protection is de-activated and re-activated
> for every word read.
>
> By implementing user_access_begin and friends, the protection
> is de-activated at the beginning of the copy and re-activated at the
> end.
>
> Implement user_access_begin(), user_access_end() and
> unsafe_get_user(), unsafe_put_user() and unsafe_copy_to_user()
>
> For the time being, we keep user_access_save() and
> user_access_restore() as nops.

That means we will run with user access enabled in a few more places, but
it's only used sparingly AFAICS:

  kernel/trace/trace_branch.c:unsigned long flags = user_access_save();
  lib/ubsan.c:unsigned long flags = user_access_save();
  lib/ubsan.c:unsigned long ua_flags = user_access_save();
  mm/kasan/common.c:  unsigned long flags = user_access_save();

And we don't have objtool checking that user access enablement isn't
leaking in the first place, so I guess it's OK for us not to implement
these to begin with?

cheers


> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> index cafad1960e76..ea67bbd56bd4 100644
> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -91,9 +91,14 @@ static inline int __access_ok(unsigned long addr, unsigned long size,
>   __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
>  
>  #define __get_user(x, ptr) \
> - __get_user_nocheck((x), (ptr), sizeof(*(ptr)))
> + __get_user_nocheck((x), (ptr), sizeof(*(ptr)), true)
>  #define __put_user(x, ptr) \
> - __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
> + __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), true)
> +
> +#define __get_user_allowed(x, ptr) \
> + __get_user_nocheck((x), (ptr), sizeof(*(ptr)), false)
> +#define __put_user_allowed(x, ptr) \
> +	__put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), false)
>  
>  #define __get_user_inatomic(x, ptr) \
>   __get_user_nosleep((x), (ptr), sizeof(*(ptr)))
> @@ -138,10 +143,9 @@ extern long __put_user_bad(void);
>   : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
>  #endif /* __powerpc64__ */
>  
> -#define __put_user_size(x, ptr, size, retval)\
> +#define __put_user_size_allowed(x, ptr, size, retval)\
>  do { \
>   retval = 0; \
> - allow_write_to_user(ptr, size); \
>   switch (size) { \
> case 1: __put_user_asm(x, ptr, retval, "stb"); break; \
> case 2: __put_user_asm(x, ptr, retval, "sth"); break; \
> @@ -149,17 +153,26 @@ do { \
> case 8: __put_user_asm2(x, ptr, retval); break;   \
> default: __put_user_bad();\
>   }   \
> +} while (0)
> +
> +#define __put_user_size(x, ptr, size, retval)\
> +do { \
> + allow_write_to_user(ptr, size); \
> + __put_user_size_allowed(x, ptr, size, retval);  \
>   prevent_write_to_user(ptr, size);   \
>  } while (0)
>  
> -#define __put_user_nocheck(x, ptr, size) \
> +#define __put_user_nocheck(x, ptr, size, allow)  \
>  ({   \
>   long __pu_err;  \
>   __typeof__(*(ptr)) __user *__pu_addr = (ptr);   \
>   if (!is_kernel_addr((unsigned long)__pu_addr))  \
>   might_fault();  \
>   __chk_user_ptr(ptr);\
> - __put_user_size((x), __pu_addr, (size), __pu_err);  \
> +	if (allow)							\
> +		__put_user_size((x), __pu_addr, (size), __pu_err);	\
> +	else								\
> +		__put_user_size_allowed((x), __pu_addr, (size), __pu_err);	\
>   __pu_err;   \
>  })
>  
> @@ -236,13 +249,12 @@ extern long __get_user_bad(void);
>   : "b" (addr), "i" (-EFAULT), "0" (err))
>  #endif /* __powerpc64__ */
>  
> -#define __get_user_size(x, ptr, size, retval)\
> +#define __get_user_size_allowed(x, ptr, size, retval)\
>  do { \
>   retval = 0; \
>   __chk_user_ptr(ptr);   

Re: [PATCH v2 1/6] fs/readdir: Fix filldir() and filldir64() use of user_access_begin()

2020-01-23 Thread Michael Ellerman
Michael Ellerman  writes:
> Hi Christophe,
>
> This patch is independent of the rest of the series AFAICS

And of course having hit send I immediately realise that's not true.

> So I'll take patches 2-6 via powerpc and assume this patch will go via
> Linus or Al or elsewhere.

So I guess I'll wait and see what happens with patch 1.

cheers


Re: [PATCH v2 1/6] fs/readdir: Fix filldir() and filldir64() use of user_access_begin()

2020-01-23 Thread Michael Ellerman
Hi Christophe,

This patch is independent of the rest of the series AFAICS, and it looks
like Linus has modified it quite a bit down thread.

So I'll take patches 2-6 via powerpc and assume this patch will go via
Linus or Al or elsewhere.

Also a couple of minor spelling fixes below.

cheers

Christophe Leroy  writes:
> Some architectures grand full access to userspace regardless of the
 ^
 grant
> address/len passed to user_access_begin(), but other architectures
> only grand access to the requested area.
   ^
   grant
>
> For exemple, on 32 bits powerpc (book3s/32), access is granted by
  ^
  example
> segments of 256 Mbytes.
>
> Modify filldir() and filldir64() to request the real area they need
> to get access to, i.e. the area covering the parent dirent (if any)
> and the contiguous current dirent.
>
> Fixes: 9f79b78ef744 ("Convert filldir[64]() from __put_user() to unsafe_put_user()")
> Signed-off-by: Christophe Leroy 
> ---
> v2: have user_access_begin() cover both parent dirent (if any) and current dirent
> ---
>  fs/readdir.c | 50 --
>  1 file changed, 28 insertions(+), 22 deletions(-)
>
> diff --git a/fs/readdir.c b/fs/readdir.c
> index d26d5ea4de7b..3f9b4488d9b7 100644
> --- a/fs/readdir.c
> +++ b/fs/readdir.c
> @@ -214,7 +214,7 @@ struct getdents_callback {
>  static int filldir(struct dir_context *ctx, const char *name, int namlen,
>  loff_t offset, u64 ino, unsigned int d_type)
>  {
> - struct linux_dirent __user * dirent;
> + struct linux_dirent __user * dirent, *dirent0;
>   struct getdents_callback *buf =
>   container_of(ctx, struct getdents_callback, ctx);
>   unsigned long d_ino;
> @@ -232,19 +232,22 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen,
>   buf->error = -EOVERFLOW;
>   return -EOVERFLOW;
>   }
> - dirent = buf->previous;
> - if (dirent && signal_pending(current))
> + dirent0 = buf->previous;
> + if (dirent0 && signal_pending(current))
>   return -EINTR;
>  
> - /*
> -  * Note! This range-checks 'previous' (which may be NULL).
> -  * The real range was checked in getdents
> -  */
> - if (!user_access_begin(dirent, sizeof(*dirent)))
> - goto efault;
> - if (dirent)
> -	unsafe_put_user(offset, &dirent->d_off, efault_end);
>   dirent = buf->current_dir;
> + if (dirent0) {
> + int sz = (void __user *)dirent + reclen -
> +  (void __user *)dirent0;
> +
> + if (!user_access_begin(dirent0, sz))
> + goto efault;
> +		unsafe_put_user(offset, &dirent0->d_off, efault_end);
> + } else {
> + if (!user_access_begin(dirent, reclen))
> + goto efault;
> + }
>   unsafe_put_user(d_ino, &dirent->d_ino, efault_end);
>   unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
>   unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end);
> @@ -307,7 +310,7 @@ struct getdents_callback64 {
>  static int filldir64(struct dir_context *ctx, const char *name, int namlen,
>loff_t offset, u64 ino, unsigned int d_type)
>  {
> - struct linux_dirent64 __user *dirent;
> + struct linux_dirent64 __user *dirent, *dirent0;
>   struct getdents_callback64 *buf =
>   container_of(ctx, struct getdents_callback64, ctx);
>   int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1,
> @@ -319,19 +322,22 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen,
>   buf->error = -EINVAL;   /* only used if we fail.. */
>   if (reclen > buf->count)
>   return -EINVAL;
> - dirent = buf->previous;
> - if (dirent && signal_pending(current))
> + dirent0 = buf->previous;
> + if (dirent0 && signal_pending(current))
>   return -EINTR;
>  
> - /*
> -  * Note! This range-checks 'previous' (which may be NULL).
> -  * The real range was checked in getdents
> -  */
> - if (!user_access_begin(dirent, sizeof(*dirent)))
> - goto efault;
> - if (dirent)
> -	unsafe_put_user(offset, &dirent->d_off, efault_end);
>   dirent = buf->current_dir;
> + if (dirent0) {
> + int sz = (void __user *)dirent + reclen -
> +  (void __user *)dirent0;
> +
> + if (!user_access_begin(dirent0, sz))
> + goto efault;
> +		unsafe_put_user(offset, &dirent0->d_off, efault_end);
> + } else {
> + if (!user_access_begin(dirent, reclen))
> + goto efault;
> + }
>   unsafe_put_user(ino, &dirent->d_ino, efault_end);
>   unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
>   unsafe_put_user(d_type, &dirent->d_type, efault_end);
> -- 
> 2.25.0


[PATCH] powerpc/fsl_booke: avoid creating duplicate tlb1 entry

2020-01-23 Thread Laurentiu Tudor
In the current implementation, the call to loadcam_multi() is wrapped
between switch_to_as1() and restore_to_as0() calls so, when it tries
to create its own temporary AS=1 TLB1 entry, it ends up duplicating the
existing one created by switch_to_as1(). Add a check to skip creating
the temporary entry if already running in AS=1.
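
In C terms, the guard added below amounts to (sketch only; the actual
change is in the assembly):

	/* MSR[IS] set means translation is already using address space 1,
	 * i.e. switch_to_as1() already installed a usable TLB1 entry. */
	static inline bool already_in_as1(void)
	{
		return (mfmsr() & MSR_IS) != 0;
	}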

Fixes: d9e1831a4202 ("powerpc/85xx: Load all early TLB entries at once")
Signed-off-by: Laurentiu Tudor 
Cc: sta...@vger.kernel.org
---
 arch/powerpc/mm/nohash/tlb_low.S | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/mm/nohash/tlb_low.S b/arch/powerpc/mm/nohash/tlb_low.S
index 2ca407cedbe7..eaeee402f96e 100644
--- a/arch/powerpc/mm/nohash/tlb_low.S
+++ b/arch/powerpc/mm/nohash/tlb_low.S
@@ -397,7 +397,7 @@ _GLOBAL(set_context)
  * extern void loadcam_entry(unsigned int index)
  *
  * Load TLBCAM[index] entry in to the L2 CAM MMU
- * Must preserve r7, r8, r9, and r10
+ * Must preserve r7, r8, r9, r10 and r11
  */
 _GLOBAL(loadcam_entry)
mflrr5
@@ -433,6 +433,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
  */
 _GLOBAL(loadcam_multi)
mflrr8
+   /* Don't switch to AS=1 if already there */
+   mfmsr   r11
+   andi.   r11,r11,MSR_IS
+   bne 10f
 
/*
 * Set up temporary TLB entry that is the same as what we're
@@ -458,6 +462,7 @@ _GLOBAL(loadcam_multi)
mtmsr   r6
isync
 
+10:
mr  r9,r3
add r10,r3,r4
 2: bl  loadcam_entry
@@ -466,6 +471,10 @@ _GLOBAL(loadcam_multi)
mr  r3,r9
blt 2b
 
+   /* Don't return to AS=0 if we were in AS=1 at function start */
+   andi.   r11,r11,MSR_IS
+   bne 3f
+
/* Return to AS=0 and clear the temporary entry */
mfmsr   r6
rlwinm. r6,r6,0,~(MSR_IS|MSR_DS)
@@ -481,6 +490,7 @@ _GLOBAL(loadcam_multi)
tlbwe
isync
 
+3:
mtlrr8
blr
 #endif
-- 
2.17.1



Re: [PATCH v2 5/6] powerpc/32s: prepare prevent_user_access() for user_access_end()

2020-01-23 Thread Michael Ellerman
Christophe Leroy  writes:
> In preparation of implementing user_access_begin and friends
> on powerpc, the book3s/32 version of prevent_user_access() need
> to be prepared for user_access_end().
>
> user_access_end() doesn't provide the address and size which
> were passed to user_access_begin(), required by prevent_user_access()
> to know which segment to modify.
>
> The list of segments which were unprotected by allow_user_access()
> is available in current->kuap. But we don't want prevent_user_access()
> to read this all the time, especially every time it is 0 (for instance
> because the access was not a write access).
>
> Implement a special direction case named KUAP_SELF. In this case only,
> the addr and end are retrieved from current->kuap.

Can we call it KUAP_CURRENT?

ie. "use the KUAP state in current"

cheers


[PATCH v2] powerpc/mm/hash: Fix sharing context ids between kernel & userspace

2020-01-23 Thread Michael Ellerman
From: "Aneesh Kumar K.V" 

Commit 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in
the same 0xc range") has a bug in the definition of MIN_USER_CONTEXT.

The result is that the context id used for the vmemmap and the lowest
context id handed out to userspace are the same. The context id is
essentially the process identifier as far as the first stage of the
MMU translation is concerned.

This can result in multiple SLB entries with the same VSID (Virtual
Segment ID), accessible to the kernel and some random userspace
process that happens to get the overlapping id, which is not expected
eg:

  07 c00c0800 40066bdea7000500  1T  ESID=   c00c00  VSID=  66bdea7  LLP:100
  12 00020800 40066bdea7000d80  1T  ESID=  200  VSID=  66bdea7  LLP:100

Even though the user process and the kernel use the same VSID, the
permissions in the hash page table prevent the user process from
reading or writing to any kernel mappings.

It can also lead to SLB entries with different base page size
encodings (LLP), eg:

  05 c00c0800 6bde0053b500 256M ESID=c00c0  VSID=6bde0053b  LLP:100
  09 0800 6bde0053bc80 256M ESID=0  VSID=6bde0053b  LLP:  0

Such SLB entries can result in machine checks, eg. as seen on a G5:

  Oops: Machine check, sig: 7 [#1]
  BE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=4 NUMA PowerMac
  NIP: c026f248 LR: c0295e58 CTR: 
  REGS: c000erfd3d70 TRAP: 0200 Tainted: G M (5.5.0-rc1-gcc-8.2.0-00010-g228b667d8ea1)
  MSR: 90109032  CR: 24282048 XER: 
  DAR: c00c00612c80 DSISR: 0400 IRQMASK: 0
  ...
  NIP [c026f248] .kmem_cache_free+0x58/0x140
  LR  [c0295e58] .putname+0xb8/0xa0
  Call Trace:
    .putname+0xb8/0xa0
    .filename_lookup.part.76+0xbe/0x160
    .do_faccessat+0xe0/0x380
    system_call+0x5c/0x68

This happens with 256MB segments and 64K pages, as the duplicate VSID
is hit with the first vmemmap segment and the first user segment, and
older 32-bit userspace maps things in the first user segment.

On other CPUs a machine check is not seen. Instead the userspace
process can get stuck continuously faulting, with the fault never
properly serviced, due to the kernel not understanding that there is
already a HPTE for the address but with inaccessible permissions.

On machines with 1T segments we've not seen the bug hit other than by
deliberately exercising it. That seems to be just a matter of luck
though, due to the typical layout of the user virtual address space
and the ranges of vmemmap that are typically populated.

To fix it we add 2 to MIN_USER_CONTEXT. This ensures the lowest
context given to userspace doesn't overlap with the VMEMMAP context,
or with the context for INVALID_REGION_ID.
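
The mechanics show up in a simplified sketch of get_vsid() (the final
scrambling step is omitted here): the context id flows straight into
the VSID, so two owners of one context id share segments.

	static unsigned long proto_vsid(unsigned long context, unsigned long ea)
	{
		/* same context and same ESID bits => same VSID */
		return (context << ESID_BITS) | ((ea >> SID_SHIFT) & ESID_BITS_MASK);
	}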

Fixes: 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in the same 0xc range")
Cc: sta...@vger.kernel.org # v5.2+
Reported-by: Christian Marillat 
Reported-by: Romain Dolbeau 
Signed-off-by: Aneesh Kumar K.V 
[mpe: Account for INVALID_REGION_ID, mostly rewrite change log]
Signed-off-by: Michael Ellerman 
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 15b75005bc34..3fa1b962dc27 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -600,8 +600,11 @@ extern void slb_set_size(u16 size);
  *
  */
 #define MAX_USER_CONTEXT   ((ASM_CONST(1) << CONTEXT_BITS) - 2)
+
+// The + 2 accounts for INVALID_REGION and 1 more to avoid overlap with kernel
 #define MIN_USER_CONTEXT   (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \
-MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT)
+MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT + 2)
+
 /*
  * For platforms that support on 65bit VA we limit the context bits
  */
-- 
2.21.1



Re: [PATCH v2 net-next] net: convert suitable drivers to use phy_do_ioctl_running

2020-01-23 Thread David Miller
From: Heiner Kallweit 
Date: Tue, 21 Jan 2020 22:09:33 +0100

> Convert suitable drivers to use new helper phy_do_ioctl_running.
> 
> Signed-off-by: Heiner Kallweit 
> ---
> v2: I forgot the netdev mailing list

Applied to net-next.


Re: [PATCH kernel RFC 0/4] powerpc/powenv/ioda: Allow huge DMA window at 4GB

2020-01-23 Thread Alexey Kardashevskiy



On 23/01/2020 12:17, David Gibson wrote:
> On Thu, Jan 23, 2020 at 11:53:32AM +1100, Alexey Kardashevskiy wrote:
>> Anyone, ping?
> 
> Sorry, I've totally lost track of this one.  I think you'll need to
> repost.



It has not changed and still applies, and the question is more about how
we proceed with this feature than about the patches themselves. Or is it
just not in your mailbox anymore, so you cannot reply? :)



> 
> 
>>
>>
>> On 10/01/2020 15:18, Alexey Kardashevskiy wrote:
>>>
>>>
>>> On 02/12/2019 12:59, Alexey Kardashevskiy wrote:
 Here is an attempt to support a bigger DMA space for devices
 supporting DMA masks of less than 59 bits (GPUs come to mind
 first). POWER9 PHBs have an option to map 2 windows at 0
 and select a window based on the DMA address being below or
 above 4GB.

 This adds the "iommu=iommu_bypass" kernel parameter and
 supports the VFIO+pseries machine - currently this requires telling
 upstream+unmodified QEMU about this via
 -global spapr-pci-host-bridge.dma64_win_addr=0x1
 or per-phb property. 4/4 advertises the new option but
 there is no automation around it in QEMU (should it be?).

 For now it is either 1<<59 or 4GB mode; dynamic switching is
 not supported (could be via sysfs).

 This is based on sha1
 a6ed68d6468b Linus Torvalds "Merge tag 'drm-next-2019-11-27' of 
 git://anongit.freedesktop.org/drm/drm".

 Please comment. Thanks.
>>>
>>>
>>> David, Alistair, ping? Thanks,
>>
>>
>>>
>>>



 Alexey Kardashevskiy (4):
   powerpc/powernv/ioda: Rework for huge DMA window at 4GB
   powerpc/powernv/ioda: Allow smaller TCE table levels
   powerpc/powernv/phb4: Add 4GB IOMMU bypass mode
   vfio/spapr_tce: Advertise and allow a huge DMA windows at 4GB

  arch/powerpc/include/asm/iommu.h  |   1 +
  arch/powerpc/include/asm/opal-api.h   |  11 +-
  arch/powerpc/include/asm/opal.h   |   2 +
  arch/powerpc/platforms/powernv/pci.h  |   1 +
  include/uapi/linux/vfio.h |   2 +
  arch/powerpc/platforms/powernv/opal-call.c|   2 +
  arch/powerpc/platforms/powernv/pci-ioda-tce.c |   4 +-
  arch/powerpc/platforms/powernv/pci-ioda.c | 219 ++
  drivers/vfio/vfio_iommu_spapr_tce.c   |  10 +-
  9 files changed, 202 insertions(+), 50 deletions(-)

>>>
>>
> 

-- 
Alexey


[PATCH] lib: Reduce user_access_begin() boundaries in strncpy_from_user() and strnlen_user()

2020-01-23 Thread Christophe Leroy
The range passed to user_access_begin() by strncpy_from_user() and
strnlen_user() starts at 'src' and goes up to the limit of userspace,
although reads will be limited by the 'count' param.

On 32-bit powerpc (book3s/32), access has to be granted for each
256 Mbyte segment, and the cost increases with the number of segments
to unlock.

Limit the range with the 'count' param.
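
As a sketch, the fix just moves the truncation ahead of the unlock so
the window passed to user_access_begin() is bounded by 'count' (this
mirrors the strncpy_from_user() hunk below):

	unsigned long max = max_addr - src_addr;

	if (max > count)
		max = count;			/* bound the window first */

	if (user_access_begin(src, max)) {	/* unlock only what is needed */
		retval = do_strncpy_from_user(dst, src, count, max);
		user_access_end();
	}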

Fixes: 594cc251fdd0 ("make 'user_access_begin()' do 'access_ok()'")
Signed-off-by: Christophe Leroy 
---
 lib/strncpy_from_user.c | 14 +++---
 lib/strnlen_user.c  | 14 +++---
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index dccb95af6003..706020b06617 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -30,13 +30,6 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src,
const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
unsigned long res = 0;
 
-   /*
-* Truncate 'max' to the user-specified limit, so that
-* we only have one limit we need to check in the loop
-*/
-   if (max > count)
-   max = count;
-
if (IS_UNALIGNED(src, dst))
goto byte_at_a_time;
 
@@ -114,6 +107,13 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
unsigned long max = max_addr - src_addr;
long retval;
 
+   /*
+* Truncate 'max' to the user-specified limit, so that
+* we only have one limit we need to check in the loop
+*/
+   if (max > count)
+   max = count;
+
kasan_check_write(dst, count);
check_object_size(dst, count, false);
if (user_access_begin(src, max)) {
diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c
index 6c0005d5dd5c..41670d4a5816 100644
--- a/lib/strnlen_user.c
+++ b/lib/strnlen_user.c
@@ -26,13 +26,6 @@ static inline long do_strnlen_user(const char __user *src, unsigned long count,
unsigned long align, res = 0;
unsigned long c;
 
-   /*
-* Truncate 'max' to the user-specified limit, so that
-* we only have one limit we need to check in the loop
-*/
-   if (max > count)
-   max = count;
-
/*
 * Do everything aligned. But that means that we
 * need to also expand the maximum..
@@ -109,6 +102,13 @@ long strnlen_user(const char __user *str, long count)
unsigned long max = max_addr - src_addr;
long retval;
 
+   /*
+* Truncate 'max' to the user-specified limit, so that
+* we only have one limit we need to check in the loop
+*/
+   if (max > count)
+   max = count;
+
if (user_access_begin(str, max)) {
retval = do_strnlen_user(str, count, max);
user_access_end();
-- 
2.25.0