Re: [PATCH v1 2/3] powerpc/code-patching: Use dedicated memory routines for patching

2024-03-17 Thread Benjamin Gray
On Fri, 2024-03-15 at 06:36 +0000, Christophe Leroy wrote:
> 
> 
> Le 15/03/2024 à 03:57, Benjamin Gray a écrit :
> > The patching page set up as a writable alias may be in quadrant 1
> > (userspace) if the temporary mm path is used. This causes sanitiser
> > failures if so. Sanitiser failures also occur on the non-mm path
> > because the plain memset family is instrumented, and KASAN treats
> > the
> > patching window as poisoned.
> > 
> > Introduce locally defined patch_* variants of memset that perform
> > an
> > uninstrumented lower level set, as well as detecting write errors
> > like
> > the original single patch variant does.
> > 
> > copy_to_user() is not correct here, as the PTE makes it a proper
> > kernel
> > page (the EAA is privileged access only, RW). It just happens to be
> > in
> > quadrant 1 because that's the hardware's mechanism for using the
> > current
> > PID vs PID 0 in translations. Importantly, it's incorrect to allow
> > user
> > page accesses.
> > 
> > Now that the patching memsets are used, we also propagate a failure
> > up
> > to the caller as the single patch variant does.
> > 
> > Signed-off-by: Benjamin Gray 
> > 
> > ---
> > 
> > The patch_memcpy() can be optimised to 4 bytes at a time assuming
> > the
> > same requirements as regular instruction patching are being
> > followed
> > for the 'copy sequence of instructions' mode (i.e., they actually
> > are
> > instructions following instruction alignment rules).
> 
> Why not use copy_to_kernel_nofault() ?

I had not come across copy_to_kernel_nofault(). It looks like the
optimised memcpy() I wanted, so thanks.

> 
> 
> > ---
> >   arch/powerpc/lib/code-patching.c | 42
> > +---
> >   1 file changed, 38 insertions(+), 4 deletions(-)
> > 
> > diff --git a/arch/powerpc/lib/code-patching.c
> > b/arch/powerpc/lib/code-patching.c
> > index c6ab46156cda..c6633759b509 100644
> > --- a/arch/powerpc/lib/code-patching.c
> > +++ b/arch/powerpc/lib/code-patching.c
> > @@ -372,9 +372,43 @@ int patch_instruction(u32 *addr, ppc_inst_t
> > instr)
> >   }
> >   NOKPROBE_SYMBOL(patch_instruction);
> >   
> > +static int patch_memset64(u64 *addr, u64 val, size_t count)
> > +{
> > +   for (u64 *end = addr + count; addr < end; addr++)
> > +   __put_kernel_nofault(addr, &val, u64, failed);
> > +
> > +   return 0;
> > +
> > +failed:
> > +   return -EPERM;
> 
> Is it correct ? Shouldn't it be -EFAULT ?

The single instruction patch returns EPERM, which was set this way to
align with ftrace's expectations. I think it's best to keep the
single/multi patching variants consistent with each other where
possible.

> 
> > +}
> > +
> > +static int patch_memset32(u32 *addr, u32 val, size_t count)
> > +{
> > +   for (u32 *end = addr + count; addr < end; addr++)
> > +   __put_kernel_nofault(addr, &val, u32, failed);
> > +
> > +   return 0;
> > +
> > +failed:
> > +   return -EPERM;
> > +}
> > +
> > +static int patch_memcpy(void *dst, void *src, size_t len)
> > +{
> > +   for (void *end = src + len; src < end; dst++, src++)
> > +   __put_kernel_nofault(dst, src, u8, failed);
> > +
> > +   return 0;
> > +
> > +failed:
> > +   return -EPERM;
> > +}
> > +
> >   static int __patch_instructions(u32 *patch_addr, u32 *code,
> > size_t len, bool repeat_instr)
> >   {
> >     unsigned long start = (unsigned long)patch_addr;
> > +   int err;
> >   
> >     /* Repeat instruction */
> >     if (repeat_instr) {
> > @@ -383,19 +417,19 @@ static int __patch_instructions(u32
> > *patch_addr, u32 *code, size_t len, bool rep
> >     if (ppc_inst_prefixed(instr)) {
> >     u64 val = ppc_inst_as_ulong(instr);
> >   
> > -   memset64((u64 *)patch_addr, val, len / 8);
> > +   err = patch_memset64((u64 *)patch_addr,
> > val, len / 8);
> >     } else {
> >     u32 val = ppc_inst_val(instr);
> >   
> > -   memset32(patch_addr, val, len / 4);
> > +   err = patch_memset32(patch_addr, val, len
> > / 4);
> >     }
> >     } else {
> > -   memcpy(patch_addr, code, len);
> > +   err = patch_memcpy(patch_addr, code, len);
> 
> Use copy_to_kernel_nofault() instead of open coding a new less
> optimised 
> version of it.
> 
> >     }
> >   
> >     smp_wmb();  /* smp write barrier */
> >     flush_icache_range(start, start + len);
> > -   return 0;
> > +   return err;
> >   }
> >   
> >   /*



Re: [PATCH v1 2/3] powerpc/code-patching: Use dedicated memory routines for patching

2024-03-15 Thread Christophe Leroy


Le 15/03/2024 à 03:57, Benjamin Gray a écrit :
> The patching page set up as a writable alias may be in quadrant 1
> (userspace) if the temporary mm path is used. This causes sanitiser
> failures if so. Sanitiser failures also occur on the non-mm path
> because the plain memset family is instrumented, and KASAN treats the
> patching window as poisoned.
> 
> Introduce locally defined patch_* variants of memset that perform an
> uninstrumented lower level set, as well as detecting write errors like
> the original single patch variant does.
> 
> copy_to_user() is not correct here, as the PTE makes it a proper kernel
> page (the EAA is privileged access only, RW). It just happens to be in
> quadrant 1 because that's the hardware's mechanism for using the current
> PID vs PID 0 in translations. Importantly, it's incorrect to allow user
> page accesses.
> 
> Now that the patching memsets are used, we also propagate a failure up
> to the caller as the single patch variant does.
> 
> Signed-off-by: Benjamin Gray 
> 
> ---
> 
> The patch_memcpy() can be optimised to 4 bytes at a time assuming the
> same requirements as regular instruction patching are being followed
> for the 'copy sequence of instructions' mode (i.e., they actually are
> instructions following instruction alignment rules).

Why not use copy_to_kernel_nofault() ?


> ---
>   arch/powerpc/lib/code-patching.c | 42 +---
>   1 file changed, 38 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/lib/code-patching.c 
> b/arch/powerpc/lib/code-patching.c
> index c6ab46156cda..c6633759b509 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -372,9 +372,43 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
>   }
>   NOKPROBE_SYMBOL(patch_instruction);
>   
> +static int patch_memset64(u64 *addr, u64 val, size_t count)
> +{
> + for (u64 *end = addr + count; addr < end; addr++)
> + __put_kernel_nofault(addr, &val, u64, failed);
> +
> + return 0;
> +
> +failed:
> + return -EPERM;

Is it correct ? Shouldn't it be -EFAULT ?

> +}
> +
> +static int patch_memset32(u32 *addr, u32 val, size_t count)
> +{
> + for (u32 *end = addr + count; addr < end; addr++)
> + __put_kernel_nofault(addr, &val, u32, failed);
> +
> + return 0;
> +
> +failed:
> + return -EPERM;
> +}
> +
> +static int patch_memcpy(void *dst, void *src, size_t len)
> +{
> + for (void *end = src + len; src < end; dst++, src++)
> + __put_kernel_nofault(dst, src, u8, failed);
> +
> + return 0;
> +
> +failed:
> + return -EPERM;
> +}
> +
>   static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, 
> bool repeat_instr)
>   {
>   unsigned long start = (unsigned long)patch_addr;
> + int err;
>   
>   /* Repeat instruction */
>   if (repeat_instr) {
> @@ -383,19 +417,19 @@ static int __patch_instructions(u32 *patch_addr, u32 
> *code, size_t len, bool rep
>   if (ppc_inst_prefixed(instr)) {
>   u64 val = ppc_inst_as_ulong(instr);
>   
> - memset64((u64 *)patch_addr, val, len / 8);
> + err = patch_memset64((u64 *)patch_addr, val, len / 8);
>   } else {
>   u32 val = ppc_inst_val(instr);
>   
> - memset32(patch_addr, val, len / 4);
> + err = patch_memset32(patch_addr, val, len / 4);
>   }
>   } else {
> - memcpy(patch_addr, code, len);
> + err = patch_memcpy(patch_addr, code, len);

Use copy_to_kernel_nofault() instead of open coding a new less optimised 
version of it.

>   }
>   
>   smp_wmb();  /* smp write barrier */
>   flush_icache_range(start, start + len);
> - return 0;
> + return err;
>   }
>   
>   /*


Re: [PATCH v1 2/3] powerpc/code-patching: Use dedicated memory routines for patching

2024-03-14 Thread Benjamin Gray
Also supersedes
https://lore.kernel.org/all/20240213043638.168048-1-bg...@linux.ibm.com/


[PATCH v1 2/3] powerpc/code-patching: Use dedicated memory routines for patching

2024-03-14 Thread Benjamin Gray
The patching page set up as a writable alias may be in quadrant 1
(userspace) if the temporary mm path is used. This causes sanitiser
failures if so. Sanitiser failures also occur on the non-mm path
because the plain memset family is instrumented, and KASAN treats the
patching window as poisoned.

Introduce locally defined patch_* variants of memset that perform an
uninstrumented lower level set, as well as detecting write errors like
the original single patch variant does.

copy_to_user() is not correct here, as the PTE makes it a proper kernel
page (the EAA is privileged access only, RW). It just happens to be in
quadrant 1 because that's the hardware's mechanism for using the current
PID vs PID 0 in translations. Importantly, it's incorrect to allow user
page accesses.

Now that the patching memsets are used, we also propagate a failure up
to the caller as the single patch variant does.

Signed-off-by: Benjamin Gray 

---

The patch_memcpy() can be optimised to 4 bytes at a time assuming the
same requirements as regular instruction patching are being followed
for the 'copy sequence of instructions' mode (i.e., they actually are
instructions following instruction alignment rules).
---
 arch/powerpc/lib/code-patching.c | 42 +---
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index c6ab46156cda..c6633759b509 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -372,9 +372,43 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
 }
 NOKPROBE_SYMBOL(patch_instruction);
 
+static int patch_memset64(u64 *addr, u64 val, size_t count)
+{
+   for (u64 *end = addr + count; addr < end; addr++)
+   __put_kernel_nofault(addr, &val, u64, failed);
+
+   return 0;
+
+failed:
+   return -EPERM;
+}
+
+static int patch_memset32(u32 *addr, u32 val, size_t count)
+{
+   for (u32 *end = addr + count; addr < end; addr++)
+   __put_kernel_nofault(addr, &val, u32, failed);
+
+   return 0;
+
+failed:
+   return -EPERM;
+}
+
+static int patch_memcpy(void *dst, void *src, size_t len)
+{
+   for (void *end = src + len; src < end; dst++, src++)
+   __put_kernel_nofault(dst, src, u8, failed);
+
+   return 0;
+
+failed:
+   return -EPERM;
+}
+
 static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool 
repeat_instr)
 {
unsigned long start = (unsigned long)patch_addr;
+   int err;
 
/* Repeat instruction */
if (repeat_instr) {
@@ -383,19 +417,19 @@ static int __patch_instructions(u32 *patch_addr, u32 
*code, size_t len, bool rep
if (ppc_inst_prefixed(instr)) {
u64 val = ppc_inst_as_ulong(instr);
 
-   memset64((u64 *)patch_addr, val, len / 8);
+   err = patch_memset64((u64 *)patch_addr, val, len / 8);
} else {
u32 val = ppc_inst_val(instr);
 
-   memset32(patch_addr, val, len / 4);
+   err = patch_memset32(patch_addr, val, len / 4);
}
} else {
-   memcpy(patch_addr, code, len);
+   err = patch_memcpy(patch_addr, code, len);
}
 
smp_wmb();  /* smp write barrier */
flush_icache_range(start, start + len);
-   return 0;
+   return err;
 }
 
 /*
-- 
2.44.0